diff --git a/src/arena.hpp b/src/arena.hpp
index 3b5e673..555d6fc 100644
--- a/src/arena.hpp
+++ b/src/arena.hpp
@@ -31,6 +31,7 @@ class GcRoot : public GcRootBase {
   GcRoot() : GcRootBase(0, 0) {}
   GcRoot(T* ptr, GcRootList* node) : GcRootBase(ptr, node) {}
   GcRoot(GcRoot&& rhs);
+  GcRoot& operator=(GcRoot&& rhs);
 
   static Result> create(T* ptr, Arena& arena);
   Result> copy(Arena& arena) {
@@ -165,5 +166,27 @@ GcRoot::GcRoot(GcRoot&& rhs) {
   rhs._node->update(this);
   _ptr = rhs._ptr;
   _node = rhs._node;
+  rhs._ptr = 0;
   rhs._node = 0;
 }
+
+// Move assignment: calls remove() on the node this root currently holds (if
+// any), then takes over rhs's pointer and node and leaves rhs empty.
+template
+  requires std::derived_from
+GcRoot& GcRoot::operator=(GcRoot&& rhs) {
+  // Self-move must be a no-op; otherwise the node would be removed and both
+  // members zeroed, losing the root.
+  if (this == &rhs) return *this;
+
+  if (_node != 0) _node->remove();
+
+  // rhs has no node when it was default-constructed or already moved from.
+  if (rhs._node != 0) rhs._node->update(this);
+  _ptr = rhs._ptr;
+  _node = rhs._node;
+  rhs._ptr = 0;
+  rhs._node = 0;
+
+  return *this;
+}
diff --git a/src/common.cpp b/src/common.cpp
index cfdd21b..8a6fcf5 100644
--- a/src/common.cpp
+++ b/src/common.cpp
@@ -32,6 +32,19 @@ Result Value::create(Arena& arena, PodObject* obj) {
   return Value();
 }
 
+// Creates a Symbol whose characters are copied from `rhs`; the storage is
+// allocated from `arena`.
+Result Symbol::create(Arena& arena, String& rhs) {
+  uint64_t rhs_size = rhs.size();
+  uint64_t res_size = rhs_size;
+
+  auto pod = TRY(arena.alloc(res_size * sizeof(char32_t)));
+  pod->size = res_size;
+  memcpy(pod->data, rhs._value->data, sizeof(char32_t) * rhs_size);
+
+  return Symbol(TRY(MkGcRoot(pod, arena)));
+}
+
 Result syntax_unwrap(Arena& arena, Value& val) {
   Syntax* syntax = val.to();
   if (syntax == 0) return val.copy(arena);
diff --git a/src/common.hpp b/src/common.hpp
index fe73c33..3438751 100644
--- a/src/common.hpp
+++ b/src/common.hpp
@@ -52,6 +52,13 @@ class String : public Object {
   String() {}
   String(String&& rhs) : _value(std::move(rhs._value)) {}
   String(GcRoot&& val) : _value(std::move(val)) {}
+
+  // Move assignment: takes over the value held by rhs.
+  String& operator=(String&& rhs) {
+    _value = std::move(rhs._value);
+    return *this;
+  }
+
   virtual Tag tag() final { return Tag::String; }
   virtual PodObject* pod() final { return _value.get(); }
 
@@ -78,13 +85,69 @@ class String : public Object {
   }
 
   uint64_t size() { return _value->size; }
+  virtual Result copy(Arena& arena) final;
 
   Result operator[](uint64_t idx) {
     if (idx >= _value->size) return ErrorCode::IndexOutOfRange;
     return _value->data[idx];
   }
 
-  virtual Result copy(Arena& arena) final;
+  // Returns a new string: this string followed by the characters of the
+  // NUL-terminated `rhs`, each converted to char32_t.
+  Result concat(Arena& arena, const char* rhs) {
+    uint64_t rhs_size = strlen(rhs);
+    uint64_t lhs_size = size();
+    uint64_t res_size = lhs_size + rhs_size;
+
+    auto pod = TRY(arena.alloc(res_size * sizeof(char32_t)));
+    pod->size = res_size;
+    memcpy(pod->data, _value->data, sizeof(char32_t) * lhs_size);
+    for (uint64_t i = 0; i < rhs_size; i++) pod->data[lhs_size + i] = rhs[i];
+
+    return String(TRY(MkGcRoot(pod, arena)));
+  }
+
+  // Returns a new string: this string followed by `rhs_size` characters read
+  // from `rhs`.
+  Result concat(Arena& arena, const char32_t* rhs, uint64_t rhs_size) {
+    uint64_t lhs_size = size();
+    uint64_t res_size = lhs_size + rhs_size;
+
+    auto pod = TRY(arena.alloc(res_size * sizeof(char32_t)));
+    pod->size = res_size;
+    memcpy(pod->data, _value->data, sizeof(char32_t) * lhs_size);
+    for (uint64_t i = 0; i < rhs_size; i++) pod->data[lhs_size + i] = rhs[i];
+
+    return String(TRY(MkGcRoot(pod, arena)));
+  }
+
+  // Returns a new string: this string followed by the contents of `rhs`.
+  Result concat(Arena& arena, String& rhs) {
+    uint64_t rhs_size = rhs.size();
+    uint64_t lhs_size = size();
+    uint64_t res_size = lhs_size + rhs_size;
+
+    auto pod = TRY(arena.alloc(res_size * sizeof(char32_t)));
+    pod->size = res_size;
+    memcpy(pod->data, _value->data, sizeof(char32_t) * lhs_size);
+    memcpy(pod->data + lhs_size, rhs._value->data, sizeof(char32_t) * rhs_size);
+
+    return String(TRY(MkGcRoot(pod, arena)));
+  }
+
+  // Returns a copy of the half-open range [start, end) of this string.
+  // Fails with IndexOutOfRange unless start <= end <= size().
+  Result sub(Arena& arena, uint64_t start, uint64_t end) {
+    if (start > end || end > size()) return ErrorCode::IndexOutOfRange;
+    uint64_t res_size = end - start;
+    auto pod = TRY(arena.alloc(res_size * sizeof(char32_t)));
+    pod->size = res_size;
+    memcpy(pod->data, _value->data + start, sizeof(char32_t) * res_size);
+
+    return String(TRY(MkGcRoot(pod, arena)));
+  }
+
+  friend class Symbol;
 
  private:
   GcRoot _value;
@@ -110,6 +173,7 @@ class Symbol : public Object {
     return Symbol(TRY(MkGcRoot(pod_symbol, arena)));
   }
 
+  static Result create(Arena& arena, String& rhs);
   virtual Result copy(Arena& arena) final;
 
  private:
diff --git a/src/error.hpp b/src/error.hpp
index c09caa2..4818cb2 100644
--- a/src/error.hpp
+++ b/src/error.hpp
@@ -5,5 +5,7 @@ enum class ErrorCode {
   OutOfMemory,
   IndexOutOfRange,
   TypeMismatch,
-  ReadError
+  ReadError,
+  UnterminatedStringLiteral,
+  InvalidNumericLiteral
 };
diff --git a/src/reader.cpp b/src/reader.cpp
index a0bb4e5..79b6251 100644
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -1,6 +1,7 @@
 #include "reader.hpp"
 
 #include "common.hpp"
+#include "error.hpp"
 
 static bool is_digit(char32_t c) { return c >= '0' && c <= '9'; }
 static bool is_alpha(char32_t c) {
@@ -55,9 +56,13 @@ Result Reader::read_one() {
   } else if (match('(')) {
     return read_list();
   } else if (match('[')) {
-    return read_array();
+    // TODO: implement array
+    // return read_array();
+    return ErrorCode::ReadError;
   } else if (match('{')) {
-    return read_dict();
+    // TODO: implement dicts
+    // return read_dict();
+    return ErrorCode::ReadError;
   }
 
   position_ = saved_position;
@@ -125,3 +130,313 @@ Result Reader::read_bool() {
 
   return ErrorCode::ReadError;
 }
+
+// Reads a double-quoted string literal at the current position and returns it
+// as a String value with the recognised backslash escapes decoded.
+// Errors: ReadError if the input does not start with '"' or the literal is not
+// followed by a separator or EOF; UnterminatedStringLiteral if a line break or
+// EOF is reached before the closing quote.
+Result Reader::read_string() {
+  if (!match('"')) return ErrorCode::ReadError;
+  size_t start = position_.offset + 1;
+
+  forward();
+
+  // Find the closing quote. A backslash also consumes the character after it,
+  // so an escaped quote does not end the literal.
+  while (!match('"') && !match('\r') && !match('\n') && !is_eof()) {
+    if (match('\\')) {
+      forward();
+    }
+
+    forward();
+  }
+
+  if (!match('"')) return ErrorCode::UnterminatedStringLiteral;
+
+  forward();
+
+  // position_ is now one past the closing quote, so this is the number of
+  // characters between the quotes.
+  const size_t length = position_.offset - start - 1;
+
+  String result = TRY(String::create(_arena, ""));
+
+  for (size_t i = 0; i < length; i++) {
+    char32_t cur = TRY(_str[start + i]);
+    if (cur != '\\') {
+      // TODO: optimize this
+      result = TRY(result.concat(_arena, &cur, 1));
+      continue;
+    }
+    ++i;
+    if (i >= length) continue;
+    char32_t next = TRY(_str[start + i]);
+
+    // The decoded character is appended through the (pointer, length)
+    // overload: the const char* overload sizes its argument with strlen, so
+    // it cannot append a NUL.
+    char32_t decoded = next;
+    bool known_escape = true;
+    switch (next) {
+      case '\0':
+      case '"':
+      case '\\':
+        // These stand for themselves once the backslash is dropped.
+        break;
+      case 'b':
+        decoded = '\b';
+        break;
+      case 'f':
+        decoded = '\f';
+        break;
+      case 'n':
+        decoded = '\n';
+        break;
+      case 'r':
+        decoded = '\r';
+        break;
+      case 't':
+        decoded = '\t';
+        break;
+      case 'v':
+        decoded = '\v';
+        break;
+      default:
+        known_escape = false;
+        break;
+    }
+
+    // Unknown escapes are kept verbatim, backslash included.
+    if (!known_escape) {
+      result = TRY(result.concat(_arena, "\\"));
+    }
+    result = TRY(result.concat(_arena, &decoded, 1));
+  }
+
+  if (!is_separator(get()) && !is_eof()) return ErrorCode::ReadError;
+
+  return Value(std::move(result));
+}
+
+// Reads an integer or floating-point literal at the current position.
+// Integers may be decimal or 0x-prefixed hexadecimal; a float may carry an
+// "f64" suffix. Errors: ReadError if the input does not start a number or the
+// literal is not followed by a separator or EOF; InvalidNumericLiteral for a
+// malformed exponent or hex part, or a literal of 32 or more characters.
+Result Reader::read_number() {
+  if (!is_numeric_start()) return ErrorCode::ReadError;
+  size_t start = position_.offset;
+
+  bool is_float = false;
+  bool is_hex = false;
+
+  if (match('+') || match('-')) forward();
+
+  if (match('0')) {
+    forward();
+    if (match('.')) {
+      is_float = true;
+      forward();
+      while (is_digit(get())) forward();
+
+      if (match('e') || match('E')) {
+        if (!forward_exponent()) return ErrorCode::InvalidNumericLiteral;
+      }
+    } else if (match('e') || match('E')) {
+      is_float = true;
+      if (!forward_exponent()) return ErrorCode::InvalidNumericLiteral;
+    } else if (match('x') || match('X')) {
+      // forward_hex_number() starts by checking for a hex digit, so the 'x'
+      // has to be consumed here.
+      is_hex = true;
+      forward();
+      if (!forward_hex_number()) return ErrorCode::InvalidNumericLiteral;
+    } else if (is_digit(get())) {
+      do {
+        forward();
+      } while (is_digit(get()));
+    }
+  } else {
+    while (is_digit(get())) forward();
+    if (match('.')) {
+      is_float = true;
+      forward();
+      while (is_digit(get())) forward();
+    }
+    if (match('e') || match('E')) {
+      is_float = true;
+      if (!forward_exponent()) return ErrorCode::InvalidNumericLiteral;
+    }
+  }
+
+  Value res;
+
+  if (position_.offset - start >= 32) return ErrorCode::InvalidNumericLiteral;
+
+  // Copy the literal into a NUL-terminated buffer for strtod/strtoll.
+  char buf[32];
+  for (size_t i = 0; i < position_.offset - start; ++i) {
+    buf[i] = char(TRY(_str[start + i]));
+  }
+  buf[position_.offset - start] = '\0';
+
+  if (is_float) {
+    res = Value(TRY(Float::create(_arena, strtod(buf, 0))));
+    if (match("f64")) {
+      forward(3);
+    }
+  } else {
+    // With base 16 strtoll accepts the optional sign and "0x"/"0X" prefix;
+    // base 10 would stop at the 'x' and yield 0.
+    const int base = is_hex ? 16 : 10;
+    res = Value(TRY(Int64::create(_arena, strtoll(buf, 0, base))));
+  }
+
+  if (!is_separator(get()) && !is_eof()) return ErrorCode::ReadError;
+
+  return res;
+}
+
+// Reads a run of symbol characters at the current position and returns it as
+// a Symbol value. Fails with ReadError if there is no symbol character here or
+// the run is not followed by a separator or EOF.
+Result Reader::read_symbol() {
+  if (!is_symbol_char(get())) return ErrorCode::ReadError;
+
+  size_t start = position_.offset;
+
+  while (is_symbol_char(get())) forward();
+
+  size_t end = position_.offset;
+
+  String str = TRY(_str.sub(_arena, start, end));
+
+  if (!is_separator(get()) && !is_eof()) return ErrorCode::ReadError;
+
+  return Value(TRY(Symbol::create(_arena, str)));
+}
+
+// Returns the character `offset` positions past the current one, or 0 when
+// that position cannot be read from the input.
+char32_t Reader::get(size_t offset) {
+  size_t pos = position_.offset + offset;
+  auto res = _str[pos];
+  if (res.has_value()) return *res;
+  return 0;
+}
+
+// True when the current character is '\n' or '\r'.
+bool Reader::is_eol() { return match('\n') || match('\r'); }
+
+// True when the read position has reached the end of the input.
+bool Reader::is_eof() { return position_.offset == _str.size(); }
+
+// True when the current character is ' ', '\t', '\v' or '\f'.
+bool Reader::is_whitespace() {
+  return match(' ') || match('\t') || match('\v') || match('\f');
+}
+
+// True when the current character is ';'.
+bool Reader::is_comment_start() { return match(';'); }
+
+// True when the current character is '"'.
+bool Reader::is_string_start() { return match('"'); }
+
+// True when the current character is a symbol character.
+bool Reader::is_symbol_start() { return is_symbol_char(get()); }
+
+// True when a number starts here: a digit, optionally preceded by a sign, a
+// '.', or a sign followed by a '.'.
+bool Reader::is_numeric_start() {
+  char32_t c = get();
+  if (is_digit(c)) return true;
+  if (c == '.' && is_digit(get(1))) return true;
+  if ((c == '+' || c == '-') && is_digit(get(1))) return true;
+  if ((c == '+' || c == '-') && get(1) == '.' && is_digit(get(2))) return true;
+  return false;
+}
+
+// True when the input at the current position starts with `str`.
+bool Reader::match(const char* str) {
+  size_t slen = strlen(str);
+  for (size_t i = 0; i < slen; i++) {
+    if (get(i) != char32_t(str[i])) return false;
+  }
+  return true;
+}
+
+// True when the current character equals `c`.
+bool Reader::match(char c) { return get() == char32_t(c); }
+
+// Advances by up to `n` characters; forward() stops moving at EOF.
+void Reader::forward(size_t n) {
+  for (size_t i = 0; i < n; i++) {
+    forward();
+  }
+}
+
+// Advances one character, updating line and column. A "\r\n" pair is consumed
+// as a single line break. Does nothing at EOF.
+void Reader::forward() {
+  if (is_eof()) {
+    return;
+  }
+
+  if (is_eol()) {
+    if (match("\r\n")) {
+      position_.offset++;
+    }
+    position_.line++;
+    position_.column = 1;
+  } else {
+    position_.column++;
+  }
+
+  position_.offset++;
+}
+
+// Skips line breaks, whitespace and ';' comments, which run to end of line.
+void Reader::forward_whitespace() {
+  while (true) {
+    if (is_eol()) {
+      forward();
+    } else if (is_whitespace()) {
+      forward();
+    } else if (is_comment_start()) {
+      forward();
+      // Stop on the line break itself: a comment consisting of just ';' must
+      // not swallow the following line.
+      while (!is_eof() && !is_eol()) forward();
+    } else {
+      break;
+    }
+  }
+}
+
+// Consumes a run of decimal digits; returns false if there is none here.
+bool Reader::forward_decimal_number() {
+  if (!is_digit(get())) return false;
+
+  while (is_digit(get())) forward();
+  return true;
+}
+
+// Consumes a run of hex digits; returns false if there is none here.
+bool Reader::forward_hex_number() {
+  if (!is_hex_digit(get())) return false;
+
+  while (is_hex_digit(get())) forward();
+  return true;
+}
+
+// Consumes an exponent: 'e' or 'E', an optional sign, then decimal digits.
+// Returns false if the current character is not 'e'/'E' or no digit follows.
+bool Reader::forward_exponent() {
+  if (!match('e') && !match('E')) return false;
+  forward();
+
+  if (match('-') || match('+')) forward();
+
+  return forward_decimal_number();
+}