Implement most of the reader

This commit is contained in:
Konstantin Nazarov 2024-07-27 23:13:59 +01:00
parent 110a7c8433
commit 1c2f272c1d
Signed by: knazarov
GPG key ID: 4CFE0A42FA409C22
5 changed files with 346 additions and 4 deletions

View file

@ -31,6 +31,7 @@ class GcRoot : public GcRootBase {
// Creates an empty root: both base-class arguments are passed as 0.
GcRoot() : GcRootBase(0, 0) {}
// Creates a root from an object pointer and a root-list node; both are
// forwarded unchanged to GcRootBase.
GcRoot(T* ptr, GcRootList* node) : GcRootBase(ptr, node) {}
// Move construction; defined out of line.
GcRoot(GcRoot&& rhs);
// Move assignment; defined out of line.
GcRoot& operator=(GcRoot&& rhs);
static Result<GcRoot<T>> create(T* ptr, Arena& arena);
Result<GcRoot<T>> copy(Arena& arena) {
@ -165,5 +166,20 @@ GcRoot<T>::GcRoot(GcRoot<T>&& rhs) {
rhs._node->update(this);
_ptr = rhs._ptr;
_node = rhs._node;
rhs._ptr = 0;
rhs._node = 0;
}
// Move assignment: releases the node currently held by this root (if any) and
// takes over the pointer and node of `rhs`, leaving `rhs` empty.
//
// Two cases are handled explicitly:
//  * self-move is a no-op, otherwise our own node would be removed and then
//    re-used;
//  * `rhs` may be an empty root (the default constructor passes 0 for both
//    pointer and node), in which case there is no node to call update() on.
template <class T>
  requires std::derived_from<T, PodObject>
GcRoot<T>& GcRoot<T>::operator=(GcRoot<T>&& rhs) {
  if (this == &rhs) return *this;

  if (_node != 0) _node->remove();

  // NOTE(review): update(this) presumably re-points the list node at its new
  // owner -- confirm against GcRootList.
  if (rhs._node != 0) rhs._node->update(this);

  _ptr = rhs._ptr;
  _node = rhs._node;
  rhs._ptr = 0;
  rhs._node = 0;
  return *this;
}

View file

@ -32,6 +32,17 @@ Result<Value> Value::create(Arena& arena, PodObject* obj) {
return Value();
}
// Builds a Symbol whose code points are a copy of the contents of `rhs`.
// Allocation and rooting failures are propagated through TRY.
Result<Symbol> Symbol::create(Arena& arena, String& rhs) {
  const uint64_t length = rhs.size();

  auto symbol_pod = TRY(arena.alloc<PodSymbol>(length * sizeof(char32_t)));
  symbol_pod->size = length;
  memcpy(symbol_pod->data, rhs._value->data, sizeof(char32_t) * length);

  return Symbol(TRY(MkGcRoot(symbol_pod, arena)));
}
Result<Value> syntax_unwrap(Arena& arena, Value& val) {
Syntax* syntax = val.to<Syntax>();
if (syntax == 0) return val.copy(arena);

View file

@ -52,6 +52,12 @@ class String : public Object {
// Default constructor: `_value` is default-constructed.
String() {}
// Move constructor: takes over the root held by `rhs`.
String(String&& rhs) : _value(std::move(rhs._value)) {}
// Wraps an already-rooted PodString.
String(GcRoot<PodString>&& val) : _value(std::move(val)) {}
// Move assignment: hands the root owned by `rhs` over to this string.
String& operator=(String&& rhs) {
  GcRoot<PodString>& incoming = rhs._value;
  _value = std::move(incoming);
  return *this;
}
// Type tag of this object: always Tag::String.
virtual Tag tag() final { return Tag::String; }
// Raw pointer to the underlying pod object, as returned by the root's get().
virtual PodObject* pod() final { return _value.get(); }
@ -78,13 +84,62 @@ class String : public Object {
}
// Number of char32_t elements stored (the pod's `size` field).
uint64_t size() { return _value->size; }
virtual Result<Value> copy(Arena& arena) final;
// Bounds-checked element access: yields the code point at `idx`, or
// ErrorCode::IndexOutOfRange when `idx` is not below the stored size.
Result<char32_t> operator[](uint64_t idx) {
  const bool in_range = idx < _value->size;
  if (!in_range) {
    return ErrorCode::IndexOutOfRange;
  }
  return _value->data[idx];
}
virtual Result<Value> copy(Arena& arena) final;
// Returns a new String consisting of this string followed by the bytes of the
// NUL-terminated C string `rhs`, each byte widened to one char32_t element.
// Allocation and rooting failures are propagated through TRY.
Result<String> concat(Arena& arena, const char* rhs) {
  uint64_t rhs_size = strlen(rhs);
  uint64_t lhs_size = size();
  uint64_t res_size = lhs_size + rhs_size;

  auto pod = TRY(arena.alloc<PodString>(res_size * sizeof(char32_t)));
  pod->size = res_size;
  memcpy(pod->data, _value->data, sizeof(char32_t) * lhs_size);

  // Widen through unsigned char: where plain `char` is signed, a byte >= 0x80
  // would otherwise sign-extend into a huge, invalid char32_t value.
  for (uint64_t i = 0; i < rhs_size; i++) {
    pod->data[lhs_size + i] = static_cast<unsigned char>(rhs[i]);
  }

  return String(TRY(MkGcRoot(pod, arena)));
}
// Returns a new String consisting of this string followed by the first
// `rhs_size` elements of `rhs`.
Result<String> concat(Arena& arena, const char32_t* rhs, uint64_t rhs_size) {
  const uint64_t own_len = size();
  const uint64_t total_len = own_len + rhs_size;

  auto joined = TRY(arena.alloc<PodString>(total_len * sizeof(char32_t)));
  joined->size = total_len;
  memcpy(joined->data, _value->data, sizeof(char32_t) * own_len);

  // Append the tail one element at a time.
  uint64_t copied = 0;
  while (copied < rhs_size) {
    joined->data[own_len + copied] = rhs[copied];
    ++copied;
  }

  return String(TRY(MkGcRoot(joined, arena)));
}
// Returns a new String consisting of this string followed by `rhs`.
Result<String> concat(Arena& arena, String& rhs) {
  const uint64_t tail_len = rhs.size();
  const uint64_t head_len = size();
  const uint64_t total_len = head_len + tail_len;

  auto joined = TRY(arena.alloc<PodString>(total_len * sizeof(char32_t)));
  joined->size = total_len;

  // Head first, then the tail directly behind it.
  char32_t* out = joined->data;
  memcpy(out, _value->data, sizeof(char32_t) * head_len);
  memcpy(out + head_len, rhs._value->data, sizeof(char32_t) * tail_len);

  return String(TRY(MkGcRoot(joined, arena)));
}
// Returns a new String holding the elements in the half-open range
// [start, end) of this string.
//
// Fails with ErrorCode::IndexOutOfRange when start > end or when `end` lies
// past the end of the string; without the second check memcpy would read
// beyond the stored data. An empty range (start == end) is valid.
Result<String> sub(Arena& arena, uint64_t start, uint64_t end) {
  if (start > end || end > size()) return ErrorCode::IndexOutOfRange;

  uint64_t res_size = end - start;
  auto pod = TRY(arena.alloc<PodString>(res_size * sizeof(char32_t)));
  pod->size = res_size;
  memcpy(pod->data, _value->data + start, sizeof(char32_t) * res_size);

  return String(TRY(MkGcRoot(pod, arena)));
}
friend class Symbol;
private:
GcRoot<PodString> _value;
@ -110,6 +165,7 @@ class Symbol : public Object {
return Symbol(TRY(MkGcRoot(pod_symbol, arena)));
}
static Result<Symbol> create(Arena& arena, String& rhs);
virtual Result<Value> copy(Arena& arena) final;
private:

View file

@ -5,5 +5,7 @@ enum class ErrorCode {
OutOfMemory,
IndexOutOfRange,
TypeMismatch,
ReadError
ReadError,
UnterminatedStringLiteral,
InvalidNumericLiteral
};

View file

@ -1,6 +1,7 @@
#include "reader.hpp"
#include "common.hpp"
#include "error.hpp"
// True only for the ASCII decimal digits '0'..'9'.
static bool is_digit(char32_t c) {
  if (c < '0') return false;
  return c <= '9';
}
static bool is_alpha(char32_t c) {
@ -55,9 +56,13 @@ Result<Value> Reader::read_one() {
} else if (match('(')) {
return read_list();
} else if (match('[')) {
return read_array();
// TODO: implement array
// return read_array();
return ErrorCode::ReadError;
} else if (match('{')) {
return read_dict();
// TODO: implement dicts
// return read_dict();
return ErrorCode::ReadError;
}
position_ = saved_position;
@ -125,3 +130,255 @@ Result<Value> Reader::read_bool() {
return ErrorCode::ReadError;
}
// Reads a double-quoted string literal starting at the current position.
//
// Phase 1 scans to the closing quote. A backslash makes the scanner skip the
// following character, so `\"` does not terminate the literal. An unescaped
// line break or end of input yields ErrorCode::UnterminatedStringLiteral.
//
// Phase 2 decodes the body. Recognised escapes:
//   \0 -> NUL, \b \f \n \r \t \v -> the matching control character,
//   \" -> ",   \\ -> \
// Any other escaped character is kept verbatim together with its backslash.
//
// The literal must be followed by a separator or end of input, otherwise
// ErrorCode::ReadError is returned.
Result<Value> Reader::read_string() {
  if (!match('"')) return ErrorCode::ReadError;

  size_t start = position_.offset + 1;

  forward();

  while (!match('"') && !match('\r') && !match('\n') && !is_eof()) {
    if (match('\\')) {
      forward();
    }
    forward();
  }

  if (!match('"')) return ErrorCode::UnterminatedStringLiteral;

  // Number of characters between the quotes; fixed before the closing quote is
  // consumed so the decode loop does not recompute it on every iteration.
  const size_t length = position_.offset - start;

  forward();

  String result = TRY(String::create(_arena, ""));

  for (size_t i = 0; i < length; i++) {
    char32_t cur = TRY(_str[start + i]);
    if (cur != '\\') {
      // TODO: optimize this (one allocation per character)
      result = TRY(result.concat(_arena, &cur, 1));
      continue;
    }

    ++i;
    // Defensive only: the scanner never leaves a lone backslash directly in
    // front of the closing quote.
    if (i >= length) break;

    char32_t next = TRY(_str[start + i]);
    char32_t decoded = next;
    bool known_escape = true;

    switch (next) {
      case '0':
      case '\0':
        // Appended through the char32_t overload: the const char* overload
        // measures its argument with strlen and would append nothing for "\0".
        decoded = 0;
        break;
      case 'b':
        decoded = '\b';
        break;
      case 'f':
        decoded = '\f';
        break;
      case 'n':
        decoded = '\n';
        break;
      case 'r':
        decoded = '\r';
        break;
      case 't':
        decoded = '\t';
        break;
      case 'v':
        decoded = '\v';
        break;
      case '"':
      case '\\':
        // The scanner treats these as escaped, so the backslash is dropped.
        decoded = next;
        break;
      default:
        known_escape = false;
        break;
    }

    // Unknown escapes are preserved as written: backslash plus character.
    if (!known_escape) result = TRY(result.concat(_arena, "\\"));
    result = TRY(result.concat(_arena, &decoded, 1));
  }

  if (!is_separator(get()) && !is_eof()) return ErrorCode::ReadError;

  return Value(std::move(result));
}
// Reads a numeric literal starting at the current position.
//
// Accepted shapes (optionally preceded by '+' or '-'):
//   * decimal integers, including ones with leading zeros;
//   * hexadecimal integers written 0x... / 0X...;
//   * floats with a fractional part and/or an exponent, optionally followed by
//     the suffix "f64".
//
// The literal text is copied into a 32-byte buffer for strtod/strtoll, so
// literals of 32 characters or more are rejected with
// ErrorCode::InvalidNumericLiteral, as are malformed exponents and a hex
// prefix without digits. The literal must be followed by a separator or end of
// input, otherwise ErrorCode::ReadError is returned.
Result<Value> Reader::read_number() {
  if (!is_numeric_start()) return ErrorCode::ReadError;

  size_t start = position_.offset;

  bool is_float = false;
  bool is_hex = false;

  if (match('+') || match('-')) forward();

  if (match('0')) {
    forward();
    if (match('.')) {
      is_float = true;
      forward();
      while (is_digit(get())) forward();
      if (match('e') || match('E')) {
        if (!forward_exponent()) return ErrorCode::InvalidNumericLiteral;
      }
    } else if (match('e') || match('E')) {
      is_float = true;
      if (!forward_exponent()) return ErrorCode::InvalidNumericLiteral;
    } else if (match('x') || match('X')) {
      is_hex = true;
      // forward_hex_number() expects to start on the first hex digit, so the
      // 'x' marker has to be consumed here; otherwise every hex literal is
      // rejected.
      forward();
      if (!forward_hex_number()) return ErrorCode::InvalidNumericLiteral;
    } else {
      // Further decimal digits after a leading zero, if any.
      while (is_digit(get())) forward();
    }
  } else {
    while (is_digit(get())) forward();

    if (match('.')) {
      is_float = true;
      forward();
      while (is_digit(get())) forward();
    }

    if (match('e') || match('E')) {
      is_float = true;
      if (!forward_exponent()) return ErrorCode::InvalidNumericLiteral;
    }
  }

  Value res;

  if (position_.offset - start >= 32) return ErrorCode::InvalidNumericLiteral;

  char buf[32];
  for (size_t i = 0; i < position_.offset - start; ++i) {
    buf[i] = char(TRY(_str[start + i]));
  }
  buf[position_.offset - start] = '\0';

  if (is_float) {
    res = Value(TRY(Float::create(_arena, strtod(buf, 0))));
    if (match("f64")) {
      forward(3);
    }
  } else {
    // Base 16 makes strtoll accept the optional sign and "0x" prefix; base 10
    // would stop at the 'x' and yield 0. Base 0 is deliberately not used
    // because it would read leading-zero decimals as octal.
    res = Value(TRY(Int64::create(_arena, strtoll(buf, 0, is_hex ? 16 : 10))));
  }

  if (!is_separator(get()) && !is_eof()) return ErrorCode::ReadError;

  return res;
}
// Reads a symbol: a maximal run of symbol characters that must be followed by
// a separator or end of input. Returns ErrorCode::ReadError otherwise.
Result<Value> Reader::read_symbol() {
  if (!is_symbol_char(get())) {
    return ErrorCode::ReadError;
  }

  const size_t first = position_.offset;
  // The current character is known to be a symbol character, so consume it and
  // keep going while more follow.
  do {
    forward();
  } while (is_symbol_char(get()));
  const size_t last = position_.offset;

  String name = TRY(_str.sub(_arena, first, last));

  const bool properly_terminated = is_separator(get()) || is_eof();
  if (!properly_terminated) {
    return ErrorCode::ReadError;
  }

  return Value(TRY(Symbol::create(_arena, name)));
}
// Returns the character `offset` positions ahead of the current position, or
// 0 when the indexed lookup into the input fails.
char32_t Reader::get(size_t offset) {
  auto lookup = _str[position_.offset + offset];
  if (!lookup.has_value()) {
    return 0;
  }
  return *lookup;
}
bool Reader::is_eol() { return match('\n') || match('\r'); }
// True when the current offset equals the size of the input string.
bool Reader::is_eof() {
  const auto input_size = _str.size();
  return input_size == position_.offset;
}
// True when the current character is horizontal whitespace: space, tab,
// vertical tab or form feed. Line breaks are reported by is_eol() instead.
bool Reader::is_whitespace() {
  return match(' ') || match('\t') || match('\v') || match('\f');
}
bool Reader::is_comment_start() { return match(';'); }
bool Reader::is_string_start() { return match('"'); }
// True when the current character passes is_symbol_char().
bool Reader::is_symbol_start() {
  const char32_t current = get();
  return is_symbol_char(current);
}
// Decides whether a numeric literal begins at the current position. Accepted
// openings are: a digit; '.' followed by a digit; a sign followed by a digit;
// or a sign followed by '.' and a digit.
bool Reader::is_numeric_start() {
  const char32_t first = get();
  if (is_digit(first)) return true;

  const char32_t second = get(1);
  if (first == '.') return is_digit(second);

  const bool has_sign = (first == '+' || first == '-');
  if (!has_sign) return false;

  if (is_digit(second)) return true;
  return second == '.' && is_digit(get(2));
}
// True when the input at the current position starts with the NUL-terminated
// string `str`. Nothing is consumed. An empty `str` always matches.
bool Reader::match(const char* str) {
  size_t idx = 0;
  while (str[idx] != '\0') {
    if (get(idx) != char32_t(str[idx])) {
      return false;
    }
    ++idx;
  }
  return true;
}
// True when the current character equals `c`. Nothing is consumed.
bool Reader::match(char c) {
  const char32_t wanted = char32_t(c);
  return get() == wanted;
}
// Calls forward() `n` times.
void Reader::forward(size_t n) {
  while (n > 0) {
    forward();
    --n;
  }
}
// Advances by one character and keeps line/column in step with the offset.
// Does nothing at end of input. A "\r\n" pair is consumed as a single line
// break (offset moves by two).
void Reader::forward() {
  if (is_eof()) return;

  if (!is_eol()) {
    // Ordinary character: same line, next column.
    position_.column++;
    position_.offset++;
    return;
  }

  // Line break: start of the next line.
  const bool is_crlf = match("\r\n");
  if (is_crlf) position_.offset++;
  position_.offset++;
  position_.line++;
  position_.column = 1;
}
// Skips everything that is not part of a datum: line breaks, horizontal
// whitespace and ';' comments.
//
// A comment runs from ';' up to, but not including, the next line break. The
// line break is then handled by the next loop iteration. The comment body must
// be skipped with a pre-checked loop: an unconditional first step after ';'
// would consume the newline of an empty comment and swallow the whole
// following line as if it were still part of the comment.
void Reader::forward_whitespace() {
  while (true) {
    if (is_eol() || is_whitespace()) {
      forward();
    } else if (is_comment_start()) {
      forward();  // the ';' itself
      while (!is_eof() && !is_eol()) forward();
    } else {
      break;
    }
  }
}
// Consumes a run of decimal digits. Returns false, consuming nothing, when the
// current character is not a digit.
bool Reader::forward_decimal_number() {
  bool consumed_any = false;
  while (is_digit(get())) {
    forward();
    consumed_any = true;
  }
  return consumed_any;
}
// Consumes a run of hexadecimal digits. Returns false, consuming nothing, when
// the current character is not a hex digit.
bool Reader::forward_hex_number() {
  bool consumed_any = false;
  while (is_hex_digit(get())) {
    forward();
    consumed_any = true;
  }
  return consumed_any;
}
// Consumes an exponent: 'e' or 'E', an optional sign, then at least one
// decimal digit. Returns false when the marker is absent (nothing consumed) or
// when no digit follows (marker and sign already consumed).
bool Reader::forward_exponent() {
  const bool at_marker = match('e') || match('E');
  if (!at_marker) {
    return false;
  }
  forward();

  const bool has_sign = match('-') || match('+');
  if (has_sign) {
    forward();
  }

  return forward_decimal_number();
}