valeri/src/reader.cpp

426 lines
9.6 KiB
C++
Raw Normal View History

2024-07-27 15:25:44 +00:00
#include "reader.hpp"
#include "common.hpp"
2024-07-27 22:13:59 +00:00
#include "error.hpp"
2024-07-27 15:25:44 +00:00
// Returns true when `c` is an ASCII decimal digit ('0' through '9').
static bool is_digit(char32_t c) {
  if (c < U'0') return false;
  return c <= U'9';
}
// Returns true when `c` is an ASCII letter, lower or upper case.
static bool is_alpha(char32_t c) {
  const bool lower = U'a' <= c && c <= U'z';
  const bool upper = U'A' <= c && c <= U'Z';
  return lower || upper;
}
// Returns true when `c` is an ASCII hexadecimal digit: 0-9, a-f or A-F.
static bool is_hex_digit(char32_t c) {
  if (is_digit(c)) return true;
  const bool lower_hex = c >= 'a' && c <= 'f';
  const bool upper_hex = c >= 'A' && c <= 'F';
  return lower_hex || upper_hex;
}
// Returns true when `c` may appear inside a symbol: an ASCII digit, an ASCII
// letter, or one of the punctuation characters listed below.
static bool is_symbol_char(char32_t c) {
  static const char punctuation[] = {'!', '$', '%', '&', '*', '+',
                                     '-', '.', '/', ':', '<', '=',
                                     '>', '?', '@', '^', '_', '~'};
  if (is_digit(c) || is_alpha(c)) return true;
  for (const char allowed : punctuation) {
    if (char32_t(allowed) == c) return true;
  }
  return false;
}
// Returns true for horizontal/vertical blank characters: space, tab,
// vertical tab and form feed. Line terminators are NOT included here.
static bool is_whitespace(char32_t c) {
  switch (c) {
    case U' ':
    case U'\t':
    case U'\v':
    case U'\f':
      return true;
    default:
      return false;
  }
}
// Returns true when `c` is an opening or closing square bracket,
// parenthesis or curly brace.
static bool is_brace(char32_t c) {
  switch (c) {
    case U'[':
    case U']':
    case U'(':
    case U')':
    case U'{':
    case U'}':
      return true;
    default:
      return false;
  }
}
// Returns true when `c` is a carriage return or a line feed.
static bool is_newline(char32_t c) {
  if (c == U'\n') return true;
  return c == U'\r';
}
// Returns true when `c` ends a token: blank characters, any brace, a line
// terminator, or ';'.
static bool is_separator(char32_t c) {
  if (c == ';') return true;
  if (is_whitespace(c)) return true;
  if (is_brace(c)) return true;
  return is_newline(c);
}
// End-of-stream marker value.
// NOTE(review): presumably meant to name the 0 that Reader::get() returns for
// an out-of-range offset; EOS is not referenced in the visible code — confirm
// before relying on it.
#define EOS 0
// Reads a single datum starting at the current position.
//
// Leading whitespace and comments are skipped first. The kind of datum is
// chosen from the upcoming characters:
//   - a numeric start           -> read_number()
//   - the text "true"/"false"   -> read_bool()
//   - '"'                       -> read_string()
//   - '('                       -> read_list()
//   - '['                       -> not implemented, ErrorCode::ReadError
//   - '{'                       -> read_dict()
// When the number or bool attempt fails, the position is rewound and the same
// token is retried as a symbol. Results of the string, list and dict readers
// (success or error) are returned unchanged.
//
// Returns the parsed Value, or ErrorCode::ReadError when nothing matches.
Result<Value> Reader::read_one() {
  forward_whitespace();
  const auto saved_position = position_;
  if (is_numeric_start()) {
    auto res = read_number();
    if (res.has_value()) return res;
  } else if (match("true") || match("false")) {
    auto res = read_bool();
    if (res.has_value()) return res;
  } else if (is_string_start()) {
    return read_string();
  } else if (match('(')) {
    return read_list();
  } else if (match('[')) {
    // TODO: implement array
    // return read_array();
    return ErrorCode::ReadError;
  } else if (match('{')) {
    return read_dict();
  }
  // A failed number/bool attempt may already have consumed input; rewind so
  // the token can be re-read from its first character as a symbol.
  position_ = saved_position;
  if (is_symbol_start()) {
    return read_symbol();
  }
  return ErrorCode::ReadError;
}
// Reads data until the end of input and returns them as a list in source
// order. Items are prepended to an accumulator as they are read, so the
// accumulator is reversed once at the end. Any error from read_one() or from
// allocation is propagated by TRY.
Result<Value> Reader::read_multiple() {
  Value collected = TRY(Nil::create(_arena));
  for (forward_whitespace(); !is_eof(); forward_whitespace()) {
    auto item = TRY(read_one());
    collected = Value(TRY(Pair::create(_arena, item, collected)));
  }
  return reverse(_arena, collected);
}
2024-07-27 18:40:13 +00:00
// Reads a parenthesised list: '(' followed by zero or more data and a closing
// ')'. Returns ErrorCode::ReadError when the cursor is not on '(' or when the
// input ends before the closing ')'. Elements are accumulated in reverse and
// flipped back into source order before returning.
Result<Value> Reader::read_list() {
  if (!match('(')) return ErrorCode::ReadError;
  forward();  // opening '('
  Value collected = TRY(Nil::create(_arena));
  for (;;) {
    forward_whitespace();
    if (is_eof()) return ErrorCode::ReadError;
    if (match(')')) break;
    auto item = TRY(read_one());
    collected = Value(TRY(Pair::create(_arena, item, collected)));
  }
  forward();  // closing ')'
  return reverse(_arena, collected);
}
2024-08-03 12:43:59 +00:00
// Reads a dictionary literal: '{' followed by zero or more key/value pairs
// (two consecutive data each) and a closing '}'. Returns ErrorCode::ReadError
// when the cursor is not on '{' or the input ends before '}'; errors from
// reading a key or value, or from the insert, are propagated by TRY.
Result<Value> Reader::read_dict() {
  if (!match('{')) return ErrorCode::ReadError;
  forward();  // opening '{'
  auto dict = TRY(Dict::create(_arena));
  for (;;) {
    forward_whitespace();
    if (is_eof()) return ErrorCode::ReadError;
    if (match('}')) break;
    auto key = TRY(read_one());
    auto mapped = TRY(read_one());
    dict = TRY(dict.insert(_arena, key, mapped));
  }
  forward();  // closing '}'
  return Value(std::move(dict));
}
2024-07-27 18:40:13 +00:00
// Reads the literal `true` or `false`. The literal must be followed by a
// separator or the end of input, otherwise ErrorCode::ReadError is returned
// (with the cursor left just past the literal text).
Result<Value> Reader::read_bool() {
  bool literal = false;
  size_t width = 0;
  if (match("true")) {
    literal = true;
    width = 4;
  } else if (match("false")) {
    literal = false;
    width = 5;
  } else {
    return ErrorCode::ReadError;
  }
  forward(width);
  const bool terminated = is_separator(get()) || is_eof();
  if (!terminated) return ErrorCode::ReadError;
  return Value(TRY(Bool::create(_arena, literal)));
}
2024-07-27 22:13:59 +00:00
// Reads a double-quoted string literal and decodes its escape sequences.
//
// The literal ends at the first unescaped '"'. An unescaped line terminator
// or the end of input before that yields ErrorCode::UnterminatedStringLiteral.
// Recognised escapes: \0 \b \f \n \r \t \v. Any other escaped character is
// kept verbatim together with its backslash.
// NOTE(review): this means `\"` and `\\` decode to two characters each —
// confirm that is intended.
//
// The closing quote must be followed by a separator or the end of input,
// otherwise ErrorCode::ReadError is returned.
Result<Value> Reader::read_string() {
  if (!match('"')) return ErrorCode::ReadError;
  forward();  // opening quote
  const size_t start = position_.offset;

  // Locate the closing quote. A backslash makes the scan skip the character
  // that follows it, so an escaped quote does not end the literal.
  while (!match('"') && !match('\r') && !match('\n') && !is_eof()) {
    if (match('\\')) {
      forward();
    }
    forward();
  }
  if (!match('"')) return ErrorCode::UnterminatedStringLiteral;
  const size_t length = position_.offset - start;  // content between quotes
  forward();  // closing quote

  String result = TRY(String::create(_arena, ""));
  for (size_t i = 0; i < length; ++i) {
    char32_t cur = TRY(_str[start + i]);
    if (cur != '\\') {
      // TODO: optimize this
      result = TRY(result.concat(_arena, &cur, 1));
      continue;
    }
    ++i;
    // The scan above never leaves a backslash as the last content character;
    // guard anyway so the closing quote is never read as an escape.
    if (i >= length) break;
    char32_t next = TRY(_str[start + i]);
    char32_t decoded = next;
    switch (next) {
      case '0':
        // Appended as an explicit code point: the C string "\0" is empty and
        // would append nothing.
        decoded = U'\0';
        break;
      case 'b':
        decoded = U'\b';
        break;
      case 'f':
        decoded = U'\f';
        break;
      case 'n':
        decoded = U'\n';
        break;
      case 'r':
        decoded = U'\r';
        break;
      case 't':
        decoded = U'\t';
        break;
      case 'v':
        decoded = U'\v';
        break;
      default:
        // Unknown escape: keep the backslash, then the character itself.
        result = TRY(result.concat(_arena, "\\"));
        break;
    }
    result = TRY(result.concat(_arena, &decoded, 1));
  }
  if (!is_separator(get()) && !is_eof()) return ErrorCode::ReadError;
  return Value(std::move(result));
}
// Reads a numeric literal and returns it as a Float or an Int64.
//
// Accepted shapes (each with an optional leading '+' or '-'):
//   - decimal integers, including ones with leading zeros
//   - hexadecimal integers written as 0x... / 0X...
//   - floats with a fractional part and/or an exponent; a float may carry an
//     `f64` suffix, which is consumed and otherwise ignored
//
// Errors:
//   - ErrorCode::ReadError when the cursor is not at a numeric start, or when
//     the literal is not followed by a separator or the end of input
//   - ErrorCode::InvalidNumericLiteral for a malformed exponent, a hex prefix
//     without digits, or a literal of 32 characters or more
Result<Value> Reader::read_number() {
  if (!is_numeric_start()) return ErrorCode::ReadError;
  const size_t start = position_.offset;
  bool is_float = false;
  bool is_hex = false;
  if (match('+') || match('-')) forward();
  if (match('0')) {
    forward();
    if (match('.')) {
      is_float = true;
      forward();
      while (is_digit(get())) forward();
      if (match('e') || match('E')) {
        if (!forward_exponent()) return ErrorCode::InvalidNumericLiteral;
      }
    } else if (match('e') || match('E')) {
      is_float = true;
      if (!forward_exponent()) return ErrorCode::InvalidNumericLiteral;
    } else if (match('x') || match('X')) {
      is_hex = true;
      // forward_hex_number() requires the cursor to be on the first hex
      // digit, so the 'x' marker has to be consumed here first.
      forward();
      if (!forward_hex_number()) return ErrorCode::InvalidNumericLiteral;
    } else {
      while (is_digit(get())) forward();
    }
  } else {
    while (is_digit(get())) forward();
    if (match('.')) {
      is_float = true;
      forward();
      while (is_digit(get())) forward();
    }
    if (match('e') || match('E')) {
      is_float = true;
      if (!forward_exponent()) return ErrorCode::InvalidNumericLiteral;
    }
  }

  // Copy the literal into a NUL-terminated buffer for the C conversion
  // routines; one byte is reserved for the terminator.
  const size_t length = position_.offset - start;
  char buf[32];
  if (length >= sizeof(buf)) return ErrorCode::InvalidNumericLiteral;
  for (size_t i = 0; i < length; ++i) {
    buf[i] = char(TRY(_str[start + i]));
  }
  buf[length] = '\0';

  Value res;
  if (is_float) {
    res = Value(TRY(Float::create(_arena, strtod(buf, nullptr))));
    if (match("f64")) {
      forward(3);
    }
  } else {
    // Base 16 accepts the optional sign and the "0x"/"0X" prefix; base 10
    // keeps leading-zero literals decimal.
    const int base = is_hex ? 16 : 10;
    res = Value(TRY(Int64::create(_arena, strtoll(buf, nullptr, base))));
  }
  if (!is_separator(get()) && !is_eof()) return ErrorCode::ReadError;
  return res;
}
// Reads a symbol: the longest run of symbol characters at the cursor. The run
// must be followed by a separator or the end of input, otherwise
// ErrorCode::ReadError is returned. The same error is returned when the
// cursor is not on a symbol character at all.
Result<Value> Reader::read_symbol() {
  if (!is_symbol_char(get())) return ErrorCode::ReadError;
  const size_t first = position_.offset;
  do {
    forward();
  } while (is_symbol_char(get()));
  const size_t last = position_.offset;
  String name = TRY(_str.sub(_arena, first, last));
  const bool terminated = is_separator(get()) || is_eof();
  if (!terminated) return ErrorCode::ReadError;
  return Value(TRY(Symbol::create(_arena, name)));
}
// Returns the character `offset` positions ahead of the cursor, or 0 when the
// underlying string reports no value at that index.
char32_t Reader::get(size_t offset) {
  auto ch = _str[position_.offset + offset];
  if (!ch.has_value()) return 0;
  return *ch;
}
// True when the cursor is on a line terminator character ('\n' or '\r').
bool Reader::is_eol() {
  if (match('\n')) return true;
  return match('\r');
}
// True when the cursor offset equals the size of the input string.
bool Reader::is_eof() {
  const auto total = _str.size();
  return position_.offset == total;
}
// True when the cursor is on a blank character: space, tab, vertical tab or
// form feed. Line terminators are reported by is_eol() instead.
bool Reader::is_whitespace() {
  return match(' ') || match('\t') || match('\v') || match('\f');
}
// True when the cursor is on ';', the character that opens a line comment.
bool Reader::is_comment_start() {
  const bool on_semicolon = match(';');
  return on_semicolon;
}
// True when the cursor is on a double quote, the opening of a string literal.
bool Reader::is_string_start() {
  const bool on_quote = match('"');
  return on_quote;
}
// True when the character under the cursor is a valid symbol character.
bool Reader::is_symbol_start() {
  const char32_t current = get();
  return is_symbol_char(current);
}
// True when the upcoming characters can begin a numeric literal:
//   digit | '.' digit | sign digit | sign '.' digit
// where sign is '+' or '-'.
bool Reader::is_numeric_start() {
  const char32_t first = get();
  if (is_digit(first)) return true;
  if (first == '.') return is_digit(get(1));
  if (first == '+' || first == '-') {
    const char32_t second = get(1);
    if (is_digit(second)) return true;
    return second == '.' && is_digit(get(2));
  }
  return false;
}
// True when the input at the cursor starts with every character of the
// NUL-terminated string `str`. An empty `str` always matches.
bool Reader::match(const char* str) {
  for (size_t i = 0; str[i] != '\0'; ++i) {
    const char32_t expected = char32_t(str[i]);
    if (get(i) != expected) return false;
  }
  return true;
}
// True when the character under the cursor equals `c`.
bool Reader::match(char c) {
  const char32_t expected = char32_t(c);
  return get() == expected;
}
// Advances the cursor by `n` single steps (see forward()).
void Reader::forward(size_t n) {
  while (n > 0) {
    forward();
    --n;
  }
}
// Advances the cursor by one step and keeps line/column in sync.
// Does nothing at the end of input. A "\r\n" pair is consumed as a single
// step; any line terminator bumps the line counter and resets the column
// to 1. Every other character just moves one column to the right.
void Reader::forward() {
  if (is_eof()) return;
  if (!is_eol()) {
    ++position_.column;
    ++position_.offset;
    return;
  }
  const size_t width = match("\r\n") ? 2 : 1;
  position_.offset += width;
  ++position_.line;
  position_.column = 1;
}
// Skips everything that is not part of a datum: blank characters, line
// terminators and ';' line comments. On return the cursor is on the first
// character of the next datum, or at the end of input.
void Reader::forward_whitespace() {
  for (;;) {
    if (is_eol() || is_whitespace()) {
      forward();
    } else if (is_comment_start()) {
      // Skip up to, but not including, the line terminator; the next loop
      // iteration consumes it. Stopping before the terminator keeps an empty
      // comment (";" directly followed by a newline) from swallowing the
      // following line.
      while (!is_eof() && !is_eol()) forward();
    } else {
      break;
    }
  }
}
// Consumes a run of decimal digits at the cursor.
// Returns true when at least one digit was consumed, false (with the cursor
// unmoved) otherwise.
bool Reader::forward_decimal_number() {
  bool consumed = false;
  for (; is_digit(get()); forward()) {
    consumed = true;
  }
  return consumed;
}
// Consumes a run of hexadecimal digits at the cursor. The cursor must already
// be on the first digit; no "0x" prefix is handled here.
// Returns true when at least one digit was consumed, false (with the cursor
// unmoved) otherwise.
bool Reader::forward_hex_number() {
  bool consumed = false;
  for (; is_hex_digit(get()); forward()) {
    consumed = true;
  }
  return consumed;
}
// Consumes an exponent part: 'e' or 'E', an optional sign, then at least one
// decimal digit. Returns false when the cursor is not on the exponent marker
// or when no digit follows; in the latter case the marker and sign have
// already been consumed.
bool Reader::forward_exponent() {
  const bool at_marker = match('e') || match('E');
  if (!at_marker) return false;
  forward();
  const bool has_sign = match('-') || match('+');
  if (has_sign) forward();
  return forward_decimal_number();
}
2024-08-03 12:43:59 +00:00
// Reads one datum from the text held by `value`.
// Returns ErrorCode::TypeMismatch when `value` is not a String; otherwise the
// result of Reader::read_one() on that string.
Result<Value> read_one(Arena& arena, Value& value) {
  if (value.is<String>()) {
    Reader reader(arena, *value.to<String>());
    return reader.read_one();
  }
  return ErrorCode::TypeMismatch;
}
// Reads one datum from the String `value` using a fresh Reader.
Result<Value> read_one(Arena& arena, String& value) {
  Reader reader(arena, value);
  return reader.read_one();
}
// Reads one datum from the C string `value`. The text is first copied into a
// String created in `arena`; a failure of that creation is propagated by TRY.
Result<Value> read_one(Arena& arena, const char* value) {
  auto text = TRY(String::create(arena, value));
  Reader reader(arena, text);
  return reader.read_one();
}