#include "reader.hpp"
#include "common.hpp"
#include "error.hpp"

// ---------------------------------------------------------------------------
// Character classification helpers (file-local).
// ---------------------------------------------------------------------------

// True for the ASCII decimal digits '0'..'9'.
static bool is_digit(char32_t c) {
  return c >= '0' && c <= '9';
}

// True for the ASCII letters 'a'..'z' and 'A'..'Z'.
static bool is_alpha(char32_t c) {
  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// True for a decimal digit or one of the letters a-f / A-F.
static bool is_hex_digit(char32_t c) {
  return is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}

// True when `c` may appear inside a symbol: an ASCII digit, an ASCII letter,
// or one of the punctuation characters listed in the table below.
static bool is_symbol_char(char32_t c) {
  // Read-only lookup table, so it is declared constexpr rather than mutable.
  static constexpr char kSymbolPunctuation[] = {'!', '$', '%', '&', '*', '+',
                                                '-', '.', '/', ':', '<', '=',
                                                '>', '?', '@', '^', '_', '~'};
  if (is_digit(c) || is_alpha(c)) return true;
  for (const char candidate : kSymbolPunctuation) {
    if (char32_t(candidate) == c) return true;
  }
  return false;
}

// True for horizontal whitespace: space, tab, vertical tab, form feed.
// Line terminators are handled separately by is_newline().
static bool is_whitespace(char32_t c) {
  return c == ' ' || c == '\t' || c == '\v' || c == '\f';
}

// True for any opening or closing round, square or curly bracket.
static bool is_brace(char32_t c) {
  return c == '[' || c == ']' || c == '(' || c == ')' || c == '{' || c == '}';
}

// True for carriage return or line feed.
static bool is_newline(char32_t c) {
  return c == '\r' || c == '\n';
}

// True when `c` terminates a token: whitespace, a bracket, a line terminator,
// or ';' (which Reader::is_comment_start treats as the start of a comment).
static bool is_separator(char32_t c) {
  return is_whitespace(c) || is_brace(c) || is_newline(c) || c == ';';
}

#define EOS 0

// Reads a single datum starting at the current position.
//
// Leading whitespace and comments are skipped first. The first characters
// then select the sub-reader: number, boolean, string, list or dict. When a
// number or boolean attempt fails, the position is rewound and the text is
// retried as a symbol. Returns ErrorCode::ReadError when nothing matches;
// '[' (array syntax) is not implemented and also yields ReadError.
Result Reader::read_one() {
  forward_whitespace();
  auto saved_position = position_;

  if (is_numeric_start()) {
    auto res = read_number();
    if (res.has_value()) return res;
  } else if (match("true") || match("false")) {
    auto res = read_bool();
    if (res.has_value()) return res;
  } else if (is_string_start()) {
    return read_string();
  } else if (match('(')) {
    return read_list();
  } else if (match('[')) {
    // TODO: implement array
    // return read_array();
    return ErrorCode::ReadError;
  } else if (match('{')) {
    return read_dict();
  }

  // The number/bool attempt may have consumed input before failing; rewind
  // so the same text can be tried as a symbol.
  position_ = saved_position;
  if (is_symbol_start()) {
    return read_symbol();
  }
  return ErrorCode::ReadError;
}

// Reads every datum up to the end of input and returns them as a list in
// source order. Any error from read_one() is propagated through TRY.
Result Reader::read_multiple() {
  // Items are prepended as they are read, so the list is built back to front
  // and reversed once at the end.
  Value res = TRY(Nil::create(_arena));
  forward_whitespace();
  while (!is_eof()) {
    auto val = TRY(read_one());
    res = Value(TRY(Pair::create(_arena, val, res)));
    forward_whitespace();
  }
  return reverse(_arena, res);
}
Result Reader::read_list() { if (!match('(')) return ErrorCode::ReadError; forward(); Value res = TRY(Nil::create(_arena)); while (1) { forward_whitespace(); if (is_eof()) { return ErrorCode::ReadError; } if (match(')')) { forward(); return reverse(_arena, res); } auto val = TRY(read_one()); res = Value(TRY(Pair::create(_arena, val, res))); } return ErrorCode::ReadError; } Result Reader::read_dict() { if (!match('{')) return ErrorCode::ReadError; forward(); auto res = TRY(Dict::create(_arena)); while (1) { forward_whitespace(); if (is_eof()) { return ErrorCode::ReadError; } if (match('}')) { forward(); return Value(std::move(res)); } auto val1 = TRY(read_one()); auto val2 = TRY(read_one()); res = TRY(res.insert(_arena, val1, val2)); } return ErrorCode::ReadError; } Result Reader::read_bool() { if (match("true")) { forward(4); if (!is_separator(get()) && !is_eof()) return ErrorCode::ReadError; return Value(TRY(Bool::create(_arena, true))); } if (match("false")) { forward(5); if (!is_separator(get()) && !is_eof()) return ErrorCode::ReadError; return Value(TRY(Bool::create(_arena, false))); } return ErrorCode::ReadError; } Result Reader::read_string() { if (!match('"')) return ErrorCode::ReadError; size_t start = position_.offset + 1; forward(); while (!match('"') && !match('\r') && !match('\n') && !is_eof()) { if (match('\\')) { forward(); } forward(); } if (!match('"')) return ErrorCode::UnterminatedStringLiteral; forward(); String result = TRY(String::create(_arena, "")); for (size_t i = 0; i < position_.offset - start - 1; i++) { char32_t cur = TRY(_str[start + i]); if (cur != '\\') { // TODO: optimize this result = TRY(result.concat(_arena, &cur, 1)); continue; } ++i; if (i >= position_.offset - start) continue; char32_t next = TRY(_str[start + i]); if (next == '\0') { result = TRY(result.concat(_arena, "\0")); continue; } switch (next) { case 'b': result = TRY(result.concat(_arena, "\b")); continue; case 'f': result = TRY(result.concat(_arena, "\f")); continue; 
case 'n': result = TRY(result.concat(_arena, "\n")); continue; case 'r': result = TRY(result.concat(_arena, "\r")); continue; case 't': result = TRY(result.concat(_arena, "\t")); continue; case 'v': result = TRY(result.concat(_arena, "\v")); continue; } result = TRY(result.concat(_arena, "\\")); result = TRY(result.concat(_arena, &next, 1)); } if (!is_separator(get()) && !is_eof()) return ErrorCode::ReadError; return Value(std::move(result)); } Result Reader::read_number() { if (!is_numeric_start()) return ErrorCode::ReadError; size_t start = position_.offset; bool is_float = false; if (match('+') || match('-')) forward(); if (match('0')) { forward(); if (match('.')) { is_float = true; forward(); while (is_digit(get())) forward(); if (match('e') || match('E')) { if (!forward_exponent()) return ErrorCode::InvalidNumericLiteral; } } else if (match('e') || match('E')) { is_float = true; if (!forward_exponent()) return ErrorCode::InvalidNumericLiteral; } else if (match('x') || match('X')) { if (!forward_hex_number()) return ErrorCode::InvalidNumericLiteral; } else if (is_digit(get())) { do { forward(); } while (is_digit(get())); } } else { while (is_digit(get())) forward(); if (match('.')) { is_float = true; forward(); while (is_digit(get())) forward(); } if (match('e') || match('E')) { is_float = true; if (!forward_exponent()) return ErrorCode::InvalidNumericLiteral; } } Value res; if (position_.offset - start >= 32) return ErrorCode::InvalidNumericLiteral; char buf[32]; for (size_t i = 0; i < position_.offset - start; ++i) { buf[i] = char(TRY(_str[start + i])); } buf[position_.offset - start] = '\0'; if (is_float) { res = Value(TRY(Float::create(_arena, strtod(buf, 0)))); if (match("f64")) { forward(3); } } else { res = Value(TRY(Int64::create(_arena, strtoll(buf, 0, 10)))); } if (!is_separator(get()) && !is_eof()) return ErrorCode::ReadError; return res; } Result Reader::read_symbol() { if (!is_symbol_char(get())) return ErrorCode::ReadError; size_t start = 
position_.offset; while (is_symbol_char(get())) forward(); size_t end = position_.offset; String str = TRY(_str.sub(_arena, start, end)); if (!is_separator(get()) && !is_eof()) return ErrorCode::ReadError; return Value(TRY(Symbol::create(_arena, str))); } char32_t Reader::get(size_t offset) { size_t pos = position_.offset + offset; auto res = _str[pos]; if (res.has_value()) return *res; return 0; } bool Reader::is_eol() { return match('\n') || match('\r'); } bool Reader::is_eof() { return position_.offset == _str.size(); } bool Reader::is_whitespace() { return match(' ') || match('\t') || match('\v') || match('\f'); return false; } bool Reader::is_comment_start() { return match(';'); } bool Reader::is_string_start() { return match('"'); } bool Reader::is_symbol_start() { return is_symbol_char(get()); } bool Reader::is_numeric_start() { char32_t c = get(); if (is_digit(c)) return true; if (c == '.' && is_digit(get(1))) return true; if ((c == '+' || c == '-') && is_digit(get(1))) return true; if ((c == '+' || c == '-') && get(1) == '.' 
&& is_digit(get(2))) return true; return false; } bool Reader::match(const char* str) { size_t slen = strlen(str); for (size_t i = 0; i < slen; i++) { if (get(i) != char32_t(str[i])) return false; } return true; } bool Reader::match(char c) { return get() == char32_t(c); } void Reader::forward(size_t n) { for (size_t i = 0; i < n; i++) { forward(); } } void Reader::forward() { if (is_eof()) { return; } if (is_eol()) { if (match("\r\n")) { position_.offset++; } position_.line++; position_.column = 1; } else { position_.column++; } position_.offset++; } void Reader::forward_whitespace() { while (true) { if (is_eol()) { forward(); } else if (is_whitespace()) { forward(); } else if (is_comment_start()) { forward(); do { forward(); } while (!is_eof() && !is_eol()); } else { break; } } } bool Reader::forward_decimal_number() { if (!is_digit(get())) return false; while (is_digit(get())) forward(); return true; } bool Reader::forward_hex_number() { if (!is_hex_digit(get())) return false; while (is_hex_digit(get())) forward(); return true; } bool Reader::forward_exponent() { if (!match('e') && !match('E')) return false; forward(); if (match('-') || match('+')) forward(); return forward_decimal_number(); } Result read_one(Arena& arena, Value& value) { if (!value.is()) return ErrorCode::TypeMismatch; auto r = Reader(arena, *value.to()); return r.read_one(); } Result read_one(Arena& arena, String& value) { auto r = Reader(arena, value); return r.read_one(); } Result read_one(Arena& arena, const char* value) { auto s = TRY(String::create(arena, value)); auto r = Reader(arena, s); return r.read_one(); }