#include "reader.hpp"
#include "common.hpp"
#include "error.hpp"

// ---------------------------------------------------------------------------
// File-local character classification helpers.
// ---------------------------------------------------------------------------

/// True for the ASCII decimal digits '0'..'9'.
static bool is_digit(char32_t c) { return c >= '0' && c <= '9'; }

/// True for the ASCII letters 'a'..'z' and 'A'..'Z'.
static bool is_alpha(char32_t c) {
  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/// True for ASCII hexadecimal digits (0-9, a-f, A-F).
static bool is_hex_digit(char32_t c) {
  return is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}

/// True if `c` may appear in a symbol: an ASCII digit, an ASCII letter, or
/// one of the punctuation characters listed in the table below.
static bool is_symbol_char(char32_t c) {
  static const char valid_char[] = {'!', '$', '%', '&', '*', '+',
                                    '-', '.', '/', ':', '<', '=',
                                    '>', '?', '@', '^', '_', '~'};
  if (is_digit(c)) return true;
  if (is_alpha(c)) return true;
  for (char valid : valid_char) {
    if (char32_t(valid) == c) return true;
  }
  return false;
}

/// True for horizontal whitespace (space, tab, vertical tab, form feed).
/// Line terminators are handled separately by is_newline().
static bool is_whitespace(char32_t c) {
  return c == ' ' || c == '\t' || c == '\v' || c == '\f';
}

/// True for any opening or closing bracket: [ ] ( ) { }.
static bool is_brace(char32_t c) {
  return c == '[' || c == ']' || c == '(' || c == ')' || c == '{' || c == '}';
}

/// True for carriage return or line feed.
static bool is_newline(char32_t c) { return c == '\r' || c == '\n'; }

/// True if `c` terminates a token: whitespace, a bracket, a line terminator,
/// or ';' (which starts a comment).
static bool is_separator(char32_t c) {
  return is_whitespace(c) || is_brace(c) || is_newline(c) || c == ';';
}

#define EOS 0

// ---------------------------------------------------------------------------
// Reader: top-level entry points.
// ---------------------------------------------------------------------------

/// Reads a single datum at the current position.
///
/// Leading whitespace and comments are skipped first. Numbers, `true`/`false`
/// and `nil` are attempted speculatively: if the specialised reader fails,
/// the position is rewound and the text is retried as a symbol. Strings,
/// lists, arrays and dicts return the result of their reader directly
/// (including errors). Returns ReadError if nothing matches.
Result Reader::read_one() {
  forward_whitespace();
  auto saved_position = position_;
  if (is_numeric_start()) {
    auto res = read_number();
    if (res.has_value()) return res;
  } else if (match("true") || match("false")) {
    auto res = read_bool();
    if (res.has_value()) return res;
  } else if (match("nil")) {
    auto res = read_nil();
    if (res.has_value()) return res;
  } else if (is_string_start()) {
    return read_string();
  } else if (match('(')) {
    return read_list();
  } else if (match('[')) {
    return read_array();
  } else if (match('{')) {
    return read_dict();
  }
  // A speculative reader failed (or nothing matched): rewind and try a symbol.
  position_ = saved_position;
  if (is_symbol_start()) {
    return read_symbol();
  }
  return ERROR(ReadError);
}

/// Reads data until end of input and returns them as a list in source order.
///
/// Items are prepended to a Pair chain as they are read, and the chain is
/// passed through reverse() once end of input is reached.
Result Reader::read_multiple() {
  Value res = TRY(Nil::create());
  while (1) {
    forward_whitespace();
    if (is_eof()) {
      return reverse(res);
    }
    auto val = TRY(read_one());
    res = Value(TRY(Pair::create(val, res)));
  }
  return ERROR(ReadError);
}

// ---------------------------------------------------------------------------
// Reader: compound data.
// ---------------------------------------------------------------------------

/// Reads a parenthesised list `( ... )`.
///
/// Returns ReadError if the current character is not '(' or if end of input
/// is reached before the closing ')'.
Result Reader::read_list() {
  if (!match('(')) return ERROR(ReadError);
  forward();
  Value res = TRY(Nil::create());
  while (1) {
    forward_whitespace();
    if (is_eof()) {
      return ERROR(ReadError);
    }
    if (match(')')) {
      forward();
      // Elements were prepended, so restore source order.
      return reverse(res);
    }
    auto val = TRY(read_one());
    res = Value(TRY(Pair::create(val, res)));
  }
  return ERROR(ReadError);
}

/// Reads a bracketed array `[ ... ]`.
///
/// Returns ReadError if the current character is not '[' or if end of input
/// is reached before the closing ']'.
Result Reader::read_array() {
  if (!match('[')) return ERROR(ReadError);
  forward();
  auto res = TRY(Array::create());
  while (1) {
    forward_whitespace();
    if (is_eof()) {
      return ERROR(ReadError);
    }
    if (match(']')) {
      forward();
      return Value(std::move(res));
    }
    auto val = TRY(read_one());
    res = TRY(res.append(val));
  }
  return ERROR(ReadError);
}

/// Reads a braced dict `{ key value ... }`.
///
/// Entries are read as consecutive pairs of data; a dangling key with no
/// value surfaces as the error returned by the second read_one(). Returns
/// ReadError if the current character is not '{' or if end of input is
/// reached before the closing '}'.
Result Reader::read_dict() {
  if (!match('{')) return ERROR(ReadError);
  forward();
  auto res = TRY(Dict::create());
  while (1) {
    forward_whitespace();
    if (is_eof()) {
      return ERROR(ReadError);
    }
    if (match('}')) {
      forward();
      return Value(std::move(res));
    }
    auto val1 = TRY(read_one());
    auto val2 = TRY(read_one());
    res = TRY(res.insert(val1, val2));
  }
  return ERROR(ReadError);
}

// ---------------------------------------------------------------------------
// Reader: atoms.
// ---------------------------------------------------------------------------

/// Reads the literal `true` or `false`.
///
/// The literal must be followed by a separator or end of input; otherwise
/// ReadError is returned (read_one() then retries the text as a symbol).
Result Reader::read_bool() {
  if (match("true")) {
    forward(4);
    if (!is_separator(get()) && !is_eof()) return ERROR(ReadError);
    return Value(TRY(Bool::create(true)));
  }
  if (match("false")) {
    forward(5);
    if (!is_separator(get()) && !is_eof()) return ERROR(ReadError);
    return Value(TRY(Bool::create(false)));
  }
  return ERROR(ReadError);
}

/// Reads the literal `nil`.
///
/// The literal must be followed by a separator or end of input; otherwise
/// ReadError is returned.
Result Reader::read_nil() {
  if (match("nil")) {
    forward(3);
    if (!is_separator(get()) && !is_eof()) return ERROR(ReadError);
    return Value(TRY(Nil::create()));
  }
  return ERROR(ReadError);
}

/// Reads a double-quoted string literal.
///
/// The first pass scans to the closing quote; an unescaped line terminator or
/// end of input before it yields UnterminatedStringLiteral. The second pass
/// decodes the escapes \b \f \n \r \t \v. Any other escaped character is
/// emitted together with its backslash. The literal must be followed by a
/// separator or end of input, otherwise ReadError is returned.
Result Reader::read_string() {
  if (!match('"')) return ERROR(ReadError);
  size_t start = position_.offset + 1;  // offset of the first content char
  forward();
  while (!match('"') && !match('\r') && !match('\n') && !is_eof()) {
    if (match('\\')) {
      // Step over the backslash so the escaped character (possibly a quote)
      // is consumed by the forward() below without ending the scan.
      forward();
    }
    forward();
  }
  if (!match('"')) return ERROR(UnterminatedStringLiteral);
  forward();

  String result = TRY(String::create(""));
  // position_ is now one past the closing quote, hence the extra "- 1".
  for (size_t i = 0; i < position_.offset - start - 1; i++) {
    char32_t cur = TRY(_str[start + i]);
    if (cur != '\\') {
      // TODO: optimize this
      result = TRY(result.concat(&cur, 1));
      continue;
    }
    ++i;
    if (i >= position_.offset - start) continue;
    char32_t next = TRY(_str[start + i]);
    // NOTE(review): this tests for an actual NUL character after the
    // backslash, not the digit '0'; confirm whether a `\0` escape was
    // intended here.
    if (next == '\0') {
      result = TRY(result.concat("\0"));
      continue;
    }
    switch (next) {
      case 'b':
        result = TRY(result.concat("\b"));
        continue;
      case 'f':
        result = TRY(result.concat("\f"));
        continue;
      case 'n':
        result = TRY(result.concat("\n"));
        continue;
      case 'r':
        result = TRY(result.concat("\r"));
        continue;
      case 't':
        result = TRY(result.concat("\t"));
        continue;
      case 'v':
        result = TRY(result.concat("\v"));
        continue;
    }
    // Unrecognised escape: keep the backslash and the character verbatim.
    // NOTE(review): this also applies to \" and \\ - confirm that keeping
    // the backslash for those is intended.
    result = TRY(result.concat("\\"));
    result = TRY(result.concat(&next, 1));
  }
  if (!is_separator(get()) && !is_eof()) return ERROR(ReadError);
  return Value(std::move(result));
}

/// Reads a numeric literal: an optionally signed decimal integer, a
/// hexadecimal integer (`0x...`), or a float with optional fraction and
/// exponent (optionally followed by the suffix `f64`).
///
/// Returns InvalidNumericLiteral for a malformed exponent or hex literal, or
/// when the literal is 32 characters or longer; returns ReadError when the
/// text does not start a number or is not followed by a separator or end of
/// input.
Result Reader::read_number() {
  if (!is_numeric_start()) return ERROR(ReadError);
  size_t start = position_.offset;
  bool is_float = false;
  bool is_hex = false;
  if (match('+') || match('-')) forward();
  if (match('0')) {
    forward();
    if (match('.')) {
      is_float = true;
      forward();
      while (is_digit(get())) forward();
      if (match('e') || match('E')) {
        if (!forward_exponent()) return ERROR(InvalidNumericLiteral);
      }
    } else if (match('e') || match('E')) {
      is_float = true;
      if (!forward_exponent()) return ERROR(InvalidNumericLiteral);
    } else if (match('x') || match('X')) {
      is_hex = true;
      // Step over the 'x' itself; forward_hex_number() expects to start on
      // the first hex digit.
      forward();
      if (!forward_hex_number()) return ERROR(InvalidNumericLiteral);
    } else if (is_digit(get())) {
      do {
        forward();
      } while (is_digit(get()));
    }
  } else {
    while (is_digit(get())) forward();
    if (match('.')) {
      is_float = true;
      forward();
      while (is_digit(get())) forward();
    }
    if (match('e') || match('E')) {
      is_float = true;
      if (!forward_exponent()) return ERROR(InvalidNumericLiteral);
    }
  }

  Value res;
  // Copy the literal into a NUL-terminated buffer for strtod/strtoll.
  if (position_.offset - start >= 32) return ERROR(InvalidNumericLiteral);
  char buf[32];
  for (size_t i = 0; i < position_.offset - start; ++i) {
    buf[i] = char(TRY(_str[start + i]));
  }
  buf[position_.offset - start] = '\0';

  if (is_float) {
    res = Value(TRY(Float::create(strtod(buf, nullptr))));
    if (match("f64")) {
      forward(3);
    }
  } else {
    // Base 16 accepts the optional sign and "0x" prefix; decimal literals
    // keep base 10 so leading zeros are not interpreted as octal.
    res = Value(TRY(Int64::create(strtoll(buf, nullptr, is_hex ? 16 : 10))));
  }
  if (!is_separator(get()) && !is_eof()) return ERROR(ReadError);
  return res;
}

/// Reads a symbol: a maximal run of symbol characters that must be followed
/// by a separator or end of input, otherwise ReadError is returned.
Result Reader::read_symbol() {
  if (!is_symbol_char(get())) return ERROR(ReadError);
  size_t start = position_.offset;
  while (is_symbol_char(get())) forward();
  size_t end = position_.offset;
  String str = TRY(_str.slice(start, end));
  if (!is_separator(get()) && !is_eof()) return ERROR(ReadError);
  return Value(TRY(Symbol::create(str)));
}

// ---------------------------------------------------------------------------
// Reader: cursor primitives.
// ---------------------------------------------------------------------------

/// Returns the character `offset` positions ahead of the cursor, or 0 when
/// the indexed lookup into the input yields no value.
char32_t Reader::get(size_t offset) {
  size_t pos = position_.offset + offset;
  auto res = _str[pos];
  if (res.has_value()) return *res;
  return 0;
}

/// True if the cursor is on a line terminator ('\n' or '\r').
bool Reader::is_eol() { return match('\n') || match('\r'); }

/// True if the cursor offset equals the input size.
bool Reader::is_eof() { return position_.offset == _str.size(); }

/// True if the cursor is on horizontal whitespace (space, tab, VT, FF).
bool Reader::is_whitespace() {
  return match(' ') || match('\t') || match('\v') || match('\f');
}

/// True if the cursor is on ';', which starts a comment.
bool Reader::is_comment_start() { return match(';'); }

/// True if the cursor is on an opening double quote.
bool Reader::is_string_start() { return match('"'); }

/// True if the cursor is on a character that may appear in a symbol.
bool Reader::is_symbol_start() { return is_symbol_char(get()); }

/// True if the text at the cursor starts a number: a digit, '.' followed by
/// a digit, or a sign followed by either of those.
bool Reader::is_numeric_start() {
  char32_t c = get();
  if (is_digit(c)) return true;
  if (c == '.' && is_digit(get(1))) return true;
  if ((c == '+' || c == '-') && is_digit(get(1))) return true;
  if ((c == '+' || c == '-') && get(1) == '.' && is_digit(get(2))) return true;
  return false;
}

/// True if the input at the cursor starts with the NUL-terminated `str`.
bool Reader::match(const char* str) {
  size_t slen = strlen(str);
  for (size_t i = 0; i < slen; i++) {
    if (get(i) != char32_t(str[i])) return false;
  }
  return true;
}

/// True if the character at the cursor equals `c`.
bool Reader::match(char c) { return get() == char32_t(c); }

/// Advances the cursor by `n` single steps (see forward()).
void Reader::forward(size_t n) {
  for (size_t i = 0; i < n; i++) {
    forward();
  }
}

/// Advances the cursor by one character and updates line/column tracking.
/// A "\r\n" pair is consumed as a single line break. Does nothing at end of
/// input.
void Reader::forward() {
  if (is_eof()) {
    return;
  }
  if (is_eol()) {
    if (match("\r\n")) {
      position_.offset++;
    }
    position_.line++;
    position_.column = 1;
  } else {
    position_.column++;
  }
  position_.offset++;
}

/// Skips whitespace, line terminators and ';' comments.
///
/// A comment runs from ';' up to (not including) the next line terminator or
/// end of input, so an empty comment does not swallow the following line.
void Reader::forward_whitespace() {
  while (true) {
    if (is_eol()) {
      forward();
    } else if (is_whitespace()) {
      forward();
    } else if (is_comment_start()) {
      // The cursor is on ';' (not a line terminator), so this consumes it
      // and everything up to the end of the line.
      while (!is_eof() && !is_eol()) {
        forward();
      }
    } else {
      break;
    }
  }
}

/// Consumes one or more decimal digits; returns false if there are none.
bool Reader::forward_decimal_number() {
  if (!is_digit(get())) return false;
  while (is_digit(get())) forward();
  return true;
}

/// Consumes one or more hexadecimal digits; returns false if there are none.
bool Reader::forward_hex_number() {
  if (!is_hex_digit(get())) return false;
  while (is_hex_digit(get())) forward();
  return true;
}

/// Consumes an exponent: 'e' or 'E', an optional sign, then at least one
/// decimal digit. Returns false if the exponent is malformed.
bool Reader::forward_exponent() {
  if (!match('e') && !match('E')) return false;
  forward();
  if (match('-') || match('+')) forward();
  return forward_decimal_number();
}

// ---------------------------------------------------------------------------
// Free-function convenience wrappers.
// ---------------------------------------------------------------------------

/// Reads one datum from `value`; returns TypeMismatch when the `is()` check
/// on `value` fails.
// NOTE(review): `is()` / `to()` are called without explicit template
// arguments here - confirm against the Value declaration.
Result read_one(const Value& value) {
  if (!value.is()) return ERROR(TypeMismatch);
  auto r = Reader(*value.to());
  return r.read_one();
}

/// Reads one datum from the given string.
Result read_one(const String& value) {
  auto r = Reader(value);
  return r.read_one();
}

/// Reads one datum from a NUL-terminated C string.
Result read_one(const char* value) {
  auto s = TRY(String::create(value));
  auto r = Reader(s);
  return r.read_one();
}

/// Reads every datum in the given string and returns them as a list.
Result read_multiple(const String& value) {
  auto r = Reader(value);
  return r.read_multiple();
}