2024-07-27 15:25:44 +00:00
|
|
|
#include "reader.hpp"
|
|
|
|
|
|
|
|
#include "common.hpp"
|
2024-07-27 22:13:59 +00:00
|
|
|
#include "error.hpp"
|
2024-07-27 15:25:44 +00:00
|
|
|
|
|
|
|
// True when `c` is one of the ASCII decimal digits '0'..'9'.
static bool is_digit(char32_t c) {
  if (c < '0') return false;
  return c <= '9';
}
|
|
|
|
// True when `c` is an ASCII letter, upper or lower case.
static bool is_alpha(char32_t c) {
  const bool lower_case = 'a' <= c && c <= 'z';
  if (lower_case) return true;
  const bool upper_case = 'A' <= c && c <= 'Z';
  return upper_case;
}
|
|
|
|
|
|
|
|
static bool is_hex_digit(char32_t c) {
|
|
|
|
return is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool is_symbol_char(char32_t c) {
|
|
|
|
static char valid_char[] = {'!', '$', '%', '&', '*', '+', '-', '.', '/',
|
|
|
|
':', '<', '=', '>', '?', '@', '^', '_', '~'};
|
|
|
|
|
|
|
|
if (is_digit(c)) return true;
|
|
|
|
if (is_alpha(c)) return true;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < sizeof(valid_char); ++i) {
|
|
|
|
if (char32_t(valid_char[i]) == c) return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// True for horizontal/vertical blank characters: space, tab, vertical tab and
// form feed. Line terminators are deliberately not included here.
static bool is_whitespace(char32_t c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\v':
    case '\f':
      return true;
    default:
      return false;
  }
}
|
|
|
|
|
|
|
|
// True when `c` is an opening or closing square bracket, parenthesis or
// curly brace.
static bool is_brace(char32_t c) {
  switch (c) {
    case '[':
    case ']':
    case '(':
    case ')':
    case '{':
    case '}':
      return true;
    default:
      return false;
  }
}
|
|
|
|
|
|
|
|
// True when `c` is a carriage return or a line feed.
static bool is_newline(char32_t c) {
  if (c == '\n') return true;
  return c == '\r';
}
|
|
|
|
|
|
|
|
static bool is_separator(char32_t c) {
|
|
|
|
return is_whitespace(c) || is_brace(c) || is_newline(c) || c == ';';
|
|
|
|
}
|
|
|
|
|
|
|
|
// Presumably the "end of stream" sentinel code point.
// NOTE(review): EOS is not referenced anywhere in the visible part of this
// file; Reader::get() returns a literal 0 for out-of-range reads -- confirm
// whether the two are meant to be the same value.
#define EOS 0
|
|
|
|
|
|
|
|
// Reads one datum starting at the current position.
//
// Leading whitespace and comments are skipped first. The first character(s)
// then select the sub-reader:
//   - numeric start        -> read_number()
//   - "true" / "false"     -> read_bool()
//   - '"'                  -> read_string()
//   - '('                  -> read_list()
//   - '['                  -> ReadError (arrays are not implemented yet)
//   - '{'                  -> read_dict()
//
// A failed number or bool attempt is not fatal: the position is rewound and
// the text is retried as a symbol. Anything that is not a symbol either
// yields ReadError.
Result<Value> Reader::read_one() {
  forward_whitespace();

  // Remember where the datum starts so that a failed number/bool attempt can
  // be re-read as a symbol from the same place.
  auto saved_position = position_;

  if (is_numeric_start()) {
    auto res = read_number();
    if (res.has_value()) return res;
  } else if (match("true") || match("false")) {
    auto res = read_bool();
    if (res.has_value()) return res;
  } else if (is_string_start()) {
    return read_string();
  } else if (match('(')) {
    return read_list();
  } else if (match('[')) {
    // TODO: implement array
    // return read_array();
    return ERROR(ReadError);
  } else if (match('{')) {
    // The former second `return ERROR(ReadError);` after this statement could
    // never execute and has been removed.
    return read_dict();
  }

  position_ = saved_position;
  if (is_symbol_start()) {
    return read_symbol();
  }

  return ERROR(ReadError);
}
|
|
|
|
|
|
|
|
// Reads every datum up to the end of the input and returns them as a list.
//
// Items are prepended to an accumulator pair chain as they are read, so the
// chain is passed through reverse() before being returned. Any error from
// read_one() is propagated by TRY.
Result<Value> Reader::read_multiple() {
  Value items = TRY(Nil::create());

  for (;;) {
    forward_whitespace();
    if (is_eof()) break;

    auto datum = TRY(read_one());
    items = Value(TRY(Pair::create(datum, items)));
  }

  return reverse(items);
}
|
2024-07-27 18:40:13 +00:00
|
|
|
|
|
|
|
// Reads a parenthesised list: '(' datum* ')'.
//
// Must be called with the position on '('. Elements are prepended to an
// accumulator and the accumulator is reversed once ')' is reached. Hitting
// the end of input before ')' is a ReadError.
Result<Value> Reader::read_list() {
  if (!match('(')) return ERROR(ReadError);
  forward();  // step over '('

  Value items = TRY(Nil::create());

  for (;;) {
    forward_whitespace();

    if (is_eof()) return ERROR(ReadError);
    if (match(')')) break;

    auto datum = TRY(read_one());
    items = Value(TRY(Pair::create(datum, items)));
  }

  forward();  // step over ')'
  return reverse(items);
}
|
|
|
|
|
2024-08-03 12:43:59 +00:00
|
|
|
// Reads a dictionary literal: '{' (key value)* '}'.
//
// Must be called with the position on '{'. Entries are read as consecutive
// key/value datum pairs and inserted into the dictionary one by one. Hitting
// the end of input before '}' is a ReadError; errors from read_one() or
// insert() are propagated by TRY.
Result<Value> Reader::read_dict() {
  if (!match('{')) return ERROR(ReadError);
  forward();  // step over '{'

  auto dict = TRY(Dict::create());

  for (;;) {
    forward_whitespace();

    if (is_eof()) return ERROR(ReadError);
    if (match('}')) break;

    auto key = TRY(read_one());
    auto mapped = TRY(read_one());
    dict = TRY(dict.insert(key, mapped));
  }

  forward();  // step over '}'
  return Value(std::move(dict));
}
|
2024-07-27 18:40:13 +00:00
|
|
|
// Reads the literal `true` or `false`.
//
// The keyword must be followed by a separator or the end of input; otherwise
// (e.g. "truest") a ReadError is returned.
Result<Value> Reader::read_bool() {
  bool literal = false;
  size_t keyword_length = 0;

  if (match("true")) {
    literal = true;
    keyword_length = 4;
  } else if (match("false")) {
    literal = false;
    keyword_length = 5;
  } else {
    return ERROR(ReadError);
  }

  forward(keyword_length);
  if (!is_separator(get()) && !is_eof()) return ERROR(ReadError);
  return Value(TRY(Bool::create(literal)));
}
|
2024-07-27 22:13:59 +00:00
|
|
|
|
|
|
|
// Reads a double-quoted string literal.
//
// Phase 1 scans to the closing quote. A backslash always protects the
// character after it, so `\"` does not terminate the literal. A raw CR/LF or
// the end of input before the closing quote yields UnterminatedStringLiteral.
//
// Phase 2 decodes the scanned content:
//   \b \f \n \r \t \v  -> the corresponding control character
//   \" and \\          -> the quote / backslash itself, without the escaping
//                         backslash (previously the backslash was kept, so a
//                         string could never contain a bare '"' or a single
//                         '\')
//   any other \x       -> kept verbatim as backslash + x
//
// The literal must be followed by a separator or the end of input, otherwise
// ReadError is returned.
Result<Value> Reader::read_string() {
  if (!match('"')) return ERROR(ReadError);
  // Offset of the first content character (just after the opening quote).
  const size_t start = position_.offset + 1;

  forward();

  while (!match('"') && !match('\r') && !match('\n') && !is_eof()) {
    if (match('\\')) {
      forward();  // skip the backslash; the escaped char is skipped below
    }
    forward();
  }

  if (!match('"')) return ERROR(UnterminatedStringLiteral);

  forward();  // step over the closing quote

  // position_ is now one past the closing quote, hence the extra -1.
  const size_t length = position_.offset - start - 1;

  String result = TRY(String::create(""));

  for (size_t i = 0; i < length; i++) {
    char32_t cur = TRY(_str[start + i]);
    if (cur != '\\') {
      // TODO: optimize this
      result = TRY(result.concat(&cur, 1));
      continue;
    }

    ++i;
    // Defensive: a backslash with nothing after it inside the content. The
    // old bound (offset - start) was one too large and would have let the
    // closing quote be read as the escaped character.
    if (i >= length) continue;
    char32_t next = TRY(_str[start + i]);

    if (next == '\0') {
      // NOTE(review): this tests for a literal NUL code point after the
      // backslash and appends the C string "\0"; presumably the `\0` escape
      // (backslash + '0') was intended -- confirm before changing.
      result = TRY(result.concat("\0"));
      continue;
    }

    switch (next) {
      case 'b':
        result = TRY(result.concat("\b"));
        continue;
      case 'f':
        result = TRY(result.concat("\f"));
        continue;
      case 'n':
        result = TRY(result.concat("\n"));
        continue;
      case 'r':
        result = TRY(result.concat("\r"));
        continue;
      case 't':
        result = TRY(result.concat("\t"));
        continue;
      case 'v':
        result = TRY(result.concat("\v"));
        continue;
      case '"':
      case '\\':
        // Escaped quote / backslash: emit the character itself only.
        result = TRY(result.concat(&next, 1));
        continue;
    }

    // Unknown escape: keep it exactly as written.
    result = TRY(result.concat("\\"));
    result = TRY(result.concat(&next, 1));
  }

  if (!is_separator(get()) && !is_eof()) return ERROR(ReadError);

  return Value(std::move(result));
}
|
|
|
|
|
|
|
|
// Reads a numeric literal and returns it as an Int64 or a Float value.
//
// Accepted shapes (optionally preceded by '+' or '-'):
//   - decimal integers:            12, 007
//   - hexadecimal integers:        0x1F, 0XFF
//   - floats with a fraction:      1.5, .5, 0.25
//   - floats with an exponent:     1e9, 0.5E-3
//   - floats may carry an "f64" suffix, which is consumed and ignored.
//
// The literal text must be shorter than 32 characters and must be followed
// by a separator or the end of input.
//
// Errors: ReadError when the position is not at a numeric start or the
// literal is not properly terminated; InvalidNumericLiteral for a malformed
// exponent, a "0x" without hex digits, or an over-long literal.
Result<Value> Reader::read_number() {
  if (!is_numeric_start()) return ERROR(ReadError);
  const size_t start = position_.offset;

  bool is_float = false;
  bool is_hex = false;

  if (match('+') || match('-')) forward();

  if (match('0')) {
    forward();
    if (match('.')) {
      is_float = true;
      forward();
      while (is_digit(get())) forward();

      if (match('e') || match('E')) {
        if (!forward_exponent()) return ERROR(InvalidNumericLiteral);
      }
    } else if (match('e') || match('E')) {
      is_float = true;
      if (!forward_exponent()) return ERROR(InvalidNumericLiteral);
    } else if (match('x') || match('X')) {
      is_hex = true;
      // Consume the 'x' itself. Previously it was left in place, so
      // forward_hex_number() saw 'x' (not a hex digit) and always failed.
      forward();
      if (!forward_hex_number()) return ERROR(InvalidNumericLiteral);
    } else {
      // Further decimal digits after a leading zero, e.g. "007".
      while (is_digit(get())) forward();
    }
  } else {
    while (is_digit(get())) forward();
    if (match('.')) {
      is_float = true;
      forward();
      while (is_digit(get())) forward();
    }
    if (match('e') || match('E')) {
      is_float = true;
      if (!forward_exponent()) return ERROR(InvalidNumericLiteral);
    }
  }

  // Copy the literal into a NUL-terminated narrow buffer for strtod/strtoll.
  const size_t length = position_.offset - start;
  if (length >= 32) return ERROR(InvalidNumericLiteral);

  char buf[32];
  for (size_t i = 0; i < length; ++i) {
    buf[i] = char(TRY(_str[start + i]));
  }
  buf[length] = '\0';

  Value res;
  if (is_float) {
    res = Value(TRY(Float::create(strtod(buf, nullptr))));
    if (match("f64")) {
      forward(3);
    }
  } else {
    // With base 16, strtoll accepts the optional sign and "0x"/"0X" prefix.
    const int base = is_hex ? 16 : 10;
    res = Value(TRY(Int64::create(strtoll(buf, nullptr, base))));
  }

  if (!is_separator(get()) && !is_eof()) return ERROR(ReadError);

  return res;
}
|
|
|
|
|
|
|
|
// Reads a symbol: a maximal run of symbol characters starting at the current
// position. The run must be followed by a separator or the end of input,
// otherwise ReadError is returned.
Result<Value> Reader::read_symbol() {
  if (!is_symbol_char(get())) return ERROR(ReadError);

  const size_t first = position_.offset;
  // The guard above guarantees at least one symbol character, so the loop
  // body can run before the condition is tested.
  do {
    forward();
  } while (is_symbol_char(get()));
  const size_t last = position_.offset;

  String name = TRY(_str.slice(first, last));

  const bool terminated = is_separator(get()) || is_eof();
  if (!terminated) return ERROR(ReadError);

  return Value(TRY(Symbol::create(name)));
}
|
|
|
|
|
|
|
|
char32_t Reader::get(size_t offset) {
|
|
|
|
size_t pos = position_.offset + offset;
|
|
|
|
auto res = _str[pos];
|
|
|
|
if (res.has_value()) return *res;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// True when the current character is a line feed or a carriage return.
bool Reader::is_eol() {
  if (match('\n')) return true;
  return match('\r');
}
|
|
|
|
|
|
|
|
// True when the current offset equals the size of the input string.
bool Reader::is_eof() {
  const auto total = _str.size();
  return total == position_.offset;
}
|
|
|
|
|
|
|
|
bool Reader::is_whitespace() {
|
|
|
|
return match(' ') || match('\t') || match('\v') || match('\f');
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// True when the current character is ';', which introduces a comment.
bool Reader::is_comment_start() {
  const bool at_semicolon = match(';');
  return at_semicolon;
}
|
|
|
|
|
|
|
|
// True when the current character is a double quote.
bool Reader::is_string_start() {
  const bool at_quote = match('"');
  return at_quote;
}
|
|
|
|
|
|
|
|
// True when the current character is allowed inside a symbol.
bool Reader::is_symbol_start() {
  const char32_t current = get();
  return is_symbol_char(current);
}
|
|
|
|
|
|
|
|
bool Reader::is_numeric_start() {
|
|
|
|
char32_t c = get();
|
|
|
|
if (is_digit(c)) return true;
|
|
|
|
if (c == '.' && is_digit(get(1))) return true;
|
|
|
|
if ((c == '+' || c == '-') && is_digit(get(1))) return true;
|
|
|
|
if ((c == '+' || c == '-') && get(1) == '.' && is_digit(get(2))) return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Reader::match(const char* str) {
|
|
|
|
size_t slen = strlen(str);
|
|
|
|
for (size_t i = 0; i < slen; i++) {
|
|
|
|
if (get(i) != char32_t(str[i])) return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// True when the current character equals `c`. Does not advance the position.
bool Reader::match(char c) {
  const char32_t expected = char32_t(c);
  return expected == get();
}
|
|
|
|
|
|
|
|
void Reader::forward(size_t n) {
|
|
|
|
for (size_t i = 0; i < n; i++) {
|
|
|
|
forward();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void Reader::forward() {
|
|
|
|
if (is_eof()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is_eol()) {
|
|
|
|
if (match("\r\n")) {
|
|
|
|
position_.offset++;
|
|
|
|
}
|
|
|
|
position_.line++;
|
|
|
|
position_.column = 1;
|
|
|
|
} else {
|
|
|
|
position_.column++;
|
|
|
|
}
|
|
|
|
|
|
|
|
position_.offset++;
|
|
|
|
}
|
|
|
|
|
|
|
|
void Reader::forward_whitespace() {
|
|
|
|
while (true) {
|
|
|
|
if (is_eol()) {
|
|
|
|
forward();
|
|
|
|
} else if (is_whitespace()) {
|
|
|
|
forward();
|
|
|
|
} else if (is_comment_start()) {
|
|
|
|
forward();
|
|
|
|
do {
|
|
|
|
forward();
|
|
|
|
} while (!is_eof() && !is_eol());
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Reader::forward_decimal_number() {
|
|
|
|
if (!is_digit(get())) return false;
|
|
|
|
|
|
|
|
while (is_digit(get())) forward();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Reader::forward_hex_number() {
|
|
|
|
if (!is_hex_digit(get())) return false;
|
|
|
|
|
|
|
|
while (is_hex_digit(get())) forward();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Reader::forward_exponent() {
|
|
|
|
if (!match('e') && !match('E')) return false;
|
|
|
|
forward();
|
|
|
|
|
|
|
|
if (match('-') || match('+')) forward();
|
|
|
|
|
|
|
|
return forward_decimal_number();
|
|
|
|
}
|
2024-08-03 12:43:59 +00:00
|
|
|
|
2024-08-26 12:16:05 +00:00
|
|
|
// Reads one datum from `value`, which must hold a String; any other type
// yields TypeMismatch.
Result<Value> read_one(const Value& value) {
  const bool holds_string = value.is<String>();
  if (!holds_string) return ERROR(TypeMismatch);

  Reader reader(*value.to<String>());
  return reader.read_one();
}
|
2024-08-26 12:16:05 +00:00
|
|
|
// Reads one datum from the text held in `value`.
Result<Value> read_one(const String& value) {
  Reader reader(value);
  return reader.read_one();
}
|
2024-08-09 22:45:06 +00:00
|
|
|
// Reads one datum from a C string. The text is first converted to a String;
// a failure of that conversion is propagated by TRY.
Result<Value> read_one(const char* value) {
  auto text = TRY(String::create(value));
  Reader reader(text);
  return reader.read_one();
}
|
2024-08-18 21:08:47 +00:00
|
|
|
|
2024-08-26 12:16:05 +00:00
|
|
|
// Reads every datum in `value` and returns them via Reader::read_multiple().
Result<Value> read_multiple(const String& value) {
  Reader reader(value);
  return reader.read_multiple();
}
|