valeri/src/reader.cpp

538 lines
11 KiB
C++

#include "reader.hpp"
#include "common.hpp"
#include "error.hpp"
// Returns true when `c` is an ASCII decimal digit ('0' through '9').
static bool is_digit(char32_t c) {
  const bool below_zero = c < '0';
  const bool above_nine = c > '9';
  return !(below_zero || above_nine);
}
// Returns true when `c` is an ASCII letter, lower or upper case.
static bool is_alpha(char32_t c) {
  if ('a' <= c && c <= 'z') return true;
  if ('A' <= c && c <= 'Z') return true;
  return false;
}
// Returns true when `c` is an ASCII hexadecimal digit: 0-9, a-f or A-F.
static bool is_hex_digit(char32_t c) {
  if (is_digit(c)) return true;
  const bool lower_hex = 'a' <= c && c <= 'f';
  const bool upper_hex = 'A' <= c && c <= 'F';
  return lower_hex || upper_hex;
}
// Returns true when `c` may appear inside a symbol: an ASCII letter, an ASCII
// digit, or one of the punctuation characters  ! $ % & * + - . / : < = > ? @ ^ _ ~
static bool is_symbol_char(char32_t c) {
  if (is_digit(c) || is_alpha(c)) return true;
  switch (c) {
    case '!':
    case '$':
    case '%':
    case '&':
    case '*':
    case '+':
    case '-':
    case '.':
    case '/':
    case ':':
    case '<':
    case '=':
    case '>':
    case '?':
    case '@':
    case '^':
    case '_':
    case '~':
      return true;
    default:
      return false;
  }
}
// Returns true for horizontal/in-line blanks: space, tab, vertical tab and
// form feed. Line terminators are classified separately by is_newline().
static bool is_whitespace(char32_t c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\v':
    case '\f':
      return true;
    default:
      return false;
  }
}
// Returns true when `c` is any opening or closing bracket: ( ) [ ] { }.
static bool is_brace(char32_t c) {
  switch (c) {
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
      return true;
    default:
      return false;
  }
}
// Returns true when `c` is a line-terminator character (LF or CR).
static bool is_newline(char32_t c) {
  if (c == '\n') return true;
  return c == '\r';
}
// Returns true when `c` ends the token before it: in-line whitespace, a line
// terminator, any bracket, or ';' (the character Reader treats as the start of
// a comment).
static bool is_separator(char32_t c) {
  if (c == ';') return true;
  if (is_newline(c)) return true;
  if (is_brace(c)) return true;
  return is_whitespace(c);
}
// End-of-stream marker value. Reader::get() yields 0 for positions outside the
// input; this macro gives that value a name.
// NOTE(review): no code in this file refers to EOS by name - confirm whether
// it is still needed.
#define EOS 0
// Reads a single datum starting at the current position.
//
// Leading whitespace and comments are skipped first. The first character(s)
// then select the form: string, list, array or dict are dispatched directly and
// their result (value or error) is returned as-is. A number, boolean or nil
// literal is attempted next; if that attempt fails, the position is rewound to
// where the datum started and the text is re-read as a symbol. Returns
// ReadError when nothing matches.
Result<Value> Reader::read_one() {
  forward_whitespace();

  // Delimited forms are identified by their opening character alone.
  if (is_string_start()) return read_string();
  if (match('(')) return read_list();
  if (match('[')) return read_array();
  if (match('{')) return read_dict();

  // Literals may turn out to be the prefix of a symbol (e.g. "nilly"), so
  // remember where we were in order to retry.
  const auto rewind_to = position_;
  if (is_numeric_start()) {
    auto number = read_number();
    if (number.has_value()) return number;
  } else if (match("true") || match("false")) {
    auto boolean = read_bool();
    if (boolean.has_value()) return boolean;
  } else if (match("nil")) {
    auto nil = read_nil();
    if (nil.has_value()) return nil;
  }

  position_ = rewind_to;
  if (!is_symbol_start()) return ERROR(ReadError);
  return read_symbol();
}
// Reads every datum up to the end of the input and returns them as a list in
// source order. Any error from reading a datum is propagated.
Result<Value> Reader::read_multiple() {
  // Items are consed onto the front, so the list is built backwards and
  // reversed once at the end.
  Value collected = TRY(Nil::create());
  for (forward_whitespace(); !is_eof(); forward_whitespace()) {
    auto item = TRY(read_one());
    collected = Value(TRY(Pair::create(item, collected)));
  }
  return reverse(collected);
}
// Reads a parenthesised list "( ... )" and returns its elements as a list in
// source order. Returns ReadError when the current character is not '(' or
// when the input ends before the closing ')'.
Result<Value> Reader::read_list() {
  if (!match('(')) return ERROR(ReadError);
  forward();  // opening '('

  // Elements are pushed on the front and the list is reversed on exit.
  Value elements = TRY(Nil::create());
  for (forward_whitespace(); !is_eof(); forward_whitespace()) {
    if (match(')')) {
      forward();
      return reverse(elements);
    }
    auto element = TRY(read_one());
    elements = Value(TRY(Pair::create(element, elements)));
  }
  // Ran out of input before the closing ')'.
  return ERROR(ReadError);
}
// Reads "[ a b ... ]" as code: the result is the list (array a b ...), i.e.
// the symbol `array` followed by the elements in source order. Returns
// ReadError when the current character is not '[' or when the input ends
// before the closing ']'.
Result<Value> Reader::read_array_code() {
  if (!match('[')) return ERROR(ReadError);
  forward();  // opening '['

  // Start with (array) and push elements on the front; reversing at the end
  // puts `array` first and the elements in source order.
  auto head = Value(TRY(Symbol::create("array")));
  auto form = Value(TRY(Pair::create(head, TRY(Nil::create()))));
  for (forward_whitespace(); !is_eof(); forward_whitespace()) {
    if (match(']')) {
      forward();
      return reverse(form);
    }
    auto element = TRY(read_one());
    form = TRY(Pair::create(element, form));
  }
  // Ran out of input before the closing ']'.
  return ERROR(ReadError);
}
// Reads "[ a b ... ]" as data: each element is read and appended to a freshly
// created Array, which is returned. Returns ReadError when the current
// character is not '[' or when the input ends before the closing ']'.
Result<Value> Reader::read_array_data() {
  if (!match('[')) return ERROR(ReadError);
  forward();  // opening '['

  auto array = TRY(Array::create());
  for (forward_whitespace(); !is_eof(); forward_whitespace()) {
    if (match(']')) {
      forward();
      return Value(std::move(array));
    }
    auto element = TRY(read_one());
    array = TRY(array.append(element));
  }
  // Ran out of input before the closing ']'.
  return ERROR(ReadError);
}
// Reads a "[ ... ]" form, either as an (array ...) code list or as an Array
// value, depending on the reader's `_as_code` flag.
Result<Value> Reader::read_array() {
  return _as_code ? read_array_code() : read_array_data();
}
// Reads "{ a b c d ... }" as code: the result is the list (dict a b c d ...),
// i.e. the symbol `dict` followed by the entries in source order. Entries are
// read two at a time, so an odd number of entries makes the second read_one()
// of the last pair fail. Returns ReadError when the current character is not
// '{' or when the input ends before the closing '}'.
Result<Value> Reader::read_dict_code() {
  if (!match('{')) return ERROR(ReadError);
  forward();  // opening '{'

  // Start with (dict) and push entries on the front; reversing at the end puts
  // `dict` first and the entries in source order.
  auto head = Value(TRY(Symbol::create("dict")));
  auto form = Value(TRY(Pair::create(head, TRY(Nil::create()))));
  for (forward_whitespace(); !is_eof(); forward_whitespace()) {
    if (match('}')) {
      forward();
      return reverse(form);
    }
    auto first = TRY(read_one());
    auto second = TRY(read_one());
    form = TRY(Pair::create(first, form));
    form = TRY(Pair::create(second, form));
  }
  // Ran out of input before the closing '}'.
  return ERROR(ReadError);
}
// Reads "{ a b c d ... }" as data: entries are read two at a time and each pair
// is passed to Dict::insert(first, second) on a freshly created Dict
// (presumably key then value - confirm against Dict). Returns ReadError when
// the current character is not '{' or when the input ends before the closing
// '}'.
Result<Value> Reader::read_dict_data() {
  if (!match('{')) return ERROR(ReadError);
  forward();  // opening '{'

  auto dict = TRY(Dict::create());
  for (forward_whitespace(); !is_eof(); forward_whitespace()) {
    if (match('}')) {
      forward();
      return Value(std::move(dict));
    }
    auto first = TRY(read_one());
    auto second = TRY(read_one());
    dict = TRY(dict.insert(first, second));
  }
  // Ran out of input before the closing '}'.
  return ERROR(ReadError);
}
// Reads a "{ ... }" form, either as a (dict ...) code list or as a Dict value,
// depending on the reader's `_as_code` flag.
Result<Value> Reader::read_dict() {
  return _as_code ? read_dict_code() : read_dict_data();
}
// Reads the literal `true` or `false`. The literal must be followed by a
// separator or the end of input; otherwise (e.g. "truest") ReadError is
// returned. ReadError is also returned when neither keyword is present.
Result<Value> Reader::read_bool() {
  bool value = false;
  size_t keyword_length = 0;
  if (match("true")) {
    value = true;
    keyword_length = 4;
  } else if (match("false")) {
    value = false;
    keyword_length = 5;
  } else {
    return ERROR(ReadError);
  }

  forward(keyword_length);
  const bool terminated = is_eof() || is_separator(get());
  if (!terminated) return ERROR(ReadError);
  return Value(TRY(Bool::create(value)));
}
// Reads the literal `nil`. The literal must be followed by a separator or the
// end of input; otherwise, or when `nil` is not present, ReadError is returned.
Result<Value> Reader::read_nil() {
  if (!match("nil")) return ERROR(ReadError);

  forward(3);
  const bool terminated = is_eof() || is_separator(get());
  if (!terminated) return ERROR(ReadError);
  return Value(TRY(Nil::create()));
}
// Reads a double-quoted string literal.
//
// Pass 1 scans to the closing quote. A backslash makes the scanner skip the
// following character, so an escaped quote does not end the literal. An
// unescaped CR/LF or the end of input before the closing quote yields
// UnterminatedStringLiteral.
//
// Pass 2 decodes the body: \b \f \n \r \t \v are replaced by the corresponding
// control character; any other escaped character is kept together with its
// backslash. The literal must be followed by a separator or the end of input,
// otherwise ReadError is returned.
//
// NOTE(review): because unknown escapes keep their backslash, `\\` decodes to
// two backslashes and `\"` to backslash + quote - confirm this is intended.
Result<Value> Reader::read_string() {
  if (!match('"')) return ERROR(ReadError);
  const size_t body_start = position_.offset + 1;
  forward();  // opening quote

  for (;;) {
    if (match('"') || match('\r') || match('\n') || is_eof()) break;
    if (match('\\')) forward();  // step onto the escaped character
    forward();
  }
  if (!match('"')) return ERROR(UnterminatedStringLiteral);
  forward();  // closing quote

  // position_ now sits just past the closing quote, so the body excludes it.
  const size_t body_length = position_.offset - body_start - 1;

  String result = TRY(String::create(""));
  for (size_t i = 0; i < body_length; ++i) {
    char32_t ch = TRY(_str[body_start + i]);
    if (ch != '\\') {
      // TODO: optimize this
      result = TRY(result.concat(&ch, 1));
      continue;
    }

    ++i;  // move to the character after the backslash
    if (i > body_length) continue;
    char32_t escaped = TRY(_str[body_start + i]);

    bool decoded = true;
    switch (escaped) {
      case '\0':
        // NOTE(review): tests for an actual NUL code point after the
        // backslash, not the character '0' - confirm which was meant.
        result = TRY(result.concat("\0"));
        break;
      case 'b':
        result = TRY(result.concat("\b"));
        break;
      case 'f':
        result = TRY(result.concat("\f"));
        break;
      case 'n':
        result = TRY(result.concat("\n"));
        break;
      case 'r':
        result = TRY(result.concat("\r"));
        break;
      case 't':
        result = TRY(result.concat("\t"));
        break;
      case 'v':
        result = TRY(result.concat("\v"));
        break;
      default:
        decoded = false;
        break;
    }
    if (decoded) continue;

    // Unknown escape: keep the backslash and the character unchanged.
    result = TRY(result.concat("\\"));
    result = TRY(result.concat(&escaped, 1));
  }

  const bool terminated = is_eof() || is_separator(get());
  if (!terminated) return ERROR(ReadError);
  return Value(std::move(result));
}
// Reads a numeric literal and returns it as an Int64 or a Float value.
//
// Accepted shapes (after an optional leading '+' or '-'):
//   - decimal integers:            42, 007
//   - hexadecimal integers:        0x1F, 0XFF
//   - floats with a fraction:      1.5, 0.25, .5, 3.
//   - floats with an exponent:     1e9, 2.5E-3, 0e0
// A float may carry an `f64` suffix, which is consumed and ignored.
//
// Errors:
//   - ReadError when the current position does not start a number, or when the
//     literal is not followed by a separator or the end of input.
//   - InvalidNumericLiteral when an exponent or the hex prefix has no digits,
//     or when the literal is 32 characters or longer.
Result<Value> Reader::read_number() {
  if (!is_numeric_start()) return ERROR(ReadError);
  size_t start = position_.offset;
  bool is_float = false;
  bool is_hex = false;
  if (match('+') || match('-')) forward();
  if (match('0')) {
    forward();
    if (match('.')) {
      is_float = true;
      forward();
      while (is_digit(get())) forward();
      if (match('e') || match('E')) {
        if (!forward_exponent()) return ERROR(InvalidNumericLiteral);
      }
    } else if (match('e') || match('E')) {
      is_float = true;
      if (!forward_exponent()) return ERROR(InvalidNumericLiteral);
    } else if (match('x') || match('X')) {
      // Step over the radix marker first: forward_hex_number() expects to
      // start on a hex digit and would otherwise always fail on the 'x'.
      is_hex = true;
      forward();
      if (!forward_hex_number()) return ERROR(InvalidNumericLiteral);
    } else {
      // Decimal integer written with leading zero(s).
      while (is_digit(get())) forward();
    }
  } else {
    while (is_digit(get())) forward();
    if (match('.')) {
      is_float = true;
      forward();
      while (is_digit(get())) forward();
    }
    if (match('e') || match('E')) {
      is_float = true;
      if (!forward_exponent()) return ERROR(InvalidNumericLiteral);
    }
  }

  // Copy the literal into a NUL-terminated narrow buffer for strtod/strtoll.
  // Every consumed character is ASCII, so the narrowing cast is lossless.
  constexpr size_t kBufferSize = 32;
  const size_t length = position_.offset - start;
  if (length >= kBufferSize) return ERROR(InvalidNumericLiteral);
  char buf[kBufferSize];
  for (size_t i = 0; i < length; ++i) {
    buf[i] = char(TRY(_str[start + i]));
  }
  buf[length] = '\0';

  Value res;
  if (is_float) {
    res = Value(TRY(Float::create(strtod(buf, nullptr))));
    if (match("f64")) {
      forward(3);
    }
  } else {
    // With base 16 strtoll accepts the optional sign and the "0x"/"0X" prefix;
    // decimal literals keep base 10 so leading zeros are not read as octal.
    const int base = is_hex ? 16 : 10;
    res = Value(TRY(Int64::create(strtoll(buf, nullptr, base))));
  }
  if (!is_separator(get()) && !is_eof()) return ERROR(ReadError);
  return res;
}
// Reads a symbol: the longest run of symbol characters starting at the current
// position. Returns ReadError when the current character is not a symbol
// character, or when the run is not followed by a separator or the end of
// input.
Result<Value> Reader::read_symbol() {
  if (!is_symbol_char(get())) return ERROR(ReadError);

  const size_t first = position_.offset;
  // The guard above guarantees at least one symbol character.
  do {
    forward();
  } while (is_symbol_char(get()));
  const size_t past_last = position_.offset;

  String name = TRY(_str.slice(first, past_last));
  const bool terminated = is_eof() || is_separator(get());
  if (!terminated) return ERROR(ReadError);
  return Value(TRY(Symbol::create(name)));
}
// Returns the code point `offset` positions ahead of the current position
// without consuming anything. Yields 0 when the lookup into the input fails
// (e.g. the position is past the end).
char32_t Reader::get(size_t offset) {
  auto looked_up = _str[position_.offset + offset];
  return looked_up.has_value() ? *looked_up : char32_t(0);
}
bool Reader::is_eol() { return match('\n') || match('\r'); }
// True when the current offset equals the size of the input, i.e. every
// character has been consumed.
bool Reader::is_eof() {
  const auto input_size = _str.size();
  return position_.offset == input_size;
}
// True when the current character is in-line whitespace: space, tab, vertical
// tab or form feed. Line terminators are reported by is_eol() instead.
bool Reader::is_whitespace() {
  return match(' ') || match('\t') || match('\v') || match('\f');
}
bool Reader::is_comment_start() { return match(';'); }
bool Reader::is_string_start() { return match('"'); }
// True when the current character may begin a symbol (any symbol character).
bool Reader::is_symbol_start() {
  const char32_t current = get();
  return is_symbol_char(current);
}
// True when a numeric literal begins at the current position: an optional
// '+' or '-' sign followed by either a digit, or a '.' that is itself followed
// by a digit.
bool Reader::is_numeric_start() {
  size_t at = 0;
  const char32_t first = get(at);
  if (first == '+' || first == '-') ++at;  // look past the optional sign

  const char32_t lead = get(at);
  if (is_digit(lead)) return true;
  return lead == '.' && is_digit(get(at + 1));
}
// True when the input at the current position starts with the NUL-terminated
// string `str`. Nothing is consumed. An empty `str` always matches.
bool Reader::match(const char* str) {
  for (size_t i = 0; str[i] != '\0'; ++i) {
    const char32_t expected = char32_t(str[i]);
    if (get(i) != expected) return false;
  }
  return true;
}
// True when the current character equals `c`. Nothing is consumed.
bool Reader::match(char c) {
  const char32_t expected = char32_t(c);
  return get() == expected;
}
// Advances by `n` single steps; each step is one call to forward(), so a CRLF
// pair counts as one step and stepping at the end of input does nothing.
void Reader::forward(size_t n) {
  while (n > 0) {
    forward();
    --n;
  }
}
// Advances one step and keeps line/column bookkeeping in sync.
//
// At the end of input this is a no-op. On a line terminator the line number is
// incremented and the column reset to 1; a CRLF pair is consumed as a single
// terminator. Any other character just bumps column and offset by one.
void Reader::forward() {
  if (is_eof()) return;

  if (!is_eol()) {
    position_.column++;
    position_.offset++;
    return;
  }

  // Line terminator: CRLF occupies two offsets, a lone CR or LF one.
  if (match("\r\n")) {
    position_.offset += 2;
  } else {
    position_.offset += 1;
  }
  position_.line++;
  position_.column = 1;
}
// Skips everything that separates data: in-line whitespace, line terminators
// and ';' comments. A comment runs from the ';' up to, but not including, the
// next line terminator (or the end of input); the terminator itself is then
// consumed by the next iteration of the outer loop.
void Reader::forward_whitespace() {
  while (true) {
    if (is_eol() || is_whitespace()) {
      forward();
    } else if (is_comment_start()) {
      // Stop on the terminator instead of always stepping once past the ';'.
      // Otherwise an empty comment (";" directly before a newline) would
      // consume the newline and then swallow the whole following line.
      while (!is_eof() && !is_eol()) {
        forward();
      }
    } else {
      break;
    }
  }
}
// Consumes a run of decimal digits. Returns true when at least one digit was
// consumed, false (consuming nothing) otherwise.
bool Reader::forward_decimal_number() {
  bool consumed_any = false;
  while (is_digit(get())) {
    forward();
    consumed_any = true;
  }
  return consumed_any;
}
// Consumes a run of hexadecimal digits. Returns true when at least one digit
// was consumed, false (consuming nothing) otherwise.
bool Reader::forward_hex_number() {
  bool consumed_any = false;
  while (is_hex_digit(get())) {
    forward();
    consumed_any = true;
  }
  return consumed_any;
}
// Consumes an exponent part: 'e' or 'E', an optional sign, then one or more
// decimal digits. Returns false when the marker is missing (nothing consumed)
// or when no digit follows the marker/sign (marker and sign already consumed).
bool Reader::forward_exponent() {
  const bool at_marker = match('e') || match('E');
  if (!at_marker) return false;
  forward();

  const bool has_sign = match('+') || match('-');
  if (has_sign) forward();

  return forward_decimal_number();
}
// Reads the first datum from `value`, which must hold a String; returns
// TypeMismatch otherwise. `as_code` is forwarded to the Reader.
Result<Value> read_one(const Value& value, bool as_code) {
  const bool holds_string = value.is<String>();
  if (!holds_string) return ERROR(TypeMismatch);

  Reader reader(*value.to<String>(), as_code);
  return reader.read_one();
}
// Reads the first datum from the String `value`. `as_code` is forwarded to the
// Reader.
Result<Value> read_one(const String& value, bool as_code) {
  Reader reader(value, as_code);
  return reader.read_one();
}
// Reads the first datum from the C string `value`. The text is first turned
// into a String (errors from that step are propagated); `as_code` is forwarded
// to the Reader.
Result<Value> read_one(const char* value, bool as_code) {
  auto source = TRY(String::create(value));
  Reader reader(source, as_code);
  return reader.read_one();
}
// Reads every datum in the String `value` and returns them as a list.
// `as_code` is forwarded to the Reader.
Result<Value> read_multiple(const String& value, bool as_code) {
  Reader reader(value, as_code);
  return reader.read_multiple();
}