Initial commit - add basic boilerplate for POD types

This commit is contained in:
Konstantin Nazarov 2024-07-19 02:29:30 +01:00
commit 8ff01726cc
Signed by: knazarov
GPG key ID: 4CFE0A42FA409C22
17 changed files with 612 additions and 0 deletions

4
.clang-format Normal file
View file

@ -0,0 +1,4 @@
---
# Use the stock Google style as the formatting baseline; no overrides are set.
BasedOnStyle: Google
...

2
.clangd Normal file
View file

@ -0,0 +1,2 @@
# Extra compiler flags clangd appends when parsing files in this project.
CompileFlags:
  # -xc++      : treat inputs as C++
  # -std=c++23 : same language level as CMAKE_CXX_STANDARD in CMakeLists.txt
  # -Isrc/     : resolve project includes relative to src/
  # `Add` has to be nested under `CompileFlags` to be recognised.
  Add: [-xc++, -std=c++23, -Isrc/]

20
.gitignore vendored Normal file
View file

@ -0,0 +1,20 @@
# Object files, static libraries and coverage artefacts
*.o
*.a
*.gcda
*.gcno
*.gcov
lcov.info
test/*.test

# Built executables (`vli` is the target defined in CMakeLists.txt)
vm
lisp
asm
sd
vli

# Editor / tooling caches
compile_commands.json
.cache

# Files generated by an in-tree `cmake .`
CMakeFiles
cmake_install.cmake
CMakeCache.txt
CTestTestfile.cmake
Testing

# Nix and out-of-tree build outputs
result
build

52
CMakeLists.txt Normal file
View file

@ -0,0 +1,52 @@
cmake_minimum_required(VERSION 3.28)
project(valeri) # vli

# The sources use concepts and requires-clauses, so the requested standard must
# actually be honoured: without *_REQUIRED CMake silently falls back to an
# older dialect when the compiler does not support C++23.
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Emit compile_commands.json for clangd and similar tools.
set(CMAKE_EXPORT_COMPILE_COMMANDS true)
message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")

# Global flags: no exceptions, static libgcc/libstdc++, warnings as errors
# (with a few warnings silenced), frame pointers kept, AddressSanitizer on.
set (CMAKE_CXX_FLAGS "-fno-exceptions -static-libgcc -static-libstdc++ -Werror -Wall -Wunused-result -Wno-unused-function -Wno-unused-variable -fno-omit-frame-pointer -fsanitize=address -Wno-c99-designator")
# Core library shared by the interpreter executable and the tests.
add_library(vm_lib)
target_sources(vm_lib
  PRIVATE
    src/vm.cpp
    src/common.cpp
  PUBLIC
    FILE_SET HEADERS
    BASE_DIRS src
    FILES
      src/vm.hpp
      src/common.hpp
      # common.hpp includes these two, so they belong to the public header set.
      src/arena.hpp
      src/pod.hpp
)

# Interpreter executable.
add_executable(vli src/vli.cpp)
target_link_libraries(vli vm_lib)
install(TARGETS vli)
# TESTING
enable_testing()
# Names of the C++ test programs; each entry NAME is built from test/NAME.cpp.
# The list is currently empty, so the loop below creates no targets.
set(CPP_TESTS
)
# For every listed test: build an executable test_NAME, attach test/test.hpp as
# its header file set, link it against vm_lib and register it with CTest.
foreach(testname IN LISTS CPP_TESTS)
add_executable("test_${testname}")
target_sources("test_${testname}" PRIVATE test/${testname}.cpp
PUBLIC
FILE_SET HEADERS
BASE_DIRS test
FILES
test/test.hpp)
target_link_libraries("test_${testname}" vm_lib)
add_test(
NAME "test_${testname}"
COMMAND $<TARGET_FILE:test_${testname}>
)
endforeach()

11
LICENSE Normal file
View file

@ -0,0 +1,11 @@
Copyright 2023 Konstantin Nazarov
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

94
README.md Normal file
View file

@ -0,0 +1,94 @@
# Experimental Lisp interpreter
This is a Lisp interpreter that I'm writing to test a few ideas and to try to make a language
that I would want to use for personal projects.
Note: the project is at a very early stage.
Features that I would like to see (in no particular order):
- small and simple core with focus on correctness/security
- Erlang-style concurrency and message-passing instead of threading/async-await
- per-actor garbage collectors
- native support for immutable data structures
- fast startup
- 64-bit only
- ability to produce small redistributable binaries
- persistence primitives out-of-the-box
## Rationale
I find that writing something simple in popular programming languages takes an ungodly amount of time.
This is mostly because those languages try to cater to a very wide audience. If I need to glue a couple of
systems together, I have to pull in at least a dozen dependencies and keep them up to date all the time.
Case in point: one day I needed a simple script that would crawl a github organization, collect
information from there about open pull requests, create a dashboard that presents the information
in the way I want, and occasionally pings people on open issues and pull requests.
Sounds simple, right? Well, not so fast. You cannot crawl GitHub all the time, because it has request
limits. So you need to run a daemon in background, and update the currently synchronized state. Oh,
so there is state now? How about you store it in a database? And then add migrations to initialize the
schema?
Don't want to mess with databases? Then you can go the route of just-put-everything-into-a-big-json-file.
Except then you have to turn dates into strings, because JSON doesn't allow custom types. And also,
no integer keys in dictionaries for you. And tuples now turn into arrays, so when you read them back,
the code will treat them differently (because surprise-surprise, arrays cannot be dictionary keys,
while tuples can).
Frankly, not everything needs a scalable persistence system capable of handling hundreds of thousands
of users. But I would prefer the language to provide something decent that I can just put my data structures
into, and get them back exactly the way I put them in.
Then, the second problem with this particular case is that there are multiple parallel things happening.
At least, one of them is polling the API, and the other sends notifications once a day, and the third
updates the dashboard every now and then. Doing this in Python requires either threads, or async, or
a hand-rolled event loop with a timer.
For a lot of small apps, I don't care about copying data multiple times. But what I do care about is
never having to deal with data races or inconsistencies. So, in my opinion the Erlang model where
actors are isolated and exchange messages wins hands down. No "function coloring", no shared state,
relative ease of debugging, etc.
I would even go as far as to say that many of the production apps I worked on in my career could have
_easily_ been done without any shared state.
So, a language that can take care of such basics and get out of your way would be very nice. Even if
it would be as slow as TCL, it would still be fine for many practical things.
## Compiling and running
You'd need `gcc` and `cmake`.
At this moment, only a basic VM and assembler are implemented.
To run:
```sh
cmake .
make
cat examples/factorial.asm | ./asm | ./vm
```
To run tests:
```sh
ctest
```
## Influences
- TCL
- Clojure
- Erlang
## Get in touch
I'm working on this alone at the moment, and don't expect it to turn into anything. But if you like the idea, you can reach out
to me on Matrix: [@knazarov:knazarov.com](https://matrix.to/#/@knazarov:knazarov.com).
## Acknowledgements
The name of the language is the name of my love, Valeri.

26
flake.lock Normal file
View file

@ -0,0 +1,26 @@
{
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1720993261,
"narHash": "sha256-n7UcEUsgl4aYeSuE2Ukp6alNPoDQEk69GhSctX6wPDg=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "01f00069ce100284d5729ae6fdd999a7045e24d9",
"type": "github"
},
"original": {
"owner": "NixOS",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

36
flake.nix Normal file
View file

@ -0,0 +1,36 @@
{
  description = "Nix flake for experimental lisp";

  # Flake inputs
  inputs.nixpkgs.url = "github:NixOS/nixpkgs"; # also valid: "nixpkgs";

  # Flake outputs
  outputs = { self, nixpkgs }:
    let
      # Platforms the package set is produced for.
      allSystems = [
        "x86_64-linux"   # 64-bit Intel/AMD Linux
        "aarch64-linux"  # 64-bit ARM Linux
        "x86_64-darwin"  # 64-bit Intel macOS
        "aarch64-darwin" # 64-bit ARM macOS
      ];

      # Builds an attribute set keyed by system name; `f` receives the nixpkgs
      # package set imported for that system.
      forAllSystems = f:
        nixpkgs.lib.genAttrs allSystems
          (system: f { pkgs = import nixpkgs { inherit system; }; });
    in
    {
      # `lisp` and `default` are the same derivation.
      packages = forAllSystems ({ pkgs }:
        let
          lisp = pkgs.callPackage ./lisp.nix { };
        in
        {
          inherit lisp;
          default = lisp;
        });

      # Overlay exposing the package as `lisp`, built against the previous
      # package set.
      overlays.default = final: prev: {
        lisp = prev.callPackage ./lisp.nix { };
      };
    };
}

32
lisp.nix Normal file
View file

@ -0,0 +1,32 @@
{ pkgs, stdenv }:

# Derivation for the interpreter, built with the GCC 13 stdenv.
# (`stdenv` is accepted as an argument but not referenced in this file.)
pkgs.gcc13Stdenv.mkDerivation rec {
  pname = "lisp";
  version = "0.1.0";

  dontPatch = true;

  preBuild = ''
    patchShebangs bin/*.sh
  '';

  installFlags = "PREFIX=${placeholder "out"} VERSION=${version}";

  nativeBuildInputs = with pkgs; [ pkg-config cmake ninja ];
  buildInputs = with pkgs; [ gdb linuxPackages.perf jq lcov ];

  hardeningDisable = [ "all" ];

  cmakeFlags = [
    "-DCMAKE_EXPORT_COMPILE_COMMANDS=TRUE"
    "-DCMAKE_BUILD_TYPE=Debug"
  ];

  # -f: replace the link left behind by a previous shell instead of failing
  # with "File exists" every time the dev shell is entered again.
  shellHook = ''
    export CMAKE_BUILD_TYPE=Debug
    ln -sf build/compile_commands.json compile_commands.json
  '';

  doCheck = true;

  src = ./.;
}

56
src/arena.hpp Normal file
View file

@ -0,0 +1,56 @@
#pragma once
#include <concepts>
#include "pod.hpp"
/// Bookkeeping record for one caller-supplied byte buffer.
///
/// The constructor only records the buffer pointer and its size and resets the
/// boundary to zero. Nothing in this class reads, advances or frees them yet.
class ArenaHeap {
 public:
  /// @param buf      first byte of the backing storage
  /// @param bufsize  size of the backing storage in bytes
  ArenaHeap(uint8_t* buf, uint64_t bufsize)
      : buf{buf}, bufsize{bufsize}, boundary{0} {}

 private:
  uint8_t* buf;
  uint64_t bufsize;
  // NOTE(review): presumably the offset of the first free byte — confirm once
  // allocation is implemented.
  uint64_t boundary;
};
class Arena {
public:
Arena(ArenaHeap* first, ArenaHeap* second)
: _heaps{first, second}, _current(0) {}
private:
ArenaHeap* _heaps[2];
int _current;
};
template <uint64_t size>
class StaticArenaHeap {
public:
StaticArenaHeap() : _heap(_buf, heapsize) {}
ArenaHeap* get() { return &_heap; }
template <class T>
T* alloc(uint64_t extra = 0)
requires std::derived_from<T, PodObject>
{
return 0;
}
private:
static const uint64_t heapsize = size - sizeof(ArenaHeap);
ArenaHeap _heap;
uint8_t _buf[heapsize]{0};
};
template <uint64_t size>
class StaticArena {
public:
StaticArena() : _arena(_heaps[0].get(), _heaps[1].get()) {}
private:
StaticArenaHeap<(size - sizeof(Arena)) / 2> _heaps[2];
Arena _arena;
};

34
src/common.cpp Normal file
View file

@ -0,0 +1,34 @@
#include "common.hpp"
// Placeholder: building a PodSyntax from its parts is not implemented yet, so
// none of the arguments are used. The pointer is set to null explicitly so the
// object never carries an indeterminate value.
// TODO: allocate and populate a PodSyntax from the arguments.
Syntax::Syntax(String filename, String modulename, Value expression)
    : _value(nullptr) {}
// Returns the wrapped expression as a Value.
//
// Yields a Nil value when this handle has no backing PodSyntax or when the
// stored expression offset is empty (OffPtr::get returns null for offset 0),
// instead of dereferencing a null pointer.
Value Syntax::get_value() {
  if (_value == nullptr) return Value();

  // The expression offset is stored relative to the PodSyntax itself.
  PodObject* expression = _value->expression.get(_value);
  if (expression == nullptr) return Value();

  return pod_to_value(expression);
}
// Wraps an arena-resident pod object in the matching by-value Object type,
// selected by the tag stored in its header.
//
// A null `obj` yields a Nil value: OffPtr::get returns null for an empty
// offset, so callers can legitimately pass one in. A tag not covered by the
// switch also yields Nil.
Value pod_to_value(PodObject* obj) {
  if (obj == nullptr) return Value();

  switch (obj->header.tag) {
    case Tag::Nil:
      return Value();
    case Tag::Int64:
      return Value(Int64(static_cast<PodInt64*>(obj)->value));
    case Tag::Float:
      // NOTE(review): the double is truncated into an Int64 wrapper, so the
      // result reports Tag::Int64. This looks unintended; returning a Float
      // needs a Float constructor that takes a value. Behaviour kept as-is.
      return Value(Int64(static_cast<int64_t>(static_cast<PodFloat*>(obj)->value)));
    case Tag::String:
      return Value(String(static_cast<PodString*>(obj)));
    case Tag::Symbol:
      return Value(Symbol(static_cast<PodSymbol*>(obj)));
    case Tag::Syntax:
      return Value(Syntax(static_cast<PodSyntax*>(obj)));
    case Tag::Pair:
      return Value(Pair(static_cast<PodPair*>(obj)));
  }
  return Value();
}
// Strips one Syntax wrapper: a Syntax value is replaced by the expression it
// carries; any other value is handed back unchanged.
Value syntax_unwrap(Value val) {
  if (Syntax* wrapped = val.to<Syntax>(); wrapped != nullptr) {
    return wrapped->get_value();
  }
  return val;
}

181
src/common.hpp Normal file
View file

@ -0,0 +1,181 @@
#pragma once
#include <concepts>
#include <cstdint>
#include <iostream>
#include "arena.hpp"
#include "pod.hpp"
// Forward declarations
class Value;
/// A location in source text. A default-constructed position is line 1,
/// column 1, offset 0.
struct SourcePosition {
  size_t line = 1;
  size_t column = 1;
  size_t offset = 0;
};

/// A span of source text delimited by two positions.
/// NOTE(review): whether `end` is inclusive or exclusive is not visible here.
struct SourceRange {
  SourcePosition start;
  SourcePosition end;
};
/// In-arena representation of a 64-bit signed integer; the header is tagged
/// Tag::Int64.
class PodInt64 final : public PodObject {
 public:
  PodInt64() : PodObject{Tag::Int64} {}

  int64_t value;  // not set by the constructor
};
/// In-arena representation of a double; the header is tagged Tag::Float.
class PodFloat final : public PodObject {
 public:
  PodFloat() : PodObject{Tag::Float} {}

  double value;  // not set by the constructor
};
/// In-arena representation of a string; the header is tagged Tag::String.
///
/// The character data trails the object as a flexible array member, so an
/// instance must be placed in storage large enough for the header plus data.
class PodString final : public PodObject {
 public:
  PodString() : PodObject{Tag::String} {}

  // NOTE(review): presumably the number of char32_t elements in `data` —
  // confirm. Not set by the constructor.
  uint64_t size;
  char32_t data[];
};
/// In-arena representation of a symbol; the header is tagged Tag::Symbol.
///
/// Same layout as PodString: a size field followed by a trailing flexible
/// array of char32_t.
class PodSymbol final : public PodObject {
 public:
  PodSymbol() : PodObject{Tag::Symbol} {}

  // NOTE(review): presumably the number of char32_t elements in `data` —
  // confirm. Not set by the constructor.
  uint64_t size;
  char32_t data[];
};
/// In-arena representation of a syntax object; the header is tagged
/// Tag::Syntax.
///
/// The three references are stored as offsets; Syntax::get_value resolves
/// `expression` relative to the PodSyntax object itself.
class PodSyntax : public PodObject {
 public:
  PodSyntax() : PodObject{Tag::Syntax} {}

  OffPtr<PodString> filename;
  OffPtr<PodString> modulename;
  OffPtr<PodObject> expression;
  SourceRange sourcerange;
};
/// In-arena representation of a pair; the header is tagged Tag::Pair.
///
/// Both elements are stored as offsets.
/// NOTE(review): presumably relative to the PodPair itself, like PodSyntax —
/// confirm.
class PodPair : public PodObject {
 public:
  PodPair() : PodObject{Tag::Pair} {}

  OffPtr<PodObject> first;
  OffPtr<PodObject> second;
};
/// Polymorphic base of every by-value wrapper type.
class Object {
 public:
  // Value destroys its contents through an Object*, so the destructor has to
  // be virtual for the derived destructor to run.
  virtual ~Object() = default;

  /// Returns the Tag identifying the concrete type.
  virtual Tag tag() = 0;
};
/// The empty value; carries no state.
class Nil : public Object {
 public:
  Nil() {}

  Tag tag() final { return Tag::Nil; }
};
class String : public Object {
public:
String(PodString* val) : _value(val) {}
virtual Tag tag() final { return Tag::String; }
private:
PodString* _value;
};
class Symbol : public Object {
public:
Symbol(PodSymbol* val) : _value(val) {}
virtual Tag tag() final { return Tag::Symbol; }
private:
PodSymbol* _value;
};
class Syntax : public Object {
public:
Syntax(PodSyntax* val) : _value(val) {}
Syntax(String filename, String modulename, Value expression);
virtual Tag tag() final { return Tag::Syntax; }
Value get_value();
private:
PodSyntax* _value;
};
class Pair : public Object {
public:
Pair(PodPair* value) : _value(value) {}
virtual Tag tag() final { return Tag::Pair; }
private:
PodPair* _value;
};
/// 64-bit signed integer held directly inside the wrapper.
class Int64 : public Object {
 public:
  /// Default-constructs to zero.
  Int64() {}
  Int64(int64_t val) : _value{val} {}
  virtual ~Int64() = default;

  Tag tag() final { return Tag::Int64; }

  int64_t value() { return _value; }

 private:
  int64_t _value{0};
};
/// Double-precision float held directly inside the wrapper.
class Float : public Object {
 public:
  /// Default-constructs to zero.
  Float() : _value(0) {}

  /// Wraps `val`. Mirrors Int64's value constructor; without it there is no
  /// way to create a non-zero Float. Explicit so that no new implicit
  /// conversion from arithmetic types is introduced.
  explicit Float(double val) : _value(val) {}

  virtual ~Float() = default;

  virtual Tag tag() final { return Tag::Float; }

  double value() { return _value; }

 private:
  double _value;
};
// note: this class doesn't perform proper destruction of objects in some cases
class Value {
public:
Value() { new (buf) Nil(); }
~Value() { ((Object*)buf)->~Object(); }
template <class T>
Value(const T& obj)
requires std::derived_from<T, Object> && (sizeof(T) <= 16)
{
new (buf) T(obj);
}
template <class T>
bool is() {
return dynamic_cast<T*>((Object*)buf) != nullptr;
}
template <class T>
T* to() {
return dynamic_cast<T*>((Object*)buf);
}
Object& operator*() { return *(Object*)(buf); }
Object* operator->() { return (Object*)(buf); }
private:
uint8_t buf[16];
};
// Converts an arena-resident pod object into the by-value wrapper selected by
// the tag in its header.
Value pod_to_value(PodObject* obj);
// Returns the expression carried by a Syntax value; any other value is
// returned unchanged.
Value syntax_unwrap(Value);

45
src/pod.hpp Normal file
View file

@ -0,0 +1,45 @@
#pragma once
#include <cstdint>
/// Discriminator stored in every PodHeader and reported by Object::tag().
/// One byte wide; the numeric values follow declaration order.
enum class Tag : uint8_t {
  Nil = 0,
  Int64 = 1,
  Float = 2,
  String = 3,
  Symbol = 4,
  Syntax = 5,
  Pair = 6,
};
/// A pointer stored as a signed byte offset from a caller-supplied base.
///
/// Offset 0 is reserved to mean "null", so a target located exactly at the
/// base address reads back as null.
template <class T>
class OffPtr {
 public:
  /// Null offset pointer.
  OffPtr() : _offset(0) {}

  /// Records the byte distance from `base` to `ptr`; a null `ptr` is stored
  /// as offset 0.
  // C-style casts are kept on purpose: they also accept const-qualified R/T.
  template <class R>
  OffPtr(R* base, T* ptr) : _offset(0) {
    if (ptr != 0) {
      _offset = (uint8_t*)ptr - (uint8_t*)base;
    }
  }

  /// Resolves the stored offset against `base`; returns null for offset 0.
  template <class R>
  T* get(R* base) {
    return (_offset != 0) ? (T*)(((uint8_t*)base) + _offset)
                          : static_cast<T*>(nullptr);
  }

 private:
  int64_t _offset;
};

// Must stay exactly one 64-bit word.
static_assert(sizeof(OffPtr<void>) == 8);
struct PodHeader {
PodHeader(Tag tag) : tag(tag), forward(0) {}
Tag tag;
int64_t forward : 56;
};
static_assert(sizeof(PodHeader) == 8);
// Common base of all arena-resident objects: nothing but the header, so the
// tag can be read from any pod object through a PodObject*.
struct PodObject {
PodHeader header;
};

19
src/vli.cpp Normal file
View file

@ -0,0 +1,19 @@
#include <iostream>
#include "common.hpp"
StaticArena<64 * 1024 * 1024> arena;
int main() {
Value val;
std::cout << sizeof(val) << "\n";
std::cout << int(val->tag()) << "\n";
val = Int64(123);
std::cout << sizeof(val) << "\n";
std::cout << int(val->tag()) << "\n";
std::cout << int(((Int64&)*val).value()) << "\n";
return 0;
}

0
src/vm.cpp Normal file
View file

0
src/vm.hpp Normal file
View file

0
test/test.hpp Normal file
View file