From 8ff01726cca4866ea82a4c7f3d40d308221517cb Mon Sep 17 00:00:00 2001 From: Konstantin Nazarov Date: Fri, 19 Jul 2024 02:29:30 +0100 Subject: [PATCH] Initial commit - add basic boilerplate for POD types --- .clang-format | 4 ++ .clangd | 2 + .gitignore | 20 ++++++ CMakeLists.txt | 52 ++++++++++++++ LICENSE | 11 +++ README.md | 94 +++++++++++++++++++++++++ flake.lock | 26 +++++++ flake.nix | 36 ++++++++++ lisp.nix | 32 +++++++++ src/arena.hpp | 56 +++++++++++++++ src/common.cpp | 34 ++++++++++ src/common.hpp | 181 +++++++++++++++++++++++++++++++++++++++++++++++++ src/pod.hpp | 45 ++++++++++++ src/vli.cpp | 19 ++++++ src/vm.cpp | 0 src/vm.hpp | 0 test/test.hpp | 0 17 files changed, 612 insertions(+) create mode 100644 .clang-format create mode 100644 .clangd create mode 100644 .gitignore create mode 100644 CMakeLists.txt create mode 100644 LICENSE create mode 100644 README.md create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 lisp.nix create mode 100644 src/arena.hpp create mode 100644 src/common.cpp create mode 100644 src/common.hpp create mode 100644 src/pod.hpp create mode 100644 src/vli.cpp create mode 100644 src/vm.cpp create mode 100644 src/vm.hpp create mode 100644 test/test.hpp diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..cf29b6c --- /dev/null +++ b/.clang-format @@ -0,0 +1,4 @@ +--- +BasedOnStyle: Google + +... 
diff --git a/.clangd b/.clangd new file mode 100644 index 0000000..c786f16 --- /dev/null +++ b/.clangd @@ -0,0 +1,2 @@ +CompileFlags: + Add: [-xc++, -std=c++23, -Isrc/] \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2b050c3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +*.o +*.a +*.gcda +*.gcno +*.gcov +lcov.info +test/*.test +vm +lisp +asm +sd +compile_commands.json +.cache +CMakeFiles +cmake_install.cmake +CMakeCache.txt +CTestTestfile.cmake +Testing +result +build \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..f5c2d7e --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,52 @@ +cmake_minimum_required(VERSION 3.28) +project(valeri) # vli + +set(CMAKE_CXX_STANDARD 23) +set(CMAKE_EXPORT_COMPILE_COMMANDS true) + +message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") + +set (CMAKE_CXX_FLAGS "-fno-exceptions -static-libgcc -static-libstdc++ -Werror -Wall -Wunused-result -Wno-unused-function -Wno-unused-variable -fno-omit-frame-pointer -fsanitize=address -Wno-c99-designator") + +add_library(vm_lib) +target_sources(vm_lib + PRIVATE + src/vm.cpp + src/common.cpp + + PUBLIC + FILE_SET HEADERS + BASE_DIRS src + FILES + src/vm.hpp + src/common.hpp +) + +add_executable(vli src/vli.cpp) +target_link_libraries(vli vm_lib) + +install(TARGETS vli) + +# TESTING + +enable_testing() + +set(CPP_TESTS +) + + +foreach(testname IN LISTS CPP_TESTS) + add_executable("test_${testname}") + target_sources("test_${testname}" PRIVATE test/${testname}.cpp + PUBLIC + FILE_SET HEADERS + BASE_DIRS test + FILES + test/test.hpp) + target_link_libraries("test_${testname}" vm_lib) + + add_test( + NAME "test_${testname}" + COMMAND $ + ) +endforeach() diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b52357f --- /dev/null +++ b/LICENSE @@ -0,0 +1,11 @@ +Copyright 2023 Konstantin Nazarov + +Redistribution and use in source and binary forms, with or without modification, are 
permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..82c915d --- /dev/null +++ b/README.md @@ -0,0 +1,94 @@ +# Experimental Lisp interpreter + +This is a Lisp interpreter, that I'm writing to test a few ideas and try to make a language +that I would want to use for personal projects. + +Note: project is in very early stage. 
+ +Features that I would like to see (in no particular order): +- small and simple core with a focus on correctness/security +- Erlang-style concurrency and message-passing instead of threading/async-await +- per-actor garbage collectors +- native support for immutable data structures +- fast startup +- 64-bit only +- ability to produce small redistributable binaries +- persistence primitives out-of-the-box + + +## Rationale + +I find that writing something simple in popular programming languages takes an ungodly amount of time. +Mostly because those languages try to cater to a very wide audience. If I have a need to glue a couple of +systems together, I need to pull in at least a dozen dependencies, and keep them up to date all the time. + +Case in point: one day I needed a simple script that would crawl a GitHub organization, collect +information from there about open pull requests, create a dashboard that presents the information +in the way I want, and occasionally ping people on open issues and pull requests. + +Sounds simple, right? Well, not so fast. You cannot crawl GitHub all the time, because it has request +limits. So you need to run a daemon in the background, and update the currently synchronized state. Oh, +so there is state now? How about you store it in a database? And then add migrations to initialize the +schema? + +Don't want to mess with databases? Then you can go the route of just-put-everything-into-a-big-json-file. +Except then you have to turn dates into strings, because JSON doesn't allow custom types. And also, +no integer keys in dictionaries for you. And tuples now turn into arrays, so when you read them back, +the code will treat them differently (because surprise-surprise, arrays cannot be dictionary keys, +while tuples can). + +Frankly, not everything needs a scalable persistence system capable of handling hundreds of thousands +of users.
But I would prefer the language to provide something decent that I can just put my data structures +into, and get them back exactly the way I put them in. + +Then, the second problem with this particular case is that there are multiple parallel things happening. +At the very least, one of them polls the API, another sends notifications once a day, and a third +updates the dashboard every now and then. Doing this in Python requires either threads, or async, or +a hand-rolled event loop with a timer. + +For a lot of small apps, I don't care about copying data multiple times. But what I do care about is +never having to deal with data races or inconsistencies. So, in my opinion the Erlang model where +actors are isolated and exchange messages wins hands down. No "function coloring", no shared state, +relative ease of debugging, etc. + +I would even go as far as to say that many of the production apps I worked on in my career could have +_easily_ been done without any shared state. + +So, a language that can take care of such basics and get out of your way would be very nice. Even if +it were as slow as TCL, it would still be fine for many practical things. + + +## Compiling and running + +You'll need `gcc` and `cmake`. + +At this moment, only a basic VM and assembler are implemented. + +To run: + +```sh +cmake . +make +cat examples/factorial.asm | ./asm | ./vm +``` + +To run tests: + +```sh +ctest +``` + +## Influences + +- TCL +- Clojure +- Erlang + +## Get in touch + +I'm working on this alone at the moment, and don't expect it to turn into anything. But if you like the idea, you can reach out +to me on Matrix: [@knazarov:knazarov.com](https://matrix.to/#/@knazarov:knazarov.com). + +## Acknowledgements + +The name of the language is the name of my love, Valeri.
diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..d1ae847 --- /dev/null +++ b/flake.lock @@ -0,0 +1,26 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1720993261, + "narHash": "sha256-n7UcEUsgl4aYeSuE2Ukp6alNPoDQEk69GhSctX6wPDg=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "01f00069ce100284d5729ae6fdd999a7045e24d9", + "type": "github" + }, + "original": { + "owner": "NixOS", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..44ca113 --- /dev/null +++ b/flake.nix @@ -0,0 +1,36 @@ +{ + description = "Nix flake for experimental lisp"; + + # Flake inputs + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs"; # also valid: "nixpkgs"; + }; + + # Flake outputs + outputs = { self, nixpkgs }: + let + # Systems supported + allSystems = [ + "x86_64-linux" # 64-bit Intel/AMD Linux + "aarch64-linux" # 64-bit ARM Linux + "x86_64-darwin" # 64-bit Intel macOS + "aarch64-darwin" # 64-bit ARM macOS + ]; + + # Helper to provide system-specific attributes + forAllSystems = f: nixpkgs.lib.genAttrs allSystems (system: f { + pkgs = import nixpkgs { inherit system; }; + }); + in + { + packages = forAllSystems( {pkgs }: + { + lisp = (pkgs.callPackage ./lisp.nix {}); + default = (pkgs.callPackage ./lisp.nix {}); + } + ); + overlays.default = final: prev: { + lisp = (prev.callPackage ./lisp.nix {}); + }; + }; +} diff --git a/lisp.nix b/lisp.nix new file mode 100644 index 0000000..a684674 --- /dev/null +++ b/lisp.nix @@ -0,0 +1,32 @@ +{ pkgs, stdenv }: +pkgs.gcc13Stdenv.mkDerivation rec { + pname = "lisp"; + version = "0.1.0"; + + dontPatch = true; + + preBuild = '' + patchShebangs bin/*.sh + ''; + + installFlags = "PREFIX=${placeholder "out"} VERSION=${version}"; + + nativeBuildInputs = with pkgs; [ pkg-config cmake ninja ]; + + buildInputs = with pkgs; [ gdb linuxPackages.perf jq 
lcov ]; + + hardeningDisable = [ "all" ]; + cmakeFlags = [ + "-DCMAKE_EXPORT_COMPILE_COMMANDS=TRUE" + "-DCMAKE_BUILD_TYPE=Debug" + ]; + shellHook = '' + export CMAKE_BUILD_TYPE=Debug + ln -s build/compile_commands.json compile_commands.json + ''; + + doCheck = true; + + src = ./.; + +} diff --git a/src/arena.hpp b/src/arena.hpp new file mode 100644 index 0000000..1f6322c --- /dev/null +++ b/src/arena.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include /* NOTE(review): the header name is missing here, and template parameter/argument lists look stripped throughout this patch text - restore them from the repository before applying */ + +#include "pod.hpp" + +class ArenaHeap { /* Records a caller-supplied buffer, its size, and a boundary counter that starts at 0; no allocation logic yet. */ + public: + ArenaHeap(uint8_t* buf, uint64_t bufsize) + : buf(buf), bufsize(bufsize), boundary(0) {} + + private: + uint8_t* buf; + uint64_t bufsize; + uint64_t boundary; +}; + +class Arena { /* Pairs two ArenaHeap pointers with the index of the current one (starts at 0). */ + public: + Arena(ArenaHeap* first, ArenaHeap* second) + : _heaps{first, second}, _current(0) {} + + private: + ArenaHeap* _heaps[2]; + int _current; +}; + +template +class StaticArenaHeap { /* An ArenaHeap together with the inline byte buffer it is constructed over. */ + public: + StaticArenaHeap() : _heap(_buf, heapsize) {} + + ArenaHeap* get() { return &_heap; } + + template + T* alloc(uint64_t extra = 0) + requires std::derived_from + { + return 0; /* stub: always yields a null pointer */ + } + + private: + static const uint64_t heapsize = size - sizeof(ArenaHeap); + ArenaHeap _heap; + uint8_t _buf[heapsize]{0}; +}; + +template +class StaticArena { /* Two StaticArenaHeaps wired into a single Arena. */ + public: + StaticArena() : _arena(_heaps[0].get(), _heaps[1].get()) {} + + private: + StaticArenaHeap<(size - sizeof(Arena)) / 2> _heaps[2]; + Arena _arena; +}; diff --git a/src/common.cpp b/src/common.cpp new file mode 100644 index 0000000..6474d1b --- /dev/null +++ b/src/common.cpp @@ -0,0 +1,34 @@ +#include "common.hpp" + +Syntax::Syntax(String filename, String modulename, Value expression) {} /* NOTE(review): stub - the arguments are ignored and _value is left uninitialized, so get_value() must not be called on an object built this way */ + +Value Syntax::get_value() { /* Resolves the stored expression offset against the PodSyntax itself and wraps the result. */ + return pod_to_value(_value->expression.get(_value)); +} + +Value pod_to_value(PodObject* obj) { /* Wraps obj in the Value type that matches obj->header.tag; falls back to Nil. */ + switch (obj->header.tag) { + case Tag::Nil: + return Value(); + case Tag::Int64: + return Value(Int64(((PodInt64*)obj)->value)); + case Tag::Float: + return Value(Float(((PodFloat*)obj)->value)); /* keep the double as a Float instead of narrowing it into an Int64 */ + case Tag::String: + return
Value(String((PodString*)obj)); + case Tag::Symbol: + return Value(Symbol((PodSymbol*)obj)); + case Tag::Syntax: + return Value(Syntax((PodSyntax*)obj)); + case Tag::Pair: + return Value(Pair((PodPair*)obj)); + }; + return Value(); +} + +Value syntax_unwrap(Value val) { /* Returns the wrapped expression when val holds a Syntax, otherwise val unchanged. */ + Syntax* syntax = val.to(); + if (syntax == 0) return val; + + return syntax->get_value(); +} diff --git a/src/common.hpp b/src/common.hpp new file mode 100644 index 0000000..b8a68ea --- /dev/null +++ b/src/common.hpp @@ -0,0 +1,181 @@ +#pragma once + +#include +#include +#include + +#include "arena.hpp" +#include "pod.hpp" + +// Forward declarations + +class Value; + +struct SourcePosition { + size_t line{1}; + size_t column{1}; + size_t offset{0}; +}; + +struct SourceRange { + SourcePosition start; + SourcePosition end; +}; + +class PodInt64 final : public PodObject { + public: + PodInt64() : PodObject(Tag::Int64){}; + + int64_t value; +}; + +class PodFloat final : public PodObject { + public: + PodFloat() : PodObject(Tag::Float){}; + + double value; +}; + +class PodString final : public PodObject { + public: + PodString() : PodObject(Tag::String){}; + + uint64_t size; + char32_t data[]; +}; + +class PodSymbol final : public PodObject { + public: + PodSymbol() : PodObject(Tag::Symbol){}; + + uint64_t size; + char32_t data[]; +}; + +class PodSyntax : public PodObject { + public: + PodSyntax() : PodObject(Tag::Syntax){}; + OffPtr filename; + OffPtr modulename; + OffPtr expression; + SourceRange sourcerange; +}; + +class PodPair : public PodObject { + public: + PodPair() : PodObject(Tag::Pair){}; + OffPtr first; + OffPtr second; +}; + +class Object { /* Common polymorphic base: every runtime object reports its Tag. */ + public: + virtual Tag tag() = 0; +}; + +class Nil : public Object { + public: + Nil() {} + + virtual Tag tag() final { return Tag::Nil; } +}; + +class String : public Object { + public: + String(PodString* val) : _value(val) {} + virtual Tag tag() final { return Tag::String; } + + private: + PodString* _value; +}; + +class Symbol : public Object { +
public: + Symbol(PodSymbol* val) : _value(val) {} + virtual Tag tag() final { return Tag::Symbol; } + + private: + PodSymbol* _value; +}; + +class Syntax : public Object { + public: + Syntax(PodSyntax* val) : _value(val) {} + Syntax(String filename, String modulename, Value expression); + virtual Tag tag() final { return Tag::Syntax; } + + Value get_value(); + + private: + PodSyntax* _value; +}; + +class Pair : public Object { + public: + Pair(PodPair* value) : _value(value) {} + virtual Tag tag() final { return Tag::Pair; } + + private: + PodPair* _value; +}; + +class Int64 : public Object { + public: + Int64() : _value(0) {} + Int64(int64_t val) : _value(val) {} + virtual ~Int64() = default; + + virtual Tag tag() final { return Tag::Int64; } + + int64_t value() { return _value; } + + private: + int64_t _value; +}; + +class Float : public Object { + public: + Float(double val = 0) : _value(val) {} /* the default argument keeps Float() working while letting pod_to_value wrap a stored double */ + virtual ~Float() = default; + + virtual Tag tag() final { return Tag::Float; } + + double value() { return _value; } + + private: + double _value; +}; + +// note: this class doesn't perform proper destruction of objects in some cases +class Value { /* Inline 16-byte storage that holds any sufficiently small Object subclass by value via placement new. */ + public: + Value() { new (buf) Nil(); } + ~Value() { ((Object*)buf)->~Object(); } + + template + Value(const T& obj) + requires std::derived_from && (sizeof(T) <= 16) + { + new (buf) T(obj); + } + + template + bool is() { + return dynamic_cast((Object*)buf) != nullptr; + } + + template + T* to() { + return dynamic_cast((Object*)buf); + } + + Object& operator*() { return *(Object*)(buf); } + Object* operator->() { return (Object*)(buf); } + + private: + alignas(16) uint8_t buf[16]; /* a plain byte array is only 1-aligned; any T with sizeof(T) <= 16 needs at most 16-byte alignment, so this makes the placement new well-defined */ +}; + +Value pod_to_value(PodObject* obj); + +Value syntax_unwrap(Value); diff --git a/src/pod.hpp b/src/pod.hpp new file mode 100644 index 0000000..a276bf8 --- /dev/null +++ b/src/pod.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include + +enum class Tag : uint8_t { + Nil, + Int64, + Float, + String, + Symbol, + Syntax, + Pair, +}; + +template +class OffPtr { /* Pointer stored as a signed byte offset from a caller-supplied base; offset 0 encodes null. */ + public: + OffPtr()
: _offset(0) {} + template + OffPtr(R* base, T* ptr) + : _offset((ptr == 0) ? 0 : (uint8_t*)ptr - (uint8_t*)base){}; + + template + T* get(R* base) { + if (_offset == 0) return 0; + return (T*)(((uint8_t*)base) + _offset); + } + + private: + int64_t _offset; +}; + +static_assert(sizeof(OffPtr) == 8); + +struct PodHeader { /* 8-byte header: a one-byte tag plus a 56-bit forward field initialised to 0. */ + PodHeader(Tag tag) : tag(tag), forward(0) {} + Tag tag; + int64_t forward : 56; +}; + +static_assert(sizeof(PodHeader) == 8); + +struct PodObject { + PodHeader header; +}; diff --git a/src/vli.cpp b/src/vli.cpp new file mode 100644 index 0000000..3d702e2 --- /dev/null +++ b/src/vli.cpp @@ -0,0 +1,19 @@ +#include + +#include "common.hpp" + +StaticArena<64 * 1024 * 1024> arena; + +int main() { + Value val; + std::cout << sizeof(val) << "\n"; + std::cout << int(val->tag()) << "\n"; + + val = Int64(123); + + std::cout << sizeof(val) << "\n"; + std::cout << int(val->tag()) << "\n"; + std::cout << int(((Int64&)*val).value()) << "\n"; + + return 0; +} diff --git a/src/vm.cpp b/src/vm.cpp new file mode 100644 index 0000000..e69de29 diff --git a/src/vm.hpp b/src/vm.hpp new file mode 100644 index 0000000..e69de29 diff --git a/test/test.hpp b/test/test.hpp new file mode 100644 index 0000000..e69de29