diff --git a/CMakeLists.txt b/CMakeLists.txt index 86bd0a1..ca3b2fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,7 @@ target_sources(vm_lib PRIVATE src/vm.cpp src/debug.cpp + src/elf.cpp PUBLIC FILE_SET HEADERS @@ -20,6 +21,7 @@ target_sources(vm_lib FILES src/vm.hpp src/debug.hpp + src/elf.hpp ) add_executable(rve src/rve.cpp) diff --git a/README.md b/README.md index 7751ac4..da911a8 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ # A simple RISC-V emulator This is a toy emulator for RISC-V, made for educational purposes. -The goal is to have a base rv32i instruction set (the bare minimum) plus a M-extension for division and multiplication. In theory, it should be enough to execute simple C programs compiled with GCC and sprinkled with a few linker scripts. Of course, no libc because there's no OS. +The goal is to have a base rv32i instruction set (the bare minimum) plus an M-extension for division and multiplication. It is capable of running normal ELF binaries produced by compiling C programs with GCC. It also has support for attaching the GDB debugger to the GDB stub port, so you can debug your programs running in the virtual machine. + +The code is small and compact on purpose, to make the implementation easy to understand. ## Compiling and running @@ -33,10 +35,28 @@ cd example make ``` -As a result, you'll get an `example.raw` binary. To execute it: +As a result, you'll get an `example` binary. To execute it: ```sh -./rve ../example/example.raw +./rve ../example/example ``` The expected output of the example program is `40320`. + +## Debugging programs under GDB + +The virtual machine contains an implementation of the GDB stub protocol. To run the program in debug mode, execute: + +```sh +./rve --debug ../example/example +``` + +The program will load and stop at the first instruction. It will then prompt you to connect the debugger. 
+Then run `riscv32-none-elf-gdb`, and in the gdb prompt, type: + +``` +file ../example/example +target remote :1234 +``` + +From now on, you can set breakpoints, examine variables, registers and memory as you would expect under GDB. diff --git a/example/Makefile b/example/Makefile index 653a20e..76531bc 100644 --- a/example/Makefile +++ b/example/Makefile @@ -2,8 +2,3 @@ example: example.c Makefile boot.s linker.ld riscv32-none-elf-as -march=rv32i -mabi=ilp32 boot.s -o boot.o riscv32-none-elf-gcc -fno-builtin -fvisibility=hidden -nostdlib -nostartfiles -march=rv32im -mabi=ilp32 -c example.c -o example.o -g riscv32-none-elf-ld boot.o example.o -T linker.ld -o example -g - #riscv32-none-elf-strip -R .riscv.attributes example - #riscv32-none-elf-strip -R .comment example - riscv32-none-elf-objcopy -O binary example example.raw - #riscv32-none-elf-objcopy -O binary -j .text example example.text - #riscv32-none-elf-objcopy -O binary -j .sdata example example.data
diff --git a/src/elf.cpp b/src/elf.cpp
new file mode 100644
index 0000000..ea65438
--- /dev/null
+++ b/src/elf.cpp
@@ -0,0 +1,175 @@
+#include "elf.hpp"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <fstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace {
+
+// ELF file constants
+constexpr uint8_t ELF_MAGIC[] = {0x7F, 'E', 'L', 'F'};
+constexpr uint16_t ELF_TYPE_EXECUTABLE = 2;
+constexpr uint16_t ELF_MACHINE_RISCV = 243;
+constexpr uint8_t ELF_CLASS_32 = 1;
+constexpr uint8_t ELF_LITTLE_ENDIAN = 1;
+
+// ELF32 file header. It is filled by reading raw bytes from the file, so the
+// multi-byte fields are only decoded correctly on a little-endian host.
+struct Elf32Header {
+  uint8_t e_ident[16];   // Magic number and other info
+  uint16_t e_type;       // Object file type
+  uint16_t e_machine;    // Architecture
+  uint32_t e_version;    // Object file version
+  uint32_t e_entry;      // Entry point virtual address
+  uint32_t e_phoff;      // Program header table file offset
+  uint32_t e_shoff;      // Section header table file offset
+  uint32_t e_flags;      // Processor-specific flags
+  uint16_t e_ehsize;     // ELF header size in bytes
+  uint16_t e_phentsize;  // Program header table entry size
+  uint16_t e_phnum;      // Program header table entry count
+  uint16_t e_shentsize;  // Section header table entry size
+  uint16_t e_shnum;      // Section header table entry count
+  uint16_t e_shstrndx;   // Section header string table index
+};
+static_assert(sizeof(Elf32Header) == 52, "unexpected Elf32Header layout");
+
+// ELF32 section header, read from the file the same way as Elf32Header.
+struct Elf32Section {
+  uint32_t sh_name;       // Section name (string table index)
+  uint32_t sh_type;       // Section type
+  uint32_t sh_flags;      // Section attributes
+  uint32_t sh_addr;       // Virtual address in memory
+  uint32_t sh_offset;     // Offset in file
+  uint32_t sh_size;       // Size of section
+  uint32_t sh_link;       // Link to another section
+  uint32_t sh_info;       // Additional section information
+  uint32_t sh_addralign;  // Section alignment
+  uint32_t sh_entsize;    // Entry size if section holds table
+};
+static_assert(sizeof(Elf32Section) == 40, "unexpected Elf32Section layout");
+
+// Returns true if the section is one of those copied into memory (.text or
+// .sdata). `names` is the section string table and must be NUL-terminated.
+// Throws std::runtime_error if the section's name offset lies outside it.
+bool is_loadable(const Elf32Section& shdr, const std::vector<char>& names) {
+  if (shdr.sh_name >= names.size()) {
+    throw std::runtime_error("Invalid section name offset");
+  }
+  const char* sectionName = &names[shdr.sh_name];
+  return std::strcmp(sectionName, ".text") == 0 ||
+         std::strcmp(sectionName, ".sdata") == 0;
+}
+
+}  // namespace
+
+// Loads the .text and .sdata sections of a 32-bit little-endian RISC-V ELF
+// executable into a flat memory image, each at the offset given by its
+// section address.
+//
+// filename:    path of the ELF file to load.
+// memory_size: size of the target memory in bytes; every loaded section must
+//              end at or below this address.
+//
+// Returns an image that extends to the end of the highest loaded section and
+// is zero-filled wherever no section data was placed.
+//
+// Throws std::runtime_error if the file cannot be opened or read, is not a
+// supported ELF executable, is malformed, contains no loadable sections, or
+// does not fit into memory_size bytes.
+std::vector<uint8_t> load_elf(const std::string& filename,
+                              size_t memory_size) {
+  std::ifstream file(filename, std::ios::binary);
+  if (!file.is_open()) {
+    throw std::runtime_error("Failed to open ELF file");
+  }
+
+  // Read the ELF header
+  Elf32Header ehdr;
+  file.read(reinterpret_cast<char*>(&ehdr), sizeof(ehdr));
+  if (!file) {
+    throw std::runtime_error("Failed to read ELF header");
+  }
+
+  // Validate ELF magic and basic properties
+  if (std::memcmp(ehdr.e_ident, ELF_MAGIC, sizeof(ELF_MAGIC)) != 0) {
+    throw std::runtime_error("Invalid ELF magic number");
+  }
+  if (ehdr.e_ident[4] != ELF_CLASS_32 || ehdr.e_ident[5] != ELF_LITTLE_ENDIAN) {
+    throw std::runtime_error("Unsupported ELF class or endianness");
+  }
+  if (ehdr.e_type != ELF_TYPE_EXECUTABLE ||
+      ehdr.e_machine != ELF_MACHINE_RISCV) {
+    throw std::runtime_error("Unsupported ELF type or machine");
+  }
+
+  // The section headers are read straight into Elf32Section objects and the
+  // string table is looked up by index, so reject tables that cannot be
+  // interpreted that way instead of indexing out of bounds.
+  if (ehdr.e_shentsize != sizeof(Elf32Section) ||
+      ehdr.e_shstrndx >= ehdr.e_shnum) {
+    throw std::runtime_error("Invalid section header table");
+  }
+
+  // Read section headers
+  std::vector<Elf32Section> sectionHeaders(ehdr.e_shnum);
+  file.seekg(ehdr.e_shoff);
+  file.read(reinterpret_cast<char*>(sectionHeaders.data()),
+            sectionHeaders.size() * sizeof(Elf32Section));
+  if (!file) {
+    throw std::runtime_error("Failed to read section headers");
+  }
+
+  // Load section string table
+  const Elf32Section& strTabHdr = sectionHeaders[ehdr.e_shstrndx];
+  std::vector<char> sectionStrTable(strTabHdr.sh_size);
+  file.seekg(strTabHdr.sh_offset);
+  file.read(sectionStrTable.data(), strTabHdr.sh_size);
+  if (!file) {
+    throw std::runtime_error("Failed to read section string table");
+  }
+  // Guarantee termination so that name comparisons cannot run past the end
+  // of a malformed (or empty) table.
+  sectionStrTable.push_back('\0');
+
+  // Pick the loadable sections and determine the memory range they occupy.
+  // Section ends are computed in 64 bits so that sh_addr + sh_size cannot
+  // wrap around.
+  std::vector<const Elf32Section*> loadable;
+  uint64_t memoryEnd = 0;
+  for (const Elf32Section& shdr : sectionHeaders) {
+    if (!is_loadable(shdr, sectionStrTable)) {
+      continue;
+    }
+    const uint64_t sectionEnd = uint64_t{shdr.sh_addr} + shdr.sh_size;
+    if (sectionEnd > memory_size) {
+      throw std::runtime_error("ELF image does not fit in memory");
+    }
+    memoryEnd = std::max(memoryEnd, sectionEnd);
+    loadable.push_back(&shdr);
+  }
+
+  if (memoryEnd == 0) {
+    throw std::runtime_error("No loadable sections found");
+  }
+
+  std::vector<uint8_t> loadedData(static_cast<size_t>(memoryEnd), 0);
+
+  // Copy each loadable section to its address in the image
+  for (const Elf32Section* shdr : loadable) {
+    if (shdr->sh_size == 0) {
+      continue;  // Nothing to copy; sh_addr may equal the image size
+    }
+    file.seekg(shdr->sh_offset);
+    file.read(reinterpret_cast<char*>(loadedData.data() + shdr->sh_addr),
+              shdr->sh_size);
+    if (!file) {
+      throw std::runtime_error("Failed to read section data");
+    }
+  }
+
+  return loadedData;
+}
diff --git a/src/elf.hpp b/src/elf.hpp
new file mode 100644
index 0000000..99534d2
--- /dev/null
+++ b/src/elf.hpp
@@ -0,0 +1,7 @@
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+std::vector<uint8_t> load_elf(const std::string& filename, size_t memory_size);
diff --git a/src/rve.cpp b/src/rve.cpp index 486671e..7ba6fe7 100644 --- a/src/rve.cpp +++ b/src/rve.cpp @@ -5,6 +5,7 @@ #include #include "debug.hpp" +#include "elf.hpp" #include "vm.hpp" int main(int argc, char *argv[]) { @@ -26,12 +27,14 @@ int main(int argc, char *argv[]) { std::vector memory; try { - memory = load_program(program_filename, MEMORY_SIZE); + memory = load_elf(program_filename, MEMORY_SIZE); } catch (const std::exception &e) { std::cerr << e.what() << std::endl;
return 1; } + memory.resize(MEMORY_SIZE, 0); + VM vm(memory); if (!debug) { diff --git a/src/vm.cpp b/src/vm.cpp index b42a49d..9eab0ad 100644 --- a/src/vm.cpp +++ b/src/vm.cpp @@ -12,36 +12,6 @@ inline int32_t sign_extend(int32_t value, int bits) { return (value ^ mask) - mask; } -std::vector load_program(const std::string& filename, - size_t memory_size) { - std::vector memory(memory_size, 0); - - std::ifstream file(filename, std::ios::binary | std::ios::ate); - - if (!file.is_open()) { - throw std::runtime_error("Failed to open file: " + filename); - } - - size_t file_size = file.tellg(); - - if (file_size > memory_size) { - throw std::runtime_error("File is too big"); - } - - file.seekg(0, std::ios::beg); - - file.read(reinterpret_cast(&memory[0]), file_size); - - if (!file) { - throw std::runtime_error( - "Failed to read the complete program into memory."); - } - - file.close(); - - return memory; -} - VM::VM(std::vector memory) : memory_(memory) {} std::vector VM::read_memory(size_t start, size_t size) { diff --git a/src/vm.hpp b/src/vm.hpp index 11ee55f..37f2885 100644 --- a/src/vm.hpp +++ b/src/vm.hpp @@ -9,9 +9,6 @@ class EbreakException : std::exception {}; const int NUM_REGISTERS = 32; // Standard RISC-V has 32 registers -std::vector load_program(const std::string& filename, - size_t memory_size); - class VM { public: VM(std::vector memory);