From 65e5011f7fd7dc20ad61ff1b612a776cb2739d5a Mon Sep 17 00:00:00 2001 From: Konstantin Nazarov Date: Thu, 19 Dec 2024 19:07:39 +0000 Subject: [PATCH] Speed up framebuffer operation to reach about 200fps fill rate --- CMakeLists.txt | 6 +- example/example.c | 12 ++-- rve.nix | 8 ++- src/debug.cpp | 6 +- src/framebuffer.cpp | 24 ++++++- src/vm.cpp | 149 +++++++++++++++++++++++++++----------------- src/vm.hpp | 64 ++++++++++++++++--- 7 files changed, 191 insertions(+), 78 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dfcc3d0..f469000 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS true) message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") -set (CMAKE_CXX_FLAGS "-static-libgcc -static-libstdc++ -Werror -Wall -Wunused-result -Wno-unused-function -Wno-unused-variable -fno-omit-frame-pointer -fsanitize=address -Wno-c99-designator") +set (CMAKE_CXX_FLAGS "-static-libgcc -static-libstdc++ -Werror -Wall -Wunused-result -Wno-unused-function -Wno-unused-variable -fno-omit-frame-pointer -Wno-c99-designator") + +if(CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address") +endif() find_package(SDL2 REQUIRED) diff --git a/example/example.c b/example/example.c index ac8f9bd..6a9f074 100644 --- a/example/example.c +++ b/example/example.c @@ -35,11 +35,13 @@ int fib(int n) { } void draw() { - for (int i = 0; i< 640; i++) { - for (int j = 0; j < 480; j++) { - uint32_t color = i*j; - uint32_t* addr = (uint32_t*)(FRAMEBUFFER_BASE + ((j*640) + i) *4); - *addr = color; + for (int k = 0; k < 200; k++) { + for (int i = 0; i< 640; i++) { + for (int j = 0; j < 480; j++) { + uint32_t color = i*(j + k); + uint32_t* addr = (uint32_t*)(FRAMEBUFFER_BASE + ((j*640) + i) *4); + *addr = color; + } } } } diff --git a/rve.nix b/rve.nix index d4efb8c..1e15057 100644 --- a/rve.nix +++ b/rve.nix @@ -32,7 +32,13 @@ pkgs.gcc13Stdenv.mkDerivation rec { hardeningDisable = [ "all" ]; cmakeFlags = [ 
"-DCMAKE_EXPORT_COMPILE_COMMANDS=TRUE" - "-DCMAKE_BUILD_TYPE=Debug" + #"-DCMAKE_BUILD_TYPE=Debug" + "-DCMAKE_BUILD_TYPE=RelWithDebInfo" + + # For profiling + # "-DCMAKE_CXX_FLAGS=-pg" + # "-DCMAKE_EXE_LINKER_FLAGS=-pg" + # "-DCMAKE_SHARED_LINKER_FLAGS=-pg" ]; shellHook = '' export CMAKE_BUILD_TYPE=Debug diff --git a/src/debug.cpp b/src/debug.cpp index 00bd681..3eee4ed 100644 --- a/src/debug.cpp +++ b/src/debug.cpp @@ -229,10 +229,10 @@ void GDBStub::handle_packet(const std::string &packet) { if (breakpoints.count(addr) == 0) { uint32_t original_instr; - vm.read_mem((uint8_t *)&original_instr, addr, 4); + vm.read_mem_u32(&original_instr, addr); breakpoints[addr] = original_instr; uint32_t debug_instr = 0x00100073; // 0x00100073 is EBREAK - vm.write_mem((uint8_t *)&debug_instr, addr, 4); + vm.write_mem_u32(&debug_instr, addr); } } send_packet("OK"); @@ -246,7 +246,7 @@ void GDBStub::handle_packet(const std::string &packet) { if (breakpoints.count(addr) > 0) { // Restore the original instruction - vm.write_mem((uint8_t *)&breakpoints[addr], addr, 4); + vm.write_mem_u32(&breakpoints[addr], addr); breakpoints.erase(addr); } } diff --git a/src/framebuffer.cpp b/src/framebuffer.cpp index a5127af..9ffc275 100644 --- a/src/framebuffer.cpp +++ b/src/framebuffer.cpp @@ -51,10 +51,28 @@ void Framebuffer::draw() { SDL_RenderPresent(renderer); } -void Framebuffer::write_mem(uint8_t* src, size_t addr, size_t size) { - Device::write_mem(src, addr, size); +void Framebuffer::write_mem_u8(uint8_t* src, size_t addr) { + Device::write_mem_u8(src, addr); - if (addr + size == base() + (640 * 480) * 4) { + if (addr + 1 == base() + (640 * 480) * 4) { + // draw to the screen when the last byte is written + draw(); + } +} + +void Framebuffer::write_mem_u16(uint16_t* src, size_t addr) { + Device::write_mem_u16(src, addr); + + if (addr + 2 == base() + (640 * 480) * 4) { + // draw to the screen when the last byte is written + draw(); + } +} + +void Framebuffer::write_mem_u32(uint32_t* src, 
size_t addr) { + Device::write_mem_u32(src, addr); + + if (addr + 4 == base() + (640 * 480) * 4) { // draw to the screen when the last byte is written draw(); } diff --git a/src/vm.cpp b/src/vm.cpp index a0b83d4..df31cf3 100644 --- a/src/vm.cpp +++ b/src/vm.cpp @@ -30,76 +30,114 @@ void VM::setreg(size_t regnum, uint32_t value) { registers[regnum] = value; } -void Device::write_mem(uint8_t* src, size_t addr, size_t size) { - if (addr < base_addr || addr + size > base_addr + mem_size) { - throw std::runtime_error("Memory access out of bounds"); - } - addr -= base_addr; - std::memcpy(&mem[addr], src, size); -} - -void Device::read_mem(uint8_t* dst, size_t addr, size_t size) { - if (addr < base_addr || addr + size > base_addr + mem_size) { - throw std::runtime_error("Memory access out of bounds"); - } - addr -= base_addr; - std::memcpy(dst, &mem[addr], size); -} - -void UART::read_mem(uint8_t* dst, size_t addr, size_t size) { - if (addr < UART_ADDR || addr + size > UART_ADDR + 8) { +void UART::read_mem_u8(uint8_t* dst, size_t addr) { + if (addr < UART_ADDR || addr + 1 > UART_ADDR + 8) { throw std::runtime_error("Memory access out of bounds"); } addr -= UART_ADDR; - for (size_t i = 0; i < size; i++) { - switch (addr + i) { - case UART_LSR: - // Always ready to transmit - dst[i] = LSR_TRANSMITTER_EMPTY; - default: - dst[i] = 0; - } + switch (addr) { + case UART_LSR: + // Always ready to transmit + *dst = LSR_TRANSMITTER_EMPTY; + default: + *dst = 0; } } -void UART::write_mem(uint8_t* src, size_t addr, size_t size) { - if (addr < UART_ADDR || addr + size > UART_ADDR + 8) { +void UART::write_mem_u8(uint8_t* src, size_t addr) { + if (addr < UART_ADDR || addr + 1 > UART_ADDR + 8) { throw std::runtime_error("Memory access out of bounds"); } addr -= UART_ADDR; - for (size_t i = 0; i < size; i++) { - switch (addr + i) { - case UART_THR: - std::cout.put(static_cast<char>(*src)); - break; - } + switch (addr) { + case UART_THR: + std::cout.put(static_cast<char>(*src)); + break; } } -void
Timer::read_mem(uint8_t* dst, size_t addr, size_t size) { +void Timer::update() { using Clock = std::chrono::high_resolution_clock; constexpr auto den = Clock::period::den; constexpr auto num = Clock::period::num; // Let's assume that clock rate is 1MHz for now - set(Clock::now().time_since_epoch().count() / (den / (1000000 * num))); - Device::read_mem(dst, addr, size); + uint64_t value = + Clock::now().time_since_epoch().count() / (den / (1000000 * num)); + + write_mem_u32((uint32_t*)&value, MTIME_ADDR); + write_mem_u32(((uint32_t*)&value) + 1, MTIME_ADDR + 4); } -void VM::read_mem(uint8_t* dst, size_t addr, size_t size) { +void Timer::read_mem_u8(uint8_t* dst, size_t addr) { + update(); + Device::read_mem_u8(dst, addr); +} + +void Timer::read_mem_u16(uint16_t* dst, size_t addr) { + update(); + Device::read_mem_u16(dst, addr); +} + +void Timer::read_mem_u32(uint32_t* dst, size_t addr) { + update(); + Device::read_mem_u32(dst, addr); +} + +void VM::read_mem_u8(uint8_t* dst, size_t addr) { for (Device* dev : devices) { - if (addr >= dev->base() && addr + size <= dev->base() + dev->size()) { - dev->read_mem(dst, addr, size); + if (addr >= dev->base() && addr + 1 <= dev->base() + dev->size()) { + dev->read_mem_u8(dst, addr); return; } } throw std::runtime_error("Memory access out of bounds"); } -void VM::write_mem(uint8_t* src, size_t addr, size_t size) { +void VM::read_mem_u16(uint16_t* dst, size_t addr) { for (Device* dev : devices) { - if (addr >= dev->base() && addr + size <= dev->base() + dev->size()) { - dev->write_mem(src, addr, size); + if (addr >= dev->base() && addr + 2 <= dev->base() + dev->size()) { + dev->read_mem_u16(dst, addr); + return; + } + } + throw std::runtime_error("Memory access out of bounds"); +} + +void VM::read_mem_u32(uint32_t* dst, size_t addr) { + for (Device* dev : devices) { + if (addr >= dev->base() && addr + 4 <= dev->base() + dev->size()) { + dev->read_mem_u32(dst, addr); + return; + } + } + throw std::runtime_error("Memory access 
out of bounds"); +} + +void VM::write_mem_u8(uint8_t* src, size_t addr) { + for (Device* dev : devices) { + if (addr >= dev->base() && addr + 1 <= dev->base() + dev->size()) { + dev->write_mem_u8(src, addr); + return; + } + } + throw std::runtime_error("Memory access out of bounds"); +} + +void VM::write_mem_u16(uint16_t* src, size_t addr) { + for (Device* dev : devices) { + if (addr >= dev->base() && addr + 2 <= dev->base() + dev->size()) { + dev->write_mem_u16(src, addr); + return; + } + } + throw std::runtime_error("Memory access out of bounds"); +} + +void VM::write_mem_u32(uint32_t* src, size_t addr) { + for (Device* dev : devices) { + if (addr >= dev->base() && addr + 4 <= dev->base() + dev->size()) { + dev->write_mem_u32(src, addr); return; } } @@ -114,13 +152,13 @@ std::vector<uint8_t> VM::read_memory(size_t start, size_t size) { while (i < end) { if (i % 4 == 0 && end - i >= 4) { - read_mem(&res[i - start], i, 4); + read_mem_u32((uint32_t*)&res[i - start], i); i += 4; } else if (i % 2 == 0 && end - i >= 2) { - read_mem(&res[i - start], i, 2); + read_mem_u16((uint16_t*)&res[i - start], i); i += 2; } else { - read_mem(&res[i - start], i, 1); + read_mem_u8((uint8_t*)&res[i - start], i); i += 1; } } @@ -141,9 +179,8 @@ uint32_t VM::getreg(size_t regnum) { const std::string& VM::get_file_path() { return file_path; } void VM::step() { - if (pc < PROGRAM_ADDR) throw std::runtime_error("PC out of range"); uint32_t instr; - read_mem((uint8_t*)&instr, pc, 4); + ram.read_mem_u32(&instr, pc); // std::cout << "pc: " << std::hex << pc << std::dec << "\n"; // std::cout << "instr: " << std::hex << instr << "\n"; pc += 4; @@ -339,27 +376,27 @@ void VM::step() { if (funct3 == 0x00) { // LB uint32_t addr = registers[rs1] + imm; uint8_t val; - read_mem(&val, addr, 1); + read_mem_u8(&val, addr); setreg(rd, sign_extend(val, 8)); } else if (funct3 == 0x01) { // LH uint32_t addr = registers[rs1] + imm; uint16_t val; - read_mem((uint8_t*)&val, addr, 2); + read_mem_u16(&val, addr);
setreg(rd, sign_extend(val, 16)); } else if (funct3 == 0x2) { // LW uint32_t addr = registers[rs1] + imm; uint32_t val; - read_mem((uint8_t*)&val, addr, 4); + read_mem_u32(&val, addr); setreg(rd, val); } else if (funct3 == 0x4) { // LBU uint32_t addr = registers[rs1] + imm; uint8_t val; - read_mem(&val, addr, 1); + read_mem_u8(&val, addr); setreg(rd, val); } else if (funct3 == 0x5) { // LHU uint32_t addr = registers[rs1] + imm; uint16_t val; - read_mem((uint8_t*)&val, addr, 2); + read_mem_u16(&val, addr); setreg(rd, val); } else { throw std::runtime_error("Unknown load instruction"); @@ -371,13 +408,13 @@ void VM::step() { imm = sign_extend(imm, 12); // Sign-extend 12-bit immediate if (funct3 == 0x0) { // SB uint32_t addr = registers[rs1] + imm; - write_mem((uint8_t*)&registers[rs2], addr, 1); + write_mem_u8((uint8_t*)&registers[rs2], addr); } else if (funct3 == 0x1) { // SH uint32_t addr = registers[rs1] + imm; - write_mem((uint8_t*)&registers[rs2], addr, 2); + write_mem_u16((uint16_t*)&registers[rs2], addr); } else if (funct3 == 0x2) { // SW uint32_t addr = registers[rs1] + imm; - write_mem((uint8_t*)&registers[rs2], addr, 4); + write_mem_u32((uint32_t*)&registers[rs2], addr); } else { throw std::runtime_error("Unknown store instruction"); } diff --git a/src/vm.hpp b/src/vm.hpp index 34e96b4..0e5f0b8 100644 --- a/src/vm.hpp +++ b/src/vm.hpp @@ -23,8 +23,43 @@ class Device { virtual ~Device() = default; - virtual void write_mem(uint8_t *src, size_t addr, size_t size); - virtual void read_mem(uint8_t *dst, size_t addr, size_t size); + template <typename T> + inline void write_mem(T *src, size_t addr) { + if (addr < base_addr || addr + sizeof(T) > base_addr + mem_size) { + throw std::runtime_error("Memory access out of bounds"); + } + addr -= base_addr; + *(T *)&mem[addr] = *src; + } + + template <typename T> + inline void read_mem(T *dst, size_t addr) { + if (addr < base_addr || addr + sizeof(T) > base_addr + mem_size) { + throw std::runtime_error("Memory access out of bounds"); + } + addr -= base_addr; +
*dst = *(T *)&mem[addr]; + } + + virtual void write_mem_u8(uint8_t *src, size_t addr) { + write_mem(src, addr); + } + virtual void write_mem_u16(uint16_t *src, size_t addr) { + write_mem(src, addr); + } + virtual void write_mem_u32(uint32_t *src, size_t addr) { + write_mem(src, addr); + } + + virtual void read_mem_u8(uint8_t *dst, size_t addr) { + read_mem(dst, addr); + } + virtual void read_mem_u16(uint16_t *dst, size_t addr) { + read_mem(dst, addr); + } + virtual void read_mem_u32(uint32_t *dst, size_t addr) { + read_mem(dst, addr); + } uint32_t base() { return base_addr; } uint32_t size() { return mem_size; } @@ -39,8 +74,9 @@ class UART final : public Device { public: UART() : Device(UART_ADDR, 8) {} - virtual void write_mem(uint8_t *src, size_t addr, size_t size); - virtual void read_mem(uint8_t *dst, size_t addr, size_t size); + virtual void write_mem_u8(uint8_t *src, size_t addr); + + virtual void read_mem_u8(uint8_t *dst, size_t addr); private: enum Registers { @@ -61,14 +97,19 @@ class RAM final : public Device { class Timer final : public Device { public: Timer() : Device(MTIME_ADDR, 8) {} - virtual void read_mem(uint8_t *dst, size_t addr, size_t size); - void set(uint64_t value) { write_mem((uint8_t *)&value, MTIME_ADDR, 8); } + virtual void read_mem_u8(uint8_t *dst, size_t addr); + virtual void read_mem_u16(uint16_t *dst, size_t addr); + virtual void read_mem_u32(uint32_t *dst, size_t addr); + + void update(); }; class Framebuffer final : public Device { public: Framebuffer(); - virtual void write_mem(uint8_t *src, size_t addr, size_t size); + virtual void write_mem_u8(uint8_t *src, size_t addr); + virtual void write_mem_u16(uint16_t *src, size_t addr); + virtual void write_mem_u32(uint32_t *src, size_t addr); protected: void draw(); @@ -83,8 +124,13 @@ class VM { std::vector<uint8_t> read_memory(size_t start, size_t size); - void read_mem(uint8_t *dst, size_t addr, size_t size); - void write_mem(uint8_t *src, size_t addr, size_t size); + void
read_mem_u8(uint8_t *dst, size_t addr); + void read_mem_u16(uint16_t *dst, size_t addr); + void read_mem_u32(uint32_t *dst, size_t addr); + + void write_mem_u8(uint8_t *src, size_t addr); + void write_mem_u16(uint16_t *src, size_t addr); + void write_mem_u32(uint32_t *src, size_t addr); const std::string &get_file_path();