Speed up framebuffer operation to reach about 200fps fill rate

This commit is contained in:
Konstantin Nazarov 2024-12-19 19:07:39 +00:00
parent 44b45c58c6
commit 65e5011f7f
Signed by: knazarov
GPG key ID: 4CFE0A42FA409C22
7 changed files with 191 additions and 78 deletions

View file

@ -6,7 +6,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS true)
message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
set (CMAKE_CXX_FLAGS "-static-libgcc -static-libstdc++ -Werror -Wall -Wunused-result -Wno-unused-function -Wno-unused-variable -fno-omit-frame-pointer -fsanitize=address -Wno-c99-designator")
set (CMAKE_CXX_FLAGS "-static-libgcc -static-libstdc++ -Werror -Wall -Wunused-result -Wno-unused-function -Wno-unused-variable -fno-omit-frame-pointer -Wno-c99-designator")
# Append AddressSanitizer to the compiler flags only when the build type is
# exactly "Debug"; every other build type keeps the flags set above unchanged.
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
endif()
find_package(SDL2 REQUIRED)

View file

@ -35,11 +35,13 @@ int fib(int n) {
}
// Fill-rate benchmark: repaints the whole 640x480 framebuffer 200 times.
//
// In pass k, the 32-bit word at byte offset ((j * 640) + i) * 4 from
// FRAMEBUFFER_BASE receives the value i * (j + k).
void draw() {
  for (int k = 0; k < 200; k++) {
    for (int i = 0; i < 640; i++) {
      for (int j = 0; j < 480; j++) {
        uint32_t color = i * (j + k);
        // The framebuffer is memory-mapped, so the store itself is the
        // observable effect. A volatile pointer forces the compiler to emit
        // every store, in program order, instead of treating the earlier
        // passes as dead stores that a later pass overwrites.
        volatile uint32_t* addr =
            (volatile uint32_t*)(FRAMEBUFFER_BASE + ((j * 640) + i) * 4);
        *addr = color;
      }
    }
  }
}

View file

@ -32,7 +32,13 @@ pkgs.gcc13Stdenv.mkDerivation rec {
hardeningDisable = [ "all" ];
cmakeFlags = [
"-DCMAKE_EXPORT_COMPILE_COMMANDS=TRUE"
"-DCMAKE_BUILD_TYPE=Debug"
#"-DCMAKE_BUILD_TYPE=Debug"
"-DCMAKE_BUILD_TYPE=RelWithDebInfo"
# For profiling
# "-DCMAKE_CXX_FLAGS=-pg"
# "-DCMAKE_EXE_LINKER_FLAGS=-pg"
# "-DCMAKE_SHARED_LINKER_FLAGS=-pg"
];
shellHook = ''
export CMAKE_BUILD_TYPE=Debug

View file

@ -229,10 +229,10 @@ void GDBStub::handle_packet(const std::string &packet) {
if (breakpoints.count(addr) == 0) {
uint32_t original_instr;
vm.read_mem((uint8_t *)&original_instr, addr, 4);
vm.read_mem_u32(&original_instr, addr);
breakpoints[addr] = original_instr;
uint32_t debug_instr = 0x00100073; // 0x00100073 is EBREAK
vm.write_mem((uint8_t *)&debug_instr, addr, 4);
vm.write_mem_u32(&debug_instr, addr);
}
}
send_packet("OK");
@ -246,7 +246,7 @@ void GDBStub::handle_packet(const std::string &packet) {
if (breakpoints.count(addr) > 0) {
// Restore the original instruction
vm.write_mem((uint8_t *)&breakpoints[addr], addr, 4);
vm.write_mem_u32(&breakpoints[addr], addr);
breakpoints.erase(addr);
}
}

View file

@ -51,10 +51,28 @@ void Framebuffer::draw() {
SDL_RenderPresent(renderer);
}
void Framebuffer::write_mem(uint8_t* src, size_t addr, size_t size) {
Device::write_mem(src, addr, size);
// Stores one byte through Device::write_mem_u8, then calls draw() if that
// byte was the very last byte of the 640 * 480 * 4-byte pixel area.
void Framebuffer::write_mem_u8(uint8_t* src, size_t addr) {
  Device::write_mem_u8(src, addr);

  // First address past the pixel data.
  const auto frame_end = base() + (640 * 480) * 4;
  const bool wrote_last_byte = (addr + 1 == frame_end);
  if (!wrote_last_byte) {
    return;
  }
  // draw to the screen when the last byte is written
  draw();
}
// Stores a 16-bit value through Device::write_mem_u16, then calls draw() if
// the store ended exactly at the end of the 640 * 480 * 4-byte pixel area.
void Framebuffer::write_mem_u16(uint16_t* src, size_t addr) {
  Device::write_mem_u16(src, addr);

  // First address past the pixel data.
  const auto frame_end = base() + (640 * 480) * 4;
  const bool wrote_last_byte = (addr + 2 == frame_end);
  if (!wrote_last_byte) {
    return;
  }
  // draw to the screen when the last byte is written
  draw();
}
void Framebuffer::write_mem_u32(uint32_t* src, size_t addr) {
Device::write_mem_u32(src, addr);
if (addr + 4 == base() + (640 * 480) * 4) {
// draw to the screen when the last byte is written
draw();
}

View file

@ -30,76 +30,114 @@ void VM::setreg(size_t regnum, uint32_t value) {
registers[regnum] = value;
}
// Copies `size` bytes from `src` into device memory at absolute address
// `addr`. Throws std::runtime_error when the range [addr, addr + size) is not
// fully inside [base_addr, base_addr + mem_size).
void Device::write_mem(uint8_t* src, size_t addr, size_t size) {
  const bool starts_below = addr < base_addr;
  const bool ends_above = addr + size > base_addr + mem_size;
  if (starts_below || ends_above) {
    throw std::runtime_error("Memory access out of bounds");
  }

  // Translate the absolute address into an index into `mem`.
  const size_t offset = addr - base_addr;
  std::memcpy(&mem[offset], src, size);
}
// Copies `size` bytes from device memory at absolute address `addr` into
// `dst`. Throws std::runtime_error when the range [addr, addr + size) is not
// fully inside [base_addr, base_addr + mem_size).
void Device::read_mem(uint8_t* dst, size_t addr, size_t size) {
  const bool starts_below = addr < base_addr;
  const bool ends_above = addr + size > base_addr + mem_size;
  if (starts_below || ends_above) {
    throw std::runtime_error("Memory access out of bounds");
  }

  // Translate the absolute address into an index into `mem`.
  const size_t offset = addr - base_addr;
  std::memcpy(dst, &mem[offset], size);
}
void UART::read_mem(uint8_t* dst, size_t addr, size_t size) {
if (addr < UART_ADDR || addr + size > UART_ADDR + 8) {
// Reads one byte from a UART register.
//
// `addr` is an absolute address and must lie inside the 8-byte window that
// starts at UART_ADDR; otherwise std::runtime_error is thrown.
// UART_LSR reads as LSR_TRANSMITTER_EMPTY; every other register reads as 0.
void UART::read_mem_u8(uint8_t* dst, size_t addr) {
  if (addr < UART_ADDR || addr + 1 > UART_ADDR + 8) {
    throw std::runtime_error("Memory access out of bounds");
  }
  addr -= UART_ADDR;
  switch (addr) {
    case UART_LSR:
      // Always ready to transmit
      *dst = LSR_TRANSMITTER_EMPTY;
      // Must not fall through: the default case would overwrite the status
      // with 0 and the reader would never see the transmitter as empty.
      break;
    default:
      *dst = 0;
      break;
  }
}
void UART::write_mem(uint8_t* src, size_t addr, size_t size) {
if (addr < UART_ADDR || addr + size > UART_ADDR + 8) {
// Writes one byte to a UART register.
//
// `addr` is an absolute address and must lie inside the 8-byte window that
// starts at UART_ADDR; otherwise std::runtime_error is thrown. A write to
// UART_THR emits the byte on std::cout; writes to other registers are ignored.
void UART::write_mem_u8(uint8_t* src, size_t addr) {
  const bool inside_window = addr >= UART_ADDR && addr + 1 <= UART_ADDR + 8;
  if (!inside_window) {
    throw std::runtime_error("Memory access out of bounds");
  }

  const size_t reg = addr - UART_ADDR;
  if (reg == static_cast<size_t>(UART_THR)) {
    std::cout.put(static_cast<char>(*src));
  }
}
void Timer::read_mem(uint8_t* dst, size_t addr, size_t size) {
// Refreshes the 64-bit counter stored at MTIME_ADDR from the host clock.
//
// The counter is the time since the clock's epoch expressed in microseconds,
// written as two 32-bit words: low word at MTIME_ADDR, high word at
// MTIME_ADDR + 4.
void Timer::update() {
  using Clock = std::chrono::high_resolution_clock;

  // Let's assume that clock rate is 1MHz for now
  // duration_cast performs the period conversion for any clock resolution;
  // the previous manual divisor den / (1000000 * num) truncates to zero for
  // clocks coarser than one microsecond.
  const auto since_epoch = Clock::now().time_since_epoch();
  const uint64_t value = static_cast<uint64_t>(
      std::chrono::duration_cast<std::chrono::microseconds>(since_epoch)
          .count());

  // Split with shifts rather than reinterpreting the uint64_t as uint32_t[2]:
  // no aliasing violation and no dependence on host byte order.
  uint32_t low_word = static_cast<uint32_t>(value);
  uint32_t high_word = static_cast<uint32_t>(value >> 32);
  write_mem_u32(&low_word, MTIME_ADDR);
  write_mem_u32(&high_word, MTIME_ADDR + 4);
}
void VM::read_mem(uint8_t* dst, size_t addr, size_t size) {
// Reads one byte of the timer: calls update() first so the stored counter is
// current, then delegates the actual read to Device::read_mem_u8.
void Timer::read_mem_u8(uint8_t* dst, size_t addr) {
update();
Device::read_mem_u8(dst, addr);
}
// Reads 16 bits of the timer: calls update() first so the stored counter is
// current, then delegates the actual read to Device::read_mem_u16.
void Timer::read_mem_u16(uint16_t* dst, size_t addr) {
update();
Device::read_mem_u16(dst, addr);
}
// Reads 32 bits of the timer: calls update() first so the stored counter is
// current, then delegates the actual read to Device::read_mem_u32.
// Note that update() runs on every call, so two separate 32-bit reads of the
// low and high words each observe a freshly sampled counter.
void Timer::read_mem_u32(uint32_t* dst, size_t addr) {
update();
Device::read_mem_u32(dst, addr);
}
void VM::read_mem_u8(uint8_t* dst, size_t addr) {
for (Device* dev : devices) {
if (addr >= dev->base() && addr + size <= dev->base() + dev->size()) {
dev->read_mem(dst, addr, size);
if (addr >= dev->base() && addr + 1 <= dev->base() + dev->size()) {
dev->read_mem_u8(dst, addr);
return;
}
}
throw std::runtime_error("Memory access out of bounds");
}
void VM::write_mem(uint8_t* src, size_t addr, size_t size) {
void VM::read_mem_u16(uint16_t* dst, size_t addr) {
for (Device* dev : devices) {
if (addr >= dev->base() && addr + size <= dev->base() + dev->size()) {
dev->write_mem(src, addr, size);
if (addr >= dev->base() && addr + 2 <= dev->base() + dev->size()) {
dev->read_mem_u16(dst, addr);
return;
}
}
throw std::runtime_error("Memory access out of bounds");
}
void VM::read_mem_u32(uint32_t* dst, size_t addr) {
for (Device* dev : devices) {
if (addr >= dev->base() && addr + 4 <= dev->base() + dev->size()) {
dev->read_mem_u32(dst, addr);
return;
}
}
throw std::runtime_error("Memory access out of bounds");
}
void VM::write_mem_u8(uint8_t* src, size_t addr) {
for (Device* dev : devices) {
if (addr >= dev->base() && addr + 1 <= dev->base() + dev->size()) {
dev->write_mem_u8(src, addr);
return;
}
}
throw std::runtime_error("Memory access out of bounds");
}
void VM::write_mem_u16(uint16_t* src, size_t addr) {
for (Device* dev : devices) {
if (addr >= dev->base() && addr + 2 <= dev->base() + dev->size()) {
dev->write_mem_u16(src, addr);
return;
}
}
throw std::runtime_error("Memory access out of bounds");
}
void VM::write_mem_u32(uint32_t* src, size_t addr) {
for (Device* dev : devices) {
if (addr >= dev->base() && addr + 4 <= dev->base() + dev->size()) {
dev->write_mem_u32(src, addr);
return;
}
}
@ -114,13 +152,13 @@ std::vector<uint8_t> VM::read_memory(size_t start, size_t size) {
while (i < end) {
if (i % 4 == 0 && end - i >= 4) {
read_mem(&res[i - start], i, 4);
read_mem_u32((uint32_t*)&res[i - start], i);
i += 4;
} else if (i % 2 == 0 && end - i >= 2) {
read_mem(&res[i - start], i, 2);
read_mem_u16((uint16_t*)&res[i - start], i);
i += 2;
} else {
read_mem(&res[i - start], i, 1);
read_mem_u8((uint8_t*)&res[i - start], i);
i += 1;
}
}
@ -141,9 +179,8 @@ uint32_t VM::getreg(size_t regnum) {
const std::string& VM::get_file_path() { return file_path; }
void VM::step() {
if (pc < PROGRAM_ADDR) throw std::runtime_error("PC out of range");
uint32_t instr;
read_mem((uint8_t*)&instr, pc, 4);
ram.read_mem_u32(&instr, pc);
// std::cout << "pc: " << std::hex << pc << std::dec << "\n";
// std::cout << "instr: " << std::hex << instr << "\n";
pc += 4;
@ -339,27 +376,27 @@ void VM::step() {
if (funct3 == 0x00) { // LB
uint32_t addr = registers[rs1] + imm;
uint8_t val;
read_mem(&val, addr, 1);
read_mem_u8(&val, addr);
setreg(rd, sign_extend(val, 8));
} else if (funct3 == 0x01) { // LH
uint32_t addr = registers[rs1] + imm;
uint16_t val;
read_mem((uint8_t*)&val, addr, 2);
read_mem_u16(&val, addr);
setreg(rd, sign_extend(val, 16));
} else if (funct3 == 0x2) { // LW
uint32_t addr = registers[rs1] + imm;
uint32_t val;
read_mem((uint8_t*)&val, addr, 4);
read_mem_u32(&val, addr);
setreg(rd, val);
} else if (funct3 == 0x4) { // LBU
uint32_t addr = registers[rs1] + imm;
uint8_t val;
read_mem(&val, addr, 1);
read_mem_u8(&val, addr);
setreg(rd, val);
} else if (funct3 == 0x5) { // LHU
uint32_t addr = registers[rs1] + imm;
uint16_t val;
read_mem((uint8_t*)&val, addr, 2);
read_mem_u16(&val, addr);
setreg(rd, val);
} else {
throw std::runtime_error("Unknown load instruction");
@ -371,13 +408,13 @@ void VM::step() {
imm = sign_extend(imm, 12); // Sign-extend 12-bit immediate
if (funct3 == 0x0) { // SB
uint32_t addr = registers[rs1] + imm;
write_mem((uint8_t*)&registers[rs2], addr, 1);
write_mem_u8((uint8_t*)&registers[rs2], addr);
} else if (funct3 == 0x1) { // SH
uint32_t addr = registers[rs1] + imm;
write_mem((uint8_t*)&registers[rs2], addr, 2);
write_mem_u16((uint16_t*)&registers[rs2], addr);
} else if (funct3 == 0x2) { // SW
uint32_t addr = registers[rs1] + imm;
write_mem((uint8_t*)&registers[rs2], addr, 4);
write_mem_u32((uint32_t*)&registers[rs2], addr);
} else {
throw std::runtime_error("Unknown store instruction");
}

View file

@ -23,8 +23,43 @@ class Device {
virtual ~Device() = default;
virtual void write_mem(uint8_t *src, size_t addr, size_t size);
virtual void read_mem(uint8_t *dst, size_t addr, size_t size);
// Stores the value `*src` (sizeof(T) bytes) at absolute address `addr` in
// this device's memory.
//
// Throws std::runtime_error unless [addr, addr + sizeof(T)) lies inside
// [base_addr, base_addr + mem_size).
template <class T>
inline void write_mem(T *src, size_t addr) {
  if (addr < base_addr || addr + sizeof(T) > base_addr + mem_size) {
    throw std::runtime_error("Memory access out of bounds");
  }
  addr -= base_addr;
  // Copy bytes instead of dereferencing `(T *)&mem[addr]`: the byte buffer is
  // not guaranteed to be aligned for T at an arbitrary address, and a typed
  // store into it violates strict aliasing. For a constant sizeof(T) the
  // compiler lowers this to a single store. The builtin form is used so the
  // header needs no additional include.
  __builtin_memcpy(&mem[addr], src, sizeof(T));
}
// Loads sizeof(T) bytes from absolute address `addr` in this device's memory
// into `*dst`.
//
// Throws std::runtime_error unless [addr, addr + sizeof(T)) lies inside
// [base_addr, base_addr + mem_size).
template <class T>
inline void read_mem(T *dst, size_t addr) {
  if (addr < base_addr || addr + sizeof(T) > base_addr + mem_size) {
    throw std::runtime_error("Memory access out of bounds");
  }
  addr -= base_addr;
  // Copy bytes instead of dereferencing `(T *)&mem[addr]`: neither the byte
  // buffer at an arbitrary address nor a caller-supplied `dst` that points
  // into a byte array is guaranteed to be aligned for T, and a typed load
  // from the buffer violates strict aliasing. For a constant sizeof(T) the
  // compiler lowers this to a single load. The builtin form is used so the
  // header needs no additional include.
  __builtin_memcpy(dst, &mem[addr], sizeof(T));
}
// Virtual 8-bit store. The default implementation forwards to the
// bounds-checked write_mem<uint8_t>; `addr` is an absolute address.
virtual void write_mem_u8(uint8_t *src, size_t addr) {
write_mem<uint8_t>(src, addr);
}
// Virtual 16-bit store. The default implementation forwards to the
// bounds-checked write_mem<uint16_t>; `addr` is an absolute address.
virtual void write_mem_u16(uint16_t *src, size_t addr) {
write_mem<uint16_t>(src, addr);
}
// Virtual 32-bit store. The default implementation forwards to the
// bounds-checked write_mem<uint32_t>; `addr` is an absolute address.
virtual void write_mem_u32(uint32_t *src, size_t addr) {
write_mem<uint32_t>(src, addr);
}
// Virtual 8-bit load. The default implementation forwards to the
// bounds-checked read_mem<uint8_t>; `addr` is an absolute address.
virtual void read_mem_u8(uint8_t *dst, size_t addr) {
read_mem<uint8_t>(dst, addr);
}
// Virtual 16-bit load. The default implementation forwards to the
// bounds-checked read_mem<uint16_t>; `addr` is an absolute address.
virtual void read_mem_u16(uint16_t *dst, size_t addr) {
read_mem<uint16_t>(dst, addr);
}
// Virtual 32-bit load. The default implementation forwards to the
// bounds-checked read_mem<uint32_t>; `addr` is an absolute address.
virtual void read_mem_u32(uint32_t *dst, size_t addr) {
read_mem<uint32_t>(dst, addr);
}
// Returns base_addr, the lowest absolute address accepted by the bounds
// checks of this device.
uint32_t base() { return base_addr; }
// Returns mem_size, the length in bytes of this device's address range.
uint32_t size() { return mem_size; }
@ -39,8 +74,9 @@ class UART final : public Device {
public:
UART() : Device(UART_ADDR, 8) {}
virtual void write_mem(uint8_t *src, size_t addr, size_t size);
virtual void read_mem(uint8_t *dst, size_t addr, size_t size);
virtual void write_mem_u8(uint8_t *src, size_t addr);
virtual void read_mem_u8(uint8_t *dst, size_t addr);
private:
enum Registers {
@ -61,14 +97,19 @@ class RAM final : public Device {
class Timer final : public Device {
public:
Timer() : Device(MTIME_ADDR, 8) {}
virtual void read_mem(uint8_t *dst, size_t addr, size_t size);
void set(uint64_t value) { write_mem((uint8_t *)&value, MTIME_ADDR, 8); }
virtual void read_mem_u8(uint8_t *dst, size_t addr);
virtual void read_mem_u16(uint16_t *dst, size_t addr);
virtual void read_mem_u32(uint32_t *dst, size_t addr);
void update();
};
class Framebuffer final : public Device {
public:
Framebuffer();
virtual void write_mem(uint8_t *src, size_t addr, size_t size);
virtual void write_mem_u8(uint8_t *src, size_t addr);
virtual void write_mem_u16(uint16_t *src, size_t addr);
virtual void write_mem_u32(uint32_t *src, size_t addr);
protected:
void draw();
@ -83,8 +124,13 @@ class VM {
std::vector<uint8_t> read_memory(size_t start, size_t size);
void read_mem(uint8_t *dst, size_t addr, size_t size);
void write_mem(uint8_t *src, size_t addr, size_t size);
void read_mem_u8(uint8_t *dst, size_t addr);
void read_mem_u16(uint16_t *dst, size_t addr);
void read_mem_u32(uint32_t *dst, size_t addr);
void write_mem_u8(uint8_t *src, size_t addr);
void write_mem_u16(uint16_t *src, size_t addr);
void write_mem_u32(uint32_t *src, size_t addr);
const std::string &get_file_path();