linuxOS_AP05/buildroot/package/retroarch/libretro-bsnes/0001-performance-async-render-scanline.patch
2025-06-02 13:59:07 +08:00

588 lines
15 KiB
Diff

From ec91c68690014125ce11c3ea3ba5388f50329831 Mon Sep 17 00:00:00 2001
From: Jeffy Chen <jeffy.chen@rock-chips.com>
Date: Tue, 14 May 2019 19:56:32 +0800
Subject: [PATCH] sfc: performance: Async render_scanline
---
Makefile | 2 +-
sfc/alt/ppu-performance/mmio/mmio.cpp | 113 ++++++++++++++++++----------------
sfc/alt/ppu-performance/ppu.cpp | 41 +++++++++++-
sfc/alt/ppu-performance/ppu.hpp | 8 +++
target-libretro/Makefile | 2 +-
5 files changed, 111 insertions(+), 55 deletions(-)
diff --git a/Makefile b/Makefile
index e208c156..10737acf 100644
--- a/Makefile
+++ b/Makefile
@@ -32,7 +32,7 @@ sfc_lagfix := 1
ifeq ($(DEBUG), 1)
flags := -I. -Ilibco -O0 -g
else
- flags := -I. -Ilibco -O3 -fomit-frame-pointer
+ flags := -I. -Ilibco -Ofast -ffast-math -fomit-frame-pointer
endif
cflags := -std=gnu99 -xc
diff --git a/sfc/alt/ppu-performance/mmio/mmio.cpp b/sfc/alt/ppu-performance/mmio/mmio.cpp
index 46244e23..28748c6b 100644
--- a/sfc/alt/ppu-performance/mmio/mmio.cpp
+++ b/sfc/alt/ppu-performance/mmio/mmio.cpp
@@ -281,12 +281,14 @@ uint8 PPU::mmio_read(unsigned addr) {
void PPU::mmio_write(unsigned addr, uint8 data) {
cpu.synchronize_ppu();
+ pthread_mutex_lock(&render_mutex);
+
switch(addr & 0xffff) {
case 0x2100: { //INIDISP
if(regs.display_disable && cpu.vcounter() == display.height) sprite.address_reset();
regs.display_disable = data & 0x80;
regs.display_brightness = data & 0x0f;
- return;
+ break;
}
case 0x2101: { //OBSEL
@@ -294,20 +296,20 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
sprite.regs.nameselect = (data >> 3) & 3;
sprite.regs.tiledata_addr = (data & 3) << 14;
sprite.list_valid = false;
- return;
+ break;
}
case 0x2102: { //OAMADDL
regs.oam_baseaddr = (regs.oam_baseaddr & 0x0100) | (data << 0);
sprite.address_reset();
- return;
+ break;
}
case 0x2103: { //OAMADDH
regs.oam_priority = data & 0x80;
regs.oam_baseaddr = ((data & 1) << 8) | (regs.oam_baseaddr & 0x00ff);
sprite.address_reset();
- return;
+ break;
}
case 0x2104: { //OAMDATA
@@ -320,7 +322,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
}
regs.oam_addr = (regs.oam_addr + 1) & 0x03ff;
sprite.set_first();
- return;
+ break;
}
case 0x2105: { //BGMODE
@@ -331,7 +333,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
regs.bg3_priority = data & 0x08;
regs.bgmode = data & 0x07;
mmio_update_video_mode();
- return;
+ break;
}
case 0x2106: { //MOSAIC
@@ -340,43 +342,43 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
bg3.regs.mosaic = (data & 0x04 ? mosaic_size : 0);
bg2.regs.mosaic = (data & 0x02 ? mosaic_size : 0);
bg1.regs.mosaic = (data & 0x01 ? mosaic_size : 0);
- return;
+ break;
}
case 0x2107: { //BG1SC
bg1.regs.screen_addr = (data & 0x7c) << 9;
bg1.regs.screen_size = data & 3;
- return;
+ break;
}
case 0x2108: { //BG2SC
bg2.regs.screen_addr = (data & 0x7c) << 9;
bg2.regs.screen_size = data & 3;
- return;
+ break;
}
case 0x2109: { //BG3SC
bg3.regs.screen_addr = (data & 0x7c) << 9;
bg3.regs.screen_size = data & 3;
- return;
+ break;
}
case 0x210a: { //BG4SC
bg4.regs.screen_addr = (data & 0x7c) << 9;
bg4.regs.screen_size = data & 3;
- return;
+ break;
}
case 0x210b: { //BG12NBA
bg1.regs.tiledata_addr = (data & 0x07) << 13;
bg2.regs.tiledata_addr = (data & 0x70) << 9;
- return;
+ break;
}
case 0x210c: { //BG34NBA
bg3.regs.tiledata_addr = (data & 0x07) << 13;
bg4.regs.tiledata_addr = (data & 0x70) << 9;
- return;
+ break;
}
case 0x210d: { //BG1HOFS
@@ -385,7 +387,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
bg1.regs.hoffset = (data << 8) | (regs.bgofs_latchdata & ~7) | ((bg1.regs.hoffset >> 8) & 7);
regs.bgofs_latchdata = data;
- return;
+ break;
}
case 0x210e: { //BG1VOFS
@@ -394,43 +396,43 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
bg1.regs.voffset = (data << 8) | regs.bgofs_latchdata;
regs.bgofs_latchdata = data;
- return;
+ break;
}
case 0x210f: { //BG2HOFS
bg2.regs.hoffset = (data << 8) | (regs.bgofs_latchdata & ~7) | ((bg2.regs.hoffset >> 8) & 7);
regs.bgofs_latchdata = data;
- return;
+ break;
}
case 0x2110: { //BG2VOFS
bg2.regs.voffset = (data << 8) | regs.bgofs_latchdata;
regs.bgofs_latchdata = data;
- return;
+ break;
}
case 0x2111: { //BG3HOFS
bg3.regs.hoffset = (data << 8) | (regs.bgofs_latchdata & ~7) | ((bg3.regs.hoffset >> 8) & 7);
regs.bgofs_latchdata = data;
- return;
+ break;
}
case 0x2112: { //BG3VOFS
bg3.regs.voffset = (data << 8) | regs.bgofs_latchdata;
regs.bgofs_latchdata = data;
- return;
+ break;
}
case 0x2113: { //BG4HOFS
bg4.regs.hoffset = (data << 8) | (regs.bgofs_latchdata & ~7) | ((bg4.regs.hoffset >> 8) & 7);
regs.bgofs_latchdata = data;
- return;
+ break;
}
case 0x2114: { //BG4VOFS
bg4.regs.voffset = (data << 8) | regs.bgofs_latchdata;
regs.bgofs_latchdata = data;
- return;
+ break;
}
case 0x2115: { //VMAIN
@@ -442,7 +444,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
case 2: regs.vram_incsize = 128; break;
case 3: regs.vram_incsize = 128; break;
}
- return;
+ break;
}
case 0x2116: { //VMADDL
@@ -450,7 +452,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
uint16 addr = get_vram_addr();
regs.vram_readbuffer = vram_read(addr + 0) << 0;
regs.vram_readbuffer |= vram_read(addr + 1) << 8;
- return;
+ break;
}
case 0x2117: { //VMADDH
@@ -458,67 +460,67 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
uint16 addr = get_vram_addr();
regs.vram_readbuffer = vram_read(addr + 0) << 0;
regs.vram_readbuffer |= vram_read(addr + 1) << 8;
- return;
+ break;
}
case 0x2118: { //VMDATAL
vram_write(get_vram_addr() + 0, data);
if(regs.vram_incmode == 0) regs.vram_addr += regs.vram_incsize;
- return;
+ break;
}
case 0x2119: { //VMDATAH
vram_write(get_vram_addr() + 1, data);
if(regs.vram_incmode == 1) regs.vram_addr += regs.vram_incsize;
- return;
+ break;
}
case 0x211a: { //M7SEL
regs.mode7_repeat = (data >> 6) & 3;
regs.mode7_vflip = data & 0x02;
regs.mode7_hflip = data & 0x01;
- return;
+ break;
}
case 0x211b: { //M7A
regs.m7a = (data << 8) | regs.mode7_latchdata;
regs.mode7_latchdata = data;
- return;
+ break;
}
case 0x211c: { //M7B
regs.m7b = (data << 8) | regs.mode7_latchdata;
regs.mode7_latchdata = data;
- return;
+ break;
}
case 0x211d: { //M7C
regs.m7c = (data << 8) | regs.mode7_latchdata;
regs.mode7_latchdata = data;
- return;
+ break;
}
case 0x211e: { //M7D
regs.m7d = (data << 8) | regs.mode7_latchdata;
regs.mode7_latchdata = data;
- return;
+ break;
}
case 0x211f: { //M7X
regs.m7x = (data << 8) | regs.mode7_latchdata;
regs.mode7_latchdata = data;
- return;
+ break;
}
case 0x2120: { //M7Y
regs.m7y = (data << 8) | regs.mode7_latchdata;
regs.mode7_latchdata = data;
- return;
+ break;
}
case 0x2121: { //CGADD
regs.cgram_addr = data << 1;
- return;
+ break;
}
case 0x2122: { //CGDATA
@@ -529,7 +531,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
cgram_write((regs.cgram_addr & ~1) + 1, data & 0x7f);
}
regs.cgram_addr = (regs.cgram_addr + 1) & 0x01ff;
- return;
+ break;
}
case 0x2123: { //W12SEL
@@ -541,7 +543,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
bg1.window.two_invert = data & 0x04;
bg1.window.one_enable = data & 0x02;
bg1.window.one_invert = data & 0x01;
- return;
+ break;
}
case 0x2124: { //W34SEL
@@ -553,7 +555,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
bg3.window.two_invert = data & 0x04;
bg3.window.one_enable = data & 0x02;
bg3.window.one_invert = data & 0x01;
- return;
+ break;
}
case 0x2125: { //WOBJSEL
@@ -565,27 +567,27 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
sprite.window.two_invert = data & 0x04;
sprite.window.one_enable = data & 0x02;
sprite.window.one_invert = data & 0x01;
- return;
+ break;
}
case 0x2126: { //WH0
regs.window_one_left = data;
- return;
+ break;
}
case 0x2127: { //WH1
regs.window_one_right = data;
- return;
+ break;
}
case 0x2128: { //WH2
regs.window_two_left = data;
- return;
+ break;
}
case 0x2129: { //WH3
regs.window_two_right = data;
- return;
+ break;
}
case 0x212a: { //WBGLOG
@@ -593,13 +595,13 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
bg3.window.mask = (data >> 4) & 3;
bg2.window.mask = (data >> 2) & 3;
bg1.window.mask = (data >> 0) & 3;
- return;
+ break;
}
case 0x212b: { //WOBJLOG
screen.window.mask = (data >> 2) & 3;
sprite.window.mask = (data >> 0) & 3;
- return;
+ break;
}
case 0x212c: { //TM
@@ -608,7 +610,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
bg3.regs.main_enable = data & 0x04;
bg2.regs.main_enable = data & 0x02;
bg1.regs.main_enable = data & 0x01;
- return;
+ break;
}
case 0x212d: { //TS
@@ -617,7 +619,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
bg3.regs.sub_enable = data & 0x04;
bg2.regs.sub_enable = data & 0x02;
bg1.regs.sub_enable = data & 0x01;
- return;
+ break;
}
case 0x212e: { //TMW
@@ -626,7 +628,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
bg3.window.main_enable = data & 0x04;
bg2.window.main_enable = data & 0x02;
bg1.window.main_enable = data & 0x01;
- return;
+ break;
}
case 0x212f: { //TSW
@@ -635,7 +637,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
bg3.window.sub_enable = data & 0x04;
bg2.window.sub_enable = data & 0x02;
bg1.window.sub_enable = data & 0x01;
- return;
+ break;
}
case 0x2130: { //CGWSEL
@@ -643,7 +645,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
screen.window.sub_mask = (data >> 4) & 3;
screen.regs.addsub_mode = data & 0x02;
screen.regs.direct_color = data & 0x01;
- return;
+ break;
}
case 0x2131: { //CGADDSUB
@@ -656,7 +658,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
screen.regs.color_enable[2] = data & 0x04;
screen.regs.color_enable[1] = data & 0x02;
screen.regs.color_enable[0] = data & 0x01;
- return;
+ break;
}
case 0x2132: { //COLDATA
@@ -664,7 +666,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
if(data & 0x40) screen.regs.color_g = data & 0x1f;
if(data & 0x20) screen.regs.color_r = data & 0x1f;
screen.regs.color = (screen.regs.color_b << 10) | (screen.regs.color_g << 5) | (screen.regs.color_r << 0);
- return;
+ break;
}
case 0x2133: { //SETINI
@@ -675,12 +677,17 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
regs.interlace = data & 0x01;
mmio_update_video_mode();
sprite.list_valid = false;
- return;
+ break;
}
}
+
+ pthread_mutex_unlock(&render_mutex);
}
void PPU::mmio_reset() {
+
+ pthread_mutex_lock(&render_mutex);
+
//internal
regs.ppu1_mdr = 0;
regs.ppu2_mdr = 0;
@@ -886,6 +893,8 @@ void PPU::mmio_reset() {
sprite.regs.range_over = 0;
mmio_update_video_mode();
+
+ pthread_mutex_unlock(&render_mutex);
}
#endif
diff --git a/sfc/alt/ppu-performance/ppu.cpp b/sfc/alt/ppu-performance/ppu.cpp
index b598cc70..c3cee939 100644
--- a/sfc/alt/ppu-performance/ppu.cpp
+++ b/sfc/alt/ppu-performance/ppu.cpp
@@ -27,6 +27,25 @@ void PPU::synchronize_cpu() {
void PPU::Enter() { ppu.enter(); }
+//render thread entry point: arg is the owning PPU (passed to pthread_create); renders one scanline per request from enter()
+void *PPU::render_thread_fn(void *arg) {
+  PPU *ppu = (SuperFamicom::PPU *)arg;
+  while(true) {
+    pthread_mutex_lock(&ppu->render_mutex);
+    while (!ppu->render_busy)
+      pthread_cond_wait(&ppu->render_cond, &ppu->render_mutex);
+    pthread_mutex_unlock(&ppu->render_mutex);
+    //render_scanline() takes render_mutex itself, so it is called with the mutex released
+    ppu->render_scanline();
+    //clear render_busy under render_mutex: enter() reads it under the same lock, and volatile alone gives no ordering
+    pthread_mutex_lock(&ppu->render_mutex);
+    ppu->render_busy = 0;
+    pthread_cond_signal(&ppu->render_cond);
+    pthread_mutex_unlock(&ppu->render_mutex);
+  }
+  return NULL;
+}
+
void PPU::enter() {
while(true) {
if(scheduler.sync == Scheduler::SynchronizeMode::All) {
@@ -36,7 +55,15 @@ void PPU::enter() {
scanline();
if(vcounter() < display.height && vcounter()) {
add_clocks(512);
- render_scanline();
+
+ pthread_mutex_lock(&render_mutex);
+ while (render_busy)
+ pthread_cond_wait(&render_cond, &render_mutex);
+
+ render_busy = 1;
+ pthread_cond_signal(&render_cond);
+ pthread_mutex_unlock(&render_mutex);
+
add_clocks(lineclocks() - 512);
} else {
add_clocks(lineclocks());
@@ -52,11 +79,17 @@ void PPU::add_clocks(unsigned clocks) {
void PPU::render_scanline() {
if(display.framecounter) return; //skip this frame?
+
+ pthread_mutex_lock(&render_mutex);
bg1.scanline();
bg2.scanline();
bg3.scanline();
bg4.scanline();
+ pthread_mutex_unlock(&render_mutex);
+
if(regs.display_disable) return screen.render_black();
+
+ pthread_mutex_lock(&render_mutex);
screen.scanline();
bg1.render();
bg2.render();
@@ -64,6 +97,7 @@ void PPU::render_scanline() {
bg4.render();
sprite.render();
screen.render();
+ pthread_mutex_unlock(&render_mutex);
}
void PPU::scanline() {
@@ -141,6 +175,11 @@ screen(*this) {
display.height = 224;
display.frameskip = 0;
display.framecounter = 0;
+
+ render_busy = 0;
+ pthread_cond_init(&render_cond, NULL);
+ pthread_mutex_init(&render_mutex, NULL);
+ pthread_create(&render_thread, NULL, render_thread_fn, this);
}
PPU::~PPU() {
diff --git a/sfc/alt/ppu-performance/ppu.hpp b/sfc/alt/ppu-performance/ppu.hpp
index b3427e29..516209a1 100644
--- a/sfc/alt/ppu-performance/ppu.hpp
+++ b/sfc/alt/ppu-performance/ppu.hpp
@@ -1,3 +1,5 @@
+#include <pthread.h>
+
struct PPU : Thread, public PPUcounter {
uint8 vram[64 * 1024];
uint8 oam[544];
@@ -58,6 +60,12 @@ private:
void add_clocks(unsigned clocks);
void render_scanline();
+ pthread_t render_thread;
+ volatile int render_busy;
+ pthread_cond_t render_cond;
+ pthread_mutex_t render_mutex;
+ static void *render_thread_fn(void *arg);
+
friend class PPU::Cache;
friend class PPU::Background;
friend class PPU::Sprite;
diff --git a/target-libretro/Makefile b/target-libretro/Makefile
index 997cd48b..56e316bf 100644
--- a/target-libretro/Makefile
+++ b/target-libretro/Makefile
@@ -46,7 +46,7 @@ obj/libretro-$(profile).o: $(ui)/libretro.cpp $(ui)/*
#targets
build: $(objects)
ifeq ($(platform),linux)
- $(compiler) -o out/bsnes_$(profile)_libretro.so -shared $(objects) -ldl -Wl,--no-undefined -Wl,--version-script=$(ui)/link.T
+ $(compiler) -o out/bsnes_$(profile)_libretro.so -shared $(objects) -ldl -Wl,--no-undefined -Wl,--version-script=$(ui)/link.T -lpthread
else ifneq (,$(findstring ios,$(platform)))
ifeq ($(platform),ios-arm64)
$(compiler) -o out/bsnes_$(profile)_libretro_ios.dylib -dynamiclib $(objects) -isysroot $(IOSSDK) -arch arm64
--
2.11.0