Skip to content

Commit

Permalink
gpu: rework dma vs busy timing
Browse files Browse the repository at this point in the history
Maybe an actual FIFO should be implemented instead someday.
libretro#809
  • Loading branch information
notaz committed Dec 13, 2023
1 parent 42dde52 commit 8412166
Show file tree
Hide file tree
Showing 13 changed files with 178 additions and 142 deletions.
2 changes: 1 addition & 1 deletion frontend/plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ extern void GPUwriteDataMem(uint32_t *, int);
extern uint32_t GPUreadStatus(void);
extern uint32_t GPUreadData(void);
extern void GPUreadDataMem(uint32_t *, int);
extern long GPUdmaChain(uint32_t *, uint32_t, uint32_t *);
extern long GPUdmaChain(uint32_t *, uint32_t, uint32_t *, int32_t *);
extern void GPUupdateLace(void);
extern long GPUfreeze(uint32_t, void *);
extern void GPUvBlank(int, int);
Expand Down
2 changes: 1 addition & 1 deletion libpcsxcore/plugins.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ typedef void (CALLBACK* GPUwriteDataMem)(uint32_t *, int);
typedef uint32_t (CALLBACK* GPUreadStatus)(void);
typedef uint32_t (CALLBACK* GPUreadData)(void);
typedef void (CALLBACK* GPUreadDataMem)(uint32_t *, int);
typedef long (CALLBACK* GPUdmaChain)(uint32_t *,uint32_t, uint32_t *);
typedef long (CALLBACK* GPUdmaChain)(uint32_t *, uint32_t, uint32_t *, int32_t *);
typedef void (CALLBACK* GPUupdateLace)(void);
typedef void (CALLBACK* GPUmakeSnapshot)(void);
typedef void (CALLBACK* GPUkeypressed)(int);
Expand Down
33 changes: 21 additions & 12 deletions libpcsxcore/psxdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU
DMA_INTERRUPT(4);
}

#if 0
// Taken from PEOPS SOFTGPU
static inline boolean CheckForEndlessLoop(u32 laddr, u32 *lUsedAddr) {
if (laddr == lUsedAddr[1]) return TRUE;
Expand Down Expand Up @@ -130,11 +131,12 @@ static u32 gpuDmaChainSize(u32 addr) {

return size;
}
#endif

void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU
u32 *ptr, madr_next, *madr_next_p, size;
u32 *ptr, madr_next, *madr_next_p;
u32 words, words_left, words_max, words_copy;
int do_walking;
int cycles_sum, cycles_last_cmd = 0, do_walking;

madr &= ~3;
switch (chcr) {
Expand Down Expand Up @@ -195,18 +197,19 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU
do_walking = Config.hacks.gpu_slow_list_walking;
madr_next_p = do_walking ? &madr_next : NULL;

size = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, madr_next_p);
if ((int)size <= 0)
size = gpuDmaChainSize(madr);
cycles_sum = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff,
madr_next_p, &cycles_last_cmd);

HW_DMA2_MADR = SWAPu32(madr_next);

// a hack for Judge Dredd which is annoyingly sensitive to timing
if (Config.hacks.gpu_timing1024)
size = 1024;
cycles_sum = 1024;

psxRegs.gpuIdleAfter = psxRegs.cycle + size + 16;
set_event(PSXINT_GPUDMA, size);
psxRegs.gpuIdleAfter = psxRegs.cycle + cycles_sum + cycles_last_cmd;
set_event(PSXINT_GPUDMA, cycles_sum);
//printf("%u dma2cf: %d,%d %08x\n", psxRegs.cycle, cycles_sum,
// cycles_last_cmd, HW_DMA2_MADR);
return;

default:
Expand All @@ -221,11 +224,17 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU
void gpuInterrupt() {
if (HW_DMA2_CHCR == SWAP32(0x01000401) && !(HW_DMA2_MADR & SWAP32(0x800000)))
{
u32 size, madr_next = 0xffffff, madr = SWAPu32(HW_DMA2_MADR);
size = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, &madr_next);
u32 madr_next = 0xffffff, madr = SWAPu32(HW_DMA2_MADR);
int cycles_sum, cycles_last_cmd = 0;
cycles_sum = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff,
&madr_next, &cycles_last_cmd);
HW_DMA2_MADR = SWAPu32(madr_next);
psxRegs.gpuIdleAfter = psxRegs.cycle + size + 64;
set_event(PSXINT_GPUDMA, size);
if ((s32)(psxRegs.gpuIdleAfter - psxRegs.cycle) > 0)
cycles_sum += psxRegs.gpuIdleAfter - psxRegs.cycle;
psxRegs.gpuIdleAfter = psxRegs.cycle + cycles_sum + cycles_last_cmd;
set_event(PSXINT_GPUDMA, cycles_sum);
//printf("%u dma2cn: %d,%d %08x\n", psxRegs.cycle, cycles_sum,
// cycles_last_cmd, HW_DMA2_MADR);
return;
}
if (HW_DMA2_CHCR & SWAP32(0x01000000))
Expand Down
54 changes: 28 additions & 26 deletions plugins/dfxvideo/gpulib_if.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,12 +306,13 @@ void renderer_notify_scanout_change(int x, int y)
#include "../gpulib/gpu_timing.h"
extern const unsigned char cmd_lengths[256];

int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd)
int do_cmd_list(uint32_t *list, int list_len,
int *cycles_sum_out, int *cycles_last, int *last_cmd)
{
int cpu_cycles_sum = 0, cpu_cycles = *cycles_last;
unsigned int cmd = 0, len;
uint32_t *list_start = list;
uint32_t *list_end = list + list_len;
u32 cpu_cycles = 0;

for (; list < list_end; list += 1 + len)
{
Expand Down Expand Up @@ -341,7 +342,7 @@ int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd

while(1)
{
cpu_cycles += gput_line(0);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));

if(list_position >= list_end) {
cmd = -1;
Expand All @@ -366,7 +367,7 @@ int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd

while(1)
{
cpu_cycles += gput_line(0);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));

if(list_position >= list_end) {
cmd = -1;
Expand All @@ -387,8 +388,8 @@ int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd
#ifdef TEST
case 0xA0: // sys -> vid
{
u32 load_width = LE2HOST32(slist[4]);
u32 load_height = LE2HOST32(slist[5]);
u32 load_width = LE2HOST16(slist[4]);
u32 load_height = LE2HOST16(slist[5]);
u32 load_size = load_width * load_height;

len += load_size / 2;
Expand All @@ -398,40 +399,41 @@ int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd

// timing
case 0x02:
cpu_cycles += gput_fill(LE2HOST32(slist[4]) & 0x3ff,
LE2HOST32(slist[5]) & 0x1ff);
gput_sum(cpu_cycles_sum, cpu_cycles,
gput_fill(LE2HOST16(slist[4]) & 0x3ff, LE2HOST16(slist[5]) & 0x1ff));
break;
case 0x20 ... 0x23: cpu_cycles += gput_poly_base(); break;
case 0x24 ... 0x27: cpu_cycles += gput_poly_base_t(); break;
case 0x28 ... 0x2B: cpu_cycles += gput_quad_base(); break;
case 0x2C ... 0x2F: cpu_cycles += gput_quad_base_t(); break;
case 0x30 ... 0x33: cpu_cycles += gput_poly_base_g(); break;
case 0x34 ... 0x37: cpu_cycles += gput_poly_base_gt(); break;
case 0x38 ... 0x3B: cpu_cycles += gput_quad_base_g(); break;
case 0x3C ... 0x3F: cpu_cycles += gput_quad_base_gt(); break;
case 0x40 ... 0x47: cpu_cycles += gput_line(0); break;
case 0x50 ... 0x57: cpu_cycles += gput_line(0); break;
case 0x20 ... 0x23: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base()); break;
case 0x24 ... 0x27: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t()); break;
case 0x28 ... 0x2B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base()); break;
case 0x2C ... 0x2F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); break;
case 0x30 ... 0x33: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g()); break;
case 0x34 ... 0x37: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt()); break;
case 0x38 ... 0x3B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g()); break;
case 0x3C ... 0x3F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); break;
case 0x40 ... 0x47: gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break;
case 0x50 ... 0x57: gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break;
case 0x60 ... 0x63:
cpu_cycles += gput_sprite(LE2HOST32(slist[4]) & 0x3ff,
LE2HOST32(slist[5]) & 0x1ff);
gput_sum(cpu_cycles_sum, cpu_cycles,
gput_sprite(LE2HOST16(slist[4]) & 0x3ff, LE2HOST16(slist[5]) & 0x1ff));
break;
case 0x64 ... 0x67:
cpu_cycles += gput_sprite(LE2HOST32(slist[6]) & 0x3ff,
LE2HOST32(slist[7]) & 0x1ff);
gput_sum(cpu_cycles_sum, cpu_cycles,
gput_sprite(LE2HOST16(slist[6]) & 0x3ff, LE2HOST16(slist[7]) & 0x1ff));
break;
case 0x68 ... 0x6B: cpu_cycles += gput_sprite(1, 1); break;
case 0x68 ... 0x6B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); break;
case 0x70 ... 0x73:
case 0x74 ... 0x77: cpu_cycles += gput_sprite(8, 8); break;
case 0x74 ... 0x77: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(8, 8)); break;
case 0x78 ... 0x7B:
case 0x7C ... 0x7F: cpu_cycles += gput_sprite(16, 16); break;
case 0x7C ... 0x7F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(16, 16)); break;
}
}

breakloop:
gpu.ex_regs[1] &= ~0x1ff;
gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff;

*cpu_cycles_out += cpu_cycles;
*cycles_sum_out += cpu_cycles_sum;
*cycles_last = cpu_cycles;
*last_cmd = cmd;
return list - list_start;
}
Expand Down
3 changes: 2 additions & 1 deletion plugins/gpu-gles/gpulib_if.c
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,8 @@ void renderer_notify_scanout_change(int x, int y)
extern const unsigned char cmd_lengths[256];

// XXX: mostly dupe code from soft peops
int do_cmd_list(unsigned int *list, int list_len, int *cycles, int *last_cmd)
int do_cmd_list(uint32_t *list, int list_len,
int *cycles_sum_out, int *cycles_last, int *last_cmd)
{
unsigned int cmd, len;
unsigned int *list_start = list;
Expand Down
2 changes: 1 addition & 1 deletion plugins/gpu_neon/psx_gpu/psx_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu);

void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram);
u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
s32 *cpu_cycles, u32 *last_command);
s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command);

void triangle_benchmark(psx_gpu_struct *psx_gpu);

Expand Down
Loading

0 comments on commit 8412166

Please sign in to comment.