Skip to content

Commit

Permalink
Merge branch 'master' into libretro
Browse files Browse the repository at this point in the history
  • Loading branch information
notaz committed Jan 6, 2025
2 parents 3fc26d1 + db2804f commit 60e75db
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 109 deletions.
17 changes: 17 additions & 0 deletions plugins/gpu_neon/psx_gpu/psx_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,11 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
render_block_handler_struct *render_block_handler =
psx_gpu->render_block_handler;

#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
// the asm doesn't save the callee-saved vector regs q4-q7, so declare them
// as clobbered here to make the compiler preserve them for our caller
__asm__ __volatile__("":::"q4","q5","q6","q7");
#endif

render_block_handler->texture_blocks(psx_gpu);
render_block_handler->shade_blocks(psx_gpu);
render_block_handler->blend_blocks(psx_gpu);
Expand All @@ -538,6 +543,9 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
#endif

psx_gpu->num_blocks = 0;
#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
__asm__ __volatile__("":::"q4","q5","q6","q7");
#endif
}
}

Expand Down Expand Up @@ -3037,6 +3045,11 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu,
triangle_set_direction(y_direction_b, y_delta_b);
triangle_set_direction(y_direction_c, y_delta_c);

#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
// the asm doesn't bother to save callee-save vector regs, so do it here
__asm__ __volatile__("vstmia %0, {q4-q7}" :: "r"(psx_gpu->saved_q4_q7) : "memory");
#endif

compute_all_gradients(psx_gpu, a, b, c);

switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) |
Expand Down Expand Up @@ -3163,6 +3176,10 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu,
&(render_triangle_block_handlers[render_state]);
((setup_blocks_function_type *)psx_gpu->render_block_handler->setup_blocks)
(psx_gpu);

#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
__asm__ __volatile__("vldmia %0, {q4-q7}" :: "r"(psx_gpu->saved_q4_q7));
#endif
}

void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
Expand Down
6 changes: 5 additions & 1 deletion plugins/gpu_neon/psx_gpu/psx_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,11 @@ typedef struct

// Align up to 64 byte boundary to keep the upcoming buffers cache line
// aligned, also make reachable with single immediate addition
u8 reserved_a[180 + 9*4 - 9*sizeof(void *)];
u8 reserved_a[68 + 9*4 - 9*sizeof(void *)];

// space for saving registers: on the C call to flush_render_block_buffer()
// and for the asm code
u32 saved_tmp[48 / sizeof(u32)];
u32 saved_q4_q7[64 / sizeof(u32)];

// 8KB
block_struct blocks[MAX_BLOCKS_PER_ROW];
Expand Down
Loading

0 comments on commit 60e75db

Please sign in to comment.