From 502ea36eb42c455c063ce30a5ee93ddd75f839e6 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Mon, 16 Aug 2021 21:03:52 +0000 Subject: [PATCH 001/597] Fixed Linked List DMA end marker. (#183) Taken from PCSX Redux project. https://github.com/grumpycoders/pcsx-redux/pull/396/commits/a6401da3a4e7b4860b0f7a7f679cf9a93e739caa --- libpcsxcore/psxdma.c | 3 ++- plugins/dfxvideo/gpu.c | 4 ++-- plugins/gpu-gles/gpuPlugin.c | 5 ++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index 03ee56391..cb84fbccf 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -122,7 +122,8 @@ static u32 gpuDmaChainSize(u32 addr) { // next 32-bit pointer addr = psxMu32( addr & ~0x3 ) & 0xffffff; size += 1; - } while (addr != 0xffffff); + } while (!(addr & 0x800000)); // contrary to some documentation, the end-of-linked-list marker is not actually 0xFF'FFFF + // any pointer with bit 23 set will do. return size; } diff --git a/plugins/dfxvideo/gpu.c b/plugins/dfxvideo/gpu.c index 3d20dfa4e..649cb429e 100644 --- a/plugins/dfxvideo/gpu.c +++ b/plugins/dfxvideo/gpu.c @@ -1060,8 +1060,8 @@ long CALLBACK GPUdmaChain(uint32_t * baseAddrL, uint32_t addr) if(count>0) GPUwriteDataMem(&baseAddrL[dmaMem>>2],count); addr = GETLE32(&baseAddrL[addr>>2])&0xffffff; - } - while (addr != 0xffffff); + } while (!(addr & 0x800000)); // contrary to some documentation, the end-of-linked-list marker is not actually 0xFF'FFFF + // any pointer with bit 23 set will do. 
GPUIsIdle; diff --git a/plugins/gpu-gles/gpuPlugin.c b/plugins/gpu-gles/gpuPlugin.c index 60570ace1..6d3ca14c0 100644 --- a/plugins/gpu-gles/gpuPlugin.c +++ b/plugins/gpu-gles/gpuPlugin.c @@ -2205,9 +2205,8 @@ do if(count>0) GPUwriteDataMem(&baseAddrL[dmaMem>>2],count); addr = baseAddrL[addr>>2]&0xffffff; - } -while (addr != 0xffffff); - + } while (!(addr & 0x800000)); // contrary to some documentation, the end-of-linked-list marker is not actually 0xFF'FFFF + // any pointer with bit 23 set will do. GPUIsIdle; return 0; From b7159e1ade78d9e8aa44b2b065c9a38dd2a2b306 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Mon, 16 Aug 2021 21:18:58 +0000 Subject: [PATCH 002/597] gte: Fix gteH division and sign extension (from PCSX4ALL) (#185) gteH register is u16, not s16. DIVIDE macro/func assumed it was s16 for some reason. Behavior now matches Mednafen. Co-authored-by: senquack --- libpcsxcore/gte.c | 19 ++++++++++++++++++- libpcsxcore/gte_divider.h | 2 +- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/gte.c b/libpcsxcore/gte.c index 77dff1b11..97a4ccd0d 100644 --- a/libpcsxcore/gte.c +++ b/libpcsxcore/gte.c @@ -149,7 +149,13 @@ #define gteBFC (((s32 *)regs->CP2C.r)[23]) #define gteOFX (((s32 *)regs->CP2C.r)[24]) #define gteOFY (((s32 *)regs->CP2C.r)[25]) -#define gteH (regs->CP2C.p[26].sw.l) +// senquack - gteH register is u16, not s16, and used in GTE that way. +// HOWEVER when read back by CPU using CFC2, it will be incorrectly +// sign-extended by bug in original hardware, according to Nocash docs +// GTE section 'Screen Offset and Distance'. The emulator does this +// sign extension when it is loaded to GTE by CTC2. 
+//#define gteH (psxRegs.CP2C.p[26].sw.l) +#define gteH (psxRegs.CP2C.p[26].w.l) #define gteDQA (regs->CP2C.p[27].sw.l) #define gteDQB (((s32 *)regs->CP2C.r)[28]) #define gteZSF3 (regs->CP2C.p[29].sw.l) @@ -254,7 +260,18 @@ static inline u32 limE_(psxCP2Regs *regs, u32 result) { #define A3U(x) (x) #endif + +//senquack - n param should be unsigned (will be 'gteH' reg which is u16) +#ifdef GTE_USE_NATIVE_DIVIDE +INLINE u32 DIVIDE(u16 n, u16 d) { + if (n < d * 2) { + return ((u32)n << 16) / d; + } + return 0xffffffff; +} +#else #include "gte_divider.h" +#endif // GTE_USE_NATIVE_DIVIDE #ifndef FLAGLESS diff --git a/libpcsxcore/gte_divider.h b/libpcsxcore/gte_divider.h index 0c988266a..99b01eb3e 100644 --- a/libpcsxcore/gte_divider.h +++ b/libpcsxcore/gte_divider.h @@ -15,4 +15,4 @@ * along with this program; if not, see . */ -u32 DIVIDE(s16 n, u16 d); +u32 DIVIDE(u16 n, u16 d); From 1193aee2df43e4d293471ec2cb9e1f17a5c9d394 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 17 Aug 2021 00:26:01 +0300 Subject: [PATCH 003/597] gte_divider: should include it's own header Would have noticed the wrong divider type with this. --- libpcsxcore/gte_divider.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libpcsxcore/gte_divider.c b/libpcsxcore/gte_divider.c index e1d6c6b7f..a4cd0d2a6 100644 --- a/libpcsxcore/gte_divider.c +++ b/libpcsxcore/gte_divider.c @@ -10,6 +10,7 @@ */ #include "gte.h" +#include "gte_divider.h" static const u8 table[] = { From 00f0670c7acba4e561bdfcab076a01c2760838c8 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Mon, 16 Aug 2021 21:37:34 +0000 Subject: [PATCH 004/597] CHD support from libretro's fork. (#188) We are implementing it as a sub-module for maintenance reasons. 
Co-authored-by: aliaspider --- .gitmodules | 3 + Makefile | 10 +++ frontend/menu.c | 3 + libchdr | 1 + libpcsxcore/cdriso.c | 142 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 159 insertions(+) create mode 160000 libchdr diff --git a/.gitmodules b/.gitmodules index f93599e3f..5f7f360cd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "warm"] path = frontend/warm url = git://notaz.gp2x.de/~notaz/warm.git +[submodule "libchdr"] + path = libchdr + url = https://github.com/rtissera/libchdr.git diff --git a/Makefile b/Makefile index 0a3b1fece..c63fd1f5c 100644 --- a/Makefile +++ b/Makefile @@ -132,6 +132,16 @@ endif # cdrcimg OBJS += plugins/cdrcimg/cdrcimg.o +ifeq "$(CHD_SUPPORT)" "1" +OBJS += libchdr/src/libchdr_bitstream.o +OBJS += libchdr/src/libchdr_cdrom.o +OBJS += libchdr/src/libchdr_chd.o +OBJS += libchdr/src/libchdr_flac.o +OBJS += libchdr/src/libchdr_huffman.o +OBJS += libchdr/deps/lzma-19.00/src/Alloc.o libchdr/deps/lzma-19.00/src/Bra86.o libchdr/deps/lzma-19.00/src/BraIA64.o libchdr/deps/lzma-19.00/src/CpuArch.o libchdr/deps/lzma-19.00/src/Delta.o +OBJS += libchdr/deps/lzma-19.00/src/LzFind.o libchdr/deps/lzma-19.00/src/Lzma86Dec.o libchdr/deps/lzma-19.00/src/LzmaDec.o libchdr/deps/lzma-19.00/src/LzmaEnc.o libchdr/deps/lzma-19.00/src/Sort.o +CFLAGS += -DHAVE_CHD -D_7ZIP_ST -Ilibchdr/include/libchdr -Ilibchdr/include/dr_libs -Ilibchdr/include -Ilibchdr/deps/lzma-19.00/include +endif # dfinput OBJS += plugins/dfinput/main.o plugins/dfinput/pad.o plugins/dfinput/guncon.o diff --git a/frontend/menu.c b/frontend/menu.c index 6d753735b..c806aa9eb 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -707,6 +707,9 @@ static int menu_load_config(int is_game) static const char *filter_exts[] = { "bin", "img", "mdf", "iso", "cue", "z", + #ifdef HAVE_CHD + "chd", + #endif "bz", "znx", "pbp", "cbn", NULL }; diff --git a/libchdr b/libchdr new file mode 160000 index 000000000..15ff8d675 --- /dev/null +++ b/libchdr @@ -0,0 +1 @@ 
+Subproject commit 15ff8d67554f8651f4c971f4d42176214b96ce7b diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index dca64fadf..f8a4d21aa 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -39,6 +39,10 @@ #include #include +#ifdef HAVE_CHD +#include "chd.h" +#endif + #define OFF_T_MSB ((off_t)1 << (sizeof(off_t) * 8 - 1)) unsigned int cdrIsoMultidiskCount; @@ -92,6 +96,19 @@ static struct { unsigned int sector_in_blk; } *compr_img; +#ifdef HAVE_CHD +typedef struct { + unsigned char (*buffer)[CD_FRAMESIZE_RAW + SUB_FRAMESIZE]; + chd_file* chd; + const chd_header* header; + unsigned int sectors_per_hunk; + unsigned int current_hunk; + unsigned int sector_in_hunk; +} CHD_IMG; + +static CHD_IMG *chd_img; +#endif + int (*cdimg_read_func)(FILE *f, unsigned int base, void *dest, int sector); char* CALLBACK CDR__getDriveLetter(void); @@ -1029,6 +1046,84 @@ static int handlecbin(const char *isofile) { return -1; } +#ifdef HAVE_CHD +static int handlechd(const char *isofile) { + int frame_offset = 0; + int file_offset = 0; + + chd_img = (CHD_IMG *)calloc(1, sizeof(*chd_img)); + if (chd_img == NULL) + goto fail_io; + + if(chd_open(isofile, CHD_OPEN_READ, NULL, &chd_img->chd) != CHDERR_NONE) + goto fail_io; + + chd_img->header = chd_get_header(chd_img->chd); + + chd_img->buffer = (unsigned char (*)[CD_FRAMESIZE_RAW + SUB_FRAMESIZE])malloc(chd_img->header->hunkbytes); + if (chd_img->buffer == NULL) + goto fail_io; + + chd_img->sectors_per_hunk = chd_img->header->hunkbytes / (CD_FRAMESIZE_RAW + SUB_FRAMESIZE); + chd_img->current_hunk = (unsigned int)-1; + + cddaBigEndian = TRUE; + + numtracks = 0; + memset(ti, 0, sizeof(ti)); + + while (1) + { + struct { + char type[64]; + char subtype[32]; + char pgtype[32]; + char pgsub[32]; + uint32_t track; + uint32_t frames; + uint32_t pregap; + uint32_t postgap; + } md = {}; + char meta[256]; + uint32_t meta_size = 0; + + if (chd_get_metadata(chd_img->chd, CDROM_TRACK_METADATA2_TAG, numtracks, meta, sizeof(meta), 
&meta_size, NULL, NULL) == CHDERR_NONE) + sscanf(meta, CDROM_TRACK_METADATA2_FORMAT, &md.track, md.type, md.subtype, &md.frames, &md.pregap, md.pgtype, md.pgsub, &md.postgap); + else if (chd_get_metadata(chd_img->chd, CDROM_TRACK_METADATA_TAG, numtracks, meta, sizeof(meta), &meta_size, NULL, NULL) == CHDERR_NONE) + sscanf(meta, CDROM_TRACK_METADATA_FORMAT, &md.track, md.type, md.subtype, &md.frames); + else + break; + + if(md.track == 1) + md.pregap = 150; + else + sec2msf(msf2sec(ti[md.track-1].length) + md.pregap, ti[md.track-1].length); + + ti[md.track].type = !strncmp(md.type, "AUDIO", 5) ? CDDA : DATA; + + sec2msf(frame_offset + md.pregap, ti[md.track].start); + sec2msf(md.frames, ti[md.track].length); + + ti[md.track].start_offset = file_offset; + + frame_offset += md.pregap + md.frames + md.postgap; + file_offset += md.frames + md.postgap; + numtracks++; + } + + if (numtracks) + return 0; + +fail_io: + if (chd_img != NULL) { + free(chd_img->buffer); + free(chd_img); + chd_img = NULL; + } + return -1; +} +#endif + // this function tries to get the .sub file of the given .img static int opensubfile(const char *isoname) { char subname[MAXPATHLEN]; @@ -1190,6 +1285,30 @@ static int cdread_compressed(FILE *f, unsigned int base, void *dest, int sector) return CD_FRAMESIZE_RAW; } +#ifdef HAVE_CHD +static int cdread_chd(FILE *f, unsigned int base, void *dest, int sector) +{ + int hunk; + + if (base) + sector += base; + + hunk = sector / chd_img->sectors_per_hunk; + chd_img->sector_in_hunk = sector % chd_img->sectors_per_hunk; + + if (hunk != chd_img->current_hunk) + { + chd_read(chd_img->chd, hunk, chd_img->buffer); + chd_img->current_hunk = hunk; + } + + if (dest != cdbuffer) // copy avoid HACK + memcpy(dest, chd_img->buffer[chd_img->sector_in_hunk], + CD_FRAMESIZE_RAW); + return CD_FRAMESIZE_RAW; +} +#endif + static int cdread_2048(FILE *f, unsigned int base, void *dest, int sector) { int ret; @@ -1209,6 +1328,12 @@ static unsigned char * CALLBACK 
ISOgetBuffer_compr(void) { return compr_img->buff_raw[compr_img->sector_in_blk] + 12; } +#ifdef HAVE_CHD +static unsigned char *ISOgetBuffer_chd(void) { + return chd_img->buffer[chd_img->sector_in_hunk] + 12; +} +#endif + static unsigned char * CALLBACK ISOgetBuffer(void) { return cdbuffer + 12; } @@ -1276,6 +1401,14 @@ static long CALLBACK ISOopen(void) { CDR_getBuffer = ISOgetBuffer_compr; cdimg_read_func = cdread_compressed; } + +#ifdef HAVE_CHD + else if (handlechd(GetIsoFile()) == 0) { + printf("[chd]"); + CDR_getBuffer = ISOgetBuffer_chd; + cdimg_read_func = cdread_chd; + } +#endif if (!subChanMixed && opensubfile(GetIsoFile()) == 0) { SysPrintf("[+sub]"); @@ -1363,6 +1496,15 @@ static long CALLBACK ISOclose(void) { free(compr_img); compr_img = NULL; } + +#ifdef HAVE_CHD + if (chd_img != NULL) { + chd_close(chd_img->chd); + free(chd_img->buffer); + free(chd_img); + chd_img = NULL; + } +#endif for (i = 1; i <= numtracks; i++) { if (ti[i].handle != NULL) { From 3cf51e0893f3646f4f6217b9701c0ab199d8a6ac Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 17 Aug 2021 01:09:20 +0300 Subject: [PATCH 005/597] deal with some more annoying warnings Especially main.c as it's regenerated after each commit. 
--- libpcsxcore/debug.c | 5 +++++ libpcsxcore/disr3000a.c | 5 +++++ libpcsxcore/plugins.c | 12 ++++++------ libpcsxcore/psxbios.c | 6 ++++-- libpcsxcore/psxcommon.h | 7 +++++++ libpcsxcore/psxcounters.c | 3 +-- 6 files changed, 28 insertions(+), 10 deletions(-) diff --git a/libpcsxcore/debug.c b/libpcsxcore/debug.c index 763dc453f..d7b2d21ec 100644 --- a/libpcsxcore/debug.c +++ b/libpcsxcore/debug.c @@ -20,6 +20,11 @@ #include "debug.h" #include "socket.h" +// XXX: don't care but maybe fix it someday +#if defined(__GNUC__) && __GNUC__ >= 7 +#pragma GCC diagnostic ignored "-Wrestrict" +#endif + /* PCSX Debug console protocol description, version 1.0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/libpcsxcore/disr3000a.c b/libpcsxcore/disr3000a.c index 23667c1ae..a350da1a1 100644 --- a/libpcsxcore/disr3000a.c +++ b/libpcsxcore/disr3000a.c @@ -23,6 +23,11 @@ #include "psxcommon.h" +// XXX: don't care but maybe fix it someday +#if defined(__GNUC__) && __GNUC__ >= 7 +#pragma GCC diagnostic ignored "-Wrestrict" +#endif + char ostr[256]; // Names of registers diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index e6d8a11e1..0423310bc 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -779,11 +779,11 @@ void ReleasePlugins() { if (Config.UseNet && hNETDriver != NULL) NET_shutdown(); - if (hCDRDriver != NULL) SysCloseLibrary(hCDRDriver); hCDRDriver = NULL; - if (hGPUDriver != NULL) SysCloseLibrary(hGPUDriver); hGPUDriver = NULL; - if (hSPUDriver != NULL) SysCloseLibrary(hSPUDriver); hSPUDriver = NULL; - if (hPAD1Driver != NULL) SysCloseLibrary(hPAD1Driver); hPAD1Driver = NULL; - if (hPAD2Driver != NULL) SysCloseLibrary(hPAD2Driver); hPAD2Driver = NULL; + if (hCDRDriver != NULL) { SysCloseLibrary(hCDRDriver); hCDRDriver = NULL; } + if (hGPUDriver != NULL) { SysCloseLibrary(hGPUDriver); hGPUDriver = NULL; } + if (hSPUDriver != NULL) { SysCloseLibrary(hSPUDriver); hSPUDriver = NULL; } + if (hPAD1Driver != NULL) { 
SysCloseLibrary(hPAD1Driver); hPAD1Driver = NULL; } + if (hPAD2Driver != NULL) { SysCloseLibrary(hPAD2Driver); hPAD2Driver = NULL; } if (Config.UseNet && hNETDriver != NULL) { SysCloseLibrary(hNETDriver); hNETDriver = NULL; @@ -802,7 +802,7 @@ void ReleasePlugins() { int ReloadCdromPlugin() { if (hCDRDriver != NULL || cdrIsoActive()) CDR_shutdown(); - if (hCDRDriver != NULL) SysCloseLibrary(hCDRDriver); hCDRDriver = NULL; + if (hCDRDriver != NULL) { SysCloseLibrary(hCDRDriver); hCDRDriver = NULL; } if (UsingIso()) { LoadCDRplugin(NULL); diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index ecd4264d5..8d4cb27f8 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -35,6 +35,10 @@ #include "gpu.h" #include +#if (defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__) +#pragma GCC diagnostic ignored "-Wpointer-sign" +#endif + #undef SysPrintf #define SysPrintf if (Config.PsxOut) printf @@ -2090,8 +2094,6 @@ static void buopen(int mcd, u8 *ptr, u8 *cfg) */ void psxBios_open() { // 0x32 - int i; - char *ptr; void *pa0 = Ra0; #ifdef PSXBIOS_LOG diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index 9f5444ec4..8ef794b1c 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -31,6 +31,13 @@ extern "C" { #include "config.h" +// XXX: don't care but maybe fix it someday +#if defined(__GNUC__) && __GNUC__ >= 8 +#pragma GCC diagnostic ignored "-Wformat-truncation" +#pragma GCC diagnostic ignored "-Wformat-overflow" +#pragma GCC diagnostic ignored "-Wstringop-truncation" +#endif + // System includes #include #include diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index 35823dacd..6b9e2b7a2 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -64,7 +64,6 @@ static const u32 HSyncTotal[] = { 263, 313 }; #define VBlankStart 240 #define VERBOSE_LEVEL 0 -static const s32 VerboseLevel = VERBOSE_LEVEL; /******************************************************************************/ @@ -89,7 
+88,7 @@ static void verboseLog( u32 level, const char *str, ... ) { #if VERBOSE_LEVEL > 0 - if( level <= VerboseLevel ) + if( level <= VERBOSE_LEVEL ) { va_list va; char buf[ 4096 ]; From eedfe8060a20c8a9120ff8cab55110a1e2470887 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Wed, 18 Aug 2021 20:22:43 +0000 Subject: [PATCH 006/597] Add internal database for problematic games. (#182) --- Makefile | 2 +- libpcsxcore/database.c | 36 ++++++++++++++++++++++++++++++++++++ libpcsxcore/database.h | 6 ++++++ libpcsxcore/misc.c | 3 +++ libpcsxcore/sio.h | 1 + 5 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 libpcsxcore/database.c create mode 100644 libpcsxcore/database.h diff --git a/Makefile b/Makefile index c63fd1f5c..29d241843 100644 --- a/Makefile +++ b/Makefile @@ -39,7 +39,7 @@ CFLAGS += -DPCNT endif # core -OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cheat.o libpcsxcore/debug.o \ +OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cheat.o libpcsxcore/database.o libpcsxcore/debug.o \ libpcsxcore/decode_xa.o libpcsxcore/disr3000a.o libpcsxcore/mdec.o \ libpcsxcore/misc.o libpcsxcore/plugins.o libpcsxcore/ppf.o libpcsxcore/psxbios.o \ libpcsxcore/psxcommon.o libpcsxcore/psxcounters.o libpcsxcore/psxdma.o libpcsxcore/psxhle.o \ diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c new file mode 100644 index 000000000..f383e3616 --- /dev/null +++ b/libpcsxcore/database.c @@ -0,0 +1,36 @@ +#include "misc.h" +#include "../plugins/dfsound/spu_config.h" +#include "sio.h" + +/* It's duplicated from emu_if.c */ +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +static const char MemorycardHack_db[8][10] = +{ + /* Lifeforce Tenka, also known as Codename Tenka */ + {"SLES00613"}, + {"SLED00690"}, + {"SLES00614"}, + {"SLES00615"}, + {"SLES00616"}, + {"SLES00617"}, + {"SCUS94409"} +}; + +/* Function for automatic patching according to GameID. 
*/ +void Apply_Hacks_Cdrom() +{ + uint32_t i; + + /* Apply Memory card hack for Codename Tenka. (The game needs one of the memory card slots to be empty) */ + for(i=0;i char CdromId[10] = ""; @@ -389,6 +390,8 @@ int CheckCdrom() { SysPrintf(_("CD-ROM Label: %.32s\n"), CdromLabel); SysPrintf(_("CD-ROM ID: %.9s\n"), CdromId); SysPrintf(_("CD-ROM EXE Name: %.255s\n"), exename); + + Apply_Hacks_Cdrom(); BuildPPFCache(); diff --git a/libpcsxcore/sio.h b/libpcsxcore/sio.h index eff1746a3..a554c2bbd 100644 --- a/libpcsxcore/sio.h +++ b/libpcsxcore/sio.h @@ -34,6 +34,7 @@ extern "C" { #define MCD_SIZE (1024 * 8 * 16) extern char Mcd1Data[MCD_SIZE], Mcd2Data[MCD_SIZE]; +extern char McdDisable[2]; void sioWrite8(unsigned char value); void sioWriteStat16(unsigned short value); From dc0ee8d424293ea687a137ec1ca0440f88a1e5a5 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Wed, 18 Aug 2021 20:24:33 +0000 Subject: [PATCH 007/597] CDRom timings changes (#184) Upstreamed changes from PCSX Redux as seen here : https://github.com/grumpycoders/pcsx-redux/commit/5730e04f0183f37038bc1d133cf9f9092425b90a This apparently fixes Deus Fight in Xenogears. 
Co-authored-by: Nicolas Noble --- libpcsxcore/cdrom.c | 23 +++++++++++++++++++++-- libpcsxcore/cdrom.h | 3 ++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index a725efadd..16b7ca1a3 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -496,6 +496,7 @@ void cdrPlayInterrupt() if (cdr.SetlocPending) { memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); cdr.SetlocPending = 0; + cdr.m_locationChanged = TRUE; } Find_CurTrack(cdr.SetSectorPlay); ReadTrack(cdr.SetSectorPlay); @@ -527,7 +528,15 @@ void cdrPlayInterrupt() } } - CDRMISC_INT(cdReadTime); + if (cdr.m_locationChanged) + { + CDRMISC_INT(cdReadTime * 30); + cdr.m_locationChanged = FALSE; + } + else + { + CDRMISC_INT(cdReadTime); + } // update for CdlGetlocP/autopause generate_subq(cdr.SetSectorPlay); @@ -589,6 +598,7 @@ void cdrInterrupt() { if (cdr.SetlocPending) { memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); cdr.SetlocPending = 0; + cdr.m_locationChanged = TRUE; } // BIOS CD Player @@ -914,6 +924,7 @@ void cdrInterrupt() { if(seekTime > 1000000) seekTime = 1000000; memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); cdr.SetlocPending = 0; + cdr.m_locationChanged = TRUE; } Find_CurTrack(cdr.SetSectorPlay); @@ -1130,7 +1141,13 @@ void cdrReadInterrupt() { cdr.Readed = 0; - CDREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime); + uint32_t delay = (cdr.Mode & MODE_SPEED) ? 
(cdReadTime / 2) : cdReadTime; + if (cdr.m_locationChanged) { + CDREAD_INT(delay * 30); + cdr.m_locationChanged = FALSE; + } else { + CDREAD_INT(delay); + } /* Croc 2: $40 - only FORM1 (*) @@ -1468,6 +1485,8 @@ void cdrReset() { cdr.DriveState = DRIVESTATE_STANDBY; cdr.StatP = STATUS_ROTATING; pTransfer = cdr.Transfer; + cdr.SetlocPending = 0; + cdr.m_locationChanged = FALSE; // BIOS player - default values cdr.AttenuatorLeftToLeft = 0x80; diff --git a/libpcsxcore/cdrom.h b/libpcsxcore/cdrom.h index 543c619db..a37f6baff 100644 --- a/libpcsxcore/cdrom.h +++ b/libpcsxcore/cdrom.h @@ -59,7 +59,8 @@ typedef struct { unsigned char Absolute[3]; } subq; unsigned char TrackChanged; - unsigned char pad1[3]; + boolean m_locationChanged; + unsigned char pad1[2]; unsigned int freeze_ver; unsigned char Prev[4]; From d056f51b31f2856968e2efac75c3d437b2e6c000 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Wed, 18 Aug 2021 20:58:45 +0000 Subject: [PATCH 008/597] [SPU] Emulate SPUSTAT[5:0] as a mirror of SPUCNT[5:0] (#190) Fix is from PCSX-Redux : https://github.com/grumpycoders/pcsx-redux/commit/4e905d7953a26bffd52f486dc5b03c2a19075d01 I have tested the fix against Loonies 8192 (a PSn00bSDK made homebrew game) and it no longer locks up during loading. 
Co-authored-by: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> --- plugins/dfsound/registers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index bb64658d7..d508c0686 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -315,7 +315,7 @@ unsigned short CALLBACK SPUreadRegister(unsigned long reg) return spu.spuCtrl; case H_SPUstat: - return spu.spuStat; + return (spu.spuStat & ~0x3F) | (spu.spuCtrl & 0x3F); case H_SPUaddr: return (unsigned short)(spu.spuAddr>>3); From 3a284665dd7ab36e5d3973d09f9239efb7a838e2 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Wed, 18 Aug 2021 21:15:09 +0000 Subject: [PATCH 009/597] Rewrite GPU bios functions according to OpenBIOS. (#192) See PCSX-Redux : https://github.com/grumpycoders/pcsx-redux/blob/93653ba5281487d3bed57371d7b64c32dfc669f0/src/mips/openbios/gpu/gpu.c --- libpcsxcore/psxbios.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 8d4cb27f8..7a38436c9 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -1411,37 +1411,37 @@ void psxBios_FlushCache() { // 44 void psxBios_GPU_dw() { // 0x46 int size; - s32 *ptr; + u32 *ptr; #ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x46]); #endif GPU_writeData(0xa0000000); - GPU_writeData((a1<<16)|(a0&0xffff)); - GPU_writeData((a3<<16)|(a2&0xffff)); - size = (a2*a3+1)/2; - ptr = (s32*)PSXM(Rsp[4]); //that is correct? - do { - GPU_writeData(SWAP32(*ptr)); - ptr++; - } while(--size); + GPU_writeData((a1<<0x10)|(a0&0xffff)); + GPU_writeData((a3<<0x10)|(a2&0xffff)); + size = (a2*a3)/2; + ptr = (u32*)PSXM(Rsp[4]); //that is correct? 
+ while(size--) + { + GPU_writeData(SWAPu32(*ptr++)); + } pc0 = ra; } void psxBios_mem2vram() { // 0x47 int size; - + gpuSyncPluginSR(); GPU_writeData(0xa0000000); - GPU_writeData((a1<<16)|(a0&0xffff)); - GPU_writeData((a3<<16)|(a2&0xffff)); - size = (a2*a3+1)/2; + GPU_writeData((a1<<0x10)|(a0&0xffff)); + GPU_writeData((a3<<0x10)|(a2&0xffff)); + size = ((((a2 * a3) / 2) >> 4) << 16); GPU_writeStatus(0x04000002); psxHwWrite32(0x1f8010f4,0); psxHwWrite32(0x1f8010f0,psxHwRead32(0x1f8010f0)|0x800); psxHwWrite32(0x1f8010a0,Rsp[4]);//might have a buggy... - psxHwWrite32(0x1f8010a4,((size/16)<<16)|16); + psxHwWrite32(0x1f8010a4, size | 0x10); psxHwWrite32(0x1f8010a8,0x01000201); pc0 = ra; @@ -1454,22 +1454,26 @@ void psxBios_SendGPU() { // 0x48 } void psxBios_GPU_cw() { // 0x49 + gpuSyncPluginSR(); GPU_writeData(a0); pc0 = ra; + v0 = HW_GPU_STATUS; } void psxBios_GPU_cwb() { // 0x4a - s32 *ptr = (s32*)Ra0; + u32 *ptr = (u32*)Ra0; int size = a1; - while(size--) { - GPU_writeData(SWAP32(*ptr)); - ptr++; + gpuSyncPluginSR(); + while(size--) + { + GPU_writeData(SWAPu32(*ptr++)); } pc0 = ra; } void psxBios_GPU_SendPackets() { //4b: + gpuSyncPluginSR(); GPU_writeStatus(0x04000002); psxHwWrite32(0x1f8010f4,0); psxHwWrite32(0x1f8010f0,psxHwRead32(0x1f8010f0)|0x800); From dd79da89fc4ddf020bb6f8d8c8a733429249bab3 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Sun, 22 Aug 2021 21:39:05 +0000 Subject: [PATCH 010/597] Properly protect the HLE instructions against corrupted memory. (#189) Fix is from PCSX-redux : https://github.com/grumpycoders/pcsx-redux/commit/99c9508f2a9dc1444b88f37eb100cdfb17862b52 This should hopefully fix HDHOSHY's experimental patch properly. 
Co-authored-by: Nicolas Noble --- libpcsxcore/new_dynarec/emu_if.h | 2 -- libpcsxcore/new_dynarec/new_dynarec.c | 7 ++++++- libpcsxcore/psxhle.c | 2 +- libpcsxcore/psxhle.h | 2 +- libpcsxcore/psxinterpreter.c | 8 +++++++- 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index 3980490ac..17abab0bd 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -97,8 +97,6 @@ void pcsx_mtc0(u32 reg, u32 val); void pcsx_mtc0_ds(u32 reg, u32 val); /* misc */ -extern void (*psxHLEt[])(); - extern void SysPrintf(const char *fmt, ...); #ifdef RAM_FIXED diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index cd63d2bf2..c3c470dd2 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -35,6 +35,7 @@ static int sceBlock; #endif #include "new_dynarec_config.h" +#include "../psxhle.h" //emulator interface #include "emu_if.h" //emulator interface //#define DISASM @@ -3445,7 +3446,11 @@ void hlecall_assemble(int i,struct regstat *i_regs) assert(!is_delayslot); (void)ccreg; emit_movimm(start+i*4+4,0); // Get PC - emit_movimm((int)psxHLEt[source[i]&7],1); + uint32_t hleCode = source[i] & 0x03ffffff; + if (hleCode >= (sizeof(psxHLEt) / sizeof(psxHLEt[0]))) + emit_movimm((int)psxNULL,1); + else + emit_movimm((int)psxHLEt[hleCode],1); emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX emit_jmp((int)jump_hlecall); } diff --git a/libpcsxcore/psxhle.c b/libpcsxcore/psxhle.c index 83362ecdf..52227a40d 100644 --- a/libpcsxcore/psxhle.c +++ b/libpcsxcore/psxhle.c @@ -89,7 +89,7 @@ static void hleExecRet() { psxRegs.pc = psxRegs.GPR.n.ra; } -void (*psxHLEt[256])() = { +const void (*psxHLEt[8])() = { hleDummy, hleA0, hleB0, hleC0, hleBootstrap, hleExecRet, hleDummy, hleDummy diff --git a/libpcsxcore/psxhle.h b/libpcsxcore/psxhle.h index eef78e80e..0529c3898 100644 --- a/libpcsxcore/psxhle.h 
+++ b/libpcsxcore/psxhle.h @@ -28,7 +28,7 @@ extern "C" { #include "r3000a.h" #include "plugins.h" -extern void (*psxHLEt[256])(); +extern const void (*psxHLEt[8])(); #ifdef __cplusplus } diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index cf3de7985..fa454e1af 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -842,7 +842,13 @@ void psxBASIC(struct psxCP2Regs *regs) { void psxHLE() { // psxHLEt[psxRegs.code & 0xffff](); - psxHLEt[psxRegs.code & 0x07](); // HDHOSHY experimental patch +// psxHLEt[psxRegs.code & 0x07](); // HDHOSHY experimental patch + uint32_t hleCode = psxRegs.code & 0x03ffffff; + if (hleCode >= (sizeof(psxHLEt) / sizeof(psxHLEt[0]))) { + psxNULL(); + } else { + psxHLEt[hleCode](); + } } void (*psxBSC[64])() = { From 16f3ca666fb090dcb9ac0b399b767e4ed0aabece Mon Sep 17 00:00:00 2001 From: gameblabla Date: Sun, 22 Aug 2021 21:50:09 +0000 Subject: [PATCH 011/597] Fixes Diablo 1 SPU bug properly and remove Diablo hack. (#191) (Mostly) inspired by the fixes done in PCSX Redux : https://github.com/grumpycoders/pcsx-redux/blob/93653ba5281487d3bed57371d7b64c32dfc669f0/src/spu/registers.cc#L504 It seems that there was an attempt to initially implement it in PCSX Reloaded (hence the bIgnoreLoop in freeze.c) but it was never implemented properly. 
Co-authored-by: Nicolas Noble --- frontend/main.c | 1 - frontend/menu.c | 2 -- maemo/main.c | 1 - plugins/dfsound/externals.h | 8 ++++---- plugins/dfsound/registers.c | 4 ++-- plugins/dfsound/spu.c | 5 ++++- plugins/dfsound/spu_config.h | 1 - 7 files changed, 10 insertions(+), 12 deletions(-) diff --git a/frontend/main.c b/frontend/main.c index 43a55481b..a824fdcc5 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -138,7 +138,6 @@ void emu_set_default_config(void) pl_rearmed_cbs.gpu_peopsgl.iTexGarbageCollection = 1; spu_config.iUseReverb = 1; - spu_config.idiablofix = 0; spu_config.iUseInterpolation = 1; spu_config.iXAPitch = 0; spu_config.iVolume = 768; diff --git a/frontend/menu.c b/frontend/menu.c index c806aa9eb..d9fee04c4 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -443,7 +443,6 @@ static const struct { CE_INTVAL_P(gpu_peopsgl.iTexGarbageCollection), CE_INTVAL_P(gpu_peopsgl.dwActFixes), CE_INTVAL(spu_config.iUseReverb), - CE_INTVAL(spu_config.idiablofix), CE_INTVAL(spu_config.iXAPitch), CE_INTVAL(spu_config.iUseInterpolation), CE_INTVAL(spu_config.iTempo), @@ -1458,7 +1457,6 @@ static menu_entry e_menu_plugin_spu[] = mee_range_h ("Volume boost", 0, volume_boost, -5, 30, h_spu_volboost), mee_onoff ("Reverb", 0, spu_config.iUseReverb, 1), mee_enum ("Interpolation", 0, spu_config.iUseInterpolation, men_spu_interp), - mee_onoff ("Diablo Music fix", 0, spu_config.idiablofix, 1), mee_onoff ("Adjust XA pitch", 0, spu_config.iXAPitch, 1), mee_onoff_h ("Adjust tempo", 0, spu_config.iTempo, 1, h_spu_tempo), mee_end, diff --git a/maemo/main.c b/maemo/main.c index c382c5110..564e8ed5a 100644 --- a/maemo/main.c +++ b/maemo/main.c @@ -197,7 +197,6 @@ int main(int argc, char **argv) strcpy(Config.Bios, "HLE"); spu_config.iUseReverb = 1; spu_config.iUseInterpolation = 1; - spu_config.idiablofix = 0; in_type1 = PSE_PAD_TYPE_STANDARD; in_type2 = PSE_PAD_TYPE_STANDARD; diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index 
2db75ac21..de4b5dbe2 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -110,7 +110,7 @@ typedef struct unsigned int bNoise:1; // noise active flag unsigned int bFMod:2; // freq mod (0=off, 1=sound channel, 2=freq channel) unsigned int prevflags:3; // flags from previous block - + unsigned int bIgnoreLoop:1; // Ignore loop int iLeftVolume; // left volume int iRightVolume; // right volume ADSRInfoEx ADSRX; @@ -232,6 +232,9 @@ typedef struct unsigned short regArea[0x400]; } SPUInfo; +#define regAreaGet(ch,offset) \ + spu.regArea[((ch<<4)|(offset))>>1] + /////////////////////////////////////////////////////////// // SPU.C globals /////////////////////////////////////////////////////////// @@ -243,9 +246,6 @@ extern SPUInfo spu; void do_samples(unsigned int cycles_to, int do_sync); void schedule_next_irq(void); -#define regAreaGet(ch,offset) \ - spu.regArea[((ch<<4)|(offset))>>1] - #define do_samples_if_needed(c, sync) \ do { \ if (sync || (int)((c) - spu.cycles_played) >= 16 * 768) \ diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index d508c0686..cc7202078 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -112,6 +112,7 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, //------------------------------------------------// case 14: // loop? spu.s_chan[ch].pLoop=spu.spuMemC+((val&~1)<<3); + spu.s_chan[ch].bIgnoreLoop = 1; goto upd_irq; //------------------------------------------------// } @@ -351,8 +352,7 @@ static void SoundOn(int start,int end,unsigned short val) { if((val&1) && regAreaGet(ch,6)) // mmm... start has to be set before key on !?! 
{ - spu.s_chan[ch].pCurr=spu.spuMemC+((regAreaGet(ch,6)&~1)<<3); // must be block aligned - if (spu_config.idiablofix == 0) spu.s_chan[ch].pLoop=spu.spuMemC+((regAreaGet(ch,14)&~1)<<3); + spu.s_chan[ch].bIgnoreLoop = 0; spu.dwNewChannel|=(1<iSBPos=27; s_chan->spos=0; + s_chan->pCurr = spu.spuMemC+((regAreaGet(ch,6)&~1)<<3); + spu.dwNewChannel&=~(1<bIgnoreLoop)) s_chan->pLoop = start; // loop adress start += 16; @@ -1489,6 +1491,7 @@ long CALLBACK SPUinit(void) spu.s_chan[i].ADSRX.SustainIncrease = 1; spu.s_chan[i].pLoop = spu.spuMemC; spu.s_chan[i].pCurr = spu.spuMemC; + spu.s_chan[i].bIgnoreLoop = 0; } spu.bSpuInit=1; // flag: we are inited diff --git a/plugins/dfsound/spu_config.h b/plugins/dfsound/spu_config.h index 6b46bf386..3e88a2c22 100644 --- a/plugins/dfsound/spu_config.h +++ b/plugins/dfsound/spu_config.h @@ -7,7 +7,6 @@ typedef struct int iUseReverb; int iUseInterpolation; int iTempo; - int idiablofix; int iUseThread; int iUseFixedUpdates; // output fixed number of samples/frame From 1ffdacd535251ca7d979e23df81a0f1181d6170c Mon Sep 17 00:00:00 2001 From: gameblabla Date: Sun, 22 Aug 2021 22:18:42 +0000 Subject: [PATCH 012/597] mdec.cpp: Dare to set MDEC_BIAS to 10 (#186) This fixes graphic artifacts during cinematics in : - Vandal Hearts - R-Types - Galerians Dmitrysmagin had a similar fix in PCSX4ALL but he set it to 6. It turns out MDEC_BIAS set to 6 was still not enough for Galerians to look properly so it was set to 10. It seems so far that other games are unaffected. Co-authored-by: Dmitry Smagin --- libpcsxcore/mdec.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/mdec.c b/libpcsxcore/mdec.c index d6c7ab68f..61ed5ea53 100644 --- a/libpcsxcore/mdec.c +++ b/libpcsxcore/mdec.c @@ -32,7 +32,14 @@ * 320x240x16@60Hz => 9.216 MB/s * so 2.0 to 4.0 should be fine. */ -#define MDEC_BIAS 2 + +/* Was set to 2 before but it would cause issues in R-types and Vandal Hearts videos. 
+ * Setting it to 6 as dmitrysmagin did fix those... except for Galerians. + * Galerians needs this to be set to 10 (!!) before it looks properly. + * I've tried this with a few other games (including R-Types) and so far, this + * has not backfired. + * */ +#define MDEC_BIAS 10 #define DSIZE 8 #define DSIZE2 (DSIZE * DSIZE) From e73e384cfacbb9b03d14f809e1283dc715068e3c Mon Sep 17 00:00:00 2001 From: gameblabla Date: Sun, 22 Aug 2021 22:29:02 +0000 Subject: [PATCH 013/597] CDROM: Ignore sectors with channel number 255 (#195) This was inspired by the fix in Duckstation : https://github.com/stenzek/duckstation/commit/0710e3b6d384526ed939f742f8f657623bb354bb Some games have junk audio sectors with a channel number of 255. If these are not skipped, then the wrong file will be played. This was tested on "Blue's Clues : Blue's Big Musical" and it fixed the missing audio there. Taxi 2 is also said to be affected by this. --- libpcsxcore/cdrom.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 16b7ca1a3..7cba92eed 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1116,9 +1116,14 @@ void cdrReadInterrupt() { cdr.Channel = cdr.Transfer[4 + 1]; } + /* Gameblabla + * Skips playing on channel 255. + * Fixes missing audio in Blue's Clues : Blue's Big Musical. (Should also fix Taxi 2) + * TODO : Check if this is the proper behaviour. 
+ * */ if((cdr.Transfer[4 + 2] & 0x4) && (cdr.Transfer[4 + 1] == cdr.Channel) && - (cdr.Transfer[4 + 0] == cdr.File)) { + (cdr.Transfer[4 + 0] == cdr.File) && cdr.Channel != 255) { int ret = xa_decode_sector(&cdr.Xa, cdr.Transfer+4, cdr.FirstSector); if (!ret) { cdrAttenuate(cdr.Xa.pcm, cdr.Xa.nsamples, cdr.Xa.stereo); From a4ae39973ff2e77b233c467fbd0e49cc55d72f56 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Sun, 22 Aug 2021 22:31:18 +0000 Subject: [PATCH 014/597] psxinterpreter: Merge DIV interpreter fix from libretro's fork (#193) Fix comes from here : https://github.com/libretro/pcsx_rearmed/pull/530 Co-authored-by: Ember Cold --- libpcsxcore/psxinterpreter.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index fa454e1af..e59f93d8c 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -501,14 +501,27 @@ void psxSLTU() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) < _u32(_rRt_); } // Rd * Format: OP rs, rt * *********************************************************/ void psxDIV() { - if (_i32(_rRt_) != 0) { - _i32(_rLo_) = _i32(_rRs_) / _i32(_rRt_); - _i32(_rHi_) = _i32(_rRs_) % _i32(_rRt_); - } - else { - _i32(_rLo_) = _i32(_rRs_) >= 0 ? 0xffffffff : 1; - _i32(_rHi_) = _i32(_rRs_); - } + if (!_i32(_rRt_)) { + _i32(_rHi_) = _i32(_rRs_); + if (_i32(_rRs_) & 0x80000000) { + _i32(_rLo_) = 1; + } else { + _i32(_rLo_) = 0xFFFFFFFF; + } +/* + * Notaz said that this was "not needed" for ARM platforms and could slow it down so let's disable for ARM. + * This fixes a crash issue that can happen when running Amidog's CPU test. 
+ * (It still stays stuck to a black screen but at least it doesn't crash anymore) + */ +#if !defined(__arm__) && !defined(__aarch64__) + } else if (_i32(_rRs_) == 0x80000000 && _i32(_rRt_) == 0xFFFFFFFF) { + _i32(_rLo_) = 0x80000000; + _i32(_rHi_) = 0; +#endif + } else { + _i32(_rLo_) = _i32(_rRs_) / _i32(_rRt_); + _i32(_rHi_) = _i32(_rRs_) % _i32(_rRt_); + } } void psxDIVU() { From 16c8be170aef16720d553992f69f5244550a9415 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Wed, 25 Aug 2021 20:55:22 +0000 Subject: [PATCH 015/597] Always look up verify_dirty literals from offsets by neonloop (#194) Literals are deduplicated, so there's no guarantee they will be stored next to each other, even if they're written sequentially. verify_dirty and get_bounds must use the offsets on each instruction, instead of assuming values are stored sequentially. Co-authored-by: neonloop --- libpcsxcore/new_dynarec/assem_arm.c | 38 ++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 21640f84a..b336bcca1 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -241,14 +241,21 @@ static u_int get_clean_addr(int addr) static int verify_dirty(u_int *ptr) { #ifndef HAVE_ARMV7 + u_int offset; // get from literal pool assert((*ptr&0xFFFF0000)==0xe59f0000); - u_int offset=*ptr&0xfff; - u_int *l_ptr=(void *)ptr+offset+8; - u_int source=l_ptr[0]; - u_int copy=l_ptr[1]; - u_int len=l_ptr[2]; - ptr+=4; + offset=*ptr&0xfff; + u_int source=*(u_int*)((void *)ptr+offset+8); + ptr++; + assert((*ptr&0xFFFF0000)==0xe59f0000); + offset=*ptr&0xfff; + u_int copy=*(u_int*)((void *)ptr+offset+8); + ptr++; + assert((*ptr&0xFFFF0000)==0xe59f0000); + offset=*ptr&0xfff; + u_int len=*(u_int*)((void *)ptr+offset+8); + ptr++; + ptr++; #else // ARMv7 movw/movt assert((*ptr&0xFFF00000)==0xe3000000); @@ -285,14 +292,21 @@ static void get_bounds(int addr,u_int 
*start,u_int *end) { u_int *ptr=(u_int *)addr; #ifndef HAVE_ARMV7 + u_int offset; // get from literal pool assert((*ptr&0xFFFF0000)==0xe59f0000); - u_int offset=*ptr&0xfff; - u_int *l_ptr=(void *)ptr+offset+8; - u_int source=l_ptr[0]; - //u_int copy=l_ptr[1]; - u_int len=l_ptr[2]; - ptr+=4; + offset=*ptr&0xfff; + u_int source=*(u_int*)((void *)ptr+offset+8); + ptr++; + //assert((*ptr&0xFFFF0000)==0xe59f0000); + //offset=*ptr&0xfff; + //u_int copy=*(u_int*)((void *)ptr+offset+8); + ptr++; + assert((*ptr&0xFFFF0000)==0xe59f0000); + offset=*ptr&0xfff; + u_int len=*(u_int*)((void *)ptr+offset+8); + ptr++; + ptr++; #else // ARMv7 movw/movt assert((*ptr&0xFFF00000)==0xe3000000); From 1f035e278151a3e29b3d2077f7242787fc541216 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Wed, 25 Aug 2021 21:22:44 +0000 Subject: [PATCH 016/597] CdlSync should be NULL and return an error. (#199) This has been confirmed by both Mednafen and nocash's documentation. --- libpcsxcore/cdrom.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 7cba92eed..1e8f950aa 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -48,7 +48,7 @@ cdrStruct cdr; static unsigned char *pTransfer; /* CD-ROM magic numbers */ -#define CdlSync 0 +#define CdlSync 0 /* nocash documentation : "Uh, actually, returns error code 40h = Invalid Command...?" */ #define CdlNop 1 #define CdlSetloc 2 #define CdlPlay 3 @@ -575,10 +575,6 @@ void cdrInterrupt() { cdr.Irq = 0; switch (Irq) { - case CdlSync: - // TOOD: sometimes/always return error? 
- break; - case CdlNop: if (cdr.DriveState != DRIVESTATE_LID_OPEN) cdr.StatP &= ~STATUS_SHELLOPEN; @@ -973,7 +969,7 @@ void cdrInterrupt() { cdr.Result[0] = cdr.StatP; start_rotating = 1; break; - + case CdlSync: default: CDR_LOG_I("Invalid command: %02x\n", Irq); error = ERROR_INVALIDCMD; From 449968efe945cc876a0805525b90d38b96acf4c0 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Wed, 25 Aug 2021 21:37:51 +0000 Subject: [PATCH 017/597] Improve SetLoc CDROM command from Duckstation (#201) See Duckstation's CDROM for reference : https://github.com/stenzek/duckstation/blob/18241978ea056a81ff9847f0ad89658f886153ea/src/core/cdrom.cpp#L1087 This probably shouldn't make a difference but who knows. --- libpcsxcore/cdrom.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 1e8f950aa..0bb2d57b3 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1253,17 +1253,29 @@ void cdrWrite1(unsigned char rt) { switch (cdr.Cmd) { case CdlSetloc: - for (i = 0; i < 3; i++) - set_loc[i] = btoi(cdr.Param[i]); + CDR_LOG("CDROM setloc command (%02X, %02X, %02X)\n", cdr.Param[0], cdr.Param[1], cdr.Param[2]); - i = msf2sec(cdr.SetSectorPlay); - i = abs(i - msf2sec(set_loc)); - if (i > 16) - cdr.Seeked = SEEK_PENDING; + // MM must be BCD, SS must be BCD and <0x60, FF must be BCD and <0x75 + if (((cdr.Param[0] & 0x0F) > 0x09) || (cdr.Param[0] > 0x99) || ((cdr.Param[1] & 0x0F) > 0x09) || (cdr.Param[1] >= 0x60) || ((cdr.Param[2] & 0x0F) > 0x09) || (cdr.Param[2] >= 0x75)) + { + CDR_LOG("Invalid/out of range seek to %02X:%02X:%02X\n", cdr.Param[0], cdr.Param[1], cdr.Param[2]); + } + else + { + for (i = 0; i < 3; i++) + { + set_loc[i] = btoi(cdr.Param[i]); + } - memcpy(cdr.SetSector, set_loc, 3); - cdr.SetSector[3] = 0; - cdr.SetlocPending = 1; + i = msf2sec(cdr.SetSectorPlay); + i = abs(i - msf2sec(set_loc)); + if (i > 16) + cdr.Seeked = SEEK_PENDING; + + memcpy(cdr.SetSector, set_loc, 3); + 
cdr.SetSector[3] = 0; + cdr.SetlocPending = 1; + } break; case CdlReadN: From 6360a61b062aeb1352c80dddcae611228207f5b5 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Wed, 25 Aug 2021 21:53:08 +0000 Subject: [PATCH 018/597] We shouldn't throw an error for GetQ. (#202) I doubt this is used by any games but who knows. See Mednafen for reference : https://github.com/libretro-mirrors/mednafen-git/blob/1899500078169e787f86eb5302a3f35abdaa8764/src/psx/cdc.cpp#L2527 --- libpcsxcore/cdrom.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 0bb2d57b3..5b53e99d8 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -898,8 +898,7 @@ void cdrInterrupt() { break; case CdlGetQ: - // TODO? - CDR_LOG_I("got CdlGetQ\n"); + no_busy_error = 1; break; case CdlReadToc: From dcd72441732fae38661db9b1d6a952a86f1cac83 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Wed, 25 Aug 2021 21:56:52 +0000 Subject: [PATCH 019/597] CdlGetmode is GetParam, fix it according to Mednafen's implementation (#203) The result size for GetParam (previously Getmode) should be only 5 and done this way. 
--- libpcsxcore/cdrom.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 5b53e99d8..69801011b 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -63,7 +63,7 @@ static unsigned char *pTransfer; #define CdlDemute 12 #define CdlSetfilter 13 #define CdlSetmode 14 -#define CdlGetmode 15 +#define CdlGetparam 15 #define CdlGetlocL 16 #define CdlGetlocP 17 #define CdlReadT 18 @@ -84,7 +84,7 @@ char *CmdName[0x100]= { "CdlSync", "CdlNop", "CdlSetloc", "CdlPlay", "CdlForward", "CdlBackward", "CdlReadN", "CdlStandby", "CdlStop", "CdlPause", "CdlInit", "CdlMute", - "CdlDemute", "CdlSetfilter", "CdlSetmode", "CdlGetmode", + "CdlDemute", "CdlSetfilter", "CdlSetmode", "CdlGetparam", "CdlGetlocL", "CdlGetlocP", "CdlReadT", "CdlGetTN", "CdlGetTD", "CdlSeekL", "CdlSeekP", "CdlSetclock", "CdlGetclock", "CdlTest", "CdlID", "CdlReadS", @@ -759,13 +759,13 @@ void cdrInterrupt() { no_busy_error = 1; break; - case CdlGetmode: - SetResultSize(6); + case CdlGetparam: + /* Gameblabla : According to mednafen, Result size should be 5 and done this way. */ + SetResultSize(5); cdr.Result[1] = cdr.Mode; - cdr.Result[2] = cdr.File; - cdr.Result[3] = cdr.Channel; - cdr.Result[4] = 0; - cdr.Result[5] = 0; + cdr.Result[2] = 0; + cdr.Result[3] = cdr.File; + cdr.Result[4] = cdr.Channel; no_busy_error = 1; break; From cfeb7cab2ce1083d54e46a102ab02f680615064d Mon Sep 17 00:00:00 2001 From: gameblabla Date: Thu, 26 Aug 2021 18:04:19 +0000 Subject: [PATCH 020/597] Fix CdlGetTD according to Mednafen's implementation. 
(#204) --- libpcsxcore/cdrom.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 69801011b..5a596adc8 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -815,7 +815,9 @@ void cdrInterrupt() { cdr.Result[0] = cdr.StatP; cdr.Result[1] = itob(cdr.ResultTD[2]); cdr.Result[2] = itob(cdr.ResultTD[1]); - cdr.Result[3] = itob(cdr.ResultTD[0]); + /* According to Nocash's documentation, the function doesn't care about ff. + * This can be seen also in Mednafen's implementation. */ + //cdr.Result[3] = itob(cdr.ResultTD[0]); } break; From b0bd140dee2602140edacd20dc868c9bfdd588b2 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Thu, 26 Aug 2021 18:05:23 +0000 Subject: [PATCH 021/597] CDROM: Rename Reset+Init commands appropriately, fix "This is Football 2" lockup (#205) Upon the reset command being issued, we need to set the Mode to 0x20 (as it was confirmed in Mednafen) and also disable the muting. We also change the seektime for the reset command appropriately. Please see Mednafen's implementation as reference : https://github.com/libretro-mirrors/mednafen-git/blob/1899500078169e787f86eb5302a3f35abdaa8764/src/psx/cdc.cpp#L2007 This fixes the black screen issue in "This is Football 2". This also fixes the black screen issue in Pooh's Party. 
--- libpcsxcore/cdrom.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 5a596adc8..95e739f91 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -58,7 +58,7 @@ static unsigned char *pTransfer; #define CdlStandby 7 #define CdlStop 8 #define CdlPause 9 -#define CdlInit 10 +#define CdlReset 10 #define CdlMute 11 #define CdlDemute 12 #define CdlSetfilter 13 @@ -76,19 +76,19 @@ static unsigned char *pTransfer; #define CdlTest 25 #define CdlID 26 #define CdlReadS 27 -#define CdlReset 28 +#define CdlInit 28 #define CdlGetQ 29 #define CdlReadToc 30 char *CmdName[0x100]= { "CdlSync", "CdlNop", "CdlSetloc", "CdlPlay", "CdlForward", "CdlBackward", "CdlReadN", "CdlStandby", - "CdlStop", "CdlPause", "CdlInit", "CdlMute", + "CdlStop", "CdlPause", "CdlReset", "CdlMute", "CdlDemute", "CdlSetfilter", "CdlSetmode", "CdlGetparam", "CdlGetlocL", "CdlGetlocP", "CdlReadT", "CdlGetTN", "CdlGetTD", "CdlSeekL", "CdlSeekP", "CdlSetclock", "CdlGetclock", "CdlTest", "CdlID", "CdlReadS", - "CdlReset", NULL, "CDlReadToc", NULL + "CdlInit", NULL, "CDlReadToc", NULL }; unsigned char Test04[] = { 0 }; @@ -732,13 +732,15 @@ void cdrInterrupt() { cdr.Stat = Complete; break; - case CdlInit: - AddIrqQueue(CdlInit + 0x100, cdReadTime * 6); + case CdlReset: + cdr.Muted = FALSE; + cdr.Mode = 0x20; /* This fixes This is Football 2, Pooh's Party lockups */ + AddIrqQueue(CdlReset + 0x100, 4100000); no_busy_error = 1; start_rotating = 1; break; - case CdlInit + 0x100: + case CdlReset + 0x100: cdr.Stat = Complete; break; @@ -890,7 +892,7 @@ void cdrInterrupt() { cdr.Stat = Complete; break; - case CdlReset: + case CdlInit: // yes, it really sets STATUS_SHELLOPEN cdr.StatP |= STATUS_SHELLOPEN; cdr.DriveState = DRIVESTATE_RESCAN_CD; @@ -1286,8 +1288,8 @@ void cdrWrite1(unsigned char rt) { StopReading(); break; - case CdlReset: case CdlInit: + case CdlReset: cdr.Seeked = SEEK_DONE; StopCdda(); StopReading(); From 
e3d555e0f3578b4e317f9739a715744cbe1a8395 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Fri, 27 Aug 2021 22:05:02 +0000 Subject: [PATCH 022/597] Improve CdlPause command based on Mednafen's implementation (#207) Reference : https://github.com/libretro-mirrors/mednafen-git/blob/master/src/psx/cdc.cpp#L1969 This fixed an issue with Worms Pinball booting to the language screen. Previously, it would sometimes boot to it but not reliably and the screen would always not appear or be black. Now, the emulator doesn't crash anymore and the language screen can be seen (although it is a glitchy mess). --- libpcsxcore/cdrom.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 95e739f91..2d9d77f0a 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -722,7 +722,17 @@ void cdrInterrupt() { InuYasha - Feudal Fairy Tale: slower - Fixes battles */ - AddIrqQueue(CdlPause + 0x100, cdReadTime * 3); + /* Gameblabla - Tightening the timings (as taken from Mednafen). */ + if (cdr.DriveState != DRIVESTATE_STANDBY) + { + delay = 5000; + } + else + { + delay = (1124584 + (msf2sec(cdr.SetSectorPlay) * 42596 / (75 * 60))) * ((cdr.Mode & MODE_SPEED) ? 1 : 2); + CDRMISC_INT((cdr.Mode & MODE_SPEED) ? cdReadTime / 2 : cdReadTime); + } + AddIrqQueue(CdlPause + 0x100, delay); cdr.Ctrl |= 0x80; break; From 4e70ea5a634f0568c6cb40ff945fb6c3150665df Mon Sep 17 00:00:00 2001 From: gameblabla Date: Mon, 30 Aug 2021 21:08:08 +0000 Subject: [PATCH 023/597] Fix regression with CdlPause (#209) --- libpcsxcore/cdrom.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 2d9d77f0a..723617989 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -722,14 +722,18 @@ void cdrInterrupt() { InuYasha - Feudal Fairy Tale: slower - Fixes battles */ - /* Gameblabla - Tightening the timings (as taken from Mednafen). 
*/ + /* Gameblabla - Tightening the timings (as taken from Duckstation). + * The timings from Duckstation are based upon hardware tests. + * Mednafen's timing don't work for Gundam Battle Assault 2 in PAL/50hz mode, + * seems to be timing sensitive as it can depend on the CPU's clock speed. + * */ if (cdr.DriveState != DRIVESTATE_STANDBY) { - delay = 5000; + delay = 7000; } else { - delay = (1124584 + (msf2sec(cdr.SetSectorPlay) * 42596 / (75 * 60))) * ((cdr.Mode & MODE_SPEED) ? 1 : 2); + delay = (((cdr.Mode & MODE_SPEED) ? 2 : 1) * (1000000)); CDRMISC_INT((cdr.Mode & MODE_SPEED) ? cdReadTime / 2 : cdReadTime); } AddIrqQueue(CdlPause + 0x100, delay); From bf4df3ba5781db5600f1ffb3c34e4d76ef490474 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Mon, 30 Aug 2021 22:34:02 +0000 Subject: [PATCH 024/597] Implement fix from Mednafen for Fantastic Pinball Kyuutenkai. (#206) This is taken from the way Mednafen implemented it. https://github.com/libretro-mirrors/mednafen-git/blob/563b72e377fa8284559df4df0271108d4935c9f1/src/psx/cdc.cpp#L941 This properly fixes the freeze issue in Fantastic Pinball Kyuutenkai. 
--- libpcsxcore/cdrom.c | 20 ++++++++++++++++++-- libpcsxcore/cdrom.h | 4 ++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 723617989..4e312fd69 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -431,6 +431,11 @@ static void AddIrqQueue(unsigned short irq, unsigned long ecycle) { static void cdrPlayInterrupt_Autopause() { + u32 abs_lev_max = 0; + boolean abs_lev_chselect; + u32 i; + s16 read_buf[CD_FRAMESIZE_RAW/2]; + if ((cdr.Mode & MODE_AUTOPAUSE) && cdr.TrackChanged) { CDR_LOG( "CDDA STOP\n" ); @@ -446,10 +451,21 @@ static void cdrPlayInterrupt_Autopause() StopCdda(); } else if (cdr.Mode & MODE_REPORT) { + CDR_readCDDA(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2], (u8 *)read_buf); cdr.Result[0] = cdr.StatP; cdr.Result[1] = cdr.subq.Track; cdr.Result[2] = cdr.subq.Index; + + abs_lev_chselect = cdr.subq.Absolute[1] & 0x01; + + /* 8 is a hack. For accuracy, it should be 588. */ + for (i = 0; i < 8; i++) + { + abs_lev_max = MAX_VALUE(abs_lev_max, abs(read_buf[i * 2 + abs_lev_chselect])); + } + abs_lev_max = MIN_VALUE(abs_lev_max, 32767); + abs_lev_max |= abs_lev_chselect << 15; if (cdr.subq.Absolute[2] & 0x10) { cdr.Result[3] = cdr.subq.Relative[0]; @@ -462,8 +478,8 @@ static void cdrPlayInterrupt_Autopause() cdr.Result[5] = cdr.subq.Absolute[2]; } - cdr.Result[6] = 0; - cdr.Result[7] = 0; + cdr.Result[6] = abs_lev_max >> 0; + cdr.Result[7] = abs_lev_max >> 8; // Rayman: Logo freeze (resultready + dataready) cdr.ResultReady = 1; diff --git a/libpcsxcore/cdrom.h b/libpcsxcore/cdrom.h index a37f6baff..860930b10 100644 --- a/libpcsxcore/cdrom.h +++ b/libpcsxcore/cdrom.h @@ -34,6 +34,10 @@ extern "C" { #define btoi(b) ((b) / 16 * 10 + (b) % 16) /* BCD to u_char */ #define itob(i) ((i) / 10 * 16 + (i) % 10) /* u_char to BCD */ +#define ABS_CD(x) ((x >= 0) ? x : -x) +#define MIN_VALUE(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a < _b ? 
_a : _b; }) +#define MAX_VALUE(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a > _b ? _a : _b; }) + #define MSF2SECT(m, s, f) (((m) * 60 + (s) - 2) * 75 + (f)) #define CD_FRAMESIZE_RAW 2352 From 41e82ad46e60f9c3291ff81ebe4a07512b1194c5 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Thu, 9 Sep 2021 20:20:54 +0000 Subject: [PATCH 025/597] ARM build fixes. (Should fix building on ARM/GCC10) (#210) This fixes building on GCC10 and ARM. Note that in my previous HLE patch, i forgot to export psxNULL so it was failing to compile it... Oops. Co-authored-by: negativeExponent --- Makefile | 3 +-- frontend/menu.c | 4 ++++ libpcsxcore/misc.c | 2 +- libpcsxcore/new_dynarec/new_dynarec.c | 1 + libpcsxcore/psxcounters.c | 6 +++--- libpcsxcore/psxmem.c | 8 ++++---- libpcsxcore/psxmem.h | 4 ---- libpcsxcore/r3000a.c | 4 +++- libpcsxcore/r3000a.h | 1 - 9 files changed, 17 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 29d241843..18ef4e002 100644 --- a/Makefile +++ b/Makefile @@ -59,8 +59,7 @@ ifeq "$(USE_DYNAREC)" "1" OBJS += libpcsxcore/new_dynarec/new_dynarec.o libpcsxcore/new_dynarec/linkage_arm.o OBJS += libpcsxcore/new_dynarec/pcsxmem.o else -libpcsxcore/new_dynarec/emu_if.o: CFLAGS += -DDRC_DISABLE -frontend/libretro.o: CFLAGS += -DDRC_DISABLE +CFLAGS += -DDRC_DISABLE endif OBJS += libpcsxcore/new_dynarec/emu_if.o libpcsxcore/new_dynarec/new_dynarec.o: libpcsxcore/new_dynarec/assem_arm.c \ diff --git a/frontend/menu.c b/frontend/menu.c index d9fee04c4..5efd26014 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -2585,7 +2585,11 @@ void menu_prepare_emu(void) plat_video_menu_leave(); + #ifndef DRC_DISABLE psxCpu = (Config.Cpu == CPU_INTERPRETER) ? 
&psxInt : &psxRec; + #else + psxCpu = &psxInt; + #endif if (psxCpu != prev_cpu) { prev_cpu->Shutdown(); psxCpu->Init(); diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 9b0b27fd7..213040c58 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -745,7 +745,7 @@ int RecvPcsxInfo() { NET_recvData(&Config.Cpu, sizeof(Config.Cpu), PSE_NET_BLOCKING); if (tmp != Config.Cpu) { psxCpu->Shutdown(); -#ifdef PSXREC +#ifndef DRC_DISABLE if (Config.Cpu == CPU_INTERPRETER) psxCpu = &psxInt; else psxCpu = &psxRec; #else diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index c3c470dd2..7646e074d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -3441,6 +3441,7 @@ void syscall_assemble(int i,struct regstat *i_regs) void hlecall_assemble(int i,struct regstat *i_regs) { + extern void psxNULL(); signed char ccreg=get_reg(i_regs->regmap,CCREG); assert(ccreg==HOST_CCREG); assert(!is_delayslot); diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index 6b9e2b7a2..283173b52 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -66,9 +66,9 @@ static const u32 HSyncTotal[] = { 263, 313 }; #define VERBOSE_LEVEL 0 /******************************************************************************/ - +#ifdef DRC_DISABLE Rcnt rcnts[ CounterQuantity ]; - +#endif u32 hSyncCount = 0; u32 frame_counter = 0; static u32 hsync_steps = 0; @@ -493,7 +493,7 @@ s32 psxRcntFreeze( void *f, s32 Mode ) u32 count; s32 i; - gzfreeze( &rcnts, sizeof(rcnts) ); + gzfreeze( &rcnts, sizeof(Rcnt) * CounterQuantity ); gzfreeze( &hSyncCount, sizeof(hSyncCount) ); gzfreeze( &spuSyncCount, sizeof(spuSyncCount) ); gzfreeze( &psxNextCounter, sizeof(psxNextCounter) ); diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index a1a641df1..3caf889ba 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -307,7 +307,7 @@ void psxMemWrite8(u32 mem, u8 value) { if (Config.Debug) 
DebugCheckBP((mem & 0xffffff) | 0x80000000, W1); *(u8 *)(p + (mem & 0xffff)) = value; -#ifdef PSXREC +#ifndef DRC_DISABLE psxCpu->Clear((mem & (~3)), 1); #endif } else { @@ -334,7 +334,7 @@ void psxMemWrite16(u32 mem, u16 value) { if (Config.Debug) DebugCheckBP((mem & 0xffffff) | 0x80000000, W2); *(u16 *)(p + (mem & 0xffff)) = SWAPu16(value); -#ifdef PSXREC +#ifndef DRC_DISABLE psxCpu->Clear((mem & (~3)), 1); #endif } else { @@ -362,12 +362,12 @@ void psxMemWrite32(u32 mem, u32 value) { if (Config.Debug) DebugCheckBP((mem & 0xffffff) | 0x80000000, W4); *(u32 *)(p + (mem & 0xffff)) = SWAPu32(value); -#ifdef PSXREC +#ifndef DRC_DISABLE psxCpu->Clear(mem, 1); #endif } else { if (mem != 0xfffe0130) { -#ifdef PSXREC +#ifndef DRC_DISABLE if (!writeok) psxCpu->Clear(mem, 1); #endif diff --git a/libpcsxcore/psxmem.h b/libpcsxcore/psxmem.h index d9fee0023..fbf5f67c7 100644 --- a/libpcsxcore/psxmem.h +++ b/libpcsxcore/psxmem.h @@ -122,10 +122,6 @@ extern u8 **psxMemRLUT; #define PSXMu32ref(mem) (*(u32 *)PSXM(mem)) -#if !defined(PSXREC) && (defined(__x86_64__) || defined(__i386__) || defined(__ppc__)) && !defined(NOPSXREC) -#define PSXREC -#endif - int psxMemInit(); void psxMemReset(); void psxMemShutdown(); diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 82eb88578..85b77cb44 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -27,12 +27,14 @@ #include "gte.h" R3000Acpu *psxCpu = NULL; +#ifdef DRC_DISABLE psxRegisters psxRegs; +#endif int psxInit() { SysPrintf(_("Running PCSX Version %s (%s).\n"), PACKAGE_VERSION, __DATE__); -#ifdef PSXREC +#ifndef DRC_DISABLE if (Config.Cpu == CPU_INTERPRETER) { psxCpu = &psxInt; } else psxCpu = &psxRec; diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 32538e584..399f9b63f 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -41,7 +41,6 @@ typedef struct { extern R3000Acpu *psxCpu; extern R3000Acpu psxInt; extern R3000Acpu psxRec; -#define PSXREC typedef union { #if defined(__BIGENDIAN__) 
From fa18abb20fe2316f078023ddec6068bafef86014 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Mon, 27 Sep 2021 19:09:52 +0000 Subject: [PATCH 026/597] psxmem: Writes to PIO Expansion area have no effect. (#220) This is related to the Tetris with Card Captor fix a while back. However, i forgot to merge this one as well related to writes, even though it was not really necessary to fix that particular game. Co-authored-by: senquack --- libpcsxcore/psxmem.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 3caf889ba..61b14c655 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -176,7 +176,11 @@ int psxMemInit() { memcpy(psxMemWLUT + 0x8000, psxMemWLUT, 0x80 * sizeof(void *)); memcpy(psxMemWLUT + 0xa000, psxMemWLUT, 0x80 * sizeof(void *)); - psxMemWLUT[0x1f00] = (u8 *)psxP; + // Don't allow writes to PIO Expansion region (psxP) to take effect. + // NOTE: Not sure if this is needed to fix any games but seems wise, + // seeing as some games do read from PIO as part of copy-protection + // check. (See fix in psxMemReset() regarding psxP region reads). + psxMemWLUT[0x1f00] = NULL; psxMemWLUT[0x1f80] = (u8 *)psxH; return 0; From 40337130bf8f5925875fd13c70543f5947015c1b Mon Sep 17 00:00:00 2001 From: gameblabla Date: Sat, 2 Oct 2021 13:23:27 +0000 Subject: [PATCH 027/597] Fix detection of some CDs such as Wild Arms USA detected as a PAL game. (#222) I was able to confirm it fixed the issue for Wild Arms. 
Co-authored-by: bardeci <37640967+bardeci@users.noreply.github.com> --- libpcsxcore/misc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 213040c58..1b38e2871 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -379,7 +379,13 @@ int CheckCdrom() { strcpy(CdromId, "SLUS99999"); if (Config.PsxAuto) { // autodetect system (pal or ntsc) - if (CdromId[2] == 'e' || CdromId[2] == 'E') + if ( + /* Make sure Wild Arms SCUS-94608 is not detected as a PAL game. */ + ((CdromId[0] == 's' || CdromId[0] == 'S') && (CdromId[2] == 'e' || CdromId[2] == 'E')) || + !strncmp(CdromId, "DTLS3035", 8) || + !strncmp(CdromId, "PBPX95001", 9) || // according to redump.org, these PAL + !strncmp(CdromId, "PBPX95007", 9) || // discs have a non-standard ID; + !strncmp(CdromId, "PBPX95008", 9)) // add more serials if they are discovered. Config.PsxType = PSX_TYPE_PAL; // pal else Config.PsxType = PSX_TYPE_NTSC; // ntsc } From 943a507a4156b8f5b00e4431152e41eeb4dc6f3d Mon Sep 17 00:00:00 2001 From: gameblabla Date: Sat, 2 Oct 2021 13:41:42 +0000 Subject: [PATCH 028/597] Icache emulation from PCSX Redux + Senquack changes from PCSX4ALL (#198) * Merge Icache emulation from PCSX Redux See (Redux) : https://github.com/grumpycoders/pcsx-redux/commit/1923ce54ef585beba3a948d50f8c30161102312c See original icache implementation (mirror of PCSX Reloaded): https://github.com/gameblabla/pcsxr Without icache emulation, F1 2001 will greatly misbehave : if you accelerate, the car will go around like crazy. With icache emulation, it works as intended. Our code is slightly different from theirs as i found out that having the icache arrays in psxregs would cause crashes so instead what i'm doing is to taking them out of there and only allocating them on the heap (due to their great size). Co-authored-by: Nicolas Noble * Fix issues with BREAK and some interpreter commands. Fixes F1 2000. 
Note that the game is very sensitive to timing issues when it comes to the CDROM controller. That will be for a separate commit however. * Culling off cache bits from the hardware addresses. Based on those PRs from PCSX-Redux : https://github.com/grumpycoders/pcsx-redux/commit/0cd940100e96b95eea87dbb47381596f7f8dbe72#diff-009cbf66734b5de152bf170b80f8c7e03bebaa08a191f6ad7a06c7420f24b69c https://github.com/grumpycoders/pcsx-redux/commit/03d2ba3f278868cdd7ee3a44edef7ee87e6a1589#diff-009cbf66734b5de152bf170b80f8c7e03bebaa08a191f6ad7a06c7420f24b69c Co-authored-by: Nicolas Noble * Slightly better "open bus" behavior OG commit is here from PCSX Redux : https://github.com/grumpycoders/pcsx-redux/commit/128ba97f9680ab8dcd2f840f72ae998507325730#diff-8552772bc73559e3448880c9b8126252b49b95a89cfac254148d27127cbec719 Co-authored-by: Nicolas Noble * [Interpreter] Link even if branch is not taken in BGEZAL/BLTZAL Source : grumpycoders/pcsx-redux@c1a0569 Co-authored-by: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Co-authored-by: Nicolas Noble --- configure | 13 +++ frontend/main.c | 2 +- frontend/menu.c | 8 +- libpcsxcore/new_dynarec/emu_if.c | 21 ++++ libpcsxcore/psxbios.c | 5 +- libpcsxcore/psxcommon.h | 1 + libpcsxcore/psxhw.c | 12 +-- libpcsxcore/psxinterpreter.c | 176 +++++++++++++++++++++++++++++-- libpcsxcore/psxmem.c | 6 +- libpcsxcore/r3000a.c | 17 ++- libpcsxcore/r3000a.h | 12 +++ 11 files changed, 248 insertions(+), 25 deletions(-) diff --git a/configure b/configure index c3ff68fa2..5caf0f494 100755 --- a/configure +++ b/configure @@ -59,6 +59,7 @@ need_sdl="no" need_xlib="no" need_libpicofe="yes" need_warm="no" +enable_icache_emu="yes" CFLAGS_GLES="" LDLIBS_GLES="" # these are for known platforms @@ -93,12 +94,14 @@ set_platform() optimize_cortexa8="yes" have_arm_neon="yes" need_xlib="yes" + enable_icache_emu="no" ;; maemo) ram_fixed="yes" drc_cache_base="yes" optimize_cortexa8="yes" have_arm_neon="yes" + enable_icache_emu="no" ;; caanoo) 
sound_drivers="oss" @@ -106,6 +109,7 @@ set_platform() drc_cache_base="yes" optimize_arm926ej="yes" need_warm="yes" + enable_icache_emu="no" ;; libretro) sound_drivers="libretro" @@ -134,6 +138,10 @@ for opt do ;; --disable-dynarec) enable_dynarec="no" ;; + --disable-icache-emu) enable_icache_emu="no" + ;; + --enable-icache-emu) enable_icache_emu="yes" + ;; *) echo "ERROR: unknown option $opt"; show_help="yes" ;; esac @@ -152,6 +160,7 @@ if [ "$show_help" = "yes" ]; then echo " --disable-neon enable/disable ARM NEON optimizations [guessed]" echo " --disable-dynarec disable dynamic recompiler" echo " (dynarec is only available and enabled on ARM)" + echo " --disable-icache-emu Disables the instruction cache emulation" echo "influential environment variables:" echo " CROSS_COMPILE CC CXX AS AR CFLAGS ASFLAGS LDFLAGS LDLIBS" exit 1 @@ -492,6 +501,10 @@ if [ "x$sizeof_long" = "x4" ]; then CFLAGS="$CFLAGS -D_FILE_OFFSET_BITS=64" fi +if [ "$enable_icache_emu" = "yes" ]; then + CFLAGS="$CFLAGS -DICACHE_EMULATION" +fi + cat > $TMPC <Notify(R3000ACPU_NOTIFY_CACHE_ISOLATED, NULL); + psxCpu->Notify(R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL); +#endif pc0 = ra; } diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index 8ef794b1c..c9d300aaa 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -132,6 +132,7 @@ typedef struct { boolean RCntFix; boolean UseNet; boolean VSyncWA; + boolean icache_emulation; u8 Cpu; // CPU_DYNAREC or CPU_INTERPRETER u8 PsxType; // PSX_TYPE_NTSC or PSX_TYPE_PAL #ifdef _WIN32 diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index c90f8c73d..84ce2f730 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -44,7 +44,7 @@ void psxHwReset() { u8 psxHwRead8(u32 add) { unsigned char hard; - switch (add) { + switch (add & 0x1fffffff) { case 0x1f801040: hard = sioRead8();break; #ifdef ENABLE_SIO1API case 0x1f801050: hard = SIO1_readData8(); break; @@ -70,7 +70,7 @@ u8 psxHwRead8(u32 add) { u16 psxHwRead16(u32 add) { 
unsigned short hard; - switch (add) { + switch (add & 0x1fffffff) { #ifdef PSXHW_LOG case 0x1f801070: PSXHW_LOG("IREG 16bit read %x\n", psxHu16(0x1070)); return psxHu16(0x1070); @@ -204,7 +204,7 @@ u16 psxHwRead16(u32 add) { u32 psxHwRead32(u32 add) { u32 hard; - switch (add) { + switch (add & 0x1fffffff) { case 0x1f801040: hard = sioRead8(); hard |= sioRead8() << 8; @@ -355,7 +355,7 @@ u32 psxHwRead32(u32 add) { } void psxHwWrite8(u32 add, u8 value) { - switch (add) { + switch (add & 0x1fffffff) { case 0x1f801040: sioWrite8(value); break; #ifdef ENABLE_SIO1API case 0x1f801050: SIO1_writeData8(value); break; @@ -379,7 +379,7 @@ void psxHwWrite8(u32 add, u8 value) { } void psxHwWrite16(u32 add, u16 value) { - switch (add) { + switch (add & 0x1fffffff) { case 0x1f801040: sioWrite8((unsigned char)value); sioWrite8((unsigned char)(value>>8)); @@ -518,7 +518,7 @@ void psxHwWrite16(u32 add, u16 value) { } void psxHwWrite32(u32 add, u32 value) { - switch (add) { + switch (add & 0x1fffffff) { case 0x1f801040: sioWrite8((unsigned char)value); sioWrite8((unsigned char)((value&0xff) >> 8)); diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index e59f93d8c..02e00a9f1 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -49,6 +49,65 @@ void (*psxCP0[32])(); void (*psxCP2[64])(struct psxCP2Regs *regs); void (*psxCP2BSC[32])(); +#ifdef ICACHE_EMULATION +/* +Formula One 2001 : +Use old CPU cache code when the RAM location is updated with new code (affects in-game racing) +*/ +static u8* ICache_Addr; +static u8* ICache_Code; +uint32_t *Read_ICache(uint32_t pc) +{ + uint32_t pc_bank, pc_offset, pc_cache; + uint8_t *IAddr, *ICode; + + pc_bank = pc >> 24; + pc_offset = pc & 0xffffff; + pc_cache = pc & 0xfff; + + IAddr = ICache_Addr; + ICode = ICache_Code; + + // cached - RAM + if (pc_bank == 0x80 || pc_bank == 0x00) + { + if (SWAP32(*(uint32_t *)(IAddr + pc_cache)) == pc_offset) + { + // Cache hit - return last opcode used + return 
(uint32_t *)(ICode + pc_cache); + } + else + { + // Cache miss - addresses don't match + // - default: 0xffffffff (not init) + + // cache line is 4 bytes wide + pc_offset &= ~0xf; + pc_cache &= ~0xf; + + // address line + *(uint32_t *)(IAddr + pc_cache + 0x0) = SWAP32(pc_offset + 0x0); + *(uint32_t *)(IAddr + pc_cache + 0x4) = SWAP32(pc_offset + 0x4); + *(uint32_t *)(IAddr + pc_cache + 0x8) = SWAP32(pc_offset + 0x8); + *(uint32_t *)(IAddr + pc_cache + 0xc) = SWAP32(pc_offset + 0xc); + + // opcode line + pc_offset = pc & ~0xf; + *(uint32_t *)(ICode + pc_cache + 0x0) = psxMu32ref(pc_offset + 0x0); + *(uint32_t *)(ICode + pc_cache + 0x4) = psxMu32ref(pc_offset + 0x4); + *(uint32_t *)(ICode + pc_cache + 0x8) = psxMu32ref(pc_offset + 0x8); + *(uint32_t *)(ICode + pc_cache + 0xc) = psxMu32ref(pc_offset + 0xc); + } + } + + /* + TODO: Probably should add cached BIOS + */ + // default + return (uint32_t *)PSXM(pc); +} +#endif + static void delayRead(int reg, u32 bpc) { u32 rold, rnew; @@ -266,7 +325,17 @@ void psxDelayTest(int reg, u32 bpc) { u32 *code; u32 tmp; - code = (u32 *)PSXM(bpc); + #ifdef ICACHE_EMULATION + if (Config.icache_emulation) + { + code = Read_ICache(psxRegs.pc); + } + else + #endif + { + code = (u32 *)PSXM(psxRegs.pc); + } + tmp = ((code == NULL) ? 0 : SWAP32(*code)); branch = 1; @@ -290,7 +359,16 @@ static u32 psxBranchNoDelay(void) { u32 *code; u32 temp; - code = (u32 *)PSXM(psxRegs.pc); + #ifdef ICACHE_EMULATION + if (Config.icache_emulation) + { + code = Read_ICache(psxRegs.pc); + } + else + #endif + { + code = (u32 *)PSXM(psxRegs.pc); + } psxRegs.code = ((code == NULL) ? 0 : SWAP32(*code)); switch (_Op_) { case 0x00: // SPECIAL @@ -419,7 +497,16 @@ static void doBranch(u32 tar) { if (psxDelayBranchTest(tar)) return; - code = (u32 *)PSXM(psxRegs.pc); + #ifdef ICACHE_EMULATION + if (Config.icache_emulation) + { + code = Read_ICache(psxRegs.pc); + } + else + #endif + { + code = (u32 *)PSXM(psxRegs.pc); + } psxRegs.code = ((code == NULL) ? 
0 : SWAP32(*code)); debugI(); @@ -554,7 +641,7 @@ void psxMULTU() { * Format: OP rs, offset * *********************************************************/ #define RepZBranchi32(op) if(_i32(_rRs_) op 0) doBranch(_BranchTarget_); -#define RepZBranchLinki32(op) if(_i32(_rRs_) op 0) { _SetLink(31); doBranch(_BranchTarget_); } +#define RepZBranchLinki32(op) { _SetLink(31); if(_i32(_rRs_) op 0) { doBranch(_BranchTarget_); } } void psxBGEZ() { RepZBranchi32(>=) } // Branch if Rs >= 0 void psxBGEZAL() { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link @@ -575,9 +662,9 @@ void psxSRL() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) >> _Sa_; } // Rd = * Shift arithmetic with variant register shift * * Format: OP rd, rt, rs * *********************************************************/ -void psxSLLV() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) << _u32(_rRs_); } // Rd = Rt << rs -void psxSRAV() { if (!_Rd_) return; _i32(_rRd_) = _i32(_rRt_) >> _u32(_rRs_); } // Rd = Rt >> rs (arithmetic) -void psxSRLV() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) >> _u32(_rRs_); } // Rd = Rt >> rs (logical) +void psxSLLV() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) << (_u32(_rRs_) & 0x1F); } // Rd = Rt << rs +void psxSRAV() { if (!_Rd_) return; _i32(_rRd_) = _i32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (arithmetic) +void psxSRLV() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (logical) /********************************************************* * Load higher 16 bits of the first word in GPR with imm * @@ -604,7 +691,8 @@ void psxMTLO() { _rLo_ = _rRs_; } // Lo = Rs * Format: OP * *********************************************************/ void psxBREAK() { - // Break exception - psx rom doens't handles this + psxRegs.pc -= 4; + psxException(0x24, branch); } void psxSYSCALL() { @@ -616,6 +704,7 @@ void psxRFE() { // SysPrintf("psxRFE\n"); psxRegs.CP0.n.Status = (psxRegs.CP0.n.Status & 0xfffffff0) | ((psxRegs.CP0.n.Status & 0x3c) 
>> 2); + psxTestSWInts(); } /********************************************************* @@ -639,14 +728,14 @@ void psxJAL() { _SetLink(31); doBranch(_JumpTarget_); } * Format: OP rs, rd * *********************************************************/ void psxJR() { - doBranch(_u32(_rRs_)); + doBranch(_rRs_ & ~3); psxJumpTest(); } void psxJALR() { u32 temp = _u32(_rRs_); if (_Rd_) { _SetLink(_Rd_); } - doBranch(temp); + doBranch(temp & ~3); } /********************************************************* @@ -923,10 +1012,38 @@ void (*psxCP2BSC[32])() = { /////////////////////////////////////////// static int intInit() { + #ifdef ICACHE_EMULATION + /* We have to allocate the icache memory even if + * the user has not enabled it as otherwise it can cause issues. + */ + if (!ICache_Addr) + { + ICache_Addr = malloc(0x1000); + if (!ICache_Addr) + { + return -1; + } + } + + if (!ICache_Code) + { + ICache_Code = malloc(0x1000); + if (!ICache_Code) + { + return -1; + } + } + memset(ICache_Addr, 0xff, 0x1000); + memset(ICache_Code, 0xff, 0x1000); + #endif return 0; } static void intReset() { + #ifdef ICACHE_EMULATION + memset(ICache_Addr, 0xff, 0x1000); + memset(ICache_Code, 0xff, 0x1000); + #endif } void intExecute() { @@ -943,12 +1060,46 @@ void intExecuteBlock() { static void intClear(u32 Addr, u32 Size) { } +void intNotify (int note, void *data) { + #ifdef ICACHE_EMULATION + /* Gameblabla - Only clear the icache if it's isolated */ + if (note == R3000ACPU_NOTIFY_CACHE_ISOLATED) + { + memset(ICache_Addr, 0xff, 0x1000); + memset(ICache_Code, 0xff, 0x1000); + } + #endif +} + static void intShutdown() { + #ifdef ICACHE_EMULATION + if (ICache_Addr) + { + free(ICache_Addr); + ICache_Addr = NULL; + } + + if (ICache_Code) + { + free(ICache_Code); + ICache_Code = NULL; + } + #endif } // interpreter execution void execI() { - u32 *code = (u32 *)PSXM(psxRegs.pc); + u32 *code; + #ifdef ICACHE_EMULATION + if (Config.icache_emulation) + { + code = Read_ICache(psxRegs.pc); + } + else + #endif + 
{ + code = (u32 *)PSXM(psxRegs.pc); + } psxRegs.code = ((code == NULL) ? 0 : SWAP32(*code)); debugI(); @@ -967,5 +1118,8 @@ R3000Acpu psxInt = { intExecute, intExecuteBlock, intClear, +#ifdef ICACHE_EMULATION + intNotify, +#endif intShutdown }; diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 61b14c655..171104cb3 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -240,7 +240,7 @@ u8 psxMemRead8(u32 mem) { #ifdef PSXMEM_LOG PSXMEM_LOG("err lb %8.8lx\n", mem); #endif - return 0; + return 0xFF; } } } @@ -265,7 +265,7 @@ u16 psxMemRead16(u32 mem) { #ifdef PSXMEM_LOG PSXMEM_LOG("err lh %8.8lx\n", mem); #endif - return 0; + return 0xFFFF; } } } @@ -290,7 +290,7 @@ u32 psxMemRead32(u32 mem) { #ifdef PSXMEM_LOG if (writeok) { PSXMEM_LOG("err lw %8.8lx\n", mem); } #endif - return 0; + return 0xFFFFFFFF; } } } diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 85b77cb44..e21d48832 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -81,7 +81,21 @@ void psxShutdown() { } void psxException(u32 code, u32 bd) { - if (!Config.HLE && ((((psxRegs.code = PSXMu32(psxRegs.pc)) >> 24) & 0xfe) == 0x4a)) { + #ifdef ICACHE_EMULATION + /* Dynarecs may use this codepath and crash as a result. + * This should only be used for the interpreter. - Gameblabla + * */ + if (Config.icache_emulation && Config.Cpu == CPU_INTERPRETER) + { + psxRegs.code = SWAPu32(*Read_ICache(psxRegs.pc)); + } + else + #endif + { + psxRegs.code = PSXMu32(psxRegs.pc); + } + + if (!Config.HLE && ((((psxRegs.code) >> 24) & 0xfe) == 0x4a)) { // "hokuto no ken" / "Crash Bandicot 2" ... 
// BIOS does not allow to return to GTE instructions // (just skips it, supposedly because it's scheduled already) @@ -98,7 +112,6 @@ void psxException(u32 code, u32 bd) { #ifdef PSXCPU_LOG PSXCPU_LOG("bd set!!!\n"); #endif - SysPrintf("bd set!!!\n"); psxRegs.CP0.n.Cause |= 0x80000000; psxRegs.CP0.n.EPC = (psxRegs.pc - 4); } else diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 399f9b63f..4b1ec9e0c 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -29,12 +29,24 @@ extern "C" { #include "psxcounters.h" #include "psxbios.h" +#ifdef ICACHE_EMULATION +enum { + R3000ACPU_NOTIFY_CACHE_ISOLATED = 0, + R3000ACPU_NOTIFY_CACHE_UNISOLATED = 1, + R3000ACPU_NOTIFY_DMA3_EXE_LOAD = 2 +}; +extern uint32_t *Read_ICache(uint32_t pc); +#endif + typedef struct { int (*Init)(); void (*Reset)(); void (*Execute)(); /* executes up to a break */ void (*ExecuteBlock)(); /* executes up to a jump */ void (*Clear)(u32 Addr, u32 Size); +#ifdef ICACHE_EMULATION + void (*Notify)(int note, void *data); +#endif void (*Shutdown)(); } R3000Acpu; From b59f02a471a76ea61c0e6bd1082dc764e9aca59e Mon Sep 17 00:00:00 2001 From: gameblabla Date: Sat, 2 Oct 2021 14:39:44 +0000 Subject: [PATCH 029/597] Fix for Armored Core misdetecting a Link cable being detected (#223) For some reason, the game detects that a link cable is plugged in and disables the local multiplayer as a result. Thanks @sony for fixing the issue in their PS Classic branch, a simpler fix is done here instead. Co-authored-by: Sony --- libpcsxcore/psxhw.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index 84ce2f730..dbcb9892f 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -123,7 +123,14 @@ u16 psxHwRead16(u32 add) { return hard; case 0x1f80105e: hard = SIO1_readBaud16(); - return hard; + return hard; +#else + /* Fixes Armored Core misdetecting the Link cable being detected.
+ * We want to turn that thing off and force it to do local multiplayer instead. + * Thanks Sony for the fix, they fixed it in their PS Classic fork. + */ + case 0x1f801054: + return 0x80; #endif case 0x1f801100: hard = psxRcntRcount(0); From 91f412c2959e77ea744a13b0592020c50982c623 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Sat, 2 Oct 2021 14:53:16 +0000 Subject: [PATCH 030/597] One more on odd lines for PAL HSYNC (#224) As done by Duckstation https://github.com/stenzek/duckstation/blob/bbcf1c67d1aefd5de9cdc9c158f92bc7aaecaa63/src/core/gpu.h#L56 --- libpcsxcore/psxcounters.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index 283173b52..bd0f09b99 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -60,7 +60,7 @@ static const u32 CountToOverflow = 0; static const u32 CountToTarget = 1; static const u32 FrameRate[] = { 60, 50 }; -static const u32 HSyncTotal[] = { 263, 313 }; +static const u32 HSyncTotal[] = { 263, 314 }; // actually one more on odd lines for PAL #define VBlankStart 240 #define VERBOSE_LEVEL 0 From f4627eb3cb482fa3786b63e9ff4c9723ac792ce1 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Sat, 2 Oct 2021 15:51:24 +0000 Subject: [PATCH 031/597] Proper CDDA fix for Fantastic Pinball Kyuutenkai (and remove CDDA playthread as it causes issues) (#215) Part of the CDROM related changes were taken from PCSX Reloaded. 
https://github.com/gameblabla/pcsxr/blob/6484236cb0281e8040ff6c8078c87899a3407534/pcsxr/libpcsxcore/cdrom.c --- libpcsxcore/cdriso.c | 144 +------------------------------------------ libpcsxcore/cdrom.c | 11 +++- 2 files changed, 9 insertions(+), 146 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index f8a4d21aa..9ef594c68 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -65,11 +65,6 @@ static unsigned char sndbuffer[CD_FRAMESIZE_RAW * 10]; #define CDDA_FRAMETIME (1000 * (sizeof(sndbuffer) / CD_FRAMESIZE_RAW) / 75) -#ifdef _WIN32 -static HANDLE threadid; -#else -static pthread_t threadid; -#endif static unsigned int initial_offset = 0; static boolean playing = FALSE; static boolean cddaBigEndian = FALSE; @@ -190,123 +185,9 @@ static long GetTickCount(void) { } #endif -// this thread plays audio data -#ifdef _WIN32 -static void playthread(void *param) -#else -static void *playthread(void *param) -#endif -{ - long osleep, d, t, i, s; - unsigned char tmp; - int ret = 0, sector_offs; - - t = GetTickCount(); - - while (playing) { - s = 0; - for (i = 0; i < sizeof(sndbuffer) / CD_FRAMESIZE_RAW; i++) { - sector_offs = cdda_cur_sector - cdda_first_sector; - if (sector_offs < 0) { - d = CD_FRAMESIZE_RAW; - memset(sndbuffer + s, 0, d); - } - else { - d = cdimg_read_func(cddaHandle, cdda_file_offset, - sndbuffer + s, sector_offs); - if (d < CD_FRAMESIZE_RAW) - break; - } - - s += d; - cdda_cur_sector++; - } - - if (s == 0) { - playing = FALSE; - initial_offset = 0; - break; - } - - if (!cdr.Muted && playing) { - if (cddaBigEndian) { - for (i = 0; i < s / 2; i++) { - tmp = sndbuffer[i * 2]; - sndbuffer[i * 2] = sndbuffer[i * 2 + 1]; - sndbuffer[i * 2 + 1] = tmp; - } - } - - // can't do it yet due to readahead.. 
- //cdrAttenuate((short *)sndbuffer, s / 4, 1); - do { - ret = SPU_playCDDAchannel((short *)sndbuffer, s); - if (ret == 0x7761) - usleep(6 * 1000); - } while (ret == 0x7761 && playing); // rearmed_wait - } - - if (ret != 0x676f) { // !rearmed_go - // do approx sleep - long now; - - // HACK: stop feeding data while emu is paused - extern int stop; - while (stop && playing) - usleep(10000); - - now = GetTickCount(); - osleep = t - now; - if (osleep <= 0) { - osleep = 1; - t = now; - } - else if (osleep > CDDA_FRAMETIME) { - osleep = CDDA_FRAMETIME; - t = now; - } - - usleep(osleep * 1000); - t += CDDA_FRAMETIME; - } - - } - -#ifdef _WIN32 - _endthread(); -#else - pthread_exit(0); - return NULL; -#endif -} - // stop the CDDA playback static void stopCDDA() { - if (!playing) { - return; - } - playing = FALSE; -#ifdef _WIN32 - WaitForSingleObject(threadid, INFINITE); -#else - pthread_join(threadid, NULL); -#endif -} - -// start the CDDA playback -static void startCDDA(void) { - if (playing) { - stopCDDA(); - } - - playing = TRUE; - -#ifdef _WIN32 - threadid = (HANDLE)_beginthread(playthread, 0, NULL); -#else - pthread_create(&threadid, NULL, playthread, NULL); -#endif } // this function tries to get the .toc file of the given .bin @@ -1634,30 +1515,7 @@ static long CALLBACK ISOreadTrack(unsigned char *time) { // sector: byte 0 - minute; byte 1 - second; byte 2 - frame // does NOT uses bcd format static long CALLBACK ISOplay(unsigned char *time) { - unsigned int i; - - if (numtracks <= 1) - return 0; - - // find the track - cdda_cur_sector = msf2sec((char *)time); - for (i = numtracks; i > 1; i--) { - cdda_first_sector = msf2sec(ti[i].start); - if (cdda_first_sector <= cdda_cur_sector + 2 * 75) - break; - } - cdda_file_offset = ti[i].start_offset; - - // find the file that contains this track - for (; i > 1; i--) - if (ti[i].handle != NULL) - break; - - cddaHandle = ti[i].handle; - - if (SPU_playCDDAchannel != NULL) - startCDDA(); - + playing = TRUE; return 0; } diff 
--git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 4e312fd69..47e403751 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -46,6 +46,7 @@ cdrStruct cdr; static unsigned char *pTransfer; +static s16 read_buf[CD_FRAMESIZE_RAW/2]; /* CD-ROM magic numbers */ #define CdlSync 0 /* nocash documentation : "Uh, actually, returns error code 40h = Invalid Command...?" */ @@ -434,8 +435,7 @@ static void cdrPlayInterrupt_Autopause() u32 abs_lev_max = 0; boolean abs_lev_chselect; u32 i; - s16 read_buf[CD_FRAMESIZE_RAW/2]; - + if ((cdr.Mode & MODE_AUTOPAUSE) && cdr.TrackChanged) { CDR_LOG( "CDDA STOP\n" ); @@ -452,7 +452,6 @@ static void cdrPlayInterrupt_Autopause() } else if (cdr.Mode & MODE_REPORT) { CDR_readCDDA(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2], (u8 *)read_buf); - cdr.Result[0] = cdr.StatP; cdr.Result[1] = cdr.subq.Track; cdr.Result[2] = cdr.subq.Index; @@ -533,6 +532,12 @@ void cdrPlayInterrupt() cdrPlayInterrupt_Autopause(); if (!cdr.Play) return; + + if (CDR_readCDDA && !cdr.Muted && cdr.Mode & MODE_REPORT) { + cdrAttenuate((u8 *)read_buf, CD_FRAMESIZE_RAW / 4, 1); + if (SPU_playCDDAchannel) + SPU_playCDDAchannel((u8 *)read_buf, CD_FRAMESIZE_RAW); + } cdr.SetSectorPlay[2]++; if (cdr.SetSectorPlay[2] == 75) { From c9c7a925d158284eb3febf3cccc9789e94231430 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Sat, 2 Oct 2021 17:51:48 +0000 Subject: [PATCH 032/597] Merge several fixes from PCSX Redux and adjust delay for SetLocPending. (#221) There's a game, PoPoLoCrois Monogatari II, that unfortunately locks up during the intro screen. I should have known that code was wrong as Mednafen did not have anything like that in their code either, hence the confusion. Their fix however still don't include the Driver fix so the game would still crash if we don't have the "+ Seektime". To be honest, i'm not sure why the PCSX Reloaded team did it this way... I noticed that the fastforward and fastbackward code was pretty much unused. 
Looked at Mednafen and all they do is just adjust the cursector and make sure that fastforward & fastbackward trigger the AUTO_REPORT code so i did the latter. Co-authored-by: Nicolas Noble --- libpcsxcore/cdriso.c | 8 ++--- libpcsxcore/cdrom.c | 78 +++++++++++++++++++++++++++---------------- libpcsxcore/cdrom.h | 2 +- libpcsxcore/plugins.h | 2 +- 4 files changed, 55 insertions(+), 35 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index 9ef594c68..5aad2252f 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -1480,12 +1480,12 @@ static void DecodeRawSubData(void) { // read track // time: byte 0 - minute; byte 1 - second; byte 2 - frame // uses bcd format -static long CALLBACK ISOreadTrack(unsigned char *time) { +static boolean CALLBACK ISOreadTrack(unsigned char *time) { int sector = MSF2SECT(btoi(time[0]), btoi(time[1]), btoi(time[2])); long ret; if (cdHandle == NULL) { - return -1; + return 0; } if (pregapOffset) { @@ -1499,7 +1499,7 @@ static long CALLBACK ISOreadTrack(unsigned char *time) { ret = cdimg_read_func(cdHandle, 0, cdbuffer, sector); if (ret < 0) - return -1; + return 0; if (subHandle != NULL) { fseek(subHandle, sector * SUB_FRAMESIZE, SEEK_SET); @@ -1508,7 +1508,7 @@ static long CALLBACK ISOreadTrack(unsigned char *time) { if (subChanRaw) DecodeRawSubData(); } - return 0; + return 1; } // plays cdda audio diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 47e403751..24fd9c920 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -383,7 +383,7 @@ static void ReadTrack(const u8 *time) { CDR_LOG("ReadTrack *** %02x:%02x:%02x\n", tmp[0], tmp[1], tmp[2]); - cdr.RErr = CDR_readTrack(tmp); + cdr.NoErr = CDR_readTrack(tmp); memcpy(cdr.Prev, tmp, 3); if (CheckSBI(time)) @@ -450,7 +450,7 @@ static void cdrPlayInterrupt_Autopause() StopCdda(); } - else if (cdr.Mode & MODE_REPORT) { + else if (((cdr.Mode & MODE_REPORT) || cdr.FastForward || cdr.FastBackward)) { CDR_readCDDA(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], 
cdr.SetSectorPlay[2], (u8 *)read_buf); cdr.Result[0] = cdr.StatP; cdr.Result[1] = cdr.subq.Track; @@ -612,6 +612,10 @@ void cdrInterrupt() { // XXX: wrong, should seek instead.. cdr.Seeked = SEEK_DONE; } + + cdr.FastBackward = 0; + cdr.FastForward = 0; + if (cdr.SetlocPending) { memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); cdr.SetlocPending = 0; @@ -676,9 +680,7 @@ void cdrInterrupt() { cdr.Stat = Complete; // GameShark CD Player: Calls 2x + Play 2x - if( cdr.FastForward == 0 ) cdr.FastForward = 2; - else cdr.FastForward++; - + cdr.FastForward = 1; cdr.FastBackward = 0; break; @@ -686,9 +688,7 @@ void cdrInterrupt() { cdr.Stat = Complete; // GameShark CD Player: Calls 2x + Play 2x - if( cdr.FastBackward == 0 ) cdr.FastBackward = 2; - else cdr.FastBackward++; - + cdr.FastBackward = 1; cdr.FastForward = 0; break; @@ -955,7 +955,22 @@ void cdrInterrupt() { case CdlReadS: if (cdr.SetlocPending) { seekTime = abs(msf2sec(cdr.SetSectorPlay) - msf2sec(cdr.SetSector)) * (cdReadTime / 200); - if(seekTime > 1000000) seekTime = 1000000; + /* + * Gameblabla : + * It was originally set to 1000000 for Driver, however it is not high enough for Worms Pinball + * and was unreliable for that game. + * I also tested it against Mednafen and Driver's titlescreen music starts 25 frames later, not immediatly. + * + * Obviously, this isn't perfect but right now, it should be a bit better. + * Games to test this against if you change that setting : + * - Driver (titlescreen music delay and retry mission) + * - Worms Pinball (Will either not boot or crash in the memory card screen) + * - Viewpoint (short pauses if the delay in the ingame music is too long) + * + * It seems that 3386880 * 5 is too much for Driver's titlescreen and it starts skipping. + * However, 1000000 is not enough for Worms Pinball to reliably boot. 
+ */ + if(seekTime > 3386880 * 2) seekTime = 3386880 * 2; memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); cdr.SetlocPending = 0; cdr.m_locationChanged = TRUE; @@ -987,22 +1002,29 @@ void cdrInterrupt() { - fixes cutscenes C-12 - Final Resistance - doesn't like seek */ + + /* + By nicolasnoble from PCSX Redux : + "It LOOKS like this logic is wrong, therefore disabling it with `&& false` for now. + For "PoPoLoCrois Monogatari II", the game logic will soft lock and will never issue GetLocP to detect + the end of its XA streams, as it seems to assume ReadS will not return a status byte with the SEEK + flag set. I think the reasonning is that since it's invalid to call GetLocP while seeking, the game + tries to protect itself against errors by preventing from issuing a GetLocP while it knows the + last status was "seek". But this makes the logic just softlock as it'll never get a notification + about the fact the drive is done seeking and the read actually started. + + In other words, this state machine here is probably wrong in assuming the response to ReadS/ReadN is + done right away. It's rather when it's done seeking, and the read has actually started. This probably + requires a bit more work to make sure seek delays are processed properly. + Checked with a few games, this seems to work fine." + + Gameblabla additional notes : + This still needs the "+ seekTime" that PCSX Redux doesn't have for the Driver "retry" mission error. + */ + cdr.StatP |= STATUS_READ; + cdr.StatP &= ~STATUS_SEEK; - if (cdr.Seeked != SEEK_DONE) { - cdr.StatP |= STATUS_SEEK; - cdr.StatP &= ~STATUS_READ; - - // Crusaders of Might and Magic - use short time - // - fix cutscene speech (startup) - - // ??? - use more accurate seek time later - CDREAD_INT(((cdr.Mode & 0x80) ? (cdReadTime / 2) : cdReadTime * 1) + seekTime); - } else { - cdr.StatP |= STATUS_READ; - cdr.StatP &= ~STATUS_SEEK; - - CDREAD_INT((cdr.Mode & 0x80) ? (cdReadTime / 2) : cdReadTime * 1); - } + CDREAD_INT(((cdr.Mode & 0x80) ? 
(cdReadTime) : cdReadTime * 2) + seekTime); cdr.Result[0] = cdr.StatP; start_rotating = 1; @@ -1126,9 +1148,9 @@ void cdrReadInterrupt() { buf = CDR_getBuffer(); if (buf == NULL) - cdr.RErr = -1; + cdr.NoErr = 0; - if (cdr.RErr == -1) { + if (!cdr.NoErr) { CDR_LOG_I("cdrReadInterrupt() Log: err\n"); memset(cdr.Transfer, 0, DATA_SIZE); cdr.Stat = DiskError; @@ -1536,9 +1558,7 @@ void cdrReset() { cdr.DriveState = DRIVESTATE_STANDBY; cdr.StatP = STATUS_ROTATING; pTransfer = cdr.Transfer; - cdr.SetlocPending = 0; - cdr.m_locationChanged = FALSE; - + // BIOS player - default values cdr.AttenuatorLeftToLeft = 0x80; cdr.AttenuatorLeftToRight = 0x00; diff --git a/libpcsxcore/cdrom.h b/libpcsxcore/cdrom.h index 860930b10..2ec10545a 100644 --- a/libpcsxcore/cdrom.h +++ b/libpcsxcore/cdrom.h @@ -91,7 +91,7 @@ typedef struct { int CurTrack; int Mode, File, Channel; int Reset; - int RErr; + int NoErr; int FirstSector; xa_decode_t Xa; diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index 132df9073..e3bffc776 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -123,7 +123,7 @@ typedef long (CALLBACK* CDRopen)(void); typedef long (CALLBACK* CDRclose)(void); typedef long (CALLBACK* CDRgetTN)(unsigned char *); typedef long (CALLBACK* CDRgetTD)(unsigned char, unsigned char *); -typedef long (CALLBACK* CDRreadTrack)(unsigned char *); +typedef boolean (CALLBACK* CDRreadTrack)(unsigned char *); typedef unsigned char* (CALLBACK* CDRgetBuffer)(void); typedef unsigned char* (CALLBACK* CDRgetBufferSub)(void); typedef long (CALLBACK* CDRconfigure)(void); From ecd502e11f1d17998924f2de5909380b75c67d49 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 2 Oct 2021 21:37:51 +0300 Subject: [PATCH 033/597] cdriso: clean up after cdda thread removal --- libpcsxcore/cdriso.c | 28 +--------------------------- libpcsxcore/cdrom.c | 4 ++-- 2 files changed, 3 insertions(+), 29 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index 5aad2252f..e6247bbbe 100644 
--- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -61,24 +61,15 @@ static boolean multifile = FALSE; static unsigned char cdbuffer[CD_FRAMESIZE_RAW]; static unsigned char subbuffer[SUB_FRAMESIZE]; -static unsigned char sndbuffer[CD_FRAMESIZE_RAW * 10]; - -#define CDDA_FRAMETIME (1000 * (sizeof(sndbuffer) / CD_FRAMESIZE_RAW) / 75) - -static unsigned int initial_offset = 0; static boolean playing = FALSE; static boolean cddaBigEndian = FALSE; -// cdda sectors in toc, byte offset in file -static unsigned int cdda_cur_sector; -static unsigned int cdda_first_sector; -static unsigned int cdda_file_offset; /* Frame offset into CD image where pregap data would be found if it was there. * If a game seeks there we must *not* return subchannel data since it's * not in the CD image, so that cdrom code can fake subchannel data instead. * XXX: there could be multiple pregaps but PSX dumps only have one? */ static unsigned int pregapOffset; -#define cddaCurPos cdda_cur_sector +static unsigned int cddaCurPos; // compressed image stuff static struct { @@ -170,21 +161,6 @@ static void tok2msf(char *time, char *msf) { } } -#ifndef _WIN32 -static long GetTickCount(void) { - static time_t initial_time = 0; - struct timeval now; - - gettimeofday(&now, NULL); - - if (initial_time == 0) { - initial_time = now.tv_sec; - } - - return (now.tv_sec - initial_time) * 1000L + now.tv_usec / 1000L; -} -#endif - // stop the CDDA playback static void stopCDDA() { playing = FALSE; @@ -1352,8 +1328,6 @@ static long CALLBACK ISOopen(void) { if (numtracks > 1 && ti[1].handle == NULL) { ti[1].handle = fopen(bin_filename, "rb"); } - cdda_cur_sector = 0; - cdda_file_offset = 0; return 0; } diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 24fd9c920..647a1b6cb 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -534,9 +534,9 @@ void cdrPlayInterrupt() if (!cdr.Play) return; if (CDR_readCDDA && !cdr.Muted && cdr.Mode & MODE_REPORT) { - cdrAttenuate((u8 *)read_buf, CD_FRAMESIZE_RAW 
/ 4, 1); + cdrAttenuate(read_buf, CD_FRAMESIZE_RAW / 4, 1); if (SPU_playCDDAchannel) - SPU_playCDDAchannel((u8 *)read_buf, CD_FRAMESIZE_RAW); + SPU_playCDDAchannel(read_buf, CD_FRAMESIZE_RAW); } cdr.SetSectorPlay[2]++; From 7bbabe80066083236adfd653f289de17443a7dd8 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Sun, 10 Oct 2021 15:30:59 +0000 Subject: [PATCH 034/597] Fix oversight for icache emulation code. (#226) This fixes Armored Core when being run in icache emulation mode. This was a mistake on my part as i forgot about that piece of code. --- libpcsxcore/misc.c | 2 ++ libpcsxcore/psxmem.c | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 1b38e2871..cd16c41a9 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -244,6 +244,7 @@ int LoadCdrom() { tmpHead.t_addr = SWAP32(tmpHead.t_addr); psxCpu->Clear(tmpHead.t_addr, tmpHead.t_size / 4); + psxCpu->Reset(); // Read the rest of the main executable while (tmpHead.t_size & ~2047) { @@ -291,6 +292,7 @@ int LoadCdromFile(const char *filename, EXE_HEADER *head) { addr = head->t_addr; psxCpu->Clear(addr, size / 4); + psxCpu->Reset(); while (size & ~2047) { incTime(); diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 171104cb3..c09965dc1 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -389,6 +389,10 @@ void psxMemWrite32(u32 mem, u32 value) { memset(psxMemWLUT + 0x0000, 0, 0x80 * sizeof(void *)); memset(psxMemWLUT + 0x8000, 0, 0x80 * sizeof(void *)); memset(psxMemWLUT + 0xa000, 0, 0x80 * sizeof(void *)); +#ifdef ICACHE_EMULATION + /* Required for icache interpreter otherwise Armored Core won't boot on icache interpreter */ + psxCpu->Notify(R3000ACPU_NOTIFY_CACHE_ISOLATED, NULL); +#endif break; case 0x00: case 0x1e988: if (writeok == 1) break; @@ -396,6 +400,10 @@ void psxMemWrite32(u32 mem, u32 value) { for (i = 0; i < 0x80; i++) psxMemWLUT[i + 0x0000] = (void *)&psxM[(i & 0x1f) << 16]; memcpy(psxMemWLUT + 0x8000, psxMemWLUT, 0x80 
* sizeof(void *)); memcpy(psxMemWLUT + 0xa000, psxMemWLUT, 0x80 * sizeof(void *)); +#ifdef ICACHE_EMULATION + /* Dynarecs might take this opportunity to flush their code cache */ + psxCpu->Notify(R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL); +#endif break; default: #ifdef PSXMEM_LOG From b64fb8912577f6f1e856bf255b6bd96e4e977203 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Mon, 18 Oct 2021 20:20:09 +0000 Subject: [PATCH 035/597] Fix CD volume issue in Star Wars - Dark Forces. (#232) CD Volume is 16-bits signed, not unsigned. Otherwise in Star Wars Dark Forces : If you set the volume slider to the minimum value allowed for the CD Volume, it will overflow and wrap around (to the maximum volume). --- plugins/dfsound/externals.h | 2 +- plugins/dfsound/registers.c | 8 ++++---- plugins/dfsound/spu.c | 2 +- plugins/spunull/spunull.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index de4b5dbe2..5ec941525 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -201,7 +201,7 @@ typedef struct short * pS; void (CALLBACK *irqCallback)(void); // func of main emu, called on spu irq - void (CALLBACK *cddavCallback)(unsigned short,unsigned short); + void (CALLBACK *cddavCallback)(short, short); void (CALLBACK *scheduleCallback)(unsigned int); xa_decode_t * xapGlobal; diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index cc7202078..e0693064d 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -204,12 +204,12 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, break; //-------------------------------------------------// case H_CDLeft: - spu.iLeftXAVol=val & 0x7fff; - if(spu.cddavCallback) spu.cddavCallback(0,val); + spu.iLeftXAVol=(int16_t)val; + if(spu.cddavCallback) spu.cddavCallback(0,(int16_t)val); break; case H_CDRight: - spu.iRightXAVol=val & 0x7fff; - if(spu.cddavCallback) spu.cddavCallback(1,val); + 
spu.iRightXAVol=(int16_t)val; + if(spu.cddavCallback) spu.cddavCallback(1,(int16_t)val); break; //-------------------------------------------------// case H_FMod1: diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 3822e8c4e..637e85216 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1580,7 +1580,7 @@ void CALLBACK SPUregisterCallback(void (CALLBACK *callback)(void)) spu.irqCallback = callback; } -void CALLBACK SPUregisterCDDAVolume(void (CALLBACK *CDDAVcallback)(unsigned short,unsigned short)) +void CALLBACK SPUregisterCDDAVolume(void (CALLBACK *CDDAVcallback)(short, short)) { spu.cddavCallback = CDDAVcallback; } diff --git a/plugins/spunull/spunull.c b/plugins/spunull/spunull.c index 96bd39069..ece5db934 100644 --- a/plugins/spunull/spunull.c +++ b/plugins/spunull/spunull.c @@ -53,7 +53,7 @@ char * pConfigFile=0; //////////////////////////////////////////////////////////////////////// void (CALLBACK *irqCallback)(void)=0; // func of main emu, called on spu irq -void (CALLBACK *cddavCallback)(unsigned short,unsigned short)=0; +void (CALLBACK *cddavCallback)(short, short)=0; //////////////////////////////////////////////////////////////////////// // CODE AREA @@ -361,7 +361,7 @@ void CALLBACK SPUregisterCallback(void (CALLBACK *callback)(void)) irqCallback = callback; } -void CALLBACK SPUregisterCDDAVolume(void (CALLBACK *CDDAVcallback)(unsigned short,unsigned short)) +void CALLBACK SPUregisterCDDAVolume(void (CALLBACK *CDDAVcallback)(short, short)) { cddavCallback = CDDAVcallback; } From fcaa8d2263c8da53bbe86c482921e1a715bf59b3 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Fri, 29 Oct 2021 20:00:59 +0000 Subject: [PATCH 036/597] Hack fix for CDROM timings regression (Castlevania, Megaman X4) (#231) See https://github.com/libretro/pcsx_rearmed/issues/557. 
The CDROM timings changes from Redux fixed a bunch of games (Crash Team Racing's intro no longer cuts off too early, Worms Pinball gets further instead of just crashing, FF8 Lunar Cry FMV no longer crashes etc...) but it broke the other games that relied on ADPCM samples, which is most noticeable in games like Castlevania or Megaman X4. According to nicolasnoble, we should not cause a delay if the seek destination is close to the current position. However, this would require a lot more work so in the meantime, let's do just that and add a comment. --- libpcsxcore/cdrom.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 647a1b6cb..26f68ac36 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1183,6 +1183,19 @@ void cdrReadInterrupt() { int ret = xa_decode_sector(&cdr.Xa, cdr.Transfer+4, cdr.FirstSector); if (!ret) { cdrAttenuate(cdr.Xa.pcm, cdr.Xa.nsamples, cdr.Xa.stereo); + /* + * Gameblabla - + * This is a hack for Megaman X4, Castlevania etc... + * that regressed from the new m_locationChanged and CDROM timings changes. + * It is mostly noticeable in Castlevania however and the stuttering can be very jarring. + * + * According to PCSX redux authors, we shouldn't cause a location change if + * the sector difference is too small. + * I attempted to go with that approach but came up empty-handed. + * So for now, let's just set cdr.m_locationChanged to false when playing back any ADPCM samples. + * This does not regress Crash Team Racing's intro at least. + */ + cdr.m_locationChanged = FALSE; SPU_playADPCMchannel(&cdr.Xa); cdr.FirstSector = 0; } From 0bfe8d59b9150b2662c3ca68f950b272344cb9a4 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Fri, 29 Oct 2021 20:03:27 +0000 Subject: [PATCH 037/597] New, separate GPU plugin based on Unai. (#233) This new plugin is based on Unai but is more accurate and fixes a few issues. 
According to some tests on real hardware : fps old new spyro1 130 112 tekken3 95 68 nfs3 107 91 Because of this, it was decided to make it separate from the "Old" Unai. Note that this doesn't have the threading changes from libretro's fork yet : this will be for another PR. Co-authored-by: negativeExponent Co-authored-by: Justin Weiss Co-authored-by: senquack --- Makefile | 9 + configure | 4 +- frontend/main.c | 6 + frontend/menu.c | 29 + frontend/plugin_lib.h | 9 + plugins/gpu_senquack/Makefile | 19 + plugins/gpu_senquack/README_senquack.txt | 956 +++++++++++ plugins/gpu_senquack/debug.h | 0 plugins/gpu_senquack/gpu.cpp | 830 ++++++++++ plugins/gpu_senquack/gpu.h | 74 + plugins/gpu_senquack/gpu_arm.S | 56 + plugins/gpu_senquack/gpu_arm.h | 14 + plugins/gpu_senquack/gpu_blit.h | 405 +++++ plugins/gpu_senquack/gpu_command.h | 621 +++++++ plugins/gpu_senquack/gpu_fixedpoint.h | 134 ++ plugins/gpu_senquack/gpu_inner.h | 734 +++++++++ plugins/gpu_senquack/gpu_inner_blend.h | 188 +++ plugins/gpu_senquack/gpu_inner_blend_arm.h | 103 ++ plugins/gpu_senquack/gpu_inner_blend_arm5.h | 100 ++ plugins/gpu_senquack/gpu_inner_blend_arm7.h | 107 ++ plugins/gpu_senquack/gpu_inner_light.h | 271 +++ plugins/gpu_senquack/gpu_inner_light_arm.h | 112 ++ plugins/gpu_senquack/gpu_inner_quantization.h | 108 ++ plugins/gpu_senquack/gpu_raster_image.h | 220 +++ plugins/gpu_senquack/gpu_raster_line.h | 720 ++++++++ plugins/gpu_senquack/gpu_raster_polygon.h | 1453 +++++++++++++++++ plugins/gpu_senquack/gpu_raster_sprite.h | 170 ++ plugins/gpu_senquack/gpu_senquack.h | 316 ++++ plugins/gpu_senquack/gpulib_if.cpp | 642 ++++++++ plugins/gpu_senquack/port.h | 41 + plugins/gpu_senquack/profiler.h | 9 + 31 files changed, 8458 insertions(+), 2 deletions(-) create mode 100644 plugins/gpu_senquack/Makefile create mode 100644 plugins/gpu_senquack/README_senquack.txt create mode 100644 plugins/gpu_senquack/debug.h create mode 100644 plugins/gpu_senquack/gpu.cpp create mode 100644 
plugins/gpu_senquack/gpu.h create mode 100644 plugins/gpu_senquack/gpu_arm.S create mode 100644 plugins/gpu_senquack/gpu_arm.h create mode 100644 plugins/gpu_senquack/gpu_blit.h create mode 100644 plugins/gpu_senquack/gpu_command.h create mode 100644 plugins/gpu_senquack/gpu_fixedpoint.h create mode 100644 plugins/gpu_senquack/gpu_inner.h create mode 100644 plugins/gpu_senquack/gpu_inner_blend.h create mode 100644 plugins/gpu_senquack/gpu_inner_blend_arm.h create mode 100644 plugins/gpu_senquack/gpu_inner_blend_arm5.h create mode 100644 plugins/gpu_senquack/gpu_inner_blend_arm7.h create mode 100644 plugins/gpu_senquack/gpu_inner_light.h create mode 100644 plugins/gpu_senquack/gpu_inner_light_arm.h create mode 100644 plugins/gpu_senquack/gpu_inner_quantization.h create mode 100644 plugins/gpu_senquack/gpu_raster_image.h create mode 100644 plugins/gpu_senquack/gpu_raster_line.h create mode 100644 plugins/gpu_senquack/gpu_raster_polygon.h create mode 100644 plugins/gpu_senquack/gpu_raster_sprite.h create mode 100644 plugins/gpu_senquack/gpu_senquack.h create mode 100644 plugins/gpu_senquack/gpulib_if.cpp create mode 100644 plugins/gpu_senquack/port.h create mode 100644 plugins/gpu_senquack/profiler.h diff --git a/Makefile b/Makefile index 18ef4e002..0998f586e 100644 --- a/Makefile +++ b/Makefile @@ -129,6 +129,15 @@ plugins/gpu_unai/gpulib_if.o: CFLAGS += -DREARMED -O3 CC_LINK = $(CXX) endif +ifeq "$(BUILTIN_GPU)" "senquack" +OBJS += plugins/gpu_senquack/gpulib_if.o +ifeq "$(ARCH)" "arm" +OBJS += plugins/gpu_senquack/gpu_arm.o +endif +plugins/gpu_senquack/gpulib_if.o: CFLAGS += -DREARMED -O3 +CC_LINK = $(CXX) +endif + # cdrcimg OBJS += plugins/cdrcimg/cdrcimg.o ifeq "$(CHD_SUPPORT)" "1" diff --git a/configure b/configure index 5caf0f494..20ff1d53d 100755 --- a/configure +++ b/configure @@ -39,12 +39,12 @@ check_define_val() platform_list="generic pandora maemo caanoo libretro" platform="generic" -builtin_gpu_list="peops unai neon" +builtin_gpu_list="peops unai neon 
senquack" builtin_gpu="" sound_driver_list="oss alsa pulseaudio sdl libretro" sound_drivers="" plugins="plugins/spunull/spunull.so \ -plugins/dfxvideo/gpu_peops.so plugins/gpu_unai/gpu_unai.so" +plugins/dfxvideo/gpu_peops.so plugins/gpu_unai/gpu_unai.so plugins/gpu_senquack/gpu_senquack.so" ram_fixed="no" drc_cache_base="no" have_armv5="" diff --git a/frontend/main.c b/frontend/main.c index 3bb0f4b66..4631618e5 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -130,6 +130,12 @@ void emu_set_default_config(void) pl_rearmed_cbs.gpu_neon.enhancement_no_main = 0; pl_rearmed_cbs.gpu_peops.iUseDither = 0; pl_rearmed_cbs.gpu_peops.dwActFixes = 1<<7; + pl_rearmed_cbs.gpu_senquack.ilace_force = 0; + pl_rearmed_cbs.gpu_senquack.pixel_skip = 0; + pl_rearmed_cbs.gpu_senquack.lighting = 1; + pl_rearmed_cbs.gpu_senquack.fast_lighting = 0; + pl_rearmed_cbs.gpu_senquack.blending = 1; + pl_rearmed_cbs.gpu_senquack.dithering = 0; pl_rearmed_cbs.gpu_unai.abe_hack = pl_rearmed_cbs.gpu_unai.no_light = pl_rearmed_cbs.gpu_unai.no_blend = 0; diff --git a/frontend/menu.c b/frontend/menu.c index e2286d42b..05dde4617 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -430,6 +430,13 @@ static const struct { CE_INTVAL_P(gpu_unai.abe_hack), CE_INTVAL_P(gpu_unai.no_light), CE_INTVAL_P(gpu_unai.no_blend), + CE_INTVAL_P(gpu_senquack.ilace_force), + CE_INTVAL_P(gpu_senquack.pixel_skip), + CE_INTVAL_P(gpu_senquack.lighting), + CE_INTVAL_P(gpu_senquack.fast_lighting), + CE_INTVAL_P(gpu_senquack.blending), + CE_INTVAL_P(gpu_senquack.dithering), + CE_INTVAL_P(gpu_senquack.scale_hires), CE_INTVAL_P(gpu_neon.allow_interlace), CE_INTVAL_P(gpu_neon.enhancement_enable), CE_INTVAL_P(gpu_neon.enhancement_no_main), @@ -1378,6 +1385,25 @@ static int menu_loop_plugin_gpu_unai(int id, int keys) return 0; } +static menu_entry e_menu_plugin_gpu_senquack[] = +{ + mee_onoff ("Interlace", 0, pl_rearmed_cbs.gpu_senquack.ilace_force, 1), + mee_onoff ("Dithering", 0, pl_rearmed_cbs.gpu_senquack.dithering, 1), + 
mee_onoff ("Lighting", 0, pl_rearmed_cbs.gpu_senquack.lighting, 1), + mee_onoff ("Fast lighting", 0, pl_rearmed_cbs.gpu_senquack.fast_lighting, 1), + mee_onoff ("Blending", 0, pl_rearmed_cbs.gpu_senquack.blending, 1), + mee_onoff ("Pixel skip", 0, pl_rearmed_cbs.gpu_senquack.pixel_skip, 1), + mee_end, +}; + +static int menu_loop_plugin_gpu_senquack(int id, int keys) +{ + int sel = 0; + me_loop(e_menu_plugin_gpu_senquack, &sel); + return 0; +} + + static const char *men_gpu_dithering[] = { "None", "Game dependant", "Always", NULL }; //static const char h_gpu_0[] = "Needed for Chrono Cross"; static const char h_gpu_1[] = "Capcom fighting games"; @@ -1479,6 +1505,7 @@ static const char h_plugin_gpu[] = #endif "gpu_peops is Pete's soft GPU, slow but accurate\n" "gpu_unai is GPU from PCSX4ALL, fast but glitchy\n" + "gpu_senquack is more accurate but slower\n" "gpu_gles Pete's hw GPU, uses 3D chip but is glitchy\n" "must save config and reload the game if changed"; static const char h_plugin_spu[] = "spunull effectively disables sound\n" @@ -1486,6 +1513,7 @@ static const char h_plugin_spu[] = "spunull effectively disables sound\n" static const char h_gpu_peops[] = "Configure P.E.Op.S. SoftGL Driver V1.17"; static const char h_gpu_peopsgl[]= "Configure P.E.Op.S. MesaGL Driver V1.78"; static const char h_gpu_unai[] = "Configure Unai/PCSX4ALL Team GPU plugin"; +static const char h_gpu_senquack[] = "Configure Unai/PCSX4ALL Senquack plugin"; static const char h_spu[] = "Configure built-in P.E.Op.S. 
Sound Driver V1.7"; static menu_entry e_menu_plugin_options[] = @@ -1498,6 +1526,7 @@ static menu_entry e_menu_plugin_options[] = #endif mee_handler_h ("Configure gpu_peops plugin", menu_loop_plugin_gpu_peops, h_gpu_peops), mee_handler_h ("Configure gpu_unai GPU plugin", menu_loop_plugin_gpu_unai, h_gpu_unai), + mee_handler_h ("Configure gpu_senquack GPU plugin", menu_loop_plugin_gpu_senquack, h_gpu_senquack), mee_handler_h ("Configure gpu_gles GPU plugin", menu_loop_plugin_gpu_peopsgl, h_gpu_peopsgl), mee_handler_h ("Configure built-in SPU plugin", menu_loop_plugin_spu, h_spu), mee_end, diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index 4a110020a..f55eb449b 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -77,6 +77,15 @@ struct rearmed_cbs { int no_light, no_blend; int lineskip; } gpu_unai; + struct { + int ilace_force; + int pixel_skip; + int lighting; + int fast_lighting; + int blending; + int dithering; + int scale_hires; + } gpu_senquack; struct { int dwActFixes; int bDrawDither, iFilterType, iFrameTexType; diff --git a/plugins/gpu_senquack/Makefile b/plugins/gpu_senquack/Makefile new file mode 100644 index 000000000..c3be35b4d --- /dev/null +++ b/plugins/gpu_senquack/Makefile @@ -0,0 +1,19 @@ +CFLAGS += -ggdb -Wall -O3 -ffast-math +CFLAGS += -DREARMED +CFLAGS += -I../../include +#CFLAGS += -DINLINE="static __inline__" +#CFLAGS += -Dasm="__asm__ __volatile__" +#CFLAGS += -DUSE_GPULIB=1 + +include ../../config.mak + +SRC_STANDALONE += gpu.cpp +SRC_GPULIB += gpulib_if.cpp + +ifeq "$(ARCH)" "arm" +SRC += gpu_arm.S +endif + +#BIN_STANDALONE = gpuPCSX4ALL.so +BIN_GPULIB = gpu_senquack.so +include ../gpulib/gpulib.mak diff --git a/plugins/gpu_senquack/README_senquack.txt b/plugins/gpu_senquack/README_senquack.txt new file mode 100644 index 000000000..cda17fca7 --- /dev/null +++ b/plugins/gpu_senquack/README_senquack.txt @@ -0,0 +1,956 @@ +//NOTE: You can find the set of original Unai poly routines (disabled now) +// at the bottom end 
of this file. + +//senquack - Original Unai GPU poly routines have been replaced with new +// ones based on DrHell routines. The original routines suffered from +// shifted rows, causing many quads to have their first triangle drawn +// correctly, but the second triangle would randomly have pixels shifted +// either left or right or entire rows not drawn at all. Furthermore, +// sometimes entire triangles seemed to be either missing or only +// partially drawn (most clearly seen in sky/road textures in NFS3, +// clock tower in beginning of Castlevania SOTN). Pixel gaps were +// prevalent. +// +// Since DrHell GPU didn't seem to exhibit these artifacts at all, I adapted +// its routines to GPU Unai (Unai was probably already originally based on it). +// DrHell uses 22.10 fixed point instead of Unai's 16.16, so gpu_fixedpoint.h +// required modification as well as gpu_inner.h (where gpuPolySpanFn driver +// functions are). +// +// Originally, I tried to patch up original Unai routines and got as far +// as fixing the shifted rows, but still had the other problem of triangles rendered +// wrong (black triangular gaps in NFS3 sky, clock tower in Castlevania SOTN). +// I eventually gave up. Even after rewriting/adapting the routines, +// however, I still had some random pixel dropouts, specifically in +// NFS3 sky texture. I discovered that gpu_inner.h gpuPolySpanFn function +// was taking optimizations to an extreme and packing u/v texture coords +// into one 32-bit word, reducing their accuracy. Only once they were +// handled in full-accuracy individual words was that problem fixed. +// +// NOTE: I also added support for doing divisions using the FPU, either +// with normal division or multiplication-by-reciprocal. +// To use float division, GPU_UNAI_USE_FLOATMATH should be defined. 
+// To use float mult-by-reciprocal, GPU_UNAI_USE_FLOAT_DIV_MULTINV +// can be specified (GPU_UNAI_USE_FLOATMATH must also be specified) +// To use inaccurate fixed-point mult-by-reciprocal, define +// GPU_UNAI_USE_INT_DIV_MULTINV. This is the default on older +// ARM devices like Wiz/Caanoo that have neither integer division +// in hardware nor an FPU. It results in some pixel dropouts, +// texture glitches, but less than the original GPU UNAI code. +// +// If nothing is specified, integer division will be used. +// +// NOTE 2: Even with MIPS32R2 having FPU recip.s instruction, and it is +// used when this platform is detected, I found it not to give any +// noticeable speedup over normal float division (in fact seemed a tiny +// tiny bit slower). I also found float division to not provide any +// noticeable speedups versus integer division on MIPS32R2 platform. +// Granted, the differences were all around .5 FPS or less. +// +// TODO: +// * See if anything can be done about remaining pixel gaps in Gran +// Turismo car models, track. +// * Find better way of passing parameters to gpuPolySpanFn functions than +// through original Unai method of using global variables u4,v4,du4 etc. +// * Come up with some newer way of drawing rows of pixels than by calling +// gpuPolySpanFn through function pointer. For every row, at least on +// MIPS platforms, many registers are having to be pushed/popped from stack +// on each call, which is strange since MIPS has so many registers. +// * MIPS MXU/ASM optimized gpuPolySpanFn ? 
+ +////////////////////////////////////////////////////////////////////////// +//senquack - Disabled original Unai poly routines left here for reference: +// ( from gpu_raster_polygon.h ) +////////////////////////////////////////////////////////////////////////// +#define GPU_TESTRANGE3() \ +{ \ + if(x0<0) { if((x1-x0)>CHKMAX_X) return; if((x2-x0)>CHKMAX_X) return; } \ + if(x1<0) { if((x0-x1)>CHKMAX_X) return; if((x2-x1)>CHKMAX_X) return; } \ + if(x2<0) { if((x0-x2)>CHKMAX_X) return; if((x1-x2)>CHKMAX_X) return; } \ + if(y0<0) { if((y1-y0)>CHKMAX_Y) return; if((y2-y0)>CHKMAX_Y) return; } \ + if(y1<0) { if((y0-y1)>CHKMAX_Y) return; if((y2-y1)>CHKMAX_Y) return; } \ + if(y2<0) { if((y0-y2)>CHKMAX_Y) return; if((y1-y2)>CHKMAX_Y) return; } \ +} + +/*---------------------------------------------------------------------- +F3 +----------------------------------------------------------------------*/ + +void gpuDrawF3(const PP gpuPolySpanDriver) +{ + const int li=linesInterlace; + const int pi=(progressInterlace?(linesInterlace+1):0); + const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1); + s32 temp; + s32 xa, xb, xmin, xmax; + s32 ya, yb, ymin, ymax; + s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; + s32 y0, y1, y2; + + x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]); + y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]); + x1 = GPU_EXPANDSIGN(PacketBuffer.S2[4]); + y1 = GPU_EXPANDSIGN(PacketBuffer.S2[5]); + x2 = GPU_EXPANDSIGN(PacketBuffer.S2[6]); + y2 = GPU_EXPANDSIGN(PacketBuffer.S2[7]); + + GPU_TESTRANGE3(); + + x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; + y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; + + xmin = DrawingArea[0]; xmax = DrawingArea[2]; + ymin = DrawingArea[1]; ymax = DrawingArea[3]; + + { + int rx0 = Max2(xmin,Min3(x0,x1,x2)); + int ry0 = Max2(ymin,Min3(y0,y1,y2)); + int rx1 = Min2(xmax,Max3(x0,x1,x2)); + int ry1 = Min2(ymax,Max3(y0,y1,y2)); + if( rx0>=rx1 || ry0>=ry1) return; + } + + PixelData = 
GPU_RGB16(PacketBuffer.U4[0]); + + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); + GPU_SWAP(y0, y1, temp); + } + } + if (y1 >= y2) + { + if( y1!=y2 || x1>x2 ) + { + GPU_SWAP(x1, x2, temp); + GPU_SWAP(y1, y2, temp); + } + } + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); + GPU_SWAP(y0, y1, temp); + } + } + + ya = y2 - y0; + yb = y2 - y1; + dx =(x2 - x1) * ya - (x2 - x0) * yb; + + for (s32 loop0 = 2; loop0; --loop0) + { + if (loop0 == 2) + { + ya = y0; + yb = y1; + x3 = i2x(x0); + x4 = y0!=y1 ? x3 : i2x(x1); + if (dx < 0) + { + dx3 = xLoDivx((x2 - x0), (y2 - y0)); + dx4 = xLoDivx((x1 - x0), (y1 - y0)); + } + else + { + dx3 = xLoDivx((x1 - x0), (y1 - y0)); + dx4 = xLoDivx((x2 - x0), (y2 - y0)); + } + } + else + { + ya = y1; + yb = y2; + if (dx < 0) + { + x4 = i2x(x1); + x3 = i2x(x0) + (dx3 * (y1 - y0)); + dx4 = xLoDivx((x2 - x1), (y2 - y1)); + } + else + { + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); + dx3 = xLoDivx((x2 - x1), (y2 - y1)); + } + } + + temp = ymin - ya; + if (temp > 0) + { + ya = ymin; + x3 += dx3*temp; + x4 += dx4*temp; + } + if (yb > ymax) yb = ymax; + if (ya>=yb) continue; + + x3+= fixed_HALF; + x4+= fixed_HALF; + + u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; + + for(;yaxmax) || (xb xmax) xb = xmax; + xb-=xa; + if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); + } + } +} + +/*---------------------------------------------------------------------- +FT3 +----------------------------------------------------------------------*/ + +void gpuDrawFT3(const PP gpuPolySpanDriver) +{ + const int li=linesInterlace; + const int pi=(progressInterlace?(linesInterlace+1):0); + const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1); + s32 temp; + s32 xa, xb, xmin, xmax; + s32 ya, yb, ymin, ymax; + s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; + s32 y0, y1, y2; + s32 u0, u1, u2, u3, du3=0; + s32 v0, v1, v2, v3, dv3=0; + + x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] ); + y0 = 
GPU_EXPANDSIGN(PacketBuffer.S2[3] ); + x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] ); + y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] ); + x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]); + y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]); + + GPU_TESTRANGE3(); + + x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; + y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; + + xmin = DrawingArea[0]; xmax = DrawingArea[2]; + ymin = DrawingArea[1]; ymax = DrawingArea[3]; + + { + int rx0 = Max2(xmin,Min3(x0,x1,x2)); + int ry0 = Max2(ymin,Min3(y0,y1,y2)); + int rx1 = Min2(xmax,Max3(x0,x1,x2)); + int ry1 = Min2(ymax,Max3(y0,y1,y2)); + if( rx0>=rx1 || ry0>=ry1) return; + } + + u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9]; + u1 = PacketBuffer.U1[16]; v1 = PacketBuffer.U1[17]; + u2 = PacketBuffer.U1[24]; v2 = PacketBuffer.U1[25]; + + r4 = s32(PacketBuffer.U1[0]); + g4 = s32(PacketBuffer.U1[1]); + b4 = s32(PacketBuffer.U1[2]); + dr4 = dg4 = db4 = 0; + + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); + GPU_SWAP(y0, y1, temp); + GPU_SWAP(u0, u1, temp); + GPU_SWAP(v0, v1, temp); + } + } + if (y1 >= y2) + { + if( y1!=y2 || x1>x2 ) + { + GPU_SWAP(x1, x2, temp); + GPU_SWAP(y1, y2, temp); + GPU_SWAP(u1, u2, temp); + GPU_SWAP(v1, v2, temp); + } + } + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); + GPU_SWAP(y0, y1, temp); + GPU_SWAP(u0, u1, temp); + GPU_SWAP(v0, v1, temp); + } + } + + ya = y2 - y0; + yb = y2 - y1; + dx = (x2 - x1) * ya - (x2 - x0) * yb; + du4 = (u2 - u1) * ya - (u2 - u0) * yb; + dv4 = (v2 - v1) * ya - (v2 - v0) * yb; + + s32 iF,iS; + xInv( dx, iF, iS); + du4 = xInvMulx( du4, iF, iS); + dv4 = xInvMulx( dv4, iF, iS); + tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff); + tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff; + + for (s32 loop0 = 2; loop0; --loop0) + { + if (loop0 == 2) + { + ya = y0; + yb = y1; + u3 = i2x(u0); + v3 = i2x(v0); + x3 = i2x(x0); + x4 = y0!=y1 ? 
x3 : i2x(x1); + if (dx < 0) + { + xInv( (y2 - y0), iF, iS); + dx3 = xInvMulx( (x2 - x0), iF, iS); + du3 = xInvMulx( (u2 - u0), iF, iS); + dv3 = xInvMulx( (v2 - v0), iF, iS); + dx4 = xLoDivx ( (x1 - x0), (y1 - y0)); + } + else + { + xInv( (y1 - y0), iF, iS); + dx3 = xInvMulx( (x1 - x0), iF, iS); + du3 = xInvMulx( (u1 - u0), iF, iS); + dv3 = xInvMulx( (v1 - v0), iF, iS); + dx4 = xLoDivx ( (x2 - x0), (y2 - y0)); + } + } + else + { + ya = y1; + yb = y2; + if (dx < 0) + { + temp = y1 - y0; + u3 = i2x(u0) + (du3 * temp); + v3 = i2x(v0) + (dv3 * temp); + x3 = i2x(x0) + (dx3 * temp); + x4 = i2x(x1); + dx4 = xLoDivx((x2 - x1), (y2 - y1)); + } + else + { + u3 = i2x(u1); + v3 = i2x(v1); + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); + xInv( (y2 - y1), iF, iS); + dx3 = xInvMulx( (x2 - x1), iF, iS); + du3 = xInvMulx( (u2 - u1), iF, iS); + dv3 = xInvMulx( (v2 - v1), iF, iS); + } + } + + temp = ymin - ya; + if (temp > 0) + { + ya = ymin; + x3 += dx3*temp; + x4 += dx4*temp; + u3 += du3*temp; + v3 += dv3*temp; + } + if (yb > ymax) yb = ymax; + if (ya>=yb) continue; + + x3+= fixed_HALF; + x4+= fixed_HALF; + u3+= fixed_HALF; + v4+= fixed_HALF; + + u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; + + for(;yaxmax) || (xb 0) + { + xa = xmin; + u4 = u3 + du4*temp; + v4 = v3 + dv4*temp; + } + else + { + u4 = u3; + v4 = v3; + } + if(xb > xmax) xb = xmax; + xb-=xa; + if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); + } + } +} + +/*---------------------------------------------------------------------- +G3 +----------------------------------------------------------------------*/ + +void gpuDrawG3(const PP gpuPolySpanDriver) +{ + const int li=linesInterlace; + const int pi=(progressInterlace?(linesInterlace+1):0); + const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1); + s32 temp; + s32 xa, xb, xmin, xmax; + s32 ya, yb, ymin, ymax; + s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; + s32 y0, y1, y2; + s32 r0, r1, r2, r3, dr3=0; + s32 g0, g1, g2, g3, 
dg3=0; + s32 b0, b1, b2, b3, db3=0; + + x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] ); + y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] ); + x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] ); + y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] ); + x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]); + y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]); + + GPU_TESTRANGE3(); + + x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; + y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; + + xmin = DrawingArea[0]; xmax = DrawingArea[2]; + ymin = DrawingArea[1]; ymax = DrawingArea[3]; + + { + int rx0 = Max2(xmin,Min3(x0,x1,x2)); + int ry0 = Max2(ymin,Min3(y0,y1,y2)); + int rx1 = Min2(xmax,Max3(x0,x1,x2)); + int ry1 = Min2(ymax,Max3(y0,y1,y2)); + if( rx0>=rx1 || ry0>=ry1) return; + } + + r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2]; + r1 = PacketBuffer.U1[8]; g1 = PacketBuffer.U1[9]; b1 = PacketBuffer.U1[10]; + r2 = PacketBuffer.U1[16]; g2 = PacketBuffer.U1[17]; b2 = PacketBuffer.U1[18]; + + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); + GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); + } + } + if (y1 >= y2) + { + if( y1!=y2 || x1>x2 ) + { + GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp); + GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp); + } + } + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); + GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); + } + } + + ya = y2 - y0; + yb = y2 - y1; + dx = (x2 - x1) * ya - (x2 - x0) * yb; + dr4 = (r2 - r1) * ya - (r2 - r0) * yb; + dg4 = (g2 - g1) * ya - (g2 - g0) * yb; + db4 = (b2 - b1) * ya - (b2 - b0) * yb; + + s32 iF,iS; + xInv( dx, iF, iS); + dr4 = xInvMulx( dr4, iF, iS); + dg4 = xInvMulx( dg4, iF, iS); + db4 = xInvMulx( db4, iF, iS); + u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21; + u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); 
if(dg4<0) dg+= 1<<10; + u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0; + lInc = db + dg + dr; + + for (s32 loop0 = 2; loop0; --loop0) + { + if (loop0 == 2) + { + ya = y0; + yb = y1; + r3 = i2x(r0); + g3 = i2x(g0); + b3 = i2x(b0); + x3 = i2x(x0); + x4 = y0!=y1 ? x3 : i2x(x1); + if (dx < 0) + { + xInv( (y2 - y0), iF, iS); + dx3 = xInvMulx( (x2 - x0), iF, iS); + dr3 = xInvMulx( (r2 - r0), iF, iS); + dg3 = xInvMulx( (g2 - g0), iF, iS); + db3 = xInvMulx( (b2 - b0), iF, iS); + dx4 = xLoDivx ( (x1 - x0), (y1 - y0)); + } + else + { + xInv( (y1 - y0), iF, iS); + dx3 = xInvMulx( (x1 - x0), iF, iS); + dr3 = xInvMulx( (r1 - r0), iF, iS); + dg3 = xInvMulx( (g1 - g0), iF, iS); + db3 = xInvMulx( (b1 - b0), iF, iS); + dx4 = xLoDivx ( (x2 - x0), (y2 - y0)); + } + } + else + { + ya = y1; + yb = y2; + if (dx < 0) + { + temp = y1 - y0; + r3 = i2x(r0) + (dr3 * temp); + g3 = i2x(g0) + (dg3 * temp); + b3 = i2x(b0) + (db3 * temp); + x3 = i2x(x0) + (dx3 * temp); + x4 = i2x(x1); + dx4 = xLoDivx((x2 - x1), (y2 - y1)); + } + else + { + r3 = i2x(r1); + g3 = i2x(g1); + b3 = i2x(b1); + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); + + xInv( (y2 - y1), iF, iS); + dx3 = xInvMulx( (x2 - x1), iF, iS); + dr3 = xInvMulx( (r2 - r1), iF, iS); + dg3 = xInvMulx( (g2 - g1), iF, iS); + db3 = xInvMulx( (b2 - b1), iF, iS); + } + } + + temp = ymin - ya; + if (temp > 0) + { + ya = ymin; + x3 += dx3*temp; x4 += dx4*temp; + r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp; + } + if (yb > ymax) yb = ymax; + if (ya>=yb) continue; + + x3+= fixed_HALF; x4+= fixed_HALF; + r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF; + + u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; + + for(;yaxmax) || (xb 0) + { + xa = xmin; + r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp; + } + else + { + r4 = r3; g4 = g3; b4 = b3; + } + if(xb > xmax) xb = xmax; + xb-=xa; + if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); + } + } +} + 
+/*---------------------------------------------------------------------- +GT3 +----------------------------------------------------------------------*/ + +void gpuDrawGT3(const PP gpuPolySpanDriver) +{ + const int li=linesInterlace; + const int pi=(progressInterlace?(linesInterlace+1):0); + const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1); + s32 temp; + s32 xa, xb, xmin, xmax; + s32 ya, yb, ymin, ymax; + s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; + s32 y0, y1, y2; + s32 u0, u1, u2, u3, du3=0; + s32 v0, v1, v2, v3, dv3=0; + s32 r0, r1, r2, r3, dr3=0; + s32 g0, g1, g2, g3, dg3=0; + s32 b0, b1, b2, b3, db3=0; + + x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] ); + y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] ); + x1 = GPU_EXPANDSIGN(PacketBuffer.S2[8] ); + y1 = GPU_EXPANDSIGN(PacketBuffer.S2[9] ); + x2 = GPU_EXPANDSIGN(PacketBuffer.S2[14]); + y2 = GPU_EXPANDSIGN(PacketBuffer.S2[15]); + + GPU_TESTRANGE3(); + + x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; + y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; + + xmin = DrawingArea[0]; xmax = DrawingArea[2]; + ymin = DrawingArea[1]; ymax = DrawingArea[3]; + + { + int rx0 = Max2(xmin,Min3(x0,x1,x2)); + int ry0 = Max2(ymin,Min3(y0,y1,y2)); + int rx1 = Min2(xmax,Max3(x0,x1,x2)); + int ry1 = Min2(ymax,Max3(y0,y1,y2)); + if( rx0>=rx1 || ry0>=ry1) return; + } + + r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2]; + u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9]; + r1 = PacketBuffer.U1[12]; g1 = PacketBuffer.U1[13]; b1 = PacketBuffer.U1[14]; + u1 = PacketBuffer.U1[20]; v1 = PacketBuffer.U1[21]; + r2 = PacketBuffer.U1[24]; g2 = PacketBuffer.U1[25]; b2 = PacketBuffer.U1[26]; + u2 = PacketBuffer.U1[32]; v2 = PacketBuffer.U1[33]; + + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); + GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp); + GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, 
b1, temp); + } + } + if (y1 >= y2) + { + if( y1!=y2 || x1>x2 ) + { + GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp); + GPU_SWAP(u1, u2, temp); GPU_SWAP(v1, v2, temp); + GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp); + } + } + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); + GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp); + GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); + } + } + + ya = y2 - y0; + yb = y2 - y1; + dx = (x2 - x1) * ya - (x2 - x0) * yb; + du4 = (u2 - u1) * ya - (u2 - u0) * yb; + dv4 = (v2 - v1) * ya - (v2 - v0) * yb; + dr4 = (r2 - r1) * ya - (r2 - r0) * yb; + dg4 = (g2 - g1) * ya - (g2 - g0) * yb; + db4 = (b2 - b1) * ya - (b2 - b0) * yb; + + s32 iF,iS; + + xInv( dx, iF, iS); + du4 = xInvMulx( du4, iF, iS); + dv4 = xInvMulx( dv4, iF, iS); + dr4 = xInvMulx( dr4, iF, iS); + dg4 = xInvMulx( dg4, iF, iS); + db4 = xInvMulx( db4, iF, iS); + u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21; + u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10; + u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0; + lInc = db + dg + dr; + tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff); + tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff; + + for (s32 loop0 = 2; loop0; --loop0) + { + if (loop0 == 2) + { + ya = y0; + yb = y1; + u3 = i2x(u0); + v3 = i2x(v0); + r3 = i2x(r0); + g3 = i2x(g0); + b3 = i2x(b0); + x3 = i2x(x0); + x4 = y0!=y1 ? 
x3 : i2x(x1); + if (dx < 0) + { + xInv( (y2 - y0), iF, iS); + dx3 = xInvMulx( (x2 - x0), iF, iS); + du3 = xInvMulx( (u2 - u0), iF, iS); + dv3 = xInvMulx( (v2 - v0), iF, iS); + dr3 = xInvMulx( (r2 - r0), iF, iS); + dg3 = xInvMulx( (g2 - g0), iF, iS); + db3 = xInvMulx( (b2 - b0), iF, iS); + dx4 = xLoDivx ( (x1 - x0), (y1 - y0)); + } + else + { + xInv( (y1 - y0), iF, iS); + dx3 = xInvMulx( (x1 - x0), iF, iS); + du3 = xInvMulx( (u1 - u0), iF, iS); + dv3 = xInvMulx( (v1 - v0), iF, iS); + dr3 = xInvMulx( (r1 - r0), iF, iS); + dg3 = xInvMulx( (g1 - g0), iF, iS); + db3 = xInvMulx( (b1 - b0), iF, iS); + dx4 = xLoDivx ( (x2 - x0), (y2 - y0)); + } + } + else + { + ya = y1; + yb = y2; + if (dx < 0) + { + temp = y1 - y0; + u3 = i2x(u0) + (du3 * temp); + v3 = i2x(v0) + (dv3 * temp); + r3 = i2x(r0) + (dr3 * temp); + g3 = i2x(g0) + (dg3 * temp); + b3 = i2x(b0) + (db3 * temp); + x3 = i2x(x0) + (dx3 * temp); + x4 = i2x(x1); + dx4 = xLoDivx((x2 - x1), (y2 - y1)); + } + else + { + u3 = i2x(u1); + v3 = i2x(v1); + r3 = i2x(r1); + g3 = i2x(g1); + b3 = i2x(b1); + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); + + xInv( (y2 - y1), iF, iS); + dx3 = xInvMulx( (x2 - x1), iF, iS); + du3 = xInvMulx( (u2 - u1), iF, iS); + dv3 = xInvMulx( (v2 - v1), iF, iS); + dr3 = xInvMulx( (r2 - r1), iF, iS); + dg3 = xInvMulx( (g2 - g1), iF, iS); + db3 = xInvMulx( (b2 - b1), iF, iS); + } + } + + temp = ymin - ya; + if (temp > 0) + { + ya = ymin; + x3 += dx3*temp; x4 += dx4*temp; + u3 += du3*temp; v3 += dv3*temp; + r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp; + } + if (yb > ymax) yb = ymax; + if (ya>=yb) continue; + + x3+= fixed_HALF; x4+= fixed_HALF; + u3+= fixed_HALF; v4+= fixed_HALF; + r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF; + u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; + + for(;yaxmax) || (xb 0) + { + xa = xmin; + u4 = u3 + du4*temp; v4 = v3 + dv4*temp; + r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp; + } + else + { + u4 = u3; v4 = v3; + r4 = r3; g4 = g3; b4 
= b3; + } + if(xb > xmax) xb = xmax; + xb-=xa; + if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); + } + } +} + + +////////////////////////////////////////////////////////////////////////// +//senquack - Original Unai poly routines left here for reference: +// ( from gpu_inner.h ) NOTE: this uses 16.16, not 22.10 fixed point +////////////////////////////////////////////////////////////////////////// +template +INLINE void gpuPolySpanFn(u16 *pDst, u32 count) +{ + if (!TM) + { + // NO TEXTURE + if (!G) + { + // NO GOURAUD + u16 data; + if (L) { u32 lCol=((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); gpuLightingRGB(data,lCol); } + else data=PixelData; + if ((!M)&&(!B)) + { + if (MB) { data = data | 0x8000; } + do { *pDst++ = data; } while (--count); + } + else if ((M)&&(!B)) + { + if (MB) { data = data | 0x8000; } + do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count); + } + else + { + u16 uSrc; + u16 uDst; + u32 uMsk; if (BM==0) uMsk=0x7BDE; + u32 bMsk; if (BI) bMsk=blit_mask; + do + { + // blit-mask + if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endtile; } + // masking + uDst = *pDst; + if(M) { if (uDst&0x8000) goto endtile; } + uSrc = data; + // blend + if (BM==0) gpuBlending00(uSrc, uDst); + if (BM==1) gpuBlending01(uSrc, uDst); + if (BM==2) gpuBlending02(uSrc, uDst); + if (BM==3) gpuBlending03(uSrc, uDst); + if (MB) { *pDst = uSrc | 0x8000; } + else { *pDst = uSrc; } + endtile: pDst++; + } + while (--count); + } + } + else + { + // GOURAUD + u16 uDst; + u16 uSrc; + u32 linc=lInc; + u32 lCol=((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21)); + u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE; + u32 bMsk; if (BI) bMsk=blit_mask; + do + { + // blit-mask + if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endgou; } + // masking + if(M) { uDst = *pDst; if (uDst&0x8000) goto endgou; } + // blend + if(B) + { + // light + gpuLightingRGB(uSrc,lCol); + if(!M) { uDst = *pDst; } + if 
(BM==0) gpuBlending00(uSrc, uDst); + if (BM==1) gpuBlending01(uSrc, uDst); + if (BM==2) gpuBlending02(uSrc, uDst); + if (BM==3) gpuBlending03(uSrc, uDst); + } + else + { + // light + gpuLightingRGB(uSrc,lCol); + } + if (MB) { *pDst = uSrc | 0x8000; } + else { *pDst = uSrc; } + endgou: pDst++; lCol=(lCol+linc); + } + while (--count); + } + } + else + { + // TEXTURE + u16 uDst; + u16 uSrc; + u32 linc; if (L&&G) linc=lInc; + u32 tinc=tInc; + u32 tmsk=tMsk; + u32 tCor = ((u32)( u4<<7)&0x7fff0000) | ((u32)( v4>>9)&0x00007fff); tCor&= tmsk; + const u16* _TBA=TBA; + const u16* _CBA; if (TM!=3) _CBA=CBA; + u32 lCol; + if(L && !G) { lCol = ((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); } + else if(L && G) { lCol = ((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21)); } + u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE; + u32 bMsk; if (BI) bMsk=blit_mask; + do + { + // blit-mask + if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endpoly; } + // masking + if(M) { uDst = *pDst; if (uDst&0x8000) goto endpoly; } + // texture + if (TM==1) { u32 tu=(tCor>>23); u32 tv=(tCor<<4)&(0xff<<11); u8 rgb=((u8*)_TBA)[tv+(tu>>1)]; uSrc=_CBA[(rgb>>((tu&1)<<2))&0xf]; if(!uSrc) goto endpoly; } + if (TM==2) { uSrc = _CBA[(((u8*)_TBA)[(tCor>>23)+((tCor<<4)&(0xff<<11))])]; if(!uSrc) goto endpoly; } + if (TM==3) { uSrc = _TBA[(tCor>>23)+((tCor<<3)&(0xff<<10))]; if(!uSrc) goto endpoly; } + // blend + if(B) + { + if (uSrc&0x8000) + { + // light + if(L) gpuLightingTXT(uSrc, lCol); + if(!M) { uDst = *pDst; } + if (BM==0) gpuBlending00(uSrc, uDst); + if (BM==1) gpuBlending01(uSrc, uDst); + if (BM==2) gpuBlending02(uSrc, uDst); + if (BM==3) gpuBlending03(uSrc, uDst); + } + else + { + // light + if(L) gpuLightingTXT(uSrc, lCol); + } + } + else + { + // light + if(L) { gpuLightingTXT(uSrc, lCol); } else if(!MB) { uSrc&= 0x7fff; } + } + if (MB) { *pDst = uSrc | 0x8000; } + else { *pDst = uSrc; } + endpoly: pDst++; + tCor=(tCor+tinc)&tmsk; + 
if (L&&G) lCol=(lCol+linc); + } + while (--count); + } +} diff --git a/plugins/gpu_senquack/debug.h b/plugins/gpu_senquack/debug.h new file mode 100644 index 000000000..e69de29bb diff --git a/plugins/gpu_senquack/gpu.cpp b/plugins/gpu_senquack/gpu.cpp new file mode 100644 index 000000000..5f2929fca --- /dev/null +++ b/plugins/gpu_senquack/gpu.cpp @@ -0,0 +1,830 @@ +/*************************************************************************** +* Copyright (C) 2010 PCSX4ALL Team * +* Copyright (C) 2010 Unai * +* Copyright (C) 2016 Senquack (dansilsby gmail com) * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * +***************************************************************************/ + +#include +#include "plugins.h" +#include "psxcommon.h" +//#include "port.h" +#include "gpu_senquack.h" + +#define VIDEO_WIDTH 320 + +#ifdef TIME_IN_MSEC +#define TPS 1000 +#else +#define TPS 1000000 +#endif + +#define IS_PAL (gpu_senquack.GPU_GP1&(0x08<<17)) + +//senquack - Original 512KB of guard space seems not to be enough, as Xenogears +// accesses outside this range and crashes in town intro fight sequence. +// Increased to 2MB total (double PSX VRAM) and Xenogears no longer +// crashes, but some textures are still messed up. 
Also note that alignment min +// is 16 bytes, needed for pixel-skipping rendering/blitting in high horiz res. +// Extra 4KB is for guard room at beginning. +// TODO: Determine cause of out-of-bounds write/reads. <-- Note: this is largely +// solved by adoption of PCSX Rearmed's 'gpulib' in gpulib_if.cpp, which +// replaces this file (gpu.cpp) +//u16 GPU_FrameBuffer[(FRAME_BUFFER_SIZE+512*1024)/2] __attribute__((aligned(32))); +static u16 GPU_FrameBuffer[(FRAME_BUFFER_SIZE*2 + 4096)/2] __attribute__((aligned(32))); + +/////////////////////////////////////////////////////////////////////////////// +// GPU fixed point math +#include "gpu_fixedpoint.h" + +/////////////////////////////////////////////////////////////////////////////// +// Inner loop driver instantiation file +#include "gpu_inner.h" + +/////////////////////////////////////////////////////////////////////////////// +// GPU internal image drawing functions +#include "gpu_raster_image.h" + +/////////////////////////////////////////////////////////////////////////////// +// GPU internal line drawing functions +#include "gpu_raster_line.h" + +/////////////////////////////////////////////////////////////////////////////// +// GPU internal polygon drawing functions +#include "gpu_raster_polygon.h" + +/////////////////////////////////////////////////////////////////////////////// +// GPU internal sprite drawing functions +#include "gpu_raster_sprite.h" + +/////////////////////////////////////////////////////////////////////////////// +// GPU command buffer execution/store +#include "gpu_command.h" + +/////////////////////////////////////////////////////////////////////////////// +static void gpuReset(void) +{ + memset((void*)&gpu_senquack, 0, sizeof(gpu_senquack)); + gpu_senquack.vram = (u16*)GPU_FrameBuffer + (4096/2); //4kb guard room in front + gpu_senquack.GPU_GP1 = 0x14802000; + gpu_senquack.DrawingArea[2] = 256; + gpu_senquack.DrawingArea[3] = 240; + gpu_senquack.DisplayArea[2] = 256; + 
gpu_senquack.DisplayArea[3] = 240; + gpu_senquack.DisplayArea[5] = 240; + gpu_senquack.TextureWindow[0] = 0; + gpu_senquack.TextureWindow[1] = 0; + gpu_senquack.TextureWindow[2] = 255; + gpu_senquack.TextureWindow[3] = 255; + //senquack - new vars must be updated whenever texture window is changed: + // (used for polygon-drawing in gpu_inner.h, gpu_raster_polygon.h) + const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 + gpu_senquack.u_msk = (((u32)gpu_senquack.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_senquack.v_msk = (((u32)gpu_senquack.TextureWindow[3]) << fb) | ((1 << fb) - 1); + + // Configuration options + gpu_senquack.config = gpu_senquack_config_ext; + gpu_senquack.ilace_mask = gpu_senquack.config.ilace_force; + gpu_senquack.frameskip.skipCount = gpu_senquack.config.frameskip_count; + + SetupLightLUT(); + SetupDitheringConstants(); +} + +/////////////////////////////////////////////////////////////////////////////// +long GPU_init(void) +{ + gpuReset(); + +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + // s_invTable + for(unsigned int i=1;i<=(1<ulFreezeVersion != 1) return (0); + + if (bWrite) + { + p2->ulStatus = gpu_senquack.GPU_GP1; + memset(p2->ulControl, 0, sizeof(p2->ulControl)); + // save resolution and registers for P.E.Op.S. 
compatibility + p2->ulControl[3] = (3 << 24) | ((gpu_senquack.GPU_GP1 >> 23) & 1); + p2->ulControl[4] = (4 << 24) | ((gpu_senquack.GPU_GP1 >> 29) & 3); + p2->ulControl[5] = (5 << 24) | (gpu_senquack.DisplayArea[0] | (gpu_senquack.DisplayArea[1] << 10)); + p2->ulControl[6] = (6 << 24) | (2560 << 12); + p2->ulControl[7] = (7 << 24) | (gpu_senquack.DisplayArea[4] | (gpu_senquack.DisplayArea[5] << 10)); + p2->ulControl[8] = (8 << 24) | ((gpu_senquack.GPU_GP1 >> 17) & 0x3f) | ((gpu_senquack.GPU_GP1 >> 10) & 0x40); + memcpy((void*)p2->psxVRam, (void*)gpu_senquack.vram, FRAME_BUFFER_SIZE); + return (1); + } + else + { + extern void GPU_writeStatus(u32 data); + gpu_senquack.GPU_GP1 = p2->ulStatus; + memcpy((void*)gpu_senquack.vram, (void*)p2->psxVRam, FRAME_BUFFER_SIZE); + GPU_writeStatus((5 << 24) | p2->ulControl[5]); + GPU_writeStatus((7 << 24) | p2->ulControl[7]); + GPU_writeStatus((8 << 24) | p2->ulControl[8]); + gpuSetTexture(gpu_senquack.GPU_GP1); + return (1); + } + return (0); +} + +/////////////////////////////////////////////////////////////////////////////// +// GPU DMA comunication + +/////////////////////////////////////////////////////////////////////////////// +u8 PacketSize[256] = +{ + 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0-15 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 + 3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 32-47 + 5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11, // 48-63 + 2, 2, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, // 64-79 + 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, // 80-95 + 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, // 96-111 + 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, // 112-127 + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128- + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 144 + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 160 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // +}; + +/////////////////////////////////////////////////////////////////////////////// +INLINE void gpuSendPacket() +{ + gpuSendPacketFunction(gpu_senquack.PacketBuffer.U4[0]>>24); +} + +/////////////////////////////////////////////////////////////////////////////// +INLINE void gpuCheckPacket(u32 uData) +{ + if (gpu_senquack.PacketCount) + { + gpu_senquack.PacketBuffer.U4[gpu_senquack.PacketIndex++] = uData; + --gpu_senquack.PacketCount; + } + else + { + gpu_senquack.PacketBuffer.U4[0] = uData; + gpu_senquack.PacketCount = PacketSize[uData >> 24]; + gpu_senquack.PacketIndex = 1; + } + if (!gpu_senquack.PacketCount) gpuSendPacket(); +} + +/////////////////////////////////////////////////////////////////////////////// +void GPU_writeDataMem(u32* dmaAddress, int dmaCount) +{ + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_writeDataMem(%d)\n",dmaCount); + #endif + u32 data; + const u16 *VIDEO_END = (u16*)gpu_senquack.vram+(FRAME_BUFFER_SIZE/2)-1; + gpu_senquack.GPU_GP1 &= ~0x14000000; + + while (dmaCount) + { + if (gpu_senquack.dma.FrameToWrite) + { + while (dmaCount) + { + dmaCount--; + data = *dmaAddress++; + if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; + gpu_senquack.dma.pvram[gpu_senquack.dma.px] = data; + if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + { + gpu_senquack.dma.px = 0; + gpu_senquack.dma.pvram += 1024; + if (++gpu_senquack.dma.py >= gpu_senquack.dma.y_end) + { + gpu_senquack.dma.FrameToWrite = false; + gpu_senquack.GPU_GP1 &= ~0x08000000; + gpu_senquack.fb_dirty = true; + break; + } + } + if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; + gpu_senquack.dma.pvram[gpu_senquack.dma.px] = data>>16; + if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + { + gpu_senquack.dma.px = 0; + gpu_senquack.dma.pvram += 1024; + 
if (++gpu_senquack.dma.py >= gpu_senquack.dma.y_end) + { + gpu_senquack.dma.FrameToWrite = false; + gpu_senquack.GPU_GP1 &= ~0x08000000; + gpu_senquack.fb_dirty = true; + break; + } + } + } + } + else + { + data = *dmaAddress++; + dmaCount--; + gpuCheckPacket(data); + } + } + + gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 | 0x14000000) & ~0x60000000; +} + +long GPU_dmaChain(u32 *rambase, u32 start_addr) +{ + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_dmaChain(0x%x)\n",start_addr); + #endif + + u32 addr, *list; + u32 len, count; + long dma_words = 0; + + if (gpu_senquack.dma.last_dma) *gpu_senquack.dma.last_dma |= 0x800000; + + gpu_senquack.GPU_GP1 &= ~0x14000000; + + addr = start_addr & 0xffffff; + for (count = 0; addr != 0xffffff; count++) + { + list = rambase + (addr & 0x1fffff) / 4; + len = list[0] >> 24; + addr = list[0] & 0xffffff; + + dma_words += 1 + len; + + // add loop detection marker + list[0] |= 0x800000; + + if (len) GPU_writeDataMem(list + 1, len); + + if (addr & 0x800000) + { + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_dmaChain(LOOP)\n"); + #endif + break; + } + } + + // remove loop detection markers + addr = start_addr & 0x1fffff; + while (count-- > 0) + { + list = rambase + addr / 4; + addr = list[0] & 0x1fffff; + list[0] &= ~0x800000; + } + + if (gpu_senquack.dma.last_dma) *gpu_senquack.dma.last_dma &= ~0x800000; + gpu_senquack.dma.last_dma = rambase + (start_addr & 0x1fffff) / 4; + + gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 | 0x14000000) & ~0x60000000; + + return dma_words; +} + +/////////////////////////////////////////////////////////////////////////////// +void GPU_writeData(u32 data) +{ + const u16 *VIDEO_END = (u16*)gpu_senquack.vram+(FRAME_BUFFER_SIZE/2)-1; + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_writeData()\n"); + #endif + gpu_senquack.GPU_GP1 &= ~0x14000000; + + if (gpu_senquack.dma.FrameToWrite) + { + if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; + 
gpu_senquack.dma.pvram[gpu_senquack.dma.px]=(u16)data; + if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + { + gpu_senquack.dma.px = 0; + gpu_senquack.dma.pvram += 1024; + if (++gpu_senquack.dma.py >= gpu_senquack.dma.y_end) + { + gpu_senquack.dma.FrameToWrite = false; + gpu_senquack.GPU_GP1 &= ~0x08000000; + gpu_senquack.fb_dirty = true; + } + } + if (gpu_senquack.dma.FrameToWrite) + { + if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; + gpu_senquack.dma.pvram[gpu_senquack.dma.px]=data>>16; + if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + { + gpu_senquack.dma.px = 0; + gpu_senquack.dma.pvram += 1024; + if (++gpu_senquack.dma.py >= gpu_senquack.dma.y_end) + { + gpu_senquack.dma.FrameToWrite = false; + gpu_senquack.GPU_GP1 &= ~0x08000000; + gpu_senquack.fb_dirty = true; + } + } + } + } + else + { + gpuCheckPacket(data); + } + gpu_senquack.GPU_GP1 |= 0x14000000; +} + + +/////////////////////////////////////////////////////////////////////////////// +void GPU_readDataMem(u32* dmaAddress, int dmaCount) +{ + const u16 *VIDEO_END = (u16*)gpu_senquack.vram+(FRAME_BUFFER_SIZE/2)-1; + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_readDataMem(%d)\n",dmaCount); + #endif + if(!gpu_senquack.dma.FrameToRead) return; + + gpu_senquack.GPU_GP1 &= ~0x14000000; + do + { + if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; + // lower 16 bit + //senquack - 64-bit fix (from notaz) + //u32 data = (unsigned long)gpu_senquack.dma.pvram[gpu_senquack.dma.px]; + u32 data = (u32)gpu_senquack.dma.pvram[gpu_senquack.dma.px]; + + if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + { + gpu_senquack.dma.px = 0; + gpu_senquack.dma.pvram += 1024; + } + + if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; + // higher 16 bit (always, even if it's an odd width) + //senquack - 64-bit fix (from notaz) + //data |= (unsigned 
long)(gpu_senquack.dma.pvram[gpu_senquack.dma.px])<<16; + data |= (u32)(gpu_senquack.dma.pvram[gpu_senquack.dma.px])<<16; + + *dmaAddress++ = data; + + if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + { + gpu_senquack.dma.px = 0; + gpu_senquack.dma.pvram += 1024; + if (++gpu_senquack.dma.py >= gpu_senquack.dma.y_end) + { + gpu_senquack.dma.FrameToRead = false; + gpu_senquack.GPU_GP1 &= ~0x08000000; + break; + } + } + } while (--dmaCount); + + gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 | 0x14000000) & ~0x60000000; +} + + + +/////////////////////////////////////////////////////////////////////////////// +u32 GPU_readData(void) +{ + const u16 *VIDEO_END = (u16*)gpu_senquack.vram+(FRAME_BUFFER_SIZE/2)-1; + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_readData()\n"); + #endif + gpu_senquack.GPU_GP1 &= ~0x14000000; + if (gpu_senquack.dma.FrameToRead) + { + if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; + gpu_senquack.GPU_GP0 = gpu_senquack.dma.pvram[gpu_senquack.dma.px]; + if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + { + gpu_senquack.dma.px = 0; + gpu_senquack.dma.pvram += 1024; + if (++gpu_senquack.dma.py >= gpu_senquack.dma.y_end) + { + gpu_senquack.dma.FrameToRead = false; + gpu_senquack.GPU_GP1 &= ~0x08000000; + } + } + if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; + gpu_senquack.GPU_GP0 |= gpu_senquack.dma.pvram[gpu_senquack.dma.px]<<16; + if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + { + gpu_senquack.dma.px = 0; + gpu_senquack.dma.pvram += 1024; + if (++gpu_senquack.dma.py >= gpu_senquack.dma.y_end) + { + gpu_senquack.dma.FrameToRead = false; + gpu_senquack.GPU_GP1 &= ~0x08000000; + } + } + + } + gpu_senquack.GPU_GP1 |= 0x14000000; + + return (gpu_senquack.GPU_GP0); +} + +/////////////////////////////////////////////////////////////////////////////// +u32 GPU_readStatus(void) +{ + return gpu_senquack.GPU_GP1; +} + +INLINE void 
GPU_NoSkip(void) +{ + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_NoSkip()\n"); + #endif + gpu_senquack.frameskip.wasSkip = gpu_senquack.frameskip.isSkip; + if (gpu_senquack.frameskip.isSkip) + { + gpu_senquack.frameskip.isSkip = false; + gpu_senquack.frameskip.skipGPU = false; + } + else + { + gpu_senquack.frameskip.isSkip = gpu_senquack.frameskip.skipFrame; + gpu_senquack.frameskip.skipGPU = gpu_senquack.frameskip.skipFrame; + } +} + +/////////////////////////////////////////////////////////////////////////////// +void GPU_writeStatus(u32 data) +{ + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_writeStatus(%d,%d)\n",data>>24,data & 0xff); + #endif + switch (data >> 24) { + case 0x00: + gpuReset(); + break; + case 0x01: + gpu_senquack.GPU_GP1 &= ~0x08000000; + gpu_senquack.PacketCount = 0; + gpu_senquack.dma.FrameToRead = gpu_senquack.dma.FrameToWrite = false; + break; + case 0x02: + gpu_senquack.GPU_GP1 &= ~0x08000000; + gpu_senquack.PacketCount = 0; + gpu_senquack.dma.FrameToRead = gpu_senquack.dma.FrameToWrite = false; + break; + case 0x03: + gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 & ~0x00800000) | ((data & 1) << 23); + break; + case 0x04: + if (data == 0x04000000) gpu_senquack.PacketCount = 0; + gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 & ~0x60000000) | ((data & 3) << 29); + break; + case 0x05: + // Start of Display Area in VRAM + gpu_senquack.DisplayArea[0] = data & 0x3ff; // X (0..1023) + gpu_senquack.DisplayArea[1] = (data >> 10) & 0x1ff; // Y (0..511) + GPU_NoSkip(); + break; + case 0x06: + // GP1(06h) - Horizontal Display range (on Screen) + // 0-11 X1 (260h+0) ;12bit ;\counted in 53.222400MHz units, + // 12-23 X2 (260h+320*8) ;12bit ;/relative to HSYNC + + // senquack - gpu_senquack completely ignores GP1(0x06) command and + // lacks even a place in DisplayArea[] array to store the values. + // It seems to have been concerned only with vertical display range + // and centering top/bottom. 
I will not add support here, and + // focus instead on the gpulib version (gpulib_if.cpp) which uses + // gpulib for its PS1->host framebuffer blitting. + break; + case 0x07: + // GP1(07h) - Vertical Display range (on Screen) + // 0-9 Y1 (NTSC=88h-(224/2), (PAL=A3h-(264/2)) ;\scanline numbers on screen, + // 10-19 Y2 (NTSC=88h+(224/2), (PAL=A3h+(264/2)) ;/relative to VSYNC + // 20-23 Not used (zero) + { + u32 v1=data & 0x000003FF; //(short)(data & 0x3ff); + u32 v2=(data & 0x000FFC00) >> 10; //(short)((data>>10) & 0x3ff); + if ((gpu_senquack.DisplayArea[4]!=v1)||(gpu_senquack.DisplayArea[5]!=v2)) + { + gpu_senquack.DisplayArea[4] = v1; + gpu_senquack.DisplayArea[5] = v2; + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"video_clear(CHANGE_Y)\n"); + #endif + video_clear(); + } + } + break; + case 0x08: + { + static const u32 HorizontalResolution[8] = { 256, 368, 320, 384, 512, 512, 640, 640 }; + static const u32 VerticalResolution[4] = { 240, 480, 256, 480 }; + gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 & ~0x007F0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10); + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_writeStatus(RES=%dx%d,BITS=%d,PAL=%d)\n",HorizontalResolution[(gpu_senquack.GPU_GP1 >> 16) & 7], + VerticalResolution[(gpu_senquack.GPU_GP1 >> 19) & 3],(gpu_senquack.GPU_GP1&0x00200000?24:15),(IS_PAL?1:0)); + #endif + // Video mode change + u32 new_width = HorizontalResolution[(gpu_senquack.GPU_GP1 >> 16) & 7]; + u32 new_height = VerticalResolution[(gpu_senquack.GPU_GP1 >> 19) & 3]; + + if (gpu_senquack.DisplayArea[2] != new_width || gpu_senquack.DisplayArea[3] != new_height) + { + // Update width + gpu_senquack.DisplayArea[2] = new_width; + + if (PixelSkipEnabled()) { + // Set blit_mask for high horizontal resolutions. This allows skipping + // rendering pixels that would never get displayed on low-resolution + // platforms that use simple pixel-dropping scaler. 
+ switch (gpu_senquack.DisplayArea[2]) + { + case 512: gpu_senquack.blit_mask = 0xa4; break; // GPU_BlitWWSWWSWS + case 640: gpu_senquack.blit_mask = 0xaa; break; // GPU_BlitWS + default: gpu_senquack.blit_mask = 0; break; + } + } else { + gpu_senquack.blit_mask = 0; + } + + // Update height + gpu_senquack.DisplayArea[3] = new_height; + + if (LineSkipEnabled()) { + // Set rendering line-skip (only render every other line in high-res + // 480 vertical mode, or, optionally, force it for all video modes) + + if (gpu_senquack.DisplayArea[3] == 480) { + if (gpu_senquack.config.ilace_force) { + gpu_senquack.ilace_mask = 3; // Only need 1/4 of lines + } else { + gpu_senquack.ilace_mask = 1; // Only need 1/2 of lines + } + } else { + // Vert resolution changed from 480 to lower one + gpu_senquack.ilace_mask = gpu_senquack.config.ilace_force; + } + } else { + gpu_senquack.ilace_mask = 0; + } + + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"video_clear(CHANGE_RES)\n"); + #endif + video_clear(); + } + + } + break; + case 0x10: + switch (data & 0xff) { + case 2: gpu_senquack.GPU_GP0 = gpu_senquack.tex_window; break; + case 3: gpu_senquack.GPU_GP0 = (gpu_senquack.DrawingArea[1] << 10) | gpu_senquack.DrawingArea[0]; break; + case 4: gpu_senquack.GPU_GP0 = ((gpu_senquack.DrawingArea[3]-1) << 10) | (gpu_senquack.DrawingArea[2]-1); break; + case 5: case 6: gpu_senquack.GPU_GP0 = (((u32)gpu_senquack.DrawingOffset[1] & 0x7ff) << 11) | ((u32)gpu_senquack.DrawingOffset[0] & 0x7ff); break; + case 7: gpu_senquack.GPU_GP0 = 2; break; + case 8: case 15: gpu_senquack.GPU_GP0 = 0xBFC03720; break; + } + break; + } +} + +// Blitting functions +#include "gpu_blit.h" + +static void gpuVideoOutput(void) +{ + int h0, x0, y0, w0, h1; + + x0 = gpu_senquack.DisplayArea[0]; + y0 = gpu_senquack.DisplayArea[1]; + + w0 = gpu_senquack.DisplayArea[2]; + h0 = gpu_senquack.DisplayArea[3]; // video mode + + h1 = gpu_senquack.DisplayArea[5] - gpu_senquack.DisplayArea[4]; // display needed + if (h0 == 480) 
h1 = Min2(h1*2,480); + + bool isRGB24 = (gpu_senquack.GPU_GP1 & 0x00200000 ? true : false); + u16* dst16 = SCREEN; + u16* src16 = (u16*)gpu_senquack.vram; + + // PS1 fb read wraps around (fixes black screen in 'Tobal no. 1') + unsigned int src16_offs_msk = 1024*512-1; + unsigned int src16_offs = (x0 + y0*1024) & src16_offs_msk; + + // Height centering + int sizeShift = 1; + if (h0 == 256) { + h0 = 240; + } else if (h0 == 480) { + sizeShift = 2; + } + if (h1 > h0) { + src16_offs = (src16_offs + (((h1-h0) / 2) * 1024)) & src16_offs_msk; + h1 = h0; + } else if (h1> sizeShift) * VIDEO_WIDTH; + } + + + /* Main blitter */ + int incY = (h0==480) ? 2 : 1; + h0=(h0==480 ? 2048 : 1024); + + { + const int li=gpu_senquack.ilace_mask; + bool pi = ProgressiveInterlaceEnabled(); + bool pif = gpu_senquack.prog_ilace_flag; + switch ( w0 ) + { + case 256: + for(int y1=y0+h1; y0>3 (8 times per second) +#define GPU_FRAMESKIP_UPDATE 3 + +static void GPU_frameskip (bool show) +{ + u32 now=get_ticks(); // current frame + + // Update frameskip + if (gpu_senquack.frameskip.skipCount==0) gpu_senquack.frameskip.skipFrame=false; // frameskip off + else if (gpu_senquack.frameskip.skipCount==7) { if (show) gpu_senquack.frameskip.skipFrame=!gpu_senquack.frameskip.skipFrame; } // frameskip medium + else if (gpu_senquack.frameskip.skipCount==8) gpu_senquack.frameskip.skipFrame=true; // frameskip maximum + else + { + static u32 spd=100; // speed % + static u32 frames=0; // frames counter + static u32 prev=now; // previous fps calculation + frames++; + if ((now-prev)>=(TPS>>GPU_FRAMESKIP_UPDATE)) + { + if (IS_PAL) spd=(frames<<1); + else spd=((frames*1001)/600); + spd<<=GPU_FRAMESKIP_UPDATE; + frames=0; + prev=now; + } + switch(gpu_senquack.frameskip.skipCount) + { + case 1: if (spd<50) gpu_senquack.frameskip.skipFrame=true; else gpu_senquack.frameskip.skipFrame=false; break; // frameskip on (spd<50%) + case 2: if (spd<60) gpu_senquack.frameskip.skipFrame=true; else 
gpu_senquack.frameskip.skipFrame=false; break; // frameskip on (spd<60%) + case 3: if (spd<70) gpu_senquack.frameskip.skipFrame=true; else gpu_senquack.frameskip.skipFrame=false; break; // frameskip on (spd<70%) + case 4: if (spd<80) gpu_senquack.frameskip.skipFrame=true; else gpu_senquack.frameskip.skipFrame=false; break; // frameskip on (spd<80%) + case 5: if (spd<90) gpu_senquack.frameskip.skipFrame=true; else gpu_senquack.frameskip.skipFrame=false; break; // frameskip on (spd<90%) + } + } +} + +/////////////////////////////////////////////////////////////////////////////// +void GPU_updateLace(void) +{ + // Interlace bit toggle + gpu_senquack.GPU_GP1 ^= 0x80000000; + + // Update display? + if ((gpu_senquack.fb_dirty) && (!gpu_senquack.frameskip.wasSkip) && (!(gpu_senquack.GPU_GP1&0x00800000))) + { + // Display updated + gpuVideoOutput(); + GPU_frameskip(true); + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_updateLace(UPDATE)\n"); + #endif + } else { + GPU_frameskip(false); + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_updateLace(SKIP)\n"); + #endif + } + + if ((!gpu_senquack.frameskip.skipCount) && (gpu_senquack.DisplayArea[3] == 480)) gpu_senquack.frameskip.skipGPU=true; // Tekken 3 hack + + gpu_senquack.fb_dirty=false; + gpu_senquack.dma.last_dma = NULL; +} + +// Allows frontend to signal plugin to redraw screen after returning to emu +void GPU_requestScreenRedraw() +{ + gpu_senquack.fb_dirty = true; +} + +void GPU_getScreenInfo(GPUScreenInfo_t *sinfo) +{ + bool depth24 = (gpu_senquack.GPU_GP1 & 0x00200000 ? 
true : false); + int16_t hres = (uint16_t)gpu_senquack.DisplayArea[2]; + int16_t vres = (uint16_t)gpu_senquack.DisplayArea[3]; + int16_t w = hres; // Original gpu_senquack doesn't support width < 100% + int16_t h = gpu_senquack.DisplayArea[5] - gpu_senquack.DisplayArea[4]; + if (vres == 480) + h *= 2; + if (h <= 0 || h > vres) + h = vres; + + sinfo->vram = (uint8_t*)gpu_senquack.vram; + sinfo->x = (uint16_t)gpu_senquack.DisplayArea[0]; + sinfo->y = (uint16_t)gpu_senquack.DisplayArea[1]; + sinfo->w = w; + sinfo->h = h; + sinfo->hres = hres; + sinfo->vres = vres; + sinfo->depth24 = depth24; + sinfo->pal = IS_PAL; +} diff --git a/plugins/gpu_senquack/gpu.h b/plugins/gpu_senquack/gpu.h new file mode 100644 index 000000000..7a467511c --- /dev/null +++ b/plugins/gpu_senquack/gpu.h @@ -0,0 +1,74 @@ +/*************************************************************************** +* Copyright (C) 2010 PCSX4ALL Team * +* Copyright (C) 2010 Unai * +* Copyright (C) 2016 Senquack (dansilsby gmail com) * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. 
*
+***************************************************************************/
+
+#ifndef GPU_UNAI_GPU_H
+#define GPU_UNAI_GPU_H
+
+struct gpu_senquack_config_t {
+	uint8_t pixel_skip:1;     // If 1, allows skipping rendering pixels that
+	                          //  would not be visible when a high horizontal
+	                          //  resolution PS1 video mode is set.
+	                          //  Only applies to devices with low resolutions
+	                          //  like 320x240. Should not be used if a
+	                          //  down-scaling framebuffer blitter is in use.
+	                          //  Can cause gfx artifacts if game reads VRAM
+	                          //  to do framebuffer effects.
+
+	uint8_t ilace_force:3;    // Option to force skipping rendering of lines,
+	                          //  for very slow platforms. Value will be
+	                          //  assigned to 'ilace_mask' in gpu_senquack struct.
+	                          //  Normally 0. Value '1' will skip rendering
+	                          //  odd lines.
+
+	uint8_t scale_hires:1;    // If 1, will scale hi-res output to
+	                          //  320x240 when gpulib reads the frame.
+	                          //  Implies pixel_skip and ilace_force
+	                          //  (when height > 240).
+	uint8_t lighting:1;       // NOTE(review): presumably what LightingEnabled() reports - confirm
+	uint8_t fast_lighting:1;  // NOTE(review): presumably what FastLightingEnabled() reports - confirm
+	uint8_t blending:1;       // NOTE(review): presumably what BlendingEnabled() reports - confirm
+	uint8_t dithering:1;      // NOTE(review): presumably what DitheringEnabled() reports - confirm
+
+	//senquack Only PCSX Rearmed's version of gpu_senquack had this, and I
+	// don't think it's necessary. It would require adding 'AH' flag to
+	// gpuSpriteSpanFn() increasing size of sprite span function array.
+	//uint8_t enableAbbeyHack:1;  // Abe's Odyssey hack
+
+	////////////////////////////////////////////////////////////////////////////
+	// Variables used only by older standalone version of gpu_senquack (gpu.cpp)
+#ifndef USE_GPULIB
+	uint8_t prog_ilace:1;         // Progressive interlace option (old option)
+	                              //  This option was somewhat oddly named:
+	                              //  When in interlaced video mode, on a low-res
+	                              //  320x240 device, only the even lines are
+	                              //  rendered. This option will take that one
+	                              //  step further and only render half the even
+	                              //  lines one frame, and then the other half.
+	uint8_t frameskip_count:3;    // Frame skip (0..7)
+#endif
+};
+
+extern gpu_senquack_config_t gpu_senquack_config_ext; // no 'struct' keyword: only valid when compiled as C++
+
+// TODO: clean up show_fps frontend option
+extern bool show_fps;
+
+#endif // GPU_UNAI_GPU_H
diff --git a/plugins/gpu_senquack/gpu_arm.S b/plugins/gpu_senquack/gpu_arm.S
new file mode 100644
index 000000000..ec87f211a
--- /dev/null
+++ b/plugins/gpu_senquack/gpu_arm.S
@@ -0,0 +1,56 @@
+/*
+ * (C) Gražvydas "notaz" Ignotas, 2011
+ *
+ * This work is licensed under the terms of GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "arm_features.h"
+
+.text
+.align 2
+
+@ in: r0=dst, r2=pal, r12=0x1e
+@ trashes r6-r8,lr,flags
+.macro do_4_pixels rs ibase obase
+.if \ibase - 1 < 0
+    and     r6, r12, \rs, lsl #1         @ r6 = (rs << 1) & r12
+.else
+    and     r6, r12, \rs, lsr #\ibase-1  @ r6 = (rs >> (ibase-1)) & r12
+.endif
+    and     r7, r12, \rs, lsr #\ibase+3  @ next 4-bit field, same scaling
+    and     r8, r12, \rs, lsr #\ibase+7
+    and     lr, r12, \rs, lsr #\ibase+11
+    ldrh    r6, [r2, r6]                 @ r6 = halfword at r2 + r6
+    ldrh    r7, [r2, r7]
+    ldrh    r8, [r2, r8]
+    ldrh    lr, [r2, lr]
+    tst     r6, r6                       @ Z set when the loaded halfword is zero
+    strneh  r6, [r0, #\obase+0]          @ store only when non-zero
+    tst     r7, r7
+    strneh  r7, [r0, #\obase+2]
+    tst     r8, r8
+    strneh  r8, [r0, #\obase+4]
+    tst     lr, lr
+    strneh  lr, [r0, #\obase+6]
+.endm
+
+.global draw_spr16_full @ (u16 *d, void *s, u16 *pal, int lines)
+draw_spr16_full:
+    stmfd   sp!, {r4-r8,lr}              @ save r4-r8 and the return address
+    mov     r12, #0x1e   @ empty pixel
+
+0:
+    ldmia   r1, {r4,r5}                  @ load two words (8 bytes) from r1
+    do_4_pixels r4, 0,  0
+    do_4_pixels r4, 16, 8
+    do_4_pixels r5, 0,  16
+    do_4_pixels r5, 16, 24
+    subs    r3, r3, #1                   @ decrement r3 (lines) and set flags
+    add     r0, r0, #2048                @ advance r0 by 2048 bytes
+    add     r1, r1, #2048                @ advance r1 by 2048 bytes
+    bgt     0b                           @ repeat while r3 > 0
+
+    ldmfd   sp!, {r4-r8,pc}              @ restore r4-r8 and return
+
+@ vim:filetype=armasm
diff --git a/plugins/gpu_senquack/gpu_arm.h b/plugins/gpu_senquack/gpu_arm.h
new file mode 100644
index 000000000..b9f8f97cc
--- /dev/null
+++ b/plugins/gpu_senquack/gpu_arm.h
@@ -0,0 +1,14 @@
+#ifndef __GPU_UNAI_GPU_ARM_H__
+#define __GPU_UNAI_GPU_ARM_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void draw_spr16_full(u16 *d, void *s, u16 *pal, int lines);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __GPU_UNAI_GPU_ARM_H__
*/
diff --git a/plugins/gpu_senquack/gpu_blit.h b/plugins/gpu_senquack/gpu_blit.h
new file mode 100644
index 000000000..e93f12ffe
--- /dev/null
+++ b/plugins/gpu_senquack/gpu_blit.h
@@ -0,0 +1,405 @@
+/***************************************************************************
+*   Copyright (C) 2010 PCSX4ALL Team                                      *
+*   Copyright (C) 2010 Unai                                               *
+*                                                                         *
+*   This program is free software; you can redistribute it and/or modify  *
+*   it under the terms of the GNU General Public License as published by  *
+*   the Free Software Foundation; either version 2 of the License, or     *
+*   (at your option) any later version.                                   *
+*                                                                         *
+*   This program is distributed in the hope that it will be useful,       *
+*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
+*   GNU General Public License for more details.                          *
+*                                                                         *
+*   You should have received a copy of the GNU General Public License     *
+*   along with this program; if not, write to the                         *
+*   Free Software Foundation, Inc.,                                       *
+*   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
+***************************************************************************/
+
+#ifndef _INNER_BLIT_H_
+#define _INNER_BLIT_H_
+
+#ifndef USE_BGR15
+#define RGB24(R,G,B)	(((((R)&0xF8)<<8)|(((G)&0xFC)<<3)|(((B)&0xF8)>>3)))  // three 8-bit channels -> 5:6:5, first arg in bits 15-11
+#define RGB16X2(C)      ((((C)&(0x1f001f<<10))>>10) | (((C)&(0x1f001f<<5))<<1) | (((C)&(0x1f001f<<0))<<11))  // RGB16() on two pixels packed in one 32-bit word; fully parenthesized so it is safe inside larger expressions
+#define RGB16(C)		((((C)&(0x1f<<10))>>10) | (((C)&(0x1f<<5))<<1) | (((C)&(0x1f<<0))<<11))  // 5:5:5 pixel -> 5:6:5 with the two outer 5-bit fields swapped; fully parenthesized
+#else
+#define RGB24(R,G,B)  	((((R)&0xF8)>>3)|(((G)&0xF8)<<2)|(((B)&0xF8)<<7))  // three 8-bit channels -> 5:5:5, first arg in bits 4-0
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+//  GPU Blitting code with rescale and interlace support.
+ +INLINE void GPU_BlitWW(const void* src, u16* dst16, bool isRGB24) +{ + u32 uCount; + if(!isRGB24) + { + #ifndef USE_BGR15 + uCount = 20; + const u32* src32 = (const u32*) src; + u32* dst32 = (u32*)(void*) dst16; + do{ + dst32[0] = RGB16X2(src32[0]); + dst32[1] = RGB16X2(src32[1]); + dst32[2] = RGB16X2(src32[2]); + dst32[3] = RGB16X2(src32[3]); + dst32[4] = RGB16X2(src32[4]); + dst32[5] = RGB16X2(src32[5]); + dst32[6] = RGB16X2(src32[6]); + dst32[7] = RGB16X2(src32[7]); + dst32 += 8; + src32 += 8; + }while(--uCount); + #else + memcpy(dst16,src,640); + #endif + } + else + { + uCount = 20; + const u8* src8 = (const u8*)src; + do{ + dst16[ 0] = RGB24(src8[ 0], src8[ 1], src8[ 2] ); + dst16[ 1] = RGB24(src8[ 3], src8[ 4], src8[ 5] ); + dst16[ 2] = RGB24(src8[ 6], src8[ 7], src8[ 8] ); + dst16[ 3] = RGB24(src8[ 9], src8[10], src8[11] ); + dst16[ 4] = RGB24(src8[12], src8[13], src8[14] ); + dst16[ 5] = RGB24(src8[15], src8[16], src8[17] ); + dst16[ 6] = RGB24(src8[18], src8[19], src8[20] ); + dst16[ 7] = RGB24(src8[21], src8[22], src8[23] ); + + dst16[ 8] = RGB24(src8[24], src8[25], src8[26] ); + dst16[ 9] = RGB24(src8[27], src8[28], src8[29] ); + dst16[10] = RGB24(src8[30], src8[31], src8[32] ); + dst16[11] = RGB24(src8[33], src8[34], src8[35] ); + dst16[12] = RGB24(src8[36], src8[37], src8[38] ); + dst16[13] = RGB24(src8[39], src8[40], src8[41] ); + dst16[14] = RGB24(src8[42], src8[43], src8[44] ); + dst16[15] = RGB24(src8[45], src8[46], src8[47] ); + dst16 += 16; + src8 += 48; + }while(--uCount); + } +} + +INLINE void GPU_BlitWWSWWSWS(const void* src, u16* dst16, bool isRGB24) +{ + u32 uCount; + if(!isRGB24) + { + #ifndef USE_BGR15 + uCount = 32; + const u16* src16 = (const u16*) src; + do{ + dst16[ 0] = RGB16(src16[0]); + dst16[ 1] = RGB16(src16[1]); + dst16[ 2] = RGB16(src16[3]); + dst16[ 3] = RGB16(src16[4]); + dst16[ 4] = RGB16(src16[6]); + dst16[ 5] = RGB16(src16[8]); + dst16[ 6] = RGB16(src16[9]); + dst16[ 7] = RGB16(src16[11]); + dst16[ 8] = RGB16(src16[12]); 
+ dst16[ 9] = RGB16(src16[14]); + dst16 += 10; + src16 += 16; + }while(--uCount); + #else + uCount = 64; + const u16* src16 = (const u16*) src; + do{ + *dst16++ = *src16++; + *dst16++ = *src16; + src16+=2; + *dst16++ = *src16++; + *dst16++ = *src16; + src16+=2; + *dst16++ = *src16; + src16+=2; + }while(--uCount); + #endif + } + else + { + uCount = 32; + const u8* src8 = (const u8*)src; + do{ + dst16[ 0] = RGB24(src8[ 0], src8[ 1], src8[ 2] ); + dst16[ 1] = RGB24(src8[ 3], src8[ 4], src8[ 5] ); + dst16[ 2] = RGB24(src8[ 9], src8[10], src8[11] ); + dst16[ 3] = RGB24(src8[12], src8[13], src8[14] ); + dst16[ 4] = RGB24(src8[18], src8[19], src8[20] ); + + dst16[ 5] = RGB24(src8[24], src8[25], src8[26] ); + dst16[ 6] = RGB24(src8[27], src8[28], src8[29] ); + dst16[ 7] = RGB24(src8[33], src8[34], src8[35] ); + dst16[ 8] = RGB24(src8[36], src8[37], src8[38] ); + dst16[ 9] = RGB24(src8[42], src8[43], src8[44] ); + + dst16 += 10; + src8 += 48; + }while(--uCount); + } +} + +INLINE void GPU_BlitWWWWWS(const void* src, u16* dst16, bool isRGB24) +{ + u32 uCount; + if(!isRGB24) + { + #ifndef USE_BGR15 + uCount = 32; + const u16* src16 = (const u16*) src; + do{ + dst16[ 0] = RGB16(src16[0]); + dst16[ 1] = RGB16(src16[1]); + dst16[ 2] = RGB16(src16[2]); + dst16[ 3] = RGB16(src16[3]); + dst16[ 4] = RGB16(src16[4]); + dst16[ 5] = RGB16(src16[6]); + dst16[ 6] = RGB16(src16[7]); + dst16[ 7] = RGB16(src16[8]); + dst16[ 8] = RGB16(src16[9]); + dst16[ 9] = RGB16(src16[10]); + dst16 += 10; + src16 += 12; + }while(--uCount); + #else + uCount = 64; + const u16* src16 = (const u16*) src; + do{ + *dst16++ = *src16++; + *dst16++ = *src16++; + *dst16++ = *src16++; + *dst16++ = *src16++; + *dst16++ = *src16; + src16+=2; + }while(--uCount); + #endif + } + else + { + uCount = 32; + const u8* src8 = (const u8*)src; + do{ + dst16[0] = RGB24(src8[ 0], src8[ 1], src8[ 2] ); + dst16[1] = RGB24(src8[ 3], src8[ 4], src8[ 5] ); + dst16[2] = RGB24(src8[ 6], src8[ 7], src8[ 8] ); + dst16[3] = RGB24(src8[ 9], 
src8[10], src8[11] ); + dst16[4] = RGB24(src8[12], src8[13], src8[14] ); + dst16[5] = RGB24(src8[18], src8[19], src8[20] ); + dst16[6] = RGB24(src8[21], src8[22], src8[23] ); + dst16[7] = RGB24(src8[24], src8[25], src8[26] ); + dst16[8] = RGB24(src8[27], src8[28], src8[29] ); + dst16[9] = RGB24(src8[30], src8[31], src8[32] ); + dst16 += 10; + src8 += 36; + }while(--uCount); + } +} + +INLINE void GPU_BlitWWWWWWWWS(const void* src, u16* dst16, bool isRGB24, u32 uClip_src) +{ + u32 uCount; + if(!isRGB24) + { + #ifndef USE_BGR15 + uCount = 20; + const u16* src16 = ((const u16*) src) + uClip_src; + do{ + dst16[ 0] = RGB16(src16[0]); + dst16[ 1] = RGB16(src16[1]); + dst16[ 2] = RGB16(src16[2]); + dst16[ 3] = RGB16(src16[3]); + dst16[ 4] = RGB16(src16[4]); + dst16[ 5] = RGB16(src16[5]); + dst16[ 6] = RGB16(src16[6]); + dst16[ 7] = RGB16(src16[7]); + + dst16[ 8] = RGB16(src16[9]); + dst16[ 9] = RGB16(src16[10]); + dst16[10] = RGB16(src16[11]); + dst16[11] = RGB16(src16[12]); + dst16[12] = RGB16(src16[13]); + dst16[13] = RGB16(src16[14]); + dst16[14] = RGB16(src16[15]); + dst16[15] = RGB16(src16[16]); + dst16 += 16; + src16 += 18; + }while(--uCount); + #else + uCount = 40; + const u16* src16 = ((const u16*) src) + uClip_src; + do{ + *dst16++ = *src16++; + *dst16++ = *src16++; + *dst16++ = *src16++; + *dst16++ = *src16++; + *dst16++ = *src16++; + *dst16++ = *src16++; + *dst16++ = *src16++; + *dst16++ = *src16; + src16+=2; + }while(--uCount); + #endif + } + else + { + uCount = 20; + const u8* src8 = (const u8*)src + (uClip_src<<1) + uClip_src; + do{ + dst16[ 0] = RGB24(src8[ 0], src8[ 1], src8[ 2] ); + dst16[ 1] = RGB24(src8[ 3], src8[ 4], src8[ 5] ); + dst16[ 2] = RGB24(src8[ 6], src8[ 7], src8[ 8] ); + dst16[ 3] = RGB24(src8[ 9], src8[10], src8[11] ); + dst16[ 4] = RGB24(src8[12], src8[13], src8[14] ); + dst16[ 5] = RGB24(src8[15], src8[16], src8[17] ); + dst16[ 6] = RGB24(src8[18], src8[19], src8[20] ); + dst16[ 7] = RGB24(src8[21], src8[22], src8[23] ); + + dst16[ 8] = 
RGB24(src8[27], src8[28], src8[29] ); + dst16[ 9] = RGB24(src8[30], src8[31], src8[32] ); + dst16[10] = RGB24(src8[33], src8[34], src8[35] ); + dst16[11] = RGB24(src8[36], src8[37], src8[38] ); + dst16[12] = RGB24(src8[39], src8[40], src8[41] ); + dst16[13] = RGB24(src8[42], src8[43], src8[44] ); + dst16[14] = RGB24(src8[45], src8[46], src8[47] ); + dst16[15] = RGB24(src8[48], src8[49], src8[50] ); + dst16 += 16; + src8 += 54; + }while(--uCount); + } +} + +INLINE void GPU_BlitWWDWW(const void* src, u16* dst16, bool isRGB24) +{ + u32 uCount; + if(!isRGB24) + { + #ifndef USE_BGR15 + uCount = 32; + const u16* src16 = (const u16*) src; + do{ + dst16[ 0] = RGB16(src16[0]); + dst16[ 1] = RGB16(src16[1]); + dst16[ 2] = dst16[1]; + dst16[ 3] = RGB16(src16[2]); + dst16[ 4] = RGB16(src16[3]); + dst16[ 5] = RGB16(src16[4]); + dst16[ 6] = RGB16(src16[5]); + dst16[ 7] = dst16[6]; + dst16[ 8] = RGB16(src16[6]); + dst16[ 9] = RGB16(src16[7]); + dst16 += 10; + src16 += 8; + }while(--uCount); + #else + uCount = 64; + const u16* src16 = (const u16*) src; + do{ + *dst16++ = *src16++; + *dst16++ = *src16; + *dst16++ = *src16++; + *dst16++ = *src16++; + *dst16++ = *src16++; + }while(--uCount); + #endif + } + else + { + uCount = 32; + const u8* src8 = (const u8*)src; + do{ + dst16[ 0] = RGB24(src8[0], src8[ 1], src8[ 2] ); + dst16[ 1] = RGB24(src8[3], src8[ 4], src8[ 5] ); + dst16[ 2] = dst16[1]; + dst16[ 3] = RGB24(src8[6], src8[ 7], src8[ 8] ); + dst16[ 4] = RGB24(src8[9], src8[10], src8[11] ); + + dst16[ 5] = RGB24(src8[12], src8[13], src8[14] ); + dst16[ 6] = RGB24(src8[15], src8[16], src8[17] ); + dst16[ 7] = dst16[6]; + dst16[ 8] = RGB24(src8[18], src8[19], src8[20] ); + dst16[ 9] = RGB24(src8[21], src8[22], src8[23] ); + dst16 += 10; + src8 += 24; + }while(--uCount); + } +} + + +INLINE void GPU_BlitWS(const void* src, u16* dst16, bool isRGB24) +{ + u32 uCount; + if(!isRGB24) + { + #ifndef USE_BGR15 + uCount = 20; + const u16* src16 = (const u16*) src; + do{ + dst16[ 0] = 
RGB16(src16[0]); + dst16[ 1] = RGB16(src16[2]); + dst16[ 2] = RGB16(src16[4]); + dst16[ 3] = RGB16(src16[6]); + + dst16[ 4] = RGB16(src16[8]); + dst16[ 5] = RGB16(src16[10]); + dst16[ 6] = RGB16(src16[12]); + dst16[ 7] = RGB16(src16[14]); + + dst16[ 8] = RGB16(src16[16]); + dst16[ 9] = RGB16(src16[18]); + dst16[10] = RGB16(src16[20]); + dst16[11] = RGB16(src16[22]); + + dst16[12] = RGB16(src16[24]); + dst16[13] = RGB16(src16[26]); + dst16[14] = RGB16(src16[28]); + dst16[15] = RGB16(src16[30]); + + dst16 += 16; + src16 += 32; + }while(--uCount); + #else + uCount = 320; + const u16* src16 = (const u16*) src; + do{ + *dst16++ = *src16; src16+=2; + }while(--uCount); + #endif + } + else + { + uCount = 20; + const u8* src8 = (const u8*) src; + do{ + dst16[ 0] = RGB24(src8[ 0], src8[ 1], src8[ 2] ); + dst16[ 1] = RGB24(src8[ 6], src8[ 7], src8[ 8] ); + dst16[ 2] = RGB24(src8[12], src8[13], src8[14] ); + dst16[ 3] = RGB24(src8[18], src8[19], src8[20] ); + + dst16[ 4] = RGB24(src8[24], src8[25], src8[26] ); + dst16[ 5] = RGB24(src8[30], src8[31], src8[32] ); + dst16[ 6] = RGB24(src8[36], src8[37], src8[38] ); + dst16[ 7] = RGB24(src8[42], src8[43], src8[44] ); + + dst16[ 8] = RGB24(src8[48], src8[49], src8[50] ); + dst16[ 9] = RGB24(src8[54], src8[55], src8[56] ); + dst16[10] = RGB24(src8[60], src8[61], src8[62] ); + dst16[11] = RGB24(src8[66], src8[67], src8[68] ); + + dst16[12] = RGB24(src8[72], src8[73], src8[74] ); + dst16[13] = RGB24(src8[78], src8[79], src8[80] ); + dst16[14] = RGB24(src8[84], src8[85], src8[86] ); + dst16[15] = RGB24(src8[90], src8[91], src8[92] ); + + dst16 += 16; + src8 += 96; + }while(--uCount); + } +} + +#endif //_INNER_BLIT_H_ diff --git a/plugins/gpu_senquack/gpu_command.h b/plugins/gpu_senquack/gpu_command.h new file mode 100644 index 000000000..d052ae8ce --- /dev/null +++ b/plugins/gpu_senquack/gpu_command.h @@ -0,0 +1,621 @@ +/*************************************************************************** +* Copyright (C) 2010 PCSX4ALL Team * +* 
Copyright (C) 2010 Unai                                               *
+*   Copyright (C) 2016 Senquack (dansilsby gmail com)                     *
+*                                                                         *
+*   This program is free software; you can redistribute it and/or modify  *
+*   it under the terms of the GNU General Public License as published by  *
+*   the Free Software Foundation; either version 2 of the License, or     *
+*   (at your option) any later version.                                   *
+*                                                                         *
+*   This program is distributed in the hope that it will be useful,       *
+*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
+*   GNU General Public License for more details.                          *
+*                                                                         *
+*   You should have received a copy of the GNU General Public License     *
+*   along with this program; if not, write to the                         *
+*   Free Software Foundation, Inc.,                                       *
+*   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
+***************************************************************************/
+
+#ifndef __GPU_UNAI_GPU_COMMAND_H__
+#define __GPU_UNAI_GPU_COMMAND_H__
+
+///////////////////////////////////////////////////////////////////////////////
+void gpuSetTexture(u16 tpage) // derives GP1 low bits, BLEND_MODE, TEXT_MODE and TBA from tpage
+{
+	u32 tmode, tx, ty;
+	gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 & ~0x1FF) | (tpage & 0x1FF); // copy low 9 tpage bits into GPU_GP1
+	gpu_senquack.TextureWindow[0]&= ~gpu_senquack.TextureWindow[2]; // clear bits of [0] that are set in [2]
+	gpu_senquack.TextureWindow[1]&= ~gpu_senquack.TextureWindow[3]; // clear bits of [1] that are set in [3]
+
+	tmode = (tpage >> 7) & 3;  // 16bpp, 8bpp, or 4bpp texture colors?
+	                           // 0: 4bpp     1: 8bpp     2/3: 16bpp
+
+	// Nocash PSX docs state setting of 3 is same as setting of 2 (16bpp):
+	// Note: DrHell assumes 3 is same as 0.. TODO: verify which is correct?
+	if (tmode == 3) tmode = 2;
+
+	tx = (tpage & 0x0F) << 6; // tpage bits 0-3, times 64
+	ty = (tpage & 0x10) << 4; // tpage bit 4 -> 0 or 256
+
+	tx += (gpu_senquack.TextureWindow[0] >> (2 - tmode)); // TextureWindow[0] divided by 4/2/1 for tmode 0/1/2
+	ty += gpu_senquack.TextureWindow[1];
+
+	gpu_senquack.BLEND_MODE  = ((tpage>>5) & 3) << 3; // tpage bits 5-6, placed at bits 3-4
+	gpu_senquack.TEXT_MODE   = (tmode + 1) << 5; // gpu_senquack.TEXT_MODE should be values 1..3, so add one
+	gpu_senquack.TBA = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(tx, ty)]; // u16 pointer into vram at (tx, ty)
+}
+
+///////////////////////////////////////////////////////////////////////////////
+INLINE void gpuSetCLUT(u16 clut) // points CBA at a u16 entry inside vram selected by clut
+{
+	gpu_senquack.CBA = &((u16*)gpu_senquack.vram)[(clut & 0x7FFF) << 4]; // u16 index = low 15 bits of clut, times 16
+}
+
+#ifdef  ENABLE_GPU_NULL_SUPPORT
+#define NULL_GPU() break // 'NULL_GPU();' leaves the enclosing switch case before any drawing
+#else
+#define NULL_GPU()
+#endif
+
+#ifdef  ENABLE_GPU_LOG_SUPPORT
+#define DO_LOG(expr) printf expr // expr must be a parenthesized printf argument list
+#else
+#define DO_LOG(expr) {}
+#endif
+
+#define Blending      (((PRIM&0x2) && BlendingEnabled()) ? (PRIM&0x2) : 0) // these macros read a variable named PRIM from the expansion scope
+#define Blending_Mode (((PRIM&0x2) && BlendingEnabled()) ? gpu_senquack.BLEND_MODE : 0)
+#define Lighting      (((~PRIM)&0x1) && LightingEnabled())
+// Dithering applies only to Gouraud-shaded polys or texture-blended polys:
+#define Dithering     (((((~PRIM)&0x1) || (PRIM&0x10)) && DitheringEnabled()) ? \
+                      (ForcedDitheringEnabled() ?
(1<<9) : (gpu_senquack.GPU_GP1 & (1 << 9))) \
+                                                                 : 0)
+
+///////////////////////////////////////////////////////////////////////////////
+//Now handled by Rearmed's gpulib and gpu_senquack/gpulib_if.cpp:
+///////////////////////////////////////////////////////////////////////////////
+#ifndef USE_GPULIB
+
+// Handles GP0 draw settings commands 0xE1...0xE6 (any other sub-command number is ignored)
+static void gpuGP0Cmd_0xEx(gpu_senquack_t &gpu_senquack, u32 cmd_word)
+{
+	// Assume incoming GP0 command is 0xE1..0xE6, convert to 1..6
+	u8 num = (cmd_word >> 24) & 7;
+	switch (num) {
+		case 1: {
+			// GP0(E1h) - Draw Mode setting (aka "Texpage")
+			DO_LOG(("GP0(0xE1) DrawMode TexPage(0x%x)\n", cmd_word));
+			u32 cur_texpage = gpu_senquack.GPU_GP1 & 0x7FF;
+			u32 new_texpage = cmd_word & 0x7FF;
+			if (cur_texpage != new_texpage) { // skip the texture-state update when nothing changed
+				gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 & ~0x7FF) | new_texpage;
+				gpuSetTexture(gpu_senquack.GPU_GP1);
+			}
+		} break;
+
+		case 2: {
+			// GP0(E2h) - Texture Window setting
+			DO_LOG(("GP0(0xE2) TextureWindow(0x%x)\n", cmd_word));
+			if (cmd_word != gpu_senquack.TextureWindowCur) {
+				static const u8 TextureMask[32] = {
+					255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7,
+					127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7
+				};
+				gpu_senquack.TextureWindowCur = cmd_word;
+				gpu_senquack.TextureWindow[0] = ((cmd_word >> 10) & 0x1F) << 3; // bits 10-14, times 8
+				gpu_senquack.TextureWindow[1] = ((cmd_word >> 15) & 0x1F) << 3; // bits 15-19, times 8
+				gpu_senquack.TextureWindow[2] = TextureMask[(cmd_word >> 0) & 0x1F]; // bits 0-4 select a mask
+				gpu_senquack.TextureWindow[3] = TextureMask[(cmd_word >> 5) & 0x1F]; // bits 5-9 select a mask
+				gpu_senquack.TextureWindow[0] &= ~gpu_senquack.TextureWindow[2];
+				gpu_senquack.TextureWindow[1] &= ~gpu_senquack.TextureWindow[3];
+
+				// Inner loop vars must be updated whenever texture window is changed:
+				const u32 fb = FIXED_BITS;  // # of fractional fixed-pt bits of u4/v4
+				gpu_senquack.u_msk = (((u32)gpu_senquack.TextureWindow[2]) << fb) | ((1 << fb) - 1);
+				gpu_senquack.v_msk = (((u32)gpu_senquack.TextureWindow[3]) << fb) | ((1 << fb) - 1);
+
+				gpuSetTexture(gpu_senquack.GPU_GP1);
+			}
+		} break;
+
+		case 3: {
+			// GP0(E3h) - Set Drawing Area top left (X1,Y1)
+			DO_LOG(("GP0(0xE3) DrawingArea Pos(0x%x)\n", cmd_word));
+			gpu_senquack.DrawingArea[0] = cmd_word         & 0x3FF;
+			gpu_senquack.DrawingArea[1] = (cmd_word >> 10) & 0x3FF;
+		} break;
+
+		case 4: {
+			// GP0(E4h) - Set Drawing Area bottom right (X2,Y2)
+			DO_LOG(("GP0(0xE4) DrawingArea Size(0x%x)\n", cmd_word));
+			gpu_senquack.DrawingArea[2] = (cmd_word         & 0x3FF) + 1; // stored one past the received value
+			gpu_senquack.DrawingArea[3] = ((cmd_word >> 10) & 0x3FF) + 1;
+		} break;
+
+		case 5: {
+			// GP0(E5h) - Set Drawing Offset (X,Y)
+			DO_LOG(("GP0(0xE5) DrawingOffset(0x%x)\n", cmd_word));
+			gpu_senquack.DrawingOffset[0] = ((s32)(cmd_word<<(32-11)))>>(32-11); // sign-extend bits 0-10; shift left as unsigned (signed left shift of a negative value is UB)
+			gpu_senquack.DrawingOffset[1] = ((s32)(cmd_word<<(32-22)))>>(32-11); // sign-extend bits 11-21; same unsigned-shift-then-cast pattern
+		} break;
+
+		case 6: {
+			// GP0(E6h) - Mask Bit Setting
+			DO_LOG(("GP0(0xE6) SetMask(0x%x)\n", cmd_word));
+			gpu_senquack.Masking  = (cmd_word & 0x2) <<  1; // bit 1 -> 0 or 4
+			gpu_senquack.PixelMSB = (cmd_word & 0x1) <<  8; // bit 0 -> 0 or 256
+		} break;
+	}
+}
+
+void gpuSendPacketFunction(const int PRIM)
+{
+	//printf("0x%x\n",PRIM);
+
+	//senquack - TODO: optimize this (packet pointer union as prim draw parameter
+	// introduced as optimization for gpulib command-list processing)
+	PtrUnion packet = { .ptr = (void*)&gpu_senquack.PacketBuffer };
+
+	switch (PRIM)
+	{
+		case 0x02: {
+			NULL_GPU();
+			gpuClearImage(packet);    // prim handles updateLace && skip
+			gpu_senquack.fb_dirty = true;
+			DO_LOG(("gpuClearImage(0x%x)\n",PRIM));
+		} break;
+
+		case 0x20:
+		case 0x21:
+		case 0x22:
+		case 0x23: {          // Monochrome 3-pt poly
+			if (!gpu_senquack.frameskip.skipGPU)
+			{
+				NULL_GPU();
+				PP driver = gpuPolySpanDrivers[
+					(gpu_senquack.blit_mask?1024:0) |
+					Blending_Mode |
+					gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB
+				];
+				gpuDrawPolyF(packet, driver, false);
+				gpu_senquack.fb_dirty = true;
+				DO_LOG(("gpuDrawPolyF(0x%x)\n",PRIM));
+			}
+		} break;
+
+		case 0x24:
+		case 0x25:
+		case 0x26:
+		case
0x27: { // Textured 3-pt poly + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + gpuSetTexture (gpu_senquack.PacketBuffer.U4[4] >> 16); + + u32 driver_idx = + (gpu_senquack.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_senquack.TEXT_MODE | + gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB; + + if (!FastLightingEnabled()) { + driver_idx |= Lighting; + } else { + if (!((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F))) + driver_idx |= Lighting; + } + + PP driver = gpuPolySpanDrivers[driver_idx]; + gpuDrawPolyFT(packet, driver, false); + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawPolyFT(0x%x)\n",PRIM)); + } + } break; + + case 0x28: + case 0x29: + case 0x2A: + case 0x2B: { // Monochrome 4-pt poly + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + PP driver = gpuPolySpanDrivers[ + (gpu_senquack.blit_mask?1024:0) | + Blending_Mode | + gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB + ]; + gpuDrawPolyF(packet, driver, true); // is_quad = true + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawPolyF(0x%x) (4-pt QUAD)\n",PRIM)); + } + } break; + + case 0x2C: + case 0x2D: + case 0x2E: + case 0x2F: { // Textured 4-pt poly + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + gpuSetTexture (gpu_senquack.PacketBuffer.U4[4] >> 16); + + u32 driver_idx = + (gpu_senquack.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_senquack.TEXT_MODE | + gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB; + + if (!FastLightingEnabled()) { + driver_idx |= Lighting; + } else { + if (!((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F))) + driver_idx |= Lighting; + } + + PP driver = gpuPolySpanDrivers[driver_idx]; + gpuDrawPolyFT(packet, driver, true); // is_quad = true + 
gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawPolyFT(0x%x) (4-pt QUAD)\n",PRIM)); + } + } break; + + case 0x30: + case 0x31: + case 0x32: + case 0x33: { // Gouraud-shaded 3-pt poly + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + //NOTE: The '129' here is CF_GOURAUD | CF_LIGHT, however + // this is an untextured poly, so CF_LIGHT (texture blend) + // shouldn't apply. Until the original array of template + // instantiation ptrs is fixed, we're stuck with this. (TODO) + PP driver = gpuPolySpanDrivers[ + (gpu_senquack.blit_mask?1024:0) | + Dithering | + Blending_Mode | + gpu_senquack.Masking | Blending | 129 | gpu_senquack.PixelMSB + ]; + gpuDrawPolyG(packet, driver, false); + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawPolyG(0x%x)\n",PRIM)); + } + } break; + + case 0x34: + case 0x35: + case 0x36: + case 0x37: { // Gouraud-shaded, textured 3-pt poly + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + gpuSetTexture (gpu_senquack.PacketBuffer.U4[5] >> 16); + PP driver = gpuPolySpanDrivers[ + (gpu_senquack.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_senquack.TEXT_MODE | + gpu_senquack.Masking | Blending | ((Lighting)?129:0) | gpu_senquack.PixelMSB + ]; + gpuDrawPolyGT(packet, driver, false); + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawPolyGT(0x%x)\n",PRIM)); + } + } break; + + case 0x38: + case 0x39: + case 0x3A: + case 0x3B: { // Gouraud-shaded 4-pt poly + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + // See notes regarding '129' for 0x30..0x33 further above -senquack + PP driver = gpuPolySpanDrivers[ + (gpu_senquack.blit_mask?1024:0) | + Dithering | + Blending_Mode | + gpu_senquack.Masking | Blending | 129 | gpu_senquack.PixelMSB + ]; + gpuDrawPolyG(packet, driver, true); // is_quad = true + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawPolyG(0x%x) (4-pt QUAD)\n",PRIM)); + } + } break; + + case 0x3C: + case 0x3D: + case 0x3E: + case 0x3F: { // Gouraud-shaded, 
textured 4-pt poly + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + gpuSetTexture (gpu_senquack.PacketBuffer.U4[5] >> 16); + PP driver = gpuPolySpanDrivers[ + (gpu_senquack.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_senquack.TEXT_MODE | + gpu_senquack.Masking | Blending | ((Lighting)?129:0) | gpu_senquack.PixelMSB + ]; + gpuDrawPolyGT(packet, driver, true); // is_quad = true + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawPolyGT(0x%x) (4-pt QUAD)\n",PRIM)); + } + } break; + + case 0x40: + case 0x41: + case 0x42: + case 0x43: { // Monochrome line + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineF(packet, driver); + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawLineF(0x%x)\n",PRIM)); + } + } break; + + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: { // Monochrome line strip + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineF(packet, driver); + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawLineF(0x%x)\n",PRIM)); + } + if ((gpu_senquack.PacketBuffer.U4[3] & 0xF000F000) != 0x50005000) + { + gpu_senquack.PacketBuffer.U4[1] = gpu_senquack.PacketBuffer.U4[2]; + gpu_senquack.PacketBuffer.U4[2] = gpu_senquack.PacketBuffer.U4[3]; + gpu_senquack.PacketCount = 1; + gpu_senquack.PacketIndex = 3; + } + } break; + + case 0x50: + case 0x51: + case 0x52: + case 0x53: { // Gouraud-shaded line + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + // Shift index right 
by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + // Index MSB selects Gouraud-shaded PixelSpanDriver: + driver_idx |= (1 << 5); + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineG(packet, driver); + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawLineG(0x%x)\n",PRIM)); + } + } break; + + case 0x58: + case 0x59: + case 0x5A: + case 0x5B: + case 0x5C: + case 0x5D: + case 0x5E: + case 0x5F: { // Gouraud-shaded line strip + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + // Index MSB selects Gouraud-shaded PixelSpanDriver: + driver_idx |= (1 << 5); + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineG(packet, driver); + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawLineG(0x%x)\n",PRIM)); + } + if ((gpu_senquack.PacketBuffer.U4[4] & 0xF000F000) != 0x50005000) + { + gpu_senquack.PacketBuffer.U1[3 + (2 * 4)] = gpu_senquack.PacketBuffer.U1[3 + (0 * 4)]; + gpu_senquack.PacketBuffer.U4[0] = gpu_senquack.PacketBuffer.U4[2]; + gpu_senquack.PacketBuffer.U4[1] = gpu_senquack.PacketBuffer.U4[3]; + gpu_senquack.PacketBuffer.U4[2] = gpu_senquack.PacketBuffer.U4[4]; + gpu_senquack.PacketCount = 2; + gpu_senquack.PacketIndex = 3; + } + } break; + + case 0x60: + case 0x61: + case 0x62: + case 0x63: { // Monochrome rectangle (variable size) + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawT(0x%x)\n",PRIM)); + } + } break; + + case 0x64: + case 0x65: + case 0x66: + case 0x67: { // Textured rectangle (variable size) + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + 
gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + u32 driver_idx = Blending_Mode | gpu_senquack.TEXT_MODE | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>1); + + // This fixes Silent Hill running animation on loading screens: + // (On PSX, color values 0x00-0x7F darken the source texture's color, + // 0x81-FF lighten textures (ultimately clamped to 0x1F), + // 0x80 leaves source texture color unchanged, HOWEVER, + // gpu_senquack uses a simple lighting LUT whereby only the upper + // 5 bits of an 8-bit color are used, so 0x80-0x87 all behave as + // 0x80. + // + // NOTE: I've changed all textured sprite draw commands here and + // elsewhere to use proper behavior, but left poly commands + // alone, I don't want to slow rendering down too much. (TODO) + //if ((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((gpu_senquack.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver); + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawS(0x%x)\n",PRIM)); + } + } break; + + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: { // Monochrome rectangle (1x1 dot) + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + gpu_senquack.PacketBuffer.U4[2] = 0x00010001; + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawT(0x%x)\n",PRIM)); + } + } break; + + case 0x70: + case 0x71: + case 0x72: + case 0x73: { // Monochrome rectangle (8x8) + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + gpu_senquack.PacketBuffer.U4[2] = 0x00080008; + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, 
driver); + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawT(0x%x)\n",PRIM)); + } + } break; + + case 0x74: + case 0x75: + case 0x76: + case 0x77: { // Textured rectangle (8x8) + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + gpu_senquack.PacketBuffer.U4[3] = 0x00080008; + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + u32 driver_idx = Blending_Mode | gpu_senquack.TEXT_MODE | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>1); + + //senquack - Only color 808080h-878787h allows skipping lighting calculation: + //if ((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((gpu_senquack.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver); + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawS(0x%x)\n",PRIM)); + } + } break; + + case 0x78: + case 0x79: + case 0x7A: + case 0x7B: { // Monochrome rectangle (16x16) + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + gpu_senquack.PacketBuffer.U4[2] = 0x00100010; + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawT(0x%x)\n",PRIM)); + } + } break; + + case 0x7C: + case 0x7D: + #ifdef __arm__ + /* Notaz 4bit sprites optimization */ + if ((!gpu_senquack.frameskip.skipGPU) && (!(gpu_senquack.GPU_GP1&0x180)) && (!(gpu_senquack.Masking|gpu_senquack.PixelMSB))) + { + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + gpuDrawS16(packet); + gpu_senquack.fb_dirty = true; + break; + } + #endif + case 0x7E: + case 0x7F: { // Textured rectangle (16x16) + if (!gpu_senquack.frameskip.skipGPU) + { + NULL_GPU(); + gpu_senquack.PacketBuffer.U4[3] = 0x00100010; + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + u32 
driver_idx = Blending_Mode | gpu_senquack.TEXT_MODE | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>1); + + //senquack - Only color 808080h-878787h allows skipping lighting calculation: + //if ((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((gpu_senquack.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver); + gpu_senquack.fb_dirty = true; + DO_LOG(("gpuDrawS(0x%x)\n",PRIM)); + } + } break; + + case 0x80: // vid -> vid + gpuMoveImage(packet); // prim handles updateLace && skip + if ((!gpu_senquack.frameskip.skipCount) && (gpu_senquack.DisplayArea[3] == 480)) // Tekken 3 hack + { + if (!gpu_senquack.frameskip.skipGPU) gpu_senquack.fb_dirty = true; + } + else + { + gpu_senquack.fb_dirty = true; + } + DO_LOG(("gpuMoveImage(0x%x)\n",PRIM)); + break; + case 0xA0: // sys ->vid + gpuLoadImage(packet); // prim handles updateLace && skip + DO_LOG(("gpuLoadImage(0x%x)\n",PRIM)); + break; + case 0xC0: // vid -> sys + gpuStoreImage(packet); // prim handles updateLace && skip + DO_LOG(("gpuStoreImage(0x%x)\n",PRIM)); + break; + case 0xE1 ... 
0xE6: { // Draw settings + gpuGP0Cmd_0xEx(gpu_senquack, gpu_senquack.PacketBuffer.U4[0]); + } break; + } +} +#endif //!USE_GPULIB +/////////////////////////////////////////////////////////////////////////////// +// End of code specific to non-gpulib standalone version of gpu_senquack +/////////////////////////////////////////////////////////////////////////////// + +#endif /* __GPU_UNAI_GPU_COMMAND_H__ */ diff --git a/plugins/gpu_senquack/gpu_fixedpoint.h b/plugins/gpu_senquack/gpu_fixedpoint.h new file mode 100644 index 000000000..5df42cf00 --- /dev/null +++ b/plugins/gpu_senquack/gpu_fixedpoint.h @@ -0,0 +1,134 @@ +/*************************************************************************** + * Copyright (C) 2010 PCSX4ALL Team * + * Copyright (C) 2010 Unai * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. 
* + ***************************************************************************/ + +#ifndef FIXED_H +#define FIXED_H + +typedef s32 fixed; + +//senquack - The gpu_drhell poly routines I adapted use 22.10 fixed point, +// while original Unai used 16.16: (see README_senquack.txt) +//#define FIXED_BITS 16 +#define FIXED_BITS 10 + +#define fixed_ZERO ((fixed)0) +#define fixed_ONE ((fixed)1<>1)) + +#define fixed_LOMASK ((fixed)((1<fixed conversions: +#define i2x(x) ((x)<>FIXED_BITS) + +INLINE fixed FixedCeil(const fixed x) +{ + return (x + (fixed_ONE - 1)) & fixed_HIMASK; +} + +INLINE s32 FixedCeilToInt(const fixed x) +{ + return (x + (fixed_ONE - 1)) >> FIXED_BITS; +} + +//senquack - float<->fixed conversions: +#define f2x(x) ((s32)((x) * (float)(1< 0; ++i, x >>= 1); return i - 1; } +#endif + +INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_) +{ + u32 uD = (_b<0) ? -_b : _b; + if(uD>1) + { + u32 uLog = Log2(uD); + uLog = uLog>(TABLE_BITS-1) ? uLog-(TABLE_BITS-1) : 0; + u32 uDen = (uD>>uLog); + iFactor_ = s_invTable[uDen]; + iFactor_ = (_b<0) ? 
-iFactor_ :iFactor_; + //senquack - Adapted to 22.10 fixed point (originally 16.16): + //iShift_ = 15+uLog; + iShift_ = 21+uLog; + } + else + { + iFactor_=_b; + iShift_ = 0; + } +} + +INLINE fixed xInvMulx (const fixed _a, const s32 _iFact, const s32 _iShift) +{ + #ifdef __arm__ + s64 res; + asm ("smull %Q0, %R0, %1, %2" : "=&r" (res) : "r"(_a) , "r"(_iFact)); + return fixed(res>>_iShift); + #else + return fixed( ((s64)(_a)*(s64)(_iFact))>>(_iShift) ); + #endif +} + +INLINE fixed xLoDivx (const fixed _a, const fixed _b) +{ + s32 iFact, iShift; + xInv(_b, iFact, iShift); + return xInvMulx(_a, iFact, iShift); +} +#endif // GPU_UNAI_USE_INT_DIV_MULTINV +/////////////////////////////////////////////////////////////////////////// +// --- END INVERSE APPROXIMATION SECTION --- +/////////////////////////////////////////////////////////////////////////// + +#endif //FIXED_H diff --git a/plugins/gpu_senquack/gpu_inner.h b/plugins/gpu_senquack/gpu_inner.h new file mode 100644 index 000000000..8cb4bd534 --- /dev/null +++ b/plugins/gpu_senquack/gpu_inner.h @@ -0,0 +1,734 @@ +/*************************************************************************** +* Copyright (C) 2010 PCSX4ALL Team * +* Copyright (C) 2010 Unai * +* Copyright (C) 2016 Senquack (dansilsby gmail com) * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. 
* +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * +***************************************************************************/ + +#ifndef __GPU_UNAI_GPU_INNER_H__ +#define __GPU_UNAI_GPU_INNER_H__ + +/////////////////////////////////////////////////////////////////////////////// +// Inner loop driver instantiation file + +/////////////////////////////////////////////////////////////////////////////// +// Option Masks (CF template paramter) +#define CF_LIGHT ((CF>> 0)&1) // Lighting +#define CF_BLEND ((CF>> 1)&1) // Blending +#define CF_MASKCHECK ((CF>> 2)&1) // Mask bit check +#define CF_BLENDMODE ((CF>> 3)&3) // Blend mode 0..3 +#define CF_TEXTMODE ((CF>> 5)&3) // Texture mode 1..3 (0: texturing disabled) +#define CF_GOURAUD ((CF>> 7)&1) // Gouraud shading +#define CF_MASKSET ((CF>> 8)&1) // Mask bit set +#define CF_DITHER ((CF>> 9)&1) // Dithering +#define CF_BLITMASK ((CF>>10)&1) // blit_mask check (skip rendering pixels + // that wouldn't end up displayed on + // low-res screen using simple downscaler) + +//#ifdef __arm__ +//#ifndef ENABLE_GPU_ARMV7 +/* ARMv5 */ +//#include "gpu_inner_blend_arm5.h" +//#else +/* ARMv7 optimized */ +//#include "gpu_inner_blend_arm7.h" +//#endif +//#else +//#include "gpu_inner_blend.h" +//#endif + +#include "gpu_inner_blend.h" +#include "gpu_inner_quantization.h" +#include "gpu_inner_light.h" + +#ifdef __arm__ +#include "gpu_inner_blend_arm.h" +#include "gpu_inner_light_arm.h" +#define gpuBlending gpuBlendingARM +#define gpuLightingRGB gpuLightingRGBARM +#define gpuLightingTXT gpuLightingTXTARM +#define gpuLightingTXTGouraud gpuLightingTXTGouraudARM +// Non-dithering lighting and blending functions preserve uSrc +// MSB. This saves a few operations and useless load/stores. 
+#define MSB_PRESERVED (!CF_DITHER) +#else +#define gpuBlending gpuBlendingGeneric +#define gpuLightingRGB gpuLightingRGBGeneric +#define gpuLightingTXT gpuLightingTXTGeneric +#define gpuLightingTXTGouraud gpuLightingTXTGouraudGeneric +#define MSB_PRESERVED 0 +#endif + + +// If defined, Gouraud colors are fixed-point 5.11, otherwise they are 8.16 +// This is only for debugging/verification of low-precision colors in C. +// Low-precision Gouraud is intended for use by SIMD-optimized inner drivers +// which get/use Gouraud colors in SIMD registers. +//#define GPU_GOURAUD_LOW_PRECISION + +// How many bits of fixed-point precision GouraudColor uses +#ifdef GPU_GOURAUD_LOW_PRECISION +#define GPU_GOURAUD_FIXED_BITS 11 +#else +#define GPU_GOURAUD_FIXED_BITS 16 +#endif + +// Used to pass Gouraud colors to gpuPixelSpanFn() (lines) +struct GouraudColor { +#ifdef GPU_GOURAUD_LOW_PRECISION + u16 r, g, b; + s16 r_incr, g_incr, b_incr; +#else + u32 r, g, b; + s32 r_incr, g_incr, b_incr; +#endif +}; + +static inline u16 gpuGouraudColor15bpp(u32 r, u32 g, u32 b) +{ + r >>= GPU_GOURAUD_FIXED_BITS; + g >>= GPU_GOURAUD_FIXED_BITS; + b >>= GPU_GOURAUD_FIXED_BITS; + +#ifndef GPU_GOURAUD_LOW_PRECISION + // High-precision Gouraud colors are 8-bit + fractional + r >>= 3; g >>= 3; b >>= 3; +#endif + + return r | (g << 5) | (b << 10); +} + +/////////////////////////////////////////////////////////////////////////////// +// GPU Pixel span operations generator gpuPixelSpanFn<> +// Oct 2016: Created/adapted from old gpuPixelFn by senquack: +// Original gpuPixelFn was used to draw lines one pixel at a time. I wrote +// new line algorithms that draw lines using horizontal/vertical/diagonal +// spans of pixels, necessitating new pixel-drawing function that could +// not only render spans of pixels, but gouraud-shade them as well. +// This speeds up line rendering and would allow tile-rendering (untextured +// rectangles) to use the same set of functions. 
Since tiles are always +// monochrome, they simply wouldn't use the extra set of 32 gouraud-shaded +// gpuPixelSpanFn functions (TODO?). +// +// NOTE: While the PS1 framebuffer is 16 bit, we use 8-bit pointers here, +// so that pDst can be incremented directly by 'incr' parameter +// without having to shift it before use. +template +static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) +{ + // Blend func can save an operation if it knows uSrc MSB is + // unset. For untextured prims, this is always true. + const bool skip_uSrc_mask = true; + + u16 col; + struct GouraudColor * gcPtr; + u32 r, g, b; + s32 r_incr, g_incr, b_incr; + + if (CF_GOURAUD) { + gcPtr = (GouraudColor*)data; + r = gcPtr->r; r_incr = gcPtr->r_incr; + g = gcPtr->g; g_incr = gcPtr->g_incr; + b = gcPtr->b; b_incr = gcPtr->b_incr; + } else { + col = (u16)data; + } + + do { + if (!CF_GOURAUD) + { // NO GOURAUD + if (!CF_MASKCHECK && !CF_BLEND) { + if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } + else { *(u16*)pDst = col; } + } else if (CF_MASKCHECK && !CF_BLEND) { + if (!(*(u16*)pDst & 0x8000)) { + if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } + else { *(u16*)pDst = col; } + } + } else { + uint_fast16_t uDst = *(u16*)pDst; + if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; } + + uint_fast16_t uSrc = col; + + if (CF_BLEND) + uSrc = gpuBlending(uSrc, uDst); + + if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; } + else { *(u16*)pDst = uSrc; } + } + + } else + { // GOURAUD + + if (!CF_MASKCHECK && !CF_BLEND) { + col = gpuGouraudColor15bpp(r, g, b); + if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } + else { *(u16*)pDst = col; } + } else if (CF_MASKCHECK && !CF_BLEND) { + col = gpuGouraudColor15bpp(r, g, b); + if (!(*(u16*)pDst & 0x8000)) { + if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } + else { *(u16*)pDst = col; } + } + } else { + uint_fast16_t uDst = *(u16*)pDst; + if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; } + col = gpuGouraudColor15bpp(r, g, b); + + 
uint_fast16_t uSrc = col; + + // Blend func can save an operation if it knows uSrc MSB is + // unset. For untextured prims, this is always true. + const bool skip_uSrc_mask = true; + + if (CF_BLEND) + uSrc = gpuBlending(uSrc, uDst); + + if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; } + else { *(u16*)pDst = uSrc; } + } + } + +endpixel: + if (CF_GOURAUD) { + r += r_incr; + g += g_incr; + b += b_incr; + } + pDst += incr; + } while (len-- > 1); + + // Note from senquack: Normally, I'd prefer to write a 'do {} while (--len)' + // loop, or even a for() loop, however, on MIPS platforms anything but the + // 'do {} while (len-- > 1)' tends to generate very unoptimal asm, with + // many unneeded MULs/ADDs/branches at the ends of these functions. + // If you change the loop structure above, be sure to compare the quality + // of the generated code!! + + if (CF_GOURAUD) { + gcPtr->r = r; + gcPtr->g = g; + gcPtr->b = b; + } + return pDst; +} + +static u8* PixelSpanNULL(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) +{ + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"PixelSpanNULL()\n"); + #endif + return pDst; +} + +/////////////////////////////////////////////////////////////////////////////// +// PixelSpan (lines) innerloops driver +typedef u8* (*PSD)(u8* dst, uintptr_t data, ptrdiff_t incr, size_t len); + +const PSD gpuPixelSpanDrivers[64] = +{ + // Array index | 'CF' template field | Field value + // ------------+---------------------+---------------- + // Bit 0 | CF_BLEND | off (0), on (1) + // Bit 1 | CF_MASKCHECK | off (0), on (1) + // Bit 3:2 | CF_BLENDMODE | 0..3 + // Bit 4 | CF_MASKSET | off (0), on (1) + // Bit 5 | CF_GOURAUD | off (0), on (1) + // + // NULL entries are ones for which blending is disabled and blend-mode + // field is non-zero, which is obviously invalid. 
+ + // Flat-shaded + gpuPixelSpanFn<0x00<<1>, gpuPixelSpanFn<0x01<<1>, gpuPixelSpanFn<0x02<<1>, gpuPixelSpanFn<0x03<<1>, + PixelSpanNULL, gpuPixelSpanFn<0x05<<1>, PixelSpanNULL, gpuPixelSpanFn<0x07<<1>, + PixelSpanNULL, gpuPixelSpanFn<0x09<<1>, PixelSpanNULL, gpuPixelSpanFn<0x0B<<1>, + PixelSpanNULL, gpuPixelSpanFn<0x0D<<1>, PixelSpanNULL, gpuPixelSpanFn<0x0F<<1>, + + // Flat-shaded + PixelMSB (CF_MASKSET) + gpuPixelSpanFn<(0x00<<1)|0x100>, gpuPixelSpanFn<(0x01<<1)|0x100>, gpuPixelSpanFn<(0x02<<1)|0x100>, gpuPixelSpanFn<(0x03<<1)|0x100>, + PixelSpanNULL, gpuPixelSpanFn<(0x05<<1)|0x100>, PixelSpanNULL, gpuPixelSpanFn<(0x07<<1)|0x100>, + PixelSpanNULL, gpuPixelSpanFn<(0x09<<1)|0x100>, PixelSpanNULL, gpuPixelSpanFn<(0x0B<<1)|0x100>, + PixelSpanNULL, gpuPixelSpanFn<(0x0D<<1)|0x100>, PixelSpanNULL, gpuPixelSpanFn<(0x0F<<1)|0x100>, + + // Gouraud-shaded (CF_GOURAUD) + gpuPixelSpanFn<(0x00<<1)|0x80>, gpuPixelSpanFn<(0x01<<1)|0x80>, gpuPixelSpanFn<(0x02<<1)|0x80>, gpuPixelSpanFn<(0x03<<1)|0x80>, + PixelSpanNULL, gpuPixelSpanFn<(0x05<<1)|0x80>, PixelSpanNULL, gpuPixelSpanFn<(0x07<<1)|0x80>, + PixelSpanNULL, gpuPixelSpanFn<(0x09<<1)|0x80>, PixelSpanNULL, gpuPixelSpanFn<(0x0B<<1)|0x80>, + PixelSpanNULL, gpuPixelSpanFn<(0x0D<<1)|0x80>, PixelSpanNULL, gpuPixelSpanFn<(0x0F<<1)|0x80>, + + // Gouraud-shaded (CF_GOURAUD) + PixelMSB (CF_MASKSET) + gpuPixelSpanFn<(0x00<<1)|0x180>, gpuPixelSpanFn<(0x01<<1)|0x180>, gpuPixelSpanFn<(0x02<<1)|0x180>, gpuPixelSpanFn<(0x03<<1)|0x180>, + PixelSpanNULL, gpuPixelSpanFn<(0x05<<1)|0x180>, PixelSpanNULL, gpuPixelSpanFn<(0x07<<1)|0x180>, + PixelSpanNULL, gpuPixelSpanFn<(0x09<<1)|0x180>, PixelSpanNULL, gpuPixelSpanFn<(0x0B<<1)|0x180>, + PixelSpanNULL, gpuPixelSpanFn<(0x0D<<1)|0x180>, PixelSpanNULL, gpuPixelSpanFn<(0x0F<<1)|0x180> +}; + +/////////////////////////////////////////////////////////////////////////////// +// GPU Tiles innerloops generator + +template +static void gpuTileSpanFn(u16 *pDst, u32 count, u16 data) +{ + if (!CF_MASKCHECK && 
!CF_BLEND) { + if (CF_MASKSET) { data = data | 0x8000; } + do { *pDst++ = data; } while (--count); + } else if (CF_MASKCHECK && !CF_BLEND) { + if (CF_MASKSET) { data = data | 0x8000; } + do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count); + } else + { + // Blend func can save an operation if it knows uSrc MSB is + // unset. For untextured prims, this is always true. + const bool skip_uSrc_mask = true; + + uint_fast16_t uSrc, uDst; + do + { + if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; } + if (CF_MASKCHECK) { if (uDst&0x8000) goto endtile; } + + uSrc = data; + + if (CF_BLEND) + uSrc = gpuBlending(uSrc, uDst); + + if (CF_MASKSET) { *pDst = uSrc | 0x8000; } + else { *pDst = uSrc; } + + //senquack - Did not apply "Silent Hill" mask-bit fix to here. + // It is hard to tell from scarce documentation available and + // lack of comments in code, but I believe the tile-span + // functions here should not bother to preserve any source MSB, + // as they are not drawing from a texture. 
+endtile: + pDst++; + } + while (--count); + } +} + +static void TileNULL(u16 *pDst, u32 count, u16 data) +{ + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"TileNULL()\n"); + #endif +} + +/////////////////////////////////////////////////////////////////////////////// +// Tiles innerloops driver +typedef void (*PT)(u16 *pDst, u32 count, u16 data); + +// Template instantiation helper macros +#define TI(cf) gpuTileSpanFn<(cf)> +#define TN TileNULL +#define TIBLOCK(ub) \ + TI((ub)|0x00), TI((ub)|0x02), TI((ub)|0x04), TI((ub)|0x06), \ + TN, TI((ub)|0x0a), TN, TI((ub)|0x0e), \ + TN, TI((ub)|0x12), TN, TI((ub)|0x16), \ + TN, TI((ub)|0x1a), TN, TI((ub)|0x1e) + +const PT gpuTileSpanDrivers[32] = { + TIBLOCK(0<<8), TIBLOCK(1<<8) +}; + +#undef TI +#undef TN +#undef TIBLOCK + + +/////////////////////////////////////////////////////////////////////////////// +// GPU Sprites innerloops generator + +template +static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0) +{ + // Blend func can save an operation if it knows uSrc MSB is unset. + // Untextured prims can always skip (source color always comes with MSB=0). + // For textured prims, the generic lighting funcs always return it unset. (bonus!) + const bool skip_uSrc_mask = MSB_PRESERVED ? 
(!CF_TEXTMODE) : (!CF_TEXTMODE) || CF_LIGHT; + + uint_fast16_t uSrc, uDst, srcMSB; + bool should_blend; + u32 u0_mask = gpu_senquack.TextureWindow[2]; + + u8 r5, g5, b5; + if (CF_LIGHT) { + r5 = gpu_senquack.r5; + g5 = gpu_senquack.g5; + b5 = gpu_senquack.b5; + } + + if (CF_TEXTMODE==3) { + // Texture is accessed byte-wise, so adjust mask if 16bpp + u0_mask <<= 1; + } + + const u16 *CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_senquack.CBA; + + do + { + if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; } + if (CF_MASKCHECK) if (uDst&0x8000) { goto endsprite; } + + if (CF_TEXTMODE==1) { // 4bpp (CLUT) + u8 rgb = pTxt[(u0 & u0_mask)>>1]; + uSrc = CBA_[(rgb>>((u0&1)<<2))&0xf]; + } + if (CF_TEXTMODE==2) { // 8bpp (CLUT) + uSrc = CBA_[pTxt[u0 & u0_mask]]; + } + if (CF_TEXTMODE==3) { // 16bpp + uSrc = *(u16*)(&pTxt[u0 & u0_mask]); + } + + if (!uSrc) goto endsprite; + + //senquack - save source MSB, as blending or lighting macros will not + // (Silent Hill gray rectangles mask bit bug) + if (CF_BLEND || CF_LIGHT) srcMSB = uSrc & 0x8000; + + if (CF_LIGHT) + uSrc = gpuLightingTXT(uSrc, r5, g5, b5); + + should_blend = MSB_PRESERVED ? uSrc & 0x8000 : srcMSB; + + if (CF_BLEND && should_blend) + uSrc = gpuBlending(uSrc, uDst); + + if (CF_MASKSET) { *pDst = uSrc | 0x8000; } + else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = uSrc | srcMSB; } + else { *pDst = uSrc; } + +endsprite: + u0 += (CF_TEXTMODE==3) ? 
2 : 1; + pDst++; + } + while (--count); +} + +static void SpriteNULL(u16 *pDst, u32 count, u8* pTxt, u32 u0) +{ + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"SpriteNULL()\n"); + #endif +} + +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Sprite innerloops driver +typedef void (*PS)(u16 *pDst, u32 count, u8* pTxt, u32 u0); + +// Template instantiation helper macros +#define TI(cf) gpuSpriteSpanFn<(cf)> +#define TN SpriteNULL +#define TIBLOCK(ub) \ + TN, TN, TN, TN, TN, TN, TN, TN, \ + TN, TN, TN, TN, TN, TN, TN, TN, \ + TN, TN, TN, TN, TN, TN, TN, TN, \ + TN, TN, TN, TN, TN, TN, TN, TN, \ + TI((ub)|0x20), TI((ub)|0x21), TI((ub)|0x22), TI((ub)|0x23), TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \ + TN, TN, TI((ub)|0x2a), TI((ub)|0x2b), TN, TN, TI((ub)|0x2e), TI((ub)|0x2f), \ + TN, TN, TI((ub)|0x32), TI((ub)|0x33), TN, TN, TI((ub)|0x36), TI((ub)|0x37), \ + TN, TN, TI((ub)|0x3a), TI((ub)|0x3b), TN, TN, TI((ub)|0x3e), TI((ub)|0x3f), \ + TI((ub)|0x40), TI((ub)|0x41), TI((ub)|0x42), TI((ub)|0x43), TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \ + TN, TN, TI((ub)|0x4a), TI((ub)|0x4b), TN, TN, TI((ub)|0x4e), TI((ub)|0x4f), \ + TN, TN, TI((ub)|0x52), TI((ub)|0x53), TN, TN, TI((ub)|0x56), TI((ub)|0x57), \ + TN, TN, TI((ub)|0x5a), TI((ub)|0x5b), TN, TN, TI((ub)|0x5e), TI((ub)|0x5f), \ + TI((ub)|0x60), TI((ub)|0x61), TI((ub)|0x62), TI((ub)|0x63), TI((ub)|0x64), TI((ub)|0x65), TI((ub)|0x66), TI((ub)|0x67), \ + TN, TN, TI((ub)|0x6a), TI((ub)|0x6b), TN, TN, TI((ub)|0x6e), TI((ub)|0x6f), \ + TN, TN, TI((ub)|0x72), TI((ub)|0x73), TN, TN, TI((ub)|0x76), TI((ub)|0x77), \ + TN, TN, TI((ub)|0x7a), TI((ub)|0x7b), TN, TN, TI((ub)|0x7e), TI((ub)|0x7f) + +const PS gpuSpriteSpanDrivers[256] = { + TIBLOCK(0<<8), TIBLOCK(1<<8) +}; + +#undef TI +#undef TN +#undef TIBLOCK + 
+///////////////////////////////////////////////////////////////////////////////
+//  GPU Polygon innerloops generator
+// Renders one span of 'count' consecutive pixels at pDst; CF (CF_* bit fields) selects the variant.
+//senquack - Newer version with following changes:
+//           * Adapted to work with new poly routines in gpu_raster_polygon.h
+//             adapted from DrHell GPU. They are less glitchy and use 22.10
+//             fixed-point instead of original UNAI's 16.16.
+//           * Texture coordinates are no longer packed together into one
+//             unsigned int. This seems to lose too much accuracy (they each
+//             end up being only 8.7 fixed-point that way) and pixel-dropouts
+//             were noticeable both with original code and current DrHell
+//             adaptations. An example would be the sky in NFS3. Now, they are
+//             stored in separate ints, using separate masks.
+//           * Function is no longer INLINE, as it was always called
+//             through a function pointer.
+//           * Function now ensures the mask bit of source texture is preserved
+//             across calls to blending functions (Silent Hill rectangles fix)
+//           * November 2016: Large refactoring of blending/lighting when
+//             JohnnyF added dithering. See gpu_inner_quantization.h and
+//             relevant blend/light headers.
+// (see README_senquack.txt)
+template<int CF>
+static void gpuPolySpanFn(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 count)
+{
+	// Blend func can save an operation if it knows uSrc MSB is unset.
+	// Untextured prims can always skip this (src color MSB is always 0).
+	// For textured prims, the generic lighting funcs always return it unset. (bonus!)
+	const bool skip_uSrc_mask = MSB_PRESERVED ? (!CF_TEXTMODE) : (!CF_TEXTMODE) || CF_LIGHT;
+	bool should_blend;
+
+	u32 bMsk; if (CF_BLITMASK) bMsk = gpu_senquack.blit_mask;
+
+	if (!CF_TEXTMODE)
+	{
+		if (!CF_GOURAUD)
+		{
+			// UNTEXTURED, NO GOURAUD
+			const u16 pix15 = gpu_senquack.PixelData;
+			do {
+				uint_fast16_t uSrc, uDst;
+
+				// NOTE: Don't enable CF_BLITMASK pixel skipping (speed hack)
+				//  on untextured polys. It seems to do more harm than good: see
+				//  gravestone text at end of Medieval intro sequence. -senquack
+				//if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) { goto endpolynotextnogou; } }
+
+				if (CF_BLEND || CF_MASKCHECK) uDst = *pDst;
+				if (CF_MASKCHECK) { if (uDst&0x8000) { goto endpolynotextnogou; } }
+
+				uSrc = pix15;
+
+				if (CF_BLEND)
+					uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
+
+				if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
+				else            { *pDst = uSrc;          }
+
+endpolynotextnogou:
+				pDst++;
+			} while(--count);
+		}
+		else
+		{
+			// UNTEXTURED, GOURAUD
+			u32 l_gCol = gpu_senquack.gCol;
+			u32 l_gInc = gpu_senquack.gInc;
+
+			do {
+				uint_fast16_t uDst, uSrc;
+
+				// See note in above loop regarding CF_BLITMASK
+				//if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolynotextgou; }
+
+				if (CF_BLEND || CF_MASKCHECK) uDst = *pDst;
+				if (CF_MASKCHECK) { if (uDst&0x8000) goto endpolynotextgou; }
+
+				if (CF_DITHER) {
+					// GOURAUD, DITHER
+
+					u32 uSrc24 = gpuLightingRGB24(l_gCol);
+					if (CF_BLEND)
+						uSrc24 = gpuBlending24<CF_BLENDMODE>(uSrc24, uDst);
+					uSrc = gpuColorQuantization24(uSrc24, pDst);
+				} else {
+					// GOURAUD, NO DITHER
+
+					uSrc = gpuLightingRGB(l_gCol);
+
+					if (CF_BLEND)
+						uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
+				}
+
+				if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
+				else            { *pDst = uSrc;          }
+
+endpolynotextgou:
+				pDst++;
+				l_gCol += l_gInc;
+			}
+			while (--count);
+		}
+	}
+	else
+	{
+		// TEXTURED
+
+		uint_fast16_t uDst, uSrc, srcMSB = 0; // srcMSB stays 0 unless CF_BLEND/CF_LIGHT capture it below
+
+		//senquack - note: original UNAI code had gpu_senquack.{u4/v4} packed into
+		// one 32-bit unsigned int, but this proved to lose too much accuracy
+		// (pixel dropouts noticeable in NFS3 sky), so now are separate vars.
+		u32 l_u_msk = gpu_senquack.u_msk;     u32 l_v_msk = gpu_senquack.v_msk;
+		u32 l_u = gpu_senquack.u & l_u_msk;   u32 l_v = gpu_senquack.v & l_v_msk;
+		s32 l_u_inc = gpu_senquack.u_inc;     s32 l_v_inc = gpu_senquack.v_inc;
+
+		const u16* TBA_ = gpu_senquack.TBA;
+		const u16* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_senquack.CBA;
+
+		u8 r5, g5, b5;
+		u8 r8, g8, b8;
+
+		u32 l_gInc, l_gCol;
+
+		if (CF_LIGHT) {
+			if (CF_GOURAUD) {
+				l_gInc = gpu_senquack.gInc;
+				l_gCol = gpu_senquack.gCol;
+			} else {
+				if (CF_DITHER) {
+					r8 = gpu_senquack.r8;
+					g8 = gpu_senquack.g8;
+					b8 = gpu_senquack.b8;
+				} else {
+					r5 = gpu_senquack.r5;
+					g5 = gpu_senquack.g5;
+					b5 = gpu_senquack.b5;
+				}
+			}
+		}
+
+		do
+		{
+			if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolytext; }
+			if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; }
+			if (CF_MASKCHECK) if (uDst&0x8000) { goto endpolytext; }
+
+			//senquack - adapted to work with new 22.10 fixed point routines:
+			//           (UNAI originally used 16.16)
+			if (CF_TEXTMODE==1) {  //  4bpp (CLUT)
+				u32 tu=(l_u>>10);
+				u32 tv=(l_v<<1)&(0xff<<11);
+				u8 rgb=((u8*)TBA_)[tv+(tu>>1)];
+				uSrc=CBA_[(rgb>>((tu&1)<<2))&0xf];
+				if (!uSrc) goto endpolytext;
+			}
+			if (CF_TEXTMODE==2) {  //  8bpp (CLUT)
+				uSrc = CBA_[(((u8*)TBA_)[(l_u>>10)+((l_v<<1)&(0xff<<11))])];
+				if (!uSrc) goto endpolytext;
+			}
+			if (CF_TEXTMODE==3) {  // 16bpp
+				uSrc = TBA_[(l_u>>10)+((l_v)&(0xff<<10))];
+				if (!uSrc) goto endpolytext;
+			}
+
+			// Save source MSB, as blending or lighting will not (Silent Hill)
+			if (CF_BLEND || CF_LIGHT) srcMSB = uSrc & 0x8000;
+
+			// When textured, only dither when LIGHT (texture blend) is enabled
+			// LIGHT &&  BLEND => dither
+			// LIGHT && !BLEND => dither
+			//!LIGHT &&  BLEND => no dither
+			//!LIGHT && !BLEND => no dither
+
+			if (CF_DITHER && CF_LIGHT) {
+				u32 uSrc24;
+				if ( CF_GOURAUD)
+					uSrc24 = gpuLightingTXT24Gouraud(uSrc, l_gCol);
+				if (!CF_GOURAUD)
+					uSrc24 = gpuLightingTXT24(uSrc, r8, g8, b8);
+
+				if (CF_BLEND && srcMSB)
+					uSrc24 = gpuBlending24<CF_BLENDMODE>(uSrc24, uDst);
+
+				uSrc = gpuColorQuantization24(uSrc24, pDst);
+			} else
+			{
+				if (CF_LIGHT) {
+					if ( CF_GOURAUD)
+						uSrc = gpuLightingTXTGouraud(uSrc, l_gCol);
+					if (!CF_GOURAUD)
+						uSrc = gpuLightingTXT(uSrc, r5, g5, b5);
+				}
+
+				should_blend = MSB_PRESERVED ? uSrc & 0x8000 : srcMSB;
+				if (CF_BLEND && should_blend)
+					uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
+			}
+
+			if (CF_MASKSET)                                    { *pDst = uSrc | 0x8000; }
+			else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = uSrc | srcMSB; }
+			else                                               { *pDst = uSrc;          }
+endpolytext:
+			pDst++;
+			l_u = (l_u + l_u_inc) & l_u_msk;
+			l_v = (l_v + l_v_inc) & l_v_msk;
+			if (CF_LIGHT && CF_GOURAUD) l_gCol += l_gInc;
+		}
+		while (--count);
+	}
+}
+
+static void PolyNULL(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 count)
+{
+	#ifdef ENABLE_GPU_LOG_SUPPORT
+		fprintf(stdout,"PolyNULL()\n");
+	#endif
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Polygon innerloops driver
+typedef void (*PP)(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 count);
+
+// Template instantiation helper macros
+#define TI(cf) gpuPolySpanFn<(cf)>
+#define TN     PolyNULL
+#define TIBLOCK(ub) \
+	TI((ub)|0x00), TI((ub)|0x01), TI((ub)|0x02), TI((ub)|0x03), TI((ub)|0x04), TI((ub)|0x05), TI((ub)|0x06), TI((ub)|0x07), \
+	TN,            TN,            TI((ub)|0x0a), TI((ub)|0x0b), TN,            TN,            TI((ub)|0x0e), TI((ub)|0x0f), \
+	TN,            TN,            TI((ub)|0x12), TI((ub)|0x13), TN,            TN,            TI((ub)|0x16), TI((ub)|0x17), \
+	TN,            TN,            TI((ub)|0x1a), TI((ub)|0x1b), TN,            TN,            TI((ub)|0x1e), TI((ub)|0x1f), \
+	TI((ub)|0x20), TI((ub)|0x21), TI((ub)|0x22), TI((ub)|0x23), TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \
+	TN,            TN,            TI((ub)|0x2a), TI((ub)|0x2b), TN,            TN,            TI((ub)|0x2e), TI((ub)|0x2f), \
+	TN,            TN,            TI((ub)|0x32), TI((ub)|0x33), TN,            TN,            TI((ub)|0x36), TI((ub)|0x37), \
+	TN,            TN,            TI((ub)|0x3a), TI((ub)|0x3b), TN,            TN,            TI((ub)|0x3e), TI((ub)|0x3f), \
+	TI((ub)|0x40), TI((ub)|0x41), TI((ub)|0x42), TI((ub)|0x43), TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \
+	TN,            TN,            TI((ub)|0x4a), TI((ub)|0x4b), TN,            TN,            TI((ub)|0x4e), TI((ub)|0x4f), \
+	TN,            TN,            TI((ub)|0x52), TI((ub)|0x53), TN,            TN,            TI((ub)|0x56), TI((ub)|0x57), \
+	TN,            TN,            TI((ub)|0x5a), TI((ub)|0x5b), TN,            TN,            TI((ub)|0x5e), TI((ub)|0x5f), \
+	TI((ub)|0x60), TI((ub)|0x61), TI((ub)|0x62), TI((ub)|0x63), TI((ub)|0x64), TI((ub)|0x65), TI((ub)|0x66), TI((ub)|0x67), \
+	TN,            TN,            TI((ub)|0x6a), TI((ub)|0x6b), TN,            TN,            TI((ub)|0x6e), TI((ub)|0x6f), \
+	TN,            TN,            TI((ub)|0x72), TI((ub)|0x73), TN,            TN,            TI((ub)|0x76), TI((ub)|0x77), \
+	TN,            TN,            TI((ub)|0x7a), TI((ub)|0x7b), TN,            TN,            TI((ub)|0x7e), TI((ub)|0x7f), \
+	TN,            TI((ub)|0x81), TN,            TI((ub)|0x83), TN,            TI((ub)|0x85), TN,            TI((ub)|0x87), \
+	TN,            TN,            TN,            TI((ub)|0x8b), TN,            TN,            TN,            TI((ub)|0x8f), \
+	TN,            TN,            TN,            TI((ub)|0x93), TN,            TN,            TN,            TI((ub)|0x97), \
+	TN,            TN,            TN,            TI((ub)|0x9b), TN,            TN,            TN,            TI((ub)|0x9f), \
+	TN,            TI((ub)|0xa1), TN,            TI((ub)|0xa3), TN,            TI((ub)|0xa5), TN,            TI((ub)|0xa7), \
+	TN,            TN,            TN,            TI((ub)|0xab), TN,            TN,            TN,            TI((ub)|0xaf), \
+	TN,            TN,            TN,            TI((ub)|0xb3), TN,            TN,            TN,            TI((ub)|0xb7), \
+	TN,            TN,            TN,            TI((ub)|0xbb), TN,            TN,            TN,            TI((ub)|0xbf), \
+	TN,            TI((ub)|0xc1), TN,            TI((ub)|0xc3), TN,            TI((ub)|0xc5), TN,            TI((ub)|0xc7), \
+	TN,            TN,            TN,            TI((ub)|0xcb), TN,            TN,            TN,            TI((ub)|0xcf), \
+	TN,            TN,            TN,            TI((ub)|0xd3), TN,            TN,            TN,            TI((ub)|0xd7), \
+	TN,            TN,            TN,            TI((ub)|0xdb), TN,            TN,            TN,            TI((ub)|0xdf), \
+	TN,            TI((ub)|0xe1), TN,            TI((ub)|0xe3), TN,            TI((ub)|0xe5), TN,            TI((ub)|0xe7), \
+	TN,            TN,            TN,            TI((ub)|0xeb), TN,            TN,            TN,            TI((ub)|0xef), \
+	TN,            TN,            TN,            TI((ub)|0xf3), TN,            TN,            TN,            TI((ub)|0xf7), \
+	TN,            TN,            TN,            TI((ub)|0xfb), TN,            TN,            TN,            TI((ub)|0xff)
+
+const PP gpuPolySpanDrivers[2048] = {
+	TIBLOCK(0<<8), TIBLOCK(1<<8), TIBLOCK(2<<8), TIBLOCK(3<<8),
+	TIBLOCK(4<<8), TIBLOCK(5<<8), TIBLOCK(6<<8), TIBLOCK(7<<8)
+};
+
+#undef TI
+#undef TN
+#undef TIBLOCK
+
+#endif /* __GPU_UNAI_GPU_INNER_H__ */
diff --git a/plugins/gpu_senquack/gpu_inner_blend.h b/plugins/gpu_senquack/gpu_inner_blend.h
new file mode 100644
index 
000000000..febc7ede4 --- /dev/null +++ b/plugins/gpu_senquack/gpu_inner_blend.h @@ -0,0 +1,188 @@ +/*************************************************************************** +* Copyright (C) 2010 PCSX4ALL Team * +* Copyright (C) 2010 Unai * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * +***************************************************************************/ + +#ifndef _OP_BLEND_H_ +#define _OP_BLEND_H_ + +// GPU Blending operations functions + +//////////////////////////////////////////////////////////////////////////////// +// Blend bgr555 color in 'uSrc' (foreground) with bgr555 color +// in 'uDst' (background), returning resulting color. 
+// BLENDMODE (0..3) selects the equation; SKIP_USRC_MSB_MASK=true promises uSrc bit 15 is already 0.
+// INPUT:
+//  'uSrc','uDst' input: -bbbbbgggggrrrrr
+//                       ^ bit 15
+// RETURNS:
+//          mixed color: 0bbbbbgggggrrrrr
+//                       ^ bit 15
+//
+// Where '0' is zero-padding, and '-' is don't care
+////////////////////////////////////////////////////////////////////////////////
+template <int BLENDMODE, bool SKIP_USRC_MSB_MASK>
+GPU_INLINE uint_fast16_t gpuBlendingGeneric(uint_fast16_t uSrc, uint_fast16_t uDst)
+{
+	// These use Blargg's bitwise modulo-clamping:
+	//  http://blargg.8bitalley.com/info/rgb_mixing.html
+	//  http://blargg.8bitalley.com/info/rgb_clamped_add.html
+	//  http://blargg.8bitalley.com/info/rgb_clamped_sub.html
+
+	uint_fast16_t mix;
+
+	// 0.5 x Back + 0.5 x Forward
+	if (BLENDMODE==0) {
+#ifdef GPU_UNAI_USE_ACCURATE_BLENDING
+		// Slower, but more accurate (doesn't lose LSB data)
+		uDst &= 0x7fff;
+		if (!SKIP_USRC_MSB_MASK)
+			uSrc &= 0x7fff;
+		mix = ((uSrc + uDst) - ((uSrc ^ uDst) & 0x0421)) >> 1;
+#else
+		mix = ((uDst & 0x7bde) + (uSrc & 0x7bde)) >> 1; // 0x7bde drops bit 15 and each channel's LSB
+#endif
+	}
+
+	// 1.0 x Back + 1.0 x Forward
+	if (BLENDMODE==1) {
+		uDst &= 0x7fff;
+		if (!SKIP_USRC_MSB_MASK)
+			uSrc &= 0x7fff;
+		u32 sum      = uSrc + uDst;
+		u32 low_bits = (uSrc ^ uDst) & 0x0421;   // LSB of each 5-bit channel
+		u32 carries  = (sum - low_bits) & 0x8420; // bit just above each channel: set on overflow
+		u32 modulo   = sum - carries;
+		u32 clamp    = carries - (carries >> 5);  // 0x1f in every channel that overflowed
+		mix = modulo | clamp;
+	}
+
+	// 1.0 x Back - 1.0 x Forward
+	if (BLENDMODE==2) {
+		uDst &= 0x7fff;
+		if (!SKIP_USRC_MSB_MASK)
+			uSrc &= 0x7fff;
+		u32 diff     = uDst - uSrc + 0x8420;      // guard bit above each channel absorbs a borrow
+		u32 low_bits = (uDst ^ uSrc) & 0x8420;
+		u32 borrows  = (diff - low_bits) & 0x8420; // guard bits that survived (no underflow)
+		u32 modulo   = diff - borrows;
+		u32 clamp    = borrows - (borrows >> 5);   // 0x1f where the channel did not underflow
+		mix = modulo & clamp;
+	}
+
+	// 1.0 x Back + 0.25 x Forward
+	if (BLENDMODE==3) {
+		uDst &= 0x7fff;
+		uSrc = ((uSrc >> 2) & 0x1ce7);            // quarter each channel; mask also clears bit 15
+		u32 sum      = uSrc + uDst;
+		u32 low_bits = (uSrc ^ uDst) & 0x0421;
+		u32 carries  = (sum - low_bits) & 0x8420;
+		u32 modulo   = sum - carries;
+		u32 clamp    = carries - (carries >> 5);
+		mix = modulo | clamp;
+	}
+
+	return mix;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////// +// Convert bgr555 color in uSrc to padded u32 5.4:5.4:5.4 bgr fixed-pt +// color triplet suitable for use with HQ 24-bit quantization. +// +// INPUT: +// 'uDst' input: -bbbbbgggggrrrrr +// ^ bit 16 +// RETURNS: +// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX +// ^ bit 31 +// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u32 gpuGetRGB24(uint_fast16_t uSrc) +{ + return ((uSrc & 0x7C00)<<14) + | ((uSrc & 0x03E0)<< 9) + | ((uSrc & 0x001F)<< 4); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Blend padded u32 5.4:5.4:5.4 bgr fixed-pt color triplet in 'uSrc24' +// (foreground color) with bgr555 color in 'uDst' (background color), +// returning the resulting u32 5.4:5.4:5.4 color. +// +// INPUT: +// 'uSrc24' input: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX +// ^ bit 31 +// 'uDst' input: -bbbbbgggggrrrrr +// ^ bit 16 +// RETURNS: +// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX +// ^ bit 31 +// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +template +GPU_INLINE u32 gpuBlending24(u32 uSrc24, uint_fast16_t uDst) +{ + // These use techniques adapted from Blargg's techniques mentioned in + // in gpuBlending() comments above. Not as much bitwise trickery is + // necessary because of presence of 0 padding in uSrc24 format. 
+ + u32 uDst24 = gpuGetRGB24(uDst); + u32 mix; + + // 0.5 x Back + 0.5 x Forward + if (BLENDMODE==0) { + const u32 uMsk = 0x1FE7F9FE; + // Only need to mask LSBs of uSrc24, uDst24's LSBs are 0 already + mix = (uDst24 + (uSrc24 & uMsk)) >> 1; + } + + // 1.0 x Back + 1.0 x Forward + if (BLENDMODE==1) { + u32 sum = uSrc24 + uDst24; + u32 carries = sum & 0x20080200; + u32 modulo = sum - carries; + u32 clamp = carries - (carries >> 9); + mix = modulo | clamp; + } + + // 1.0 x Back - 1.0 x Forward + if (BLENDMODE==2) { + // Insert ones in 0-padded borrow slot of color to be subtracted from + uDst24 |= 0x20080200; + u32 diff = uDst24 - uSrc24; + u32 borrows = diff & 0x20080200; + u32 clamp = borrows - (borrows >> 9); + mix = diff & clamp; + } + + // 1.0 x Back + 0.25 x Forward + if (BLENDMODE==3) { + uSrc24 = (uSrc24 & 0x1FC7F1FC) >> 2; + u32 sum = uSrc24 + uDst24; + u32 carries = sum & 0x20080200; + u32 modulo = sum - carries; + u32 clamp = carries - (carries >> 9); + mix = modulo | clamp; + } + + return mix; +} + +#endif //_OP_BLEND_H_ diff --git a/plugins/gpu_senquack/gpu_inner_blend_arm.h b/plugins/gpu_senquack/gpu_inner_blend_arm.h new file mode 100644 index 000000000..6413527c7 --- /dev/null +++ b/plugins/gpu_senquack/gpu_inner_blend_arm.h @@ -0,0 +1,103 @@ +#ifndef _OP_BLEND_ARM_H_ +#define _OP_BLEND_ARM_H_ + +//////////////////////////////////////////////////////////////////////////////// +// Blend bgr555 color in 'uSrc' (foreground) with bgr555 color +// in 'uDst' (background), returning resulting color. 
+// +// INPUT: +// 'uSrc','uDst' input: -bbbbbgggggrrrrr +// ^ bit 16 +// OUTPUT: +// u16 output: 0bbbbbgggggrrrrr +// ^ bit 16 +// RETURNS: +// Where '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +template +GPU_INLINE uint_fast16_t gpuBlendingARM(uint_fast16_t uSrc, uint_fast16_t uDst) +{ + // These use Blargg's bitwise modulo-clamping: + // http://blargg.8bitalley.com/info/rgb_mixing.html + // http://blargg.8bitalley.com/info/rgb_clamped_add.html + // http://blargg.8bitalley.com/info/rgb_clamped_sub.html + + uint_fast16_t mix; + + // Clear preserved msb + asm ("bic %[uDst], %[uDst], #0x8000" : [uDst] "+r" (uDst)); + + if (BLENDMODE == 3) { + // Prepare uSrc for blending ((0.25 * uSrc) & (0.25 * mask)) + asm ("and %[uSrc], %[mask], %[uSrc], lsr #0x2" : [uSrc] "+r" (uSrc) : [mask] "r" (0x1ce7)); + } else if (!SKIP_USRC_MSB_MASK) { + asm ("bic %[uSrc], %[uSrc], #0x8000" : [uSrc] "+r" (uSrc)); + } + + + // 0.5 x Back + 0.5 x Forward + if (BLENDMODE==0) { + // mix = ((uSrc + uDst) - ((uSrc ^ uDst) & 0x0421)) >> 1; + asm ("eor %[mix], %[uSrc], %[uDst]\n\t" // uSrc ^ uDst + "and %[mix], %[mix], %[mask]\n\t" // ... & 0x0421 + "sub %[mix], %[uDst], %[mix]\n\t" // uDst - ... + "add %[mix], %[uSrc], %[mix]\n\t" // uSrc + ... + "mov %[mix], %[mix], lsr #0x1\n\t" // ... >> 1 + : [mix] "=&r" (mix) + : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x0421)); + } + + if (BLENDMODE == 1 || BLENDMODE == 3) { + // u32 sum = uSrc + uDst; + // u32 low_bits = (uSrc ^ uDst) & 0x0421; + // u32 carries = (sum - low_bits) & 0x8420; + // u32 modulo = sum - carries; + // u32 clamp = carries - (carries >> 5); + // mix = modulo | clamp; + + u32 sum; + + asm ("add %[sum], %[uSrc], %[uDst]\n\t" // sum = uSrc + uDst + "eor %[mix], %[uSrc], %[uDst]\n\t" // uSrc ^ uDst + "and %[mix], %[mix], %[mask]\n\t" // low_bits = (... 
& 0x0421) + "sub %[mix], %[sum], %[mix]\n\t" // sum - low_bits + "and %[mix], %[mix], %[mask], lsl #0x05\n\t" // carries = ... & 0x8420 + "sub %[sum], %[sum], %[mix] \n\t" // modulo = sum - carries + "sub %[mix], %[mix], %[mix], lsr #0x05\n\t" // clamp = carries - (carries >> 5) + "orr %[mix], %[sum], %[mix]" // mix = modulo | clamp + : [sum] "=&r" (sum), [mix] "=&r" (mix) + : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x0421)); + } + + // 1.0 x Back - 1.0 x Forward + if (BLENDMODE==2) { + u32 diff; + // u32 diff = uDst - uSrc + 0x8420; + // u32 low_bits = (uDst ^ uSrc) & 0x8420; + // u32 borrows = (diff - low_bits) & 0x8420; + // u32 modulo = diff - borrows; + // u32 clamp = borrows - (borrows >> 5); + // mix = modulo & clamp; + asm ("sub %[diff], %[uDst], %[uSrc]\n\t" // uDst - uSrc + "add %[diff], %[diff], %[mask]\n\t" // diff = ... + 0x8420 + "eor %[mix], %[uDst], %[uSrc]\n\t" // uDst ^ uSrc + "and %[mix], %[mix], %[mask]\n\t" // low_bits = ... & 0x8420 + "sub %[mix], %[diff], %[mix]\n\t" // diff - low_bits + "and %[mix], %[mix], %[mask]\n\t" // borrows = ... & 0x8420 + "sub %[diff], %[diff], %[mix]\n\t" // modulo = diff - borrows + "sub %[mix], %[mix], %[mix], lsr #0x05\n\t" // clamp = borrows - (borrows >> 5) + "and %[mix], %[diff], %[mix]" // mix = modulo & clamp + : [diff] "=&r" (diff), [mix] "=&r" (mix) + : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x8420)); + } + + // There's not a case where we can get into this function, + // SKIP_USRC_MSB_MASK is false, and the msb of uSrc is unset. 
+ if (!SKIP_USRC_MSB_MASK) { + asm ("orr %[mix], %[mix], #0x8000" : [mix] "+r" (mix)); + } + + return mix; +} + +#endif //_OP_BLEND_ARM_H_ diff --git a/plugins/gpu_senquack/gpu_inner_blend_arm5.h b/plugins/gpu_senquack/gpu_inner_blend_arm5.h new file mode 100644 index 000000000..0e9b74f1c --- /dev/null +++ b/plugins/gpu_senquack/gpu_inner_blend_arm5.h @@ -0,0 +1,100 @@ +/*************************************************************************** +* Copyright (C) 2010 PCSX4ALL Team * +* Copyright (C) 2010 Unai * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. 
* +***************************************************************************/ + +#ifndef _OP_BLEND_H_ +#define _OP_BLEND_H_ + +// GPU Blending operations functions + +#define gpuBlending00(uSrc,uDst) \ +{ \ + asm ("and %[src], %[src], %[msk] " : [src] "=r" (uSrc) : "0" (uSrc), [msk] "r" (uMsk) ); \ + asm ("and %[dst], %[dst], %[msk] " : [dst] "=r" (uDst) : "0" (uDst), [msk] "r" (uMsk) ); \ + asm ("add %[src], %[dst], %[src] " : [src] "=r" (uSrc) : [dst] "r" (uDst), "0" (uSrc) ); \ + asm ("mov %[src], %[src], lsr #1 " : [src] "=r" (uSrc) : "0" (uSrc) ); \ +} + +// 1.0 x Back + 1.0 x Forward +#define gpuBlending01(uSrc,uDst) \ +{ \ + u16 st,dt,out; \ + asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x7C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("add %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) ); \ + asm ("cmp %[out], #0x7C00 " : : [out] "r" (out) : "cc" ); \ + asm ("movhi %[out], #0x7C00 " : [out] "=r" (out) : "0" (out) ); \ + asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x03E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \ + asm ("cmp %[dt], #0x03E0 " : : [dt] "r" (dt) : "cc" ); \ + asm ("movhi %[dt], #0x03E0 " : [dt] "=r" (dt) : "0" (dt) ); \ + asm ("orr %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \ + asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x001F " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \ + asm ("cmp %[dt], #0x001F " : : [dt] "r" (dt) : "cc" ); \ + asm ("movhi %[dt], #0x001F " : [dt] "=r" (dt) : "0" (dt) ); \ + asm ("orr %[uSrc], %[out], %[dt] " : [uSrc] "=r" (uSrc) : [out] "r" (out), [dt] "r" (dt) ); \ +} + +// 1.0 x Back - 1.0 x Forward */ +#define 
gpuBlending02(uSrc,uDst) \ +{ \ + u16 st,dt,out; \ + asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x7C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("subs %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) : "cc" ); \ + asm ("movmi %[out], #0x0000 " : [out] "=r" (out) : "0" (out) ); \ + asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x03E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("subs %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) : "cc" ); \ + asm ("orrpl %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \ + asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x001F " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("subs %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) : "cc" ); \ + asm ("orrpl %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \ + asm ("mov %[uSrc], %[out]" : [uSrc] "=r" (uSrc) : [out] "r" (out) ); \ +} + +// 1.0 x Back + 0.25 x Forward */ +#define gpuBlending03(uSrc,uDst) \ +{ \ + u16 st,dt,out; \ + asm ("mov %[src], %[src], lsr #2 " : [src] "=r" (uSrc) : "0" (uSrc) ); \ + asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x1C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("add %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) ); \ + asm ("cmp %[out], #0x7C00 " : : [out] "r" (out) : "cc" ); \ + asm ("movhi %[out], #0x7C00 " : [out] "=r" (out) : "0" (out) ); \ + asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x00E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \ + asm ("cmp %[dt], #0x03E0 " : : [dt] "r" (dt) : "cc" ); \ + asm ("movhi %[dt], #0x03E0 " : 
[dt] "=r" (dt) : "0" (dt) ); \ + asm ("orr %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \ + asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x0007 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \ + asm ("cmp %[dt], #0x001F " : : [dt] "r" (dt) : "cc" ); \ + asm ("movhi %[dt], #0x001F " : [dt] "=r" (dt) : "0" (dt) ); \ + asm ("orr %[uSrc], %[out], %[dt] " : [uSrc] "=r" (uSrc) : [out] "r" (out), [dt] "r" (dt) ); \ +} + +#endif //_OP_BLEND_H_ diff --git a/plugins/gpu_senquack/gpu_inner_blend_arm7.h b/plugins/gpu_senquack/gpu_inner_blend_arm7.h new file mode 100644 index 000000000..083e62d8d --- /dev/null +++ b/plugins/gpu_senquack/gpu_inner_blend_arm7.h @@ -0,0 +1,107 @@ +/*************************************************************************** +* Copyright (C) 2010 PCSX4ALL Team * +* Copyright (C) 2010 Unai * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. 
* +***************************************************************************/ + +#ifndef _OP_BLEND_H_ +#define _OP_BLEND_H_ + +// GPU Blending operations functions + +#define gpuBlending00(uSrc,uDst) \ +{ \ + asm ("and %[src], %[src], %[msk]\n" \ + "and %[dst], %[dst], %[msk]\n" \ + "add %[src], %[dst], %[src]\n" \ + "mov %[src], %[src], lsr #1\n" \ + : [src] "=&r" (uSrc), [dst] "=&r" (uDst) : "0" (uSrc), "1" (uDst), [msk] "r" (uMsk)); \ +} + +// 1.0 x Back + 1.0 x Forward +#define gpuBlending01(uSrc,uDst) \ +{ \ + u32 st,dt,out; \ + asm ("and %[dt], %[dst], #0x7C00\n" \ + "and %[st], %[src], #0x7C00\n" \ + "add %[out], %[dt], %[st] \n" \ + "cmp %[out], #0x7C00 \n" \ + "movhi %[out], #0x7C00 \n" \ + "and %[dt], %[dst], #0x03E0\n" \ + "and %[st], %[src], #0x03E0\n" \ + "add %[dt], %[dt], %[st] \n" \ + "cmp %[dt], #0x03E0 \n" \ + "movhi %[dt], #0x03E0 \n" \ + "orr %[out], %[out], %[dt] \n" \ + "and %[dt], %[dst], #0x001F\n" \ + "and %[st], %[src], #0x001F\n" \ + "add %[dt], %[dt], %[st] \n" \ + "cmp %[dt], #0x001F \n" \ + "movhi %[dt], #0x001F \n" \ + "orr %[src], %[out], %[dt] \n" \ + : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \ + : [dst] "r" (uDst), "0" (uSrc) : "cc"); \ +} + +// 1.0 x Back - 1.0 x Forward */ +#define gpuBlending02(uSrc,uDst) \ +{ \ + u32 st,dt,out; \ + asm ("and %[dt], %[dst], #0x7C00\n" \ + "and %[st], %[src], #0x7C00\n" \ + "subs %[out], %[dt], %[st] \n" \ + "movmi %[out], #0x0000 \n" \ + "and %[dt], %[dst], #0x03E0\n" \ + "and %[st], %[src], #0x03E0\n" \ + "subs %[dt], %[dt], %[st] \n" \ + "orrpl %[out], %[out], %[dt] \n" \ + "and %[dt], %[dst], #0x001F\n" \ + "and %[st], %[src], #0x001F\n" \ + "subs %[dt], %[dt], %[st] \n" \ + "orrpl %[out], %[out], %[dt] \n" \ + "mov %[src], %[out] \n" \ + : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \ + : [dst] "r" (uDst), "0" (uSrc) : "cc"); \ +} + +// 1.0 x Back + 0.25 x Forward */ +#define gpuBlending03(uSrc,uDst) \ +{ \ + u32 st,dt,out; \ + asm 
("mov %[src], %[src], lsr #2 \n" \ + "and %[dt], %[dst], #0x7C00\n" \ + "and %[st], %[src], #0x1C00\n" \ + "add %[out], %[dt], %[st] \n" \ + "cmp %[out], #0x7C00 \n" \ + "movhi %[out], #0x7C00 \n" \ + "and %[dt], %[dst], #0x03E0\n" \ + "and %[st], %[src], #0x00E0\n" \ + "add %[dt], %[dt], %[st] \n" \ + "cmp %[dt], #0x03E0 \n" \ + "movhi %[dt], #0x03E0 \n" \ + "orr %[out], %[out], %[dt] \n" \ + "and %[dt], %[dst], #0x001F\n" \ + "and %[st], %[src], #0x0007\n" \ + "add %[dt], %[dt], %[st] \n" \ + "cmp %[dt], #0x001F \n" \ + "movhi %[dt], #0x001F \n" \ + "orr %[src], %[out], %[dt] \n" \ + : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \ + : [dst] "r" (uDst), "0" (uSrc) : "cc"); \ +} + +#endif //_OP_BLEND_H_ diff --git a/plugins/gpu_senquack/gpu_inner_light.h b/plugins/gpu_senquack/gpu_inner_light.h new file mode 100644 index 000000000..b5d89338a --- /dev/null +++ b/plugins/gpu_senquack/gpu_inner_light.h @@ -0,0 +1,271 @@ +/*************************************************************************** +* Copyright (C) 2016 PCSX4ALL Team * +* Copyright (C) 2010 Unai * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. 
* +***************************************************************************/ + +#ifndef _OP_LIGHT_H_ +#define _OP_LIGHT_H_ + +// GPU color operations for lighting calculations + +static void SetupLightLUT() +{ + // 1024-entry lookup table that modulates 5-bit texture + 5-bit light value. + // A light value of 15 does not modify the incoming texture color. + // LightLUT[32*32] array is initialized to following values: + // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + // 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, + // 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + // 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, + // 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,10,10,10,11,11, + // 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9,10,10,10,11,11,12,12,13,13, + // 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15, + // 0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 6, 7, 7, 8, 9, 9,10,10,11,11,12,12,13,14,14,15,15,16,16,17, + // 0, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8, 8, 9,10,10,11,11,12,13,13,14,15,15,16,16,17,18,18,19, + // 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,19,20,21, + // 0, 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9,10,11,12,12,13,14,15,15,16,17,18,18,19,20,21,21,22,23, + // 0, 0, 1, 2, 3, 4, 4, 5, 6, 7, 8, 8, 9,10,11,12,13,13,14,15,16,17,17,18,19,20,21,21,22,23,24,25, + // 0, 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,15,16,17,18,19,20,21,21,22,23,24,25,26,27, + // 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 
9,10,11,12,13,14,15,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29, + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,31, + // 0, 1, 2, 3, 4, 5, 6, 7, 9,10,11,12,13,14,15,16,18,19,20,21,22,23,24,25,27,28,29,30,31,31,31,31, + // 0, 1, 2, 3, 4, 5, 7, 8, 9,10,11,13,14,15,16,17,19,20,21,22,23,24,26,27,28,29,30,31,31,31,31,31, + // 0, 1, 2, 3, 5, 6, 7, 8,10,11,12,13,15,16,17,18,20,21,22,23,25,26,27,28,30,31,31,31,31,31,31,31, + // 0, 1, 2, 3, 5, 6, 7, 9,10,11,13,14,15,17,18,19,21,22,23,24,26,27,28,30,31,31,31,31,31,31,31,31, + // 0, 1, 2, 4, 5, 6, 8, 9,11,12,13,15,16,17,19,20,22,23,24,26,27,28,30,31,31,31,31,31,31,31,31,31, + // 0, 1, 2, 4, 5, 7, 8,10,11,12,14,15,17,18,20,21,23,24,25,27,28,30,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 4, 6, 7, 9,10,12,13,15,16,18,19,21,22,24,25,27,28,30,31,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 4, 6, 7, 9,10,12,14,15,17,18,20,21,23,25,26,28,29,31,31,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 4, 6, 8, 9,11,13,14,16,17,19,21,22,24,26,27,29,30,31,31,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 5, 6, 8,10,11,13,15,16,18,20,21,23,25,27,28,30,31,31,31,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 5, 7, 8,10,12,14,15,17,19,21,22,24,26,28,29,31,31,31,31,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 5, 7, 9,10,12,14,16,18,19,21,23,25,27,29,30,31,31,31,31,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 5, 7, 9,11,13,15,16,18,20,22,24,26,28,30,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 5, 7, 9,11,13,15,17,19,21,23,25,27,29,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 + + for (int j=0; j < 32; ++j) { + for (int i=0; i < 32; ++i) { + int val = i * j / 16; + if (val > 31) val = 31; + gpu_senquack.LightLUT[(j*32) + i] = val; + } + } +} + + +//////////////////////////////////////////////////////////////////////////////// +// Create packed Gouraud fixed-pt 8.3:8.3:8.2 rgb triplet +// 
+// INPUT: +// 'r','g','b' are 8.10 fixed-pt color components (r shown here) +// 'r' input: --------------rrrrrrrrXXXXXXXXXX +// ^ bit 31 +// RETURNS: +// u32 output: rrrrrrrrXXXggggggggXXXbbbbbbbbXX +// ^ bit 31 +// Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '-' don't care +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u32 gpuPackGouraudCol(u32 r, u32 g, u32 b) +{ + return ((u32)(b>> 8)&(0x03ff )) + | ((u32)(g<< 3)&(0x07ff<<10)) + | ((u32)(r<<14)&(0x07ff<<21)); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Create packed increment for Gouraud fixed-pt 8.3:8.3:8.2 rgb triplet +// +// INPUT: +// Sign-extended 8.10 fixed-pt r,g,b color increment values (only dr is shown) +// 'dr' input: ssssssssssssssrrrrrrrrXXXXXXXXXX +// ^ bit 31 +// RETURNS: +// u32 output: rrrrrrrrXXXggggggggXXXbbbbbbbbXX +// ^ bit 31 +// Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and 's' sign bits +// +// NOTE: The correctness of this code/method has not been fully verified, +// having been merely factored out from original code in +// poly-drawing functions. 
Feel free to check/improve it -senquack +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u32 gpuPackGouraudColInc(s32 dr, s32 dg, s32 db) +{ + u32 dr_tmp = (u32)(dr << 14)&(0xffffffff<<21); if (dr < 0) dr_tmp += 1<<21; + u32 dg_tmp = (u32)(dg << 3)&(0xffffffff<<10); if (dg < 0) dg_tmp += 1<<10; + u32 db_tmp = (u32)(db >> 8)&(0xffffffff ); if (db < 0) db_tmp += 1<< 0; + return db_tmp + dg_tmp + dr_tmp; +} + + +//////////////////////////////////////////////////////////////////////////////// +// Extract bgr555 color from Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet +// +// INPUT: +// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX +// ^ bit 31 +// RETURNS: +// u16 output: 0bbbbbgggggrrrrr +// ^ bit 16 +// Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '0' zero +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE uint_fast16_t gpuLightingRGBGeneric(u32 gCol) +{ + return ((gCol<< 5)&0x7C00) | + ((gCol>>11)&0x03E0) | + (gCol>>27); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Convert packed Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet in 'gCol' +// to padded u32 5.4:5.4:5.4 bgr fixed-pt triplet, suitable for use +// with HQ 24-bit lighting/quantization. 
+// +// INPUT: +// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX +// ^ bit 31 +// RETURNS: +// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX +// ^ bit 31 +// Where 'X' are fixed-pt bits, '0' zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u32 gpuLightingRGB24(u32 gCol) +{ + return ((gCol<<19) & (0x1FF<<20)) | + ((gCol>> 2) & (0x1FF<<10)) | + (gCol>>23); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Apply fast (low-precision) 5-bit lighting to bgr555 texture color: +// +// INPUT: +// 'r5','g5','b5' are unsigned 5-bit color values, value of 15 +// is midpoint that doesn't modify that component of texture +// 'uSrc' input: -bbbbbgggggrrrrr +// ^ bit 16 +// RETURNS: +// u16 output: 0bbbbbgggggrrrrr +// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE uint_fast16_t gpuLightingTXTGeneric(uint_fast16_t uSrc, u8 r5, u8 g5, u8 b5) +{ + return (gpu_senquack.LightLUT[((uSrc&0x7C00)>>5) | b5] << 10) | + (gpu_senquack.LightLUT[ (uSrc&0x03E0) | g5] << 5) | + (gpu_senquack.LightLUT[((uSrc&0x001F)<<5) | r5] ); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Apply fast (low-precision) 5-bit Gouraud lighting to bgr555 texture color: +// +// INPUT: +// 'gCol' is a packed Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet, value of +// 15.0 is midpoint that does not modify color of texture +// gCol input : rrrrrXXXXXXgggggXXXXXXbbbbbXXXXX +// ^ bit 31 +// 'uSrc' input: -bbbbbgggggrrrrr +// ^ bit 16 +// RETURNS: +// u16 output: 0bbbbbgggggrrrrr +// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE uint_fast16_t gpuLightingTXTGouraudGeneric(uint_fast16_t uSrc, u32 gCol) +{ + return 
(gpu_senquack.LightLUT[((uSrc&0x7C00)>>5) | ((gCol>> 5)&0x1F)]<<10) | + (gpu_senquack.LightLUT[ (uSrc&0x03E0) | ((gCol>>16)&0x1F)]<< 5) | + (gpu_senquack.LightLUT[((uSrc&0x001F)<<5) | (gCol>>27) ] ); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Apply high-precision 8-bit lighting to bgr555 texture color, +// returning a padded u32 5.4:5.4:5.4 bgr fixed-pt triplet +// suitable for use with HQ 24-bit lighting/quantization. +// +// INPUT: +// 'r8','g8','b8' are unsigned 8-bit color component values, value of +// 127 is midpoint that doesn't modify that component of texture +// +// uSrc input: -bbbbbgggggrrrrr +// ^ bit 16 +// RETURNS: +// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX +// ^ bit 31 +// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u32 gpuLightingTXT24(uint_fast16_t uSrc, u8 r8, u8 g8, u8 b8) +{ + uint_fast16_t r1 = uSrc&0x001F; + uint_fast16_t g1 = uSrc&0x03E0; + uint_fast16_t b1 = uSrc&0x7C00; + + uint_fast16_t r2 = r8; + uint_fast16_t g2 = g8; + uint_fast16_t b2 = b8; + + u32 r3 = r1 * r2; if (r3 & 0xFFFFF000) r3 = ~0xFFFFF000; + u32 g3 = g1 * g2; if (g3 & 0xFFFE0000) g3 = ~0xFFFE0000; + u32 b3 = b1 * b2; if (b3 & 0xFFC00000) b3 = ~0xFFC00000; + + return ((r3>> 3) ) | + ((g3>> 8)<<10) | + ((b3>>13)<<20); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Apply high-precision 8-bit lighting to bgr555 texture color in 'uSrc', +// returning a padded u32 5.4:5.4:5.4 bgr fixed-pt triplet +// suitable for use with HQ 24-bit lighting/quantization. 
+// +// INPUT: +// 'uSrc' input: -bbbbbgggggrrrrr +// ^ bit 16 +// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX +// ^ bit 31 +// RETURNS: +// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX +// ^ bit 31 +// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u32 gpuLightingTXT24Gouraud(uint_fast16_t uSrc, u32 gCol) +{ + uint_fast16_t r1 = uSrc&0x001F; + uint_fast16_t g1 = uSrc&0x03E0; + uint_fast16_t b1 = uSrc&0x7C00; + + uint_fast16_t r2 = (gCol>>24) & 0xFF; + uint_fast16_t g2 = (gCol>>13) & 0xFF; + uint_fast16_t b2 = (gCol>> 2) & 0xFF; + + u32 r3 = r1 * r2; if (r3 & 0xFFFFF000) r3 = ~0xFFFFF000; + u32 g3 = g1 * g2; if (g3 & 0xFFFE0000) g3 = ~0xFFFE0000; + u32 b3 = b1 * b2; if (b3 & 0xFFC00000) b3 = ~0xFFC00000; + + return ((r3>> 3) ) | + ((g3>> 8)<<10) | + ((b3>>13)<<20); +} + +#endif //_OP_LIGHT_H_ diff --git a/plugins/gpu_senquack/gpu_inner_light_arm.h b/plugins/gpu_senquack/gpu_inner_light_arm.h new file mode 100644 index 000000000..550f6b1e4 --- /dev/null +++ b/plugins/gpu_senquack/gpu_inner_light_arm.h @@ -0,0 +1,112 @@ +#ifndef _OP_LIGHT_ARM_H_ +#define _OP_LIGHT_ARM_H_ + +//////////////////////////////////////////////////////////////////////////////// +// Extract bgr555 color from Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet +// +// INPUT: +// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX +// ^ bit 31 +// RETURNS: +// u16 output: 0bbbbbgggggrrrrr +// ^ bit 16 +// Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '0' zero +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE uint_fast16_t gpuLightingRGBARM(u32 gCol) +{ + uint_fast16_t out = 0x03E0; // don't need the mask after starting to write output + u32 tmp; + + asm ("and %[tmp], %[gCol], %[out]\n\t" // tmp holds 0x000000bbbbb00000 + "and %[out], %[out], %[gCol], lsr #0x0B\n\t" // out holds 0x000000ggggg00000 + "orr %[tmp], %[out], %[tmp], 
lsl #0x05\n\t" // tmp holds 0x0bbbbbggggg00000
       "orr %[out], %[tmp], %[gCol], lsr #0x1B\n\t" // out holds 0x0bbbbbgggggrrrrr
       : [out] "+&r" (out), [tmp] "=&r" (tmp)
       : [gCol] "r" (gCol)
       );

	return out;
}

////////////////////////////////////////////////////////////////////////////////
// Apply fast (low-precision) 5-bit lighting to bgr555 texture color:
//
// INPUT:
// 'r5','g5','b5' are unsigned 5-bit color values, value of 15
// is midpoint that doesn't modify that component of texture
// 'uSrc' input: mbbbbbgggggrrrrr
// ^ bit 16
// RETURNS:
// u16 output: mbbbbbgggggrrrrr
// Where 'X' are fixed-pt bits.
//
// HOW:
// Every channel is resolved with a single byte load from
// gpu_senquack.LightLUT, indexed by (texture component << 5) | light
// component. The 0x8000 bit of uSrc is tested and re-applied to the result.
// NOTE(review): the index is built with a plain ORR, so this presumably
// relies on r5/g5/b5 never exceeding 31 -- confirm against callers.
////////////////////////////////////////////////////////////////////////////////
GPU_INLINE uint_fast16_t gpuLightingTXTARM(uint_fast16_t uSrc, u8 r5, u8 g5, u8 b5)
{
	// 'out' starts life as the 5-bit field mask (bits 5..9); it is tied to the
	// asm output through the "0" matching constraint below and is only
	// overwritten once the mask is no longer needed.
	uint_fast16_t out = 0x03E0;
	u32 db, dg;

	// Using `g` for src, `G` for dest
	asm ("and %[dg], %[out], %[src] \n\t"              // dg holds 0x000000ggggg00000
	     "orr %[dg], %[dg], %[g5] \n\t"                // dg holds 0x000000gggggGGGGG
	     "and %[db], %[out], %[src], lsr #0x05 \n\t"   // db holds 0x000000bbbbb00000
	     "ldrb %[dg], [%[lut], %[dg]] \n\t"            // dg holds result 0x00000000000ggggg
	     "and %[out], %[out], %[src], lsl #0x05 \n\t"  // out holds 0x000000rrrrr00000
	     "orr %[out], %[out], %[r5] \n\t"              // out holds 0x000000rrrrrRRRRR
	     "orr %[db], %[db], %[b5] \n\t"                // db holds 0x000000bbbbbBBBBB
	     "ldrb %[out], [%[lut], %[out]] \n\t"          // out holds result 0x00000000000rrrrr
	     "ldrb %[db], [%[lut], %[db]] \n\t"            // db holds result 0x00000000000bbbbb
	     "tst %[src], #0x8000\n\t"                     // check whether msb was set on uSrc
	     "orr %[out], %[out], %[dg], lsl #0x05 \n\t"   // out holds 0x000000gggggrrrrr
	     "orrne %[out], %[out], #0x8000\n\t"           // add msb to out if set on uSrc
	     "orr %[out], %[out], %[db], lsl #0x0A \n\t"   // out holds 0xmbbbbbgggggrrrrr
	     : [out] "=&r" (out), [db] "=&r" (db), [dg] "=&r" (dg)
	     : [r5] "r" (r5), [g5] "r" (g5), [b5] "r" (b5),
	       [lut] "r" (gpu_senquack.LightLUT), [src] "r" (uSrc), "0" (out)
	     : "cc");
	return out;
}

////////////////////////////////////////////////////////////////////////////////
// Apply fast (low-precision) 5-bit Gouraud lighting to bgr555 texture color:
//
// INPUT:
// 'gCol' is a packed Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet, value of
// 15.0 is midpoint that does not modify color of texture
// gCol input : rrrrrXXXXXXgggggXXXXXXbbbbbXXXXX
// ^ bit 31
// 'uSrc' input: mbbbbbgggggrrrrr
// ^ bit 16
// RETURNS:
// u16 output: mbbbbbgggggrrrrr
// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
//
// HOW:
// Same LightLUT byte lookup as gpuLightingTXTARM(), except the 5-bit light
// value for each channel is taken from gCol by shift-and-mask
// (red: gCol >> 27, green: (gCol >> 11) & 0x3E0, blue: gCol & 0x3E0).
////////////////////////////////////////////////////////////////////////////////
GPU_INLINE uint_fast16_t gpuLightingTXTGouraudARM(uint_fast16_t uSrc, u32 gCol)
{
	uint_fast16_t out = 0x03E0; // don't need the mask after starting to write output
	u32 db,dg,gtmp;

	// Using `g` for src, `G` for dest
	asm ("and %[dg], %[out], %[src] \n\t"               // dg holds 0x000000ggggg00000
	     "and %[gtmp],%[out], %[gCol], lsr #0x0B \n\t"  // gtmp holds 0x000000GGGGG00000
	     "and %[db], %[out], %[src], lsr #0x05 \n\t"    // db holds 0x000000bbbbb00000
	     "orr %[dg], %[dg], %[gtmp], lsr #0x05 \n\t"    // dg holds 0x000000gggggGGGGG
	     "and %[gtmp],%[out], %[gCol] \n\t"             // gtmp holds 0x000000BBBBB00000
	     "ldrb %[dg], [%[lut], %[dg]] \n\t"             // dg holds result 0x00000000000ggggg
	     "and %[out], %[out], %[src], lsl #0x05 \n\t"   // out holds 0x000000rrrrr00000
	     "orr %[out], %[out], %[gCol], lsr #0x1B \n\t"  // out holds 0x000000rrrrrRRRRR
	     "orr %[db], %[db], %[gtmp], lsr #0x05 \n\t"    // db holds 0x000000bbbbbBBBBB
	     "ldrb %[out], [%[lut], %[out]] \n\t"           // out holds result 0x00000000000rrrrr
	     "ldrb %[db], [%[lut], %[db]] \n\t"             // db holds result 0x00000000000bbbbb
	     "tst %[src], #0x8000\n\t"                      // check whether msb was set on uSrc
	     "orr %[out], %[out], %[dg], lsl #0x05 \n\t"    // out holds 0x000000gggggrrrrr
	     "orrne %[out], %[out], #0x8000\n\t"            // add msb to out if set on uSrc
	     "orr %[out], %[out], %[db], lsl #0x0A \n\t"    // out holds 0xmbbbbbgggggrrrrr
	     : [out] "=&r" (out), [db] "=&r" (db), [dg] "=&r" (dg),
	       [gtmp] "=&r" (gtmp) \
	     : [gCol] "r" (gCol), [lut] "r" (gpu_senquack.LightLUT), "0" (out), [src] "r" (uSrc)
	     : "cc");

	return out;
}

#endif //_OP_LIGHT_ARM_H_
diff --git a/plugins/gpu_senquack/gpu_inner_quantization.h b/plugins/gpu_senquack/gpu_inner_quantization.h
new file mode 100644
index 000000000..6432d0313
--- /dev/null
+++ b/plugins/gpu_senquack/gpu_inner_quantization.h
@@ -0,0 +1,108 @@
/***************************************************************************
* Copyright (C) 2016 PCSX4ALL Team *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
***************************************************************************/

#ifndef _OP_DITHER_H_
#define _OP_DITHER_H_

// Fills the 64-entry gpu_senquack.DitherMatrix[] table. Each entry holds the
// same per-pixel dither offset replicated at bit 0, bit 10 and bit 20 so it
// can be added to all three packed color fields in one operation.
static void SetupDitheringConstants()
{
	// Initialize Dithering Constants
	// The screen is divided into 8x8 chunks and sub-unitary noise is applied
	// using the following matrix. This ensures that data lost in color
	// quantization will be added back to the image 'by chance' in predictable
	// patterns that are naturally 'smoothed' by your sight when viewed from a
	// certain distance.
	//
	// http://caca.zoy.org/study/index.html
	//
	// Shading colors are encoded in 4.5, and then are quantized to 5.0,
	// DitherMatrix constants reflect that.

	static const u8 DitherMatrix[] = {
		 0, 32,  8, 40,  2, 34, 10, 42,
		48, 16, 56, 24, 50, 18, 58, 26,
		12, 44,  4, 36, 14, 46,  6, 38,
		60, 28, 52, 20, 62, 30, 54, 22,
		 3, 35, 11, 43,  1, 33,  9, 41,
		51, 19, 59, 27, 49, 17, 57, 25,
		15, 47,  7, 39, 13, 45,  5, 37,
		63, 31, 55, 23, 61, 29, 53, 21
	};

	int i, j;
	for (i = 0; i < 8; i++)
	{
		for (j = 0; j < 8; j++)
		{
			// Row-major index into the 8x8 matrix
			u16 offset = (i << 3) | j;

			u32 component = ((DitherMatrix[offset] + 1) << 4) / 65; //[5.5] -> [5]

			// XXX - senquack - hack Dec 2016
			// Until JohnnyF gets the time to work further on dithering,
			// force lower bit of component to 0. This fixes grid pattern
			// affecting quality of dithered image, as well as loss of
			// detail in dark areas. With lower bit unset like this, existing
			// 27-bit accuracy of dithering math is unneeded, could be 24-bit.
			// Is 8x8 matrix overkill as a result, can we use 4x4?
			component &= ~1;

			gpu_senquack.DitherMatrix[offset] = (component)
			                                  | (component << 10)
			                                  | (component << 20);
		}
	}
}

////////////////////////////////////////////////////////////////////////////////
// Convert padded u32 5.4:5.4:5.4 bgr fixed-pt triplet to final bgr555 color,
// applying dithering if specified by template parameter.
//
// INPUT:
// 'uSrc24' input: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX
// ^ bit 31
// 'pDst' is a pointer to destination framebuffer pixel, used
// to determine which DitherMatrix[] entry to apply.
+// RETURNS: +// u16 output: 0bbbbbgggggrrrrr +// ^ bit 16 +// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +template +GPU_INLINE u16 gpuColorQuantization24(u32 uSrc24, const u16 *pDst) +{ + if (DITHER) + { + u16 fbpos = (u32)(pDst - gpu_senquack.vram); + u16 offset = ((fbpos & (0x7 << 10)) >> 7) | (fbpos & 0x7); + + //clean overflow flags and add + uSrc24 = (uSrc24 & 0x1FF7FDFF) + gpu_senquack.DitherMatrix[offset]; + + if (uSrc24 & (1<< 9)) uSrc24 |= (0x1FF ); + if (uSrc24 & (1<<19)) uSrc24 |= (0x1FF<<10); + if (uSrc24 & (1<<29)) uSrc24 |= (0x1FF<<20); + } + + return ((uSrc24>> 4) & (0x1F )) + | ((uSrc24>> 9) & (0x1F<<5 )) + | ((uSrc24>>14) & (0x1F<<10)); +} + +#endif //_OP_DITHER_H_ diff --git a/plugins/gpu_senquack/gpu_raster_image.h b/plugins/gpu_senquack/gpu_raster_image.h new file mode 100644 index 000000000..8e8064c46 --- /dev/null +++ b/plugins/gpu_senquack/gpu_raster_image.h @@ -0,0 +1,220 @@ +/*************************************************************************** + * Copyright (C) 2010 PCSX4ALL Team * + * Copyright (C) 2010 Unai * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. 
* + ***************************************************************************/ + +#ifndef __GPU_UNAI_GPU_RASTER_IMAGE_H__ +#define __GPU_UNAI_GPU_RASTER_IMAGE_H__ + +/////////////////////////////////////////////////////////////////////////////// +#ifndef USE_GPULIB +void gpuLoadImage(PtrUnion packet) +{ + u16 x0, y0, w0, h0; + x0 = packet.U2[2] & 1023; + y0 = packet.U2[3] & 511; + w0 = packet.U2[4]; + h0 = packet.U2[5]; + + if ((y0 + h0) > FRAME_HEIGHT) + { + h0 = FRAME_HEIGHT - y0; + } + + gpu_senquack.dma.FrameToWrite = ((w0)&&(h0)); + + gpu_senquack.dma.px = 0; + gpu_senquack.dma.py = 0; + gpu_senquack.dma.x_end = w0; + gpu_senquack.dma.y_end = h0; + gpu_senquack.dma.pvram = &((u16*)gpu_senquack.vram)[x0+(y0*1024)]; + + gpu_senquack.GPU_GP1 |= 0x08000000; +} +#endif // !USE_GPULIB + +/////////////////////////////////////////////////////////////////////////////// +#ifndef USE_GPULIB +void gpuStoreImage(PtrUnion packet) +{ + u16 x0, y0, w0, h0; + x0 = packet.U2[2] & 1023; + y0 = packet.U2[3] & 511; + w0 = packet.U2[4]; + h0 = packet.U2[5]; + + if ((y0 + h0) > FRAME_HEIGHT) + { + h0 = FRAME_HEIGHT - y0; + } + gpu_senquack.dma.FrameToRead = ((w0)&&(h0)); + + gpu_senquack.dma.px = 0; + gpu_senquack.dma.py = 0; + gpu_senquack.dma.x_end = w0; + gpu_senquack.dma.y_end = h0; + gpu_senquack.dma.pvram = &((u16*)gpu_senquack.vram)[x0+(y0*1024)]; + + gpu_senquack.GPU_GP1 |= 0x08000000; +} +#endif // !USE_GPULIB + +void gpuMoveImage(PtrUnion packet) +{ + u32 x0, y0, x1, y1; + s32 w0, h0; + x0 = packet.U2[2] & 1023; + y0 = packet.U2[3] & 511; + x1 = packet.U2[4] & 1023; + y1 = packet.U2[5] & 511; + w0 = packet.U2[6]; + h0 = packet.U2[7]; + + if( (x0==x1) && (y0==y1) ) return; + if ((w0<=0) || (h0<=0)) return; + + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"gpuMoveImage(x0=%u,y0=%u,x1=%u,y1=%u,w0=%d,h0=%d)\n",x0,y0,x1,y1,w0,h0); + #endif + + if (((y0+h0)>512)||((x0+w0)>1024)||((y1+h0)>512)||((x1+w0)>1024)) + { + u16 *psxVuw=gpu_senquack.vram; + s32 i,j; + for(j=0;j>1); + 
lpDst += ((FRAME_OFFSET(x1, y1))>>1); + if (w0&1) + { + x1 = (FRAME_WIDTH - w0 +1)>>1; + w0>>=1; + if (!w0) { + do { + *((u16*)lpDst) = *((u16*)lpSrc); + lpDst += x1; + lpSrc += x1; + } while (--h0); + } else + do { + x0=w0; + do { *lpDst++ = *lpSrc++; } while (--x0); + *((u16*)lpDst) = *((u16*)lpSrc); + lpDst += x1; + lpSrc += x1; + } while (--h0); + } + else + { + x1 = (FRAME_WIDTH - w0)>>1; + w0>>=1; + do { + x0=w0; + do { *lpDst++ = *lpSrc++; } while (--x0); + lpDst += x1; + lpSrc += x1; + } while (--h0); + } + } +} + +void gpuClearImage(PtrUnion packet) +{ + s32 x0, y0, w0, h0; + x0 = packet.S2[2]; + y0 = packet.S2[3]; + w0 = packet.S2[4] & 0x3ff; + h0 = packet.S2[5] & 0x3ff; + + w0 += x0; + if (x0 < 0) x0 = 0; + if (w0 > FRAME_WIDTH) w0 = FRAME_WIDTH; + w0 -= x0; + if (w0 <= 0) return; + h0 += y0; + if (y0 < 0) y0 = 0; + if (h0 > FRAME_HEIGHT) h0 = FRAME_HEIGHT; + h0 -= y0; + if (h0 <= 0) return; + + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"gpuClearImage(x0=%d,y0=%d,w0=%d,h0=%d)\n",x0,y0,w0,h0); + #endif + + if (x0&1) + { + u16* pixel = (u16*)gpu_senquack.vram + FRAME_OFFSET(x0, y0); + u16 rgb = GPU_RGB16(packet.U4[0]); + y0 = FRAME_WIDTH - w0; + do { + x0=w0; + do { *pixel++ = rgb; } while (--x0); + pixel += y0; + } while (--h0); + } + else + { + u32* pixel = (u32*)gpu_senquack.vram + ((FRAME_OFFSET(x0, y0))>>1); + u32 rgb = GPU_RGB16(packet.U4[0]); + rgb |= (rgb<<16); + if (w0&1) + { + y0 = (FRAME_WIDTH - w0 +1)>>1; + w0>>=1; + do { + x0=w0; + do { *pixel++ = rgb; } while (--x0); + *((u16*)pixel) = (u16)rgb; + pixel += y0; + } while (--h0); + } + else + { + y0 = (FRAME_WIDTH - w0)>>1; + w0>>=1; + do { + x0=w0; + do { *pixel++ = rgb; } while (--x0); + pixel += y0; + } while (--h0); + } + } +} + +#endif /* __GPU_UNAI_GPU_RASTER_IMAGE_H__ */ diff --git a/plugins/gpu_senquack/gpu_raster_line.h b/plugins/gpu_senquack/gpu_raster_line.h new file mode 100644 index 000000000..4dd99a6dd --- /dev/null +++ b/plugins/gpu_senquack/gpu_raster_line.h @@ -0,0 +1,720 
@@ +/*************************************************************************** +* Copyright (C) 2010 PCSX4ALL Team * +* Copyright (C) 2010 Unai * +* Copyright (C) 2016 Senquack (dansilsby gmail com) * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * +***************************************************************************/ + +#ifndef __GPU_UNAI_GPU_RASTER_LINE_H__ +#define __GPU_UNAI_GPU_RASTER_LINE_H__ + +/////////////////////////////////////////////////////////////////////////////// +// GPU internal line drawing functions +// +// Rewritten October 2016 by senquack: +// Instead of one pixel at a time, lines are now drawn in runs of pixels, +// whether vertical, horizontal, or diagonal. A new inner driver +// 'gpuPixelSpanFn' is used, as well as an enhanced Bresenham run-slice +// algorithm. For more information, see the following: +// +// Michael Abrash - Graphics Programming Black Book +// Chapters 35 - 36 (does not implement diagonal runs) +// http://www.drdobbs.com/parallel/graphics-programming-black-book/184404919 +// http://www.jagregory.com/abrash-black-book/ +// +// Article by Andrew Delong (does not implement diagonal runs) +// http://timetraces.ca/nw/drawline.htm +// +// 'Run-Based Multi-Point Line Drawing' by Eun Jae Lee & Larry F. 
Hodges
// https://smartech.gatech.edu/bitstream/handle/1853/3632/93-22.pdf
// Provided the idea of doing a half-octant transform allowing lines with
// slopes between 0.5 and 2.0 (diagonal runs of pixels) to be handled
// identically to the traditional horizontal/vertical run-slice method.

// Use 16.16 fixed point precision for line math.
// NOTE: Gouraud colors used by gpuPixelSpanFn can use a different precision.
#define GPU_LINE_FIXED_BITS 16

// If defined, Gouraud lines will use fixed-point multiply-by-inverse to
// do most divisions. With enough accuracy, this should be OK.
#define USE_LINES_ALL_FIXED_PT_MATH

//////////////////////
// Flat-shaded line //
//////////////////////
// Draws one flat-colored line segment.
//  packet             - draw command; endpoints are read from S2[2..5] and
//                       the color from U4[0].
//  gpuPixelSpanDriver - inner driver that draws a run of pixels and returns
//                       the advanced destination pointer.
// The endpoints are offset by DrawingOffset, rejected if they exceed the
// CHKMAX_X / CHKMAX_Y span limits, clipped against DrawingArea, and then
// rendered as runs of pixels (run-slice algorithm).
void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver)
{
	int x0, y0, x1, y1;
	int dx, dy;

	// All three of these variables should be signed (so multiplication works)
	ptrdiff_t sx; // Sign of x delta, positive when x0 < x1
	const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel
	const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line

	// Clip region: xmax/ymax seem to normally be one *past* the rightmost/
	// bottommost pixels of the draw area. Since we render every pixel between
	// and including both line endpoints, subtract one from xmax/ymax.
	const int xmin = gpu_senquack.DrawingArea[0];
	const int ymin = gpu_senquack.DrawingArea[1];
	const int xmax = gpu_senquack.DrawingArea[2] - 1;
	const int ymax = gpu_senquack.DrawingArea[3] - 1;

	x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_senquack.DrawingOffset[0];
	y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_senquack.DrawingOffset[1];
	x1 = GPU_EXPANDSIGN(packet.S2[4]) + gpu_senquack.DrawingOffset[0];
	y1 = GPU_EXPANDSIGN(packet.S2[5]) + gpu_senquack.DrawingOffset[1];

	// Always draw top to bottom, so ensure y0 <= y1
	if (y0 > y1) {
		SwapValues(y0, y1);
		SwapValues(x0, x1);
	}

	// Is line totally outside Y clipping range?
	if (y0 > ymax || y1 < ymin) return;

	dx = x1 - x0;
	dy = y1 - y0;

	// X-axis range check : max distance between any two X coords is 1023
	// (PSX hardware will not render anything violating this rule)
	// NOTE: We'll check y coord range further below
	if (dx >= CHKMAX_X || dx <= -CHKMAX_X)
		return;

	// Y-axis range check and clipping
	if (dy) {
		// Y-axis range check : max distance between any two Y coords is 511
		// (PSX hardware will not render anything violating this rule)
		if (dy >= CHKMAX_Y)
			return;

		// We already know y0 < y1
		if (y0 < ymin) {
			x0 += GPU_FAST_DIV(((ymin - y0) * dx), dy);
			y0 = ymin;
		}
		if (y1 > ymax) {
			x1 += GPU_FAST_DIV(((ymax - y1) * dx), dy);
			y1 = ymax;
		}

		// Recompute in case clipping occurred:
		dx = x1 - x0;
		dy = y1 - y0;
	}

	// Check X clipping range, set 'sx' x-direction variable
	if (dx == 0) {
		// Is vertical line totally outside X clipping range?
		if (x0 < xmin || x0 > xmax)
			return;
		sx = 0;
	} else {
		if (dx > 0) {
			// x0 is leftmost coordinate
			if (x0 > xmax) return; // Both points outside X clip range

			if (x0 < xmin) {
				if (x1 < xmin) return; // Both points outside X clip range
				y0 += GPU_FAST_DIV(((xmin - x0) * dy), dx);
				x0 = xmin;
			}

			if (x1 > xmax) {
				y1 += GPU_FAST_DIV(((xmax - x1) * dy), dx);
				x1 = xmax;
			}

			sx = +1;
			dx = x1 - x0; // Get final value, which should also be absolute value
		} else {
			// x1 is leftmost coordinate
			if (x1 > xmax) return; // Both points outside X clip range

			if (x1 < xmin) {
				if (x0 < xmin) return; // Both points outside X clip range

				y1 += GPU_FAST_DIV(((xmin - x1) * dy), dx);
				x1 = xmin;
			}

			if (x0 > xmax) {
				y0 += GPU_FAST_DIV(((xmax - x0) * dy), dx);
				x0 = xmax;
			}

			sx = -1;
			dx = x0 - x1; // Get final value, which should also be absolute value
		}

		// Recompute in case clipping occurred:
		dy = y1 - y0;
	}

	// IMPORTANT: dx,dy should now contain their absolute values

	int min_length,   // Minimum length of a pixel run
	    start_length, // Length of first run
	    end_length,   // Length of last run
	    err_term,     // Cumulative error to determine when to draw longer run
	    err_adjup,    // Increment to err_term for each run drawn
	    err_adjdown;  // Subtract this from err_term after drawing longer run

	// Color to draw with (16 bits, highest of which is unset mask bit)
	uintptr_t col16 = GPU_RGB16(packet.U4[0]);

	// We use u8 pointers even though PS1 has u16 framebuffer.
	// This allows pixel-drawing functions to increment dst pointer
	// directly by the passed 'incr' value, not having to shift it first.
	u8 *dst = (u8*)gpu_senquack.vram + y0 * dst_stride + x0 * dst_depth;

	// SPECIAL CASE: Vertical line
	if (dx == 0) {
		gpuPixelSpanDriver(dst, col16, dst_stride, dy+1);
		return;
	}

	// SPECIAL CASE: Horizontal line
	if (dy == 0) {
		gpuPixelSpanDriver(dst, col16, sx * dst_depth, dx+1);
		return;
	}

	// SPECIAL CASE: Diagonal line
	if (dx == dy) {
		gpuPixelSpanDriver(dst, col16, dst_stride + (sx * dst_depth), dy+1);
		return;
	}

	int major, minor; // Major axis, minor axis
	ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis

	if (dx > dy) {
		major = dx;
		minor = dy;
	} else {
		major = dy;
		minor = dx;
	}

	// Determine if diagonal or horizontal runs
	if (major < (2 * minor)) {
		// Diagonal runs, so perform half-octant transformation
		minor = major - minor;

		// Advance diagonally when drawing runs
		incr_major = dst_stride + (sx * dst_depth);

		// After drawing each run, correct for over-advance along minor axis
		if (dx > dy)
			incr_minor = -dst_stride;
		else
			incr_minor = -sx * dst_depth;
	} else {
		// Horizontal or vertical runs
		if (dx > dy) {
			incr_major = sx * dst_depth;
			incr_minor = dst_stride;
		} else {
			incr_major = dst_stride;
			incr_minor = sx * dst_depth;
		}
	}

	if (minor > 1) {
		// Minimum number of pixels each run
		min_length = major / minor;

		// Initial error term; reflects an initial step of 0.5 along minor axis
		err_term = (major % minor) - (minor * 2);

		// Increment err_term this much each step along minor axis; when
		// err_term crosses zero, draw longer pixel run.
		err_adjup = (major % minor) * 2;
	} else {
		min_length = major;
		err_term = 0;
		err_adjup = 0;
	}

	// Error term adjustment when err_term turns over; used to factor
	// out the major-axis step made at that time
	err_adjdown = minor * 2;

	// The initial and last runs are partial, because minor axis advances
	// only 0.5 for these runs, rather than 1. Each is half a full run,
	// plus the initial pixel.
	start_length = end_length = (min_length / 2) + 1;

	if (min_length & 1) {
		// If there're an odd number of pixels per run, we have 1 pixel that
		// can't be allocated to either the initial or last partial run, so
		// we'll add 0.5 to err_term so that this pixel will be handled
		// by the normal full-run loop
		err_term += minor;
	} else {
		// If the minimum run length is even and there's no fractional advance,
		// we have one pixel that could go to either the initial or last
		// partial run, which we arbitrarily allocate to the last run
		if (err_adjup == 0)
			start_length--; // Leave out the extra pixel at the start
	}

	// First run of pixels
	dst = gpuPixelSpanDriver(dst, col16, incr_major, start_length);
	dst += incr_minor;

	// Middle runs of pixels
	while (--minor > 0) {
		int run_length = min_length;
		err_term += err_adjup;

		// If err_term passed 0, reset it and draw longer run
		if (err_term > 0) {
			err_term -= err_adjdown;
			run_length++;
		}

		dst = gpuPixelSpanDriver(dst, col16, incr_major, run_length);
		dst += incr_minor;
	}

	// Final run of pixels
	gpuPixelSpanDriver(dst, col16, incr_major, end_length);
}

/////////////////////////
// Gouraud-shaded line //
/////////////////////////
// Draws one Gouraud-shaded line segment.
//  packet             - draw command; endpoint 0 is read from S2[2..3] with
//                       color U4[0], endpoint 1 from S2[6..7] with color U4[2].
//  gpuPixelSpanDriver - inner driver; receives a pointer to a GouraudColor
//                       (cast to uintptr_t) in place of a flat color.
// Range checks, clipping and the run-slice loop mirror gpuDrawLineF(); in
// addition the r/g/b endpoint values are adjusted whenever an endpoint is
// clipped, and per-pixel color increments are computed along the major axis.
void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver)
{
	int x0, y0, x1, y1;
	int dx, dy, dr, dg, db;
	u32 r0, g0, b0, r1, g1, b1;

	// All three of these variables should be signed (so multiplication works)
	ptrdiff_t sx; // Sign of x delta, positive when x0 < x1
	const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel
	const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line

	// Clip region: xmax/ymax seem to normally be one *past* the rightmost/
	// bottommost pixels of the draw area. We'll render every pixel between
	// and including both line endpoints, so subtract one from xmax/ymax.
	const int xmin = gpu_senquack.DrawingArea[0];
	const int ymin = gpu_senquack.DrawingArea[1];
	const int xmax = gpu_senquack.DrawingArea[2] - 1;
	const int ymax = gpu_senquack.DrawingArea[3] - 1;

	x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_senquack.DrawingOffset[0];
	y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_senquack.DrawingOffset[1];
	x1 = GPU_EXPANDSIGN(packet.S2[6]) + gpu_senquack.DrawingOffset[0];
	y1 = GPU_EXPANDSIGN(packet.S2[7]) + gpu_senquack.DrawingOffset[1];

	u32 col0 = packet.U4[0];
	u32 col1 = packet.U4[2];

	// Always draw top to bottom, so ensure y0 <= y1
	if (y0 > y1) {
		SwapValues(y0, y1);
		SwapValues(x0, x1);
		SwapValues(col0, col1);
	}

	// Is line totally outside Y clipping range?
	if (y0 > ymax || y1 < ymin) return;

	// If defined, Gouraud colors are fixed-point 5.11, otherwise they are 8.16
	// (This is only beneficial if using SIMD-optimized pixel driver)
#ifdef GPU_GOURAUD_LOW_PRECISION
	r0 = (col0 >> 3) & 0x1f; g0 = (col0 >> 11) & 0x1f; b0 = (col0 >> 19) & 0x1f;
	r1 = (col1 >> 3) & 0x1f; g1 = (col1 >> 11) & 0x1f; b1 = (col1 >> 19) & 0x1f;
#else
	r0 = col0 & 0xff; g0 = (col0 >> 8) & 0xff; b0 = (col0 >> 16) & 0xff;
	r1 = col1 & 0xff; g1 = (col1 >> 8) & 0xff; b1 = (col1 >> 16) & 0xff;
#endif

	dx = x1 - x0;
	dy = y1 - y0;
	dr = r1 - r0;
	dg = g1 - g0;
	db = b1 - b0;

	// X-axis range check : max distance between any two X coords is 1023
	// (PSX hardware will not render anything violating this rule)
	// NOTE: We'll check y coord range further below
	if (dx >= CHKMAX_X || dx <= -CHKMAX_X)
		return;

	// Y-axis range check and clipping
	if (dy) {
		// Y-axis range check : max distance between any two Y coords is 511
		// (PSX hardware will not render anything violating this rule)
		if (dy >= CHKMAX_Y)
			return;

		// We already know y0 < y1
		if (y0 < ymin) {
#ifdef USE_LINES_ALL_FIXED_PT_MATH
			s32 factor = GPU_FAST_DIV(((ymin - y0) << GPU_LINE_FIXED_BITS), dy);
			x0 += (dx * factor) >> GPU_LINE_FIXED_BITS;
			r0 += (dr * factor) >> GPU_LINE_FIXED_BITS;
			g0 += (dg * factor) >> GPU_LINE_FIXED_BITS;
			b0 += (db * factor) >> GPU_LINE_FIXED_BITS;
#else
			x0 += (ymin - y0) * dx / dy;
			r0 += (ymin - y0) * dr / dy;
			g0 += (ymin - y0) * dg / dy;
			b0 += (ymin - y0) * db / dy;
#endif
			y0 = ymin;
		}

		if (y1 > ymax) {
#ifdef USE_LINES_ALL_FIXED_PT_MATH
			s32 factor = GPU_FAST_DIV(((ymax - y1) << GPU_LINE_FIXED_BITS), dy);
			x1 += (dx * factor) >> GPU_LINE_FIXED_BITS;
			r1 += (dr * factor) >> GPU_LINE_FIXED_BITS;
			g1 += (dg * factor) >> GPU_LINE_FIXED_BITS;
			b1 += (db * factor) >> GPU_LINE_FIXED_BITS;
#else
			x1 += (ymax - y1) * dx / dy;
			r1 += (ymax - y1) * dr / dy;
			g1 += (ymax - y1) * dg / dy;
			b1 += (ymax - y1) * db / dy;
#endif
			y1 = ymax;
		}

		// Recompute in case clipping occurred:
		dx = x1 - x0;
		dy = y1 - y0;
		dr = r1 - r0;
		dg = g1 - g0;
		db = b1 - b0;
	}

	// Check X clipping range, set 'sx' x-direction variable
	if (dx == 0) {
		// Is vertical line totally outside X clipping range?
		if (x0 < xmin || x0 > xmax)
			return;
		sx = 0;
	} else {
		if (dx > 0) {
			// x0 is leftmost coordinate
			if (x0 > xmax) return; // Both points outside X clip range

			if (x0 < xmin) {
				if (x1 < xmin) return; // Both points outside X clip range

#ifdef USE_LINES_ALL_FIXED_PT_MATH
				s32 factor = GPU_FAST_DIV(((xmin - x0) << GPU_LINE_FIXED_BITS), dx);
				y0 += (dy * factor) >> GPU_LINE_FIXED_BITS;
				r0 += (dr * factor) >> GPU_LINE_FIXED_BITS;
				g0 += (dg * factor) >> GPU_LINE_FIXED_BITS;
				b0 += (db * factor) >> GPU_LINE_FIXED_BITS;
#else
				y0 += (xmin - x0) * dy / dx;
				r0 += (xmin - x0) * dr / dx;
				g0 += (xmin - x0) * dg / dx;
				b0 += (xmin - x0) * db / dx;
#endif
				x0 = xmin;
			}

			if (x1 > xmax) {
#ifdef USE_LINES_ALL_FIXED_PT_MATH
				s32 factor = GPU_FAST_DIV(((xmax - x1) << GPU_LINE_FIXED_BITS), dx);
				y1 += (dy * factor) >> GPU_LINE_FIXED_BITS;
				r1 += (dr * factor) >> GPU_LINE_FIXED_BITS;
				g1 += (dg * factor) >> GPU_LINE_FIXED_BITS;
				b1 += (db * factor) >> GPU_LINE_FIXED_BITS;
#else
				y1 += (xmax - x1) * dy / dx;
				r1 += (xmax - x1) * dr / dx;
				g1 += (xmax - x1) * dg / dx;
				b1 += (xmax - x1) * db / dx;
#endif
				x1 = xmax;
			}

			sx = +1;
			dx = x1 - x0; // Get final value, which should also be absolute value
		} else {
			// x1 is leftmost coordinate
			if (x1 > xmax) return; // Both points outside X clip range

			if (x1 < xmin) {
				if (x0 < xmin) return; // Both points outside X clip range

#ifdef USE_LINES_ALL_FIXED_PT_MATH
				s32 factor = GPU_FAST_DIV(((xmin - x1) << GPU_LINE_FIXED_BITS), dx);
				y1 += (dy * factor) >> GPU_LINE_FIXED_BITS;
				r1 += (dr * factor) >> GPU_LINE_FIXED_BITS;
				g1 += (dg * factor) >> GPU_LINE_FIXED_BITS;
				b1 += (db * factor) >> GPU_LINE_FIXED_BITS;
#else
				y1 += (xmin - x1) * dy / dx;
				r1 += (xmin - x1) * dr / dx;
				g1 += (xmin - x1) * dg / dx;
				b1 += (xmin - x1) * db / dx;
#endif
				x1 = xmin;
			}

			if (x0 > xmax) {
#ifdef USE_LINES_ALL_FIXED_PT_MATH
				s32 factor = GPU_FAST_DIV(((xmax - x0) << GPU_LINE_FIXED_BITS), dx);
				y0 += (dy * factor) >> GPU_LINE_FIXED_BITS;
				r0 += (dr * factor) >> GPU_LINE_FIXED_BITS;
				g0 += (dg * factor) >> GPU_LINE_FIXED_BITS;
				b0 += (db * factor) >> GPU_LINE_FIXED_BITS;
#else
				y0 += (xmax - x0) * dy / dx;
				r0 += (xmax - x0) * dr / dx;
				g0 += (xmax - x0) * dg / dx;
				b0 += (xmax - x0) * db / dx;
#endif
				x0 = xmax;
			}

			sx = -1;
			dx = x0 - x1; // Get final value, which should also be absolute value
		}

		// Recompute in case clipping occurred:
		dy = y1 - y0;
		dr = r1 - r0;
		dg = g1 - g0;
		db = b1 - b0;
	}

	// IMPORTANT: dx,dy should now contain their absolute values

	int min_length,   // Minimum length of a pixel run
	    start_length, // Length of first run
	    end_length,   // Length of last run
	    err_term,     // Cumulative error to determine when to draw longer run
	    err_adjup,    // Increment to err_term for each run drawn
	    err_adjdown;  // Subtract this from err_term after drawing longer run

	// Starting color, converted to Gouraud fixed point; the *_incr members
	// are filled in by whichever case below ends up drawing the line.
	GouraudColor gcol;
	gcol.r = r0 << GPU_GOURAUD_FIXED_BITS;
	gcol.g = g0 << GPU_GOURAUD_FIXED_BITS;
	gcol.b = b0 << GPU_GOURAUD_FIXED_BITS;

	// We use u8 pointers even though PS1 has u16 framebuffer.
	// This allows pixel-drawing functions to increment dst pointer
	// directly by the passed 'incr' value, not having to shift it first.
	u8 *dst = (u8*)gpu_senquack.vram + y0 * dst_stride + x0 * dst_depth;

	// SPECIAL CASE: Vertical line
	if (dx == 0) {
#ifdef USE_LINES_ALL_FIXED_PT_MATH
		// Get dy fixed-point inverse
		s32 inv_factor = 1 << GPU_GOURAUD_FIXED_BITS;
		if (dy > 1) inv_factor = GPU_FAST_DIV(inv_factor, dy);

		// Simultaneously divide and convert integer to Gouraud fixed point:
		gcol.r_incr = dr * inv_factor;
		gcol.g_incr = dg * inv_factor;
		gcol.b_incr = db * inv_factor;
#else
		// First, convert to Gouraud fixed point
		gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS;
		gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS;
		gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS;

		if (dy > 1) {
			if (dr) gcol.r_incr /= dy;
			if (dg) gcol.g_incr /= dy;
			if (db) gcol.b_incr /= dy;
		}
#endif

		gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride, dy+1);
		return;
	}

	// SPECIAL CASE: Horizontal line
	if (dy == 0) {
#ifdef USE_LINES_ALL_FIXED_PT_MATH
		// Get dx fixed-point inverse
		s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS);
		if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx);

		// Simultaneously divide and convert integer to Gouraud fixed point:
		gcol.r_incr = dr * inv_factor;
		gcol.g_incr = dg * inv_factor;
		gcol.b_incr = db * inv_factor;
#else
		gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS;
		gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS;
		gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS;

		if (dx > 1) {
			if (dr) gcol.r_incr /= dx;
			if (dg) gcol.g_incr /= dx;
			if (db) gcol.b_incr /= dx;
		}
#endif

		gpuPixelSpanDriver(dst, (uintptr_t)&gcol, sx * dst_depth, dx+1);
		return;
	}

	// SPECIAL CASE: Diagonal line
	if (dx == dy) {
#ifdef USE_LINES_ALL_FIXED_PT_MATH
		// Get dx fixed-point inverse
		s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS);
		if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx);

		// Simultaneously divide and convert integer to Gouraud fixed point:
		gcol.r_incr = dr * inv_factor;
		gcol.g_incr = dg * inv_factor;
		gcol.b_incr = db * inv_factor;
#else
		// First, convert to Gouraud fixed point
		gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS;
		gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS;
		gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS;

		if (dx > 1) {
			if (dr) gcol.r_incr /= dx;
			if (dg) gcol.g_incr /= dx;
			if (db) gcol.b_incr /= dx;
		}
#endif

		gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride + (sx * dst_depth), dy+1);
		return;
	}

	int major, minor; // Absolute val of major,minor axis delta
	ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis

	if (dx > dy) {
		major = dx;
		minor = dy;
	} else {
		major = dy;
		minor = dx;
	}

	// Determine if diagonal or horizontal runs
	if (major < (2 * minor)) {
		// Diagonal runs, so perform half-octant transformation
		minor = major - minor;

		// Advance diagonally when drawing runs
		incr_major = dst_stride + (sx * dst_depth);

		// After drawing each run, correct for over-advance along minor axis
		if (dx > dy)
			incr_minor = -dst_stride;
		else
			incr_minor = -sx * dst_depth;
	} else {
		// Horizontal or vertical runs
		if (dx > dy) {
			incr_major = sx * dst_depth;
			incr_minor = dst_stride;
		} else {
			incr_major = dst_stride;
			incr_minor = sx * dst_depth;
		}
	}

#ifdef USE_LINES_ALL_FIXED_PT_MATH
	s32 major_inv = GPU_FAST_DIV((1 << GPU_GOURAUD_FIXED_BITS), major);

	// Simultaneously divide and convert from integer to Gouraud fixed point:
	gcol.r_incr = dr * major_inv;
	gcol.g_incr = dg * major_inv;
	gcol.b_incr = db * major_inv;
#else
	gcol.r_incr = dr ? ((dr << GPU_GOURAUD_FIXED_BITS) / major) : 0;
	gcol.g_incr = dg ? ((dg << GPU_GOURAUD_FIXED_BITS) / major) : 0;
	gcol.b_incr = db ? ((db << GPU_GOURAUD_FIXED_BITS) / major) : 0;
#endif

	if (minor > 1) {
		// Minimum number of pixels each run
		min_length = major / minor;

		// Initial error term; reflects an initial step of 0.5 along minor axis
		err_term = (major % minor) - (minor * 2);

		// Increment err_term this much each step along minor axis; when
		// err_term crosses zero, draw longer pixel run.
		err_adjup = (major % minor) * 2;
	} else {
		min_length = major;
		err_term = 0;
		err_adjup = 0;
	}

	// Error term adjustment when err_term turns over; used to factor
	// out the major-axis step made at that time
	err_adjdown = minor * 2;

	// The initial and last runs are partial, because minor axis advances
	// only 0.5 for these runs, rather than 1. Each is half a full run,
	// plus the initial pixel.
	start_length = end_length = (min_length / 2) + 1;

	if (min_length & 1) {
		// If there're an odd number of pixels per run, we have 1 pixel that
		// can't be allocated to either the initial or last partial run, so
		// we'll add 0.5 to err_term so that this pixel will be handled
		// by the normal full-run loop
		err_term += minor;
	} else {
		// If the minimum run length is even and there's no fractional advance,
		// we have one pixel that could go to either the initial or last
		// partial run, which we'll arbitrarily allocate to the last run
		if (err_adjup == 0)
			start_length--; // Leave out the extra pixel at the start
	}

	// First run of pixels
	dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, start_length);
	dst += incr_minor;

	// Middle runs of pixels
	while (--minor > 0) {
		int run_length = min_length;
		err_term += err_adjup;

		// If err_term passed 0, reset it and draw longer run
		if (err_term > 0) {
			err_term -= err_adjdown;
			run_length++;
		}

		dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, run_length);
		dst += incr_minor;
	}

	// Final run of pixels
	gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, end_length);
}

#endif /* __GPU_UNAI_GPU_RASTER_LINE_H__ */
diff --git a/plugins/gpu_senquack/gpu_raster_polygon.h b/plugins/gpu_senquack/gpu_raster_polygon.h
new file mode 100644
index 000000000..8638ac420
--- /dev/null
+++ b/plugins/gpu_senquack/gpu_raster_polygon.h
@@ -0,0 +1,1453 @@
/***************************************************************************
* Copyright (C) 2010 PCSX4ALL Team *
* Copyright (C) 2010 Unai *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
***************************************************************************/

#ifndef __GPU_UNAI_GPU_RASTER_POLYGON_H__
#define __GPU_UNAI_GPU_RASTER_POLYGON_H__

//senquack - NOTE: GPU Unai poly routines have been rewritten/adapted
// from DrHell routines to fix multiple issues. See README_senquack.txt

///////////////////////////////////////////////////////////////////////////////
// Shared poly vertex buffer, able to handle 3 or 4-pt polys of any type.
+/////////////////////////////////////////////////////////////////////////////// + +struct PolyVertex { + s32 x, y; // Sign-extended 11-bit X,Y coords + union { + struct { u8 u, v, pad[2]; } tex; // Texture coords (if used) + u32 tex_word; + }; + union { + struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used) + u32 col_word; + }; +}; + +enum PolyAttribute { + POLYATTR_TEXTURE = (1 << 0), + POLYATTR_GOURAUD = (1 << 1) +}; + +enum PolyType { + POLYTYPE_F = 0, + POLYTYPE_FT = (POLYATTR_TEXTURE), + POLYTYPE_G = (POLYATTR_GOURAUD), + POLYTYPE_GT = (POLYATTR_TEXTURE | POLYATTR_GOURAUD) +}; + +/////////////////////////////////////////////////////////////////////////////// +// polyInitVertexBuffer() +// Fills vbuf[] array with data from any type of poly draw-command packet. +/////////////////////////////////////////////////////////////////////////////// +static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyType ptype, u32 is_quad) +{ + bool texturing = ptype & POLYATTR_TEXTURE; + bool gouraud = ptype & POLYATTR_GOURAUD; + + int vert_stride = 1; // Stride of vertices in cmd packet, in 32-bit words + if (texturing) + vert_stride++; + if (gouraud) + vert_stride++; + + int num_verts = (is_quad) ? 
4 : 3; + u32 *ptr; + + // X,Y coords, adjusted by draw offsets + s32 x_off = gpu_senquack.DrawingOffset[0]; + s32 y_off = gpu_senquack.DrawingOffset[1]; + ptr = &packet.U4[1]; + for (int i=0; i < num_verts; ++i, ptr += vert_stride) { + s16* coord_ptr = (s16*)ptr; + vbuf[i].x = GPU_EXPANDSIGN(coord_ptr[0]) + x_off; + vbuf[i].y = GPU_EXPANDSIGN(coord_ptr[1]) + y_off; + } + + // U,V texture coords (if applicable) + if (texturing) { + ptr = &packet.U4[2]; + for (int i=0; i < num_verts; ++i, ptr += vert_stride) + vbuf[i].tex_word = *ptr; + } + + // Colors (if applicable) + if (gouraud) { + ptr = &packet.U4[0]; + for (int i=0; i < num_verts; ++i, ptr += vert_stride) + vbuf[i].col_word = *ptr; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// Helper functions to determine which vertex in a 2 or 3 vertex array +// has the highest/lowest X/Y coordinate. +// Note: the comparison logic is such that, given a set of vertices with +// identical values for a given coordinate, a different index will be +// returned from vertIdxOfLeast..() than a call to vertIdxOfHighest..(). +// This ensures that, during the vertex-ordering phase of rasterization, +// all three vertices remain unique. +/////////////////////////////////////////////////////////////////////////////// + +template +static inline int vertIdxOfLeastXCoord2(const T *Tptr) +{ + return (Tptr[0].x <= Tptr[1].x) ? 0 : 1; +} + +template +static inline int vertIdxOfLeastXCoord3(const T *Tptr) +{ + int least_of_v0_v1 = vertIdxOfLeastXCoord2(Tptr); + return (Tptr[least_of_v0_v1].x <= Tptr[2].x) ? least_of_v0_v1 : 2; +} + +template +static inline int vertIdxOfLeastYCoord2(const T *Tptr) +{ + return (Tptr[0].y <= Tptr[1].y) ? 0 : 1; +} + +template +static inline int vertIdxOfLeastYCoord3(const T *Tptr) +{ + int least_of_v0_v1 = vertIdxOfLeastYCoord2(Tptr); + return (Tptr[least_of_v0_v1].y <= Tptr[2].y) ? 
least_of_v0_v1 : 2; +} + +template +static inline int vertIdxOfHighestXCoord2(const T *Tptr) +{ + return (Tptr[1].x >= Tptr[0].x) ? 1 : 0; +} + +template +static inline int vertIdxOfHighestXCoord3(const T *Tptr) +{ + int highest_of_v0_v1 = vertIdxOfHighestXCoord2(Tptr); + return (Tptr[2].x >= Tptr[highest_of_v0_v1].x) ? 2 : highest_of_v0_v1; +} + +template +static inline int vertIdxOfHighestYCoord2(const T *Tptr) +{ + return (Tptr[1].y >= Tptr[0].y) ? 1 : 0; +} + +template +static inline int vertIdxOfHighestYCoord3(const T *Tptr) +{ + int highest_of_v0_v1 = vertIdxOfHighestYCoord2(Tptr); + return (Tptr[2].y >= Tptr[highest_of_v0_v1].y) ? 2 : highest_of_v0_v1; +} + +/////////////////////////////////////////////////////////////////////////////// +// polyUseTriangle() +// Determines if the specified triangle should be rendered. If so, it +// fills the given array of vertex pointers, vert_ptrs, in order of +// increasing Y coordinate values, as required by rasterization algorithm. +// Parameter 'tri_num' is 0 for first triangle (idx 0,1,2 of vbuf[]), +// or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]). +// Returns true if triangle should be rendered, false if not. +/////////////////////////////////////////////////////////////////////////////// +static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs) +{ + // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)? + const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 
0 : 1]; + + // Get indices of highest/lowest X,Y coords within triangle + int idx_lowest_x = vertIdxOfLeastXCoord3(tri_ptr); + int idx_highest_x = vertIdxOfHighestXCoord3(tri_ptr); + int idx_lowest_y = vertIdxOfLeastYCoord3(tri_ptr); + int idx_highest_y = vertIdxOfHighestYCoord3(tri_ptr); + + // Maximum absolute distance between any two X coordinates is 1023, + // and for Y coordinates is 511 (PS1 hardware limitation) + int lowest_x = tri_ptr[idx_lowest_x].x; + int highest_x = tri_ptr[idx_highest_x].x; + int lowest_y = tri_ptr[idx_lowest_y].y; + int highest_y = tri_ptr[idx_highest_y].y; + if ((highest_x - lowest_x) >= CHKMAX_X || + (highest_y - lowest_y) >= CHKMAX_Y) + return false; + + // Determine if triangle is completely outside clipping range + int xmin, xmax, ymin, ymax; + xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; + ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + int clipped_lowest_x = Max2(xmin,lowest_x); + int clipped_lowest_y = Max2(ymin,lowest_y); + int clipped_highest_x = Min2(xmax,highest_x); + int clipped_highest_y = Min2(ymax,highest_y); + if (clipped_lowest_x >= clipped_highest_x || + clipped_lowest_y >= clipped_highest_y) + return false; + + // Order vertex ptrs by increasing y value (draw routines need this). 
+ // The middle index is deduced by a binary math trick that depends + // on index range always being between 0..2 + vert_ptrs[0] = tri_ptr + idx_lowest_y; + vert_ptrs[1] = tri_ptr + ((idx_lowest_y + idx_highest_y) ^ 3); + vert_ptrs[2] = tri_ptr + idx_highest_y; + return true; +} + +/////////////////////////////////////////////////////////////////////////////// +// GPU internal polygon drawing functions +/////////////////////////////////////////////////////////////////////////////// + +/*---------------------------------------------------------------------- +gpuDrawPolyF - Flat-shaded, untextured poly +----------------------------------------------------------------------*/ +void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) +{ + // Set up bgr555 color to be used across calls in inner driver + gpu_senquack.PixelData = GPU_RGB16(packet.U4[0]); + + PolyVertex vbuf[4]; + polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad); + + int total_passes = is_quad ? 2 : 1; + int cur_pass = 0; + do + { + const PolyVertex* vptrs[3]; + if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) + continue; + + s32 xa, xb, ya, yb; + s32 x3, dx3, x4, dx4, dx; + s32 x0, x1, x2, y0, y1, y2; + + x0 = vptrs[0]->x; y0 = vptrs[0]->y; + x1 = vptrs[1]->x; y1 = vptrs[1]->y; + x2 = vptrs[2]->x; y2 = vptrs[2]->y; + + ya = y2 - y0; + yb = y2 - y1; + dx = (x2 - x1) * ya - (x2 - x0) * yb; + + for (int loop0 = 2; loop0; loop0--) { + if (loop0 == 2) { + ya = y0; yb = y1; + x3 = x4 = i2x(x0); + if (dx < 0) { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; + dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; +#else + dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0; + dx4 = ((y1 - y0) != 0) ? 
(fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + dx3 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0; + dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; +#else + dx3 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; + dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; +#endif +#endif + } else { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; +#else + dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + dx3 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; + dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0; +#else + dx3 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; + dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; +#endif +#endif + } + } else { + //senquack - break out of final loop if nothing to be drawn (1st loop + // must always be taken to setup dx3/dx4) + if (y1 == y2) break; + + ya = y1; yb = y2; + + if (dx < 0) { + x3 = i2x(x0) + (dx3 * (y1 - y0)); + x4 = i2x(x1); +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? 
GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; +#endif +#endif + } else { + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; +#else + dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + dx3 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0; +#else + dx3 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; +#endif +#endif + } + } + + s32 xmin, xmax, ymin, ymax; + xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; + ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + + if ((ymin - ya) > 0) { + x3 += (dx3 * (ymin - ya)); + x4 += (dx4 * (ymin - ya)); + ya = ymin; + } + + if (yb > ymax) yb = ymax; + + int loop1 = yb - ya; + if (loop1 <= 0) + continue; + + u16* PixelBase = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(0, ya)]; + int li=gpu_senquack.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1); + + for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH, + x3 += dx3, x4 += dx4 ) + { + if (ya&li) continue; + if ((ya&pi)==pif) continue; + + xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4); + if ((xmin - xa) > 0) xa = xmin; + if (xb > xmax) xb = xmax; + if ((xb - xa) > 0) + gpuPolySpanDriver(gpu_senquack, PixelBase + xa, (xb - xa)); + } + } + } while (++cur_pass < total_passes); +} + +/*---------------------------------------------------------------------- +gpuDrawPolyFT - Flat-shaded, textured poly +----------------------------------------------------------------------*/ +void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) +{ + // r8/g8/b8 used if texture-blending & 
dithering is applied (24-bit light) + gpu_senquack.r8 = packet.U1[0]; + gpu_senquack.g8 = packet.U1[1]; + gpu_senquack.b8 = packet.U1[2]; + // r5/g5/b5 used if just texture-blending is applied (15-bit light) + gpu_senquack.r5 = packet.U1[0] >> 3; + gpu_senquack.g5 = packet.U1[1] >> 3; + gpu_senquack.b5 = packet.U1[2] >> 3; + + PolyVertex vbuf[4]; + polyInitVertexBuffer(vbuf, packet, POLYTYPE_FT, is_quad); + + int total_passes = is_quad ? 2 : 1; + int cur_pass = 0; + do + { + const PolyVertex* vptrs[3]; + if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) + continue; + + s32 xa, xb, ya, yb; + s32 x3, dx3, x4, dx4, dx; + s32 u3, du3, v3, dv3; + s32 x0, x1, x2, y0, y1, y2; + s32 u0, u1, u2, v0, v1, v2; + s32 du4, dv4; + + x0 = vptrs[0]->x; y0 = vptrs[0]->y; + u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v; + x1 = vptrs[1]->x; y1 = vptrs[1]->y; + u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v; + x2 = vptrs[2]->x; y2 = vptrs[2]->y; + u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v; + + ya = y2 - y0; + yb = y2 - y1; + dx4 = (x2 - x1) * ya - (x2 - x0) * yb; + du4 = (u2 - u1) * ya - (u2 - u0) * yb; + dv4 = (v2 - v1) * ya - (v2 - v0) * yb; + dx = dx4; + if (dx4 < 0) { + dx4 = -dx4; + du4 = -du4; + dv4 = -dv4; + } + +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if (dx4 != 0) { + float finv = FloatInv(dx4); + du4 = (fixed)((du4 << FIXED_BITS) * finv); + dv4 = (fixed)((dv4 << FIXED_BITS) * finv); + } else { + du4 = dv4 = 0; + } +#else + if (dx4 != 0) { + float fdiv = dx4; + du4 = (fixed)((du4 << FIXED_BITS) / fdiv); + dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv); + } else { + du4 = dv4 = 0; + } +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if (dx4 != 0) { + int iF, iS; + xInv(dx4, iF, iS); + du4 = xInvMulx(du4, iF, iS); + dv4 = xInvMulx(dv4, iF, iS); + } else { + du4 = dv4 = 0; + } +#else + if (dx4 != 0) { + du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4); + dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4); + } else { + du4 = dv4 = 0; + } +#endif 
+#endif + // Set u,v increments for inner driver + gpu_senquack.u_inc = du4; + gpu_senquack.v_inc = dv4; + + //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here? + // (SAME ISSUE ELSEWHERE) + for (s32 loop0 = 2; loop0; loop0--) { + if (loop0 == 2) { + ya = y0; yb = y1; + x3 = x4 = i2x(x0); + u3 = i2x(u0); v3 = i2x(v0); + if (dx < 0) { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y2 - y0) != 0) { + float finv = FloatInv(y2 - y0); + dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv); + du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv); + dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; +#else + if ((y2 - y0) != 0) { + float fdiv = y2 - y0; + dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv); + du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv); + dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y2 - y0) != 0) { + int iF, iS; + xInv((y2 - y0), iF, iS); + dx3 = xInvMulx((x2 - x0), iF, iS); + du3 = xInvMulx((u2 - u0), iF, iS); + dv3 = xInvMulx((v2 - v0), iF, iS); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; +#else + if ((y2 - y0) != 0) { + dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)); + du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0)); + dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0)); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y1 - y0) != 0) ? 
GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; +#endif +#endif + } else { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y1 - y0) != 0) { + float finv = FloatInv(y1 - y0); + dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv); + du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv); + dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; +#else + if ((y1 - y0) != 0) { + float fdiv = y1 - y0; + dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv); + du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv); + dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y1 - y0) != 0) { + int iF, iS; + xInv((y1 - y0), iF, iS); + dx3 = xInvMulx((x1 - x0), iF, iS); + du3 = xInvMulx((u1 - u0), iF, iS); + dv3 = xInvMulx((v1 - v0), iF, iS); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0; +#else + if ((y1 - y0) != 0) { + dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)); + du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0)); + dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0)); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; +#endif +#endif + } + } else { + //senquack - break out of final loop if nothing to be drawn (1st loop + // must always be taken to setup dx3/dx4) + if (y1 == y2) break; + + ya = y1; yb = y2; + + if (dx < 0) { + x3 = i2x(x0); + x4 = i2x(x1); + u3 = i2x(u0); + v3 = i2x(v0); + if ((y1 - y0) != 0) { + x3 += (dx3 * (y1 - y0)); + u3 += (du3 * (y1 - y0)); + v3 += (dv3 * (y1 - y0)); + } +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? 
(fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; +#endif +#endif + } else { + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); + u3 = i2x(u1); + v3 = i2x(v1); +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y2 - y1) != 0) { + float finv = FloatInv(y2 - y1); + dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv); + du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv); + dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv); + } else { + dx3 = du3 = dv3 = 0; + } +#else + if ((y2 - y1) != 0) { + float fdiv = y2 - y1; + dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv); + du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv); + dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv); + } else { + dx3 = du3 = dv3 = 0; + } +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y2 - y1) != 0) { + int iF, iS; + xInv((y2 - y1), iF, iS); + dx3 = xInvMulx((x2 - x1), iF, iS); + du3 = xInvMulx((u2 - u1), iF, iS); + dv3 = xInvMulx((v2 - v1), iF, iS); + } else { + dx3 = du3 = dv3 = 0; + } +#else + if ((y2 - y1) != 0) { + dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)); + du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1)); + dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1)); + } else { + dx3 = du3 = dv3 = 0; + } +#endif +#endif + } + } + + s32 xmin, xmax, ymin, ymax; + xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; + ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + + if ((ymin - ya) > 0) { + x3 += dx3 * (ymin - ya); + x4 += dx4 * (ymin - ya); + u3 += du3 * (ymin - ya); + v3 += dv3 * (ymin - ya); + ya = ymin; + } + + if (yb > ymax) yb = ymax; + + int loop1 = yb - ya; + if 
(loop1 <= 0) + continue; + + u16* PixelBase = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(0, ya)]; + int li=gpu_senquack.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1); + + for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH, + x3 += dx3, x4 += dx4, + u3 += du3, v3 += dv3 ) + { + if (ya&li) continue; + if ((ya&pi)==pif) continue; + + u32 u4, v4; + + xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4); + u4 = u3; v4 = v3; + + fixed itmp = i2x(xa) - x3; + if (itmp != 0) { + u4 += (du4 * itmp) >> FIXED_BITS; + v4 += (dv4 * itmp) >> FIXED_BITS; + } + + u4 += fixed_HALF; + v4 += fixed_HALF; + + if ((xmin - xa) > 0) { + u4 += du4 * (xmin - xa); + v4 += dv4 * (xmin - xa); + xa = xmin; + } + + // Set u,v coords for inner driver + gpu_senquack.u = u4; + gpu_senquack.v = v4; + + if (xb > xmax) xb = xmax; + if ((xb - xa) > 0) + gpuPolySpanDriver(gpu_senquack, PixelBase + xa, (xb - xa)); + } + } + } while (++cur_pass < total_passes); +} + +/*---------------------------------------------------------------------- +gpuDrawPolyG - Gouraud-shaded, untextured poly +----------------------------------------------------------------------*/ +void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) +{ + PolyVertex vbuf[4]; + polyInitVertexBuffer(vbuf, packet, POLYTYPE_G, is_quad); + + int total_passes = is_quad ? 
2 : 1; + int cur_pass = 0; + do + { + const PolyVertex* vptrs[3]; + if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) + continue; + + s32 xa, xb, ya, yb; + s32 x3, dx3, x4, dx4, dx; + s32 r3, dr3, g3, dg3, b3, db3; + s32 x0, x1, x2, y0, y1, y2; + s32 r0, r1, r2, g0, g1, g2, b0, b1, b2; + s32 dr4, dg4, db4; + + x0 = vptrs[0]->x; y0 = vptrs[0]->y; + r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b; + x1 = vptrs[1]->x; y1 = vptrs[1]->y; + r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b; + x2 = vptrs[2]->x; y2 = vptrs[2]->y; + r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b; + + ya = y2 - y0; + yb = y2 - y1; + dx4 = (x2 - x1) * ya - (x2 - x0) * yb; + dr4 = (r2 - r1) * ya - (r2 - r0) * yb; + dg4 = (g2 - g1) * ya - (g2 - g0) * yb; + db4 = (b2 - b1) * ya - (b2 - b0) * yb; + dx = dx4; + if (dx4 < 0) { + dx4 = -dx4; + dr4 = -dr4; + dg4 = -dg4; + db4 = -db4; + } + +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if (dx4 != 0) { + float finv = FloatInv(dx4); + dr4 = (fixed)((dr4 << FIXED_BITS) * finv); + dg4 = (fixed)((dg4 << FIXED_BITS) * finv); + db4 = (fixed)((db4 << FIXED_BITS) * finv); + } else { + dr4 = dg4 = db4 = 0; + } +#else + if (dx4 != 0) { + float fdiv = dx4; + dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv); + dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv); + db4 = (fixed)((db4 << FIXED_BITS) / fdiv); + } else { + dr4 = dg4 = db4 = 0; + } +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if (dx4 != 0) { + int iF, iS; + xInv(dx4, iF, iS); + dr4 = xInvMulx(dr4, iF, iS); + dg4 = xInvMulx(dg4, iF, iS); + db4 = xInvMulx(db4, iF, iS); + } else { + dr4 = dg4 = db4 = 0; + } +#else + if (dx4 != 0) { + dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4); + dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4); + db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4); + } else { + dr4 = dg4 = db4 = 0; + } +#endif +#endif + // Setup packed Gouraud increment for inner driver + gpu_senquack.gInc = 
gpuPackGouraudColInc(dr4, dg4, db4); + + for (s32 loop0 = 2; loop0; loop0--) { + if (loop0 == 2) { + ya = y0; + yb = y1; + x3 = x4 = i2x(x0); + r3 = i2x(r0); + g3 = i2x(g0); + b3 = i2x(b0); + if (dx < 0) { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y2 - y0) != 0) { + float finv = FloatInv(y2 - y0); + dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv); + dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv); + dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv); + db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; +#else + if ((y2 - y0) != 0) { + float fdiv = y2 - y0; + dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv); + dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv); + dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv); + db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y2 - y0) != 0) { + int iF, iS; + xInv((y2 - y0), iF, iS); + dx3 = xInvMulx((x2 - x0), iF, iS); + dr3 = xInvMulx((r2 - r0), iF, iS); + dg3 = xInvMulx((g2 - g0), iF, iS); + db3 = xInvMulx((b2 - b0), iF, iS); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; +#else + if ((y2 - y0) != 0) { + dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)); + dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0)); + dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0)); + db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0)); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? 
GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; +#endif +#endif + } else { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y1 - y0) != 0) { + float finv = FloatInv(y1 - y0); + dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv); + dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv); + dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv); + db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; +#else + if ((y1 - y0) != 0) { + float fdiv = y1 - y0; + dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv); + dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv); + dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv); + db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y1 - y0) != 0) { + int iF, iS; + xInv((y1 - y0), iF, iS); + dx3 = xInvMulx((x1 - x0), iF, iS); + dr3 = xInvMulx((r1 - r0), iF, iS); + dg3 = xInvMulx((g1 - g0), iF, iS); + db3 = xInvMulx((b1 - b0), iF, iS); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0; +#else + if ((y1 - y0) != 0) { + dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)); + dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0)); + dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0)); + db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0)); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? 
GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; +#endif +#endif + } + } else { + //senquack - break out of final loop if nothing to be drawn (1st loop + // must always be taken to setup dx3/dx4) + if (y1 == y2) break; + + ya = y1; yb = y2; + + if (dx < 0) { + x3 = i2x(x0); x4 = i2x(x1); + r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0); + + if ((y1 - y0) != 0) { + x3 += (dx3 * (y1 - y0)); + r3 += (dr3 * (y1 - y0)); + g3 += (dg3 * (y1 - y0)); + b3 += (db3 * (y1 - y0)); + } + +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; +#endif +#endif + } else { + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); + + r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1); + +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y2 - y1) != 0) { + float finv = FloatInv(y2 - y1); + dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv); + dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv); + dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv); + db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } +#else + if ((y2 - y1) != 0) { + float fdiv = y2 - y1; + dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv); + dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv); + dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv); + db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y2 - y1) != 0) { + int iF, iS; + xInv((y2 - y1), iF, iS); + dx3 = xInvMulx((x2 - x1), iF, iS); + dr3 = xInvMulx((r2 - r1), iF, iS); + dg3 = xInvMulx((g2 - g1), 
iF, iS); + db3 = xInvMulx((b2 - b1), iF, iS); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } +#else + if ((y2 - y1) != 0) { + dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)); + dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1)); + dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1)); + db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1)); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } +#endif +#endif + } + } + + s32 xmin, xmax, ymin, ymax; + xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; + ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + + if ((ymin - ya) > 0) { + x3 += (dx3 * (ymin - ya)); + x4 += (dx4 * (ymin - ya)); + r3 += (dr3 * (ymin - ya)); + g3 += (dg3 * (ymin - ya)); + b3 += (db3 * (ymin - ya)); + ya = ymin; + } + + if (yb > ymax) yb = ymax; + + int loop1 = yb - ya; + if (loop1 <= 0) + continue; + + u16* PixelBase = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(0, ya)]; + int li=gpu_senquack.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1); + + for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH, + x3 += dx3, x4 += dx4, + r3 += dr3, g3 += dg3, b3 += db3 ) + { + if (ya&li) continue; + if ((ya&pi)==pif) continue; + + u32 r4, g4, b4; + + xa = FixedCeilToInt(x3); + xb = FixedCeilToInt(x4); + r4 = r3; g4 = g3; b4 = b3; + + fixed itmp = i2x(xa) - x3; + if (itmp != 0) { + r4 += (dr4 * itmp) >> FIXED_BITS; + g4 += (dg4 * itmp) >> FIXED_BITS; + b4 += (db4 * itmp) >> FIXED_BITS; + } + + r4 += fixed_HALF; + g4 += fixed_HALF; + b4 += fixed_HALF; + + if ((xmin - xa) > 0) { + r4 += (dr4 * (xmin - xa)); + g4 += (dg4 * (xmin - xa)); + b4 += (db4 * (xmin - xa)); + xa = xmin; + } + + // Setup packed Gouraud color for inner driver + gpu_senquack.gCol = gpuPackGouraudCol(r4, g4, b4); + + if (xb > xmax) xb = xmax; + if ((xb - xa) > 0) + gpuPolySpanDriver(gpu_senquack, PixelBase + xa, 
(xb - xa)); + } + } + } while (++cur_pass < total_passes); +} + +/*---------------------------------------------------------------------- +gpuDrawPolyGT - Gouraud-shaded, textured poly +----------------------------------------------------------------------*/ +void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) +{ + PolyVertex vbuf[4]; + polyInitVertexBuffer(vbuf, packet, POLYTYPE_GT, is_quad); + + int total_passes = is_quad ? 2 : 1; + int cur_pass = 0; + do + { + const PolyVertex* vptrs[3]; + if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) + continue; + + s32 xa, xb, ya, yb; + s32 x3, dx3, x4, dx4, dx; + s32 u3, du3, v3, dv3; + s32 r3, dr3, g3, dg3, b3, db3; + s32 x0, x1, x2, y0, y1, y2; + s32 u0, u1, u2, v0, v1, v2; + s32 r0, r1, r2, g0, g1, g2, b0, b1, b2; + s32 du4, dv4; + s32 dr4, dg4, db4; + + x0 = vptrs[0]->x; y0 = vptrs[0]->y; + u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v; + r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b; + x1 = vptrs[1]->x; y1 = vptrs[1]->y; + u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v; + r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b; + x2 = vptrs[2]->x; y2 = vptrs[2]->y; + u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v; + r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b; + + ya = y2 - y0; + yb = y2 - y1; + dx4 = (x2 - x1) * ya - (x2 - x0) * yb; + du4 = (u2 - u1) * ya - (u2 - u0) * yb; + dv4 = (v2 - v1) * ya - (v2 - v0) * yb; + dr4 = (r2 - r1) * ya - (r2 - r0) * yb; + dg4 = (g2 - g1) * ya - (g2 - g0) * yb; + db4 = (b2 - b1) * ya - (b2 - b0) * yb; + dx = dx4; + if (dx4 < 0) { + dx4 = -dx4; + du4 = -du4; + dv4 = -dv4; + dr4 = -dr4; + dg4 = -dg4; + db4 = -db4; + } + +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if (dx4 != 0) { + float finv = FloatInv(dx4); + du4 = (fixed)((du4 << FIXED_BITS) * finv); + dv4 = (fixed)((dv4 << FIXED_BITS) * finv); + dr4 = (fixed)((dr4 << FIXED_BITS) * finv); + dg4 = (fixed)((dg4 << FIXED_BITS) * finv); + 
db4 = (fixed)((db4 << FIXED_BITS) * finv); + } else { + du4 = dv4 = dr4 = dg4 = db4 = 0; + } +#else + if (dx4 != 0) { + float fdiv = dx4; + du4 = (fixed)((du4 << FIXED_BITS) / fdiv); + dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv); + dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv); + dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv); + db4 = (fixed)((db4 << FIXED_BITS) / fdiv); + } else { + du4 = dv4 = dr4 = dg4 = db4 = 0; + } +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if (dx4 != 0) { + int iF, iS; + xInv(dx4, iF, iS); + du4 = xInvMulx(du4, iF, iS); + dv4 = xInvMulx(dv4, iF, iS); + dr4 = xInvMulx(dr4, iF, iS); + dg4 = xInvMulx(dg4, iF, iS); + db4 = xInvMulx(db4, iF, iS); + } else { + du4 = dv4 = dr4 = dg4 = db4 = 0; + } +#else + if (dx4 != 0) { + du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4); + dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4); + dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4); + dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4); + db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4); + } else { + du4 = dv4 = dr4 = dg4 = db4 = 0; + } +#endif +#endif + // Set u,v increments and packed Gouraud increment for inner driver + gpu_senquack.u_inc = du4; + gpu_senquack.v_inc = dv4; + gpu_senquack.gInc = gpuPackGouraudColInc(dr4, dg4, db4); + + for (s32 loop0 = 2; loop0; loop0--) { + if (loop0 == 2) { + ya = y0; yb = y1; + x3 = x4 = i2x(x0); + u3 = i2x(u0); v3 = i2x(v0); + r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0); + if (dx < 0) { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y2 - y0) != 0) { + float finv = FloatInv(y2 - y0); + dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv); + du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv); + dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv); + dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv); + dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv); + db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? 
(fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; +#else + if ((y2 - y0) != 0) { + float fdiv = y2 - y0; + dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv); + du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv); + dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv); + dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv); + dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv); + db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y2 - y0) != 0) { + int iF, iS; + xInv((y2 - y0), iF, iS); + dx3 = xInvMulx((x2 - x0), iF, iS); + du3 = xInvMulx((u2 - u0), iF, iS); + dv3 = xInvMulx((v2 - v0), iF, iS); + dr3 = xInvMulx((r2 - r0), iF, iS); + dg3 = xInvMulx((g2 - g0), iF, iS); + db3 = xInvMulx((b2 - b0), iF, iS); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; +#else + if ((y2 - y0) != 0) { + dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)); + du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0)); + dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0)); + dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0)); + dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0)); + db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0)); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? 
GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; +#endif +#endif + } else { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y1 - y0) != 0) { + float finv = FloatInv(y1 - y0); + dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv); + du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv); + dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv); + dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv); + dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv); + db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; +#else + if ((y1 - y0) != 0) { + float fdiv = y1 - y0; + dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv); + du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv); + dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv); + dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv); + dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv); + db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y1 - y0) != 0) { + int iF, iS; + xInv((y1 - y0), iF, iS); + dx3 = xInvMulx((x1 - x0), iF, iS); + du3 = xInvMulx((u1 - u0), iF, iS); + dv3 = xInvMulx((v1 - v0), iF, iS); + dr3 = xInvMulx((r1 - r0), iF, iS); + dg3 = xInvMulx((g1 - g0), iF, iS); + db3 = xInvMulx((b1 - b0), iF, iS); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? 
xLoDivx((x2 - x0), (y2 - y0)) : 0; +#else + if ((y1 - y0) != 0) { + dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)); + du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0)); + dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0)); + dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0)); + dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0)); + db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0)); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; +#endif +#endif + } + } else { + //senquack - break out of final loop if nothing to be drawn (1st loop + // must always be taken to setup dx3/dx4) + if (y1 == y2) break; + + ya = y1; yb = y2; + + if (dx < 0) { + x3 = i2x(x0); x4 = i2x(x1); + u3 = i2x(u0); v3 = i2x(v0); + r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0); + + if ((y1 - y0) != 0) { + x3 += (dx3 * (y1 - y0)); + u3 += (du3 * (y1 - y0)); + v3 += (dv3 * (y1 - y0)); + r3 += (dr3 * (y1 - y0)); + g3 += (dg3 * (y1 - y0)); + b3 += (db3 * (y1 - y0)); + } + +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? 
GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; +#endif +#endif + } else { + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); + + u3 = i2x(u1); v3 = i2x(v1); + r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1); +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y2 - y1) != 0) { + float finv = FloatInv(y2 - y1); + dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv); + du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv); + dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv); + dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv); + dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv); + db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } +#else + if ((y2 - y1) != 0) { + float fdiv = y2 - y1; + dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv); + du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv); + dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv); + dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv); + dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv); + db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y2 - y1) != 0) { + int iF, iS; + xInv((y2 - y1), iF, iS); + dx3 = xInvMulx((x2 - x1), iF, iS); + du3 = xInvMulx((u2 - u1), iF, iS); + dv3 = xInvMulx((v2 - v1), iF, iS); + dr3 = xInvMulx((r2 - r1), iF, iS); + dg3 = xInvMulx((g2 - g1), iF, iS); + db3 = xInvMulx((b2 - b1), iF, iS); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } +#else + if ((y2 - y1) != 0) { + dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)); + du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1)); + dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1)); + dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1)); + dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1)); + db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1)); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } +#endif +#endif + } + } + + s32 
xmin, xmax, ymin, ymax; + xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; + ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + + if ((ymin - ya) > 0) { + x3 += (dx3 * (ymin - ya)); + x4 += (dx4 * (ymin - ya)); + u3 += (du3 * (ymin - ya)); + v3 += (dv3 * (ymin - ya)); + r3 += (dr3 * (ymin - ya)); + g3 += (dg3 * (ymin - ya)); + b3 += (db3 * (ymin - ya)); + ya = ymin; + } + + if (yb > ymax) yb = ymax; + + int loop1 = yb - ya; + if (loop1 <= 0) + continue; + + u16* PixelBase = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(0, ya)]; + int li=gpu_senquack.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1); + + for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH, + x3 += dx3, x4 += dx4, + u3 += du3, v3 += dv3, + r3 += dr3, g3 += dg3, b3 += db3 ) + { + if (ya&li) continue; + if ((ya&pi)==pif) continue; + + u32 u4, v4; + u32 r4, g4, b4; + + xa = FixedCeilToInt(x3); + xb = FixedCeilToInt(x4); + u4 = u3; v4 = v3; + r4 = r3; g4 = g3; b4 = b3; + + fixed itmp = i2x(xa) - x3; + if (itmp != 0) { + u4 += (du4 * itmp) >> FIXED_BITS; + v4 += (dv4 * itmp) >> FIXED_BITS; + r4 += (dr4 * itmp) >> FIXED_BITS; + g4 += (dg4 * itmp) >> FIXED_BITS; + b4 += (db4 * itmp) >> FIXED_BITS; + } + + u4 += fixed_HALF; + v4 += fixed_HALF; + r4 += fixed_HALF; + g4 += fixed_HALF; + b4 += fixed_HALF; + + if ((xmin - xa) > 0) { + u4 += du4 * (xmin - xa); + v4 += dv4 * (xmin - xa); + r4 += dr4 * (xmin - xa); + g4 += dg4 * (xmin - xa); + b4 += db4 * (xmin - xa); + xa = xmin; + } + + // Set packed Gouraud color and u,v coords for inner driver + gpu_senquack.u = u4; + gpu_senquack.v = v4; + gpu_senquack.gCol = gpuPackGouraudCol(r4, g4, b4); + + if (xb > xmax) xb = xmax; + if ((xb - xa) > 0) + gpuPolySpanDriver(gpu_senquack, PixelBase + xa, (xb - xa)); + } + } + } while (++cur_pass < total_passes); +} + +#endif /* 
__GPU_UNAI_GPU_RASTER_POLYGON_H__ */ diff --git a/plugins/gpu_senquack/gpu_raster_sprite.h b/plugins/gpu_senquack/gpu_raster_sprite.h new file mode 100644 index 000000000..ddbad67b2 --- /dev/null +++ b/plugins/gpu_senquack/gpu_raster_sprite.h @@ -0,0 +1,170 @@ +/*************************************************************************** +* Copyright (C) 2010 PCSX4ALL Team * +* Copyright (C) 2010 Unai * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * +***************************************************************************/ + +#ifndef __GPU_UNAI_GPU_RASTER_SPRITE_H__ +#define __GPU_UNAI_GPU_RASTER_SPRITE_H__ + +/////////////////////////////////////////////////////////////////////////////// +// GPU internal sprite drawing functions + +void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver) +{ + s32 x0, x1, y0, y1; + u32 u0, v0; + + //NOTE: Must 11-bit sign-extend the whole sum here, not just packet X/Y, + // or sprites in 1st level of SkullMonkeys disappear when walking right. 
+ // This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon: + x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_senquack.DrawingOffset[0]); + y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_senquack.DrawingOffset[1]); + + u32 w = packet.U2[6] & 0x3ff; // Max width is 1023 + u32 h = packet.U2[7] & 0x1ff; // Max height is 511 + x1 = x0 + w; + y1 = y0 + h; + + s32 xmin, xmax, ymin, ymax; + xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; + ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + + u0 = packet.U1[8]; + v0 = packet.U1[9]; + + s32 temp; + temp = ymin - y0; + if (temp > 0) { y0 = ymin; v0 += temp; } + if (y1 > ymax) y1 = ymax; + if (y1 <= y0) return; + + temp = xmin - x0; + if (temp > 0) { x0 = xmin; u0 += temp; } + if (x1 > xmax) x1 = xmax; + x1 -= x0; + if (x1 <= 0) return; + + gpu_senquack.r5 = packet.U1[0] >> 3; + gpu_senquack.g5 = packet.U1[1] >> 3; + gpu_senquack.b5 = packet.U1[2] >> 3; + + u16 *Pixel = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(x0, y0)]; + const int li=gpu_senquack.ilace_mask; + const int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0); + const int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1); + unsigned int tmode = gpu_senquack.TEXT_MODE >> 5; + const u32 v0_mask = gpu_senquack.TextureWindow[3]; + u8* pTxt_base = (u8*)gpu_senquack.TBA; + + // Texture is accessed byte-wise, so adjust idx if 16bpp + if (tmode == 3) u0 <<= 1; + + for (; y0 xmax - 16 || x0 < xmin || + ((u0 | v0) & 15) || !(gpu_senquack.TextureWindow[2] & gpu_senquack.TextureWindow[3] & 8)) { + // send corner cases to general handler + packet.U4[3] = 0x00100010; + gpuDrawS(packet, gpuSpriteSpanFn<0x20>); + return; + } + + if (y0 >= ymax || y0 <= ymin - 16) + return; + if (y0 < ymin) { + h -= ymin - y0; + v0 += ymin - y0; + y0 = ymin; + } + else if (ymax - y0 < 16) + h = ymax - y0; + + draw_spr16_full(&gpu_senquack.vram[FRAME_OFFSET(x0, y0)], 
&gpu_senquack.TBA[FRAME_OFFSET(u0/4, v0)], gpu_senquack.CBA, h); +} +#endif // __arm__ + +void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver) +{ + s32 x0, x1, y0, y1; + + // This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon: + x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_senquack.DrawingOffset[0]); + y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_senquack.DrawingOffset[1]); + + u32 w = packet.U2[4] & 0x3ff; // Max width is 1023 + u32 h = packet.U2[5] & 0x1ff; // Max height is 511 + x1 = x0 + w; + y1 = y0 + h; + + s32 xmin, xmax, ymin, ymax; + xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; + ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + + if (y0 < ymin) y0 = ymin; + if (y1 > ymax) y1 = ymax; + if (y1 <= y0) return; + + if (x0 < xmin) x0 = xmin; + if (x1 > xmax) x1 = xmax; + x1 -= x0; + if (x1 <= 0) return; + + const u16 Data = GPU_RGB16(packet.U4[0]); + u16 *Pixel = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(x0, y0)]; + const int li=gpu_senquack.ilace_mask; + const int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0); + const int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1); + + for (; y0 gmail com) * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. 
* +***************************************************************************/ + +#ifndef GPU_UNAI_H +#define GPU_UNAI_H + +#include "gpu.h" + +// Header shared between both standalone gpu_senquack (gpu.cpp) and new +// gpulib-compatible gpu_senquack (gpulib_if.cpp) +// -> Anything here should be for gpu_senquack's private use. <- + +/////////////////////////////////////////////////////////////////////////////// +// Compile Options + +//#define ENABLE_GPU_NULL_SUPPORT // Enables NullGPU support +//#define ENABLE_GPU_LOG_SUPPORT // Enables gpu logger, very slow only for windows debugging +//#define ENABLE_GPU_ARMV7 // Enables ARMv7 optimized assembly + +//Poly routine options (default is integer math and accurate division) +//#define GPU_UNAI_USE_FLOATMATH // Use float math in poly routines +//#define GPU_UNAI_USE_FLOAT_DIV_MULTINV // If GPU_UNAI_USE_FLOATMATH is defined, + // use multiply-by-inverse for division +//#define GPU_UNAI_USE_INT_DIV_MULTINV // If GPU_UNAI_USE_FLOATMATH is *not* + // defined, use old inaccurate division + + +#define GPU_INLINE static inline __attribute__((always_inline)) +#define INLINE static inline __attribute__((always_inline)) + +#define u8 uint8_t +#define s8 int8_t +#define u16 uint16_t +#define s16 int16_t +#define u32 uint32_t +#define s32 int32_t +#define s64 int64_t + +union PtrUnion +{ + u32 *U4; + s32 *S4; + u16 *U2; + s16 *S2; + u8 *U1; + s8 *S1; + void *ptr; +}; + +union GPUPacket +{ + u32 U4[16]; + s32 S4[16]; + u16 U2[32]; + s16 S2[32]; + u8 U1[64]; + s8 S1[64]; +}; + +template static inline void SwapValues(T &x, T &y) +{ + T tmp(x); x = y; y = tmp; +} + +template +static inline T Min2 (const T a, const T b) +{ + return (a +static inline T Min3 (const T a, const T b, const T c) +{ + return Min2(Min2(a,b),c); +} + +template +static inline T Max2 (const T a, const T b) +{ + return (a>b)?a:b; +} + +template +static inline T Max3 (const T a, const T b, const T c) +{ + return Max2(Max2(a,b),c); +} + + 
+/////////////////////////////////////////////////////////////////////////////// +// GPU Raster Macros + +// Convert 24bpp color parameter of GPU command to 16bpp (15bpp + mask bit) +#define GPU_RGB16(rgb) ((((rgb)&0xF80000)>>9)|(((rgb)&0xF800)>>6)|(((rgb)&0xF8)>>3)) + +// Sign-extend 11-bit coordinate command param +#define GPU_EXPANDSIGN(x) (((s32)(x)<<(32-11))>>(32-11)) + +// Max difference between any two X or Y primitive coordinates +#define CHKMAX_X 1024 +#define CHKMAX_Y 512 + +#define FRAME_BUFFER_SIZE (1024*512*2) +#define FRAME_WIDTH 1024 +#define FRAME_HEIGHT 512 +#define FRAME_OFFSET(x,y) (((y)<<10)+(x)) +#define FRAME_BYTE_STRIDE 2048 +#define FRAME_BYTES_PER_PIXEL 2 + +static inline s32 GPU_DIV(s32 rs, s32 rt) +{ + return rt ? (rs / rt) : (0); +} + +// 'Unsafe' version of above that doesn't check for div-by-zero +#define GPU_FAST_DIV(rs, rt) ((signed)(rs) / (signed)(rt)) + +struct gpu_senquack_t { + u32 GPU_GP1; + GPUPacket PacketBuffer; + u16 *vram; + +#ifndef USE_GPULIB + u32 GPU_GP0; + u32 tex_window; // Current texture window vals (set by GP0(E2h) cmd) + s32 PacketCount; + s32 PacketIndex; + bool fb_dirty; // Framebuffer is dirty (according to GPU) + + // Display status + // NOTE: Standalone older gpu_senquack didn't care about horiz display range + u16 DisplayArea[6]; // [0] : Start of display area (in VRAM) X + // [1] : Start of display area (in VRAM) Y + // [2] : Display mode resolution HORIZONTAL + // [3] : Display mode resolution VERTICAL + // [4] : Vertical display range (on TV) START + // [5] : Vertical display range (on TV) END + + //////////////////////////////////////////////////////////////////////////// + // Dma Transfers info + struct { + s32 px,py; + s32 x_end,y_end; + u16* pvram; + u32 *last_dma; // Last dma pointer + bool FrameToRead; // Load image in progress + bool FrameToWrite; // Store image in progress + } dma; + + //////////////////////////////////////////////////////////////////////////// + // Frameskip + struct { + int 
skipCount; // Frame skip (0,1,2,3...) + bool isSkip; // Skip frame (according to GPU) + bool skipFrame; // Skip this frame (according to frame skip) + bool wasSkip; // Skip frame old value (according to GPU) + bool skipGPU; // Skip GPU primitives + } frameskip; +#endif + // END of standalone gpu_senquack variables + //////////////////////////////////////////////////////////////////////////// + + u32 TextureWindowCur; // Current setting from last GP0(0xE2) cmd (raw form) + u8 TextureWindow[4]; // [0] : Texture window offset X + // [1] : Texture window offset Y + // [2] : Texture window mask X + // [3] : Texture window mask Y + + u16 DrawingArea[4]; // [0] : Drawing area top left X + // [1] : Drawing area top left Y + // [2] : Drawing area bottom right X + // [3] : Drawing area bottom right Y + + s16 DrawingOffset[2]; // [0] : Drawing offset X (signed) + // [1] : Drawing offset Y (signed) + + u16* TBA; // Ptr to current texture in VRAM + u16* CBA; // Ptr to current CLUT in VRAM + + //////////////////////////////////////////////////////////////////////////// + // Inner Loop parameters + + // 22.10 Fixed-pt texture coords, mask, scanline advance + // NOTE: U,V are no longer packed together into one u32, this proved to be + // too imprecise, leading to pixel dropouts. Example: NFS3's skybox. 
+ u32 u, v; + u32 u_msk, v_msk; + s32 u_inc, v_inc; + + // Color for Gouraud-shaded prims + // Packed fixed-pt 8.3:8.3:8.2 rgb triplet + // layout: rrrrrrrrXXXggggggggXXXbbbbbbbbXX + // ^ bit 31 ^ bit 0 + u32 gCol; + u32 gInc; // Increment along scanline for gCol + + // Color for flat-shaded, texture-blended prims + u8 r5, g5, b5; // 5-bit light for undithered prims + u8 r8, g8, b8; // 8-bit light for dithered prims + + // Color for flat-shaded, untextured prims + u16 PixelData; // bgr555 color for untextured flat-shaded polys + + // End of inner Loop parameters + //////////////////////////////////////////////////////////////////////////// + + + u8 blit_mask; // Determines what pixels to skip when rendering. + // Only useful on low-resolution devices using + // a simple pixel-dropping downscaler for PS1 + // high-res modes. See 'pixel_skip' option. + + u8 ilace_mask; // Determines what lines to skip when rendering. + // Normally 0 when PS1 240 vertical res is in + // use and ilace_force is 0. When running in + // PS1 480 vertical res on a low-resolution + // device (320x240), will usually be set to 1 + // so odd lines are not rendered. (Unless future + // full-screen scaling option is in use ..TODO) + + bool prog_ilace_flag; // Tracks successive frames for 'prog_ilace' option + + u8 BLEND_MODE; + u8 TEXT_MODE; + u8 Masking; + + u16 PixelMSB; + + gpu_senquack_config_t config; + + u8 LightLUT[32*32]; // 5-bit lighting LUT (gpu_inner_light.h) + u32 DitherMatrix[64]; // Matrix of dither coefficients +}; + +static gpu_senquack_t gpu_senquack; + +// Global config that frontend can alter.. Values are read in GPU_init(). +// TODO: if frontend menu modifies a setting, add a function that can notify +// GPU plugin to use new setting. 
+gpu_senquack_config_t gpu_senquack_config_ext; + +/////////////////////////////////////////////////////////////////////////////// +// Internal inline funcs to get option status: (Allows flexibility) +static inline bool LightingEnabled() +{ + return gpu_senquack.config.lighting; +} + +static inline bool FastLightingEnabled() +{ + return gpu_senquack.config.fast_lighting; +} + +static inline bool BlendingEnabled() +{ + return gpu_senquack.config.blending; +} + +static inline bool DitheringEnabled() +{ + return gpu_senquack.config.dithering; +} + +// For now, this is just for development/experimentation purposes.. +// If modified to return true, it will allow ignoring the status register +// bit 9 setting (dither enable). It will still restrict dithering only +// to Gouraud-shaded or texture-blended polys. +static inline bool ForcedDitheringEnabled() +{ + return false; +} + +static inline bool ProgressiveInterlaceEnabled() +{ +#ifdef USE_GPULIB + // Using this old option greatly decreases quality of image. Disabled + // for now when using new gpulib, since it also adds more work in loops. + return false; +#else + return gpu_senquack.config.prog_ilace; +#endif +} + +// For now, 320x240 output resolution is assumed, using simple line-skipping +// and pixel-skipping downscaler. +// TODO: Flesh these out so they return useful values based on whether +// running on higher-res device or a resampling downscaler is enabled. 
+static inline bool PixelSkipEnabled() +{ + return gpu_senquack.config.pixel_skip || gpu_senquack.config.scale_hires; +} + +static inline bool LineSkipEnabled() +{ + return true; +} + +#endif // GPU_UNAI_H diff --git a/plugins/gpu_senquack/gpulib_if.cpp b/plugins/gpu_senquack/gpulib_if.cpp new file mode 100644 index 000000000..c8452a3d0 --- /dev/null +++ b/plugins/gpu_senquack/gpulib_if.cpp @@ -0,0 +1,642 @@ +/*************************************************************************** +* Copyright (C) 2010 PCSX4ALL Team * +* Copyright (C) 2010 Unai * +* Copyright (C) 2011 notaz * +* Copyright (C) 2016 Senquack (dansilsby gmail com) * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. 
* +***************************************************************************/ + +#include +#include +#include +#include +#include "../gpulib/gpu.h" + +//#include "port.h" +#include "gpu_senquack.h" + +// GPU fixed point math +#include "gpu_fixedpoint.h" + +// Inner loop driver instantiation file +#include "gpu_inner.h" + +// GPU internal image drawing functions +#include "gpu_raster_image.h" + +// GPU internal line drawing functions +#include "gpu_raster_line.h" + +// GPU internal polygon drawing functions +#include "gpu_raster_polygon.h" + +// GPU internal sprite drawing functions +#include "gpu_raster_sprite.h" + +// GPU command buffer execution/store +#include "gpu_command.h" + +///////////////////////////////////////////////////////////////////////////// + +int renderer_init(void) +{ + memset((void*)&gpu_senquack, 0, sizeof(gpu_senquack)); + gpu_senquack.vram = (u16*)gpu.vram; + + // Original standalone gpu_senquack initialized TextureWindow[]. I added the + // same behavior here, since it seems unsafe to leave [2],[3] unset when + // using HLE and Rearmed gpu_neon sets this similarly on init. -senquack + gpu_senquack.TextureWindow[0] = 0; + gpu_senquack.TextureWindow[1] = 0; + gpu_senquack.TextureWindow[2] = 255; + gpu_senquack.TextureWindow[3] = 255; + //senquack - new vars must be updated whenever texture window is changed: + // (used for polygon-drawing in gpu_inner.h, gpu_raster_polygon.h) + const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 + gpu_senquack.u_msk = (((u32)gpu_senquack.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_senquack.v_msk = (((u32)gpu_senquack.TextureWindow[3]) << fb) | ((1 << fb) - 1); + + // Configuration options + gpu_senquack.config = gpu_senquack_config_ext; + //senquack - disabled, not sure this is needed and would require modifying + // sprite-span functions, perhaps unnecessarily. No Abe Oddysey hack was + // present in latest PCSX4ALL sources we were using. 
+ //gpu_senquack.config.enableAbbeyHack = gpu_senquack_config_ext.abe_hack; + gpu_senquack.ilace_mask = gpu_senquack.config.ilace_force; + +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + // s_invTable + for(int i=1;i<=(1<>1); +#else + v *= double(0x80000000); +#endif + s_invTable[i-1]=s32(v); + } +#endif + + SetupLightLUT(); + SetupDitheringConstants(); + + return 0; +} + +void renderer_finish(void) +{ +} + +void renderer_notify_res_change(void) +{ + if (PixelSkipEnabled()) { + // Set blit_mask for high horizontal resolutions. This allows skipping + // rendering pixels that would never get displayed on low-resolution + // platforms that use simple pixel-dropping scaler. + + switch (gpu.screen.hres) + { + case 512: gpu_senquack.blit_mask = 0xa4; break; // GPU_BlitWWSWWSWS + case 640: gpu_senquack.blit_mask = 0xaa; break; // GPU_BlitWS + default: gpu_senquack.blit_mask = 0; break; + } + } else { + gpu_senquack.blit_mask = 0; + } + + if (LineSkipEnabled()) { + // Set rendering line-skip (only render every other line in high-res + // 480 vertical mode, or, optionally, force it for all video modes) + + if (gpu.screen.vres == 480) { + if (gpu_senquack.config.ilace_force) { + gpu_senquack.ilace_mask = 3; // Only need 1/4 of lines + } else { + gpu_senquack.ilace_mask = 1; // Only need 1/2 of lines + } + } else { + // Vert resolution changed from 480 to lower one + gpu_senquack.ilace_mask = gpu_senquack.config.ilace_force; + } + } else { + gpu_senquack.ilace_mask = 0; + } + + /* + printf("res change hres: %d vres: %d depth: %d ilace_mask: %d\n", + gpu.screen.hres, gpu.screen.vres, gpu.status.rgb24 ? 
24 : 15, + gpu_senquack.ilace_mask); + */ +} + +#ifdef USE_GPULIB +// Handles GP0 draw settings commands 0xE1...0xE6 +static void gpuGP0Cmd_0xEx(gpu_senquack_t &gpu_senquack, u32 cmd_word) +{ + // Assume incoming GP0 command is 0xE1..0xE6, convert to 1..6 + u8 num = (cmd_word >> 24) & 7; + gpu.ex_regs[num] = cmd_word; // Update gpulib register + switch (num) { + case 1: { + // GP0(E1h) - Draw Mode setting (aka "Texpage") + u32 cur_texpage = gpu_senquack.GPU_GP1 & 0x7FF; + u32 new_texpage = cmd_word & 0x7FF; + if (cur_texpage != new_texpage) { + gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 & ~0x7FF) | new_texpage; + gpuSetTexture(gpu_senquack.GPU_GP1); + } + } break; + + case 2: { + // GP0(E2h) - Texture Window setting + if (cmd_word != gpu_senquack.TextureWindowCur) { + static const u8 TextureMask[32] = { + 255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7, + 127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7 + }; + gpu_senquack.TextureWindowCur = cmd_word; + gpu_senquack.TextureWindow[0] = ((cmd_word >> 10) & 0x1F) << 3; + gpu_senquack.TextureWindow[1] = ((cmd_word >> 15) & 0x1F) << 3; + gpu_senquack.TextureWindow[2] = TextureMask[(cmd_word >> 0) & 0x1F]; + gpu_senquack.TextureWindow[3] = TextureMask[(cmd_word >> 5) & 0x1F]; + gpu_senquack.TextureWindow[0] &= ~gpu_senquack.TextureWindow[2]; + gpu_senquack.TextureWindow[1] &= ~gpu_senquack.TextureWindow[3]; + + // Inner loop vars must be updated whenever texture window is changed: + const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 + gpu_senquack.u_msk = (((u32)gpu_senquack.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_senquack.v_msk = (((u32)gpu_senquack.TextureWindow[3]) << fb) | ((1 << fb) - 1); + + gpuSetTexture(gpu_senquack.GPU_GP1); + } + } break; + + case 3: { + // GP0(E3h) - Set Drawing Area top left (X1,Y1) + gpu_senquack.DrawingArea[0] = cmd_word & 0x3FF; + gpu_senquack.DrawingArea[1] = (cmd_word >> 10) & 0x3FF; + } break; + + case 4: { + // GP0(E4h) - Set Drawing Area 
bottom right (X2,Y2) + gpu_senquack.DrawingArea[2] = (cmd_word & 0x3FF) + 1; + gpu_senquack.DrawingArea[3] = ((cmd_word >> 10) & 0x3FF) + 1; + } break; + + case 5: { + // GP0(E5h) - Set Drawing Offset (X,Y) + gpu_senquack.DrawingOffset[0] = ((s32)cmd_word<<(32-11))>>(32-11); + gpu_senquack.DrawingOffset[1] = ((s32)cmd_word<<(32-22))>>(32-11); + } break; + + case 6: { + // GP0(E6h) - Mask Bit Setting + gpu_senquack.Masking = (cmd_word & 0x2) << 1; + gpu_senquack.PixelMSB = (cmd_word & 0x1) << 8; + } break; + } +} +#endif + +extern const unsigned char cmd_lengths[256]; + +int do_cmd_list(u32 *list, int list_len, int *last_cmd) +{ + u32 cmd = 0, len, i; + u32 *list_start = list; + u32 *list_end = list + list_len; + + //TODO: set ilace_mask when resolution changes instead of every time, + // eliminate #ifdef below. + gpu_senquack.ilace_mask = gpu_senquack.config.ilace_force; + +#ifdef HAVE_PRE_ARMV7 /* XXX */ + gpu_senquack.ilace_mask |= gpu.status.interlace; +#endif + if (gpu_senquack.config.scale_hires) { + gpu_senquack.ilace_mask |= gpu.status.interlace; + } + + for (; list < list_end; list += 1 + len) + { + cmd = *list >> 24; + len = cmd_lengths[cmd]; + if (list + 1 + len > list_end) { + cmd = -1; + break; + } + + #define PRIM cmd + gpu_senquack.PacketBuffer.U4[0] = list[0]; + for (i = 1; i <= len; i++) + gpu_senquack.PacketBuffer.U4[i] = list[i]; + + PtrUnion packet = { .ptr = (void*)&gpu_senquack.PacketBuffer }; + + switch (cmd) + { + case 0x02: + gpuClearImage(packet); + break; + + case 0x20: + case 0x21: + case 0x22: + case 0x23: { // Monochrome 3-pt poly + PP driver = gpuPolySpanDrivers[ + (gpu_senquack.blit_mask?1024:0) | + Blending_Mode | + gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB + ]; + gpuDrawPolyF(packet, driver, false); + } break; + + case 0x24: + case 0x25: + case 0x26: + case 0x27: { // Textured 3-pt poly + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + gpuSetTexture(gpu_senquack.PacketBuffer.U4[4] >> 16); + + u32 driver_idx = + 
(gpu_senquack.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_senquack.TEXT_MODE | + gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB; + + if (!FastLightingEnabled()) { + driver_idx |= Lighting; + } else { + if (!((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F))) + driver_idx |= Lighting; + } + + PP driver = gpuPolySpanDrivers[driver_idx]; + gpuDrawPolyFT(packet, driver, false); + } break; + + case 0x28: + case 0x29: + case 0x2A: + case 0x2B: { // Monochrome 4-pt poly + PP driver = gpuPolySpanDrivers[ + (gpu_senquack.blit_mask?1024:0) | + Blending_Mode | + gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB + ]; + gpuDrawPolyF(packet, driver, true); // is_quad = true + } break; + + case 0x2C: + case 0x2D: + case 0x2E: + case 0x2F: { // Textured 4-pt poly + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + gpuSetTexture(gpu_senquack.PacketBuffer.U4[4] >> 16); + + u32 driver_idx = + (gpu_senquack.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_senquack.TEXT_MODE | + gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB; + + if (!FastLightingEnabled()) { + driver_idx |= Lighting; + } else { + if (!((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F))) + driver_idx |= Lighting; + } + + PP driver = gpuPolySpanDrivers[driver_idx]; + gpuDrawPolyFT(packet, driver, true); // is_quad = true + } break; + + case 0x30: + case 0x31: + case 0x32: + case 0x33: { // Gouraud-shaded 3-pt poly + //NOTE: The '129' here is CF_GOURAUD | CF_LIGHT, however + // this is an untextured poly, so CF_LIGHT (texture blend) + // shouldn't apply. Until the original array of template + // instantiation ptrs is fixed, we're stuck with this. 
(TODO) + PP driver = gpuPolySpanDrivers[ + (gpu_senquack.blit_mask?1024:0) | + Dithering | + Blending_Mode | + gpu_senquack.Masking | Blending | 129 | gpu_senquack.PixelMSB + ]; + gpuDrawPolyG(packet, driver, false); + } break; + + case 0x34: + case 0x35: + case 0x36: + case 0x37: { // Gouraud-shaded, textured 3-pt poly + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + gpuSetTexture (gpu_senquack.PacketBuffer.U4[5] >> 16); + PP driver = gpuPolySpanDrivers[ + (gpu_senquack.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_senquack.TEXT_MODE | + gpu_senquack.Masking | Blending | ((Lighting)?129:0) | gpu_senquack.PixelMSB + ]; + gpuDrawPolyGT(packet, driver, false); + } break; + + case 0x38: + case 0x39: + case 0x3A: + case 0x3B: { // Gouraud-shaded 4-pt poly + // See notes regarding '129' for 0x30..0x33 further above -senquack + PP driver = gpuPolySpanDrivers[ + (gpu_senquack.blit_mask?1024:0) | + Dithering | + Blending_Mode | + gpu_senquack.Masking | Blending | 129 | gpu_senquack.PixelMSB + ]; + gpuDrawPolyG(packet, driver, true); // is_quad = true + } break; + + case 0x3C: + case 0x3D: + case 0x3E: + case 0x3F: { // Gouraud-shaded, textured 4-pt poly + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + gpuSetTexture (gpu_senquack.PacketBuffer.U4[5] >> 16); + PP driver = gpuPolySpanDrivers[ + (gpu_senquack.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_senquack.TEXT_MODE | + gpu_senquack.Masking | Blending | ((Lighting)?129:0) | gpu_senquack.PixelMSB + ]; + gpuDrawPolyGT(packet, driver, true); // is_quad = true + } break; + + case 0x40: + case 0x41: + case 0x42: + case 0x43: { // Monochrome line + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineF(packet, driver); + } break; + + case 0x48 ... 
0x4F: { // Monochrome line strip + u32 num_vertexes = 1; + u32 *list_position = &(list[2]); + + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineF(packet, driver); + + while(1) + { + gpu_senquack.PacketBuffer.U4[1] = gpu_senquack.PacketBuffer.U4[2]; + gpu_senquack.PacketBuffer.U4[2] = *list_position++; + gpuDrawLineF(packet, driver); + + num_vertexes++; + if(list_position >= list_end) { + cmd = -1; + goto breakloop; + } + if((*list_position & 0xf000f000) == 0x50005000) + break; + } + + len += (num_vertexes - 2); + } break; + + case 0x50: + case 0x51: + case 0x52: + case 0x53: { // Gouraud-shaded line + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + // Index MSB selects Gouraud-shaded PixelSpanDriver: + driver_idx |= (1 << 5); + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineG(packet, driver); + } break; + + case 0x58 ... 
0x5F: { // Gouraud-shaded line strip + u32 num_vertexes = 1; + u32 *list_position = &(list[2]); + + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + // Index MSB selects Gouraud-shaded PixelSpanDriver: + driver_idx |= (1 << 5); + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineG(packet, driver); + + while(1) + { + gpu_senquack.PacketBuffer.U4[0] = gpu_senquack.PacketBuffer.U4[2]; + gpu_senquack.PacketBuffer.U4[1] = gpu_senquack.PacketBuffer.U4[3]; + gpu_senquack.PacketBuffer.U4[2] = *list_position++; + gpu_senquack.PacketBuffer.U4[3] = *list_position++; + gpuDrawLineG(packet, driver); + + num_vertexes++; + if(list_position >= list_end) { + cmd = -1; + goto breakloop; + } + if((*list_position & 0xf000f000) == 0x50005000) + break; + } + + len += (num_vertexes - 2) * 2; + } break; + + case 0x60: + case 0x61: + case 0x62: + case 0x63: { // Monochrome rectangle (variable size) + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + } break; + + case 0x64: + case 0x65: + case 0x66: + case 0x67: { // Textured rectangle (variable size) + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + u32 driver_idx = Blending_Mode | gpu_senquack.TEXT_MODE | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>1); + + //senquack - Only color 808080h-878787h allows skipping lighting calculation: + // This fixes Silent Hill running animation on loading screens: + // (On PSX, color values 0x00-0x7F darken the source texture's color, + // 0x81-FF lighten textures (ultimately clamped to 0x1F), + // 0x80 leaves source texture color unchanged, HOWEVER, + // gpu_senquack uses a simple lighting LUT whereby only the upper + // 5 bits of an 8-bit color are used, so 0x80-0x87 all behave as + // 0x80. 
+ // + // NOTE: I've changed all textured sprite draw commands here and + // elsewhere to use proper behavior, but left poly commands + // alone, I don't want to slow rendering down too much. (TODO) + //if ((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((gpu_senquack.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver); + } break; + + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: { // Monochrome rectangle (1x1 dot) + gpu_senquack.PacketBuffer.U4[2] = 0x00010001; + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + } break; + + case 0x70: + case 0x71: + case 0x72: + case 0x73: { // Monochrome rectangle (8x8) + gpu_senquack.PacketBuffer.U4[2] = 0x00080008; + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + } break; + + case 0x74: + case 0x75: + case 0x76: + case 0x77: { // Textured rectangle (8x8) + gpu_senquack.PacketBuffer.U4[3] = 0x00080008; + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + u32 driver_idx = Blending_Mode | gpu_senquack.TEXT_MODE | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>1); + + //senquack - Only color 808080h-878787h allows skipping lighting calculation: + //if ((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((gpu_senquack.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver); + } break; + + case 0x78: + case 0x79: + case 0x7A: + 
case 0x7B: { // Monochrome rectangle (16x16) + gpu_senquack.PacketBuffer.U4[2] = 0x00100010; + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + } break; + + case 0x7C: + case 0x7D: +#ifdef __arm__ + if ((gpu_senquack.GPU_GP1 & 0x180) == 0 && (gpu_senquack.Masking | gpu_senquack.PixelMSB) == 0) + { + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + gpuDrawS16(packet); + break; + } + // fallthrough +#endif + case 0x7E: + case 0x7F: { // Textured rectangle (16x16) + gpu_senquack.PacketBuffer.U4[3] = 0x00100010; + gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + u32 driver_idx = Blending_Mode | gpu_senquack.TEXT_MODE | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>1); + //senquack - Only color 808080h-878787h allows skipping lighting calculation: + //if ((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((gpu_senquack.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver); + } break; + + case 0x80: // vid -> vid + gpuMoveImage(packet); + break; + +#ifdef TEST + case 0xA0: // sys -> vid + { + u32 load_width = list[2] & 0xffff; + u32 load_height = list[2] >> 16; + u32 load_size = load_width * load_height; + + len += load_size / 2; + } break; + + case 0xC0: + break; +#else + case 0xA0: // sys ->vid + case 0xC0: // vid -> sys + // Handled by gpulib + goto breakloop; +#endif + case 0xE1 ... 
0xE6: { // Draw settings + gpuGP0Cmd_0xEx(gpu_senquack, gpu_senquack.PacketBuffer.U4[0]); + } break; + } + } + +breakloop: + gpu.ex_regs[1] &= ~0x1ff; + gpu.ex_regs[1] |= gpu_senquack.GPU_GP1 & 0x1ff; + + *last_cmd = cmd; + return list - list_start; +} + +void renderer_sync_ecmds(uint32_t *ecmds) +{ + int dummy; + do_cmd_list(&ecmds[1], 6, &dummy); +} + +void renderer_update_caches(int x, int y, int w, int h) +{ +} + +void renderer_flush_queues(void) +{ +} + +void renderer_set_interlace(int enable, int is_odd) +{ +} + +#include "../../frontend/plugin_lib.h" +// Handle any gpulib settings applicable to gpu_senquack: +void renderer_set_config(const struct rearmed_cbs *cbs) +{ + gpu_senquack.vram = (u16*)gpu.vram; + gpu_senquack.config.ilace_force = cbs->gpu_senquack.ilace_force; + gpu_senquack.config.pixel_skip = cbs->gpu_senquack.pixel_skip; + gpu_senquack.config.lighting = cbs->gpu_senquack.lighting; + gpu_senquack.config.fast_lighting = cbs->gpu_senquack.fast_lighting; + gpu_senquack.config.blending = cbs->gpu_senquack.blending; + gpu_senquack.config.dithering = cbs->gpu_senquack.dithering; + gpu_senquack.config.scale_hires = cbs->gpu_senquack.scale_hires; +} + +// vim:shiftwidth=2:expandtab diff --git a/plugins/gpu_senquack/port.h b/plugins/gpu_senquack/port.h new file mode 100644 index 000000000..0a731f8e7 --- /dev/null +++ b/plugins/gpu_senquack/port.h @@ -0,0 +1,41 @@ +#ifndef __GPU_UNAI_GPU_PORT_H__ +#define __GPU_UNAI_GPU_PORT_H__ + +#include +#include + +#define INLINE static inline + +#define GPU_init GPUinit +#define GPU_shutdown GPUshutdown +//#define GPU_freeze GPUfreeze +#define GPU_writeDataMem GPUwriteDataMem +#define GPU_dmaChain GPUdmaChain +#define GPU_writeData GPUwriteData +#define GPU_readDataMem GPUreadDataMem +#define GPU_readData GPUreadData +#define GPU_readStatus GPUreadStatus +#define GPU_writeStatus GPUwriteStatus +#define GPU_updateLace GPUupdateLace + +extern "C" { + +#define u32 unsigned int +#define s32 signed int + +bool 
GPUinit(void); +void GPUshutdown(void); +void GPUwriteDataMem(u32* dmaAddress, s32 dmaCount); +long GPUdmaChain(u32* baseAddr, u32 dmaVAddr); +void GPUwriteData(u32 data); +void GPUreadDataMem(u32* dmaAddress, s32 dmaCount); +u32 GPUreadData(void); +u32 GPUreadStatus(void); +void GPUwriteStatus(u32 data); + +#undef u32 +#undef s32 + +} + +#endif /* __GPU_UNAI_GPU_PORT_H__ */ diff --git a/plugins/gpu_senquack/profiler.h b/plugins/gpu_senquack/profiler.h new file mode 100644 index 000000000..a23ee3853 --- /dev/null +++ b/plugins/gpu_senquack/profiler.h @@ -0,0 +1,9 @@ +#ifndef __GPU_UNAI_GPU_PROFILER_H__ +#define __GPU_UNAI_GPU_PROFILER_H__ + +#define pcsx4all_prof_pause(...) +#define pcsx4all_prof_start_with_pause(...) +#define pcsx4all_prof_end_with_resume(...) +#define pcsx4all_prof_resume(...) + +#endif /* __GPU_UNAI_GPU_PROFILER_H__ */ From 2da09dae586e9677b0ff3bfc1ce746b9ea6c9479 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Fri, 29 Oct 2021 20:09:41 +0000 Subject: [PATCH 038/597] Move CdlSetLoc to CdrInterrupt and return invalid arg error (#234) So far, i could only find Simple 1500 Series Vol. 31 - The Sound Novel to be affected by this. In Duckstation, this was causing extra delays without it. However in our case, this doesn't seem to be the case and i couldn't find much find about it. 
--- libpcsxcore/cdrom.c | 55 ++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 26f68ac36..016b9939c 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -570,6 +570,8 @@ void cdrInterrupt() { int error = 0; int delay; unsigned int seekTime = 0; + u8 set_loc[3]; + int i; // Reschedule IRQ if (cdr.Stat) { @@ -603,6 +605,31 @@ void cdrInterrupt() { break; case CdlSetloc: + CDR_LOG("CDROM setloc command (%02X, %02X, %02X)\n", cdr.Param[0], cdr.Param[1], cdr.Param[2]); + + // MM must be BCD, SS must be BCD and <0x60, FF must be BCD and <0x75 + if (((cdr.Param[0] & 0x0F) > 0x09) || (cdr.Param[0] > 0x99) || ((cdr.Param[1] & 0x0F) > 0x09) || (cdr.Param[1] >= 0x60) || ((cdr.Param[2] & 0x0F) > 0x09) || (cdr.Param[2] >= 0x75)) + { + CDR_LOG("Invalid/out of range seek to %02X:%02X:%02X\n", cdr.Param[0], cdr.Param[1], cdr.Param[2]); + error = ERROR_INVALIDARG; + goto set_error; + } + else + { + for (i = 0; i < 3; i++) + { + set_loc[i] = btoi(cdr.Param[i]); + } + + i = msf2sec(cdr.SetSectorPlay); + i = abs(i - msf2sec(set_loc)); + if (i > 16) + cdr.Seeked = SEEK_PENDING; + + memcpy(cdr.SetSector, set_loc, 3); + cdr.SetSector[3] = 0; + cdr.SetlocPending = 1; + } break; do_CdlPlay: @@ -1289,9 +1316,6 @@ unsigned char cdrRead1(void) { } void cdrWrite1(unsigned char rt) { - u8 set_loc[3]; - int i; - CDR_LOG_IO("cdr w1: %02x\n", rt); switch (cdr.Ctrl & 3) { @@ -1325,31 +1349,6 @@ void cdrWrite1(unsigned char rt) { AddIrqQueue(cdr.Cmd, 0x800); switch (cdr.Cmd) { - case CdlSetloc: - CDR_LOG("CDROM setloc command (%02X, %02X, %02X)\n", cdr.Param[0], cdr.Param[1], cdr.Param[2]); - - // MM must be BCD, SS must be BCD and <0x60, FF must be BCD and <0x75 - if (((cdr.Param[0] & 0x0F) > 0x09) || (cdr.Param[0] > 0x99) || ((cdr.Param[1] & 0x0F) > 0x09) || (cdr.Param[1] >= 0x60) || ((cdr.Param[2] & 0x0F) > 0x09) || (cdr.Param[2] >= 0x75)) - { - CDR_LOG("Invalid/out of range seek to 
%02X:%02X:%02X\n", cdr.Param[0], cdr.Param[1], cdr.Param[2]); - } - else - { - for (i = 0; i < 3; i++) - { - set_loc[i] = btoi(cdr.Param[i]); - } - - i = msf2sec(cdr.SetSectorPlay); - i = abs(i - msf2sec(set_loc)); - if (i > 16) - cdr.Seeked = SEEK_PENDING; - - memcpy(cdr.SetSector, set_loc, 3); - cdr.SetSector[3] = 0; - cdr.SetlocPending = 1; - } - break; case CdlReadN: case CdlReadS: From 981752f84063a77f456e494dc298808ec14611a9 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Fri, 29 Oct 2021 20:22:25 +0000 Subject: [PATCH 039/597] Remove CdlGetlocP hack. (#235) This was added back in 2013 or so in PCSX Rearmed and according to some tests against Tomb Raider 1 (which is affected by the GetLocP code), it works properly without this hack. So let's just remove it as we are now doing it properly. (Besides, Duckstation and mednafen don't have this hack) --- libpcsxcore/cdrom.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 016b9939c..753c51262 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -841,9 +841,6 @@ void cdrInterrupt() { case CdlGetlocP: SetResultSize(8); memcpy(&cdr.Result, &cdr.subq, 8); - - if (!cdr.Play && !cdr.Reading) - cdr.Result[1] = 0; // HACK? break; case CdlReadT: // SetSession? 
From 8478ca412d4a76e311977d5c220810c498f3938b Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 29 Oct 2021 23:57:05 +0300 Subject: [PATCH 040/597] try to migrate from Travis CI to GitHub Actions --- .github/workflows/ci-linux.yml | 15 +++++++++++++++ .travis.yml | 8 -------- README.md | 2 +- 3 files changed, 16 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/ci-linux.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml new file mode 100644 index 000000000..511aa5661 --- /dev/null +++ b/.github/workflows/ci-linux.yml @@ -0,0 +1,15 @@ +name: CI (Linux) +on: [push, pull_request] +jobs: + build-linux: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install dependencies + run: | + sudo apt-get update -qq + sudo apt-get install -y libsdl1.2-dev libasound2-dev libpng-dev libz-dev + - name: configure + run: ./configure + - name: make + run: make diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 7c4eafbd0..000000000 --- a/.travis.yml +++ /dev/null @@ -1,8 +0,0 @@ -language: cpp -compiler: - - gcc - - clang -before_install: - - sudo apt-get update -qq - - sudo apt-get install -y libsdl1.2-dev libasound2-dev libpng-dev libz-dev -script: ./configure && make diff --git a/README.md b/README.md index 996441088..be7093c3c 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ PCSX-ReARMed - yet another PCSX fork ==================================== -[![Build Status](https://travis-ci.org/notaz/pcsx_rearmed.svg?branch=master)](https://travis-ci.org/notaz/pcsx_rearmed) +![CI (Linux)](https://github.com/notaz/pcsx_rearmed/workflows/CI%20(Linux)/badge.svg) *see [readme.txt](readme.txt) for more complete documentation* From 47a8e01c397c8852e00c202e3f2815bb7b5a364a Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 30 Oct 2021 00:08:27 +0300 Subject: [PATCH 041/597] ci-linux.yml: also need submodules --- .github/workflows/ci-linux.yml | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml index 511aa5661..438b0552e 100644 --- a/.github/workflows/ci-linux.yml +++ b/.github/workflows/ci-linux.yml @@ -5,6 +5,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 + with: + submodules: true - name: Install dependencies run: | sudo apt-get update -qq From 857275a93c8e222bc800999f08d4b42e327f4c1b Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 31 Oct 2021 14:40:30 +0200 Subject: [PATCH 042/597] assorted warning fixes --- Makefile | 6 ++++++ frontend/menu.c | 11 ++++++----- libpcsxcore/psxbios.c | 6 +++--- libpcsxcore/psxhle.c | 2 +- libpcsxcore/psxhle.h | 2 +- plugins/dfxvideo/gpulib_if.c | 4 ++++ plugins/gpu-gles/gpuDraw.c | 8 ++++---- 7 files changed, 25 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index 0998f586e..e3b909980 100644 --- a/Makefile +++ b/Makefile @@ -290,6 +290,12 @@ endif .PHONY: all clean target_ plugins_ clean_plugins FORCE +ifneq "$(PLATFORM)" "pandora" +ifdef CPATH +$(warning warning: CPATH is defined) +endif +endif + # ----------- release ----------- VER ?= $(shell git describe HEAD) diff --git a/frontend/menu.c b/frontend/menu.c index 05dde4617..37956ffe8 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1256,10 +1256,11 @@ static const char h_scaler[] = "int. 2x - scales w. or h. 2x if it fits on s "int. 
4:3 - uses integer if possible, else fractional"; static const char h_cscaler[] = "Displays the scaler layer, you can resize it\n" "using d-pad or move it using R+d-pad"; -static const char h_overlay[] = "Overlay provides hardware accelerated scaling"; static const char h_soft_filter[] = "Works only if game uses low resolution modes"; -static const char h_scanline_l[] = "Scanline brightness, 0-100%"; static const char h_gamma[] = "Gamma/brightness adjustment (default 100)"; +#ifdef __ARM_NEON__ +static const char h_scanline_l[] = "Scanline brightness, 0-100%"; +#endif static int menu_loop_cscaler(int id, int keys) { @@ -1579,10 +1580,10 @@ static const char h_cfg_fl[] = "Frame Limiter keeps the game from running to static const char h_cfg_xa[] = "Disables XA sound, which can sometimes improve performance"; static const char h_cfg_cdda[] = "Disable CD Audio for a performance boost\n" "(proper .cue/.bin dump is needed otherwise)"; -static const char h_cfg_sio[] = "You should not need this, breaks games"; +//static const char h_cfg_sio[] = "You should not need this, breaks games"; static const char h_cfg_spuirq[] = "Compatibility tweak; should be left off"; -static const char h_cfg_rcnt1[] = "Parasite Eve 2, Vandal Hearts 1/2 Fix\n" - "(timing hack, breaks other games)"; +//static const char h_cfg_rcnt1[] = "Parasite Eve 2, Vandal Hearts 1/2 Fix\n" +// "(timing hack, breaks other games)"; static const char h_cfg_rcnt2[] = "InuYasha Sengoku Battle Fix\n" "(timing hack, breaks other games)"; static const char h_cfg_nodrc[] = "Disable dynamic recompiler and use interpreter\n" diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index ed95b06f6..a0588be99 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -2032,10 +2032,10 @@ void psxBios_UnDeliverEvent() { // 0x20 char ffile[64], *pfile; int nfile; -static void buopen(int mcd, u8 *ptr, u8 *cfg) +static void buopen(int mcd, char *ptr, char *cfg) { int i; - u8 *fptr = ptr; + char *fptr = ptr; 
strcpy(FDesc[1 + mcd].name, Ra0+5); FDesc[1 + mcd].offset = 0; @@ -2066,7 +2066,7 @@ static void buopen(int mcd, u8 *ptr, u8 *cfg) fptr[6] = 0x00; fptr[7] = 0x00; strcpy(fptr+0xa, FDesc[1 + mcd].name); - pptr = fptr2 = fptr; + pptr = fptr2 = (u8 *)fptr; for(j=2; j<=nblk; j++) { int k; for(i++; i<16; i++) { diff --git a/libpcsxcore/psxhle.c b/libpcsxcore/psxhle.c index 52227a40d..064d40115 100644 --- a/libpcsxcore/psxhle.c +++ b/libpcsxcore/psxhle.c @@ -89,7 +89,7 @@ static void hleExecRet() { psxRegs.pc = psxRegs.GPR.n.ra; } -const void (*psxHLEt[8])() = { +void (* const psxHLEt[])() = { hleDummy, hleA0, hleB0, hleC0, hleBootstrap, hleExecRet, hleDummy, hleDummy diff --git a/libpcsxcore/psxhle.h b/libpcsxcore/psxhle.h index 0529c3898..04126345e 100644 --- a/libpcsxcore/psxhle.h +++ b/libpcsxcore/psxhle.h @@ -28,7 +28,7 @@ extern "C" { #include "r3000a.h" #include "plugins.h" -extern const void (*psxHLEt[8])(); +extern void (* const psxHLEt[8])(); #ifdef __cplusplus } diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index 01b8dde2e..ff0c96c70 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -260,6 +260,10 @@ unsigned short sSetMask = 0; unsigned long lSetMask = 0; long lLowerpart; +#if defined(__GNUC__) && __GNUC__ >= 6 +#pragma GCC diagnostic ignored "-Wmisleading-indentation" +#endif + #include "soft.c" #include "prim.c" diff --git a/plugins/gpu-gles/gpuDraw.c b/plugins/gpu-gles/gpuDraw.c index 34d1c3bde..c49eac5ff 100644 --- a/plugins/gpu-gles/gpuDraw.c +++ b/plugins/gpu-gles/gpuDraw.c @@ -291,7 +291,7 @@ bool TestEGLError(const char* pszLocation) EGLint iErr = eglGetError(); if (iErr != EGL_SUCCESS) { - printf("%s failed (0x%x).\n", pszLocation, iErr); + printf("%s failed (0x%x).\n", pszLocation, (int)iErr); return FALSE; } @@ -572,7 +572,7 @@ void GLcleanup() // real psx polygon coord mapping right... 
the following // works not to bad with many games, though -__inline BOOL CheckCoord4() +static __inline BOOL CheckCoord4() { if(lx0<0) { @@ -638,7 +638,7 @@ __inline BOOL CheckCoord4() return FALSE; } -__inline BOOL CheckCoord3() +static __inline BOOL CheckCoord3() { if(lx0<0) { @@ -675,7 +675,7 @@ __inline BOOL CheckCoord3() } -__inline BOOL CheckCoord2() +static __inline BOOL CheckCoord2() { if(lx0<0) { From a4da039c0c2f0731057b26398b6729819bbdaaeb Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 31 Oct 2021 17:23:53 +0200 Subject: [PATCH 043/597] update libpicofe --- .gitmodules | 4 ++-- Makefile | 1 + frontend/libpicofe | 2 +- frontend/plat_omap.c | 2 +- frontend/plat_pollux.c | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.gitmodules b/.gitmodules index 5f7f360cd..691f83092 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,9 @@ [submodule "libpicofe"] path = frontend/libpicofe - url = git://notaz.gp2x.de/~notaz/libpicofe.git + url = https://github.com/notaz/libpicofe.git [submodule "warm"] path = frontend/warm - url = git://notaz.gp2x.de/~notaz/warm.git + url = https://github.com/notaz/warm.git [submodule "libchdr"] path = libchdr url = https://github.com/rtissera/libchdr.git diff --git a/Makefile b/Makefile index e3b909980..f8d1dc17d 100644 --- a/Makefile +++ b/Makefile @@ -222,6 +222,7 @@ ifeq "$(USE_PLUGIN_LIB)" "1" OBJS += frontend/plugin_lib.o OBJS += frontend/libpicofe/linux/plat.o OBJS += frontend/libpicofe/readpng.o frontend/libpicofe/fonts.o +frontend/libpicofe/linux/plat.o: CFLAGS += -DNO_HOME_DIR ifeq "$(HAVE_NEON)" "1" OBJS += frontend/libpicofe/arm/neon_scale2x.o OBJS += frontend/libpicofe/arm/neon_eagle2x.o diff --git a/frontend/libpicofe b/frontend/libpicofe index 21604a047..c668921a4 160000 --- a/frontend/libpicofe +++ b/frontend/libpicofe @@ -1 +1 @@ -Subproject commit 21604a047941b8fe81d381ede0371c75da964afd +Subproject commit c668921a45b7a7f5f548d0e09836f143f56b4ae0 diff --git a/frontend/plat_omap.c 
b/frontend/plat_omap.c index f25f31cc2..c4bff3131 100644 --- a/frontend/plat_omap.c +++ b/frontend/plat_omap.c @@ -223,7 +223,7 @@ void plat_omap_init(void) exit(1); } - g_menuscreen_w = w; + g_menuscreen_w = g_menuscreen_pp = w; g_menuscreen_h = h; g_menuscreen_ptr = vout_fbdev_flip(main_fb); pl_rearmed_cbs.screen_w = w; diff --git a/frontend/plat_pollux.c b/frontend/plat_pollux.c index 33e94178e..18b805319 100644 --- a/frontend/plat_pollux.c +++ b/frontend/plat_pollux.c @@ -555,7 +555,7 @@ void plat_init(void) memset(fb_vaddrs[0], 0, FB_VRAM_SIZE); pollux_changemode(16, 0); - g_menuscreen_w = 320; + g_menuscreen_w = g_menuscreen_pp = 320; g_menuscreen_h = 240; g_menuscreen_ptr = fb_flip(); From d63486396d143aa254c4b964995d36edff671476 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 31 Oct 2021 18:20:57 +0200 Subject: [PATCH 044/597] some cleanup to reduce confusion --- libpcsxcore/new_dynarec/emu_if.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 1733a2a52..0569cebd1 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -25,7 +25,6 @@ #define evprintf(...) char invalid_code[0x100000]; -static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); u32 event_cycles[PSXINT_COUNT]; static void schedule_timeslice(void) @@ -191,6 +190,8 @@ void new_dyna_freeze(void *f, int mode) //printf("drc: %d block info entries %s\n", size/8, mode ? 
"saved" : "loaded"); } +#ifndef DRC_DISABLE + /* GTE stuff */ void *gte_handlers[64]; @@ -303,6 +304,7 @@ const uint64_t gte_reg_writes[64] = { static int ari64_init() { + static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); extern void (*psxCP2[64])(); extern void psxNULL(); extern unsigned char *out; @@ -417,25 +419,11 @@ static void ari64_shutdown() new_dyna_pcsx_mem_shutdown(); } -extern void intExecute(); -extern void intExecuteT(); -extern void intExecuteBlock(); -extern void intExecuteBlockT(); -#ifndef DRC_DBG -#define intExecuteT intExecute -#define intExecuteBlockT intExecuteBlock -#endif - R3000Acpu psxRec = { ari64_init, ari64_reset, -#ifndef DRC_DISABLE ari64_execute, ari64_execute_until, -#else - intExecuteT, - intExecuteBlockT, -#endif ari64_clear, #ifdef ICACHE_EMULATION ari64_notify, @@ -443,13 +431,8 @@ R3000Acpu psxRec = { ari64_shutdown }; -// TODO: rm -#ifndef DRC_DBG -void do_insn_trace() {} -void do_insn_cmp() {} -#endif +#else // if DRC_DISABLE -#ifdef DRC_DISABLE unsigned int address; int pending_exception, stop; unsigned int next_interupt; @@ -462,7 +445,7 @@ u8 zero_mem[0x1000]; unsigned char *out; void *mem_rtab; void *scratch_buf_ptr; -void new_dynarec_init() { (void)ari64_execute; } +void new_dynarec_init() {} void new_dyna_start() {} void new_dynarec_cleanup() {} void new_dynarec_clear_full() {} From dd114d7d8e8d30bde79eb72d3ae1afc2f06cebb7 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 31 Oct 2021 18:33:40 +0200 Subject: [PATCH 045/597] some drc debug helpers --- libpcsxcore/new_dynarec/assem_arm.c | 51 +++++++++++++++++++++++++-- libpcsxcore/new_dynarec/emu_if.c | 2 ++ libpcsxcore/new_dynarec/new_dynarec.c | 25 +++++++++++++ 3 files changed, 76 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index b336bcca1..8693c2ddb 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -43,6 +43,12 @@ char translation_cache[1 << 
TARGET_SIZE_2] __attribute__((aligned(4096))); #define unused __attribute__((unused)) +#ifdef DRC_DBG +#pragma GCC diagnostic ignored "-Wunused-function" +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wunused-but-set-variable" +#endif + extern int cycle_count; extern int last_count; extern int pcaddr; @@ -1651,16 +1657,57 @@ static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt) emit_cmovb_imm(1,rt); } +#ifdef DRC_DBG +extern void gen_interupt(); +extern void do_insn_cmp(); +#define FUNCNAME(f) { (intptr_t)f, " " #f } +static const struct { + intptr_t addr; + const char *name; +} function_names[] = { + FUNCNAME(cc_interrupt), + FUNCNAME(gen_interupt), + FUNCNAME(get_addr_ht), + FUNCNAME(get_addr), + FUNCNAME(jump_handler_read8), + FUNCNAME(jump_handler_read16), + FUNCNAME(jump_handler_read32), + FUNCNAME(jump_handler_write8), + FUNCNAME(jump_handler_write16), + FUNCNAME(jump_handler_write32), + FUNCNAME(invalidate_addr), + FUNCNAME(verify_code_vm), + FUNCNAME(verify_code), + FUNCNAME(jump_hlecall), + FUNCNAME(jump_syscall_hle), + FUNCNAME(new_dyna_leave), + FUNCNAME(pcsx_mtc0), + FUNCNAME(pcsx_mtc0_ds), + FUNCNAME(do_insn_cmp), +}; + +static const char *func_name(intptr_t a) +{ + int i; + for (i = 0; i < sizeof(function_names)/sizeof(function_names[0]); i++) + if (function_names[i].addr == a) + return function_names[i].name; + return ""; +} +#else +#define func_name(x) "" +#endif + static void emit_call(int a) { - assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8); + assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a)); u_int offset=genjmp(a); output_w32(0xeb000000|offset); } static void emit_jmp(int a) { - assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8); + assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a)); u_int offset=genjmp(a); output_w32(0xea000000|offset); } diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 
0569cebd1..2c82f58bf 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -489,6 +489,7 @@ static u32 memcheck_read(u32 a) return *(u32 *)(psxM + (a & 0x1ffffc)); } +#if 0 void do_insn_trace(void) { static psxRegisters oldregs; @@ -550,6 +551,7 @@ void do_insn_trace(void) } #endif } +#endif static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 7646e074d..6d7069d9c 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -4263,6 +4263,28 @@ int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) return 1; } +#ifdef DRC_DBG +static void drc_dbg_emit_do_cmp(int i) +{ + extern void do_insn_cmp(); + extern int cycle; + u_int hr,reglist=0; + + for(hr=0;hr=0) reglist|=1<\n"); + drc_dbg_emit_do_cmp(t); if(regs[t].regmap_entry[HOST_CCREG]==CCREG&®s[t].regmap[HOST_CCREG]!=CCREG) wb_register(CCREG,regs[t].regmap_entry,regs[t].wasdirty,regs[t].was32); load_regs(regs[t].regmap_entry,regs[t].regmap,regs[t].was32,rs1[t],rs2[t]); @@ -10033,6 +10056,8 @@ int new_recompile_block(int addr) // branch target entry point instr_addr[i]=(u_int)out; assem_debug("<->\n"); + drc_dbg_emit_do_cmp(i); + // load regs if(regs[i].regmap_entry[HOST_CCREG]==CCREG&®s[i].regmap[HOST_CCREG]!=CCREG) wb_register(CCREG,regs[i].regmap_entry,regs[i].wasdirty,regs[i].was32); From a151a8d8331cf743eabeab23ce52e9b7726239e5 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 31 Oct 2021 19:12:50 +0200 Subject: [PATCH 046/597] some drc debug patches --- libpcsxcore/new_dynarec/patches/trace_drc_chk | 269 ++++++++++++++++++ libpcsxcore/new_dynarec/patches/trace_intr | 230 +++++++++++++++ 2 files changed, 499 insertions(+) create mode 100644 libpcsxcore/new_dynarec/patches/trace_drc_chk create mode 100644 libpcsxcore/new_dynarec/patches/trace_intr diff --git 
a/libpcsxcore/new_dynarec/patches/trace_drc_chk b/libpcsxcore/new_dynarec/patches/trace_drc_chk new file mode 100644 index 000000000..d1fc6e96e --- /dev/null +++ b/libpcsxcore/new_dynarec/patches/trace_drc_chk @@ -0,0 +1,269 @@ +diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S +index d32dc0b..e52dde8 100644 +--- a/libpcsxcore/new_dynarec/linkage_arm.S ++++ b/libpcsxcore/new_dynarec/linkage_arm.S +@@ -442,7 +442,7 @@ FUNCTION(cc_interrupt): + str r1, [fp, #LO_pending_exception] + and r2, r2, r10, lsr #17 + add r3, fp, #LO_restore_candidate +- str r10, [fp, #LO_cycle] /* PCSX cycles */ ++@@@ str r10, [fp, #LO_cycle] /* PCSX cycles */ + @@ str r10, [fp, #LO_reg_cop0+36] /* Count */ + ldr r4, [r2, r3] + mov r10, lr +@@ -530,7 +530,7 @@ FUNCTION(jump_syscall_hle): + mov r1, #0 /* in delay slot */ + add r2, r2, r10 + mov r0, #0x20 /* cause */ +- str r2, [fp, #LO_cycle] /* PCSX cycle counter */ ++@@@ str r2, [fp, #LO_cycle] /* PCSX cycle counter */ + bl psxException + + /* note: psxException might do recursive recompiler call from it's HLE code, +@@ -551,7 +551,7 @@ FUNCTION(jump_hlecall): + str r0, [fp, #LO_pcaddr] + add r2, r2, r10 + adr lr, pcsx_return +- str r2, [fp, #LO_cycle] /* PCSX cycle counter */ ++@@@ str r2, [fp, #LO_cycle] /* PCSX cycle counter */ + bx r1 + .size jump_hlecall, .-jump_hlecall + +@@ -561,7 +561,7 @@ FUNCTION(jump_intcall): + str r0, [fp, #LO_pcaddr] + add r2, r2, r10 + adr lr, pcsx_return +- str r2, [fp, #LO_cycle] /* PCSX cycle counter */ ++@@@ str r2, [fp, #LO_cycle] /* PCSX cycle counter */ + b execI + .size jump_hlecall, .-jump_hlecall + +@@ -570,7 +570,7 @@ FUNCTION(new_dyna_leave): + ldr r0, [fp, #LO_last_count] + add r12, fp, #28 + add r10, r0, r10 +- str r10, [fp, #LO_cycle] ++@@@ str r10, [fp, #LO_cycle] + ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} + .size new_dyna_leave, .-new_dyna_leave + +@@ -687,7 +687,7 @@ FUNCTION(new_dyna_start): + \readop r0, [r1, r3, lsl #\tab_shift] + .endif 
+ movcc pc, lr +- str r2, [fp, #LO_cycle] ++@@@ str r2, [fp, #LO_cycle] + bx r1 + .endm + +@@ -722,7 +722,7 @@ FUNCTION(jump_handler_read32): + mov r0, r1 + add r2, r2, r12 + push {r2, lr} +- str r2, [fp, #LO_cycle] ++@@@ str r2, [fp, #LO_cycle] + blx r3 + + ldr r0, [fp, #LO_next_interupt] +@@ -750,7 +750,7 @@ FUNCTION(jump_handler_write_h): + add r2, r2, r12 + mov r0, r1 + push {r2, lr} +- str r2, [fp, #LO_cycle] ++@@@ str r2, [fp, #LO_cycle] + blx r3 + + ldr r0, [fp, #LO_next_interupt] +diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c +index 6d7069d..586a6db 100644 +--- a/libpcsxcore/new_dynarec/new_dynarec.c ++++ b/libpcsxcore/new_dynarec/new_dynarec.c +@@ -38,10 +38,10 @@ static int sceBlock; + #include "../psxhle.h" //emulator interface + #include "emu_if.h" //emulator interface + +-//#define DISASM +-//#define assem_debug printf ++#define DISASM ++#define assem_debug printf + //#define inv_debug printf +-#define assem_debug(...) ++//#define assem_debug(...) + #define inv_debug(...) 
+ + #ifdef __i386__ +@@ -362,6 +362,9 @@ static u_int get_vpage(u_int vaddr) + // This is called from the recompiled JR/JALR instructions + void *get_addr(u_int vaddr) + { ++#ifdef DRC_DBG ++printf("get_addr %08x, pc=%08x\n", vaddr, psxRegs.pc); ++#endif + u_int page=get_page(vaddr); + u_int vpage=get_vpage(vaddr); + struct ll_entry *head; +@@ -4403,13 +4406,15 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) + } + emit_addimm_and_set_flags(cycles,HOST_CCREG); + jaddr=(int)out; +- emit_jns(0); ++// emit_jns(0); ++emit_jmp(0); + } + else + { + emit_cmpimm(HOST_CCREG,-CLOCK_ADJUST(count+2)); + jaddr=(int)out; +- emit_jns(0); ++// emit_jns(0); ++emit_jmp(0); + } + add_stub(CC_STUB,jaddr,idle?idle:(int)out,(*adj==0||invert||idle)?0:(count+2),i,addr,taken,0); + } +@@ -4884,7 +4889,8 @@ void rjump_assemble(int i,struct regstat *i_regs) + // special case for RFE + emit_jmp(0); + else +- emit_jns(0); ++ //emit_jns(0); ++ emit_jmp(0); + //load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1); + #ifdef USE_MINI_HT + if(rs1[i]==31) { +@@ -5034,7 +5040,8 @@ void cjump_assemble(int i,struct regstat *i_regs) + else if(nop) { + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); + int jaddr=(int)out; +- emit_jns(0); ++// emit_jns(0); ++emit_jmp(0); + add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); + } + else { +@@ -5300,7 +5307,8 @@ void cjump_assemble(int i,struct regstat *i_regs) + emit_loadreg(CCREG,HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); + int jaddr=(int)out; +- emit_jns(0); ++// emit_jns(0); ++emit_jmp(0); + add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); + emit_storereg(CCREG,HOST_CCREG); + } +@@ -5309,7 +5317,8 @@ void cjump_assemble(int i,struct regstat *i_regs) + assert(cc==HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); + int jaddr=(int)out; +- emit_jns(0); ++// emit_jns(0); ++emit_jmp(0); + 
add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); + } + } +@@ -5419,7 +5428,8 @@ void sjump_assemble(int i,struct regstat *i_regs) + else if(nevertaken) { + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); + int jaddr=(int)out; +- emit_jns(0); ++// emit_jns(0); ++emit_jmp(0); + add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); + } + else { +@@ -5628,7 +5638,8 @@ void sjump_assemble(int i,struct regstat *i_regs) + emit_loadreg(CCREG,HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); + int jaddr=(int)out; +- emit_jns(0); ++// emit_jns(0); ++emit_jmp(0); + add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); + emit_storereg(CCREG,HOST_CCREG); + } +@@ -5637,7 +5648,8 @@ void sjump_assemble(int i,struct regstat *i_regs) + assert(cc==HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); + int jaddr=(int)out; +- emit_jns(0); ++// emit_jns(0); ++emit_jmp(0); + add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); + } + } +@@ -5833,7 +5845,8 @@ void fjump_assemble(int i,struct regstat *i_regs) + emit_loadreg(CCREG,HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); + int jaddr=(int)out; +- emit_jns(0); ++// emit_jns(0); ++emit_jmp(0); + add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); + emit_storereg(CCREG,HOST_CCREG); + } +@@ -5842,7 +5855,8 @@ void fjump_assemble(int i,struct regstat *i_regs) + assert(cc==HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); + int jaddr=(int)out; +- emit_jns(0); ++// emit_jns(0); ++emit_jmp(0); + add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); + } + } +@@ -6463,7 +6477,7 @@ void unneeded_registers(int istart,int iend,int r) + // R0 is always unneeded + u|=1;uu|=1; + // Save it +- unneeded_reg[i]=u; ++ unneeded_reg[i]=1;//u; + unneeded_reg_upper[i]=uu; + gte_unneeded[i]=gte_u; + /* +@@ -9676,6 +9690,7 @@ int new_recompile_block(int 
addr) + + // This allocates registers (if possible) one instruction prior + // to use, which can avoid a load-use penalty on certain CPUs. ++#if 0 + for(i=0;i>16)==0x1000) + literal_pool(1024); + else +@@ -10256,7 +10277,7 @@ int new_recompile_block(int addr) + } + } + // External Branch Targets (jump_in) +- if(copy+slen*4>(void *)shadow+sizeof(shadow)) copy=shadow; ++ if(copy+slen*4>(void *)shadow+sizeof(shadow)) {copy=shadow;printf("shadow overflow\n");} + for(i=0;i> 26; + switch (tmp) { +@@ -547,13 +548,15 @@ static void doBranch(u32 tar) { + } + break; + } +- ++#endif + psxBSC[psxRegs.code >> 26](); + + branch = 0; + psxRegs.pc = branchPC; + + psxBranchTest(); ++ ++ psxRegs.cycle += BIAS; + } + + /********************************************************* +@@ -636,12 +639,13 @@ void psxMULTU() { + psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff); + } + ++#define doBranchNotTaken() do { psxRegs.cycle -= BIAS; execI(); psxBranchTest(); psxRegs.cycle += BIAS; } while(0) + /********************************************************* + * Register branch logic * + * Format: OP rs, offset * + *********************************************************/ +-#define RepZBranchi32(op) if(_i32(_rRs_) op 0) doBranch(_BranchTarget_); +-#define RepZBranchLinki32(op) { _SetLink(31); if(_i32(_rRs_) op 0) { doBranch(_BranchTarget_); } } ++#define RepZBranchi32(op) if(_i32(_rRs_) op 0) doBranch(_BranchTarget_); else doBranchNotTaken(); ++#define RepZBranchLinki32(op) { _SetLink(31); if(_i32(_rRs_) op 0) { doBranch(_BranchTarget_); } else doBranchNotTaken(); } + + void psxBGEZ() { RepZBranchi32(>=) } // Branch if Rs >= 0 + void psxBGEZAL() { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link +@@ -711,7 +715,7 @@ void psxRFE() { + * Register branch logic * + * Format: OP rs, rt, offset * + *********************************************************/ +-#define RepBranchi32(op) if(_i32(_rRs_) op _i32(_rRt_)) doBranch(_BranchTarget_); ++#define RepBranchi32(op) if(_i32(_rRs_) op 
_i32(_rRt_)) doBranch(_BranchTarget_); else doBranchNotTaken(); + + void psxBEQ() { RepBranchi32(==) } // Branch if Rs == Rt + void psxBNE() { RepBranchi32(!=) } // Branch if Rs != Rt +@@ -895,6 +899,9 @@ void MTC0(int reg, u32 val) { + case 12: // Status + psxRegs.CP0.r[12] = val; + psxTestSWInts(); ++#ifndef __arm__ ++ psxBranchTest(); ++#endif + break; + + case 13: // Cause +@@ -1057,6 +1064,23 @@ void intExecuteBlock() { + while (!branch2) execI(); + } + ++extern void do_insn_trace(void); ++ ++void intExecuteT() { ++ for (;;) { ++ do_insn_trace(); ++ execI(); ++ } ++} ++ ++void intExecuteBlockT() { ++ branch2 = 0; ++ while (!branch2) { ++ do_insn_trace(); ++ execI(); ++ } ++} ++ + static void intClear(u32 Addr, u32 Size) { + } + +diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c +index c09965d..135a5d0 100644 +--- a/libpcsxcore/psxmem.c ++++ b/libpcsxcore/psxmem.c +@@ -219,11 +219,13 @@ void psxMemShutdown() { + } + + static int writeok = 1; ++u32 last_io_addr; + + u8 psxMemRead8(u32 mem) { + char *p; + u32 t; + ++ last_io_addr = mem; + t = mem >> 16; + if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { + if ((mem & 0xffff) < 0x400) +@@ -249,6 +251,7 @@ u16 psxMemRead16(u32 mem) { + char *p; + u32 t; + ++ last_io_addr = mem; + t = mem >> 16; + if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { + if ((mem & 0xffff) < 0x400) +@@ -274,6 +277,7 @@ u32 psxMemRead32(u32 mem) { + char *p; + u32 t; + ++ last_io_addr = mem; + t = mem >> 16; + if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { + if ((mem & 0xffff) < 0x400) +@@ -299,6 +303,7 @@ void psxMemWrite8(u32 mem, u8 value) { + char *p; + u32 t; + ++ last_io_addr = mem; + t = mem >> 16; + if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { + if ((mem & 0xffff) < 0x400) +@@ -326,6 +331,7 @@ void psxMemWrite16(u32 mem, u16 value) { + char *p; + u32 t; + ++ last_io_addr = mem; + t = mem >> 16; + if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { + if ((mem & 0xffff) < 0x400) +@@ -353,6 +359,7 @@ void psxMemWrite32(u32 
mem, u32 value) { + char *p; + u32 t; + ++ last_io_addr = mem; + // if ((mem&0x1fffff) == 0x71E18 || value == 0x48088800) SysPrintf("t2fix!!\n"); + t = mem >> 16; + if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { From df4dc2b13cf71f32751cf842b03ef87991b2c55a Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 1 Nov 2021 01:30:34 +0200 Subject: [PATCH 047/597] drc: rework for 64bit, part 1 --- libpcsxcore/new_dynarec/assem_arm.c | 58 ++-- libpcsxcore/new_dynarec/linkage_arm.S | 14 +- libpcsxcore/new_dynarec/new_dynarec.c | 388 ++++++++++++++------------ 3 files changed, 241 insertions(+), 219 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 8693c2ddb..7641bbcc7 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -127,26 +127,27 @@ static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; /* Linker */ -static void set_jump_target(int addr,u_int target) +static void set_jump_target(void *addr, void *target_) { - u_char *ptr=(u_char *)addr; + u_int target = (u_int)target_; + u_char *ptr = addr; u_int *ptr2=(u_int *)ptr; if(ptr[3]==0xe2) { assert((target-(u_int)ptr2-8)<1024); - assert((addr&3)==0); + assert(((uintptr_t)addr&3)==0); assert((target&3)==0); *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00; - //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2); + //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2); } else if(ptr[3]==0x72) { // generated by emit_jno_unlikely if((target-(u_int)ptr2-8)<1024) { - assert((addr&3)==0); + assert(((uintptr_t)addr&3)==0); assert((target&3)==0); *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00; } else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) { - assert((addr&3)==0); + assert(((uintptr_t)addr&3)==0); assert((target&3)==0); *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00; } @@ -227,9 +228,9 @@ static int get_pointer(void *stub) // Find the "clean" entry point from a "dirty" entry 
point // by skipping past the call to verify_code -static u_int get_clean_addr(int addr) +static void *get_clean_addr(void *addr) { - int *ptr=(int *)addr; + signed int *ptr = addr; #ifndef HAVE_ARMV7 ptr+=4; #else @@ -239,9 +240,9 @@ static u_int get_clean_addr(int addr) assert((*ptr&0xFF000000)==0xeb000000); // bl instruction ptr++; if((*ptr&0xFF000000)==0xea000000) { - return (int)ptr+((*ptr<<8)>>6)+8; // follow jump + return (char *)ptr+((*ptr<<8)>>6)+8; // follow jump } - return (u_int)ptr; + return ptr; } static int verify_dirty(u_int *ptr) @@ -278,7 +279,7 @@ static int verify_dirty(u_int *ptr) // This doesn't necessarily find all clean entry points, just // guarantees that it's not dirty -static int isclean(int addr) +static int isclean(void *addr) { #ifndef HAVE_ARMV7 u_int *ptr=((u_int *)addr)+4; @@ -2467,10 +2468,10 @@ static void literal_pool_jumpover(int n) if(n) { if((int)out-literals[0][0]<4096-n) return; } - int jaddr=(int)out; + void *jaddr = out; emit_jmp(0); literal_pool(0); - set_jump_target(jaddr,(int)out); + set_jump_target(jaddr, out); } static void emit_extjump2(u_int addr, int target, int linker) @@ -2586,7 +2587,7 @@ static void do_readstub(int n) { assem_debug("do_readstub %x\n",start+stubs[n][3]*4); literal_pool(256); - set_jump_target(stubs[n][1],(int)out); + set_jump_target(stubs[n][1], out); int type=stubs[n][0]; int i=stubs[n][3]; int rs=stubs[n][4]; @@ -2600,7 +2601,8 @@ static void do_readstub(int n) rt=get_reg(i_regmap,rt1[i]); } assert(rs>=0); - int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0; + int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0; + void *restore_jump = NULL; reglist|=(1<=0); assert(rt>=0); - int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra; + int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,ra; + void *restore_jump = NULL; int reglist2=reglist|(1<>16)^vaddr)&0xFFFF]; +} + +static void hash_table_add(struct ht_entry *ht_bin, u_int vaddr, void *tcaddr) +{ + ht_bin->vaddr[1] = 
ht_bin->vaddr[0]; + ht_bin->tcaddr[1] = ht_bin->tcaddr[0]; + ht_bin->vaddr[0] = vaddr; + ht_bin->tcaddr[0] = tcaddr; +} + +// some messy ari64's code, seems to rely on unsigned 32bit overflow +static int doesnt_expire_soon(void *tcaddr) +{ + u_int diff = (u_int)((u_char *)tcaddr - out) << (32-TARGET_SIZE_2); + return diff > (u_int)(0x60000000 + (MAX_OUTPUT_BLOCK_SIZE << (32-TARGET_SIZE_2))); +} + // Get address from virtual address // This is called from the recompiled JR/JALR instructions void *get_addr(u_int vaddr) @@ -370,11 +396,7 @@ void *get_addr(u_int vaddr) while(head!=NULL) { if(head->vaddr==vaddr) { //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr); - u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; - ht_bin[3]=ht_bin[1]; - ht_bin[2]=ht_bin[0]; - ht_bin[1]=(u_int)head->addr; - ht_bin[0]=vaddr; + hash_table_add(hash_table_get(vaddr), vaddr, head->addr); return head->addr; } head=head->next; @@ -384,8 +406,8 @@ void *get_addr(u_int vaddr) if(head->vaddr==vaddr) { //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr); // Don't restore blocks which are about to expire from the cache - if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) - if(verify_dirty(head->addr)) { + if (doesnt_expire_soon(head->addr)) + if (verify_dirty(head->addr)) { //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); invalid_code[vaddr>>12]=0; inv_code_start=inv_code_end=~0; @@ -393,17 +415,12 @@ void *get_addr(u_int vaddr) restore_candidate[vpage>>3]|=1<<(vpage&7); } else restore_candidate[page>>3]|=1<<(page&7); - u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; - if(ht_bin[0]==vaddr) { - ht_bin[1]=(u_int)head->addr; // Replace existing entry - } + struct ht_entry *ht_bin = hash_table_get(vaddr); + if (ht_bin->vaddr[0] == vaddr) + ht_bin->tcaddr[0] = head->addr; // Replace 
existing entry else - { - ht_bin[3]=ht_bin[1]; - ht_bin[2]=ht_bin[0]; - ht_bin[1]=(int)head->addr; - ht_bin[0]=vaddr; - } + hash_table_add(ht_bin, vaddr, head->addr); + return head->addr; } } @@ -425,9 +442,9 @@ void *get_addr(u_int vaddr) void *get_addr_ht(u_int vaddr) { //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr); - u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; - if(ht_bin[0]==vaddr) return (void *)ht_bin[1]; - if(ht_bin[2]==vaddr) return (void *)ht_bin[3]; + const struct ht_entry *ht_bin = hash_table_get(vaddr); + if (ht_bin->vaddr[0] == vaddr) return ht_bin->tcaddr[0]; + if (ht_bin->vaddr[1] == vaddr) return ht_bin->tcaddr[1]; return get_addr(vaddr); } @@ -796,39 +813,39 @@ void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr // but don't return addresses which are about to expire from the cache void *check_addr(u_int vaddr) { - u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; - if(ht_bin[0]==vaddr) { - if(((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) - if(isclean(ht_bin[1])) return (void *)ht_bin[1]; - } - if(ht_bin[2]==vaddr) { - if(((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) - if(isclean(ht_bin[3])) return (void *)ht_bin[3]; + struct ht_entry *ht_bin = hash_table_get(vaddr); + size_t i; + for (i = 0; i < sizeof(ht_bin->vaddr)/sizeof(ht_bin->vaddr[0]); i++) { + if (ht_bin->vaddr[i] == vaddr) + if (doesnt_expire_soon((u_char *)ht_bin->tcaddr[i] - MAX_OUTPUT_BLOCK_SIZE)) + if (isclean(ht_bin->tcaddr[i])) + return ht_bin->tcaddr[i]; } u_int page=get_page(vaddr); struct ll_entry *head; head=jump_in[page]; - while(head!=NULL) { - if(head->vaddr==vaddr) { - if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) { + while (head != NULL) { + if (head->vaddr == vaddr) { + if 
(doesnt_expire_soon(head->addr)) { // Update existing entry with current address - if(ht_bin[0]==vaddr) { - ht_bin[1]=(int)head->addr; + if (ht_bin->vaddr[0] == vaddr) { + ht_bin->tcaddr[0] = head->addr; return head->addr; } - if(ht_bin[2]==vaddr) { - ht_bin[3]=(int)head->addr; + if (ht_bin->vaddr[1] == vaddr) { + ht_bin->tcaddr[1] = head->addr; return head->addr; } // Insert into hash table with low priority. // Don't evict existing entries, as they are probably // addresses that are being accessed frequently. - if(ht_bin[0]==-1) { - ht_bin[1]=(int)head->addr; - ht_bin[0]=vaddr; - }else if(ht_bin[2]==-1) { - ht_bin[3]=(int)head->addr; - ht_bin[2]=vaddr; + if (ht_bin->vaddr[0] == -1) { + ht_bin->vaddr[0] = vaddr; + ht_bin->tcaddr[0] = head->addr; + } + else if (ht_bin->vaddr[1] == -1) { + ht_bin->vaddr[1] = vaddr; + ht_bin->tcaddr[1] = head->addr; } return head->addr; } @@ -841,14 +858,16 @@ void *check_addr(u_int vaddr) void remove_hash(int vaddr) { //printf("remove hash: %x\n",vaddr); - u_int *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF]; - if(ht_bin[2]==vaddr) { - ht_bin[2]=ht_bin[3]=-1; + struct ht_entry *ht_bin = hash_table_get(vaddr); + if (ht_bin->vaddr[1] == vaddr) { + ht_bin->vaddr[1] = -1; + ht_bin->tcaddr[1] = NULL; } - if(ht_bin[0]==vaddr) { - ht_bin[0]=ht_bin[2]; - ht_bin[1]=ht_bin[3]; - ht_bin[2]=ht_bin[3]=-1; + if (ht_bin->vaddr[0] == vaddr) { + ht_bin->vaddr[0] = ht_bin->vaddr[1]; + ht_bin->tcaddr[0] = ht_bin->tcaddr[1]; + ht_bin->vaddr[1] = -1; + ht_bin->tcaddr[1] = NULL; } } @@ -901,7 +920,7 @@ static void ll_kill_pointers(struct ll_entry *head,int addr,int shift) #ifdef __arm__ mark_clear_cache(host_addr); #endif - set_jump_target((int)host_addr,(int)head->addr); + set_jump_target(host_addr, head->addr); } head=head->next; } @@ -929,7 +948,7 @@ void invalidate_page(u_int page) #ifdef __arm__ mark_clear_cache(host_addr); #endif - set_jump_target((int)host_addr,(int)head->addr); + set_jump_target(host_addr, head->addr); next=head->next; 
free(head); head=next; @@ -1091,7 +1110,7 @@ void clean_blocks(u_int page) while(head!=NULL) { if(!invalid_code[head->vaddr>>12]) { // Don't restore blocks which are about to expire from the cache - if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) { + if (doesnt_expire_soon(head->addr)) { u_int start,end; if(verify_dirty(head->addr)) { //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr); @@ -1107,20 +1126,18 @@ void clean_blocks(u_int page) inv=1; } if(!inv) { - void * clean_addr=(void *)get_clean_addr((int)head->addr); - if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) { + void *clean_addr = get_clean_addr(head->addr); + if (doesnt_expire_soon(clean_addr)) { u_int ppage=page; inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr); //printf("page=%x, addr=%x\n",page,head->vaddr); //assert(head->vaddr>>12==(page|0x80000)); ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr); - u_int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF]; - if(ht_bin[0]==head->vaddr) { - ht_bin[1]=(u_int)clean_addr; // Replace existing entry - } - if(ht_bin[2]==head->vaddr) { - ht_bin[3]=(u_int)clean_addr; // Replace existing entry - } + struct ht_entry *ht_bin = hash_table_get(head->vaddr); + if (ht_bin->vaddr[0] == head->vaddr) + ht_bin->tcaddr[0] = clean_addr; // Replace existing entry + if (ht_bin->vaddr[1] == head->vaddr) + ht_bin->tcaddr[1] = clean_addr; // Replace existing entry } } } @@ -3058,8 +3075,8 @@ void storelr_assemble(int i,struct regstat *i_regs) int temp2=-1; int offset; int jaddr=0; - int case1,case2,case3; - int done0,done1,done2; + void *case1, *case2, *case3; + void *done0, *done1, *done2; int memtarget=0,c=0; int agr=AGEN1+(i&1); u_int hr,reglist=0; @@ -3110,10 +3127,10 @@ void storelr_assemble(int i,struct regstat *i_regs) emit_xorimm(temp,3,temp); #endif 
emit_testimm(temp,2); - case2=(int)out; + case2=out; emit_jne(0); emit_testimm(temp,1); - case1=(int)out; + case1=out; emit_jne(0); // 0 if (opcode[i]==0x2A) { // SWL @@ -3130,10 +3147,10 @@ void storelr_assemble(int i,struct regstat *i_regs) emit_writebyte_indexed(tl,3,temp); if(rs2[i]) emit_shldimm(th,tl,24,temp2); } - done0=(int)out; + done0=out; emit_jmp(0); // 1 - set_jump_target(case1,(int)out); + set_jump_target(case1, out); if (opcode[i]==0x2A) { // SWL // Write 3 msb into three least significant bytes if(rs2[i]) emit_rorimm(tl,8,tl); @@ -3160,12 +3177,12 @@ void storelr_assemble(int i,struct regstat *i_regs) // Write two lsb into two most significant bytes emit_writehword_indexed(tl,1,temp); } - done1=(int)out; + done1=out; emit_jmp(0); // 2 - set_jump_target(case2,(int)out); + set_jump_target(case2, out); emit_testimm(temp,1); - case3=(int)out; + case3=out; emit_jne(0); if (opcode[i]==0x2A) { // SWL // Write two msb into two least significant bytes @@ -3195,10 +3212,10 @@ void storelr_assemble(int i,struct regstat *i_regs) emit_writehword_indexed(tl,0,temp); if(rs2[i]) emit_rorimm(tl,24,tl); } - done2=(int)out; + done2=out; emit_jmp(0); // 3 - set_jump_target(case3,(int)out); + set_jump_target(case3, out); if (opcode[i]==0x2A) { // SWL // Write msb into least significant byte if(rs2[i]) emit_rorimm(tl,24,tl); @@ -3221,24 +3238,24 @@ void storelr_assemble(int i,struct regstat *i_regs) // Write entire word emit_writeword_indexed(tl,-3,temp); } - set_jump_target(done0,(int)out); - set_jump_target(done1,(int)out); - set_jump_target(done2,(int)out); + set_jump_target(done0, out); + set_jump_target(done1, out); + set_jump_target(done2, out); if (opcode[i]==0x2C) { // SDL emit_testimm(temp,4); - done0=(int)out; + done0=out; emit_jne(0); emit_andimm(temp,~3,temp); emit_writeword_indexed(temp2,4,temp); - set_jump_target(done0,(int)out); + set_jump_target(done0, out); } if (opcode[i]==0x2D) { // SDR emit_testimm(temp,4); - done0=(int)out; + done0=out; emit_jeq(0); 
emit_andimm(temp,~3,temp); emit_writeword_indexed(temp2,-4,temp); - set_jump_target(done0,(int)out); + set_jump_target(done0, out); } if(!c||!memtarget) add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist); @@ -4289,7 +4306,8 @@ static void drc_dbg_emit_do_cmp(int i) void ds_assemble_entry(int i) { int t=(ba[i]-start)>>2; - if(!instr_addr[t]) instr_addr[t]=(u_int)out; + if (!instr_addr[t]) + instr_addr[t] = out; assem_debug("Assemble delay slot at %x\n",ba[i]); assem_debug("<->\n"); drc_dbg_emit_do_cmp(t); @@ -4418,7 +4436,7 @@ void do_ccstub(int n) { literal_pool(256); assem_debug("do_ccstub %x\n",start+stubs[n][4]*4); - set_jump_target(stubs[n][1],(int)out); + set_jump_target(stubs[n][1], out); int i=stubs[n][4]; if(stubs[n][6]==NULLDS) { // Delay slot instruction is nullified ("likely" branch) @@ -4716,12 +4734,12 @@ static void ujump_assemble_write_ra(int i) #ifdef REG_PREFETCH if(temp>=0) { - if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); + if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table_get(return_address),temp); } #endif emit_movimm(return_address,rt); // PC into link register #ifdef IMM_PREFETCH - emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]); + emit_prefetch(hash_table_get(return_address)); #endif } } @@ -4739,7 +4757,7 @@ void ujump_assemble(int i,struct regstat *i_regs) signed char *i_regmap=i_regs->regmap; int return_address=start+i*4+8; if(get_reg(branch_regs[i].regmap,31)>0) - if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); + if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table_get(return_address),temp); } #endif if(rt1[i]==31&&(rt1[i]==rs1[i+1]||rt1[i]==rs2[i+1])) { @@ -4791,12 +4809,12 @@ static void rjump_assemble_write_ra(int i) #ifdef REG_PREFETCH if(temp>=0) { - if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); + 
if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table_get(return_address),temp); } #endif emit_movimm(return_address,rt); // PC into link register #ifdef IMM_PREFETCH - emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]); + emit_prefetch(hash_table_get(return_address)); #endif } @@ -4822,7 +4840,7 @@ void rjump_assemble(int i,struct regstat *i_regs) if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) { signed char *i_regmap=i_regs->regmap; int return_address=start+i*4+8; - if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); + if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table_get(return_address),temp); } } #endif @@ -5038,7 +5056,7 @@ void cjump_assemble(int i,struct regstat *i_regs) add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); } else { - int taken=0,nottaken=0,nottaken1=0; + void *taken = NULL, *nottaken = NULL, *nottaken1 = NULL; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(!only32) @@ -5048,32 +5066,32 @@ void cjump_assemble(int i,struct regstat *i_regs) { if(s2h>=0) emit_cmp(s1h,s2h); else emit_test(s1h,s1h); - nottaken1=(int)out; + nottaken1=out; emit_jne(1); } if(opcode[i]==5) // BNE { if(s2h>=0) emit_cmp(s1h,s2h); else emit_test(s1h,s1h); - if(invert) taken=(int)out; + if(invert) taken=out; else add_to_linker((int)out,ba[i],internal); emit_jne(0); } if(opcode[i]==6) // BLEZ { emit_test(s1h,s1h); - if(invert) taken=(int)out; + if(invert) taken=out; else add_to_linker((int)out,ba[i],internal); emit_js(0); - nottaken1=(int)out; + nottaken1=out; emit_jne(1); } if(opcode[i]==7) // BGTZ { emit_test(s1h,s1h); - nottaken1=(int)out; + nottaken1=out; emit_js(1); - if(invert) taken=(int)out; + if(invert) taken=out; else add_to_linker((int)out,ba[i],internal); emit_jne(0); } @@ -5086,7 +5104,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); 
if(invert){ - nottaken=(int)out; + nottaken=out; emit_jne(1); }else{ add_to_linker((int)out,ba[i],internal); @@ -5098,7 +5116,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); if(invert){ - nottaken=(int)out; + nottaken=out; emit_jeq(1); }else{ add_to_linker((int)out,ba[i],internal); @@ -5109,7 +5127,7 @@ void cjump_assemble(int i,struct regstat *i_regs) { emit_cmpimm(s1l,1); if(invert){ - nottaken=(int)out; + nottaken=out; emit_jge(1); }else{ add_to_linker((int)out,ba[i],internal); @@ -5120,7 +5138,7 @@ void cjump_assemble(int i,struct regstat *i_regs) { emit_cmpimm(s1l,1); if(invert){ - nottaken=(int)out; + nottaken=out; emit_jl(1); }else{ add_to_linker((int)out,ba[i],internal); @@ -5128,7 +5146,7 @@ void cjump_assemble(int i,struct regstat *i_regs) } } if(invert) { - if(taken) set_jump_target(taken,(int)out); + if(taken) set_jump_target(taken, out); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { if(adj) { @@ -5157,10 +5175,10 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jmp(0); } } - set_jump_target(nottaken,(int)out); + set_jump_target(nottaken, out); } - if(nottaken1) set_jump_target(nottaken1,(int)out); + if(nottaken1) set_jump_target(nottaken1, out); if(adj) { if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); } @@ -5172,7 +5190,7 @@ void cjump_assemble(int i,struct regstat *i_regs) //if(likely[i]) printf("IOL\n"); //else //printf("IOE\n"); - int taken=0,nottaken=0,nottaken1=0; + void *taken = NULL, *nottaken = NULL, *nottaken1 = NULL; if(!unconditional&&!nop) { if(!only32) { @@ -5181,30 +5199,30 @@ void cjump_assemble(int i,struct regstat *i_regs) { if(s2h>=0) emit_cmp(s1h,s2h); else emit_test(s1h,s1h); - nottaken1=(int)out; + nottaken1=out; emit_jne(2); } if((opcode[i]&0x2f)==5) // BNE { if(s2h>=0) emit_cmp(s1h,s2h); else emit_test(s1h,s1h); - taken=(int)out; + taken=out; emit_jne(1); } if((opcode[i]&0x2f)==6) // BLEZ { emit_test(s1h,s1h); - 
taken=(int)out; + taken=out; emit_js(1); - nottaken1=(int)out; + nottaken1=out; emit_jne(2); } if((opcode[i]&0x2f)==7) // BGTZ { emit_test(s1h,s1h); - nottaken1=(int)out; + nottaken1=out; emit_js(2); - taken=(int)out; + taken=out; emit_jne(1); } } // if(!only32) @@ -5215,26 +5233,26 @@ void cjump_assemble(int i,struct regstat *i_regs) { if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); - nottaken=(int)out; + nottaken=out; emit_jne(2); } if((opcode[i]&0x2f)==5) // BNE { if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); - nottaken=(int)out; + nottaken=out; emit_jeq(2); } if((opcode[i]&0x2f)==6) // BLEZ { emit_cmpimm(s1l,1); - nottaken=(int)out; + nottaken=out; emit_jge(2); } if((opcode[i]&0x2f)==7) // BGTZ { emit_cmpimm(s1l,1); - nottaken=(int)out; + nottaken=out; emit_jl(2); } } // if(!unconditional) @@ -5248,7 +5266,7 @@ void cjump_assemble(int i,struct regstat *i_regs) ds_unneeded_upper|=1; // branch taken if(!nop) { - if(taken) set_jump_target(taken,(int)out); + if(taken) set_jump_target(taken, out); assem_debug("1:\n"); wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32, ds_unneeded,ds_unneeded_upper); @@ -5283,8 +5301,8 @@ void cjump_assemble(int i,struct regstat *i_regs) // branch not taken cop1_usable=prev_cop1_usable; if(!unconditional) { - if(nottaken1) set_jump_target(nottaken1,(int)out); - set_jump_target(nottaken,(int)out); + if(nottaken1) set_jump_target(nottaken1, out); + set_jump_target(nottaken, out); assem_debug("2:\n"); if(!likely[i]) { wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32, @@ -5385,7 +5403,7 @@ void sjump_assemble(int i,struct regstat *i_regs) return_address=start+i*4+8; emit_movimm(return_address,rt); // PC into link register #ifdef IMM_PREFETCH - if(!nevertaken) emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]); + if(!nevertaken) emit_prefetch(hash_table_get(return_address)); #endif } } @@ -5423,7 +5441,7 @@ void sjump_assemble(int i,struct 
regstat *i_regs) add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); } else { - int nottaken=0; + void *nottaken = NULL; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(!only32) @@ -5433,7 +5451,7 @@ void sjump_assemble(int i,struct regstat *i_regs) { emit_test(s1h,s1h); if(invert){ - nottaken=(int)out; + nottaken=out; emit_jns(1); }else{ add_to_linker((int)out,ba[i],internal); @@ -5444,7 +5462,7 @@ void sjump_assemble(int i,struct regstat *i_regs) { emit_test(s1h,s1h); if(invert){ - nottaken=(int)out; + nottaken=out; emit_js(1); }else{ add_to_linker((int)out,ba[i],internal); @@ -5459,7 +5477,7 @@ void sjump_assemble(int i,struct regstat *i_regs) { emit_test(s1l,s1l); if(invert){ - nottaken=(int)out; + nottaken=out; emit_jns(1); }else{ add_to_linker((int)out,ba[i],internal); @@ -5470,7 +5488,7 @@ void sjump_assemble(int i,struct regstat *i_regs) { emit_test(s1l,s1l); if(invert){ - nottaken=(int)out; + nottaken=out; emit_js(1); }else{ add_to_linker((int)out,ba[i],internal); @@ -5508,7 +5526,7 @@ void sjump_assemble(int i,struct regstat *i_regs) emit_jmp(0); } } - set_jump_target(nottaken,(int)out); + set_jump_target(nottaken, out); } if(adj) { @@ -5520,7 +5538,7 @@ void sjump_assemble(int i,struct regstat *i_regs) { // In-order execution (branch first) //printf("IOE\n"); - int nottaken=0; + void *nottaken = NULL; if(rt1[i]==31) { int rt,return_address; rt=get_reg(branch_regs[i].regmap,31); @@ -5529,7 +5547,7 @@ void sjump_assemble(int i,struct regstat *i_regs) return_address=start+i*4+8; emit_movimm(return_address,rt); // PC into link register #ifdef IMM_PREFETCH - emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]); + emit_prefetch(hash_table_get(return_address)); #endif } } @@ -5541,13 +5559,13 @@ void sjump_assemble(int i,struct regstat *i_regs) if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL { emit_test(s1h,s1h); - nottaken=(int)out; + nottaken=out; 
emit_jns(1); } if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL { emit_test(s1h,s1h); - nottaken=(int)out; + nottaken=out; emit_js(1); } } // if(!only32) @@ -5557,13 +5575,13 @@ void sjump_assemble(int i,struct regstat *i_regs) if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL { emit_test(s1l,s1l); - nottaken=(int)out; + nottaken=out; emit_jns(1); } if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL { emit_test(s1l,s1l); - nottaken=(int)out; + nottaken=out; emit_js(1); } } @@ -5612,7 +5630,7 @@ void sjump_assemble(int i,struct regstat *i_regs) // branch not taken cop1_usable=prev_cop1_usable; if(!unconditional) { - set_jump_target(nottaken,(int)out); + set_jump_target(nottaken, out); assem_debug("1:\n"); if(!likely[i]) { wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32, @@ -5700,7 +5718,7 @@ void fjump_assemble(int i,struct regstat *i_regs) do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); assem_debug("cycle count (adj)\n"); if(1) { - int nottaken=0; + void *nottaken = NULL; if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(1) { assert(fs>=0); @@ -5708,7 +5726,7 @@ void fjump_assemble(int i,struct regstat *i_regs) if(source[i]&0x10000) // BC1T { if(invert){ - nottaken=(int)out; + nottaken=out; emit_jeq(1); }else{ add_to_linker((int)out,ba[i],internal); @@ -5717,7 +5735,7 @@ void fjump_assemble(int i,struct regstat *i_regs) } else // BC1F if(invert){ - nottaken=(int)out; + nottaken=out; emit_jne(1); }else{ add_to_linker((int)out,ba[i],internal); @@ -5745,7 +5763,7 @@ void fjump_assemble(int i,struct regstat *i_regs) add_to_linker((int)out,ba[i],internal); emit_jmp(0); } - set_jump_target(nottaken,(int)out); + set_jump_target(nottaken, out); } if(adj) { @@ -5757,7 +5775,7 @@ void fjump_assemble(int i,struct regstat *i_regs) { // In-order execution (branch first) //printf("IOE\n"); - int nottaken=0; + void *nottaken = NULL; if(1) { //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d 
edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); if(1) { @@ -5765,12 +5783,12 @@ void fjump_assemble(int i,struct regstat *i_regs) emit_testimm(fs,0x800000); if(source[i]&0x10000) // BC1T { - nottaken=(int)out; + nottaken=out; emit_jeq(1); } else // BC1F { - nottaken=(int)out; + nottaken=out; emit_jne(1); } } @@ -5817,7 +5835,7 @@ void fjump_assemble(int i,struct regstat *i_regs) // branch not taken if(1) { // <- FIXME (don't need this) - set_jump_target(nottaken,(int)out); + set_jump_target(nottaken, out); assem_debug("1:\n"); if(!likely[i]) { wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32, @@ -5855,8 +5873,8 @@ static void pagespan_assemble(int i,struct regstat *i_regs) int s1h=get_reg(i_regs->regmap,rs1[i]|64); int s2l=get_reg(i_regs->regmap,rs2[i]); int s2h=get_reg(i_regs->regmap,rs2[i]|64); - int taken=0; - int nottaken=0; + void *taken = NULL; + void *nottaken = NULL; int unconditional=0; if(rs1[i]==0) { @@ -5989,13 +6007,13 @@ static void pagespan_assemble(int i,struct regstat *i_regs) if(s1h>=0) { if(s2h>=0) emit_cmp(s1h,s2h); else emit_test(s1h,s1h); - nottaken=(int)out; + nottaken=out; emit_jne(0); } if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); - if(nottaken) set_jump_target(nottaken,(int)out); - nottaken=(int)out; + if(nottaken) set_jump_target(nottaken, out); + nottaken=out; emit_jne(0); } if((opcode[i]&0x3f)==0x15) // BNEL @@ -6003,14 +6021,14 @@ static void pagespan_assemble(int i,struct regstat *i_regs) if(s1h>=0) { if(s2h>=0) emit_cmp(s1h,s2h); else emit_test(s1h,s1h); - taken=(int)out; + taken=out; emit_jne(0); } if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); - nottaken=(int)out; + nottaken=out; emit_jeq(0); - if(taken) set_jump_target(taken,(int)out); + if(taken) set_jump_target(taken, out); } if((opcode[i]&0x3f)==6) // BLEZ { @@ -6063,13 +6081,13 @@ static void 
pagespan_assemble(int i,struct regstat *i_regs) if((source[i]&0x30000)==0x20000) // BC1FL { emit_testimm(s1l,0x800000); - nottaken=(int)out; + nottaken=out; emit_jne(0); } if((source[i]&0x30000)==0x30000) // BC1TL { emit_testimm(s1l,0x800000); - nottaken=(int)out; + nottaken=out; emit_jeq(0); } } @@ -6091,13 +6109,13 @@ static void pagespan_assemble(int i,struct regstat *i_regs) void *compiled_target_addr=check_addr(target_addr); emit_extjump_ds((int)branch_addr,target_addr); if(compiled_target_addr) { - set_jump_target((int)branch_addr,(int)compiled_target_addr); + set_jump_target(branch_addr, compiled_target_addr); add_link(target_addr,stub); } - else set_jump_target((int)branch_addr,(int)stub); + else set_jump_target(branch_addr, stub); if(likely[i]) { // Not-taken path - set_jump_target((int)nottaken,(int)out); + set_jump_target(nottaken, out); wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty); void *branch_addr=out; emit_jmp(0); @@ -6106,10 +6124,10 @@ static void pagespan_assemble(int i,struct regstat *i_regs) void *compiled_target_addr=check_addr(target_addr); emit_extjump_ds((int)branch_addr,target_addr); if(compiled_target_addr) { - set_jump_target((int)branch_addr,(int)compiled_target_addr); + set_jump_target(branch_addr, compiled_target_addr); add_link(target_addr,stub); } - else set_jump_target((int)branch_addr,(int)stub); + else set_jump_target(branch_addr, stub); } } @@ -6197,11 +6215,11 @@ static void pagespan_ds() #else emit_cmpimm(btaddr,start+4); #endif - int branch=(int)out; + void *branch = out; emit_jeq(0); store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,-1); emit_jmp(jump_vaddr_reg[btaddr]); - set_jump_target(branch,(int)out); + set_jump_target(branch, out); store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4); load_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4); } @@ -10007,12 +10025,12 @@ int new_recompile_block(int addr) ds=1; pagespan_ds(); } - u_int instr_addr0_override=0; + void *instr_addr0_override = 
NULL; if (start == 0x80030000) { // nasty hack for fastbios thing // override block entry to this code - instr_addr0_override=(u_int)out; + instr_addr0_override = out; emit_movimm(start,0); // abuse io address var as a flag that we // have already returned here once @@ -10029,7 +10047,7 @@ int new_recompile_block(int addr) if(ds) { ds=0; // Skip delay slot if(bt[i]) assem_debug("OOPS - branch into delay slot\n"); - instr_addr[i]=0; + instr_addr[i] = NULL; } else { speculate_register_values(i); #ifndef DESTRUCTIVE_WRITEBACK @@ -10054,7 +10072,7 @@ int new_recompile_block(int addr) loop_preload(regmap_pre[i],regs[i].regmap_entry); } // branch target entry point - instr_addr[i]=(u_int)out; + instr_addr[i] = out; assem_debug("<->\n"); drc_dbg_emit_do_cmp(i); @@ -10237,10 +10255,10 @@ int new_recompile_block(int addr) void *addr=check_addr(link_addr[i][1]); emit_extjump(link_addr[i][0],link_addr[i][1]); if(addr) { - set_jump_target(link_addr[i][0],(int)addr); + set_jump_target(link_addr[i][0], addr); add_link(link_addr[i][1],stub); } - else set_jump_target(link_addr[i][0],(int)stub); + else set_jump_target(link_addr[i][0], stub); } else { @@ -10268,22 +10286,20 @@ int new_recompile_block(int addr) u_int vpage=get_vpage(vaddr); literal_pool(256); { - assem_debug("%8x (%d) <- %8x\n",instr_addr[i],i,start+i*4); + assem_debug("%p (%d) <- %8x\n",instr_addr[i],i,start+i*4); assem_debug("jump_in: %x\n",start+i*4); - ll_add(jump_dirty+vpage,vaddr,(void *)out); - int entry_point=do_dirty_stub(i); - ll_add_flags(jump_in+page,vaddr,state_rflags,(void *)entry_point); + ll_add(jump_dirty+vpage,vaddr,out); + void *entry_point = do_dirty_stub(i); + ll_add_flags(jump_in+page,vaddr,state_rflags,entry_point); // If there was an existing entry in the hash table, // replace it with the new address. // Don't add new entries. We'll insert the // ones that actually get used in check_addr(). 
- u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; - if(ht_bin[0]==vaddr) { - ht_bin[1]=entry_point; - } - if(ht_bin[2]==vaddr) { - ht_bin[3]=entry_point; - } + struct ht_entry *ht_bin = hash_table_get(vaddr); + if (ht_bin->vaddr[0] == vaddr) + ht_bin->tcaddr[0] = entry_point; + if (ht_bin->vaddr[1] == vaddr) + ht_bin->tcaddr[1] = entry_point; } } } @@ -10324,7 +10340,7 @@ int new_recompile_block(int addr) while(expirep!=end) { int shift=TARGET_SIZE_2-3; // Divide into 8 blocks - int base=(int)BASE_ADDR+((expirep>>13)<<shift); // Base address of this block + uintptr_t base=(uintptr_t)BASE_ADDR+((expirep>>13)<<shift); // Base address of this block inv_debug("EXP: Phase %d\n",expirep); switch((expirep>>11)&3) { @@ -10343,18 +10359,20 @@ int new_recompile_block(int addr) case 2: // Clear hash table for(i=0;i<32;i++) { - u_int *ht_bin=hash_table[((expirep&2047)<<5)+i]; - if((ht_bin[3]>>shift)==(base>>shift) || - ((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) { - inv_debug("EXP: Remove hash %x -> %x\n",ht_bin[2],ht_bin[3]); - ht_bin[2]=ht_bin[3]=-1; - } - if((ht_bin[1]>>shift)==(base>>shift) || - ((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) { - inv_debug("EXP: Remove hash %x -> %x\n",ht_bin[0],ht_bin[1]); - ht_bin[0]=ht_bin[2]; - ht_bin[1]=ht_bin[3]; - ht_bin[2]=ht_bin[3]=-1; + struct ht_entry *ht_bin = &hash_table[((expirep&2047)<<5)+i]; + if (((uintptr_t)ht_bin->tcaddr[1]>>shift) == (base>>shift) || + (((uintptr_t)ht_bin->tcaddr[1]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) { + inv_debug("EXP: Remove hash %x -> %p\n",ht_bin->vaddr[1],ht_bin->tcaddr[1]); + ht_bin->vaddr[1] = -1; + ht_bin->tcaddr[1] = NULL; + } + if (((uintptr_t)ht_bin->tcaddr[0]>>shift) == (base>>shift) || + (((uintptr_t)ht_bin->tcaddr[0]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) { + inv_debug("EXP: Remove hash %x -> %p\n",ht_bin->vaddr[0],ht_bin->tcaddr[0]); + ht_bin->vaddr[0] = ht_bin->vaddr[1]; + ht_bin->tcaddr[0] = ht_bin->tcaddr[1]; + ht_bin->vaddr[1] = -1; + ht_bin->tcaddr[1] = NULL; } } break; From b14b6a8f0009d03d6051056a9dec699b82342f59 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 1 Nov 2021 16:55:04 +0200 Subject: [PATCH 
048/597] drc: convert 'stubs' to a struct with proper types (rework part 2) --- libpcsxcore/new_dynarec/assem_arm.c | 165 +++++++++--------- libpcsxcore/new_dynarec/new_dynarec.c | 239 +++++++++++++++----------- 2 files changed, 220 insertions(+), 184 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 7641bbcc7..940391ce7 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -74,23 +74,24 @@ void jump_vaddr_r9(); void jump_vaddr_r10(); void jump_vaddr_r12(); -const u_int jump_vaddr_reg[16] = { - (int)jump_vaddr_r0, - (int)jump_vaddr_r1, - (int)jump_vaddr_r2, - (int)jump_vaddr_r3, - (int)jump_vaddr_r4, - (int)jump_vaddr_r5, - (int)jump_vaddr_r6, - (int)jump_vaddr_r7, - (int)jump_vaddr_r8, - (int)jump_vaddr_r9, - (int)jump_vaddr_r10, +void * const jump_vaddr_reg[16] = { + jump_vaddr_r0, + jump_vaddr_r1, + jump_vaddr_r2, + jump_vaddr_r3, + jump_vaddr_r4, + jump_vaddr_r5, + jump_vaddr_r6, + jump_vaddr_r7, + jump_vaddr_r8, + jump_vaddr_r9, + jump_vaddr_r10, 0, - (int)jump_vaddr_r12, + jump_vaddr_r12, 0, 0, - 0}; + 0 +}; void invalidate_addr_r0(); void invalidate_addr_r1(); @@ -1706,8 +1707,9 @@ static void emit_call(int a) output_w32(0xeb000000|offset); } -static void emit_jmp(int a) +static void emit_jmp(const void *a_) { + int a = (int)a_; assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a)); u_int offset=genjmp(a); output_w32(0xea000000|offset); @@ -1769,8 +1771,9 @@ static void emit_jc(int a) output_w32(0x2a000000|offset); } -static void emit_jcc(int a) +static void emit_jcc(void *a_) { + int a = (int)a_; assem_debug("bcc %x\n",a); u_int offset=genjmp(a); output_w32(0x3a000000|offset); @@ -2474,7 +2477,7 @@ static void literal_pool_jumpover(int n) set_jump_target(jaddr, out); } -static void emit_extjump2(u_int addr, int target, int linker) +static void emit_extjump2(u_int addr, int target, void *linker) { u_char *ptr=(u_char *)addr; 
assert((ptr[3]&0x0e)==0xa); @@ -2499,12 +2502,12 @@ static void emit_extjump2(u_int addr, int target, int linker) static void emit_extjump(int addr, int target) { - emit_extjump2(addr, target, (int)dyna_linker); + emit_extjump2(addr, target, dyna_linker); } static void emit_extjump_ds(int addr, int target) { - emit_extjump2(addr, target, (int)dyna_linker_ds); + emit_extjump2(addr, target, dyna_linker_ds); } // put rt_val into rt, potentially making use of rs with value rs_val @@ -2568,7 +2571,7 @@ static void pass_args(int a0, int a1) } } -static void mov_loadtype_adj(int type,int rs,int rt) +static void mov_loadtype_adj(enum stub_type type,int rs,int rt) { switch(type) { case LOADB_STUB: emit_signextend8(rs,rt); break; @@ -2585,14 +2588,14 @@ static void mov_loadtype_adj(int type,int rs,int rt) static void do_readstub(int n) { - assem_debug("do_readstub %x\n",start+stubs[n][3]*4); + assem_debug("do_readstub %x\n",start+stubs[n].a*4); literal_pool(256); - set_jump_target(stubs[n][1], out); - int type=stubs[n][0]; - int i=stubs[n][3]; - int rs=stubs[n][4]; - struct regstat *i_regs=(struct regstat *)stubs[n][5]; - u_int reglist=stubs[n][7]; + set_jump_target(stubs[n].addr, out); + enum stub_type type=stubs[n].type; + int i=stubs[n].a; + int rs=stubs[n].b; + struct regstat *i_regs=(struct regstat *)stubs[n].c; + u_int reglist=stubs[n].e; signed char *i_regmap=i_regs->regmap; int rt; if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) { @@ -2629,6 +2632,7 @@ static void do_readstub(int n) case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break; case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break; case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break; + default: assert(0); } } if(regs_saved) { @@ -2636,7 +2640,7 @@ static void do_readstub(int n) emit_jcc(0); // jump to reg restore } else - emit_jcc(stubs[n][2]); // return address + emit_jcc(stubs[n].retaddr); // return address if(!regs_saved) save_regs(reglist); @@ -2652,7 +2656,7 @@ static void 
do_readstub(int n) int cc=get_reg(i_regmap,CCREG); if(cc<0) emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2); emit_call(handler); if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { mov_loadtype_adj(type,0,rt); @@ -2660,11 +2664,11 @@ static void do_readstub(int n) if(restore_jump) set_jump_target(restore_jump, out); restore_regs(reglist); - emit_jmp(stubs[n][2]); // return address + emit_jmp(stubs[n].retaddr); // return address } // return memhandler, or get directly accessable address and return 0 -static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host) +static u_int get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host) { u_int l1,l2=0; l1=((u_int *)table)[addr>>12]; @@ -2690,7 +2694,7 @@ static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_h } } -static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) +static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { int rs=get_reg(regmap,target); int rt=get_reg(regmap,target); @@ -2773,14 +2777,14 @@ static void inline_readstub(int type, int i, u_int addr, signed char regmap[], i static void do_writestub(int n) { - assem_debug("do_writestub %x\n",start+stubs[n][3]*4); + assem_debug("do_writestub %x\n",start+stubs[n].a*4); literal_pool(256); - set_jump_target(stubs[n][1], out); - int type=stubs[n][0]; - int i=stubs[n][3]; - int rs=stubs[n][4]; - struct regstat *i_regs=(struct regstat *)stubs[n][5]; - u_int reglist=stubs[n][7]; + set_jump_target(stubs[n].addr, out); + enum stub_type type=stubs[n].type; + int i=stubs[n].a; + int rs=stubs[n].b; + struct regstat *i_regs=(struct regstat *)stubs[n].c; + u_int reglist=stubs[n].e; signed char *i_regmap=i_regs->regmap; int rt,r; if(itype[i]==C1LS||itype[i]==C2LS) { @@ 
-2790,7 +2794,7 @@ static void do_writestub(int n) } assert(rs>=0); assert(rt>=0); - int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,ra; + int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0; void *restore_jump = NULL; int reglist2=reglist|(1<regmap; int temp2=get_reg(i_regmap,FTEMP); int rt; @@ -2915,13 +2919,13 @@ static void do_unalignedwritestub(int n) int cc=get_reg(i_regmap,CCREG); if(cc<0) emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2); emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr)); - emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc); + emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); restore_regs(reglist); - emit_jmp(stubs[n][2]); // return address + emit_jmp(stubs[n].retaddr); // return address #else emit_andimm(addr,0xfffffffc,temp2); emit_writeword(temp2,(int)&address); @@ -2933,7 +2937,7 @@ static void do_unalignedwritestub(int n) emit_loadreg(CCREG,2); } emit_movimm((u_int)readmem,0); - emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2); + emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2); emit_call((int)&indirect_jump_indexed); restore_regs(reglist); @@ -2965,27 +2969,27 @@ static void do_unalignedwritestub(int n) emit_readword_dualindexedx4(0,1,15); emit_readword((int)&Count,HOST_TEMPREG); emit_readword((int)&next_interupt,2); - emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG); + emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG); emit_writeword(2,(int)&last_count); emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc); if(cc<0) { emit_storereg(CCREG,HOST_TEMPREG); } restore_regs(reglist); - emit_jmp(stubs[n][2]); // return address + emit_jmp(stubs[n].retaddr); // return address #endif } static void do_invstub(int n) { literal_pool(20); - u_int reglist=stubs[n][3]; - set_jump_target(stubs[n][1], out); + u_int reglist=stubs[n].a; + set_jump_target(stubs[n].addr, out); save_regs(reglist); - 
if(stubs[n][4]!=0) emit_mov(stubs[n][4],0); + if(stubs[n].b!=0) emit_mov(stubs[n].b,0); emit_call((int)&invalidate_addr); restore_regs(reglist); - emit_jmp(stubs[n][2]); // return address + emit_jmp(stubs[n].retaddr); // return address } void *do_dirty_stub(int i) @@ -3035,12 +3039,12 @@ static void do_dirty_stub_ds() static void do_cop1stub(int n) { literal_pool(256); - assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4); - set_jump_target(stubs[n][1], out); - int i=stubs[n][3]; -// int rs=stubs[n][4]; - struct regstat *i_regs=(struct regstat *)stubs[n][5]; - int ds=stubs[n][6]; + assem_debug("do_cop1stub %x\n",start+stubs[n].a*4); + set_jump_target(stubs[n].addr, out); + int i=stubs[n].a; +// int rs=stubs[n].b; + struct regstat *i_regs=(struct regstat *)stubs[n].c; + int ds=stubs[n].d; if(!ds) { load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i); //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs); @@ -3050,7 +3054,7 @@ static void do_cop1stub(int n) if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); emit_movimm(start+(i-ds)*4,EAX); // Get PC emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... 
- emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception); + emit_jmp(ds?fp_exception_ds:fp_exception); } /* Special assem */ @@ -3283,9 +3287,10 @@ static int get_ptr_mem_type(u_int a) return MTYPE_8000; } -static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) +static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) { - int jaddr=0,type=0; + void *jaddr = NULL; + int type=0; int mr=rs1[i]; if(((smrv_strong|smrv_weak)>>mr)&1) { type=get_ptr_mem_type(smrv[mr]); @@ -3316,7 +3321,7 @@ static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) if (psxH == (void *)0x1f800000) { emit_addimm(addr,-0x1f800000,HOST_TEMPREG); emit_cmpimm(HOST_TEMPREG,0x1000); - jaddr=(int)out; + jaddr=out; emit_jc(0); } else { @@ -3328,7 +3333,7 @@ static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) if(type==0) { emit_cmpimm(addr,RAM_SIZE); - jaddr=(int)out; + jaddr=out; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK // Hint to branch predictor that the branch is unlikely to be taken if(rs1[i]>=28) @@ -3351,7 +3356,7 @@ static void loadlr_assemble_arm(int i,struct regstat *i_regs) { int s,th,tl,temp,temp2,addr,map=-1; int offset; - int jaddr=0; + void *jaddr=0; int memtarget=0,c=0; int fastload_reg_override=0; u_int hr,reglist=0; @@ -3405,7 +3410,7 @@ static void loadlr_assemble_arm(int i,struct regstat *i_regs) if(fastload_reg_override) a=fastload_reg_override; //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2); emit_readword_indexed_tlb(0,a,map,temp2); - if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist); + if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist); } else inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist); @@ -3437,7 +3442,7 @@ static void loadlr_assemble_arm(int i,struct regstat *i_regs) //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h); 
//emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2); emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2); - if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist); + if(jaddr) add_stub_r(LOADD_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist); } else inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist); @@ -3875,9 +3880,9 @@ static void cop1_unusable(int i,struct regstat *i_regs) { // XXX: should just just do the exception instead if(!cop1_usable) { - int jaddr=(int)out; + void *jaddr=out; emit_jmp(0); - add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0); + add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0); cop1_usable=1; } } @@ -3973,7 +3978,7 @@ static void multdiv_assemble_arm(int i,struct regstat *i_regs) emit_subcs(remainder,HOST_TEMPREG,remainder); emit_adcs(quotient,quotient,quotient); emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG); - emit_jcc((int)out-16); // -4 + emit_jcc(out-16); // -4 emit_teq(d1,d2); emit_negmi(quotient,quotient); emit_test(d1,d1); @@ -4009,7 +4014,7 @@ static void multdiv_assemble_arm(int i,struct regstat *i_regs) emit_subcs(remainder,d2,remainder); emit_adcs(quotient,quotient,quotient); emit_shrcc_imm(d2,1,d2); - emit_jcc((int)out-16); // -4 + emit_jcc(out-16); // -4 } } else // 64-bit diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 28bee08fa..a156d3aac 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -38,6 +38,10 @@ static int sceBlock; #include "../psxhle.h" //emulator interface #include "emu_if.h" //emulator interface +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#endif + //#define DISASM //#define assem_debug printf //#define inv_debug printf @@ -57,6 +61,24 @@ static int sceBlock; #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 +// stubs +enum stub_type { + CC_STUB = 1, + FP_STUB = 2, + LOADB_STUB = 3, 
+ LOADH_STUB = 4, + LOADW_STUB = 5, + LOADD_STUB = 6, + LOADBU_STUB = 7, + LOADHU_STUB = 8, + STOREB_STUB = 9, + STOREH_STUB = 10, + STOREW_STUB = 11, + STORED_STUB = 12, + STORELR_STUB = 13, + INVCODE_STUB = 14, +}; + struct regstat { signed char regmap_entry[HOST_REGS]; @@ -88,6 +110,18 @@ struct ht_entry void *tcaddr[2]; }; +struct code_stub +{ + enum stub_type type; + void *addr; + void *retaddr; + u_int a; + uintptr_t b; + uintptr_t c; + u_int d; + u_int e; +}; + // used by asm: u_char *out; struct ht_entry hash_table[65536] __attribute__((aligned(16))); @@ -142,7 +176,7 @@ struct ht_entry static void *instr_addr[MAXBLOCK]; static u_int link_addr[MAXBLOCK][3]; static int linkcount; - static u_int stubs[MAXBLOCK*3][8]; + static struct code_stub stubs[MAXBLOCK*3]; static int stubcount; static u_int literals[1024][2]; static int literalcount; @@ -220,22 +254,6 @@ struct ht_entry #define C2OP 29 // Coprocessor 2 operation #define INTCALL 30// Call interpreter to handle rare corner cases - /* stubs */ -#define CC_STUB 1 -#define FP_STUB 2 -#define LOADB_STUB 3 -#define LOADH_STUB 4 -#define LOADW_STUB 5 -#define LOADD_STUB 6 -#define LOADBU_STUB 7 -#define LOADHU_STUB 8 -#define STOREB_STUB 9 -#define STOREH_STUB 10 -#define STOREW_STUB 11 -#define STORED_STUB 12 -#define STORELR_STUB 13 -#define INVCODE_STUB 14 - /* branch codes */ #define TAKEN 1 #define NOTTAKEN 2 @@ -271,7 +289,10 @@ static void load_all_consts(signed char regmap[],int is32,u_int dirty,int i); static int verify_dirty(u_int *ptr); static int get_final_value(int hr, int i, int *value); -static void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e); +static void add_stub(enum stub_type type, void *addr, void *retaddr, + u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e); +static void add_stub_r(enum stub_type type, void *addr, void *retaddr, + int i, int addr_reg, struct regstat *i_regs, int ccadj, u_int reglist); static void add_to_linker(int addr,int target,int ext); static 
int tracedebug=0; @@ -815,7 +836,7 @@ void *check_addr(u_int vaddr) { struct ht_entry *ht_bin = hash_table_get(vaddr); size_t i; - for (i = 0; i < sizeof(ht_bin->vaddr)/sizeof(ht_bin->vaddr[0]); i++) { + for (i = 0; i < ARRAY_SIZE(ht_bin->vaddr); i++) { if (ht_bin->vaddr[i] == vaddr) if (doesnt_expire_soon((u_char *)ht_bin->tcaddr[i] - MAX_OUTPUT_BLOCK_SIZE)) if (isclean(ht_bin->tcaddr[i])) @@ -1831,19 +1852,27 @@ static void pagespan_alloc(struct regstat *current,int i) //else ... } -static void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e) +static void add_stub(enum stub_type type, void *addr, void *retaddr, + u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e) { - stubs[stubcount][0]=type; - stubs[stubcount][1]=addr; - stubs[stubcount][2]=retaddr; - stubs[stubcount][3]=a; - stubs[stubcount][4]=b; - stubs[stubcount][5]=c; - stubs[stubcount][6]=d; - stubs[stubcount][7]=e; + assert(a < ARRAY_SIZE(stubs)); + stubs[stubcount].type = type; + stubs[stubcount].addr = addr; + stubs[stubcount].retaddr = retaddr; + stubs[stubcount].a = a; + stubs[stubcount].b = b; + stubs[stubcount].c = c; + stubs[stubcount].d = d; + stubs[stubcount].e = e; stubcount++; } +static void add_stub_r(enum stub_type type, void *addr, void *retaddr, + int i, int addr_reg, struct regstat *i_regs, int ccadj, u_int reglist) +{ + add_stub(type, addr, retaddr, i, addr_reg, (uintptr_t)i_regs, ccadj, reglist); +} + // Write out a single register void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32) { @@ -2591,7 +2620,7 @@ void load_assemble(int i,struct regstat *i_regs) { int s,th,tl,addr,map=-1; int offset; - int jaddr=0; + void *jaddr=0; int memtarget=0,c=0; int fastload_reg_override=0; u_int hr,reglist=0; @@ -2671,7 +2700,7 @@ void load_assemble(int i,struct regstat *i_regs) } } if(jaddr) - add_stub(LOADB_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist); + add_stub_r(LOADB_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else 
inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); @@ -2708,7 +2737,7 @@ void load_assemble(int i,struct regstat *i_regs) } } if(jaddr) - add_stub(LOADH_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist); + add_stub_r(LOADH_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); @@ -2727,7 +2756,7 @@ void load_assemble(int i,struct regstat *i_regs) emit_readword_indexed_tlb(0,a,map,tl); } if(jaddr) - add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist); + add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); @@ -2756,7 +2785,7 @@ void load_assemble(int i,struct regstat *i_regs) } } if(jaddr) - add_stub(LOADBU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist); + add_stub_r(LOADBU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); @@ -2793,7 +2822,7 @@ void load_assemble(int i,struct regstat *i_regs) } } if(jaddr) - add_stub(LOADHU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist); + add_stub_r(LOADHU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); @@ -2813,7 +2842,7 @@ void load_assemble(int i,struct regstat *i_regs) emit_readword_indexed_tlb(0,a,map,tl); } if(jaddr) - add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist); + add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else { inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); @@ -2835,7 +2864,7 @@ void load_assemble(int i,struct regstat *i_regs) emit_readdword_indexed_tlb(0,a,map,th,tl); } if(jaddr) - 
add_stub(LOADD_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist); + add_stub_r(LOADD_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); @@ -2885,7 +2914,8 @@ void store_assemble(int i,struct regstat *i_regs) int s,th,tl,map=-1; int addr,temp; int offset; - int jaddr=0,type; + void *jaddr=0; + enum stub_type type; int memtarget=0,c=0; int agr=AGEN1+(i&1); int faststore_reg_override=0; @@ -2984,7 +3014,7 @@ void store_assemble(int i,struct regstat *i_regs) if(jaddr) { // PCSX store handlers don't check invcode again reglist|=1<waswritten&(1<regmap,rs2[i],ccadj[i],reglist); } @@ -3026,7 +3056,7 @@ void store_assemble(int i,struct regstat *i_regs) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty); emit_movimm(start+i*4+4,0); emit_writeword(0,(int)&pcaddr); - emit_jmp((int)do_interrupt); + emit_jmp(do_interrupt); } } //if(opcode[i]==0x2B || opcode[i]==0x3F) @@ -3074,7 +3104,7 @@ void storelr_assemble(int i,struct regstat *i_regs) int temp; int temp2=-1; int offset; - int jaddr=0; + void *jaddr=0; void *case1, *case2, *case3; void *done0, *done1, *done2; int memtarget=0,c=0; @@ -3100,13 +3130,13 @@ void storelr_assemble(int i,struct regstat *i_regs) if(!c) { emit_cmpimm(s<0||offset?temp:s,RAM_SIZE); if(!offset&&s!=temp) emit_mov(s,temp); - jaddr=(int)out; + jaddr=out; emit_jno(0); } else { if(!memtarget||!rs1[i]) { - jaddr=(int)out; + jaddr=out; emit_jmp(0); } } @@ -3258,7 +3288,7 @@ void storelr_assemble(int i,struct regstat *i_regs) set_jump_target(done0, out); } if(!c||!memtarget) - add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist); + add_stub_r(STORELR_STUB,jaddr,out,i,temp,i_regs,ccadj[i],reglist); if(!(i_regs->waswritten&(1<regmap,ROREG); @@ -3277,9 +3307,9 @@ void storelr_assemble(int i,struct regstat *i_regs) #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) emit_callne(invalidate_addr_reg[temp]); #else - int 
jaddr2=(int)out; + void *jaddr2 = out; emit_jne(0); - add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<waswritten&(1<= (sizeof(psxHLEt) / sizeof(psxHLEt[0]))) + if (hleCode >= ARRAY_SIZE(psxHLEt)) emit_movimm((int)psxNULL,1); else emit_movimm((int)psxHLEt[hleCode],1); emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX - emit_jmp((int)jump_hlecall); + emit_jmp(jump_hlecall); } void intcall_assemble(int i,struct regstat *i_regs) @@ -3481,7 +3512,7 @@ void intcall_assemble(int i,struct regstat *i_regs) (void)ccreg; emit_movimm(start+i*4,0); // Get PC emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_jmp((int)jump_intcall); + emit_jmp(jump_intcall); } void ds_assemble(int i,struct regstat *i_regs) @@ -4383,8 +4414,8 @@ void ds_assemble_entry(int i) void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) { int count; - int jaddr; - int idle=0; + void *jaddr; + void *idle=NULL; int t=0; if(itype[i]==RJUMP) { @@ -4405,10 +4436,10 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) if(taken==TAKEN && i==(ba[i]-start)>>2 && source[i+1]==0) { // Idle loop if(count&1) emit_addimm_and_set_flags(2*(count+2),HOST_CCREG); - idle=(int)out; + idle=out; //emit_subfrommem(&idlecount,HOST_CCREG); // Count idle cycles emit_andimm(HOST_CCREG,3,HOST_CCREG); - jaddr=(int)out; + jaddr=out; emit_jmp(0); } else if(*adj==0||invert) { @@ -4420,39 +4451,39 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) cycles=CLOCK_ADJUST(*adj)+count+2-*adj; } emit_addimm_and_set_flags(cycles,HOST_CCREG); - jaddr=(int)out; + jaddr=out; emit_jns(0); } else { emit_cmpimm(HOST_CCREG,-CLOCK_ADJUST(count+2)); - jaddr=(int)out; + jaddr=out; emit_jns(0); } - add_stub(CC_STUB,jaddr,idle?idle:(int)out,(*adj==0||invert||idle)?0:(count+2),i,addr,taken,0); + add_stub(CC_STUB,jaddr,idle?idle:out,(*adj==0||invert||idle)?0:(count+2),i,addr,taken,0); } -void do_ccstub(int n) +static void do_ccstub(int n) { 
literal_pool(256); - assem_debug("do_ccstub %x\n",start+stubs[n][4]*4); - set_jump_target(stubs[n][1], out); - int i=stubs[n][4]; - if(stubs[n][6]==NULLDS) { + assem_debug("do_ccstub %x\n",start+stubs[n].b*4); + set_jump_target(stubs[n].addr, out); + int i=stubs[n].b; + if(stubs[n].d==NULLDS) { // Delay slot instruction is nullified ("likely" branch) wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty); } - else if(stubs[n][6]!=TAKEN) { + else if(stubs[n].d!=TAKEN) { wb_dirtys(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty); } else { if(internal_branch(branch_regs[i].is32,ba[i])) wb_needed_dirtys(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); } - if(stubs[n][5]!=-1) + if(stubs[n].c!=-1) { // Save PC as return address - emit_movimm(stubs[n][5],EAX); + emit_movimm(stubs[n].c,EAX); emit_writeword(EAX,(int)&pcaddr); } else @@ -4649,10 +4680,10 @@ void do_ccstub(int n) } // Update cycle count assert(branch_regs[i].regmap[HOST_CCREG]==CCREG||branch_regs[i].regmap[HOST_CCREG]==-1); - if(stubs[n][3]) emit_addimm(HOST_CCREG,CLOCK_ADJUST((int)stubs[n][3]),HOST_CCREG); + if(stubs[n].a) emit_addimm(HOST_CCREG,CLOCK_ADJUST((int)stubs[n].a),HOST_CCREG); emit_call((int)cc_interrupt); - if(stubs[n][3]) emit_addimm(HOST_CCREG,-CLOCK_ADJUST((int)stubs[n][3]),HOST_CCREG); - if(stubs[n][6]==TAKEN) { + if(stubs[n].a) emit_addimm(HOST_CCREG,-CLOCK_ADJUST((int)stubs[n].a),HOST_CCREG); + if(stubs[n].d==TAKEN) { if(internal_branch(branch_regs[i].is32,ba[i])) load_needed_regs(branch_regs[i].regmap,regs[(ba[i]-start)>>2].regmap_entry); else if(itype[i]==RJUMP) { @@ -4661,17 +4692,17 @@ void do_ccstub(int n) else emit_loadreg(rs1[i],get_reg(branch_regs[i].regmap,rs1[i])); } - }else if(stubs[n][6]==NOTTAKEN) { + }else if(stubs[n].d==NOTTAKEN) { if(i>2) assem_debug("idle loop\n"); @@ -5692,9 +5723,9 @@ void fjump_assemble(int i,struct regstat *i_regs) cs=get_reg(i_regmap,CSREG); assert(cs>=0); emit_testimm(cs,0x20000000); - eaddr=(int)out; + eaddr=out; 
emit_jeq(0); - add_stub(FP_STUB,eaddr,(int)out,i,cs,(int)i_regs,0,0); + add_stub_r(FP_STUB,eaddr,out,i,cs,i_regs,0,0); cop1_usable=1; } @@ -5850,18 +5881,18 @@ void fjump_assemble(int i,struct regstat *i_regs) // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); - int jaddr=(int)out; + void *jaddr=out; emit_jns(0); - add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); + add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); emit_storereg(CCREG,HOST_CCREG); } else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); - int jaddr=(int)out; + void *jaddr=out; emit_jns(0); - add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); + add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); } } } @@ -7220,7 +7251,7 @@ int new_dynarec_save_blocks(void *save, int size) u_int addr; o = 0; - for (p = 0; p < sizeof(jump_in) / sizeof(jump_in[0]); p++) { + for (p = 0; p < ARRAY_SIZE(jump_in); p++) { bcnt = 0; for (head = jump_in[p]; head != NULL; head = head->next) { tmp_blocks[bcnt].addr = head->vaddr; @@ -7316,7 +7347,7 @@ int new_recompile_block(int addr) invalid_code[start>>12]=0; emit_movimm(start,0); emit_writeword(0,(int)&pcaddr); - emit_jmp((int)new_dyna_leave); + emit_jmp(new_dyna_leave); literal_pool(0); end_block(beginning); ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning); @@ -10216,7 +10247,7 @@ int new_recompile_block(int addr) // Stubs for(i=0;i Date: Mon, 1 Nov 2021 18:14:23 +0200 Subject: [PATCH 049/597] drc: remove old debug code it just makes arm64 conversion harder --- libpcsxcore/new_dynarec/new_dynarec.c | 179 +------------------------- 1 file changed, 1 insertion(+), 178 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index a156d3aac..a64cec6f0 100644 --- 
a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -1894,7 +1894,6 @@ void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32 int mchecksum() { - //if(!tracedebug) return 0; int i; int sum=0; for(i=0;i<2097152;i++) { @@ -1922,37 +1921,6 @@ void rlist() printf("\n"); } -void enabletrace() -{ - tracedebug=1; -} - -void memdebug(int i) -{ - //printf("TRACE: count=%d next=%d (checksum %x) lo=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[LOREG]>>32),(int)reg[LOREG]); - //printf("TRACE: count=%d next=%d (rchecksum %x)\n",Count,next_interupt,rchecksum()); - //rlist(); - //if(tracedebug) { - //if(Count>=-2084597794) { - if((signed int)Count>=-2084597794&&(signed int)Count<0) { - //if(0) { - printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum()); - //printf("TRACE: count=%d next=%d (checksum %x) Status=%x\n",Count,next_interupt,mchecksum(),Status); - //printf("TRACE: count=%d next=%d (checksum %x) hi=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[HIREG]>>32),(int)reg[HIREG]); - rlist(); - #ifdef __i386__ - printf("TRACE: %x\n",(&i)[-1]); - #endif - #ifdef __arm__ - int j; - printf("TRACE: %x \n",(&j)[10]); - printf("TRACE: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",(&j)[1],(&j)[2],(&j)[3],(&j)[4],(&j)[5],(&j)[6],(&j)[7],(&j)[8],(&j)[9],(&j)[10],(&j)[11],(&j)[12],(&j)[13],(&j)[14],(&j)[15],(&j)[16],(&j)[17],(&j)[18],(&j)[19],(&j)[20]); - #endif - //fflush(stdout); - } - //printf("TRACE: %x\n",(&i)[-1]); -} - void alu_assemble(int i,struct regstat *i_regs) { if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU @@ -2870,35 +2838,6 @@ void load_assemble(int i,struct regstat *i_regs) inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); } } - //emit_storereg(rt1[i],tl); // DEBUG - //if(opcode[i]==0x23) - //if(opcode[i]==0x24) - //if(opcode[i]==0x23||opcode[i]==0x24) - 
/*if(opcode[i]==0x21||opcode[i]==0x23||opcode[i]==0x24) - { - //emit_pusha(); - save_regs(0x100f); - emit_readword((int)&last_count,ECX); - #ifdef __i386__ - if(get_reg(i_regs->regmap,CCREG)<0) - emit_loadreg(CCREG,HOST_CCREG); - emit_add(HOST_CCREG,ECX,HOST_CCREG); - emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG); - emit_writeword(HOST_CCREG,(int)&Count); - #endif - #ifdef __arm__ - if(get_reg(i_regs->regmap,CCREG)<0) - emit_loadreg(CCREG,0); - else - emit_mov(HOST_CCREG,0); - emit_add(0,ECX,0); - emit_addimm(0,2*ccadj[i],0); - emit_writeword(0,(int)&Count); - #endif - emit_call((int)memdebug); - //emit_popa(); - restore_regs(0x100f); - }*/ } #ifndef loadlr_assemble @@ -3059,43 +2998,6 @@ void store_assemble(int i,struct regstat *i_regs) emit_jmp(do_interrupt); } } - //if(opcode[i]==0x2B || opcode[i]==0x3F) - //if(opcode[i]==0x2B || opcode[i]==0x28) - //if(opcode[i]==0x2B || opcode[i]==0x29) - //if(opcode[i]==0x2B) - /*if(opcode[i]==0x2B || opcode[i]==0x28 || opcode[i]==0x29 || opcode[i]==0x3F) - { - #ifdef __i386__ - emit_pusha(); - #endif - #ifdef __arm__ - save_regs(0x100f); - #endif - emit_readword((int)&last_count,ECX); - #ifdef __i386__ - if(get_reg(i_regs->regmap,CCREG)<0) - emit_loadreg(CCREG,HOST_CCREG); - emit_add(HOST_CCREG,ECX,HOST_CCREG); - emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG); - emit_writeword(HOST_CCREG,(int)&Count); - #endif - #ifdef __arm__ - if(get_reg(i_regs->regmap,CCREG)<0) - emit_loadreg(CCREG,0); - else - emit_mov(HOST_CCREG,0); - emit_add(0,ECX,0); - emit_addimm(0,2*ccadj[i],0); - emit_writeword(0,(int)&Count); - #endif - emit_call((int)memdebug); - #ifdef __i386__ - emit_popa(); - #endif - #ifdef __arm__ - restore_regs(0x100f); - #endif - }*/ } void storelr_assemble(int i,struct regstat *i_regs) @@ -3312,19 +3214,6 @@ void storelr_assemble(int i,struct regstat *i_regs) add_stub(INVCODE_STUB,jaddr2,out,reglist|(1<regmap,CCREG)<0) - emit_loadreg(CCREG,HOST_CCREG); - emit_add(HOST_CCREG,ECX,HOST_CCREG); - 
emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG); - emit_writeword(HOST_CCREG,(int)&Count); - emit_call((int)memdebug); - emit_popa(); - //restore_regs(0x100f); - */ } void c1ls_assemble(int i,struct regstat *i_regs) @@ -4703,34 +4592,6 @@ static void do_ccstub(int n) load_all_regs(branch_regs[i].regmap); } emit_jmp(stubs[n].retaddr); - - /* This works but uses a lot of memory... - emit_readword((int)&last_count,ECX); - emit_add(HOST_CCREG,ECX,EAX); - emit_writeword(EAX,(int)&Count); - emit_call((int)gen_interupt); - emit_readword((int)&Count,HOST_CCREG); - emit_readword((int)&next_interupt,EAX); - emit_readword((int)&pending_exception,EBX); - emit_writeword(EAX,(int)&last_count); - emit_sub(HOST_CCREG,EAX,HOST_CCREG); - emit_test(EBX,EBX); - int jne_instr=(int)out; - emit_jne(0); - if(stubs[n].a) emit_addimm(HOST_CCREG,-2*stubs[n].a,HOST_CCREG); - load_all_regs(branch_regs[i].regmap); - emit_jmp(stubs[n].retaddr); // return address - set_jump_target(jne_instr,(int)out); - emit_readword((int)&pcaddr,EAX); - // Call get_addr_ht instead of doing the hash table here. - // This code is executed infrequently and takes up a lot of space - // so smaller is better. 
- emit_storereg(CCREG,HOST_CCREG); - emit_pushreg(EAX); - emit_call((int)get_addr_ht); - emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(ESP,4,ESP); - emit_jmpreg(EAX);*/ } static void add_to_linker(int addr,int target,int ext) @@ -4942,41 +4803,8 @@ void rjump_assemble(int i,struct regstat *i_regs) else #endif { - //if(rs!=EAX) emit_mov(rs,EAX); - //emit_jmp(jump_vaddr_eax); emit_jmp(jump_vaddr_reg[rs]); } - /* Check hash table - temp=!rs; - emit_mov(rs,temp); - emit_shrimm(rs,16,rs); - emit_xor(temp,rs,rs); - emit_movzwl_reg(rs,rs); - emit_shlimm(rs,4,rs); - emit_cmpmem_indexed((int)hash_table,rs,temp); - emit_jne((int)out+14); - emit_readword_indexed((int)hash_table+4,rs,rs); - emit_jmpreg(rs); - emit_cmpmem_indexed((int)hash_table+8,rs,temp); - emit_addimm_no_flags(8,rs); - emit_jeq((int)out-17); - // No hit on hash table, call compiler - emit_pushreg(temp); -//DEBUG > -#ifdef DEBUG_CYCLE_COUNT - emit_readword((int)&last_count,ECX); - emit_add(HOST_CCREG,ECX,HOST_CCREG); - emit_readword((int)&next_interupt,ECX); - emit_writeword(HOST_CCREG,(int)&Count); - emit_sub(HOST_CCREG,ECX,HOST_CCREG); - emit_writeword(ECX,(int)&last_count); -#endif -//DEBUG < - emit_storereg(CCREG,HOST_CCREG); - emit_call((int)get_addr); - emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(ESP,4,ESP); - emit_jmpreg(EAX);*/ #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(rt1[i]!=31&&i %x\n", (int)addr, (int)out); - //printf("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out); + assem_debug("NOTCOMPILED: addr = %x -> %p\n", addr, out); //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr); //if(debug) //printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum()); //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29); - /*if(Count>=312978186) { - rlist(); - }*/ - //rlist(); // this is just for speculation for (i = 1; i < 32; i++) { From 643aeae3222be00a799ca1e96e795ff846f81fee Mon Sep 17 00:00:00 2001 From: notaz Date: 
Mon, 1 Nov 2021 21:57:53 +0200 Subject: [PATCH 050/597] drc: rework for 64bit, part 3 --- libpcsxcore/new_dynarec/assem_arm.c | 269 +++++++++---------- libpcsxcore/new_dynarec/assem_arm.h | 10 +- libpcsxcore/new_dynarec/new_dynarec.c | 316 +++++++++++------------ libpcsxcore/new_dynarec/pcsxmem_inline.c | 10 +- 4 files changed, 304 insertions(+), 301 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 940391ce7..36a3e45ac 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -30,9 +30,9 @@ #if defined(BASE_ADDR_FIXED) #elif defined(BASE_ADDR_DYNAMIC) -char *translation_cache; +u_char *translation_cache; #else -char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); +u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); #endif #ifndef __MACH__ @@ -219,12 +219,12 @@ static void *find_extjump_insn(void *stub) // get address that insn one after stub loads (dyna_linker arg1), // treat it as a pointer to branch insn, // return addr where that branch jumps to -static int get_pointer(void *stub) +static void *get_pointer(void *stub) { //printf("get_pointer(%x)\n",(int)stub); int *i_ptr=find_extjump_insn(stub); assert((*i_ptr&0x0f000000)==0x0a000000); - return (int)i_ptr+((*i_ptr<<8)>>6)+8; + return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8; } // Find the "clean" entry point from a "dirty" entry point @@ -296,9 +296,9 @@ static int isclean(void *addr) } // get source that block at addr was compiled from (host pointers) -static void get_bounds(int addr,u_int *start,u_int *end) +static void get_bounds(void *addr,u_int *start,u_int *end) { - u_int *ptr=(u_int *)addr; + u_int *ptr = addr; #ifndef HAVE_ARMV7 u_int offset; // get from literal pool @@ -1700,8 +1700,9 @@ static const char *func_name(intptr_t a) #define func_name(x) "" #endif -static void emit_call(int a) +static void emit_call(const void *a_) { + int a = (int)a_; assem_debug("bl %x 
(%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a)); u_int offset=genjmp(a); output_w32(0xeb000000|offset); @@ -1715,8 +1716,9 @@ static void emit_jmp(const void *a_) output_w32(0xea000000|offset); } -static void emit_jne(int a) +static void emit_jne(const void *a_) { + int a = (int)a_; assem_debug("bne %x\n",a); u_int offset=genjmp(a); output_w32(0x1a000000|offset); @@ -1913,6 +1915,7 @@ static void emit_movzbl_indexed(int offset, int rs, int rt) static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt) { + assert(rs2>=0); assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100); } @@ -1952,9 +1955,9 @@ static void emit_ldrd(int offset, int rs, int rt) } } -static void emit_readword(int addr, int rt) +static void emit_readword(void *addr, int rt) { - u_int offset = addr-(u_int)&dynarec_local; + uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; assert(offset<4096); assem_debug("ldr %s,fp+%d\n",regname[rt],offset); output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset); @@ -2059,6 +2062,7 @@ static void emit_writebyte_indexed(int rt, int offset, int rs) static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2) { + assert(rs2>=0); assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100); } @@ -2094,25 +2098,25 @@ static void emit_strcch_dualindexed(int rs1, int rs2, int rt) output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2)); } -static void emit_writeword(int rt, int addr) +static void emit_writeword(int rt, void *addr) { - u_int offset = addr-(u_int)&dynarec_local; + uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; assert(offset<4096); assem_debug("str %s,fp+%d\n",regname[rt],offset); output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset); } -static unused void emit_writehword(int rt, int addr) +static unused void emit_writehword(int rt, void *addr) { - u_int offset = addr-(u_int)&dynarec_local; + uintptr_t 
offset = (u_char *)addr - (u_char *)&dynarec_local; assert(offset<256); assem_debug("strh %s,fp+%d\n",regname[rt],offset); output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); } -static unused void emit_writebyte(int rt, int addr) +static unused void emit_writebyte(int rt, void *addr) { - u_int offset = addr-(u_int)&dynarec_local; + uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; assert(offset<4096); assem_debug("strb %s,fp+%d\n",regname[rt],offset); output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset); @@ -2477,35 +2481,35 @@ static void literal_pool_jumpover(int n) set_jump_target(jaddr, out); } -static void emit_extjump2(u_int addr, int target, void *linker) +static void emit_extjump2(u_char *addr, int target, void *linker) { u_char *ptr=(u_char *)addr; assert((ptr[3]&0x0e)==0xa); (void)ptr; emit_loadlp(target,0); - emit_loadlp(addr,1); - assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<=translation_cache&&addr<(translation_cache+(1<=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000)); //DEBUG > #ifdef DEBUG_CYCLE_COUNT - emit_readword((int)&last_count,ECX); + emit_readword(&last_count,ECX); emit_add(HOST_CCREG,ECX,HOST_CCREG); - emit_readword((int)&next_interupt,ECX); - emit_writeword(HOST_CCREG,(int)&Count); + emit_readword(&next_interupt,ECX); + emit_writeword(HOST_CCREG,&Count); emit_sub(HOST_CCREG,ECX,HOST_CCREG); - emit_writeword(ECX,(int)&last_count); + emit_writeword(ECX,&last_count); #endif //DEBUG < emit_jmp(linker); } -static void emit_extjump(int addr, int target) +static void emit_extjump(void *addr, int target) { emit_extjump2(addr, target, dyna_linker); } -static void emit_extjump_ds(int addr, int target) +static void emit_extjump_ds(void *addr, int target) { emit_extjump2(addr, target, dyna_linker_ds); } @@ -2621,7 +2625,7 @@ static void do_readstub(int n) } if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1) temp2=1; - emit_readword((int)&mem_rtab,temp); + emit_readword(&mem_rtab,temp); 
emit_shrimm(rs,12,temp2); emit_readword_dualindexedx4(temp,temp2,temp2); emit_lsls_imm(temp2,1,temp2); @@ -2644,14 +2648,14 @@ static void do_readstub(int n) if(!regs_saved) save_regs(reglist); - int handler=0; + void *handler=NULL; if(type==LOADB_STUB||type==LOADBU_STUB) - handler=(int)jump_handler_read8; + handler=jump_handler_read8; if(type==LOADH_STUB||type==LOADHU_STUB) - handler=(int)jump_handler_read16; + handler=jump_handler_read16; if(type==LOADW_STUB) - handler=(int)jump_handler_read32; - assert(handler!=0); + handler=jump_handler_read32; + assert(handler); pass_args(rs,temp2); int cc=get_reg(i_regmap,CCREG); if(cc<0) @@ -2668,14 +2672,14 @@ static void do_readstub(int n) } // return memhandler, or get directly accessable address and return 0 -static u_int get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host) +static void *get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host) { u_int l1,l2=0; l1=((u_int *)table)[addr>>12]; if((l1&(1<<31))==0) { u_int v=l1<<1; *addr_host=v+addr; - return 0; + return NULL; } else { l1<<=1; @@ -2688,9 +2692,9 @@ static u_int get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_ if((l2&(1<<31))==0) { u_int v=l2<<1; *addr_host=v+(addr&0xfff); - return 0; + return NULL; } - return l2<<1; + return (void *)(l2<<1); } } @@ -2700,12 +2704,13 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, signed char int rt=get_reg(regmap,target); if(rs<0) rs=get_reg(regmap,-1); assert(rs>=0); - u_int handler,host_addr=0,is_dynamic,far_call=0; + u_int host_addr=0,is_dynamic,far_call=0; + void *handler; int cc=get_reg(regmap,CCREG); if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt)) return; - handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr); - if (handler==0) { + handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr); + if (handler == NULL) { if(rt<0||rt1[i]==0) return; if(addr!=host_addr) @@ -2723,11 +2728,11 @@ 
static void inline_readstub(enum stub_type type, int i, u_int addr, signed char is_dynamic=pcsxmem_is_handler_dynamic(addr); if(is_dynamic) { if(type==LOADB_STUB||type==LOADBU_STUB) - handler=(int)jump_handler_read8; + handler=jump_handler_read8; if(type==LOADH_STUB||type==LOADHU_STUB) - handler=(int)jump_handler_read16; + handler=jump_handler_read16; if(type==LOADW_STUB) - handler=(int)jump_handler_read32; + handler=jump_handler_read32; } // call a memhandler @@ -2738,10 +2743,10 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, signed char emit_movimm(addr,0); else if(rs!=0) emit_mov(rs,0); - int offset=(int)handler-(int)out-8; + int offset=(u_char *)handler-out-8; if(offset<-33554432||offset>=33554432) { // unreachable memhandler, a plugin func perhaps - emit_movimm(handler,12); + emit_movimm((u_int)handler,12); far_call=1; } if(cc<0) @@ -2751,10 +2756,10 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, signed char emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); } else { - emit_readword((int)&last_count,3); + emit_readword(&last_count,3); emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); emit_add(2,3,2); - emit_writeword(2,(int)&Count); + emit_writeword(2,&Count); } if(far_call) @@ -2811,7 +2816,7 @@ static void do_writestub(int n) } if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3) temp2=3; - emit_readword((int)&mem_wtab,temp); + emit_readword(&mem_wtab,temp); emit_shrimm(rs,12,temp2); emit_readword_dualindexedx4(temp,temp2,temp2); emit_lsls_imm(temp2,1,temp2); @@ -2830,14 +2835,14 @@ static void do_writestub(int n) if(!regs_saved) save_regs(reglist); - int handler=0; + void *handler=NULL; switch(type) { - case STOREB_STUB: handler=(int)jump_handler_write8; break; - case STOREH_STUB: handler=(int)jump_handler_write16; break; - case STOREW_STUB: handler=(int)jump_handler_write32; break; + case STOREB_STUB: handler=jump_handler_write8; break; + case STOREH_STUB: handler=jump_handler_write16; break; + case STOREW_STUB: 
handler=jump_handler_write32; break; default: assert(0); } - assert(handler!=0); + assert(handler); pass_args(rs,rt); if(temp2!=3) emit_mov(temp2,3); @@ -2862,9 +2867,9 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, signed char int rt=get_reg(regmap,target); assert(rs>=0); assert(rt>=0); - u_int handler,host_addr=0; - handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr); - if (handler==0) { + u_int host_addr=0; + void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr); + if (handler == NULL) { if(addr!=host_addr) emit_movimm_from(addr,rs,host_addr,rs); switch(type) { @@ -2883,9 +2888,9 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, signed char if(cc<0) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); - emit_movimm(handler,3); + emit_movimm((u_int)handler,3); // returns new cycle_count - emit_call((int)jump_handler_write_h); + emit_call(jump_handler_write_h); emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); @@ -2920,7 +2925,7 @@ static void do_unalignedwritestub(int n) if(cc<0) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2); - emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr)); + emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr)); emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); @@ -2928,7 +2933,7 @@ static void do_unalignedwritestub(int n) emit_jmp(stubs[n].retaddr); // return address #else emit_andimm(addr,0xfffffffc,temp2); - emit_writeword(temp2,(int)&address); + emit_writeword(temp2,&address); save_regs(reglist); emit_shrimm(addr,16,1); @@ -2941,7 +2946,7 @@ static void do_unalignedwritestub(int n) emit_call((int)&indirect_jump_indexed); restore_regs(reglist); - emit_readword((int)&readmem_dword,temp2); + emit_readword(&readmem_dword,temp2); int temp=addr; //hmh emit_shlimm(addr,3,temp); emit_andimm(temp,24,temp); @@ -2959,18 +2964,18 @@ 
static void do_unalignedwritestub(int n) emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2); emit_orrshl(rt,temp,temp2); } - emit_readword((int)&address,addr); - emit_writeword(temp2,(int)&word); + emit_readword(&address,addr); + emit_writeword(temp2,&word); //save_regs(reglist); // don't need to, no state changes emit_shrimm(addr,16,1); emit_movimm((u_int)writemem,0); //emit_call((int)&indirect_jump_indexed); emit_mov(15,14); emit_readword_dualindexedx4(0,1,15); - emit_readword((int)&Count,HOST_TEMPREG); - emit_readword((int)&next_interupt,2); + emit_readword(&Count,HOST_TEMPREG); + emit_readword(&next_interupt,2); emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG); - emit_writeword(2,(int)&last_count); + emit_writeword(2,&last_count); emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc); if(cc<0) { emit_storereg(CCREG,HOST_TEMPREG); @@ -2987,7 +2992,7 @@ static void do_invstub(int n) set_jump_target(stubs[n].addr, out); save_regs(reglist); if(stubs[n].b!=0) emit_mov(stubs[n].b,0); - emit_call((int)&invalidate_addr); + emit_call(&invalidate_addr); restore_regs(reglist); emit_jmp(stubs[n].retaddr); // return address } @@ -3009,7 +3014,7 @@ void *do_dirty_stub(int i) emit_movw(slen*4,3); #endif emit_movimm(start+i*4,0); - emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm); + emit_call((int)start<(int)0xC0000000?&verify_code:&verify_code_vm); void *entry = out; load_regs_entry(i); if (entry == out) @@ -3033,7 +3038,7 @@ static void do_dirty_stub_ds() emit_movw(slen*4,3); #endif emit_movimm(start+1,0); - emit_call((int)&verify_code_ds); + emit_call(&verify_code_ds); } static void do_cop1stub(int n) @@ -3486,10 +3491,10 @@ static void cop0_assemble(int i,struct regstat *i_regs) if(opcode2[i]==0) // MFC0 { signed char t=get_reg(i_regs->regmap,rt1[i]); - char copr=(source[i]>>11)&0x1f; + u_int copr=(source[i]>>11)&0x1f; //assert(t>=0); // Why does this happen? 
OOT is weird if(t>=0&&rt1[i]!=0) { - emit_readword((int)®_cop0+copr*4,t); + emit_readword(®_cop0[copr],t); } } else if(opcode2[i]==4) // MTC0 @@ -3499,11 +3504,11 @@ static void cop0_assemble(int i,struct regstat *i_regs) assert(s>=0); wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32); if(copr==9||copr==11||copr==12||copr==13) { - emit_readword((int)&last_count,HOST_TEMPREG); + emit_readword(&last_count,HOST_TEMPREG); emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_writeword(HOST_CCREG,(int)&Count); + emit_writeword(HOST_CCREG,&Count); } // What a mess. The status register (12) can enable interrupts, // so needs a special case to handle a pending interrupt. @@ -3514,19 +3519,19 @@ static void cop0_assemble(int i,struct regstat *i_regs) // burn cycles to cause cc_interrupt, which will // reschedule next_interupt. Relies on CCREG from above. assem_debug("MTC0 DS %d\n", copr); - emit_writeword(HOST_CCREG,(int)&last_count); + emit_writeword(HOST_CCREG,&last_count); emit_movimm(0,HOST_CCREG); emit_storereg(CCREG,HOST_CCREG); emit_loadreg(rs1[i],1); emit_movimm(copr,0); - emit_call((int)pcsx_mtc0_ds); + emit_call(pcsx_mtc0_ds); emit_loadreg(rs1[i],s); return; } emit_movimm(start+i*4+4,HOST_TEMPREG); - emit_writeword(HOST_TEMPREG,(int)&pcaddr); + emit_writeword(HOST_TEMPREG,&pcaddr); emit_movimm(0,HOST_TEMPREG); - emit_writeword(HOST_TEMPREG,(int)&pending_exception); + emit_writeword(HOST_TEMPREG,&pending_exception); } //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12); //else @@ -3535,20 +3540,20 @@ static void cop0_assemble(int i,struct regstat *i_regs) else if(s!=1) emit_mov(s,1); emit_movimm(copr,0); - emit_call((int)pcsx_mtc0); + emit_call(pcsx_mtc0); if(copr==9||copr==11||copr==12||copr==13) { - emit_readword((int)&Count,HOST_CCREG); - emit_readword((int)&next_interupt,HOST_TEMPREG); + emit_readword(&Count,HOST_CCREG); + 
emit_readword(&next_interupt,HOST_TEMPREG); emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG); emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); - emit_writeword(HOST_TEMPREG,(int)&last_count); + emit_writeword(HOST_TEMPREG,&last_count); emit_storereg(CCREG,HOST_CCREG); } if(copr==12||copr==13) { assert(!is_delayslot); - emit_readword((int)&pending_exception,14); + emit_readword(&pending_exception,14); emit_test(14,14); - emit_jne((int)&do_interrupt); + emit_jne(&do_interrupt); } emit_loadreg(rs1[i],s); if(get_reg(i_regs->regmap,rs1[i]|64)>=0) @@ -3560,11 +3565,11 @@ static void cop0_assemble(int i,struct regstat *i_regs) assert(opcode2[i]==0x10); if((source[i]&0x3f)==0x10) // RFE { - emit_readword((int)&Status,0); + emit_readword(&Status,0); emit_andimm(0,0x3c,1); emit_andimm(0,~0xf,0); emit_orrshr_imm(1,2,0); - emit_writeword(0,(int)&Status); + emit_writeword(0,&Status); } } } @@ -3579,44 +3584,44 @@ static void cop2_get_dreg(u_int copr,signed char tl,signed char temp) case 9: case 10: case 11: - emit_readword((int)®_cop2d[copr],tl); + emit_readword(®_cop2d[copr],tl); emit_signextend16(tl,tl); - emit_writeword(tl,(int)®_cop2d[copr]); // hmh + emit_writeword(tl,®_cop2d[copr]); // hmh break; case 7: case 16: case 17: case 18: case 19: - emit_readword((int)®_cop2d[copr],tl); + emit_readword(®_cop2d[copr],tl); emit_andimm(tl,0xffff,tl); - emit_writeword(tl,(int)®_cop2d[copr]); + emit_writeword(tl,®_cop2d[copr]); break; case 15: - emit_readword((int)®_cop2d[14],tl); // SXY2 - emit_writeword(tl,(int)®_cop2d[copr]); + emit_readword(®_cop2d[14],tl); // SXY2 + emit_writeword(tl,®_cop2d[copr]); break; case 28: case 29: - emit_readword((int)®_cop2d[9],temp); + emit_readword(®_cop2d[9],temp); emit_testimm(temp,0x8000); // do we need this? 
emit_andimm(temp,0xf80,temp); emit_andne_imm(temp,0,temp); emit_shrimm(temp,7,tl); - emit_readword((int)®_cop2d[10],temp); + emit_readword(®_cop2d[10],temp); emit_testimm(temp,0x8000); emit_andimm(temp,0xf80,temp); emit_andne_imm(temp,0,temp); emit_orrshr_imm(temp,2,tl); - emit_readword((int)®_cop2d[11],temp); + emit_readword(®_cop2d[11],temp); emit_testimm(temp,0x8000); emit_andimm(temp,0xf80,temp); emit_andne_imm(temp,0,temp); emit_orrshl_imm(temp,3,tl); - emit_writeword(tl,(int)®_cop2d[copr]); + emit_writeword(tl,®_cop2d[copr]); break; default: - emit_readword((int)®_cop2d[copr],tl); + emit_readword(®_cop2d[copr],tl); break; } } @@ -3625,24 +3630,24 @@ static void cop2_put_dreg(u_int copr,signed char sl,signed char temp) { switch (copr) { case 15: - emit_readword((int)®_cop2d[13],temp); // SXY1 - emit_writeword(sl,(int)®_cop2d[copr]); - emit_writeword(temp,(int)®_cop2d[12]); // SXY0 - emit_readword((int)®_cop2d[14],temp); // SXY2 - emit_writeword(sl,(int)®_cop2d[14]); - emit_writeword(temp,(int)®_cop2d[13]); // SXY1 + emit_readword(®_cop2d[13],temp); // SXY1 + emit_writeword(sl,®_cop2d[copr]); + emit_writeword(temp,®_cop2d[12]); // SXY0 + emit_readword(®_cop2d[14],temp); // SXY2 + emit_writeword(sl,®_cop2d[14]); + emit_writeword(temp,®_cop2d[13]); // SXY1 break; case 28: emit_andimm(sl,0x001f,temp); emit_shlimm(temp,7,temp); - emit_writeword(temp,(int)®_cop2d[9]); + emit_writeword(temp,®_cop2d[9]); emit_andimm(sl,0x03e0,temp); emit_shlimm(temp,2,temp); - emit_writeword(temp,(int)®_cop2d[10]); + emit_writeword(temp,®_cop2d[10]); emit_andimm(sl,0x7c00,temp); emit_shrimm(temp,3,temp); - emit_writeword(temp,(int)®_cop2d[11]); - emit_writeword(sl,(int)®_cop2d[28]); + emit_writeword(temp,®_cop2d[11]); + emit_writeword(sl,®_cop2d[28]); break; case 30: emit_movs(sl,temp); @@ -3657,13 +3662,13 @@ static void cop2_put_dreg(u_int copr,signed char sl,signed char temp) emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG); emit_jns((int)out-2*4); #endif - 
emit_writeword(sl,(int)®_cop2d[30]); - emit_writeword(temp,(int)®_cop2d[31]); + emit_writeword(sl,®_cop2d[30]); + emit_writeword(temp,®_cop2d[31]); break; case 31: break; default: - emit_writeword(sl,(int)®_cop2d[copr]); + emit_writeword(sl,®_cop2d[copr]); break; } } @@ -3685,7 +3690,7 @@ static void cop2_assemble(int i,struct regstat *i_regs) { signed char tl=get_reg(i_regs->regmap,rt1[i]); if(tl>=0&&rt1[i]!=0) - emit_readword((int)®_cop2c[copr],tl); + emit_readword(®_cop2c[copr],tl); } else if (opcode2[i]==6) // CTC2 { @@ -3714,7 +3719,7 @@ static void cop2_assemble(int i,struct regstat *i_regs) temp=sl; break; } - emit_writeword(temp,(int)®_cop2c[copr]); + emit_writeword(temp,®_cop2c[copr]); assert(sl>=0); } } @@ -3741,19 +3746,19 @@ static void c2op_epilogue(u_int op,u_int reglist) static void c2op_call_MACtoIR(int lm,int need_flags) { if(need_flags) - emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); + emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0); else - emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); + emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf); } static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) { - emit_call((int)func); + emit_call(func); // func is C code and trashes r0 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); if(need_flags||need_ir) c2op_call_MACtoIR(lm,need_flags); - emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf)); + emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf); } static void c2op_assemble(int i,struct regstat *i_regs) @@ -3796,17 +3801,17 @@ static void c2op_assemble(int i,struct regstat *i_regs) if(mx<3) emit_addimm(0,32*4+mx*8*4,6); else - emit_readword((int)&zeromem_ptr,6); + emit_readword(&zeromem_ptr,6); if(cv<3) emit_addimm(0,32*4+(cv*8+5)*4,7); else - emit_readword((int)&zeromem_ptr,7); + emit_readword(&zeromem_ptr,7); #ifdef __ARM_NEON__ emit_movimm(source[i],1); // opcode - emit_call((int)gteMVMVA_part_neon); + emit_call(gteMVMVA_part_neon); if(need_flags) { 
emit_movimm(lm,1); - emit_call((int)gteMACtoIR_flags_neon); + emit_call(gteMACtoIR_flags_neon); } #else if(cv==3&&shift) @@ -3821,14 +3826,14 @@ static void c2op_assemble(int i,struct regstat *i_regs) #else /* if not HAVE_ARMV5 */ c2op_prologue(c2op,reglist); emit_movimm(source[i],1); // opcode - emit_writeword(1,(int)&psxRegs.code); + emit_writeword(1,&psxRegs.code); emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); #endif break; } case GTE_OP: c2op_prologue(c2op,reglist); - emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift)); + emit_call(shift?gteOP_part_shift:gteOP_part_noshift); if(need_flags||need_ir) { emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); c2op_call_MACtoIR(lm,need_flags); @@ -3844,7 +3849,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) break; case GTE_SQR: c2op_prologue(c2op,reglist); - emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift)); + emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift); if(need_flags||need_ir) { emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); c2op_call_MACtoIR(lm,need_flags); @@ -3867,9 +3872,9 @@ static void c2op_assemble(int i,struct regstat *i_regs) c2op_prologue(c2op,reglist); #ifdef DRC_DBG emit_movimm(source[i],1); // opcode - emit_writeword(1,(int)&psxRegs.code); + emit_writeword(1,&psxRegs.code); #endif - emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); + emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]); break; } c2op_epilogue(c2op,reglist); @@ -4065,17 +4070,17 @@ static void do_miniht_jump(int rs,int rh,int ht) { static void do_miniht_insert(u_int return_address,int rt,int temp) { #ifndef HAVE_ARMV7 emit_movimm(return_address,rt); // PC into link register - add_to_linker((int)out,return_address,1); + add_to_linker(out,return_address,1); emit_pcreladdr(temp); - emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]); - emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]); + 
emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]); + emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]); #else emit_movw(return_address&0x0000FFFF,rt); - add_to_linker((int)out,return_address,1); + add_to_linker(out,return_address,1); emit_pcreladdr(temp); - emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]); + emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]); emit_movt(return_address&0xFFFF0000,rt); - emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]); + emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]); #endif } @@ -4165,7 +4170,7 @@ static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dir static void mark_clear_cache(void *target) { - u_long offset = (char *)target - (char *)BASE_ADDR; + u_long offset = (u_char *)target - translation_cache; u_int mask = 1u << ((offset >> 12) & 31); if (!(needs_clear_cache[offset >> 17] & mask)) { char *start = (char *)((u_long)target & ~4095ul); @@ -4183,11 +4188,11 @@ static void do_clear_cache() { u_int bitmap=needs_clear_cache[i]; if(bitmap) { - u_int start,end; + u_char *start, *end; for(j=0;j<32;j++) { if(bitmap&(1< (u_char *)BASE_ADDR + (1< translation_cache + (1<vaddr==vaddr) { - //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr); + //printf("TRACE: count=%d next=%d (get_addr match %x: %p)\n",Count,next_interupt,vaddr,head->addr); hash_table_add(hash_table_get(vaddr), vaddr, head->addr); return head->addr; } @@ -425,7 +430,7 @@ void *get_addr(u_int vaddr) head=jump_dirty[vpage]; while(head!=NULL) { if(head->vaddr==vaddr) { - //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr); + //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %p)\n",Count,next_interupt,vaddr,head->addr); // Don't restore blocks which are about to expire from the cache if (doesnt_expire_soon(head->addr)) if (verify_dirty(head->addr)) { @@ -892,14 +897,14 @@ 
void remove_hash(int vaddr) } } -void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift) +void ll_remove_matching_addrs(struct ll_entry **head,uintptr_t addr,int shift) { struct ll_entry *next; while(*head) { - if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || - ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)) + if(((uintptr_t)((*head)->addr)>>shift)==(addr>>shift) || + ((uintptr_t)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)) { - inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr); + inv_debug("EXP: Remove pointer to %p (%x)\n",(*head)->addr,(*head)->vaddr); remove_hash((*head)->vaddr); next=(*head)->next; free(*head); @@ -928,15 +933,15 @@ void ll_clear(struct ll_entry **head) } // Dereference the pointers and remove if it matches -static void ll_kill_pointers(struct ll_entry *head,int addr,int shift) +static void ll_kill_pointers(struct ll_entry *head,uintptr_t addr,int shift) { while(head) { - int ptr=get_pointer(head->addr); - inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr); + uintptr_t ptr = (uintptr_t)get_pointer(head->addr); + inv_debug("EXP: Lookup pointer to %lx at %p (%x)\n",(long)ptr,head->addr,head->vaddr); if(((ptr>>shift)==(addr>>shift)) || (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))) { - inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr); + inv_debug("EXP: Kill pointer at %p (%x)\n",head->addr,head->vaddr); void *host_addr=find_extjump_insn(head->addr); #ifdef __arm__ mark_clear_cache(host_addr); @@ -964,7 +969,7 @@ void invalidate_page(u_int page) head=jump_out[page]; jump_out[page]=0; while(head!=NULL) { - inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr); + inv_debug("INVALIDATE: kill pointer to %x (%p)\n",head->vaddr,head->addr); void *host_addr=find_extjump_insn(head->addr); #ifdef __arm__ mark_clear_cache(host_addr); @@ -1017,7 +1022,7 @@ void 
invalidate_block(u_int block) while(head!=NULL) { u_int start,end; if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision - get_bounds((int)head->addr,&start,&end); + get_bounds(head->addr,&start,&end); //printf("start: %x end: %x\n",start,end); if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) { if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) { @@ -1054,7 +1059,7 @@ void invalidate_addr(u_int addr) for(;pg1<=page;pg1++) { for(head=jump_dirty[pg1];head!=NULL;head=head->next) { u_int start,end; - get_bounds((int)head->addr,&start,&end); + get_bounds(head->addr,&start,&end); if(ram_offset) { start-=ram_offset; end-=ram_offset; @@ -1110,13 +1115,13 @@ void invalidate_all_pages() void add_link(u_int vaddr,void *src) { u_int page=get_page(vaddr); - inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page); + inv_debug("add_link: %p -> %x (%d)\n",src,vaddr,page); int *ptr=(int *)(src+4); assert((*ptr&0x0fff0000)==0x059f0000); (void)ptr; ll_add(jump_out+page,vaddr,src); - //int ptr=get_pointer(src); - //inv_debug("add_link: Pointer is to %x\n",(int)ptr); + //void *ptr=get_pointer(src); + //inv_debug("add_link: Pointer is to %p\n",ptr); } // If a code block was found to be unmodified (bit was set in @@ -1134,10 +1139,10 @@ void clean_blocks(u_int page) if (doesnt_expire_soon(head->addr)) { u_int start,end; if(verify_dirty(head->addr)) { - //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr); + //printf("Possibly Restore %x (%p)\n",head->vaddr, head->addr); u_int i; u_int inv=0; - get_bounds((int)head->addr,&start,&end); + get_bounds(head->addr,&start,&end); if(start-(u_int)rdram>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) { inv|=invalid_code[i]; @@ -1150,7 +1155,7 @@ void clean_blocks(u_int page) void *clean_addr = get_clean_addr(head->addr); if (doesnt_expire_soon(clean_addr)) { u_int ppage=page; - inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr); + 
inv_debug("INV: Restored %x (%p/%p)\n",head->vaddr, head->addr, clean_addr); //printf("page=%x, addr=%x\n",page,head->vaddr); //assert(head->vaddr>>12==(page|0x80000)); ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr); @@ -2607,7 +2612,7 @@ void load_assemble(int i,struct regstat *i_regs) } } //printf("load_assemble: c=%d\n",c); - //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset); + //if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset); // FIXME: Even if the load is a NOP, we should check for pagefaults... if((tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80)) ||rt1[i]==0) { @@ -2622,7 +2627,7 @@ void load_assemble(int i,struct regstat *i_regs) //if(tl<0) tl=get_reg(i_regs->regmap,-1); if(tl>=0) { //printf("load_assemble: c=%d\n",c); - //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset); + //if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset); assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O reglist&=~(1<=0) reglist&=~(1<=0) emit_readword_indexed((int)rdram-0x80000000,addr,th); - //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl); + //if(th>=0) emit_readword_indexed(rdram-0x80000000,addr,th); + //emit_readword_indexed(rdram-0x7FFFFFFC,addr,tl); #ifdef HOST_IMM_ADDR32 if(c) emit_readdword_tlb(constmap[i][s]+offset,map,th,tl); @@ -2897,7 +2902,7 @@ void store_assemble(int i,struct regstat *i_regs) if(!c) a=addr; #endif if(faststore_reg_override) a=faststore_reg_override; - //emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp); + //emit_writebyte_indexed(tl,rdram-0x80000000,temp); emit_writebyte_indexed_tlb(tl,x,a,map,a); } type=STOREB_STUB; @@ -2918,7 +2923,7 @@ void store_assemble(int i,struct regstat *i_regs) if(map>=0) { emit_writehword_indexed(tl,x,a); }else - //emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a); + //emit_writehword_indexed(tl,rdram-0x80000000+x,a); emit_writehword_indexed(tl,x,a); } type=STOREH_STUB; 
@@ -2927,7 +2932,7 @@ void store_assemble(int i,struct regstat *i_regs) if(!c||memtarget) { int a=addr; if(faststore_reg_override) a=faststore_reg_override; - //emit_writeword_indexed(tl,(int)rdram-0x80000000,addr); + //emit_writeword_indexed(tl,rdram-0x80000000,addr); emit_writeword_indexed_tlb(tl,0,a,map,temp); } type=STOREW_STUB; @@ -2938,13 +2943,13 @@ void store_assemble(int i,struct regstat *i_regs) if(faststore_reg_override) a=faststore_reg_override; if(rs2[i]) { assert(th>=0); - //emit_writeword_indexed(th,(int)rdram-0x80000000,addr); - //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,addr); + //emit_writeword_indexed(th,rdram-0x80000000,addr); + //emit_writeword_indexed(tl,rdram-0x7FFFFFFC,addr); emit_writedword_indexed_tlb(th,tl,0,a,map,temp); }else{ // Store zero - //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp); - //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp); + //emit_writeword_indexed(tl,rdram-0x80000000,temp); + //emit_writeword_indexed(tl,rdram-0x7FFFFFFC,temp); emit_writedword_indexed_tlb(tl,tl,0,a,map,temp); } } @@ -2968,7 +2973,7 @@ void store_assemble(int i,struct regstat *i_regs) assert(ir>=0); emit_cmpmem_indexedsr12_reg(ir,addr,1); #else - emit_cmpmem_indexedsr12_imm((int)invalid_code,addr,1); + emit_cmpmem_indexedsr12_imm(invalid_code,addr,1); #endif #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) emit_callne(invalidate_addr_reg[addr]); @@ -2994,7 +2999,7 @@ void store_assemble(int i,struct regstat *i_regs) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i); wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty); emit_movimm(start+i*4+4,0); - emit_writeword(0,(int)&pcaddr); + emit_writeword(0,&pcaddr); emit_jmp(do_interrupt); } } @@ -3204,7 +3209,7 @@ void storelr_assemble(int i,struct regstat *i_regs) assert(ir>=0); emit_cmpmem_indexedsr12_reg(ir,temp,1); #else - emit_cmpmem_indexedsr12_imm((int)invalid_code,temp,1); + emit_cmpmem_indexedsr12_imm(invalid_code,temp,1); #endif 
#if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) emit_callne(invalidate_addr_reg[temp]); @@ -3304,7 +3309,7 @@ void c2ls_assemble(int i,struct regstat *i_regs) assert(ir>=0); emit_cmpmem_indexedsr12_reg(ir,ar,1); #else - emit_cmpmem_indexedsr12_imm((int)invalid_code,ar,1); + emit_cmpmem_indexedsr12_imm(invalid_code,ar,1); #endif #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) emit_callne(invalidate_addr_reg[ar]); @@ -3386,9 +3391,9 @@ void hlecall_assemble(int i,struct regstat *i_regs) emit_movimm(start+i*4+4,0); // Get PC uint32_t hleCode = source[i] & 0x03ffffff; if (hleCode >= ARRAY_SIZE(psxHLEt)) - emit_movimm((int)psxNULL,1); + emit_movimm((uintptr_t)psxNULL,1); else - emit_movimm((int)psxHLEt[hleCode],1); + emit_movimm((uintptr_t)psxHLEt[hleCode],1); emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX emit_jmp(jump_hlecall); } @@ -3745,13 +3750,13 @@ static int get_final_value(int hr, int i, int *value) { // Precompute load address *value=constmap[i][hr]+imm[i+1]; - //printf("c=%x imm=%x\n",(int)constmap[i][hr],imm[i+1]); + //printf("c=%x imm=%lx\n",(long)constmap[i][hr],imm[i+1]); return 1; } } } *value=constmap[i][hr]; - //printf("c=%x\n",(int)constmap[i][hr]); + //printf("c=%lx\n",(long)constmap[i][hr]); if(i==slen-1) return 1; if(reg<64) { return !((unneeded_reg[i+1]>>reg)&1); @@ -4211,11 +4216,11 @@ static void drc_dbg_emit_do_cmp(int i) if(regs[i].regmap[hr]>=0) reglist|=1<>2].regmap_entry); else if(itype[i]==RJUMP) { if(get_reg(branch_regs[i].regmap,RTEMP)>=0) - emit_readword((int)&pcaddr,get_reg(branch_regs[i].regmap,RTEMP)); + emit_readword(&pcaddr,get_reg(branch_regs[i].regmap,RTEMP)); else emit_loadreg(rs1[i],get_reg(branch_regs[i].regmap,rs1[i])); } @@ -4594,11 +4599,12 @@ static void do_ccstub(int n) emit_jmp(stubs[n].retaddr); } -static void add_to_linker(int addr,int target,int ext) +static void add_to_linker(void *addr, u_int target, int ext) { - link_addr[linkcount][0]=addr; - 
link_addr[linkcount][1]=target; - link_addr[linkcount][2]=ext; + assert(linkcount < ARRAY_SIZE(link_addr)); + link_addr[linkcount].addr = addr; + link_addr[linkcount].target = target; + link_addr[linkcount].ext = ext; linkcount++; } @@ -4626,7 +4632,7 @@ static void ujump_assemble_write_ra(int i) #ifdef REG_PREFETCH if(temp>=0) { - if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table_get(return_address),temp); + if(i_regmap[temp]!=PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp); } #endif emit_movimm(return_address,rt); // PC into link register @@ -4649,7 +4655,7 @@ void ujump_assemble(int i,struct regstat *i_regs) signed char *i_regmap=i_regs->regmap; int return_address=start+i*4+8; if(get_reg(branch_regs[i].regmap,31)>0) - if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table_get(return_address),temp); + if(i_regmap[temp]==PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp); } #endif if(rt1[i]==31&&(rt1[i]==rs1[i+1]||rt1[i]==rs2[i+1])) { @@ -4684,7 +4690,7 @@ void ujump_assemble(int i,struct regstat *i_regs) ds_assemble_entry(i); } else { - add_to_linker((int)out,ba[i],internal_branch(branch_regs[i].is32,ba[i])); + add_to_linker(out,ba[i],internal_branch(branch_regs[i].is32,ba[i])); emit_jmp(0); } } @@ -4701,7 +4707,7 @@ static void rjump_assemble_write_ra(int i) #ifdef REG_PREFETCH if(temp>=0) { - if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table_get(return_address),temp); + if(i_regmap[temp]!=PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp); } #endif emit_movimm(return_address,rt); // PC into link register @@ -4732,7 +4738,7 @@ void rjump_assemble(int i,struct regstat *i_regs) if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) { signed char *i_regmap=i_regs->regmap; int return_address=start+i*4+8; - if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table_get(return_address),temp); + if(i_regmap[temp]==PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp); } } #endif @@ -4900,7 +4906,7 @@ void 
cjump_assemble(int i,struct regstat *i_regs) ds_assemble_entry(i); } else { - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jmp(0); } #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -4926,24 +4932,24 @@ void cjump_assemble(int i,struct regstat *i_regs) if(s2h>=0) emit_cmp(s1h,s2h); else emit_test(s1h,s1h); nottaken1=out; - emit_jne(1); + emit_jne((void *)1l); } if(opcode[i]==5) // BNE { if(s2h>=0) emit_cmp(s1h,s2h); else emit_test(s1h,s1h); if(invert) taken=out; - else add_to_linker((int)out,ba[i],internal); + else add_to_linker(out,ba[i],internal); emit_jne(0); } if(opcode[i]==6) // BLEZ { emit_test(s1h,s1h); if(invert) taken=out; - else add_to_linker((int)out,ba[i],internal); + else add_to_linker(out,ba[i],internal); emit_js(0); nottaken1=out; - emit_jne(1); + emit_jne((void *)1l); } if(opcode[i]==7) // BGTZ { @@ -4951,7 +4957,7 @@ void cjump_assemble(int i,struct regstat *i_regs) nottaken1=out; emit_js(1); if(invert) taken=out; - else add_to_linker((int)out,ba[i],internal); + else add_to_linker(out,ba[i],internal); emit_jne(0); } } // if(!only32) @@ -4964,9 +4970,9 @@ void cjump_assemble(int i,struct regstat *i_regs) else emit_test(s1l,s1l); if(invert){ nottaken=out; - emit_jne(1); + emit_jne((void *)1l); }else{ - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jeq(0); } } @@ -4978,7 +4984,7 @@ void cjump_assemble(int i,struct regstat *i_regs) nottaken=out; emit_jeq(1); }else{ - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jne(0); } } @@ -4989,7 +4995,7 @@ void cjump_assemble(int i,struct regstat *i_regs) nottaken=out; emit_jge(1); }else{ - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jl(0); } } @@ -5000,7 +5006,7 @@ void cjump_assemble(int i,struct regstat *i_regs) nottaken=out; emit_jl(1); }else{ - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jge(0); } } @@ -5010,10 +5016,10 @@ void 
cjump_assemble(int i,struct regstat *i_regs) if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { if(adj) { emit_addimm(cc,-CLOCK_ADJUST(adj),cc); - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); }else{ emit_addnop(13); - add_to_linker((int)out,ba[i],internal*2); + add_to_linker(out,ba[i],internal*2); } emit_jmp(0); }else @@ -5030,7 +5036,7 @@ void cjump_assemble(int i,struct regstat *i_regs) ds_assemble_entry(i); } else { - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jmp(0); } } @@ -5059,14 +5065,14 @@ void cjump_assemble(int i,struct regstat *i_regs) if(s2h>=0) emit_cmp(s1h,s2h); else emit_test(s1h,s1h); nottaken1=out; - emit_jne(2); + emit_jne((void *)2l); } if((opcode[i]&0x2f)==5) // BNE { if(s2h>=0) emit_cmp(s1h,s2h); else emit_test(s1h,s1h); taken=out; - emit_jne(1); + emit_jne((void *)1l); } if((opcode[i]&0x2f)==6) // BLEZ { @@ -5074,7 +5080,7 @@ void cjump_assemble(int i,struct regstat *i_regs) taken=out; emit_js(1); nottaken1=out; - emit_jne(2); + emit_jne((void *)2l); } if((opcode[i]&0x2f)==7) // BGTZ { @@ -5082,7 +5088,7 @@ void cjump_assemble(int i,struct regstat *i_regs) nottaken1=out; emit_js(2); taken=out; - emit_jne(1); + emit_jne((void *)1l); } } // if(!only32) @@ -5093,7 +5099,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); nottaken=out; - emit_jne(2); + emit_jne((void *)2l); } if((opcode[i]&0x2f)==5) // BNE { @@ -5153,7 +5159,7 @@ void cjump_assemble(int i,struct regstat *i_regs) ds_assemble_entry(i); } else { - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jmp(0); } } @@ -5285,7 +5291,7 @@ void sjump_assemble(int i,struct regstat *i_regs) ds_assemble_entry(i); } else { - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jmp(0); } #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -5313,7 +5319,7 @@ void sjump_assemble(int i,struct regstat *i_regs) nottaken=out; 
emit_jns(1); }else{ - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_js(0); } } @@ -5324,7 +5330,7 @@ void sjump_assemble(int i,struct regstat *i_regs) nottaken=out; emit_js(1); }else{ - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jns(0); } } @@ -5339,7 +5345,7 @@ void sjump_assemble(int i,struct regstat *i_regs) nottaken=out; emit_jns(1); }else{ - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_js(0); } } @@ -5350,7 +5356,7 @@ void sjump_assemble(int i,struct regstat *i_regs) nottaken=out; emit_js(1); }else{ - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jns(0); } } @@ -5361,10 +5367,10 @@ void sjump_assemble(int i,struct regstat *i_regs) if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { if(adj) { emit_addimm(cc,-CLOCK_ADJUST(adj),cc); - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); }else{ emit_addnop(13); - add_to_linker((int)out,ba[i],internal*2); + add_to_linker(out,ba[i],internal*2); } emit_jmp(0); }else @@ -5381,7 +5387,7 @@ void sjump_assemble(int i,struct regstat *i_regs) ds_assemble_entry(i); } else { - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jmp(0); } } @@ -5482,7 +5488,7 @@ void sjump_assemble(int i,struct regstat *i_regs) ds_assemble_entry(i); } else { - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jmp(0); } } @@ -5588,16 +5594,16 @@ void fjump_assemble(int i,struct regstat *i_regs) nottaken=out; emit_jeq(1); }else{ - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jne(0); } } else // BC1F if(invert){ nottaken=out; - emit_jne(1); + emit_jne((void *)1l); }else{ - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jeq(0); } { @@ -5619,7 +5625,7 @@ void fjump_assemble(int i,struct regstat *i_regs) ds_assemble_entry(i); } else { - 
add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jmp(0); } set_jump_target(nottaken, out); @@ -5648,7 +5654,7 @@ void fjump_assemble(int i,struct regstat *i_regs) else // BC1F { nottaken=out; - emit_jne(1); + emit_jne((void *)1l); } } } // if(!unconditional) @@ -5688,7 +5694,7 @@ void fjump_assemble(int i,struct regstat *i_regs) ds_assemble_entry(i); } else { - add_to_linker((int)out,ba[i],internal); + add_to_linker(out,ba[i],internal); emit_jmp(0); } @@ -5966,7 +5972,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) int target_addr=start+i*4+5; void *stub=out; void *compiled_target_addr=check_addr(target_addr); - emit_extjump_ds((int)branch_addr,target_addr); + emit_extjump_ds(branch_addr, target_addr); if(compiled_target_addr) { set_jump_target(branch_addr, compiled_target_addr); add_link(target_addr,stub); @@ -5981,7 +5987,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) int target_addr=start+i*4+8; void *stub=out; void *compiled_target_addr=check_addr(target_addr); - emit_extjump_ds((int)branch_addr,target_addr); + emit_extjump_ds(branch_addr, target_addr); if(compiled_target_addr) { set_jump_target(branch_addr, compiled_target_addr); add_link(target_addr,stub); @@ -6004,7 +6010,7 @@ static void pagespan_ds() if(regs[0].regmap[HOST_CCREG]!=CCREG) wb_register(CCREG,regs[0].regmap_entry,regs[0].wasdirty,regs[0].was32); if(regs[0].regmap[HOST_BTREG]!=BTREG) - emit_writeword(HOST_BTREG,(int)&branch_target); + emit_writeword(HOST_BTREG,&branch_target); load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,rs1[0],rs2[0]); address_generation(0,®s[0],regs[0].regmap_entry); if(itype[0]==STORE||itype[0]==STORELR||(opcode[0]&0x3b)==0x39||(opcode[0]&0x3b)==0x3a) @@ -6064,7 +6070,7 @@ static void pagespan_ds() int btaddr=get_reg(regs[0].regmap,BTREG); if(btaddr<0) { btaddr=get_reg(regs[0].regmap,-1); - emit_readword((int)&branch_target,btaddr); + emit_readword(&branch_target,btaddr); } 
assert(btaddr!=HOST_CCREG); if(regs[0].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); @@ -6916,7 +6922,7 @@ static int new_dynarec_test(void) SysPrintf("test passed.\n"); else SysPrintf("test failed: %08x\n", ret); - out=(u_char *)BASE_ADDR; + out = translation_cache; return ret == DRC_TEST_VAL; } @@ -6925,7 +6931,7 @@ static int new_dynarec_test(void) void new_dynarec_clear_full() { int n; - out=(u_char *)BASE_ADDR; + out = translation_cache; memset(invalid_code,1,sizeof(invalid_code)); memset(hash_table,0xff,sizeof(hash_table)); memset(mini_ht,-1,sizeof(mini_ht)); @@ -6950,7 +6956,7 @@ void new_dynarec_init() // allocate/prepare a buffer for translation cache // see assem_arm.h for some explanation #if defined(BASE_ADDR_FIXED) - if (mmap (translation_cache, 1 << TARGET_SIZE_2, + if (mmap(translation_cache, 1 << TARGET_SIZE_2, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) != translation_cache) { @@ -6978,11 +6984,11 @@ void new_dynarec_init() #else #ifndef NO_WRITE_EXEC // not all systems allow execute in data segment by default - if (mprotect((void *)BASE_ADDR, 1<>12]=0; emit_movimm(start,0); - emit_writeword(0,(int)&pcaddr); + emit_writeword(0,&pcaddr); emit_jmp(new_dyna_leave); literal_pool(0); end_block(beginning); @@ -9814,27 +9820,19 @@ int new_recompile_block(int addr) if(regs[i].isconst) { printf("constants: "); #if defined(__i386__) || defined(__x86_64__) - if(regs[i].isconst&1) printf("eax=%x ",(int)constmap[i][0]); - if((regs[i].isconst>>1)&1) printf("ecx=%x ",(int)constmap[i][1]); - if((regs[i].isconst>>2)&1) printf("edx=%x ",(int)constmap[i][2]); - if((regs[i].isconst>>3)&1) printf("ebx=%x ",(int)constmap[i][3]); - if((regs[i].isconst>>5)&1) printf("ebp=%x ",(int)constmap[i][5]); - if((regs[i].isconst>>6)&1) printf("esi=%x ",(int)constmap[i][6]); - if((regs[i].isconst>>7)&1) printf("edi=%x ",(int)constmap[i][7]); + if(regs[i].isconst&1) printf("eax=%x ",(u_int)constmap[i][0]); + if((regs[i].isconst>>1)&1) 
printf("ecx=%x ",(u_int)constmap[i][1]); + if((regs[i].isconst>>2)&1) printf("edx=%x ",(u_int)constmap[i][2]); + if((regs[i].isconst>>3)&1) printf("ebx=%x ",(u_int)constmap[i][3]); + if((regs[i].isconst>>5)&1) printf("ebp=%x ",(u_int)constmap[i][5]); + if((regs[i].isconst>>6)&1) printf("esi=%x ",(u_int)constmap[i][6]); + if((regs[i].isconst>>7)&1) printf("edi=%x ",(u_int)constmap[i][7]); #endif #ifdef __arm__ - if(regs[i].isconst&1) printf("r0=%x ",(int)constmap[i][0]); - if((regs[i].isconst>>1)&1) printf("r1=%x ",(int)constmap[i][1]); - if((regs[i].isconst>>2)&1) printf("r2=%x ",(int)constmap[i][2]); - if((regs[i].isconst>>3)&1) printf("r3=%x ",(int)constmap[i][3]); - if((regs[i].isconst>>4)&1) printf("r4=%x ",(int)constmap[i][4]); - if((regs[i].isconst>>5)&1) printf("r5=%x ",(int)constmap[i][5]); - if((regs[i].isconst>>6)&1) printf("r6=%x ",(int)constmap[i][6]); - if((regs[i].isconst>>7)&1) printf("r7=%x ",(int)constmap[i][7]); - if((regs[i].isconst>>8)&1) printf("r8=%x ",(int)constmap[i][8]); - if((regs[i].isconst>>9)&1) printf("r9=%x ",(int)constmap[i][9]); - if((regs[i].isconst>>10)&1) printf("r10=%x ",(int)constmap[i][10]); - if((regs[i].isconst>>12)&1) printf("r12=%x ",(int)constmap[i][12]); + int r; + for (r = 0; r < ARRAY_SIZE(constmap[i]); r++) + if ((regs[i].isconst >> r) & 1) + printf(" r%d=%x", r, (u_int)constmap[i][r]); #endif printf("\n"); } @@ -9888,11 +9886,11 @@ int new_recompile_block(int addr) emit_movimm(start,0); // abuse io address var as a flag that we // have already returned here once - emit_readword((int)&address,1); - emit_writeword(0,(int)&pcaddr); - emit_writeword(0,(int)&address); + emit_readword(&address,1); + emit_writeword(0,&pcaddr); + emit_writeword(0,&address); emit_cmp(0,1); - emit_jne((int)new_dyna_leave); + emit_jne(new_dyna_leave); } for(i=0;i %8x\n",link_addr[i][0],link_addr[i][1]); + assem_debug("%p -> %8x\n",link_addr[i].addr,link_addr[i].target); literal_pool(64); - if(!link_addr[i][2]) + if (!link_addr[i].ext) { - void 
*stub=out; - void *addr=check_addr(link_addr[i][1]); - emit_extjump(link_addr[i][0],link_addr[i][1]); - if(addr) { - set_jump_target(link_addr[i][0], addr); - add_link(link_addr[i][1],stub); + void *stub = out; + void *addr = check_addr(link_addr[i].target); + emit_extjump(link_addr[i].addr, link_addr[i].target); + if (addr) { + set_jump_target(link_addr[i].addr, addr); + add_link(link_addr[i].target,stub); } - else set_jump_target(link_addr[i][0], stub); + else + set_jump_target(link_addr[i].addr, stub); } else { // Internal branch - int target=(link_addr[i][1]-start)>>2; + int target=(link_addr[i].target-start)>>2; assert(target>=0&&target>1); + //set_jump_target_fillslot(link_addr[i].addr,instr_addr[target],link_addr[i].ext>>1); //#else - set_jump_target(link_addr[i][0],instr_addr[target]); + set_jump_target(link_addr[i].addr, instr_addr[target]); //#endif } } @@ -10165,7 +10164,7 @@ int new_recompile_block(int addr) if(((u_int)out)&7) emit_addnop(13); #endif assert((u_int)out-(u_int)beginning(u_int)BASE_ADDR+(1< translation_cache+(1<>12;i<=(start+slen*4)>>12;i++) { @@ -10190,11 +10190,11 @@ int new_recompile_block(int addr) /* Pass 10 - Free memory by expiring oldest blocks */ - int end=((((int)out-(int)BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535; + int end=(((out-translation_cache)>>(TARGET_SIZE_2-16))+16384)&65535; while(expirep!=end) { int shift=TARGET_SIZE_2-3; // Divide into 8 blocks - uintptr_t base=(uintptr_t)BASE_ADDR+((expirep>>13)<>13)<>11)&3) { diff --git a/libpcsxcore/new_dynarec/pcsxmem_inline.c b/libpcsxcore/new_dynarec/pcsxmem_inline.c index 305931ae3..02e7705e0 100644 --- a/libpcsxcore/new_dynarec/pcsxmem_inline.c +++ b/libpcsxcore/new_dynarec/pcsxmem_inline.c @@ -15,10 +15,10 @@ static int pcsx_direct_read(int type, u_int addr, int cc_adj, int cc, int rs, in case 0x1120: // rcnt2 count if (rt < 0) goto dont_care; if (cc < 0) return 0; - emit_readword((int)&rcnts[2].mode, HOST_TEMPREG); - emit_readword((int)&rcnts[2].cycleStart, rt); + 
emit_readword(&rcnts[2].mode, HOST_TEMPREG); + emit_readword(&rcnts[2].cycleStart, rt); emit_testimm(HOST_TEMPREG, 0x200); - emit_readword((int)&last_count, HOST_TEMPREG); + emit_readword(&last_count, HOST_TEMPREG); emit_sub(HOST_TEMPREG, rt, HOST_TEMPREG); emit_add(HOST_TEMPREG, cc, HOST_TEMPREG); if (cc_adj) @@ -31,9 +31,9 @@ static int pcsx_direct_read(int type, u_int addr, int cc_adj, int cc, int rs, in case 0x1124: // rcnt mode if (rt < 0) return 0; t = (addr >> 4) & 3; - emit_readword((int)&rcnts[t].mode, rt); + emit_readword(&rcnts[t].mode, rt); emit_andimm(rt, ~0x1800, HOST_TEMPREG); - emit_writeword(HOST_TEMPREG, (int)&rcnts[t].mode); + emit_writeword(HOST_TEMPREG, &rcnts[t].mode); mov_loadtype_adj(type, rt, rt); goto hit; } From 9c45ca93727acdf3053a9fefaa1d5773d219133e Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 2 Nov 2021 00:42:09 +0200 Subject: [PATCH 051/597] drc: remove some leftover n64-only stuff quite sure some more is left, but it's not easy to separate out --- libpcsxcore/new_dynarec/assem_arm.c | 374 +--------------- libpcsxcore/new_dynarec/new_dynarec.c | 604 ++------------------------ 2 files changed, 49 insertions(+), 929 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 36a3e45ac..23b47badf 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -1361,30 +1361,6 @@ static void emit_rorimm(int rs,u_int imm,int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7)); } -static void emit_shldimm(int rs,int rs2,u_int imm,int rt) -{ - assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm); - assert(imm>0); - assert(imm<32); - //if(imm==1) ... 
- assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7)); - assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm); - output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7)); -} - -static void emit_shrdimm(int rs,int rs2,u_int imm,int rt) -{ - assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm); - assert(imm>0); - assert(imm<32); - //if(imm==1) ... - assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7)); - assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm); - output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7)); -} - static void emit_signextend16(int rs,int rt) { #ifndef HAVE_ARMV6 @@ -1507,12 +1483,6 @@ static void emit_cmovs_imm(int imm,int rt) output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval); } -static void emit_cmove_reg(int rs,int rt) -{ - assem_debug("moveq %s,%s\n",regname[rt],regname[rs]); - output_w32(0x01a00000|rd_rn_rm(rt,0,rs)); -} - static void emit_cmovne_reg(int rs,int rt) { assem_debug("movne %s,%s\n",regname[rt],regname[rs]); @@ -1841,28 +1811,6 @@ static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt) output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2)); } -static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt) -{ - if(map<0) emit_readword_indexed(addr, rs, rt); - else { - assert(addr==0); - emit_readword_dualindexedx4(rs, map, rt); - } -} - -static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl) -{ - if(map<0) { - if(rh>=0) emit_readword_indexed(addr, rs, rh); - emit_readword_indexed(addr+4, rs, rl); - }else{ - assert(rh!=rs); - if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh); - emit_addimm(map,1,map); - emit_readword_indexed_tlb(addr, rs, map, rl); - } -} - static void emit_movsbl_indexed(int offset, int rs, int rt) { assert(offset>-256&&offset<256); @@ -1874,23 +1822,6 @@ static void emit_movsbl_indexed(int 
offset, int rs, int rt) } } -static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt) -{ - if(map<0) emit_movsbl_indexed(addr, rs, rt); - else { - if(addr==0) { - emit_shlimm(map,2,map); - assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]); - output_w32(0xe19000d0|rd_rn_rm(rt,rs,map)); - }else{ - assert(addr>-256&&addr<256); - assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]); - output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7)); - emit_movsbl_indexed(addr, rt, rt); - } - } -} - static void emit_movswl_indexed(int offset, int rs, int rt) { assert(offset>-256&&offset<256); @@ -1913,26 +1844,6 @@ static void emit_movzbl_indexed(int offset, int rs, int rt) } } -static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt) -{ - assert(rs2>=0); - assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100); -} - -static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt) -{ - if(map<0) emit_movzbl_indexed(addr, rs, rt); - else { - if(addr==0) { - emit_movzbl_dualindexedx4(rs, map, rt); - }else{ - emit_addimm(rs,addr,rt); - emit_movzbl_dualindexedx4(rt, map, rt); - } - } -} - static void emit_movzwl_indexed(int offset, int rs, int rt) { assert(offset>-256&&offset<256); @@ -1963,38 +1874,6 @@ static void emit_readword(void *addr, int rt) output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset); } -static unused void emit_movsbl(int addr, int rt) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<256); - assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset); - output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); -} - -static unused void emit_movswl(int addr, int rt) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<256); - assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset); - output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); -} - -static unused void emit_movzbl(int addr, int 
rt) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<4096); - assem_debug("ldrb %s,fp+%d\n",regname[rt],offset); - output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset); -} - -static unused void emit_movzwl(int addr, int rt) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<256); - assem_debug("ldrh %s,fp+%d\n",regname[rt],offset); - output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); -} - static void emit_writeword_indexed(int rt, int offset, int rs) { assert(offset>-4096&&offset<4096); @@ -2006,38 +1885,6 @@ static void emit_writeword_indexed(int rt, int offset, int rs) } } -static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2) -{ - assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100); -} - -static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp) -{ - if(map<0) emit_writeword_indexed(rt, addr, rs); - else { - assert(addr==0); - emit_writeword_dualindexedx4(rt, rs, map); - } -} - -static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp) -{ - if(map<0) { - if(rh>=0) emit_writeword_indexed(rh, addr, rs); - emit_writeword_indexed(rl, addr+4, rs); - }else{ - assert(rh>=0); - if(temp!=rs) emit_addimm(map,1,temp); - emit_writeword_indexed_tlb(rh, addr, rs, map, temp); - if(temp!=rs) emit_writeword_indexed_tlb(rl, addr, rs, temp, temp); - else { - emit_addimm(rs,4,rs); - emit_writeword_indexed_tlb(rl, addr, rs, map, temp); - } - } -} - static void emit_writehword_indexed(int rt, int offset, int rs) { assert(offset>-256&&offset<256); @@ -2060,26 +1907,6 @@ static void emit_writebyte_indexed(int rt, int offset, int rs) } } -static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2) -{ - assert(rs2>=0); - assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100); -} - -static void 
emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp) -{ - if(map<0) emit_writebyte_indexed(rt, addr, rs); - else { - if(addr==0) { - emit_writebyte_dualindexedx4(rt, rs, map); - }else{ - emit_addimm(rs,addr,temp); - emit_writebyte_dualindexedx4(rt, temp, map); - } - } -} - static void emit_strcc_dualindexed(int rs1, int rs2, int rt) { assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); @@ -2106,22 +1933,6 @@ static void emit_writeword(int rt, void *addr) output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset); } -static unused void emit_writehword(int rt, void *addr) -{ - uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; - assert(offset<256); - assem_debug("strh %s,fp+%d\n",regname[rt],offset); - output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); -} - -static unused void emit_writebyte(int rt, void *addr) -{ - uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; - assert(offset<4096); - assem_debug("strb %s,fp+%d\n",regname[rt],offset); - output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset); -} - static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) { assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); @@ -2182,54 +1993,18 @@ static void emit_negsmi(int rs, int rt) output_w32(0x42700000|rd_rn_rm(rt,rs,0)); } -static void emit_orreq(u_int rs1,u_int rs2,u_int rt) -{ - assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_orrne(u_int rs1,u_int rs2,u_int rt) -{ - assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2)); -} - static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) { assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); } -static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int 
rt) -{ - assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); -} - -static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); -} - static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) { assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); } -static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); -} - -static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); -} - static void emit_teq(int rs, int rt) { assem_debug("teq %s,%s\n",regname[rs],regname[rt]); @@ -2326,46 +2101,6 @@ static void emit_ldreq_indexed(int rs, u_int offset, int rt) output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset); } -static unused void emit_bicne_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval); -} - -static unused void emit_biccs_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval); -} - -static unused void emit_bicvc_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm); - 
output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval); -} - -static unused void emit_bichi_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval); -} - -static unused void emit_orrvs_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval); -} - static void emit_orrne_imm(int rs,int imm,int rt) { u_int armval; @@ -3359,13 +3094,12 @@ static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) static void loadlr_assemble_arm(int i,struct regstat *i_regs) { - int s,th,tl,temp,temp2,addr,map=-1; + int s,tl,temp,temp2,addr; int offset; void *jaddr=0; int memtarget=0,c=0; int fastload_reg_override=0; u_int hr,reglist=0; - th=get_reg(i_regs->regmap,rt1[i]|64); tl=get_reg(i_regs->regmap,rt1[i]); s=get_reg(i_regs->regmap,rs1[i]); temp=get_reg(i_regs->regmap,-1); @@ -3386,10 +3120,6 @@ static void loadlr_assemble_arm(int i,struct regstat *i_regs) } } if(!c) { - #ifdef RAM_OFFSET - map=get_reg(i_regs->regmap,ROREG); - if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); - #endif emit_shlimm(addr,3,temp); if (opcode[i]==0x22||opcode[i]==0x26) { emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR @@ -3413,8 +3143,7 @@ static void loadlr_assemble_arm(int i,struct regstat *i_regs) if(!c||memtarget) { int a=temp2; if(fastload_reg_override) a=fastload_reg_override; - //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2); - emit_readword_indexed_tlb(0,a,map,temp2); + emit_readword_indexed(0,a,temp2); if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist); } else @@ -3441,47 +3170,7 @@ static void loadlr_assemble_arm(int i,struct regstat *i_regs) //emit_storereg(rt1[i],tl); // DEBUG } if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR - // FIXME: little endian, fastload_reg_override - int 
temp2h=get_reg(i_regs->regmap,FTEMP|64); - if(!c||memtarget) { - //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h); - //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2); - emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2); - if(jaddr) add_stub_r(LOADD_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist); - } - else - inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist); - if(rt1[i]) { - assert(th>=0); - assert(tl>=0); - emit_testimm(temp,32); - emit_andimm(temp,24,temp); - if (opcode[i]==0x1A) { // LDL - emit_rsbimm(temp,32,HOST_TEMPREG); - emit_shl(temp2h,temp,temp2h); - emit_orrshr(temp2,HOST_TEMPREG,temp2h); - emit_movimm(-1,HOST_TEMPREG); - emit_shl(temp2,temp,temp2); - emit_cmove_reg(temp2h,th); - emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl); - emit_bicne_lsl(th,HOST_TEMPREG,temp,th); - emit_orreq(temp2,tl,tl); - emit_orrne(temp2,th,th); - } - if (opcode[i]==0x1B) { // LDR - emit_xorimm(temp,24,temp); - emit_rsbimm(temp,32,HOST_TEMPREG); - emit_shr(temp2,temp,temp2); - emit_orrshl(temp2h,HOST_TEMPREG,temp2); - emit_movimm(-1,HOST_TEMPREG); - emit_shr(temp2h,temp,temp2h); - emit_cmovne_reg(temp2,tl); - emit_bicne_lsr(th,HOST_TEMPREG,temp,th); - emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl); - emit_orrne(temp2h,th,th); - emit_orreq(temp2h,tl,tl); - } - } + assert(0); } } #define loadlr_assemble loadlr_assemble_arm @@ -4111,63 +3800,6 @@ static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int } } - -/* using strd could possibly help but you'd have to allocate registers in pairs -static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu) -{ - int hr; - int wrote=-1; - for(hr=HOST_REGS-1;hr>=0;hr--) { - if(hr!=EXCLUDE_REG) { - if(pre[hr]!=entry[hr]) { - if(pre[hr]>=0) { - if((dirty>>hr)&1) { - if(get_reg(entry,pre[hr])<0) { - if(pre[hr]<64) { - if(!((u>>pre[hr])&1)) { - if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) 
{ - if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) { - emit_sarimm(hr,31,hr+1); - emit_strdreg(pre[hr],hr); - } - else - emit_storereg(pre[hr],hr); - }else{ - emit_storereg(pre[hr],hr); - if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) { - emit_sarimm(hr,31,hr); - emit_storereg(pre[hr]|64,hr); - } - } - } - }else{ - if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) { - emit_storereg(pre[hr],hr); - } - } - wrote=hr; - } - } - } - } - } - } - for(hr=0;hr=0) { - int nr; - if((nr=get_reg(entry,pre[hr]))>=0) { - emit_mov(hr,nr); - } - } - } - } - } -} -#define wb_invalidate wb_invalidate_arm -*/ - static void mark_clear_cache(void *target) { u_long offset = (u_char *)target - translation_cache; diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 78e53d45e..61dac47a7 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -213,7 +213,7 @@ struct link_entry #define CCREG 36 // Cycle count #define INVCP 37 // Pointer to invalid_code //#define MMREG 38 // Pointer to memory_map -#define ROREG 39 // ram offset (if rdram!=0x80000000) +//#define ROREG 39 // ram offset (if rdram!=0x80000000) #define TEMPREG 40 #define FTEMP 40 // FPU temporary register #define PTEMP 41 // Prefetch temporary register @@ -1218,44 +1218,19 @@ void shiftimm_alloc(struct regstat *current,int i) if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA { - if(rt1[i]) { - if(rs1[i]) alloc_reg64(current,i,rs1[i]); - alloc_reg64(current,i,rt1[i]); - current->is32&=~(1LL<is32&=~(1LL<is32&=~(1LL<is32|=1LL<is32|=1LL<>31; - sum^=((u_int *)rdram)[i]; - } - return sum; -} -int rchecksum() -{ - int i; - int sum=0; - for(i=0;i<64;i++) - sum^=((u_int *)reg)[i]; - return sum; -} void rlist() { int i; @@ -2484,100 +2439,19 @@ void shiftimm_assemble(int i,struct regstat *i_regs) } if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA { - if(rt1[i]) { - signed char sh,sl,th,tl; - th=get_reg(i_regs->regmap,rt1[i]|64); - 
tl=get_reg(i_regs->regmap,rt1[i]); - sh=get_reg(i_regs->regmap,rs1[i]|64); - sl=get_reg(i_regs->regmap,rs1[i]); - if(tl>=0) { - if(rs1[i]==0) - { - emit_zeroreg(tl); - if(th>=0) emit_zeroreg(th); - } - else - { - assert(sl>=0); - assert(sh>=0); - if(imm[i]) { - if(opcode2[i]==0x38) // DSLL - { - if(th>=0) emit_shldimm(sh,sl,imm[i],th); - emit_shlimm(sl,imm[i],tl); - } - if(opcode2[i]==0x3a) // DSRL - { - emit_shrdimm(sl,sh,imm[i],tl); - if(th>=0) emit_shrimm(sh,imm[i],th); - } - if(opcode2[i]==0x3b) // DSRA - { - emit_shrdimm(sl,sh,imm[i],tl); - if(th>=0) emit_sarimm(sh,imm[i],th); - } - }else{ - // Shift by zero - if(sl!=tl) emit_mov(sl,tl); - if(th>=0&&sh!=th) emit_mov(sh,th); - } - } - } - } + assert(0); } if(opcode2[i]==0x3c) // DSLL32 { - if(rt1[i]) { - signed char sl,tl,th; - tl=get_reg(i_regs->regmap,rt1[i]); - th=get_reg(i_regs->regmap,rt1[i]|64); - sl=get_reg(i_regs->regmap,rs1[i]); - if(th>=0||tl>=0){ - assert(tl>=0); - assert(th>=0); - assert(sl>=0); - emit_mov(sl,th); - emit_zeroreg(tl); - if(imm[i]>32) - { - emit_shlimm(th,imm[i]&31,th); - } - } - } + assert(0); } if(opcode2[i]==0x3e) // DSRL32 { - if(rt1[i]) { - signed char sh,tl,th; - tl=get_reg(i_regs->regmap,rt1[i]); - th=get_reg(i_regs->regmap,rt1[i]|64); - sh=get_reg(i_regs->regmap,rs1[i]|64); - if(tl>=0){ - assert(sh>=0); - emit_mov(sh,tl); - if(th>=0) emit_zeroreg(th); - if(imm[i]>32) - { - emit_shrimm(tl,imm[i]&31,tl); - } - } - } + assert(0); } if(opcode2[i]==0x3f) // DSRA32 { - if(rt1[i]) { - signed char sh,tl; - tl=get_reg(i_regs->regmap,rt1[i]); - sh=get_reg(i_regs->regmap,rs1[i]|64); - if(tl>=0){ - assert(sh>=0); - emit_mov(sh,tl); - if(imm[i]>32) - { - emit_sarimm(tl,imm[i]&31,tl); - } - } - } + assert(0); } } @@ -2591,7 +2465,7 @@ void shift_assemble(int i,struct regstat *i_regs) void load_assemble(int i,struct regstat *i_regs) { - int s,th,tl,addr,map=-1; + int s,th,tl,addr; int offset; void *jaddr=0; int memtarget=0,c=0; @@ -2632,10 +2506,6 @@ void load_assemble(int i,struct regstat 
*i_regs) reglist&=~(1<=0) reglist&=~(1<regmap,ROREG); - if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); - #endif #ifdef R29_HACK // Strmnnrmn's speed hack if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) @@ -2652,24 +2522,12 @@ void load_assemble(int i,struct regstat *i_regs) if (opcode[i]==0x20) { // LB if(!c||memtarget) { if(!dummy) { - #ifdef HOST_IMM_ADDR32 - if(c) - emit_movsbl_tlb((constmap[i][s]+offset)^3,map,tl); - else - #endif { - //emit_xorimm(addr,3,tl); - //emit_movsbl_indexed(rdram-0x80000000,tl,tl); int x=0,a=tl; -#ifdef BIG_ENDIAN_MIPS - if(!c) emit_xorimm(addr,3,tl); - else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset); -#else if(!c) a=addr; -#endif if(fastload_reg_override) a=fastload_reg_override; - emit_movsbl_indexed_tlb(x,a,map,tl); + emit_movsbl_indexed(x,a,tl); } } if(jaddr) @@ -2681,33 +2539,10 @@ void load_assemble(int i,struct regstat *i_regs) if (opcode[i]==0x21) { // LH if(!c||memtarget) { if(!dummy) { - #ifdef HOST_IMM_ADDR32 - if(c) - emit_movswl_tlb((constmap[i][s]+offset)^2,map,tl); - else - #endif - { - int x=0,a=tl; -#ifdef BIG_ENDIAN_MIPS - if(!c) emit_xorimm(addr,2,tl); - else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset); -#else - if(!c) a=addr; -#endif - if(fastload_reg_override) a=fastload_reg_override; - //#ifdef - //emit_movswl_indexed_tlb(x,tl,map,tl); - //else - if(map>=0) { - emit_movswl_indexed(x,a,tl); - }else{ - #if 1 //def RAM_OFFSET - emit_movswl_indexed(x,a,tl); - #else - emit_movswl_indexed(rdram-0x80000000+x,a,tl); - #endif - } - } + int x=0,a=tl; + if(!c) a=addr; + if(fastload_reg_override) a=fastload_reg_override; + emit_movswl_indexed(x,a,tl); } if(jaddr) add_stub_r(LOADH_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); @@ -2720,13 +2555,7 @@ void load_assemble(int i,struct regstat *i_regs) if(!dummy) { int a=addr; if(fastload_reg_override) a=fastload_reg_override; - //emit_readword_indexed(rdram-0x80000000,addr,tl); - #ifdef HOST_IMM_ADDR32 - if(c) - 
emit_readword_tlb(constmap[i][s]+offset,map,tl); - else - #endif - emit_readword_indexed_tlb(0,a,map,tl); + emit_readword_indexed(0,a,tl); } if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); @@ -2737,25 +2566,11 @@ void load_assemble(int i,struct regstat *i_regs) if (opcode[i]==0x24) { // LBU if(!c||memtarget) { if(!dummy) { - #ifdef HOST_IMM_ADDR32 - if(c) - emit_movzbl_tlb((constmap[i][s]+offset)^3,map,tl); - else - #endif - { - //emit_xorimm(addr,3,tl); - //emit_movzbl_indexed(rdram-0x80000000,tl,tl); - int x=0,a=tl; -#ifdef BIG_ENDIAN_MIPS - if(!c) emit_xorimm(addr,3,tl); - else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset); -#else - if(!c) a=addr; -#endif - if(fastload_reg_override) a=fastload_reg_override; + int x=0,a=tl; + if(!c) a=addr; + if(fastload_reg_override) a=fastload_reg_override; - emit_movzbl_indexed_tlb(x,a,map,tl); - } + emit_movzbl_indexed(x,a,tl); } if(jaddr) add_stub_r(LOADBU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); @@ -2766,33 +2581,10 @@ void load_assemble(int i,struct regstat *i_regs) if (opcode[i]==0x25) { // LHU if(!c||memtarget) { if(!dummy) { - #ifdef HOST_IMM_ADDR32 - if(c) - emit_movzwl_tlb((constmap[i][s]+offset)^2,map,tl); - else - #endif - { - int x=0,a=tl; -#ifdef BIG_ENDIAN_MIPS - if(!c) emit_xorimm(addr,2,tl); - else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset); -#else - if(!c) a=addr; -#endif - if(fastload_reg_override) a=fastload_reg_override; - //#ifdef - //emit_movzwl_indexed_tlb(x,tl,map,tl); - //#else - if(map>=0) { - emit_movzwl_indexed(x,a,tl); - }else{ - #if 1 //def RAM_OFFSET - emit_movzwl_indexed(x,a,tl); - #else - emit_movzwl_indexed(rdram-0x80000000+x,a,tl); - #endif - } - } + int x=0,a=tl; + if(!c) a=addr; + if(fastload_reg_override) a=fastload_reg_override; + emit_movzwl_indexed(x,a,tl); } if(jaddr) add_stub_r(LOADHU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); @@ -2806,13 +2598,7 @@ void load_assemble(int i,struct regstat *i_regs) if(!dummy) { int a=addr; 
if(fastload_reg_override) a=fastload_reg_override; - //emit_readword_indexed(rdram-0x80000000,addr,tl); - #ifdef HOST_IMM_ADDR32 - if(c) - emit_readword_tlb(constmap[i][s]+offset,map,tl); - else - #endif - emit_readword_indexed_tlb(0,a,map,tl); + emit_readword_indexed(0,a,tl); } if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); @@ -2823,24 +2609,7 @@ void load_assemble(int i,struct regstat *i_regs) emit_zeroreg(th); } if (opcode[i]==0x37) { // LD - if(!c||memtarget) { - if(!dummy) { - int a=addr; - if(fastload_reg_override) a=fastload_reg_override; - //if(th>=0) emit_readword_indexed(rdram-0x80000000,addr,th); - //emit_readword_indexed(rdram-0x7FFFFFFC,addr,tl); - #ifdef HOST_IMM_ADDR32 - if(c) - emit_readdword_tlb(constmap[i][s]+offset,map,th,tl); - else - #endif - emit_readdword_indexed_tlb(0,a,map,th,tl); - } - if(jaddr) - add_stub_r(LOADD_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); - } - else - inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); + assert(0); } } } @@ -2855,7 +2624,7 @@ void loadlr_assemble(int i,struct regstat *i_regs) void store_assemble(int i,struct regstat *i_regs) { - int s,th,tl,map=-1; + int s,tl; int addr,temp; int offset; void *jaddr=0; @@ -2864,7 +2633,6 @@ void store_assemble(int i,struct regstat *i_regs) int agr=AGEN1+(i&1); int faststore_reg_override=0; u_int hr,reglist=0; - th=get_reg(i_regs->regmap,rs2[i]|64); tl=get_reg(i_regs->regmap,rs2[i]); s=get_reg(i_regs->regmap,rs1[i]); temp=get_reg(i_regs->regmap,agr); @@ -2895,36 +2663,18 @@ void store_assemble(int i,struct regstat *i_regs) if (opcode[i]==0x28) { // SB if(!c||memtarget) { int x=0,a=temp; -#ifdef BIG_ENDIAN_MIPS - if(!c) emit_xorimm(addr,3,temp); - else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset); -#else if(!c) a=addr; -#endif if(faststore_reg_override) a=faststore_reg_override; - //emit_writebyte_indexed(tl,rdram-0x80000000,temp); - emit_writebyte_indexed_tlb(tl,x,a,map,a); + 
emit_writebyte_indexed(tl,x,a); } type=STOREB_STUB; } if (opcode[i]==0x29) { // SH if(!c||memtarget) { int x=0,a=temp; -#ifdef BIG_ENDIAN_MIPS - if(!c) emit_xorimm(addr,2,temp); - else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset); -#else if(!c) a=addr; -#endif if(faststore_reg_override) a=faststore_reg_override; - //#ifdef - //emit_writehword_indexed_tlb(tl,x,temp,map,temp); - //#else - if(map>=0) { - emit_writehword_indexed(tl,x,a); - }else - //emit_writehword_indexed(tl,rdram-0x80000000+x,a); - emit_writehword_indexed(tl,x,a); + emit_writehword_indexed(tl,x,a); } type=STOREH_STUB; } @@ -2932,27 +2682,12 @@ void store_assemble(int i,struct regstat *i_regs) if(!c||memtarget) { int a=addr; if(faststore_reg_override) a=faststore_reg_override; - //emit_writeword_indexed(tl,rdram-0x80000000,addr); - emit_writeword_indexed_tlb(tl,0,a,map,temp); + emit_writeword_indexed(tl,0,a); } type=STOREW_STUB; } if (opcode[i]==0x3F) { // SD - if(!c||memtarget) { - int a=addr; - if(faststore_reg_override) a=faststore_reg_override; - if(rs2[i]) { - assert(th>=0); - //emit_writeword_indexed(th,rdram-0x80000000,addr); - //emit_writeword_indexed(tl,rdram-0x7FFFFFFC,addr); - emit_writedword_indexed_tlb(th,tl,0,a,map,temp); - }else{ - // Store zero - //emit_writeword_indexed(tl,rdram-0x80000000,temp); - //emit_writeword_indexed(tl,rdram-0x7FFFFFFC,temp); - emit_writedword_indexed_tlb(tl,tl,0,a,map,temp); - } - } + assert(0); type=STORED_STUB; } if(jaddr) { @@ -3007,9 +2742,8 @@ void store_assemble(int i,struct regstat *i_regs) void storelr_assemble(int i,struct regstat *i_regs) { - int s,th,tl; + int s,tl; int temp; - int temp2=-1; int offset; void *jaddr=0; void *case1, *case2, *case3; @@ -3017,7 +2751,6 @@ void storelr_assemble(int i,struct regstat *i_regs) int memtarget=0,c=0; int agr=AGEN1+(i&1); u_int hr,reglist=0; - th=get_reg(i_regs->regmap,rs2[i]|64); tl=get_reg(i_regs->regmap,rs2[i]); s=get_reg(i_regs->regmap,rs1[i]); temp=get_reg(i_regs->regmap,agr); @@ -3047,22 +2780,13 
@@ void storelr_assemble(int i,struct regstat *i_regs) emit_jmp(0); } } - #ifdef RAM_OFFSET - int map=get_reg(i_regs->regmap,ROREG); - if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); - #else - if((u_int)rdram!=0x80000000) - emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp); - #endif + emit_addimm_no_flags(ram_offset,temp); if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR - temp2=get_reg(i_regs->regmap,FTEMP); - if(!rs2[i]) temp2=th=tl; + assert(0); } -#ifndef BIG_ENDIAN_MIPS - emit_xorimm(temp,3,temp); -#endif + emit_xorimm(temp,3,temp); emit_testimm(temp,2); case2=out; emit_jne(0); @@ -3077,12 +2801,10 @@ void storelr_assemble(int i,struct regstat *i_regs) emit_writebyte_indexed(tl,3,temp); } if (opcode[i]==0x2C) { // SDL - emit_writeword_indexed(th,0,temp); - if(rs2[i]) emit_mov(tl,temp2); + assert(0); } if (opcode[i]==0x2D) { // SDR - emit_writebyte_indexed(tl,3,temp); - if(rs2[i]) emit_shldimm(th,tl,24,temp2); + assert(0); } done0=out; emit_jmp(0); @@ -3101,18 +2823,10 @@ void storelr_assemble(int i,struct regstat *i_regs) emit_writehword_indexed(tl,1,temp); } if (opcode[i]==0x2C) { // SDL - if(rs2[i]) emit_shrdimm(tl,th,8,temp2); - // Write 3 msb into three least significant bytes - if(rs2[i]) emit_rorimm(th,8,th); - emit_writehword_indexed(th,-1,temp); - if(rs2[i]) emit_rorimm(th,16,th); - emit_writebyte_indexed(th,1,temp); - if(rs2[i]) emit_rorimm(th,8,th); + assert(0); } if (opcode[i]==0x2D) { // SDR - if(rs2[i]) emit_shldimm(th,tl,16,temp2); - // Write two lsb into two most significant bytes - emit_writehword_indexed(tl,1,temp); + assert(0); } done1=out; emit_jmp(0); @@ -3135,19 +2849,10 @@ void storelr_assemble(int i,struct regstat *i_regs) if(rs2[i]) emit_rorimm(tl,24,tl); } if (opcode[i]==0x2C) { // SDL - if(rs2[i]) emit_shrdimm(tl,th,16,temp2); - // Write two msb into two least significant bytes - if(rs2[i]) emit_rorimm(th,16,th); - emit_writehword_indexed(th,-2,temp); - if(rs2[i]) emit_rorimm(th,16,th); + assert(0); } if (opcode[i]==0x2D) { 
// SDR - if(rs2[i]) emit_shldimm(th,tl,8,temp2); - // Write 3 lsb into three most significant bytes - emit_writebyte_indexed(tl,-1,temp); - if(rs2[i]) emit_rorimm(tl,8,tl); - emit_writehword_indexed(tl,0,temp); - if(rs2[i]) emit_rorimm(tl,24,tl); + assert(0); } done2=out; emit_jmp(0); @@ -3164,46 +2869,24 @@ void storelr_assemble(int i,struct regstat *i_regs) emit_writeword_indexed(tl,-3,temp); } if (opcode[i]==0x2C) { // SDL - if(rs2[i]) emit_shrdimm(tl,th,24,temp2); - // Write msb into least significant byte - if(rs2[i]) emit_rorimm(th,24,th); - emit_writebyte_indexed(th,-3,temp); - if(rs2[i]) emit_rorimm(th,8,th); + assert(0); } if (opcode[i]==0x2D) { // SDR - if(rs2[i]) emit_mov(th,temp2); - // Write entire word - emit_writeword_indexed(tl,-3,temp); + assert(0); } set_jump_target(done0, out); set_jump_target(done1, out); set_jump_target(done2, out); if (opcode[i]==0x2C) { // SDL - emit_testimm(temp,4); - done0=out; - emit_jne(0); - emit_andimm(temp,~3,temp); - emit_writeword_indexed(temp2,4,temp); - set_jump_target(done0, out); + assert(0); } if (opcode[i]==0x2D) { // SDR - emit_testimm(temp,4); - done0=out; - emit_jeq(0); - emit_andimm(temp,~3,temp); - emit_writeword_indexed(temp2,-4,temp); - set_jump_target(done0, out); + assert(0); } if(!c||!memtarget) add_stub_r(STORELR_STUB,jaddr,out,i,temp,i_regs,ccadj[i],reglist); if(!(i_regs->waswritten&(1<regmap,ROREG); - if(map<0) map=HOST_TEMPREG; - gen_orig_addr_w(temp,map); - #else - emit_addimm_no_flags((u_int)0x80000000-(u_int)rdram,temp); - #endif + emit_addimm_no_flags(-ram_offset,temp); #if defined(HOST_IMM8) int ir=get_reg(i_regs->regmap,INVCP); assert(ir>=0); @@ -3283,10 +2966,6 @@ void c2ls_assemble(int i,struct regstat *i_regs) fastio_reg_override=HOST_TEMPREG; } if (opcode[i]==0x32) { // LWC2 - #ifdef HOST_IMM_ADDR32 - if(c) emit_readword_tlb(constmap[i][s]+offset,-1,tl); - else - #endif int a=ar; if(fastio_reg_override) a=fastio_reg_override; emit_readword_indexed(0,a,tl); @@ -3667,9 +3346,6 @@ void 
address_generation(int i,struct regstat *i_regs,signed char entry[]) }else if (opcode[i]==0x1a||opcode[i]==0x1b) { emit_movimm((constmap[i][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR }else{ - #ifdef HOST_IMM_ADDR32 - if((itype[i]!=LOAD&&(opcode[i]&0x3b)!=0x31&&(opcode[i]&0x3b)!=0x32)) // LWC1/LDC1/LWC2/LDC2 - #endif emit_movimm(constmap[i][rs]+offset,ra); regs[i].loadedconst|=1< %p\n", addr, out); //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr); //if(debug) - //printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum()); //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29); // this is just for speculation @@ -8062,9 +7734,7 @@ int new_recompile_block(int addr) #ifdef USE_MINI_HT if(rs1[i]==31) { // JALR alloc_reg(¤t,i,RHASH); - #ifndef HOST_IMM_ADDR32 alloc_reg(¤t,i,RHTBL); - #endif } #endif delayslot_alloc(¤t,i+1); @@ -8459,9 +8129,7 @@ int new_recompile_block(int addr) #ifdef USE_MINI_HT if(rs1[i-1]==31) { // JALR alloc_reg(&branch_regs[i-1],i-1,RHASH); - #ifndef HOST_IMM_ADDR32 alloc_reg(&branch_regs[i-1],i-1,RHTBL); - #endif } #endif memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); @@ -9372,186 +9040,6 @@ int new_recompile_block(int addr) } } - // Cache memory offset or tlb map pointer if a register is available - #ifndef HOST_IMM_ADDR32 - #ifndef RAM_OFFSET - if(0) - #endif - { - int earliest_available[HOST_REGS]; - int loop_start[HOST_REGS]; - int score[HOST_REGS]; - int end[HOST_REGS]; - int reg=ROREG; - - // Init - for(hr=0;hr=0) { - score[hr]=0;earliest_available[hr]=i+1; - loop_start[hr]=MAXBLOCK; - } - if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) { - if(branch_regs[i].regmap[hr]>=0) { - score[hr]=0;earliest_available[hr]=i+2; - loop_start[hr]=MAXBLOCK; - } - } - } - // No register allocations after unconditional jumps - if(itype[i]==UJUMP||itype[i]==RJUMP||(source[i]>>16)==0x1000) - { - 
for(hr=0;hr=0) break; - if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) { - if(branch_regs[j].regmap[hr]>=0) break; - if(ooo[j]) { - if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) break; - }else{ - if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) break; - } - } - else if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) break; - if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) { - int t=(ba[j]-start)>>2; - if(t=earliest_available[hr]) { - if(t==1||(t>1&&itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||(t>1&&rt1[t-2]!=31)) { // call/ret assumes no registers allocated - // Score a point for hoisting loop invariant - if(t>16)==0x1000) - { - // Stop on unconditional branch - break; - } - else - if(itype[j]==LOAD||itype[j]==LOADLR|| - itype[j]==STORE||itype[j]==STORELR||itype[j]==C1LS) { - score[hr]++; - end[hr]=j; - } - } - } - } - // Find highest score and allocate that register - int maxscore=0; - for(hr=0;hrscore[maxscore]) { - maxscore=hr; - //printf("highest score: %d %d (%x->%x)\n",score[hr],hr,start+i*4,start+end[hr]*4); - } - } - } - if(score[maxscore]>1) - { - if(i=0) {printf("oops: %x %x was %d=%d\n",loop_start[maxscore]*4+start,j*4+start,maxscore,regs[j].regmap[maxscore]);} - assert(regs[j].regmap[maxscore]<0); - if(j>loop_start[maxscore]) regs[j].regmap_entry[maxscore]=reg; - regs[j].regmap[maxscore]=reg; - regs[j].dirty&=~(1<>16)!=0x1000) { - regmap_pre[j+2][maxscore]=reg; - regs[j+2].wasdirty&=~(1<>2; - if(t==loop_start[maxscore]) { - if(t==1||(t>1&&itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||(t>1&&rt1[t-2]!=31)) // call/ret assumes no registers allocated - regs[t].regmap_entry[maxscore]=reg; - } - } - else - { - if(j<1||(itype[j-1]!=RJUMP&&itype[j-1]!=UJUMP&&itype[j-1]!=CJUMP&&itype[j-1]!=SJUMP&&itype[j-1]!=FJUMP)) { - regmap_pre[j+1][maxscore]=reg; - regs[j+1].wasdirty&=~(1< Date: Tue, 2 Nov 2021 00:44:56 +0200 Subject: [PATCH 052/597] drc: rework for 
64bit, part 4 --- libpcsxcore/new_dynarec/assem_arm.c | 6 ++-- libpcsxcore/new_dynarec/emu_if.h | 5 +-- libpcsxcore/new_dynarec/new_dynarec.c | 47 +++++++++++++-------------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 23b47badf..0f280592f 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -296,7 +296,7 @@ static int isclean(void *addr) } // get source that block at addr was compiled from (host pointers) -static void get_bounds(void *addr,u_int *start,u_int *end) +static void get_bounds(void *addr, u_char **start, u_char **end) { u_int *ptr = addr; #ifndef HAVE_ARMV7 @@ -325,8 +325,8 @@ static void get_bounds(void *addr,u_int *start,u_int *end) #endif if((*ptr&0xFF000000)!=0xeb000000) ptr++; assert((*ptr&0xFF000000)==0xeb000000); // bl instruction - *start=source; - *end=source+len; + *start=(u_char *)source; + *end=(u_char *)source+len; } /* Register allocation */ diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index 17abab0bd..494ca7eff 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -86,6 +86,7 @@ extern void *zeromem_ptr; extern void *scratch_buf_ptr; // same as invalid_code, just a region for ram write checks (inclusive) +// (psx/guest address range) extern u32 inv_code_start, inv_code_end; /* cycles/irqs */ @@ -100,7 +101,7 @@ void pcsx_mtc0_ds(u32 reg, u32 val); extern void SysPrintf(const char *fmt, ...); #ifdef RAM_FIXED -#define rdram ((u_int)0x80000000) +#define rdram ((u_char *)0x80000000) #else -#define rdram ((u_int)psxM) +#define rdram ((u_char *)psxM) #endif diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 61dac47a7..0f56082cb 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -194,9 +194,9 @@ struct link_entry static int expirep; static u_int 
stop_after_jal; #ifndef RAM_FIXED - static u_int ram_offset; + static uintptr_t ram_offset; #else - static const u_int ram_offset=0; + static const uintptr_t ram_offset=0; #endif int new_dynarec_hacks; @@ -1020,14 +1020,14 @@ void invalidate_block(u_int block) head=jump_dirty[vpage]; //printf("page=%d vpage=%d\n",page,vpage); while(head!=NULL) { - u_int start,end; if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision - get_bounds(head->addr,&start,&end); - //printf("start: %x end: %x\n",start,end); - if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) { - if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) { - if((((start-(u_int)rdram)>>12)&2047)>12)&2047; - if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047; + u_char *start, *end; + get_bounds(head->addr, &start, &end); + //printf("start: %p end: %p\n", start, end); + if (page < 2048 && start >= rdram && end < rdram+RAM_SIZE) { + if (((start-rdram)>>12) <= page && ((end-1-rdram)>>12) >= page) { + if ((((start-rdram)>>12)&2047) < first) first = ((start-rdram)>>12)&2047; + if ((((end-1-rdram)>>12)&2047) > last) last = ((end-1-rdram)>>12)&2047; } } } @@ -1058,12 +1058,11 @@ void invalidate_addr(u_int addr) } for(;pg1<=page;pg1++) { for(head=jump_dirty[pg1];head!=NULL;head=head->next) { - u_int start,end; - get_bounds(head->addr,&start,&end); - if(ram_offset) { - start-=ram_offset; - end-=ram_offset; - } + u_char *start_h, *end_h; + u_int start, end; + get_bounds(head->addr, &start_h, &end_h); + start = (uintptr_t)start_h - ram_offset; + end = (uintptr_t)end_h - ram_offset; if(start<=addr_main&&addr_mainaddr_max) addr_max=end; @@ -1137,14 +1136,14 @@ void clean_blocks(u_int page) if(!invalid_code[head->vaddr>>12]) { // Don't restore blocks which are about to expire from the cache if (doesnt_expire_soon(head->addr)) { - u_int start,end; if(verify_dirty(head->addr)) { + u_char *start, *end; //printf("Possibly Restore %x (%p)\n",head->vaddr, 
head->addr); u_int i; u_int inv=0; - get_bounds(head->addr,&start,&end); - if(start-(u_int)rdram>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) { + get_bounds(head->addr, &start, &end); + if (start - rdram < RAM_SIZE) { + for (i = (start-rdram+0x80000000)>>12; i <= (end-1-rdram+0x80000000)>>12; i++) { inv|=invalid_code[i]; } } @@ -6671,7 +6670,7 @@ void new_dynarec_init() arch_init(); new_dynarec_test(); #ifndef RAM_FIXED - ram_offset=(u_int)rdram-0x80000000; + ram_offset=(uintptr_t)rdram-0x80000000; #endif if (ram_offset!=0) SysPrintf("warning: RAM is not directly mapped, performance will suffer\n"); @@ -6703,18 +6702,18 @@ static u_int *get_source_start(u_int addr, u_int *limit) (0xa0000000 <= addr && addr < 0xa0200000)) { // used for BIOS calls mostly? *limit = (addr&0xa0000000)|0x00200000; - return (u_int *)((u_int)rdram + (addr&0x1fffff)); + return (u_int *)(rdram + (addr&0x1fffff)); } else if (!Config.HLE && ( /* (0x9fc00000 <= addr && addr < 0x9fc80000) ||*/ (0xbfc00000 <= addr && addr < 0xbfc80000))) { // BIOS *limit = (addr & 0xfff00000) | 0x80000; - return (u_int *)((u_int)psxR + (addr&0x7ffff)); + return (u_int *)((u_char *)psxR + (addr&0x7ffff)); } else if (addr >= 0x80000000 && addr < 0x80000000+RAM_SIZE) { *limit = (addr & 0x80600000) + 0x00200000; - return (u_int *)((u_int)rdram + (addr&0x1fffff)); + return (u_int *)(rdram + (addr&0x1fffff)); } return NULL; } @@ -9651,7 +9650,7 @@ int new_recompile_block(int addr) // Align code if(((u_int)out)&7) emit_addnop(13); #endif - assert((u_int)out-(u_int)beginning Date: Tue, 2 Nov 2021 15:50:03 +0200 Subject: [PATCH 053/597] drc: remove yet more n64 stuff --- libpcsxcore/new_dynarec/assem_arm.c | 265 +---- libpcsxcore/new_dynarec/emu_if.h | 3 - libpcsxcore/new_dynarec/linkage_arm.S | 3 - libpcsxcore/new_dynarec/linkage_offsets.h | 3 - libpcsxcore/new_dynarec/new_dynarec.c | 1146 ++------------------- 5 files changed, 117 insertions(+), 1303 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c 
b/libpcsxcore/new_dynarec/assem_arm.c index 0f280592f..1d6600246 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -360,13 +360,8 @@ static void alloc_reg(struct regstat *cur,int i,signed char reg) return; } r=cur->regmap[preferred_reg]; - if(r<64&&((cur->u>>r)&1)) { - cur->regmap[preferred_reg]=reg; - cur->dirty&=~(1<isconst&=~(1<=64&&((cur->uu>>(r&63))&1)) { + assert(r < 64); + if((cur->u>>r)&1) { cur->regmap[preferred_reg]=reg; cur->dirty&=~(1<isconst&=~(1<regmap[hr]; if(r>=0) { - if(r<64) { - if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;} - } - else - { - if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;} - } + assert(r < 64); + if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;} } } // Try to allocate any available register, but prefer @@ -500,168 +490,8 @@ static void alloc_reg(struct regstat *cur,int i,signed char reg) static void alloc_reg64(struct regstat *cur,int i,signed char reg) { - int preferred_reg = 8+(reg&1); - int r,hr; - // allocate the lower 32 bits alloc_reg(cur,i,reg); - - // Don't allocate unused registers - if((cur->uu>>reg)&1) return; - - // see if the upper half is already allocated - for(hr=0;hrregmap[hr]==reg+64) return; - } - - // Keep the same mapping if the register was already allocated in a loop - preferred_reg = loop_reg(i,reg,preferred_reg); - - // Try to allocate the preferred register - if(cur->regmap[preferred_reg]==-1) { - cur->regmap[preferred_reg]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]; - if(r<64&&((cur->u>>r)&1)) { - cur->regmap[preferred_reg]=reg|64; - cur->dirty&=~(1<isconst&=~(1<=64&&((cur->uu>>(r&63))&1)) { - cur->regmap[preferred_reg]=reg|64; - cur->dirty&=~(1<isconst&=~(1<=0;hr--) - { - r=cur->regmap[hr]; - if(r>=0) { - if(r<64) { - if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;} - } - else - { - if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;} - } - } - } - // Try to allocate any available register, but prefer - // registers that have not been used 
recently. - if(i>0) { - for(hr=0;hrregmap[hr]==-1) { - if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==-1) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]); - //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); - if(i>0) { - // Don't evict the cycle count at entry points, otherwise the entry - // stub will have to write it. - if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; - for(j=10;j>=3;j--) - { - // Alloc preferred register if available - if(hsn[r=cur->regmap[preferred_reg]&63]==j) { - for(hr=0;hrregmap[hr]&63)==r) { - cur->regmap[hr]=-1; - cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]=reg|64; - return; - } - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<=0;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]; if(r>=0) { - if(r<64) { - if((cur->u>>r)&1) { - if(i==0||((unneeded_reg[i-1]>>r)&1)) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<uu>>(r&63))&1) { - if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<u>>r)&1) { + if(i==0||((unneeded_reg[i-1]>>r)&1)) { + 
cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<>4); if(r==CCREG) addr=(int)&cycle_count; if(r==CSREG) addr=(int)&Status; - if(r==FSREG) addr=(int)&FCR31; if(r==INVCP) addr=(int)&invc_ptr; u_int offset = addr-(u_int)&dynarec_local; assert(offset<4096); @@ -1057,7 +856,6 @@ static void emit_storereg(int r, int hr) if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4); if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4); if(r==CCREG) addr=(int)&cycle_count; - if(r==FSREG) addr=(int)&FCR31; u_int offset = addr-(u_int)&dynarec_local; assert(offset<4096); assem_debug("str %s,fp+%d\n",regname[hr],offset); @@ -1224,15 +1022,6 @@ static void emit_adcimm(u_int rs,int imm,u_int rt) output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval); } -static void emit_rscimm(int rs,int imm,u_int rt) -{ - assert(0); - u_int armval; - genimm_checked(imm,&armval); - assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval); -} - static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl) { // TODO: if(genimm(imm,&armval)) ... 
@@ -2776,6 +2565,7 @@ static void do_dirty_stub_ds() emit_call(&verify_code_ds); } +// FP_STUB static void do_cop1stub(int n) { literal_pool(256); @@ -3247,12 +3037,11 @@ static void cop0_assemble(int i,struct regstat *i_regs) emit_loadreg(rs1[i],s); if(get_reg(i_regs->regmap,rs1[i]|64)>=0) emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64)); - cop1_usable=0; } else { assert(opcode2[i]==0x10); - if((source[i]&0x3f)==0x10) // RFE + //if((source[i]&0x3f)==0x10) // RFE { emit_readword(&Status,0); emit_andimm(0,0x3c,1); @@ -3573,11 +3362,11 @@ static void c2op_assemble(int i,struct regstat *i_regs) static void cop1_unusable(int i,struct regstat *i_regs) { // XXX: should just just do the exception instead - if(!cop1_usable) { + //if(!cop1_usable) + { void *jaddr=out; emit_jmp(0); add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0); - cop1_usable=1; } } @@ -3586,22 +3375,6 @@ static void cop1_assemble(int i,struct regstat *i_regs) cop1_unusable(i, i_regs); } -static void fconv_assemble_arm(int i,struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); -} -#define fconv_assemble fconv_assemble_arm - -static void fcomp_assemble(int i,struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); -} - -static void float_assemble(int i,struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); -} - static void multdiv_assemble_arm(int i,struct regstat *i_regs) { // case 0x18: MULT @@ -3773,7 +3546,7 @@ static void do_miniht_insert(u_int return_address,int rt,int temp) { #endif } -static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu) +static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u) { //if(dirty_pre==dirty) return; int hr,reg; @@ -3785,13 +3558,9 @@ static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int if(((dirty_pre&~dirty)>>hr)&1) { if(reg>0&®<34) { emit_storereg(reg,hr); - if( ((is32_pre&~uu)>>reg)&1 ) { - 
emit_sarimm(hr,31,HOST_TEMPREG); - emit_storereg(reg|64,HOST_TEMPREG); - } } else if(reg>=64) { - emit_storereg(reg,hr); + assert(0); } } } diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index 494ca7eff..c18a64489 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -57,9 +57,6 @@ extern const char gte_cycletab[64]; extern const uint64_t gte_reg_reads[64]; extern const uint64_t gte_reg_writes[64]; -/* dummy */ -extern int FCR0, FCR31; - /* mem */ extern void *mem_rtab; extern void *mem_wtab; diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 1f232c4d8..5f2b6eea7 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -89,9 +89,6 @@ DRC_VAR(scratch_buf_ptr, 4) DRC_VAR(mini_ht, 256) DRC_VAR(restore_candidate, 512) -/* unused */ -DRC_VAR(FCR0, 4) -DRC_VAR(FCR31, 4) #ifdef TEXRELS_FORBIDDEN .data diff --git a/libpcsxcore/new_dynarec/linkage_offsets.h b/libpcsxcore/new_dynarec/linkage_offsets.h index f7e1911a3..e36a75d16 100644 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ b/libpcsxcore/new_dynarec/linkage_offsets.h @@ -35,7 +35,4 @@ #define LO_restore_candidate (LO_mini_ht + 256) #define LO_dynarec_local_size (LO_restore_candidate + 512) -#define LO_FCR0 (LO_align0) -#define LO_FCR31 (LO_align0) - #define LO_cop2_to_scratch_buf (LO_scratch_buf_ptr - LO_reg_cop2d) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 0f56082cb..d19fcad0d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -88,7 +88,6 @@ struct regstat uint64_t wasdirty; uint64_t dirty; uint64_t u; - uint64_t uu; u_int wasconst; u_int isconst; u_int loadedconst; // host regs that have constants loaded @@ -166,9 +165,7 @@ struct link_entry static char is_ds[MAXBLOCK]; static char ooo[MAXBLOCK]; static uint64_t unneeded_reg[MAXBLOCK]; - static uint64_t 
unneeded_reg_upper[MAXBLOCK]; static uint64_t branch_unneeded_reg[MAXBLOCK]; - static uint64_t branch_unneeded_reg_upper[MAXBLOCK]; static signed char regmap_pre[MAXBLOCK][HOST_REGS]; static uint64_t current_constmap[HOST_REGS]; static uint64_t constmap[MAXBLOCK][HOST_REGS]; @@ -188,7 +185,6 @@ struct link_entry static u_int literals[1024][2]; static int literalcount; static int is_delayslot; - static int cop1_usable; static char shadow[1048576] __attribute__((aligned(16))); static void *copy; static int expirep; @@ -208,7 +204,7 @@ struct link_entry /* 1-31 gpr */ #define HIREG 32 // hi #define LOREG 33 // lo -#define FSREG 34 // FPU status (FCSR) +//#define FSREG 34 // FPU status (FCSR) #define CSREG 35 // Coprocessor status #define CCREG 36 // Cycle count #define INVCP 37 // Pointer to invalid_code @@ -248,9 +244,9 @@ struct link_entry #define COP1 16 // Coprocessor 1 #define C1LS 17 // Coprocessor 1 load/store #define FJUMP 18 // Conditional branch (floating point) -#define FLOAT 19 // Floating point unit -#define FCONV 20 // Convert integer to float -#define FCOMP 21 // Floating point compare (sets FSREG) +//#define FLOAT 19 // Floating point unit +//#define FCONV 20 // Convert integer to float +//#define FCOMP 21 // Floating point compare (sets FSREG) #define SYSCALL 22// SYSCALL #define OTHER 23 // Other #define SPAN 24 // Branch/delay slot spans 2 pages @@ -765,8 +761,8 @@ int loop_reg(int i, int r, int hr) } for(;k>r)&1)) return hr; - if(r>64&&((unneeded_reg_upper[i+k]>>r)&1)) return hr; + assert(r < 64); + if((unneeded_reg[i+k]>>r)&1) return hr; if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP||itype[i+k]==FJUMP)) { if(ba[i+k]>=start && ba[i+k]<(start+i*4)) @@ -1178,8 +1174,7 @@ void mov_alloc(struct regstat *current,int i) // Note: Don't need to actually alloc the source registers if((~current->is32>>rs1[i])&1) { //alloc_reg64(current,i,rs1[i]); - alloc_reg64(current,i,rt1[i]); - 
current->is32&=~(1LL<is32|=1LL<is32&=~(1LL<is32>>rs1[i])&(current->is32>>rs2[i])&1)) { - if(!((current->uu>>rt1[i])&1)) { - alloc_reg64(current,i,rt1[i]); - } if(get_reg(current->regmap,rt1[i]|64)>=0) { - if(rs1[i]&&rs2[i]) { - alloc_reg64(current,i,rs1[i]); - alloc_reg64(current,i,rs2[i]); - } - else - { - // Is is really worth it to keep 64-bit values in registers? - #ifdef NATIVE_64BIT - if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]); - if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg64(current,i,rs2[i]); - #endif - } + assert(0); } current->is32&=~(1LL<=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU - if(rt1[i]) { - if(rs1[i]&&rs2[i]) { - if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) { - alloc_reg64(current,i,rs1[i]); - alloc_reg64(current,i,rs2[i]); - alloc_reg64(current,i,rt1[i]); - } else { - alloc_reg(current,i,rs1[i]); - alloc_reg(current,i,rs2[i]); - alloc_reg(current,i,rt1[i]); - } - } - else { - alloc_reg(current,i,rt1[i]); - if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) { - // DADD used as move, or zeroing - // If we have a 64-bit source, then make the target 64 bits too - if(rs1[i]&&!((current->is32>>rs1[i])&1)) { - if(get_reg(current->regmap,rs1[i])>=0) alloc_reg64(current,i,rs1[i]); - alloc_reg64(current,i,rt1[i]); - } else if(rs2[i]&&!((current->is32>>rs2[i])&1)) { - if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]); - alloc_reg64(current,i,rt1[i]); - } - if(opcode2[i]>=0x2e&&rs2[i]) { - // DSUB used as negation - 64-bit result - // If we have a 32-bit register, extend it to 64 bits - if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]); - alloc_reg64(current,i,rt1[i]); - } - } - } - if(rs1[i]&&rs2[i]) { - current->is32&=~(1LL<is32&=~(1LL<is32>>rs1[i])&1) - current->is32|=1LL<is32&=~(1LL<is32>>rs2[i])&1) - current->is32|=1LL<is32|=1LL<is32&=~(1LL<uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) { - // TODO: Could preserve the 32-bit flag if 
the immediate is zero - alloc_reg64(current,i,rt1[i]); - alloc_reg64(current,i,rs1[i]); - } - clear_const(current,rs1[i]); - clear_const(current,rt1[i]); + assert(0); } else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU if((~current->is32>>rs1[i])&1) alloc_reg64(current,i,rs1[i]); @@ -1576,20 +1497,7 @@ void multdiv_alloc(struct regstat *current,int i) } else // 64-bit { - current->u&=~(1LL<u&=~(1LL<uu&=~(1LL<uu&=~(1LL<10) alloc_reg64(current,i,LOREG); - alloc_reg64(current,i,rs1[i]); - alloc_reg64(current,i,rs2[i]); - alloc_all(current,i); - current->is32&=~(1LL<is32&=~(1LL<is32&=~(1LL<is32|=1LL<is32|=1LL<3) // MTC1/DMTC1/CTC1 + else if(opcode2[i]>3) // MTC1/CTC1 { if(rs1[i]){ clear_const(current,rs1[i]); - if(opcode2[i]==5) - alloc_reg64(current,i,rs1[i]); // DMTC1 - else - alloc_reg(current,i,rs1[i]); // MTC1/CTC1 - alloc_reg_temp(current,i,-1); + alloc_reg(current,i,rs1[i]); } else { current->u&=~1LL; alloc_reg(current,i,0); - alloc_reg_temp(current,i,-1); } + alloc_reg_temp(current,i,-1); } minimum_free_regs[i]=1; } -void fconv_alloc(struct regstat *current,int i) -{ - alloc_reg(current,i,CSREG); // Load status - alloc_reg_temp(current,i,-1); - minimum_free_regs[i]=1; -} -void float_alloc(struct regstat *current,int i) -{ - alloc_reg(current,i,CSREG); // Load status - alloc_reg_temp(current,i,-1); - minimum_free_regs[i]=1; -} + void c2op_alloc(struct regstat *current,int i) { alloc_reg_temp(current,i,-1); } -void fcomp_alloc(struct regstat *current,int i) -{ - alloc_reg(current,i,CSREG); // Load status - alloc_reg(current,i,FSREG); // Load flags - dirty_reg(current,FSREG); // Flag will be modified - alloc_reg_temp(current,i,-1); - minimum_free_regs[i]=1; -} void syscall_alloc(struct regstat *current,int i) { @@ -1757,7 +1637,7 @@ void delayslot_alloc(struct regstat *current,int i) break; case COP1: case COP2: - cop1_alloc(current,i); + cop12_alloc(current,i); break; case C1LS: c1ls_alloc(current,i); @@ -1765,15 +1645,6 @@ void delayslot_alloc(struct 
regstat *current,int i) case C2LS: c2ls_alloc(current,i); break; - case FCONV: - fconv_alloc(current,i); - break; - case FLOAT: - float_alloc(current,i); - break; - case FCOMP: - fcomp_alloc(current,i); - break; case C2OP: c2op_alloc(current,i); break; @@ -1809,8 +1680,7 @@ static void pagespan_alloc(struct regstat *current,int i) if(rs2[i]) alloc_reg(current,i,rs2[i]); if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1)) { - if(rs1[i]) alloc_reg64(current,i,rs1[i]); - if(rs2[i]) alloc_reg64(current,i,rs2[i]); + assert(0); } } else @@ -1819,15 +1689,9 @@ static void pagespan_alloc(struct regstat *current,int i) if(rs1[i]) alloc_reg(current,i,rs1[i]); if(!((current->is32>>rs1[i])&1)) { - if(rs1[i]) alloc_reg64(current,i,rs1[i]); + assert(0); } } - else - if(opcode[i]==0x11) // BC1 - { - alloc_reg(current,i,FSREG); - alloc_reg(current,i,CSREG); - } //else ... } @@ -1914,74 +1778,7 @@ void alu_assemble(int i,struct regstat *i_regs) } } if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU - if(rt1[i]) { - signed char s1l,s2l,s1h,s2h,tl,th; - tl=get_reg(i_regs->regmap,rt1[i]); - th=get_reg(i_regs->regmap,rt1[i]|64); - if(tl>=0) { - s1l=get_reg(i_regs->regmap,rs1[i]); - s2l=get_reg(i_regs->regmap,rs2[i]); - s1h=get_reg(i_regs->regmap,rs1[i]|64); - s2h=get_reg(i_regs->regmap,rs2[i]|64); - if(rs1[i]&&rs2[i]) { - assert(s1l>=0); - assert(s2l>=0); - if(opcode2[i]&2) emit_subs(s1l,s2l,tl); - else emit_adds(s1l,s2l,tl); - if(th>=0) { - #ifdef INVERTED_CARRY - if(opcode2[i]&2) {if(s1h!=th) emit_mov(s1h,th);emit_sbb(th,s2h);} - #else - if(opcode2[i]&2) emit_sbc(s1h,s2h,th); - #endif - else emit_add(s1h,s2h,th); - } - } - else if(rs1[i]) { - if(s1l>=0) emit_mov(s1l,tl); - else emit_loadreg(rs1[i],tl); - if(th>=0) { - if(s1h>=0) emit_mov(s1h,th); - else emit_loadreg(rs1[i]|64,th); - } - } - else if(rs2[i]) { - if(s2l>=0) { - if(opcode2[i]&2) emit_negs(s2l,tl); - else emit_mov(s2l,tl); - } - else { - emit_loadreg(rs2[i],tl); - if(opcode2[i]&2) emit_negs(tl,tl); - } 
- if(th>=0) { - #ifdef INVERTED_CARRY - if(s2h>=0) emit_mov(s2h,th); - else emit_loadreg(rs2[i]|64,th); - if(opcode2[i]&2) { - emit_adcimm(-1,th); // x86 has inverted carry flag - emit_not(th,th); - } - #else - if(opcode2[i]&2) { - if(s2h>=0) emit_rscimm(s2h,0,th); - else { - emit_loadreg(rs2[i]|64,th); - emit_rscimm(th,0,th); - } - }else{ - if(s2h>=0) emit_mov(s2h,th); - else emit_loadreg(rs2[i]|64,th); - } - #endif - } - } - else { - emit_zeroreg(tl); - if(th>=0) emit_zeroreg(th); - } - } - } + assert(0); } if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU if(rt1[i]) { @@ -3032,22 +2829,6 @@ void mov_assemble(int i,struct regstat *i_regs) } } -#ifndef fconv_assemble -void fconv_assemble(int i,struct regstat *i_regs) -{ - printf("Need fconv_assemble for this architecture.\n"); - exit(1); -} -#endif - -#if 0 -void float_assemble(int i,struct regstat *i_regs) -{ - printf("Need float_assemble for this architecture.\n"); - exit(1); -} -#endif - void syscall_assemble(int i,struct regstat *i_regs) { signed char ccreg=get_reg(i_regs->regmap,CCREG); @@ -3120,12 +2901,6 @@ void ds_assemble(int i,struct regstat *i_regs) c2ls_assemble(i,i_regs);break; case C2OP: c2op_assemble(i,i_regs);break; - case FCONV: - fconv_assemble(i,i_regs);break; - case FLOAT: - float_assemble(i,i_regs);break; - case FCOMP: - fcomp_assemble(i,i_regs);break; case MULTDIV: multdiv_assemble(i,i_regs);break; case MOV: @@ -3150,24 +2925,12 @@ int internal_branch(uint64_t i_is32,int addr) if(addr&1) return 0; // Indirect (register) jump if(addr>=start && addr>2; - // Delay slots are not valid branch targets - //if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0; - // 64 -> 32 bit transition requires a recompile - /*if(is32[t]&~unneeded_reg_upper[t]&~i_is32) - { - if(requires_32bit[t]&~i_is32) printf("optimizable: no\n"); - else printf("optimizable: yes\n"); - }*/ - //if(is32[t]&~unneeded_reg_upper[t]&~i_is32) return 0; return 1; } return 
0; } -#ifndef wb_invalidate -void wb_invalidate(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32, - uint64_t u,uint64_t uu) +static void wb_invalidate(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u) { int hr; for(hr=0;hr=0) { if((dirty>>hr)&1) { if(get_reg(entry,pre[hr])<0) { - if(pre[hr]<64) { - if(!((u>>pre[hr])&1)) { - emit_storereg(pre[hr],hr); - if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) { - emit_sarimm(hr,31,hr); - emit_storereg(pre[hr]|64,hr); - } - } - }else{ - if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) { - emit_storereg(pre[hr],hr); - } - } + assert(pre[hr]<64); + if(!((u>>pre[hr])&1)) + emit_storereg(pre[hr],hr); } } } @@ -3209,7 +2962,6 @@ void wb_invalidate(signed char pre[],signed char entry[],uint64_t dirty,uint64_t } } } -#endif // Load the specified registers // This only loads the registers given as arguments because @@ -3430,11 +3182,8 @@ static int get_final_value(int hr, int i, int *value) *value=constmap[i][hr]; //printf("c=%lx\n",(long)constmap[i][hr]); if(i==slen-1) return 1; - if(reg<64) { - return !((unneeded_reg[i+1]>>reg)&1); - }else{ - return !((unneeded_reg_upper[i+1]>>reg)&1); - } + assert(reg < 64); + return !((unneeded_reg[i+1]>>reg)&1); } // Load registers with known constants @@ -3567,13 +3316,8 @@ void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty) if(i_regmap[hr]>0) { if(i_regmap[hr]!=CCREG) { if((i_dirty>>hr)&1) { - if(i_regmap[hr]<64) { - emit_storereg(i_regmap[hr],hr); - }else{ - if( !((i_is32>>(i_regmap[hr]&63))&1) ) { - emit_storereg(i_regmap[hr],hr); - } - } + assert(i_regmap[hr]<64); + emit_storereg(i_regmap[hr],hr); } } } @@ -3590,15 +3334,10 @@ void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,in if(hr!=EXCLUDE_REG) { if(i_regmap[hr]>0) { if(i_regmap[hr]!=CCREG) { - if(i_regmap[hr]==regs[t].regmap_entry[hr] && ((regs[t].dirty>>hr)&1) && 
!(((i_is32&~regs[t].was32&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1)) { + if(i_regmap[hr]==regs[t].regmap_entry[hr] && ((regs[t].dirty>>hr)&1) && !(((i_is32&~regs[t].was32)>>(i_regmap[hr]&63))&1)) { if((i_dirty>>hr)&1) { - if(i_regmap[hr]<64) { - emit_storereg(i_regmap[hr],hr); - }else{ - if( !((i_is32>>(i_regmap[hr]&63))&1) ) { - emit_storereg(i_regmap[hr],hr); - } - } + assert(i_regmap[hr]<64); + emit_storereg(i_regmap[hr],hr); } } } @@ -3698,26 +3437,11 @@ void store_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int a for(hr=0;hr0 && i_regmap[hr]!=CCREG) { - if(i_regmap[hr]!=regs[t].regmap_entry[hr] || !((regs[t].dirty>>hr)&1) || (((i_is32&~regs[t].was32&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1)) { + if(i_regmap[hr]!=regs[t].regmap_entry[hr] || !((regs[t].dirty>>hr)&1) || (((i_is32&~regs[t].was32)>>(i_regmap[hr]&63))&1)) { if((i_dirty>>hr)&1) { - if(i_regmap[hr]<64) { - if(!((unneeded_reg[t]>>i_regmap[hr])&1)) { - emit_storereg(i_regmap[hr],hr); - if( ((i_is32>>i_regmap[hr])&1) && !((unneeded_reg_upper[t]>>i_regmap[hr])&1) ) { - #ifdef DESTRUCTIVE_WRITEBACK - emit_sarimm(hr,31,hr); - emit_storereg(i_regmap[hr]|64,hr); - #else - emit_sarimm(hr,31,HOST_TEMPREG); - emit_storereg(i_regmap[hr]|64,HOST_TEMPREG); - #endif - } - } - }else{ - if( !((i_is32>>(i_regmap[hr]&63))&1) && !((unneeded_reg_upper[t]>>(i_regmap[hr]&63))&1) ) { - emit_storereg(i_regmap[hr],hr); - } - } + assert(i_regmap[hr]<64); + if(!((unneeded_reg[t]>>i_regmap[hr])&1)) + emit_storereg(i_regmap[hr],hr); } } } @@ -3749,11 +3473,7 @@ void load_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int ad // Load 32-bit regs for(hr=0;hr=0&®s[t].regmap_entry[hr]>hr)&1) && ((i_dirty>>hr)&1) && (((i_is32&~unneeded_reg_upper[t])>>i_regmap[hr])&1) ) || (((i_is32&~regs[t].was32&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1)) { - #else - if(i_regmap[hr]!=regs[t].regmap_entry[hr] ) { - #endif + if(i_regmap[hr]!=regs[t].regmap_entry[hr]) { if(regs[t].regmap_entry[hr]==0) { 
emit_zeroreg(hr); } @@ -3821,8 +3541,7 @@ int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) } else if(i_regmap[hr]>=64&&i_regmap[hr]>(i_regmap[hr]&63))&1)) - return 0; + assert(0); } } } @@ -3840,15 +3559,9 @@ int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) } } } - if((((regs[t].was32^i_is32)&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1) - { - //printf("%x: is32 no match\n",addr); - return 0; - } } } } - //if(is32[t]&~unneeded_reg_upper[t]&~i_is32) return 0; // Delay slots are not valid branch targets //if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0; // Delay slots require additional processing, so do not match @@ -3914,7 +3627,6 @@ void ds_assemble_entry(int i) address_generation(t,®s[t],regs[t].regmap_entry); if(itype[t]==STORE||itype[t]==STORELR||(opcode[t]&0x3b)==0x39||(opcode[t]&0x3b)==0x3a) load_regs(regs[t].regmap_entry,regs[t].regmap,regs[t].was32,INVCP,INVCP); - cop1_usable=0; is_delayslot=0; switch(itype[t]) { case ALU: @@ -3945,12 +3657,6 @@ void ds_assemble_entry(int i) c2ls_assemble(t,®s[t]);break; case C2OP: c2op_assemble(t,®s[t]);break; - case FCONV: - fconv_assemble(t,®s[t]);break; - case FLOAT: - float_assemble(t,®s[t]);break; - case FCOMP: - fcomp_assemble(t,®s[t]);break; case MULTDIV: multdiv_assemble(t,®s[t]);break; case MOV: @@ -4336,11 +4042,8 @@ void ujump_assemble(int i,struct regstat *i_regs) } ds_assemble(i+1,i_regs); uint64_t bc_unneeded=branch_regs[i].u; - uint64_t bc_unneeded_upper=branch_regs[i].uu; bc_unneeded|=1|(1LL<>rt1[i+1])&1) ds_unneeded_upper&=~((1LL<>rt1[i+1])&1) ds_unneeded_upper&=~((1LL<regmap; - int cc; - int match; - match=match_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); - assem_debug("fmatch=%d\n",match); - int fs,cs; - void *eaddr; - int invert=0; - int internal=internal_branch(branch_regs[i].is32,ba[i]); - if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); - 
if(!match) invert=1; - #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - if(i>(ba[i]-start)>>2) invert=1; - #endif - - if(ooo[i]) { - fs=get_reg(branch_regs[i].regmap,FSREG); - address_generation(i+1,i_regs,regs[i].regmap_entry); // Is this okay? - } - else { - fs=get_reg(i_regmap,FSREG); - } - - // Check cop1 unusable - if(!cop1_usable) { - cs=get_reg(i_regmap,CSREG); - assert(cs>=0); - emit_testimm(cs,0x20000000); - eaddr=out; - emit_jeq(0); - add_stub_r(FP_STUB,eaddr,out,i,cs,i_regs,0,0); - cop1_usable=1; - } - - if(ooo[i]) { - // Out of order execution (delay slot first) - //printf("OOOE\n"); - ds_assemble(i+1,i_regs); - int adj; - uint64_t bc_unneeded=branch_regs[i].u; - uint64_t bc_unneeded_upper=branch_regs[i].uu; - bc_unneeded&=~((1LL<=0); - emit_testimm(fs,0x800000); - if(source[i]&0x10000) // BC1T - { - if(invert){ - nottaken=out; - emit_jeq(1); - }else{ - add_to_linker(out,ba[i],internal); - emit_jne(0); - } - } - else // BC1F - if(invert){ - nottaken=out; - emit_jne((void *)1l); - }else{ - add_to_linker(out,ba[i],internal); - emit_jeq(0); - } - { - } - } // if(!only32) - - if(invert) { - if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); - #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - else if(match) emit_addnop(13); - #endif - store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); - if(internal) - assem_debug("branch: internal\n"); - else - assem_debug("branch: external\n"); - if(internal&&is_ds[(ba[i]-start)>>2]) { - ds_assemble_entry(i); - } - else { - add_to_linker(out,ba[i],internal); - emit_jmp(0); - } - set_jump_target(nottaken, out); - } - - if(adj) { - if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); - } - } // (!unconditional) - } // if(ooo) - else - { - // In-order execution (branch first) - //printf("IOE\n"); - void *nottaken = NULL; - if(1) { - //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d 
edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); - if(1) { - assert(fs>=0); - emit_testimm(fs,0x800000); - if(source[i]&0x10000) // BC1T - { - nottaken=out; - emit_jeq(1); - } - else // BC1F - { - nottaken=out; - emit_jne((void *)1l); - } - } - } // if(!unconditional) - int adj; - uint64_t ds_unneeded=branch_regs[i].u; - uint64_t ds_unneeded_upper=branch_regs[i].uu; - ds_unneeded&=~((1LL<>rt1[i+1])&1) ds_unneeded_upper&=~((1LL<>2]) { - ds_assemble_entry(i); - } - else { - add_to_linker(out,ba[i],internal); - emit_jmp(0); - } - - // branch not taken - if(1) { // <- FIXME (don't need this) - set_jump_target(nottaken, out); - assem_debug("1:\n"); - if(!likely[i]) { - wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32, - ds_unneeded,ds_unneeded_upper); + wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,ds_unneeded); load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]); address_generation(i+1,&branch_regs[i],0); load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG); @@ -5687,7 +5158,6 @@ static void pagespan_ds() address_generation(0,®s[0],regs[0].regmap_entry); if(itype[0]==STORE||itype[0]==STORELR||(opcode[0]&0x3b)==0x39||(opcode[0]&0x3b)==0x3a) load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,INVCP,INVCP); - cop1_usable=0; is_delayslot=0; switch(itype[0]) { case ALU: @@ -5718,12 +5188,6 @@ static void pagespan_ds() c2ls_assemble(0,®s[0]);break; case C2OP: c2op_assemble(0,®s[0]);break; - case FCONV: - fconv_assemble(0,®s[0]);break; - case FLOAT: - float_assemble(0,®s[0]);break; - case FCOMP: - fcomp_assemble(0,®s[0]);break; case MULTDIV: multdiv_assemble(0,®s[0]);break; case MOV: @@ -5765,19 +5229,17 @@ static void pagespan_ds() void unneeded_registers(int istart,int iend,int r) { int i; - uint64_t 
u,uu,gte_u,b,bu,gte_bu; - uint64_t temp_u,temp_uu,temp_gte_u=0; - uint64_t tdep; + uint64_t u,gte_u,b,gte_b; + uint64_t temp_u,temp_gte_u=0; uint64_t gte_u_unknown=0; if(new_dynarec_hacks&NDHACK_GTE_UNNEEDED) gte_u_unknown=~0ll; if(iend==slen-1) { - u=1;uu=1; + u=1; gte_u=gte_u_unknown; }else{ - u=unneeded_reg[iend+1]; - uu=unneeded_reg_upper[iend+1]; - u=1;uu=1; + //u=unneeded_reg[iend+1]; + u=1; gte_u=gte_unneeded[iend+1]; } @@ -5793,39 +5255,12 @@ void unneeded_registers(int istart,int iend,int r) { // Branch out of this block, flush all regs u=1; - uu=1; gte_u=gte_u_unknown; - /* Hexagon hack - if(itype[i]==UJUMP&&rt1[i]==31) - { - uu=u=0x300C00F; // Discard at, v0-v1, t6-t9 - } - if(itype[i]==RJUMP&&rs1[i]==31) - { - uu=u=0x300C0F3; // Discard at, a0-a3, t6-t9 - } - if(start>0x80000400&&start<0x80000000+RAM_SIZE) { - if(itype[i]==UJUMP&&rt1[i]==31) - { - //uu=u=0x30300FF0FLL; // Discard at, v0-v1, t0-t9, lo, hi - uu=u=0x300FF0F; // Discard at, v0-v1, t0-t9 - } - if(itype[i]==RJUMP&&rs1[i]==31) - { - //uu=u=0x30300FFF3LL; // Discard at, a0-a3, t0-t9, lo, hi - uu=u=0x300FFF3; // Discard at, a0-a3, t0-t9 - } - }*/ branch_unneeded_reg[i]=u; - branch_unneeded_reg_upper[i]=uu; // Merge in delay slot - tdep=(~uu>>rt1[i+1])&1; u|=(1LL<>16)==0x1000) { // Unconditional branch - temp_u=1;temp_uu=1; + temp_u=1; temp_gte_u=0; } else { // Conditional branch (not taken case) temp_u=unneeded_reg[i+2]; - temp_uu=unneeded_reg_upper[i+2]; temp_gte_u&=gte_unneeded[i+2]; } // Merge in delay slot - tdep=(~temp_uu>>rt1[i+1])&1; temp_u|=(1LL<>rt1[i])&1; temp_u|=(1LL<>2,i-1,r+1); }else{ unneeded_reg[(ba[i]-start)>>2]=1; - unneeded_reg_upper[(ba[i]-start)>>2]=1; gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown; } } /*else*/ if(1) { @@ -5912,73 +5332,42 @@ void unneeded_registers(int istart,int iend,int r) { // Unconditional branch u=unneeded_reg[(ba[i]-start)>>2]; - uu=unneeded_reg_upper[(ba[i]-start)>>2]; gte_u=gte_unneeded[(ba[i]-start)>>2]; branch_unneeded_reg[i]=u; - 
branch_unneeded_reg_upper[i]=uu; - //u=1; - //uu=1; - //branch_unneeded_reg[i]=u; - //branch_unneeded_reg_upper[i]=uu; // Merge in delay slot - tdep=(~uu>>rt1[i+1])&1; u|=(1LL<>2]; - bu=unneeded_reg_upper[(ba[i]-start)>>2]; - gte_bu=gte_unneeded[(ba[i]-start)>>2]; + gte_b=gte_unneeded[(ba[i]-start)>>2]; branch_unneeded_reg[i]=b; - branch_unneeded_reg_upper[i]=bu; - //b=1; - //bu=1; - //branch_unneeded_reg[i]=b; - //branch_unneeded_reg_upper[i]=bu; // Branch delay slot - tdep=(~uu>>rt1[i+1])&1; b|=(1LL<>rt1[i])&1; + //u=1; // DEBUG // Written registers are unneeded u|=1LL<>r)&1) { - if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } - printf("\n");*/ - } - for (i=iend;i>=istart;i--) - { - unneeded_reg_upper[i]=branch_unneeded_reg_upper[i]=-1LL; + printf("\n"); + */ } } @@ -6984,133 +6352,14 @@ int new_recompile_block(int addr) switch(op2) { case 0x00: strcpy(insn[i],"MFC0"); type=COP0; break; + case 0x02: strcpy(insn[i],"CFC0"); type=COP0; break; case 0x04: strcpy(insn[i],"MTC0"); type=COP0; break; - case 0x10: strcpy(insn[i],"tlb"); type=NI; - switch(source[i]&0x3f) - { - case 0x01: strcpy(insn[i],"TLBR"); type=COP0; break; - case 0x02: strcpy(insn[i],"TLBWI"); type=COP0; break; - case 0x06: strcpy(insn[i],"TLBWR"); type=COP0; break; - case 0x08: strcpy(insn[i],"TLBP"); type=COP0; break; - case 0x10: strcpy(insn[i],"RFE"); type=COP0; break; - //case 0x18: strcpy(insn[i],"ERET"); type=COP0; break; - } + case 0x06: strcpy(insn[i],"CTC0"); type=COP0; break; + case 0x10: strcpy(insn[i],"RFE"); type=COP0; break; } break; - case 0x11: strcpy(insn[i],"cop1"); type=NI; + case 0x11: strcpy(insn[i],"cop1"); type=COP1; op2=(source[i]>>21)&0x1f; - switch(op2) - { - case 0x00: strcpy(insn[i],"MFC1"); type=COP1; break; - case 0x01: strcpy(insn[i],"DMFC1"); type=COP1; break; - case 0x02: strcpy(insn[i],"CFC1"); type=COP1; break; - case 0x04: strcpy(insn[i],"MTC1"); type=COP1; break; - case 0x05: strcpy(insn[i],"DMTC1"); type=COP1; 
break; - case 0x06: strcpy(insn[i],"CTC1"); type=COP1; break; - case 0x08: strcpy(insn[i],"BC1"); type=FJUMP; - switch((source[i]>>16)&0x3) - { - case 0x00: strcpy(insn[i],"BC1F"); break; - case 0x01: strcpy(insn[i],"BC1T"); break; - case 0x02: strcpy(insn[i],"BC1FL"); break; - case 0x03: strcpy(insn[i],"BC1TL"); break; - } - break; - case 0x10: strcpy(insn[i],"C1.S"); type=NI; - switch(source[i]&0x3f) - { - case 0x00: strcpy(insn[i],"ADD.S"); type=FLOAT; break; - case 0x01: strcpy(insn[i],"SUB.S"); type=FLOAT; break; - case 0x02: strcpy(insn[i],"MUL.S"); type=FLOAT; break; - case 0x03: strcpy(insn[i],"DIV.S"); type=FLOAT; break; - case 0x04: strcpy(insn[i],"SQRT.S"); type=FLOAT; break; - case 0x05: strcpy(insn[i],"ABS.S"); type=FLOAT; break; - case 0x06: strcpy(insn[i],"MOV.S"); type=FLOAT; break; - case 0x07: strcpy(insn[i],"NEG.S"); type=FLOAT; break; - case 0x08: strcpy(insn[i],"ROUND.L.S"); type=FCONV; break; - case 0x09: strcpy(insn[i],"TRUNC.L.S"); type=FCONV; break; - case 0x0A: strcpy(insn[i],"CEIL.L.S"); type=FCONV; break; - case 0x0B: strcpy(insn[i],"FLOOR.L.S"); type=FCONV; break; - case 0x0C: strcpy(insn[i],"ROUND.W.S"); type=FCONV; break; - case 0x0D: strcpy(insn[i],"TRUNC.W.S"); type=FCONV; break; - case 0x0E: strcpy(insn[i],"CEIL.W.S"); type=FCONV; break; - case 0x0F: strcpy(insn[i],"FLOOR.W.S"); type=FCONV; break; - case 0x21: strcpy(insn[i],"CVT.D.S"); type=FCONV; break; - case 0x24: strcpy(insn[i],"CVT.W.S"); type=FCONV; break; - case 0x25: strcpy(insn[i],"CVT.L.S"); type=FCONV; break; - case 0x30: strcpy(insn[i],"C.F.S"); type=FCOMP; break; - case 0x31: strcpy(insn[i],"C.UN.S"); type=FCOMP; break; - case 0x32: strcpy(insn[i],"C.EQ.S"); type=FCOMP; break; - case 0x33: strcpy(insn[i],"C.UEQ.S"); type=FCOMP; break; - case 0x34: strcpy(insn[i],"C.OLT.S"); type=FCOMP; break; - case 0x35: strcpy(insn[i],"C.ULT.S"); type=FCOMP; break; - case 0x36: strcpy(insn[i],"C.OLE.S"); type=FCOMP; break; - case 0x37: strcpy(insn[i],"C.ULE.S"); type=FCOMP; break; - 
case 0x38: strcpy(insn[i],"C.SF.S"); type=FCOMP; break; - case 0x39: strcpy(insn[i],"C.NGLE.S"); type=FCOMP; break; - case 0x3A: strcpy(insn[i],"C.SEQ.S"); type=FCOMP; break; - case 0x3B: strcpy(insn[i],"C.NGL.S"); type=FCOMP; break; - case 0x3C: strcpy(insn[i],"C.LT.S"); type=FCOMP; break; - case 0x3D: strcpy(insn[i],"C.NGE.S"); type=FCOMP; break; - case 0x3E: strcpy(insn[i],"C.LE.S"); type=FCOMP; break; - case 0x3F: strcpy(insn[i],"C.NGT.S"); type=FCOMP; break; - } - break; - case 0x11: strcpy(insn[i],"C1.D"); type=NI; - switch(source[i]&0x3f) - { - case 0x00: strcpy(insn[i],"ADD.D"); type=FLOAT; break; - case 0x01: strcpy(insn[i],"SUB.D"); type=FLOAT; break; - case 0x02: strcpy(insn[i],"MUL.D"); type=FLOAT; break; - case 0x03: strcpy(insn[i],"DIV.D"); type=FLOAT; break; - case 0x04: strcpy(insn[i],"SQRT.D"); type=FLOAT; break; - case 0x05: strcpy(insn[i],"ABS.D"); type=FLOAT; break; - case 0x06: strcpy(insn[i],"MOV.D"); type=FLOAT; break; - case 0x07: strcpy(insn[i],"NEG.D"); type=FLOAT; break; - case 0x08: strcpy(insn[i],"ROUND.L.D"); type=FCONV; break; - case 0x09: strcpy(insn[i],"TRUNC.L.D"); type=FCONV; break; - case 0x0A: strcpy(insn[i],"CEIL.L.D"); type=FCONV; break; - case 0x0B: strcpy(insn[i],"FLOOR.L.D"); type=FCONV; break; - case 0x0C: strcpy(insn[i],"ROUND.W.D"); type=FCONV; break; - case 0x0D: strcpy(insn[i],"TRUNC.W.D"); type=FCONV; break; - case 0x0E: strcpy(insn[i],"CEIL.W.D"); type=FCONV; break; - case 0x0F: strcpy(insn[i],"FLOOR.W.D"); type=FCONV; break; - case 0x20: strcpy(insn[i],"CVT.S.D"); type=FCONV; break; - case 0x24: strcpy(insn[i],"CVT.W.D"); type=FCONV; break; - case 0x25: strcpy(insn[i],"CVT.L.D"); type=FCONV; break; - case 0x30: strcpy(insn[i],"C.F.D"); type=FCOMP; break; - case 0x31: strcpy(insn[i],"C.UN.D"); type=FCOMP; break; - case 0x32: strcpy(insn[i],"C.EQ.D"); type=FCOMP; break; - case 0x33: strcpy(insn[i],"C.UEQ.D"); type=FCOMP; break; - case 0x34: strcpy(insn[i],"C.OLT.D"); type=FCOMP; break; - case 0x35: 
strcpy(insn[i],"C.ULT.D"); type=FCOMP; break; - case 0x36: strcpy(insn[i],"C.OLE.D"); type=FCOMP; break; - case 0x37: strcpy(insn[i],"C.ULE.D"); type=FCOMP; break; - case 0x38: strcpy(insn[i],"C.SF.D"); type=FCOMP; break; - case 0x39: strcpy(insn[i],"C.NGLE.D"); type=FCOMP; break; - case 0x3A: strcpy(insn[i],"C.SEQ.D"); type=FCOMP; break; - case 0x3B: strcpy(insn[i],"C.NGL.D"); type=FCOMP; break; - case 0x3C: strcpy(insn[i],"C.LT.D"); type=FCOMP; break; - case 0x3D: strcpy(insn[i],"C.NGE.D"); type=FCOMP; break; - case 0x3E: strcpy(insn[i],"C.LE.D"); type=FCOMP; break; - case 0x3F: strcpy(insn[i],"C.NGT.D"); type=FCOMP; break; - } - break; - case 0x14: strcpy(insn[i],"C1.W"); type=NI; - switch(source[i]&0x3f) - { - case 0x20: strcpy(insn[i],"CVT.S.W"); type=FCONV; break; - case 0x21: strcpy(insn[i],"CVT.D.W"); type=FCONV; break; - } - break; - case 0x15: strcpy(insn[i],"C1.L"); type=NI; - switch(source[i]&0x3f) - { - case 0x20: strcpy(insn[i],"CVT.S.L"); type=FCONV; break; - case 0x21: strcpy(insn[i],"CVT.D.L"); type=FCONV; break; - } - break; - } break; #if 0 case 0x14: strcpy(insn[i],"BEQL"); type=CJUMP; break; @@ -7349,8 +6598,8 @@ int new_recompile_block(int addr) rs2[i]=0; rt1[i]=0; rt2[i]=0; - if(op2==0) rt1[i]=(source[i]>>16)&0x1F; // MFC0 - if(op2==4) rs1[i]=(source[i]>>16)&0x1F; // MTC0 + if(op2==0||op2==2) rt1[i]=(source[i]>>16)&0x1F; // MFC0/CFC0 + if(op2==4||op2==6) rs1[i]=(source[i]>>16)&0x1F; // MTC0/CTC0 if(op2==4&&((source[i]>>11)&0x1f)==12) rt2[i]=CSREG; // Status if(op2==16) if((source[i]&0x3f)==0x18) rs2[i]=CCREG; // ERET break; @@ -7412,19 +6661,6 @@ int new_recompile_block(int addr) else gte_rs[i]|=3ll<<(v*2); } break; - case FLOAT: - case FCONV: - rs1[i]=0; - rs2[i]=CSREG; - rt1[i]=0; - rt2[i]=0; - break; - case FCOMP: - rs1[i]=FSREG; - rs2[i]=CSREG; - rt1[i]=FSREG; - rt2[i]=0; - break; case SYSCALL: case HLECALL: case INTCALL: @@ -7536,7 +6772,6 @@ int new_recompile_block(int addr) current.is32=1; current.dirty=0; current.u=unneeded_reg[0]; - 
current.uu=unneeded_reg_upper[0]; clear_all_regs(current.regmap); alloc_reg(¤t,0,CCREG); dirty_reg(¤t,CCREG); @@ -7553,7 +6788,6 @@ int new_recompile_block(int addr) bt[1]=1; ds=1; unneeded_reg[0]=1; - unneeded_reg_upper[0]=1; current.regmap[HOST_BTREG]=BTREG; } @@ -7599,23 +6833,15 @@ int new_recompile_block(int addr) if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP&&itype[i]!=FJUMP) { if(i+1>rt1[i])&1) current.uu&=~((1LL<>rt1[i+1])&1) current.uu&=~((1LL<>rt1[i])&1) current.uu&=~((1LL<>(r&63))&1) { - regs[i].regmap_entry[hr]=-1; - regs[i].regmap[hr]=-1; - //Don't clear regs in the delay slot as the branch might need them - //current.regmap[hr]=-1; - }else - regs[i].regmap_entry[hr]=r; + assert(0); } } } else { @@ -7765,8 +6980,7 @@ int new_recompile_block(int addr) if(rs2[i]) alloc_reg(¤t,i,rs2[i]); if(!((current.is32>>rs1[i])&(current.is32>>rs2[i])&1)) { - if(rs1[i]) alloc_reg64(¤t,i,rs1[i]); - if(rs2[i]) alloc_reg64(¤t,i,rs2[i]); + assert(0); } if((rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]))|| (rs2[i]&&(rs2[i]==rt1[i+1]||rs2[i]==rt2[i+1]))) { @@ -7779,8 +6993,7 @@ int new_recompile_block(int addr) if(rs2[i]) alloc_reg(¤t,i,rs2[i]); if(!((current.is32>>rs1[i])&(current.is32>>rs2[i])&1)) { - if(rs1[i]) alloc_reg64(¤t,i,rs1[i]); - if(rs2[i]) alloc_reg64(¤t,i,rs2[i]); + assert(0); } } else @@ -7797,7 +7010,7 @@ int new_recompile_block(int addr) alloc_reg(¤t,i,rs1[i]); if(!(current.is32>>rs1[i]&1)) { - alloc_reg64(¤t,i,rs1[i]); + assert(0); } if(rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1])) { // The delay slot overwrites one of our conditions. 
@@ -7808,7 +7021,7 @@ int new_recompile_block(int addr) if(rs1[i]) alloc_reg(¤t,i,rs1[i]); if(!((current.is32>>rs1[i])&1)) { - if(rs1[i]) alloc_reg64(¤t,i,rs1[i]); + assert(0); } } else @@ -7830,8 +7043,7 @@ int new_recompile_block(int addr) alloc_reg(¤t,i,rs2[i]); if(!((current.is32>>rs1[i])&(current.is32>>rs2[i])&1)) { - alloc_reg64(¤t,i,rs1[i]); - alloc_reg64(¤t,i,rs2[i]); + assert(0); } } else @@ -7845,7 +7057,7 @@ int new_recompile_block(int addr) alloc_reg(¤t,i,rs1[i]); if(!(current.is32>>rs1[i]&1)) { - alloc_reg64(¤t,i,rs1[i]); + assert(0); } } ds=1; @@ -7865,7 +7077,7 @@ int new_recompile_block(int addr) alloc_reg(¤t,i,rs1[i]); if(!(current.is32>>rs1[i]&1)) { - alloc_reg64(¤t,i,rs1[i]); + assert(0); } if (rt1[i]==31) { // BLTZAL/BGEZAL alloc_reg(¤t,i,31); @@ -7884,7 +7096,7 @@ int new_recompile_block(int addr) if(rs1[i]) alloc_reg(¤t,i,rs1[i]); if(!((current.is32>>rs1[i])&1)) { - if(rs1[i]) alloc_reg64(¤t,i,rs1[i]); + assert(0); } } else @@ -7905,52 +7117,14 @@ int new_recompile_block(int addr) alloc_reg(¤t,i,rs1[i]); if(!(current.is32>>rs1[i]&1)) { - alloc_reg64(¤t,i,rs1[i]); + assert(0); } } ds=1; //current.isconst=0; break; case FJUMP: - current.isconst=0; - current.wasconst=0; - regs[i].wasconst=0; - if(likely[i]==0) // BC1F/BC1T - { - // TODO: Theoretically we can run out of registers here on x86. - // The delay slot can allocate up to six, and we need to check - // CSREG before executing the delay slot. Possibly we can drop - // the cycle count and then reload it after checking that the - // FPU is in a usable state, or don't do out-of-order execution. - alloc_cc(¤t,i); - dirty_reg(¤t,CCREG); - alloc_reg(¤t,i,FSREG); - alloc_reg(¤t,i,CSREG); - if(itype[i+1]==FCOMP) { - // The delay slot overwrites the branch condition. - // Allocate the branch condition registers instead. 
- alloc_cc(¤t,i); - dirty_reg(¤t,CCREG); - alloc_reg(¤t,i,CSREG); - alloc_reg(¤t,i,FSREG); - } - else { - ooo[i]=1; - delayslot_alloc(¤t,i+1); - alloc_reg(¤t,i+1,CSREG); - } - } - else - // Don't alloc the delay slot yet because we might not execute it - if(likely[i]) // BC1FL/BC1TL - { - alloc_cc(¤t,i); - dirty_reg(¤t,CCREG); - alloc_reg(¤t,i,CSREG); - alloc_reg(¤t,i,FSREG); - } - ds=1; - current.isconst=0; + assert(0); break; case IMM16: imm16_alloc(¤t,i); @@ -7983,7 +7157,7 @@ int new_recompile_block(int addr) break; case COP1: case COP2: - cop1_alloc(¤t,i); + cop12_alloc(¤t,i); break; case C1LS: c1ls_alloc(¤t,i); @@ -7994,15 +7168,6 @@ int new_recompile_block(int addr) case C2OP: c2op_alloc(¤t,i); break; - case FCONV: - fconv_alloc(¤t,i); - break; - case FLOAT: - float_alloc(¤t,i); - break; - case FCOMP: - fcomp_alloc(¤t,i); - break; case SYSCALL: case HLECALL: case INTCALL: @@ -8013,20 +7178,6 @@ int new_recompile_block(int addr) break; } - // Drop the upper half of registers that have become 32-bit - current.uu|=current.is32&((1LL<>rt1[i])&1) current.uu&=~((1LL<>rt1[i+1])&1) current.uu&=~((1LL<>(r&63))&1) { - regs[i].regmap_entry[hr]=-1; - //regs[i].regmap[hr]=-1; - current.regmap[hr]=-1; - }else - regs[i].regmap_entry[hr]=r; + assert(0); } } } else { @@ -8100,7 +7246,6 @@ int new_recompile_block(int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; branch_regs[i-1].u=branch_unneeded_reg[i-1]&~((1LL<>rt1[i])&1) current.uu&=~((1LL<>rs1[i-1])&(current.is32>>rs2[i-1])&1)) { - if(rs1[i-1]) alloc_reg64(¤t,i-1,rs1[i-1]); - if(rs2[i-1]) alloc_reg64(¤t,i-1,rs2[i-1]); + assert(0); } } memcpy(&branch_regs[i-1],¤t,sizeof(current)); @@ -8179,22 +7318,18 @@ int new_recompile_block(int addr) // The delay slot overwrote the branch condition // Delay slot goes after the test (in order) current.u=branch_unneeded_reg[i-1]&~((1LL<>rt1[i])&1) current.uu&=~((1LL<>rs1[i-1]&1)) { - alloc_reg64(¤t,i-1,rs1[i-1]); + assert(0); } } 
memcpy(&branch_regs[i-1],¤t,sizeof(current)); @@ -8209,8 +7344,6 @@ int new_recompile_block(int addr) { memcpy(&branch_regs[i-1],¤t,sizeof(current)); branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL<>rt1[i])&1) branch_regs[i-1].uu&=~((1LL<>rt1[i])&1) branch_regs[i-1].uu&=~((1LL<>rt1[i])&1) current.uu&=~((1LL<>rs1[i-1]&1)) { - alloc_reg64(¤t,i-1,rs1[i-1]); + assert(0); } } memcpy(&branch_regs[i-1],¤t,sizeof(current)); @@ -8275,8 +7402,6 @@ int new_recompile_block(int addr) { memcpy(&branch_regs[i-1],¤t,sizeof(current)); branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL<>rt1[i])&1) branch_regs[i-1].uu&=~((1LL<>rt1[i])&1) branch_regs[i-1].uu&=~((1LL<>dep1[i+1])&1)) { - if(dep1[i+1]==(regmap_pre[i][hr]&63)) nr|=1<>dep2[i+1])&1)) { - if(dep1[i+1]==(regs[i].regmap_entry[hr]&63)) nr|=1<>dep1[i])&1)) { - if(dep1[i]==(regmap_pre[i][hr]&63)) nr|=1<>dep2[i])&1)) { - if(dep2[i]==(regmap_pre[i][hr]&63)) nr|=1<0&&!bt[i]&&((regs[i].wasdirty>>hr)&1)) { - if((regmap_pre[i][hr]>0&®map_pre[i][hr]<64&&!((unneeded_reg[i]>>regmap_pre[i][hr])&1)) || - (regmap_pre[i][hr]>64&&!((unneeded_reg_upper[i]>>(regmap_pre[i][hr]&63))&1)) ) { + if((regmap_pre[i][hr]>0&®map_pre[i][hr]<64&&!((unneeded_reg[i]>>regmap_pre[i][hr])&1))) { if(rt1[i-1]==(regmap_pre[i][hr]&63)) nr|=1<0&®s[i].regmap_entry[hr]<64&&!((unneeded_reg[i]>>regs[i].regmap_entry[hr])&1)) || - (regs[i].regmap_entry[hr]>64&&!((unneeded_reg_upper[i]>>(regs[i].regmap_entry[hr]&63))&1)) ) { + if((regs[i].regmap_entry[hr]>0&®s[i].regmap_entry[hr]<64&&!((unneeded_reg[i]>>regs[i].regmap_entry[hr])&1))) { if(rt1[i-1]==(regs[i].regmap_entry[hr]&63)) nr|=1<>2; @@ -8771,15 +7843,7 @@ int new_recompile_block(int addr) { //printf("Test %x -> %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r); if(r<34&&((unneeded_reg[j]>>r)&1)) break; - if(r>63&&((unneeded_reg_upper[j]>>(r&63))&1)) break; - if(r>63) { - // NB This can exclude the case where the upper-half - // register is lower numbered than the lower-half - // register. 
Not sure if it's worth fixing... - if(get_reg(regs[j].regmap,r&63)<0) break; - if(get_reg(regs[j].regmap_entry,r&63)<0) break; - if(regs[j].is32&(1LL<<(r&63))) break; - } + assert(r < 64); if(regs[j].regmap[hr]==f_regmap[hr]&&(f_regmap[hr]&63) %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r); int k; @@ -9031,8 +8095,7 @@ int new_recompile_block(int addr) } if(itype[i]!=STORE&&itype[i]!=STORELR&&itype[i]!=C1LS&&itype[i]!=SHIFT&& itype[i]!=NOP&&itype[i]!=MOV&&itype[i]!=ALU&&itype[i]!=SHIFTIMM&& - itype[i]!=IMM16&&itype[i]!=LOAD&&itype[i]!=COP1&&itype[i]!=FLOAT&& - itype[i]!=FCONV&&itype[i]!=FCOMP) + itype[i]!=IMM16&&itype[i]!=LOAD&&itype[i]!=COP1) { memcpy(f_regmap,regs[i].regmap,sizeof(f_regmap)); } @@ -9356,7 +8419,6 @@ int new_recompile_block(int addr) /* Pass 8 - Assembly */ linkcount=0;stubcount=0; ds=0;is_delayslot=0; - cop1_usable=0; uint64_t is32_pre=0; u_int dirty_pre=0; void *beginning=start_block(); @@ -9393,7 +8455,7 @@ int new_recompile_block(int addr) if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000)) { wb_valid(regmap_pre[i],regs[i].regmap_entry,dirty_pre,regs[i].wasdirty,is32_pre, - unneeded_reg[i],unneeded_reg_upper[i]); + unneeded_reg[i]); } if((itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)&&!likely[i]) { is32_pre=branch_regs[i].is32; @@ -9406,8 +8468,7 @@ int new_recompile_block(int addr) // write back if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000)) { - wb_invalidate(regmap_pre[i],regs[i].regmap_entry,regs[i].wasdirty,regs[i].was32, - unneeded_reg[i],unneeded_reg_upper[i]); + wb_invalidate(regmap_pre[i],regs[i].regmap_entry,regs[i].wasdirty,regs[i].was32,unneeded_reg[i]); loop_preload(regmap_pre[i],regs[i].regmap_entry); } // branch target entry point @@ -9446,7 +8507,6 @@ int new_recompile_block(int addr) load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,CCREG,CCREG); if(itype[i]==STORE||itype[i]==STORELR||(opcode[i]&0x3b)==0x39||(opcode[i]&0x3b)==0x3a) 
load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,INVCP,INVCP); - if(bt[i]) cop1_usable=0; // assemble switch(itype[i]) { case ALU: @@ -9477,12 +8537,6 @@ int new_recompile_block(int addr) c2ls_assemble(i,®s[i]);break; case C2OP: c2op_assemble(i,®s[i]);break; - case FCONV: - fconv_assemble(i,®s[i]);break; - case FLOAT: - float_assemble(i,®s[i]);break; - case FCOMP: - fcomp_assemble(i,®s[i]);break; case MULTDIV: multdiv_assemble(i,®s[i]);break; case MOV: @@ -9502,7 +8556,7 @@ int new_recompile_block(int addr) case SJUMP: sjump_assemble(i,®s[i]);ds=1;break; case FJUMP: - fjump_assemble(i,®s[i]);ds=1;break; + assert(0);ds=1;break; case SPAN: pagespan_assemble(i,®s[i]);break; } From ad49de8937ff55b6295a8065d1b942c29726a363 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 2 Nov 2021 17:12:21 +0200 Subject: [PATCH 054/597] drc: remove yet yet more n64 stuff --- libpcsxcore/new_dynarec/assem_arm.c | 110 +-- libpcsxcore/new_dynarec/new_dynarec.c | 1107 +++++-------------------- 2 files changed, 224 insertions(+), 993 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 1d6600246..0e0acdc76 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -418,7 +418,7 @@ static void alloc_reg(struct regstat *cur,int i,signed char reg) // Don't evict the cycle count at entry points, otherwise the entry // stub will have to write it. 
if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; + if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2; for(j=10;j>=3;j--) { // Alloc preferred register if available @@ -488,12 +488,6 @@ static void alloc_reg(struct regstat *cur,int i,signed char reg) SysPrintf("This shouldn't happen (alloc_reg)");exit(1); } -static void alloc_reg64(struct regstat *cur,int i,signed char reg) -{ - // allocate the lower 32 bits - alloc_reg(cur,i,reg); -} - // Allocate a temporary register. This is done without regard to // dirty status or whether the register we request is on the unneeded list // Note: This will only allocate one register, even if called multiple times @@ -548,7 +542,7 @@ static void alloc_reg_temp(struct regstat *cur,int i,signed char reg) // Don't evict the cycle count at entry points, otherwise the entry // stub will have to write it. 
if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; + if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2; for(j=10;j>=3;j--) { for(r=1;r<=MAXREG;r++) @@ -744,12 +738,6 @@ static void emit_adcs(int rs1,int rs2,int rt) output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2)); } -static void emit_sbcs(int rs1,int rs2,int rt) -{ - assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2)); -} - static void emit_neg(int rs, int rt) { assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]); @@ -908,12 +896,6 @@ static void emit_or(u_int rs1,u_int rs2,u_int rt) output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2)); } -static void emit_or_and_set_flags(int rs1,int rs2,int rt) -{ - assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2)); -} - static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt) { assert(rs<16); @@ -1264,14 +1246,6 @@ static void emit_cmovb_imm(int imm,int rt) output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval); } -static void emit_cmovs_imm(int imm,int rt) -{ - assem_debug("movmi %s,#%d\n",regname[rt],imm); - u_int armval; - genimm_checked(imm,&armval); - output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval); -} - static void emit_cmovne_reg(int rs,int rt) { assem_debug("movne %s,%s\n",regname[rt],regname[rs]); @@ -1306,40 +1280,6 @@ static void emit_sltiu32(int rs,int imm,int rt) emit_cmovb_imm(1,rt); } -static void emit_slti64_32(int rsh,int rsl,int imm,int rt) -{ - assert(rsh!=rt); - emit_slti32(rsl,imm,rt); - if(imm>=0) - { - emit_test(rsh,rsh); - emit_cmovne_imm(0,rt); - emit_cmovs_imm(1,rt); - } - else - { - emit_cmpimm(rsh,-1); - emit_cmovne_imm(0,rt); - emit_cmovl_imm(1,rt); - } -} - -static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt) -{ - assert(rsh!=rt); - 
emit_sltiu32(rsl,imm,rt); - if(imm>=0) - { - emit_test(rsh,rsh); - emit_cmovne_imm(0,rt); - } - else - { - emit_cmpimm(rsh,-1); - emit_cmovne_imm(1,rt); - } -} - static void emit_cmp(int rs,int rt) { assem_debug("cmp %s,%s\n",regname[rs],regname[rt]); @@ -1362,22 +1302,6 @@ static void emit_set_nz32(int rs, int rt) emit_cmovne_imm(1,rt); } -static void emit_set_gz64_32(int rsh, int rsl, int rt) -{ - //assem_debug("set_gz64\n"); - emit_set_gz32(rsl,rt); - emit_test(rsh,rsh); - emit_cmovne_imm(1,rt); - emit_cmovs_imm(0,rt); -} - -static void emit_set_nz64_32(int rsh, int rsl, int rt) -{ - //assem_debug("set_nz64\n"); - emit_or_and_set_flags(rsh,rsl,rt); - emit_cmovne_imm(1,rt); -} - static void emit_set_if_less32(int rs1, int rs2, int rt) { //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); @@ -1396,28 +1320,6 @@ static void emit_set_if_carry32(int rs1, int rs2, int rt) emit_cmovb_imm(1,rt); } -static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt) -{ - //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]); - assert(u1!=rt); - assert(u2!=rt); - emit_cmp(l1,l2); - emit_movimm(0,rt); - emit_sbcs(u1,u2,HOST_TEMPREG); - emit_cmovl_imm(1,rt); -} - -static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt) -{ - //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]); - assert(u1!=rt); - assert(u2!=rt); - emit_cmp(l1,l2); - emit_movimm(0,rt); - emit_sbcs(u1,u2,HOST_TEMPREG); - emit_cmovb_imm(1,rt); -} - #ifdef DRC_DBG extern void gen_interupt(); extern void do_insn_cmp(); @@ -2576,11 +2478,11 @@ static void do_cop1stub(int n) struct regstat *i_regs=(struct regstat *)stubs[n].c; int ds=stubs[n].d; if(!ds) { - load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i); + load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i); //if(i_regs!=®s[i]) printf("oops: 
regs[i]=%x i_regs=%x",(int)®s[i],(int)i_regs); } //else {printf("fp exception in delay slot\n");} - wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty); + wb_dirtys(i_regs->regmap_entry,i_regs->wasdirty); if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); emit_movimm(start+(i-ds)*4,EAX); // Get PC emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... @@ -2981,7 +2883,7 @@ static void cop0_assemble(int i,struct regstat *i_regs) signed char s=get_reg(i_regs->regmap,rs1[i]); char copr=(source[i]>>11)&0x1f; assert(s>=0); - wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32); + wb_register(rs1[i],i_regs->regmap,i_regs->dirty); if(copr==9||copr==11||copr==12||copr==13) { emit_readword(&last_count,HOST_TEMPREG); emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc @@ -3546,7 +3448,7 @@ static void do_miniht_insert(u_int return_address,int rt,int temp) { #endif } -static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u) +static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t u) { //if(dirty_pre==dirty) return; int hr,reg; diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index d19fcad0d..78a342d26 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -83,8 +83,6 @@ struct regstat { signed char regmap_entry[HOST_REGS]; signed char regmap[HOST_REGS]; - uint64_t was32; - uint64_t is32; uint64_t wasdirty; uint64_t dirty; uint64_t u; @@ -243,7 +241,7 @@ struct link_entry #define COP0 15 // Coprocessor 0 #define COP1 16 // Coprocessor 1 #define C1LS 17 // Coprocessor 1 load/store -#define FJUMP 18 // Conditional branch (floating point) +//#define FJUMP 18 // Conditional branch (floating point) //#define FLOAT 19 // Floating point unit //#define FCONV 20 // Convert integer 
to float //#define FCOMP 21 // Floating point compare (sets FSREG) @@ -282,13 +280,13 @@ void jump_intcall(); void new_dyna_leave(); // Needed by assembler -static void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32); -static void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty); -static void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr); +static void wb_register(signed char r,signed char regmap[],uint64_t dirty); +static void wb_dirtys(signed char i_regmap[],uint64_t i_dirty); +static void wb_needed_dirtys(signed char i_regmap[],uint64_t i_dirty,int addr); static void load_all_regs(signed char i_regmap[]); static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]); static void load_regs_entry(int t); -static void load_all_consts(signed char regmap[],int is32,u_int dirty,int i); +static void load_all_consts(signed char regmap[],u_int dirty,int i); static int verify_dirty(u_int *ptr); static int get_final_value(int hr, int i, int *value); @@ -515,23 +513,6 @@ void dirty_reg(struct regstat *cur,signed char reg) } } -// If we dirty the lower half of a 64 bit register which is now being -// sign-extended, we need to dump the upper half. -// Note: Do this only after completion of the instruction, because -// some instructions may need to read the full 64-bit value even if -// overwriting it (eg SLTI, DSRA32). 
-static void flush_dirty_uppers(struct regstat *cur) -{ - int hr,reg; - for (hr=0;hrdirty>>hr)&1) { - reg=cur->regmap[hr]; - if(reg>=64) - if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1; - } - } -} - void set_const(struct regstat *cur,signed char reg,uint64_t value) { int hr; @@ -621,7 +602,7 @@ void lsn(u_char hsn[], int i, int *preferred_reg) hsn[INVCP]=j; } #endif - if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP)) + if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP)) { hsn[CCREG]=j; b=j; @@ -645,7 +626,7 @@ void lsn(u_char hsn[], int i, int *preferred_reg) // TODO: preferred register based on backward branch } // Delay slot should preferably not overwrite branch conditions or cycle count - if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) { + if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP)) { if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1; if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1; hsn[CCREG]=1; @@ -707,7 +688,7 @@ int needed_again(int r, int i) if(rs1[i+j]==r) rn=j; if(rs2[i+j]==r) rn=j; if((unneeded_reg[i+j]>>r)&1) rn=10; - if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP)) + if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP)) { b=j; } @@ -756,14 +737,14 @@ int loop_reg(int i, int r, int hr) } k=0; if(i>0){ - if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP) + if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP) k--; } for(;k>r)&1) return hr; - if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP||itype[i+k]==FJUMP)) + if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP)) { if(ba[i+k]>=start && ba[i+k]<(start+i*4)) { @@ -1168,24 +1149,18 @@ void clean_blocks(u_int page) } } - -void mov_alloc(struct regstat *current,int i) +static void mov_alloc(struct regstat *current,int i) { // 
Note: Don't need to actually alloc the source registers - if((~current->is32>>rs1[i])&1) { - //alloc_reg64(current,i,rs1[i]); - assert(0); - } else { - //alloc_reg(current,i,rs1[i]); - alloc_reg(current,i,rt1[i]); - current->is32|=(1LL<is32|=1LL<is32|=1LL<=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU if(rt1[i]) { @@ -1265,22 +1238,13 @@ void alu_alloc(struct regstat *current,int i) } alloc_reg(current,i,rt1[i]); } - current->is32|=1LL<is32>>rs1[i])&(current->is32>>rs2[i])&1)) - { - alloc_reg64(current,i,rs1[i]); - alloc_reg64(current,i,rs2[i]); - alloc_reg(current,i,rt1[i]); - } else { - alloc_reg(current,i,rs1[i]); - alloc_reg(current,i,rs2[i]); - alloc_reg(current,i,rt1[i]); - } + alloc_reg(current,i,rs1[i]); + alloc_reg(current,i,rs2[i]); + alloc_reg(current,i,rt1[i]); } - current->is32|=1LL<=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR if(rt1[i]) { @@ -1294,15 +1258,6 @@ void alu_alloc(struct regstat *current,int i) if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]); } alloc_reg(current,i,rt1[i]); - if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1)) - { - if(get_reg(current->regmap,rt1[i]|64)>=0) { - assert(0); - } - current->is32&=~(1LL<is32|=1LL<=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU @@ -1314,7 +1269,7 @@ void alu_alloc(struct regstat *current,int i) dirty_reg(current,rt1[i]); } -void imm16_alloc(struct regstat *current,int i) +static void imm16_alloc(struct regstat *current,int i) { if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]); else lt1[i]=rs1[i]; @@ -1323,20 +1278,10 @@ void imm16_alloc(struct regstat *current,int i) assert(0); } else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU - if((~current->is32>>rs1[i])&1) alloc_reg64(current,i,rs1[i]); - current->is32|=1LL<=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI - if(((~current->is32>>rs1[i])&1)&&opcode[i]>0x0c) { - if(rs1[i]!=rt1[i]) { - if(needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]); - alloc_reg64(current,i,rt1[i]); - 
current->is32&=~(1LL<is32|=1LL<is32|=1LL<is32|=1LL<regmap,rt1[i])>=0); if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD { - current->is32&=~(1LL<is32&=~(1LL<is32|=1LL<is32|=1LL<is32|=1LL<is32|=1LL<is32|=1LL<is32|=1LL<is32|=1LL<is32>>rs1[i])&(current->is32>>rs2[i])&1)) - { - assert(0); - } } else if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL { if(rs1[i]) alloc_reg(current,i,rs1[i]); - if(!((current->is32>>rs1[i])&1)) - { - assert(0); - } } //else ... } @@ -1717,18 +1636,15 @@ static void add_stub_r(enum stub_type type, void *addr, void *retaddr, } // Write out a single register -void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32) +static void wb_register(signed char r,signed char regmap[],uint64_t dirty) { int hr; for(hr=0;hr>hr)&1) { - if(regmap[hr]<64) { - emit_storereg(r,hr); - }else{ - emit_storereg(r|64,hr); - } + assert(regmap[hr]<64); + emit_storereg(r,hr); } } } @@ -1782,42 +1698,8 @@ void alu_assemble(int i,struct regstat *i_regs) } if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU if(rt1[i]) { - signed char s1l,s1h,s2l,s2h,t; - if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1)) + signed char s1l,s2l,t; { - t=get_reg(i_regs->regmap,rt1[i]); - //assert(t>=0); - if(t>=0) { - s1l=get_reg(i_regs->regmap,rs1[i]); - s1h=get_reg(i_regs->regmap,rs1[i]|64); - s2l=get_reg(i_regs->regmap,rs2[i]); - s2h=get_reg(i_regs->regmap,rs2[i]|64); - if(rs2[i]==0) // rx=0); - if(opcode2[i]==0x2a) // SLT - emit_shrimm(s1h,31,t); - else // SLTU (unsigned can not be less than zero) - emit_zeroreg(t); - } - else if(rs1[i]==0) // r0=0); - if(opcode2[i]==0x2a) // SLT - emit_set_gz64_32(s2h,s2l,t); - else // SLTU (set if not zero) - emit_set_nz64_32(s2h,s2l,t); - } - else { - assert(s1l>=0);assert(s1h>=0); - assert(s2l>=0);assert(s2h>=0); - if(opcode2[i]==0x2a) // SLT - emit_set_if_less64_32(s1h,s1l,s2h,s2l,t); - else // SLTU - emit_set_if_carry64_32(s1h,s1l,s2h,s2l,t); - } - } - } else { t=get_reg(i_regs->regmap,rt1[i]); //assert(t>=0); if(t>=0) 
{ @@ -1852,101 +1734,9 @@ void alu_assemble(int i,struct regstat *i_regs) } if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR if(rt1[i]) { - signed char s1l,s1h,s2l,s2h,th,tl; + signed char s1l,s2l,tl; tl=get_reg(i_regs->regmap,rt1[i]); - th=get_reg(i_regs->regmap,rt1[i]|64); - if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1)&&th>=0) - { - assert(tl>=0); - if(tl>=0) { - s1l=get_reg(i_regs->regmap,rs1[i]); - s1h=get_reg(i_regs->regmap,rs1[i]|64); - s2l=get_reg(i_regs->regmap,rs2[i]); - s2h=get_reg(i_regs->regmap,rs2[i]|64); - if(rs1[i]&&rs2[i]) { - assert(s1l>=0);assert(s1h>=0); - assert(s2l>=0);assert(s2h>=0); - if(opcode2[i]==0x24) { // AND - emit_and(s1l,s2l,tl); - emit_and(s1h,s2h,th); - } else - if(opcode2[i]==0x25) { // OR - emit_or(s1l,s2l,tl); - emit_or(s1h,s2h,th); - } else - if(opcode2[i]==0x26) { // XOR - emit_xor(s1l,s2l,tl); - emit_xor(s1h,s2h,th); - } else - if(opcode2[i]==0x27) { // NOR - emit_or(s1l,s2l,tl); - emit_or(s1h,s2h,th); - emit_not(tl,tl); - emit_not(th,th); - } - } - else - { - if(opcode2[i]==0x24) { // AND - emit_zeroreg(tl); - emit_zeroreg(th); - } else - if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR - if(rs1[i]){ - if(s1l>=0) emit_mov(s1l,tl); - else emit_loadreg(rs1[i],tl); - if(s1h>=0) emit_mov(s1h,th); - else emit_loadreg(rs1[i]|64,th); - } - else - if(rs2[i]){ - if(s2l>=0) emit_mov(s2l,tl); - else emit_loadreg(rs2[i],tl); - if(s2h>=0) emit_mov(s2h,th); - else emit_loadreg(rs2[i]|64,th); - } - else{ - emit_zeroreg(tl); - emit_zeroreg(th); - } - } else - if(opcode2[i]==0x27) { // NOR - if(rs1[i]){ - if(s1l>=0) emit_not(s1l,tl); - else{ - emit_loadreg(rs1[i],tl); - emit_not(tl,tl); - } - if(s1h>=0) emit_not(s1h,th); - else{ - emit_loadreg(rs1[i]|64,th); - emit_not(th,th); - } - } - else - if(rs2[i]){ - if(s2l>=0) emit_not(s2l,tl); - else{ - emit_loadreg(rs2[i],tl); - emit_not(tl,tl); - } - if(s2h>=0) emit_not(s2h,th); - else{ - emit_loadreg(rs2[i]|64,th); - emit_not(th,th); - } - } - else { - emit_movimm(-1,tl); - 
emit_movimm(-1,th); - } - } - } - } - } - else { - // 32 bit if(tl>=0) { s1l=get_reg(i_regs->regmap,rs1[i]); s2l=get_reg(i_regs->regmap,rs2[i]); @@ -2078,15 +1868,12 @@ void imm16_assemble(int i,struct regstat *i_regs) else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU if(rt1[i]) { //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug - signed char sh,sl,t; + signed char sl,t; t=get_reg(i_regs->regmap,rt1[i]); - sh=get_reg(i_regs->regmap,rs1[i]|64); sl=get_reg(i_regs->regmap,rs1[i]); //assert(t>=0); if(t>=0) { if(rs1[i]>0) { - if(sh<0) assert((i_regs->was32>>rs1[i])&1); - if(sh<0||((i_regs->was32>>rs1[i])&1)) { if(opcode[i]==0x0a) { // SLTI if(sl<0) { if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t); @@ -2103,13 +1890,6 @@ void imm16_assemble(int i,struct regstat *i_regs) emit_sltiu32(sl,imm[i],t); } } - }else{ // 64-bit - assert(sl>=0); - if(opcode[i]==0x0a) // SLTI - emit_slti64_32(sh,sl,imm[i],t); - else // SLTIU - emit_sltiu64_32(sh,sl,imm[i],t); - } }else{ // SLTI(U) with r0 is just stupid, // nonetheless examples can be found @@ -2527,8 +2307,8 @@ void store_assemble(int i,struct regstat *i_regs) SysPrintf("write to %08x hits block %08x, pc=%08x\n",addr_val,start,start+i*4); assert(i_regs->regmap==regs[i].regmap); // not delay slot if(i_regs->regmap==regs[i].regmap) { - load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i); - wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty); + load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i); + wb_dirtys(regs[i].regmap_entry,regs[i].wasdirty); emit_movimm(start+i*4+4,0); emit_writeword(0,&pcaddr); emit_jmp(do_interrupt); @@ -2913,14 +2693,13 @@ void ds_assemble(int i,struct regstat *i_regs) case RJUMP: case CJUMP: case SJUMP: - case FJUMP: SysPrintf("Jump in the delay slot. This is probably a bug.\n"); } is_delayslot=0; } // Is the branch target a valid internal jump? 
-int internal_branch(uint64_t i_is32,int addr) +static int internal_branch(int addr) { if(addr&1) return 0; // Indirect (register) jump if(addr>=start && addr=0) { - if(entry[hr]!=regmap[hr]) { - if(regmap[hr]-64==rs1||regmap[hr]-64==rs2) - { - assert(regmap[hr]!=64); - if((is32>>(regmap[hr]&63))&1) { - int lr=get_reg(regmap,regmap[hr]-64); - if(lr>=0) - emit_sarimm(lr,31,hr); - else - emit_loadreg(regmap[hr],hr); - } - else - { - emit_loadreg(regmap[hr],hr); - } - } - } - } - } } // Load registers prior to the start of a loop @@ -3187,7 +2944,7 @@ static int get_final_value(int hr, int i, int *value) } // Load registers with known constants -void load_consts(signed char pre[],signed char regmap[],int is32,int i) +static void load_consts(signed char pre[],signed char regmap[],int i) { int hr,hr2; // propagate loaded constant flags @@ -3207,7 +2964,8 @@ void load_consts(signed char pre[],signed char regmap[],int is32,int i) if(hr!=EXCLUDE_REG&®map[hr]>=0) { //if(entry[hr]!=regmap[hr]) { if(!((regs[i].loadedconst>>hr)&1)) { - if(((regs[i].isconst>>hr)&1)&®map[hr]<64&®map[hr]>0) { + assert(regmap[hr]<64); + if(((regs[i].isconst>>hr)&1)&®map[hr]>0) { int value,similar=0; if(get_final_value(hr,i,&value)) { // see if some other register has similar value @@ -3238,41 +2996,16 @@ void load_consts(signed char pre[],signed char regmap[],int is32,int i) } } } - // Load 64-bit regs - for(hr=0;hr=0) { - //if(entry[hr]!=regmap[hr]) { - if(i==0||!((regs[i-1].isconst>>hr)&1)||pre[hr]!=regmap[hr]||bt[i]) { - if(((regs[i].isconst>>hr)&1)&®map[hr]>64) { - if((is32>>(regmap[hr]&63))&1) { - int lr=get_reg(regmap,regmap[hr]-64); - assert(lr>=0); - emit_sarimm(lr,31,hr); - } - else - { - int value; - if(get_final_value(hr,i,&value)) { - if(value==0) { - emit_zeroreg(hr); - } - else { - emit_movimm(value,hr); - } - } - } - } - } - } - } } -void load_all_consts(signed char regmap[],int is32,u_int dirty,int i) + +void load_all_consts(signed char regmap[], u_int dirty, int i) { int hr; // 
Load 32-bit regs for(hr=0;hr=0&&((dirty>>hr)&1)) { - if(((regs[i].isconst>>hr)&1)&®map[hr]<64&®map[hr]>0) { + assert(regmap[hr] < 64); + if(((regs[i].isconst>>hr)&1)&®map[hr]>0) { int value=constmap[i][hr]; if(value==0) { emit_zeroreg(hr); @@ -3283,32 +3016,10 @@ void load_all_consts(signed char regmap[],int is32,u_int dirty,int i) } } } - // Load 64-bit regs - for(hr=0;hr=0&&((dirty>>hr)&1)) { - if(((regs[i].isconst>>hr)&1)&®map[hr]>64) { - if((is32>>(regmap[hr]&63))&1) { - int lr=get_reg(regmap,regmap[hr]-64); - assert(lr>=0); - emit_sarimm(lr,31,hr); - } - else - { - int value=constmap[i][hr]; - if(value==0) { - emit_zeroreg(hr); - } - else { - emit_movimm(value,hr); - } - } - } - } - } } // Write out all dirty registers (except cycle count) -void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty) +static void wb_dirtys(signed char i_regmap[],uint64_t i_dirty) { int hr; for(hr=0;hr>2; @@ -3334,7 +3046,7 @@ void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,in if(hr!=EXCLUDE_REG) { if(i_regmap[hr]>0) { if(i_regmap[hr]!=CCREG) { - if(i_regmap[hr]==regs[t].regmap_entry[hr] && ((regs[t].dirty>>hr)&1) && !(((i_is32&~regs[t].was32)>>(i_regmap[hr]&63))&1)) { + if(i_regmap[hr]==regs[t].regmap_entry[hr] && ((regs[t].dirty>>hr)&1)) { if((i_dirty>>hr)&1) { assert(i_regmap[hr]<64); emit_storereg(i_regmap[hr],hr); @@ -3405,39 +3117,19 @@ void load_regs_entry(int t) } } } - // Load 64-bit regs - for(hr=0;hr=64&®s[t].regmap_entry[hr]>(regs[t].regmap_entry[hr]&63))&1) { - int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64); - if(lr<0) { - emit_loadreg(regs[t].regmap_entry[hr],hr); - } - else - { - emit_sarimm(lr,31,hr); - } - } - else - { - emit_loadreg(regs[t].regmap_entry[hr],hr); - } - } - } } // Store dirty registers prior to branch -void store_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) +void store_regs_bt(signed char i_regmap[],uint64_t i_dirty,int addr) { - 
if(internal_branch(i_is32,addr)) + if(internal_branch(addr)) { int t=(addr-start)>>2; int hr; for(hr=0;hr0 && i_regmap[hr]!=CCREG) { - if(i_regmap[hr]!=regs[t].regmap_entry[hr] || !((regs[t].dirty>>hr)&1) || (((i_is32&~regs[t].was32)>>(i_regmap[hr]&63))&1)) { + if(i_regmap[hr]!=regs[t].regmap_entry[hr] || !((regs[t].dirty>>hr)&1)) { if((i_dirty>>hr)&1) { assert(i_regmap[hr]<64); if(!((unneeded_reg[t]>>i_regmap[hr])&1)) @@ -3451,15 +3143,15 @@ void store_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int a else { // Branch out of this block, write out all dirty regs - wb_dirtys(i_regmap,i_is32,i_dirty); + wb_dirtys(i_regmap,i_dirty); } } // Load all needed registers for branch target -void load_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) +static void load_regs_bt(signed char i_regmap[],uint64_t i_dirty,int addr) { //if(addr>=start && addr<(start+slen*4)) - if(internal_branch(i_is32,addr)) + if(internal_branch(addr)) { int t=(addr-start)>>2; int hr; @@ -3484,37 +3176,10 @@ void load_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int ad } } } - //Load 64-bit regs - for(hr=0;hr=64&®s[t].regmap_entry[hr]>(regs[t].regmap_entry[hr]&63))&1) { - int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64); - if(lr<0) { - emit_loadreg(regs[t].regmap_entry[hr],hr); - } - else - { - emit_sarimm(lr,31,hr); - } - } - else - { - emit_loadreg(regs[t].regmap_entry[hr],hr); - } - } - else if((i_is32>>(regs[t].regmap_entry[hr]&63))&1) { - int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64); - assert(lr>=0); - emit_sarimm(lr,31,hr); - } - } - } } } -int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) +static int match_bt(signed char i_regmap[],uint64_t i_dirty,int addr) { if(addr>=start && addr0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0; + 
//if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP)) return 0; // Delay slots require additional processing, so do not match if(is_ds[t]) return 0; } @@ -3622,11 +3287,11 @@ void ds_assemble_entry(int i) assem_debug("<->\n"); drc_dbg_emit_do_cmp(t); if(regs[t].regmap_entry[HOST_CCREG]==CCREG&®s[t].regmap[HOST_CCREG]!=CCREG) - wb_register(CCREG,regs[t].regmap_entry,regs[t].wasdirty,regs[t].was32); - load_regs(regs[t].regmap_entry,regs[t].regmap,regs[t].was32,rs1[t],rs2[t]); + wb_register(CCREG,regs[t].regmap_entry,regs[t].wasdirty); + load_regs(regs[t].regmap_entry,regs[t].regmap,rs1[t],rs2[t]); address_generation(t,®s[t],regs[t].regmap_entry); if(itype[t]==STORE||itype[t]==STORELR||(opcode[t]&0x3b)==0x39||(opcode[t]&0x3b)==0x3a) - load_regs(regs[t].regmap_entry,regs[t].regmap,regs[t].was32,INVCP,INVCP); + load_regs(regs[t].regmap_entry,regs[t].regmap,INVCP,INVCP); is_delayslot=0; switch(itype[t]) { case ALU: @@ -3669,17 +3334,16 @@ void ds_assemble_entry(int i) case RJUMP: case CJUMP: case SJUMP: - case FJUMP: SysPrintf("Jump in the delay slot. 
This is probably a bug.\n"); } - store_regs_bt(regs[t].regmap,regs[t].is32,regs[t].dirty,ba[i]+4); - load_regs_bt(regs[t].regmap,regs[t].is32,regs[t].dirty,ba[i]+4); - if(internal_branch(regs[t].is32,ba[i]+4)) + store_regs_bt(regs[t].regmap,regs[t].dirty,ba[i]+4); + load_regs_bt(regs[t].regmap,regs[t].dirty,ba[i]+4); + if(internal_branch(ba[i]+4)) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - assert(internal_branch(regs[t].is32,ba[i]+4)); - add_to_linker(out,ba[i]+4,internal_branch(regs[t].is32,ba[i]+4)); + assert(internal_branch(ba[i]+4)); + add_to_linker(out,ba[i]+4,internal_branch(ba[i]+4)); emit_jmp(0); } @@ -3694,7 +3358,7 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) *adj=0; } //if(ba[i]>=start && ba[i]<(start+slen*4)) - if(internal_branch(branch_regs[i].is32,ba[i])) + if(internal_branch(ba[i])) { t=(ba[i]-start)>>2; if(is_ds[t]) *adj=-1; // Branch into delay slot adds an extra cycle @@ -3743,14 +3407,14 @@ static void do_ccstub(int n) int i=stubs[n].b; if(stubs[n].d==NULLDS) { // Delay slot instruction is nullified ("likely" branch) - wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty); + wb_dirtys(regs[i].regmap,regs[i].dirty); } else if(stubs[n].d!=TAKEN) { - wb_dirtys(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty); + wb_dirtys(branch_regs[i].regmap,branch_regs[i].dirty); } else { - if(internal_branch(branch_regs[i].is32,ba[i])) - wb_needed_dirtys(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); + if(internal_branch(ba[i])) + wb_needed_dirtys(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); } if(stubs[n].c!=-1) { @@ -3761,36 +3425,31 @@ static void do_ccstub(int n) else { // Return address depends on which way the branch goes - if(itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) + if(itype[i]==CJUMP||itype[i]==SJUMP) { int s1l=get_reg(branch_regs[i].regmap,rs1[i]); - int s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); int 
s2l=get_reg(branch_regs[i].regmap,rs2[i]); - int s2h=get_reg(branch_regs[i].regmap,rs2[i]|64); if(rs1[i]==0) { - s1l=s2l;s1h=s2h; - s2l=s2h=-1; + s1l=s2l; + s2l=-1; } else if(rs2[i]==0) { - s2l=s2h=-1; - } - if((branch_regs[i].is32>>rs1[i])&(branch_regs[i].is32>>rs2[i])&1) { - s1h=s2h=-1; + s2l=-1; } assert(s1l>=0); #ifdef DESTRUCTIVE_WRITEBACK if(rs1[i]) { - if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs1[i])&1) + if((branch_regs[i].dirty>>s1l)&&1) emit_loadreg(rs1[i],s1l); } else { - if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs2[i])&1) + if((branch_regs[i].dirty>>s1l)&1) emit_loadreg(rs2[i],s1l); } if(s2l>=0) - if((branch_regs[i].dirty>>s2l)&(branch_regs[i].is32>>rs2[i])&1) + if((branch_regs[i].dirty>>s2l)&1) emit_loadreg(rs2[i],s2l); #endif int hr=0; @@ -3832,46 +3491,28 @@ static void do_ccstub(int n) if((opcode[i]&0x2f)==4) // BEQ { #ifdef HAVE_CMOV_IMM - if(s1h<0) { - if(s2l>=0) emit_cmp(s1l,s2l); - else emit_test(s1l,s1l); - emit_cmov2imm_e_ne_compact(ba[i],start+i*4+8,addr); - } - else + if(s2l>=0) emit_cmp(s1l,s2l); + else emit_test(s1l,s1l); + emit_cmov2imm_e_ne_compact(ba[i],start+i*4+8,addr); + #else + emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt); + if(s2l>=0) emit_cmp(s1l,s2l); + else emit_test(s1l,s1l); + emit_cmovne_reg(alt,addr); #endif - { - emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt); - if(s1h>=0) { - if(s2h>=0) emit_cmp(s1h,s2h); - else emit_test(s1h,s1h); - emit_cmovne_reg(alt,addr); - } - if(s2l>=0) emit_cmp(s1l,s2l); - else emit_test(s1l,s1l); - emit_cmovne_reg(alt,addr); - } } if((opcode[i]&0x2f)==5) // BNE { #ifdef HAVE_CMOV_IMM - if(s1h<0) { - if(s2l>=0) emit_cmp(s1l,s2l); - else emit_test(s1l,s1l); - emit_cmov2imm_e_ne_compact(start+i*4+8,ba[i],addr); - } - else + if(s2l>=0) emit_cmp(s1l,s2l); + else emit_test(s1l,s1l); + emit_cmov2imm_e_ne_compact(start+i*4+8,ba[i],addr); + #else + emit_mov2imm_compact(start+i*4+8,addr,ba[i],alt); + if(s2l>=0) emit_cmp(s1l,s2l); + else emit_test(s1l,s1l); + 
emit_cmovne_reg(alt,addr); #endif - { - emit_mov2imm_compact(start+i*4+8,addr,ba[i],alt); - if(s1h>=0) { - if(s2h>=0) emit_cmp(s1h,s2h); - else emit_test(s1h,s1h); - emit_cmovne_reg(alt,addr); - } - if(s2l>=0) emit_cmp(s1l,s2l); - else emit_test(s1l,s1l); - emit_cmovne_reg(alt,addr); - } } if((opcode[i]&0x2f)==6) // BLEZ { @@ -3879,13 +3520,7 @@ static void do_ccstub(int n) //emit_movimm(start+i*4+8,addr); emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr); emit_cmpimm(s1l,1); - if(s1h>=0) emit_mov(addr,ntaddr); emit_cmovl_reg(alt,addr); - if(s1h>=0) { - emit_test(s1h,s1h); - emit_cmovne_reg(ntaddr,addr); - emit_cmovs_reg(alt,addr); - } } if((opcode[i]&0x2f)==7) // BGTZ { @@ -3893,21 +3528,14 @@ static void do_ccstub(int n) //emit_movimm(start+i*4+8,ntaddr); emit_mov2imm_compact(ba[i],addr,start+i*4+8,ntaddr); emit_cmpimm(s1l,1); - if(s1h>=0) emit_mov(addr,alt); emit_cmovl_reg(ntaddr,addr); - if(s1h>=0) { - emit_test(s1h,s1h); - emit_cmovne_reg(alt,addr); - emit_cmovs_reg(ntaddr,addr); - } } if((opcode[i]==1)&&(opcode2[i]&0x2D)==0) // BLTZ { //emit_movimm(ba[i],alt); //emit_movimm(start+i*4+8,addr); emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr); - if(s1h>=0) emit_test(s1h,s1h); - else emit_test(s1l,s1l); + emit_test(s1l,s1l); emit_cmovs_reg(alt,addr); } if((opcode[i]==1)&&(opcode2[i]&0x2D)==1) // BGEZ @@ -3915,8 +3543,7 @@ static void do_ccstub(int n) //emit_movimm(ba[i],addr); //emit_movimm(start+i*4+8,alt); emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt); - if(s1h>=0) emit_test(s1h,s1h); - else emit_test(s1l,s1l); + emit_test(s1l,s1l); emit_cmovs_reg(alt,addr); } if(opcode[i]==0x11 && opcode2[i]==0x08 ) { @@ -3956,7 +3583,7 @@ static void do_ccstub(int n) emit_call(cc_interrupt); if(stubs[n].a) emit_addimm(HOST_CCREG,-CLOCK_ADJUST((signed int)stubs[n].a),HOST_CCREG); if(stubs[n].d==TAKEN) { - if(internal_branch(branch_regs[i].is32,ba[i])) + if(internal_branch(ba[i])) load_needed_regs(branch_regs[i].regmap,regs[(ba[i]-start)>>2].regmap_entry); else 
if(itype[i]==RJUMP) { if(get_reg(branch_regs[i].regmap,RTEMP)>=0) @@ -3996,7 +3623,7 @@ static void ujump_assemble_write_ra(int i) return_address=start+i*4+8; if(rt>=0) { #ifdef USE_MINI_HT - if(internal_branch(branch_regs[i].is32,return_address)&&rt1[i+1]!=31) { + if(internal_branch(return_address)&&rt1[i+1]!=31) { int temp=-1; // note: must be ds-safe #ifdef HOST_TEMPREG temp=HOST_TEMPREG; @@ -4043,29 +3670,29 @@ void ujump_assemble(int i,struct regstat *i_regs) ds_assemble(i+1,i_regs); uint64_t bc_unneeded=branch_regs[i].u; bc_unneeded|=1|(1LL<=0) emit_prefetchreg(temp); #endif do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); - if(internal_branch(branch_regs[i].is32,ba[i])) + load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); + if(internal_branch(ba[i])) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if(internal_branch(branch_regs[i].is32,ba[i])&&is_ds[(ba[i]-start)>>2]) { + if(internal_branch(ba[i])&&is_ds[(ba[i]-start)>>2]) { ds_assemble_entry(i); } else { - add_to_linker(out,ba[i],internal_branch(branch_regs[i].is32,ba[i])); + add_to_linker(out,ba[i],internal_branch(ba[i])); emit_jmp(0); } } @@ -4131,8 +3758,8 @@ void rjump_assemble(int i,struct regstat *i_regs) uint64_t bc_unneeded=branch_regs[i].u; bc_unneeded|=1|(1LL<>rs)&(branch_regs[i].is32>>rs1[i])&1) { + if((branch_regs[i].dirty>>rs)&1) { if(rs1[i]!=rt1[i+1]&&rs1[i]!=rt2[i+1]) { emit_loadreg(rs1[i],rs); } @@ -4173,7 +3800,7 @@ void rjump_assemble(int i,struct regstat *i_regs) emit_jmp(0); else emit_jns(0); - //load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1); + //load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,-1); #ifdef USE_MINI_HT if(rs1[i]==31) { do_miniht_jump(rs,rh,ht); @@ -4193,13 +3820,12 @@ void cjump_assemble(int i,struct regstat *i_regs) signed char 
*i_regmap=i_regs->regmap; int cc; int match; - match=match_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); + match=match_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); assem_debug("match=%d\n",match); - int s1h,s1l,s2h,s2l; + int s1l,s2l; int unconditional=0,nop=0; - int only32=0; int invert=0; - int internal=internal_branch(branch_regs[i].is32,ba[i]); + int internal=internal_branch(ba[i]); if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); if(!match) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -4208,15 +3834,11 @@ void cjump_assemble(int i,struct regstat *i_regs) if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); - s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); s2l=get_reg(branch_regs[i].regmap,rs2[i]); - s2h=get_reg(branch_regs[i].regmap,rs2[i]|64); } else { s1l=get_reg(i_regmap,rs1[i]); - s1h=get_reg(i_regmap,rs1[i]|64); s2l=get_reg(i_regmap,rs2[i]); - s2h=get_reg(i_regmap,rs2[i]|64); } if(rs1[i]==0&&rs2[i]==0) { @@ -4229,17 +3851,12 @@ void cjump_assemble(int i,struct regstat *i_regs) } else if(rs1[i]==0) { - s1l=s2l;s1h=s2h; - s2l=s2h=-1; - only32=(regs[i].was32>>rs2[i])&1; + s1l=s2l; + s2l=-1; } else if(rs2[i]==0) { - s2l=s2h=-1; - only32=(regs[i].was32>>rs1[i])&1; - } - else { - only32=(regs[i].was32>>rs1[i])&(regs[i].was32>>rs2[i])&1; + s2l=-1; } if(ooo[i]) { @@ -4251,20 +3868,20 @@ void cjump_assemble(int i,struct regstat *i_regs) uint64_t bc_unneeded=branch_regs[i].u; bc_unneeded&=~((1LL<>2 || source[i+1]!=0) { if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); + load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); else @@ -4291,43 +3908,6 @@ void cjump_assemble(int i,struct regstat *i_regs) void *taken = NULL, *nottaken = NULL, *nottaken1 = NULL; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); 
- if(!only32) - { - assert(s1h>=0); - if(opcode[i]==4) // BEQ - { - if(s2h>=0) emit_cmp(s1h,s2h); - else emit_test(s1h,s1h); - nottaken1=out; - emit_jne((void *)1l); - } - if(opcode[i]==5) // BNE - { - if(s2h>=0) emit_cmp(s1h,s2h); - else emit_test(s1h,s1h); - if(invert) taken=out; - else add_to_linker(out,ba[i],internal); - emit_jne(0); - } - if(opcode[i]==6) // BLEZ - { - emit_test(s1h,s1h); - if(invert) taken=out; - else add_to_linker(out,ba[i],internal); - emit_js(0); - nottaken1=out; - emit_jne((void *)1l); - } - if(opcode[i]==7) // BGTZ - { - emit_test(s1h,s1h); - nottaken1=out; - emit_js(1); - if(invert) taken=out; - else add_to_linker(out,ba[i],internal); - emit_jne(0); - } - } // if(!only32) //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); @@ -4393,8 +3973,8 @@ void cjump_assemble(int i,struct regstat *i_regs) #endif { if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); - store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); + store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); + load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); else @@ -4424,41 +4004,6 @@ void cjump_assemble(int i,struct regstat *i_regs) //printf("IOE\n"); void *taken = NULL, *nottaken = NULL, *nottaken1 = NULL; if(!unconditional&&!nop) { - if(!only32) - { - assert(s1h>=0); - if((opcode[i]&0x2f)==4) // BEQ - { - if(s2h>=0) emit_cmp(s1h,s2h); - else emit_test(s1h,s1h); - nottaken1=out; - emit_jne((void *)2l); - } - if((opcode[i]&0x2f)==5) // BNE - { - if(s2h>=0) emit_cmp(s1h,s2h); - else emit_test(s1h,s1h); - taken=out; - emit_jne((void *)1l); - } - if((opcode[i]&0x2f)==6) // BLEZ - { - 
emit_test(s1h,s1h); - taken=out; - emit_js(1); - nottaken1=out; - emit_jne((void *)2l); - } - if((opcode[i]&0x2f)==7) // BGTZ - { - emit_test(s1h,s1h); - nottaken1=out; - emit_js(2); - taken=out; - emit_jne((void *)1l); - } - } // if(!only32) - //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if((opcode[i]&0x2f)==4) // BEQ @@ -4496,11 +4041,11 @@ void cjump_assemble(int i,struct regstat *i_regs) if(!nop) { if(taken) set_jump_target(taken, out); assem_debug("1:\n"); - wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,ds_unneeded); + wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,ds_unneeded); // load regs - load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]); + load_regs(regs[i].regmap,branch_regs[i].regmap,rs1[i+1],rs2[i+1]); address_generation(i+1,&branch_regs[i],0); - load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,INVCP); + load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP); ds_assemble(i+1,&branch_regs[i]); cc=get_reg(branch_regs[i].regmap,CCREG); if(cc==-1) { @@ -4508,11 +4053,11 @@ void cjump_assemble(int i,struct regstat *i_regs) // CHECK: Is the following instruction (fall thru) allocated ok? 
} assert(cc==HOST_CCREG); - store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); + store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); + load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); else @@ -4531,10 +4076,10 @@ void cjump_assemble(int i,struct regstat *i_regs) set_jump_target(nottaken, out); assem_debug("2:\n"); if(!likely[i]) { - wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,ds_unneeded); - load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]); + wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,ds_unneeded); + load_regs(regs[i].regmap,branch_regs[i].regmap,rs1[i+1],rs2[i+1]); address_generation(i+1,&branch_regs[i],0); - load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG); + load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG); ds_assemble(i+1,&branch_regs[i]); } cc=get_reg(branch_regs[i].regmap,CCREG); @@ -4564,13 +4109,12 @@ void sjump_assemble(int i,struct regstat *i_regs) signed char *i_regmap=i_regs->regmap; int cc; int match; - match=match_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); + match=match_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); assem_debug("smatch=%d\n",match); - int s1h,s1l; + int s1l; int unconditional=0,nevertaken=0; - int only32=0; int invert=0; - int internal=internal_branch(branch_regs[i].is32,ba[i]); + int internal=internal_branch(ba[i]); if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); if(!match) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -4582,11 +4126,9 @@ void sjump_assemble(int i,struct regstat *i_regs) if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); - 
s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); } else { s1l=get_reg(i_regmap,rs1[i]); - s1h=get_reg(i_regmap,rs1[i]|64); } if(rs1[i]==0) { @@ -4598,9 +4140,6 @@ void sjump_assemble(int i,struct regstat *i_regs) //assert(opcode2[i]!=0x10); //assert(opcode2[i]!=0x12); } - else { - only32=(regs[i].was32>>rs1[i])&1; - } if(ooo[i]) { // Out of order execution (delay slot first) @@ -4611,9 +4150,9 @@ void sjump_assemble(int i,struct regstat *i_regs) uint64_t bc_unneeded=branch_regs[i].u; bc_unneeded&=~((1LL<>2 || source[i+1]!=0) { if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); + load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); else @@ -4664,33 +4203,6 @@ void sjump_assemble(int i,struct regstat *i_regs) void *nottaken = NULL; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); - if(!only32) - { - assert(s1h>=0); - if((opcode2[i]&0xf)==0) // BLTZ/BLTZAL - { - emit_test(s1h,s1h); - if(invert){ - nottaken=out; - emit_jns(1); - }else{ - add_to_linker(out,ba[i],internal); - emit_js(0); - } - } - if((opcode2[i]&0xf)==1) // BGEZ/BLTZAL - { - emit_test(s1h,s1h); - if(invert){ - nottaken=out; - emit_js(1); - }else{ - add_to_linker(out,ba[i],internal); - emit_jns(0); - } - } - } // if(!only32) - else { assert(s1l>=0); if((opcode2[i]&0xf)==0) // BLTZ/BLTZAL @@ -4715,7 +4227,7 @@ void sjump_assemble(int i,struct regstat *i_regs) emit_jns(0); } } - } // if(!only32) + } if(invert) { #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -4732,8 +4244,8 @@ void sjump_assemble(int i,struct regstat *i_regs) #endif { if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); - store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); + 
store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); + load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); else @@ -4773,24 +4285,6 @@ void sjump_assemble(int i,struct regstat *i_regs) } if(!unconditional) { //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); - if(!only32) - { - assert(s1h>=0); - if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL - { - emit_test(s1h,s1h); - nottaken=out; - emit_jns(1); - } - if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL - { - emit_test(s1h,s1h); - nottaken=out; - emit_js(1); - } - } // if(!only32) - else - { assert(s1l>=0); if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL { @@ -4804,7 +4298,6 @@ void sjump_assemble(int i,struct regstat *i_regs) nottaken=out; emit_js(1); } - } } // if(!unconditional) int adj; uint64_t ds_unneeded=branch_regs[i].u; @@ -4813,11 +4306,11 @@ void sjump_assemble(int i,struct regstat *i_regs) // branch taken if(!nevertaken) { //assem_debug("1:\n"); - wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,ds_unneeded); + wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,ds_unneeded); // load regs - load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]); + load_regs(regs[i].regmap,branch_regs[i].regmap,rs1[i+1],rs2[i+1]); address_generation(i+1,&branch_regs[i],0); - load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,INVCP); + load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP); ds_assemble(i+1,&branch_regs[i]); cc=get_reg(branch_regs[i].regmap,CCREG); if(cc==-1) { @@ -4825,11 +4318,11 @@ void sjump_assemble(int i,struct regstat *i_regs) // CHECK: Is the following instruction (fall thru) allocated ok? 
} assert(cc==HOST_CCREG); - store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); + store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); + load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); else @@ -4847,10 +4340,10 @@ void sjump_assemble(int i,struct regstat *i_regs) set_jump_target(nottaken, out); assem_debug("1:\n"); if(!likely[i]) { - wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,ds_unneeded); - load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]); + wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,ds_unneeded); + load_regs(regs[i].regmap,branch_regs[i].regmap,rs1[i+1],rs2[i+1]); address_generation(i+1,&branch_regs[i],0); - load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG); + load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG); ds_assemble(i+1,&branch_regs[i]); } cc=get_reg(branch_regs[i].regmap,CCREG); @@ -4878,23 +4371,18 @@ void sjump_assemble(int i,struct regstat *i_regs) static void pagespan_assemble(int i,struct regstat *i_regs) { int s1l=get_reg(i_regs->regmap,rs1[i]); - int s1h=get_reg(i_regs->regmap,rs1[i]|64); int s2l=get_reg(i_regs->regmap,rs2[i]); - int s2h=get_reg(i_regs->regmap,rs2[i]|64); void *taken = NULL; void *nottaken = NULL; int unconditional=0; if(rs1[i]==0) { - s1l=s2l;s1h=s2h; - s2l=s2h=-1; + s1l=s2l; + s2l=-1; } else if(rs2[i]==0) { - s2l=s2h=-1; - } - if((i_regs->is32>>rs1[i])&(i_regs->is32>>rs2[i])&1) { - s1h=s2h=-1; + s2l=-1; } int hr=0; int addr=-1,alt=-1,ntaddr=-1; @@ -4936,7 +4424,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) } assert(hr=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); 
emit_cmov2imm_e_ne_compact(ba[i],start+i*4+8,addr); @@ -4977,11 +4465,6 @@ static void pagespan_assemble(int i,struct regstat *i_regs) { assert(s1l>=0); emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt); - if(s1h>=0) { - if(s2h>=0) emit_cmp(s1h,s2h); - else emit_test(s1h,s1h); - emit_cmovne_reg(alt,addr); - } if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); emit_cmovne_reg(alt,addr); @@ -4990,34 +4473,19 @@ static void pagespan_assemble(int i,struct regstat *i_regs) if((opcode[i]&0x3f)==5) // BNE { #ifdef HAVE_CMOV_IMM - if(s1h<0) { - if(s2l>=0) emit_cmp(s1l,s2l); - else emit_test(s1l,s1l); - emit_cmov2imm_e_ne_compact(start+i*4+8,ba[i],addr); - } - else + if(s2l>=0) emit_cmp(s1l,s2l); + else emit_test(s1l,s1l); + emit_cmov2imm_e_ne_compact(start+i*4+8,ba[i],addr); + #else + assert(s1l>=0); + emit_mov2imm_compact(start+i*4+8,addr,ba[i],alt); + if(s2l>=0) emit_cmp(s1l,s2l); + else emit_test(s1l,s1l); + emit_cmovne_reg(alt,addr); #endif - { - assert(s1l>=0); - emit_mov2imm_compact(start+i*4+8,addr,ba[i],alt); - if(s1h>=0) { - if(s2h>=0) emit_cmp(s1h,s2h); - else emit_test(s1h,s1h); - emit_cmovne_reg(alt,addr); - } - if(s2l>=0) emit_cmp(s1l,s2l); - else emit_test(s1l,s1l); - emit_cmovne_reg(alt,addr); - } } if((opcode[i]&0x3f)==0x14) // BEQL { - if(s1h>=0) { - if(s2h>=0) emit_cmp(s1h,s2h); - else emit_test(s1h,s1h); - nottaken=out; - emit_jne(0); - } if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); if(nottaken) set_jump_target(nottaken, out); @@ -5026,12 +4494,6 @@ static void pagespan_assemble(int i,struct regstat *i_regs) } if((opcode[i]&0x3f)==0x15) // BNEL { - if(s1h>=0) { - if(s2h>=0) emit_cmp(s1h,s2h); - else emit_test(s1h,s1h); - taken=out; - emit_jne(0); - } if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); nottaken=out; @@ -5042,25 +4504,13 @@ static void pagespan_assemble(int i,struct regstat *i_regs) { emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr); emit_cmpimm(s1l,1); - if(s1h>=0) emit_mov(addr,ntaddr); emit_cmovl_reg(alt,addr); - 
if(s1h>=0) { - emit_test(s1h,s1h); - emit_cmovne_reg(ntaddr,addr); - emit_cmovs_reg(alt,addr); - } } if((opcode[i]&0x3f)==7) // BGTZ { emit_mov2imm_compact(ba[i],addr,start+i*4+8,ntaddr); emit_cmpimm(s1l,1); - if(s1h>=0) emit_mov(addr,alt); emit_cmovl_reg(ntaddr,addr); - if(s1h>=0) { - emit_test(s1h,s1h); - emit_cmovne_reg(alt,addr); - emit_cmovs_reg(ntaddr,addr); - } } if((opcode[i]&0x3f)==0x16) // BLEZL { @@ -5101,7 +4551,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) } assert(i_regs->regmap[HOST_CCREG]==CCREG); - wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty); + wb_dirtys(regs[i].regmap,regs[i].dirty); if(likely[i]||unconditional) { emit_movimm(ba[i],HOST_BTREG); @@ -5124,7 +4574,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) if(likely[i]) { // Not-taken path set_jump_target(nottaken, out); - wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty); + wb_dirtys(regs[i].regmap,regs[i].dirty); void *branch_addr=out; emit_jmp(0); int target_addr=start+i*4+8; @@ -5151,13 +4601,13 @@ static void pagespan_ds() ll_add(jump_in+page,vaddr,(void *)out); assert(regs[0].regmap_entry[HOST_CCREG]==CCREG); if(regs[0].regmap[HOST_CCREG]!=CCREG) - wb_register(CCREG,regs[0].regmap_entry,regs[0].wasdirty,regs[0].was32); + wb_register(CCREG,regs[0].regmap_entry,regs[0].wasdirty); if(regs[0].regmap[HOST_BTREG]!=BTREG) emit_writeword(HOST_BTREG,&branch_target); - load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,rs1[0],rs2[0]); + load_regs(regs[0].regmap_entry,regs[0].regmap,rs1[0],rs2[0]); address_generation(0,®s[0],regs[0].regmap_entry); if(itype[0]==STORE||itype[0]==STORELR||(opcode[0]&0x3b)==0x39||(opcode[0]&0x3b)==0x3a) - load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,INVCP,INVCP); + load_regs(regs[0].regmap_entry,regs[0].regmap,INVCP,INVCP); is_delayslot=0; switch(itype[0]) { case ALU: @@ -5200,7 +4650,6 @@ static void pagespan_ds() case RJUMP: case CJUMP: case SJUMP: - case FJUMP: SysPrintf("Jump in the delay slot. 
This is probably a bug.\n"); } int btaddr=get_reg(regs[0].regmap,BTREG); @@ -5218,11 +4667,11 @@ static void pagespan_ds() #endif void *branch = out; emit_jeq(0); - store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,-1); + store_regs_bt(regs[0].regmap,regs[0].dirty,-1); emit_jmp(jump_vaddr_reg[btaddr]); set_jump_target(branch, out); - store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4); - load_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4); + store_regs_bt(regs[0].regmap,regs[0].dirty,start+4); + load_regs_bt(regs[0].regmap,regs[0].dirty,start+4); } // Basic liveness analysis for MIPS registers @@ -5246,7 +4695,7 @@ void unneeded_registers(int istart,int iend,int r) for (i=iend;i>=istart;i--) { //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r); - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) + if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP) { // If subroutine call, flag return address as a possible branch target if(rt1[i]==31 && i=istart;i--) { - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) + if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP) { if(ba[i]=(start+slen*4)) { @@ -5753,7 +5202,7 @@ void clean_registers(int istart,int iend,int wr) if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<istart) { - if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) + if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP) { // Don't store a register immediately after writing it, // may prevent dual-issue. 
@@ -5776,11 +5225,11 @@ void clean_registers(int istart,int iend,int wr) } printf("\n");*/ - //if(i==istart||(itype[i-1]!=RJUMP&&itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=FJUMP)) { + //if(i==istart||(itype[i-1]!=RJUMP&&itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP)) { regs[i].dirty|=will_dirty_i; #ifndef DESTRUCTIVE_WRITEBACK regs[i].dirty&=wont_dirty_i; - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) + if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP) { if(i>16)!=0x1000) { for(r=0;r>14):*ba);break; case SJUMP: printf (" %x: %s r%d,%8x\n",start+i*4,insn[i],rs1[i],start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14));break; - case FJUMP: - printf (" %x: %s %8x\n",start+i*4,insn[i],ba[i]);break; case RJUMP: if (opcode[i]==0x9&&rt1[i]!=31) printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],rt1[i],rs1[i]); @@ -6530,13 +5977,6 @@ int new_recompile_block(int addr) } likely[i]=(op2&2)>>1; break; - case FJUMP: - rs1[i]=FSREG; - rs2[i]=CSREG; - rt1[i]=0; - rt2[i]=0; - likely[i]=((source[i])>>17)&1; - break; case ALU: rs1[i]=(source[i]>>21)&0x1f; // source rs2[i]=(source[i]>>16)&0x1f; // subtract amount @@ -6682,13 +6122,13 @@ int new_recompile_block(int addr) ba[i]=start+i*4+8; // Ignore never taken branch else if(type==SJUMP&&rs1[i]==0&&!(op2&1)) ba[i]=start+i*4+8; // Ignore never taken branch - else if(type==CJUMP||type==SJUMP||type==FJUMP) + else if(type==CJUMP||type==SJUMP) ba[i]=start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14); else ba[i]=-1; - if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) { + if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP)) { int do_in_intrp=0; // branch in delay slot? 
- if(type==RJUMP||type==UJUMP||type==CJUMP||type==SJUMP||type==FJUMP) { + if(type==RJUMP||type==UJUMP||type==CJUMP||type==SJUMP) { // don't handle first branch and call interpreter if it's hit SysPrintf("branch in delay slot @%08x (%08x)\n", addr + i*4, addr); do_in_intrp=1; @@ -6755,7 +6195,7 @@ int new_recompile_block(int addr) } } slen=i; - if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==RJUMP||itype[i-1]==FJUMP) { + if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==RJUMP) { if(start+i*4==pagelimit) { itype[i-1]=SPAN; } @@ -6769,7 +6209,6 @@ int new_recompile_block(int addr) /* Pass 3 - Register allocation */ struct regstat current; // Current register allocations/status - current.is32=1; current.dirty=0; current.u=unneeded_reg[0]; clear_all_regs(current.regmap); @@ -6811,26 +6250,22 @@ int new_recompile_block(int addr) if(rs1[i-2]==0||rs2[i-2]==0) { if(rs1[i-2]) { - current.is32|=1LL<=0) current.regmap[hr]=-1; } if(rs2[i-2]) { - current.is32|=1LL<=0) current.regmap[hr]=-1; } } } } - current.is32=-1LL; memcpy(regmap_pre[i],current.regmap,sizeof(current.regmap)); regs[i].wasconst=current.isconst; - regs[i].was32=current.is32; regs[i].wasdirty=current.dirty; regs[i].loadedconst=0; - if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP&&itype[i]!=FJUMP) { + if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP) { if(i+1>rs1[i])&(current.is32>>rs2[i])&1)) - { - assert(0); - } if((rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]))|| (rs2[i]&&(rs2[i]==rt1[i+1]||rs2[i]==rt2[i+1]))) { // The delay slot overwrites one of our conditions. 
@@ -6991,10 +6418,6 @@ int new_recompile_block(int addr) regs[i].wasconst=0; if(rs1[i]) alloc_reg(¤t,i,rs1[i]); if(rs2[i]) alloc_reg(¤t,i,rs2[i]); - if(!((current.is32>>rs1[i])&(current.is32>>rs2[i])&1)) - { - assert(0); - } } else { @@ -7008,10 +6431,6 @@ int new_recompile_block(int addr) alloc_cc(¤t,i); dirty_reg(¤t,CCREG); alloc_reg(¤t,i,rs1[i]); - if(!(current.is32>>rs1[i]&1)) - { - assert(0); - } if(rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1])) { // The delay slot overwrites one of our conditions. // Allocate the branch condition registers instead. @@ -7019,10 +6438,6 @@ int new_recompile_block(int addr) current.wasconst=0; regs[i].wasconst=0; if(rs1[i]) alloc_reg(¤t,i,rs1[i]); - if(!((current.is32>>rs1[i])&1)) - { - assert(0); - } } else { @@ -7041,10 +6456,6 @@ int new_recompile_block(int addr) dirty_reg(¤t,CCREG); alloc_reg(¤t,i,rs1[i]); alloc_reg(¤t,i,rs2[i]); - if(!((current.is32>>rs1[i])&(current.is32>>rs2[i])&1)) - { - assert(0); - } } else if((opcode[i]&0x3E)==0x16) // BLEZL/BGTZL @@ -7055,10 +6466,6 @@ int new_recompile_block(int addr) alloc_cc(¤t,i); dirty_reg(¤t,CCREG); alloc_reg(¤t,i,rs1[i]); - if(!(current.is32>>rs1[i]&1)) - { - assert(0); - } } ds=1; //current.isconst=0; @@ -7075,17 +6482,12 @@ int new_recompile_block(int addr) alloc_cc(¤t,i); dirty_reg(¤t,CCREG); alloc_reg(¤t,i,rs1[i]); - if(!(current.is32>>rs1[i]&1)) - { - assert(0); - } if (rt1[i]==31) { // BLTZAL/BGEZAL alloc_reg(¤t,i,31); dirty_reg(¤t,31); //#ifdef REG_PREFETCH //alloc_reg(¤t,i,PTEMP); //#endif - //current.is32|=1LL<>rs1[i])&1)) - { - assert(0); - } } else { @@ -7115,17 +6513,10 @@ int new_recompile_block(int addr) alloc_cc(¤t,i); dirty_reg(¤t,CCREG); alloc_reg(¤t,i,rs1[i]); - if(!(current.is32>>rs1[i]&1)) - { - assert(0); - } } ds=1; //current.isconst=0; break; - case FJUMP: - assert(0); - break; case IMM16: imm16_alloc(¤t,i); break; @@ -7238,7 +6629,6 @@ int new_recompile_block(int addr) /* Branch post-alloc */ if(i>0) { - current.was32=current.is32; 
current.wasdirty=current.dirty; switch(itype[i-1]) { case UJUMP: @@ -7251,7 +6641,6 @@ int new_recompile_block(int addr) if(rt1[i-1]==31) { // JAL alloc_reg(&branch_regs[i-1],i-1,31); dirty_reg(&branch_regs[i-1],31); - branch_regs[i-1].is32|=1LL<<31; } memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); @@ -7267,7 +6656,6 @@ int new_recompile_block(int addr) if(rt1[i-1]!=0) { // JALR alloc_reg(&branch_regs[i-1],i-1,rt1[i-1]); dirty_reg(&branch_regs[i-1],rt1[i-1]); - branch_regs[i-1].is32|=1LL<>rs1[i-1])&(current.is32>>rs2[i-1])&1)) - { - assert(0); - } } memcpy(&branch_regs[i-1],¤t,sizeof(current)); branch_regs[i-1].isconst=0; @@ -7327,10 +6711,6 @@ int new_recompile_block(int addr) current.u=branch_unneeded_reg[i-1]&~(1LL<>rs1[i-1]&1)) - { - assert(0); - } } memcpy(&branch_regs[i-1],¤t,sizeof(current)); branch_regs[i-1].isconst=0; @@ -7385,10 +6765,6 @@ int new_recompile_block(int addr) current.u=branch_unneeded_reg[i-1]&~(1LL<>rs1[i-1]&1)) - { - assert(0); - } } memcpy(&branch_regs[i-1],¤t,sizeof(current)); branch_regs[i-1].isconst=0; @@ -7414,12 +6790,8 @@ int new_recompile_block(int addr) if(opcode2[i-1]&0x10) { // BxxZAL alloc_reg(&branch_regs[i-1],i-1,31); dirty_reg(&branch_regs[i-1],31); - branch_regs[i-1].is32|=1LL<<31; } break; - case FJUMP: - assert(0); - break; } if(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000) @@ -7427,7 +6799,6 @@ int new_recompile_block(int addr) if(rt1[i-1]==31) // JAL/JALR { // Subroutine call will return here, don't alloc any registers - current.is32=1; current.dirty=0; clear_all_regs(current.regmap); alloc_reg(¤t,i,CCREG); @@ -7436,7 +6807,6 @@ int new_recompile_block(int addr) else if(i+10&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP||itype[i]==SYSCALL||itype[i]==HLECALL)) + 
if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i]==SYSCALL||itype[i]==HLECALL)) { cc=0; } @@ -7496,9 +6864,7 @@ int new_recompile_block(int addr) cc++; } - flush_dirty_uppers(¤t); if(!is_ds[i]) { - regs[i].is32=current.is32; regs[i].dirty=current.dirty; regs[i].isconst=current.isconst; memcpy(constmap[i],current_constmap,sizeof(current_constmap)); @@ -7521,7 +6887,7 @@ int new_recompile_block(int addr) for (i=slen-1;i>=0;i--) { int hr; - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) + if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP) { if(ba[i]=(start+slen*4)) { @@ -7635,7 +7001,7 @@ int new_recompile_block(int addr) } } // Cycle count is needed at branches. Assume it is needed at the target too. - if(i==0||bt[i]||itype[i]==CJUMP||itype[i]==FJUMP||itype[i]==SPAN) { + if(i==0||bt[i]||itype[i]==CJUMP||itype[i]==SPAN) { if(regmap_pre[i][HOST_CCREG]==CCREG) nr|=1<=0||get_reg(branch_regs[i].regmap,rt1[i+1]|64)>=0) @@ -7740,9 +7106,9 @@ int new_recompile_block(int addr) (itype[i]!=SPAN||regs[i].regmap[hr]!=CCREG)) { if(i>(regs[i].regmap[hr]&63))&1)) { SysPrintf("fail: %x (%d %d!=%d)\n",start+i*4,hr,regmap_pre[i+1][hr],regs[i].regmap[hr]); assert(regmap_pre[i+1][hr]==regs[i].regmap[hr]); @@ -7770,7 +7136,7 @@ int new_recompile_block(int addr) clear_all_regs(f_regmap); for(i=0;i=start && ba[i]<(start+i*4)) if(itype[i+1]==NOP||itype[i+1]==MOV||itype[i+1]==ALU @@ -7780,7 +7146,7 @@ int new_recompile_block(int addr) ||itype[i+1]==COP2||itype[i+1]==C2LS||itype[i+1]==C2OP) { int t=(ba[i]-start)>>2; - if(t>0&&(itype[t-1]!=UJUMP&&itype[t-1]!=RJUMP&&itype[t-1]!=CJUMP&&itype[t-1]!=SJUMP&&itype[t-1]!=FJUMP)) // loop_preload can't handle jumps into delay slots + if(t>0&&(itype[t-1]!=UJUMP&&itype[t-1]!=RJUMP&&itype[t-1]!=CJUMP&&itype[t-1]!=SJUMP)) // loop_preload can't handle jumps into delay slots if(t<2||(itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||rt1[t-2]!=31) // call/ret assumes 
no registers allocated for(hr=0;hr2&&(itype[k-3]==UJUMP||itype[k-3]==RJUMP)&&rt1[k-3]==31) { break; } - if(r>63) { - // NB This can exclude the case where the upper-half - // register is lower numbered than the lower-half - // register. Not sure if it's worth fixing... - if(get_reg(regs[k-1].regmap,r&63)<0) break; - if(regs[k-1].is32&(1LL<<(r&63))) break; - } + assert(r < 64); k--; } - if(i\n",hr,start+k*4); while(k>16)!=0x1000) { regmap_pre[k+2][hr]=f_regmap[hr]; regs[k+2].wasdirty&=~(1<>16)==0x1000) { // Stop on unconditional branch break; } - if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) + if(itype[j]==CJUMP||itype[j]==SJUMP) { if(ooo[j]) { if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) @@ -7999,17 +7344,7 @@ int new_recompile_block(int addr) //printf("No free regs for store %x\n",start+j*4); break; } - if(f_regmap[hr]>=64) { - if(regs[j].is32&(1LL<<(f_regmap[hr]&63))) { - break; - } - else - { - if(get_reg(regs[j].regmap,f_regmap[hr]&63)<0) { - break; - } - } - } + assert(f_regmap[hr]<64); } } } @@ -8106,7 +7441,7 @@ int new_recompile_block(int addr) // to use, which can avoid a load-use penalty on certain CPUs. 
for(i=0;i=0;i--) { - if(itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) + if(itype[i]==CJUMP||itype[i]==SJUMP) { // Conditional branch if((source[i]>>16)!=0x1000&&i>16)!=0x1000)) { - wb_valid(regmap_pre[i],regs[i].regmap_entry,dirty_pre,regs[i].wasdirty,is32_pre, - unneeded_reg[i]); + wb_valid(regmap_pre[i],regs[i].regmap_entry,dirty_pre,regs[i].wasdirty,unneeded_reg[i]); } - if((itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)&&!likely[i]) { - is32_pre=branch_regs[i].is32; + if((itype[i]==CJUMP||itype[i]==SJUMP)&&!likely[i]) { dirty_pre=branch_regs[i].dirty; }else{ - is32_pre=regs[i].is32; dirty_pre=regs[i].dirty; } #endif // write back if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000)) { - wb_invalidate(regmap_pre[i],regs[i].regmap_entry,regs[i].wasdirty,regs[i].was32,unneeded_reg[i]); + wb_invalidate(regmap_pre[i],regs[i].regmap_entry,regs[i].wasdirty,unneeded_reg[i]); loop_preload(regmap_pre[i],regs[i].regmap_entry); } // branch target entry point @@ -8478,35 +7809,35 @@ int new_recompile_block(int addr) // load regs if(regs[i].regmap_entry[HOST_CCREG]==CCREG&®s[i].regmap[HOST_CCREG]!=CCREG) - wb_register(CCREG,regs[i].regmap_entry,regs[i].wasdirty,regs[i].was32); - load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs1[i],rs2[i]); + wb_register(CCREG,regs[i].regmap_entry,regs[i].wasdirty); + load_regs(regs[i].regmap_entry,regs[i].regmap,rs1[i],rs2[i]); address_generation(i,®s[i],regs[i].regmap_entry); - load_consts(regmap_pre[i],regs[i].regmap,regs[i].was32,i); - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) + load_consts(regmap_pre[i],regs[i].regmap,i); + if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP) { // Load the delay slot registers if necessary if(rs1[i+1]!=rs1[i]&&rs1[i+1]!=rs2[i]&&(rs1[i+1]!=rt1[i]||rt1[i]==0)) - load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs1[i+1],rs1[i+1]); + 
load_regs(regs[i].regmap_entry,regs[i].regmap,rs1[i+1],rs1[i+1]); if(rs2[i+1]!=rs1[i+1]&&rs2[i+1]!=rs1[i]&&rs2[i+1]!=rs2[i]&&(rs2[i+1]!=rt1[i]||rt1[i]==0)) - load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs2[i+1],rs2[i+1]); + load_regs(regs[i].regmap_entry,regs[i].regmap,rs2[i+1],rs2[i+1]); if(itype[i+1]==STORE||itype[i+1]==STORELR||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a) - load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,INVCP,INVCP); + load_regs(regs[i].regmap_entry,regs[i].regmap,INVCP,INVCP); } else if(i+11) { if(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000&&itype[i-1]!=SPAN) { - assert(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP&&itype[i-1]!=FJUMP); + assert(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP); assert(i==slen); - if(itype[i-2]!=CJUMP&&itype[i-2]!=SJUMP&&itype[i-2]!=FJUMP) { - store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4); + if(itype[i-2]!=CJUMP&&itype[i-2]!=SJUMP) { + store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG); } else if(!likely[i-2]) { - store_regs_bt(branch_regs[i-2].regmap,branch_regs[i-2].is32,branch_regs[i-2].dirty,start+i*4); + store_regs_bt(branch_regs[i-2].regmap,branch_regs[i-2].dirty,start+i*4); assert(branch_regs[i-2].regmap[HOST_CCREG]==CCREG); } else { - store_regs_bt(regs[i-2].regmap,regs[i-2].is32,regs[i-2].dirty,start+i*4); + store_regs_bt(regs[i-2].regmap,regs[i-2].dirty,start+i*4); assert(regs[i-2].regmap[HOST_CCREG]==CCREG); } add_to_linker(out,start+i*4,0); @@ -8596,8 +7925,8 @@ int new_recompile_block(int addr) else { assert(i>0); - assert(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP&&itype[i-1]!=FJUMP); - store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4); + 
assert(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP); + store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG); From 8062d65a99a541d3092672655e11cb2babfe3857 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 2 Nov 2021 17:47:56 +0200 Subject: [PATCH 055/597] drc: move some stuff out of assem_arm for reuse --- libpcsxcore/new_dynarec/assem_arm.c | 794 ------------------------- libpcsxcore/new_dynarec/new_dynarec.c | 797 +++++++++++++++++++++++++- 2 files changed, 794 insertions(+), 797 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 0e0acdc76..4953faa03 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -329,276 +329,6 @@ static void get_bounds(void *addr, u_char **start, u_char **end) *end=(u_char *)source+len; } -/* Register allocation */ - -// Note: registers are allocated clean (unmodified state) -// if you intend to modify the register, you must call dirty_reg(). 
-static void alloc_reg(struct regstat *cur,int i,signed char reg) -{ - int r,hr; - int preferred_reg = (reg&7); - if(reg==CCREG) preferred_reg=HOST_CCREG; - if(reg==PTEMP||reg==FTEMP) preferred_reg=12; - - // Don't allocate unused registers - if((cur->u>>reg)&1) return; - - // see if it's already allocated - for(hr=0;hrregmap[hr]==reg) return; - } - - // Keep the same mapping if the register was already allocated in a loop - preferred_reg = loop_reg(i,reg,preferred_reg); - - // Try to allocate the preferred register - if(cur->regmap[preferred_reg]==-1) { - cur->regmap[preferred_reg]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]; - assert(r < 64); - if((cur->u>>r)&1) { - cur->regmap[preferred_reg]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]; - if(r>=0) { - assert(r < 64); - if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;} - } - } - // Try to allocate any available register, but prefer - // registers that have not been used recently. - if(i>0) { - for(hr=0;hrregmap[hr]==-1) { - if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==-1) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]); - //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); - if(i>0) { - // Don't evict the cycle count at entry points, otherwise the entry - // stub will have to write it. 
- if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2; - for(j=10;j>=3;j--) - { - // Alloc preferred register if available - if(hsn[r=cur->regmap[preferred_reg]&63]==j) { - for(hr=0;hrregmap[hr]&63)==r) { - cur->regmap[hr]=-1; - cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]=reg; - return; - } - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<=0;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==reg) return; - } - - // Try to allocate any available register - for(hr=HOST_REGS-1;hr>=0;hr--) { - if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<=0;hr--) - { - r=cur->regmap[hr]; - if(r>=0) { - assert(r < 64); - if((cur->u>>r)&1) { - if(i==0||((unneeded_reg[i-1]>>r)&1)) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); - if(i>0) { - // Don't evict the cycle count at entry points, otherwise the entry - // stub will have to write it. 
- if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2; - for(j=10;j>=3;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { - for(hr=0;hr2) { - if(cur->regmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<2) { - if(cur->regmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<=0;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap_entry,i_regs->wasdirty); - if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_movimm(start+(i-ds)*4,EAX); // Get PC - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... - emit_jmp(ds?fp_exception_ds:fp_exception); -} - /* Special assem */ static void shift_assemble_arm(int i,struct regstat *i_regs) @@ -2600,188 +2264,6 @@ static void shift_assemble_arm(int i,struct regstat *i_regs) } } } - -static void speculate_mov(int rs,int rt) -{ - if(rt!=0) { - smrv_strong_next|=1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); - else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]); - else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); - else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]); - else { - smrv_strong_next&=~(1<=0) { - if(get_final_value(hr,i,&value)) - smrv[rt1[i]]=value; - else smrv[rt1[i]]=constmap[i][hr]; - smrv_strong_next|=1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); - else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); - } - break; - case LOAD: - if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) { - // special case for BIOS - smrv[rt1[i]]=0xa0000000; - 
smrv_strong_next|=1<>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst); -#endif -} - -enum { - MTYPE_8000 = 0, - MTYPE_8020, - MTYPE_0000, - MTYPE_A000, - MTYPE_1F80, -}; - -static int get_ptr_mem_type(u_int a) -{ - if(a < 0x00200000) { - if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0)) - // return wrong, must use memhandler for BIOS self-test to pass - // 007 does similar stuff from a00 mirror, weird stuff - return MTYPE_8000; - return MTYPE_0000; - } - if(0x1f800000 <= a && a < 0x1f801000) - return MTYPE_1F80; - if(0x80200000 <= a && a < 0x80800000) - return MTYPE_8020; - if(0xa0000000 <= a && a < 0xa0200000) - return MTYPE_A000; - return MTYPE_8000; -} - -static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) -{ - void *jaddr = NULL; - int type=0; - int mr=rs1[i]; - if(((smrv_strong|smrv_weak)>>mr)&1) { - type=get_ptr_mem_type(smrv[mr]); - //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type); - } - else { - // use the mirror we are running on - type=get_ptr_mem_type(start); - //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type); - } - - if(type==MTYPE_8020) { // RAM 80200000+ mirror - emit_andimm(addr,~0x00e00000,HOST_TEMPREG); - addr=*addr_reg_override=HOST_TEMPREG; - type=0; - } - else if(type==MTYPE_0000) { // RAM 0 mirror - emit_orimm(addr,0x80000000,HOST_TEMPREG); - addr=*addr_reg_override=HOST_TEMPREG; - type=0; - } - else if(type==MTYPE_A000) { // RAM A mirror - emit_andimm(addr,~0x20000000,HOST_TEMPREG); - addr=*addr_reg_override=HOST_TEMPREG; - type=0; - } - else if(type==MTYPE_1F80) { // scratchpad - if (psxH == (void *)0x1f800000) { - emit_addimm(addr,-0x1f800000,HOST_TEMPREG); - emit_cmpimm(HOST_TEMPREG,0x1000); - jaddr=out; - emit_jc(0); - } - else { - // do usual RAM check, jump will go to the right handler - type=0; - } - } - - if(type==0) - { - emit_cmpimm(addr,RAM_SIZE); - jaddr=out; - #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - // Hint to branch predictor that the branch is unlikely to be taken 
- if(rs1[i]>=28) - emit_jno_unlikely(0); - else - #endif - emit_jno(0); - if(ram_offset!=0) { - emit_addimm(addr,ram_offset,HOST_TEMPREG); - addr=*addr_reg_override=HOST_TEMPREG; - } - } - - return jaddr; -} - #define shift_assemble shift_assemble_arm static void loadlr_assemble_arm(int i,struct regstat *i_regs) @@ -2867,243 +2349,6 @@ static void loadlr_assemble_arm(int i,struct regstat *i_regs) } #define loadlr_assemble loadlr_assemble_arm -static void cop0_assemble(int i,struct regstat *i_regs) -{ - if(opcode2[i]==0) // MFC0 - { - signed char t=get_reg(i_regs->regmap,rt1[i]); - u_int copr=(source[i]>>11)&0x1f; - //assert(t>=0); // Why does this happen? OOT is weird - if(t>=0&&rt1[i]!=0) { - emit_readword(®_cop0[copr],t); - } - } - else if(opcode2[i]==4) // MTC0 - { - signed char s=get_reg(i_regs->regmap,rs1[i]); - char copr=(source[i]>>11)&0x1f; - assert(s>=0); - wb_register(rs1[i],i_regs->regmap,i_regs->dirty); - if(copr==9||copr==11||copr==12||copr==13) { - emit_readword(&last_count,HOST_TEMPREG); - emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc - emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_writeword(HOST_CCREG,&Count); - } - // What a mess. The status register (12) can enable interrupts, - // so needs a special case to handle a pending interrupt. - // The interrupt must be taken immediately, because a subsequent - // instruction might disable interrupts again. - if(copr==12||copr==13) { - if (is_delayslot) { - // burn cycles to cause cc_interrupt, which will - // reschedule next_interupt. Relies on CCREG from above. 
- assem_debug("MTC0 DS %d\n", copr); - emit_writeword(HOST_CCREG,&last_count); - emit_movimm(0,HOST_CCREG); - emit_storereg(CCREG,HOST_CCREG); - emit_loadreg(rs1[i],1); - emit_movimm(copr,0); - emit_call(pcsx_mtc0_ds); - emit_loadreg(rs1[i],s); - return; - } - emit_movimm(start+i*4+4,HOST_TEMPREG); - emit_writeword(HOST_TEMPREG,&pcaddr); - emit_movimm(0,HOST_TEMPREG); - emit_writeword(HOST_TEMPREG,&pending_exception); - } - //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12); - //else - if(s==HOST_CCREG) - emit_loadreg(rs1[i],1); - else if(s!=1) - emit_mov(s,1); - emit_movimm(copr,0); - emit_call(pcsx_mtc0); - if(copr==9||copr==11||copr==12||copr==13) { - emit_readword(&Count,HOST_CCREG); - emit_readword(&next_interupt,HOST_TEMPREG); - emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); - emit_writeword(HOST_TEMPREG,&last_count); - emit_storereg(CCREG,HOST_CCREG); - } - if(copr==12||copr==13) { - assert(!is_delayslot); - emit_readword(&pending_exception,14); - emit_test(14,14); - emit_jne(&do_interrupt); - } - emit_loadreg(rs1[i],s); - if(get_reg(i_regs->regmap,rs1[i]|64)>=0) - emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64)); - } - else - { - assert(opcode2[i]==0x10); - //if((source[i]&0x3f)==0x10) // RFE - { - emit_readword(&Status,0); - emit_andimm(0,0x3c,1); - emit_andimm(0,~0xf,0); - emit_orrshr_imm(1,2,0); - emit_writeword(0,&Status); - } - } -} - -static void cop2_get_dreg(u_int copr,signed char tl,signed char temp) -{ - switch (copr) { - case 1: - case 3: - case 5: - case 8: - case 9: - case 10: - case 11: - emit_readword(®_cop2d[copr],tl); - emit_signextend16(tl,tl); - emit_writeword(tl,®_cop2d[copr]); // hmh - break; - case 7: - case 16: - case 17: - case 18: - case 19: - emit_readword(®_cop2d[copr],tl); - emit_andimm(tl,0xffff,tl); - emit_writeword(tl,®_cop2d[copr]); - break; - case 15: - emit_readword(®_cop2d[14],tl); // SXY2 - emit_writeword(tl,®_cop2d[copr]); - break; - case 28: - 
case 29: - emit_readword(®_cop2d[9],temp); - emit_testimm(temp,0x8000); // do we need this? - emit_andimm(temp,0xf80,temp); - emit_andne_imm(temp,0,temp); - emit_shrimm(temp,7,tl); - emit_readword(®_cop2d[10],temp); - emit_testimm(temp,0x8000); - emit_andimm(temp,0xf80,temp); - emit_andne_imm(temp,0,temp); - emit_orrshr_imm(temp,2,tl); - emit_readword(®_cop2d[11],temp); - emit_testimm(temp,0x8000); - emit_andimm(temp,0xf80,temp); - emit_andne_imm(temp,0,temp); - emit_orrshl_imm(temp,3,tl); - emit_writeword(tl,®_cop2d[copr]); - break; - default: - emit_readword(®_cop2d[copr],tl); - break; - } -} - -static void cop2_put_dreg(u_int copr,signed char sl,signed char temp) -{ - switch (copr) { - case 15: - emit_readword(®_cop2d[13],temp); // SXY1 - emit_writeword(sl,®_cop2d[copr]); - emit_writeword(temp,®_cop2d[12]); // SXY0 - emit_readword(®_cop2d[14],temp); // SXY2 - emit_writeword(sl,®_cop2d[14]); - emit_writeword(temp,®_cop2d[13]); // SXY1 - break; - case 28: - emit_andimm(sl,0x001f,temp); - emit_shlimm(temp,7,temp); - emit_writeword(temp,®_cop2d[9]); - emit_andimm(sl,0x03e0,temp); - emit_shlimm(temp,2,temp); - emit_writeword(temp,®_cop2d[10]); - emit_andimm(sl,0x7c00,temp); - emit_shrimm(temp,3,temp); - emit_writeword(temp,®_cop2d[11]); - emit_writeword(sl,®_cop2d[28]); - break; - case 30: - emit_movs(sl,temp); - emit_mvnmi(temp,temp); -#ifdef HAVE_ARMV5 - emit_clz(temp,temp); -#else - emit_movs(temp,HOST_TEMPREG); - emit_movimm(0,temp); - emit_jeq((int)out+4*4); - emit_addpl_imm(temp,1,temp); - emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG); - emit_jns((int)out-2*4); -#endif - emit_writeword(sl,®_cop2d[30]); - emit_writeword(temp,®_cop2d[31]); - break; - case 31: - break; - default: - emit_writeword(sl,®_cop2d[copr]); - break; - } -} - -static void cop2_assemble(int i,struct regstat *i_regs) -{ - u_int copr=(source[i]>>11)&0x1f; - signed char temp=get_reg(i_regs->regmap,-1); - if (opcode2[i]==0) { // MFC2 - signed char tl=get_reg(i_regs->regmap,rt1[i]); - 
if(tl>=0&&rt1[i]!=0) - cop2_get_dreg(copr,tl,temp); - } - else if (opcode2[i]==4) { // MTC2 - signed char sl=get_reg(i_regs->regmap,rs1[i]); - cop2_put_dreg(copr,sl,temp); - } - else if (opcode2[i]==2) // CFC2 - { - signed char tl=get_reg(i_regs->regmap,rt1[i]); - if(tl>=0&&rt1[i]!=0) - emit_readword(®_cop2c[copr],tl); - } - else if (opcode2[i]==6) // CTC2 - { - signed char sl=get_reg(i_regs->regmap,rs1[i]); - switch(copr) { - case 4: - case 12: - case 20: - case 26: - case 27: - case 29: - case 30: - emit_signextend16(sl,temp); - break; - case 31: - //value = value & 0x7ffff000; - //if (value & 0x7f87e000) value |= 0x80000000; - emit_shrimm(sl,12,temp); - emit_shlimm(temp,12,temp); - emit_testimm(temp,0x7f000000); - emit_testeqimm(temp,0x00870000); - emit_testeqimm(temp,0x0000e000); - emit_orrne_imm(temp,0x80000000,temp); - break; - default: - temp=sl; - break; - } - emit_writeword(temp,®_cop2c[copr]); - assert(sl>=0); - } -} - static void c2op_prologue(u_int op,u_int reglist) { save_regs_all(reglist); @@ -3261,22 +2506,6 @@ static void c2op_assemble(int i,struct regstat *i_regs) } } -static void cop1_unusable(int i,struct regstat *i_regs) -{ - // XXX: should just just do the exception instead - //if(!cop1_usable) - { - void *jaddr=out; - emit_jmp(0); - add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0); - } -} - -static void cop1_assemble(int i,struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); -} - static void multdiv_assemble_arm(int i,struct regstat *i_regs) { // case 0x18: MULT @@ -3448,29 +2677,6 @@ static void do_miniht_insert(u_int return_address,int rt,int temp) { #endif } -static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t u) -{ - //if(dirty_pre==dirty) return; - int hr,reg; - for(hr=0;hr>(reg&63))&1) { - if(reg>0) { - if(((dirty_pre&~dirty)>>hr)&1) { - if(reg>0&®<34) { - emit_storereg(reg,hr); - } - else if(reg>=64) { - assert(0); - } - } - } - } - } - } -} - static void mark_clear_cache(void 
*target) { u_long offset = (u_char *)target - translation_cache; diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 78a342d26..987892f0b 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -295,6 +295,7 @@ static void add_stub(enum stub_type type, void *addr, void *retaddr, static void add_stub_r(enum stub_type type, void *addr, void *retaddr, int i, int addr_reg, struct regstat *i_regs, int ccadj, u_int reglist); static void add_to_linker(void *addr, u_int target, int ext); +static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override); static void mprotect_w_x(void *start, void *end, int is_x) { @@ -783,6 +784,47 @@ void alloc_all(struct regstat *cur,int i) } } +#ifdef DRC_DBG +extern void gen_interupt(); +extern void do_insn_cmp(); +#define FUNCNAME(f) { (intptr_t)f, " " #f } +static const struct { + intptr_t addr; + const char *name; +} function_names[] = { + FUNCNAME(cc_interrupt), + FUNCNAME(gen_interupt), + FUNCNAME(get_addr_ht), + FUNCNAME(get_addr), + FUNCNAME(jump_handler_read8), + FUNCNAME(jump_handler_read16), + FUNCNAME(jump_handler_read32), + FUNCNAME(jump_handler_write8), + FUNCNAME(jump_handler_write16), + FUNCNAME(jump_handler_write32), + FUNCNAME(invalidate_addr), + FUNCNAME(verify_code_vm), + FUNCNAME(verify_code), + FUNCNAME(jump_hlecall), + FUNCNAME(jump_syscall_hle), + FUNCNAME(new_dyna_leave), + FUNCNAME(pcsx_mtc0), + FUNCNAME(pcsx_mtc0_ds), + FUNCNAME(do_insn_cmp), +}; + +static const char *func_name(intptr_t a) +{ + int i; + for (i = 0; i < sizeof(function_names)/sizeof(function_names[0]); i++) + if (function_names[i].addr == a) + return function_names[i].name; + return ""; +} +#else +#define func_name(x) "" +#endif + #ifdef __i386__ #include "assem_x86.c" #endif @@ -1149,6 +1191,276 @@ void clean_blocks(u_int page) } } +/* Register allocation */ + +// Note: registers are allocated clean (unmodified state) +// if you intend to modify the 
register, you must call dirty_reg(). +static void alloc_reg(struct regstat *cur,int i,signed char reg) +{ + int r,hr; + int preferred_reg = (reg&7); + if(reg==CCREG) preferred_reg=HOST_CCREG; + if(reg==PTEMP||reg==FTEMP) preferred_reg=12; + + // Don't allocate unused registers + if((cur->u>>reg)&1) return; + + // see if it's already allocated + for(hr=0;hrregmap[hr]==reg) return; + } + + // Keep the same mapping if the register was already allocated in a loop + preferred_reg = loop_reg(i,reg,preferred_reg); + + // Try to allocate the preferred register + if(cur->regmap[preferred_reg]==-1) { + cur->regmap[preferred_reg]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]; + assert(r < 64); + if((cur->u>>r)&1) { + cur->regmap[preferred_reg]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]; + if(r>=0) { + assert(r < 64); + if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;} + } + } + // Try to allocate any available register, but prefer + // registers that have not been used recently. + if(i>0) { + for(hr=0;hrregmap[hr]==-1) { + if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==-1) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]); + //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); + if(i>0) { + // Don't evict the cycle count at entry points, otherwise the entry + // stub will have to write it. 
+ if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; + if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2; + for(j=10;j>=3;j--) + { + // Alloc preferred register if available + if(hsn[r=cur->regmap[preferred_reg]&63]==j) { + for(hr=0;hrregmap[hr]&63)==r) { + cur->regmap[hr]=-1; + cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]=reg; + return; + } + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { + for(hr=0;hrregmap[hr]==r+64) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<=0;j--) + { + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j) { + for(hr=0;hrregmap[hr]==r+64) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==reg) return; + } + + // Try to allocate any available register + for(hr=HOST_REGS-1;hr>=0;hr--) { + if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<=0;hr--) + { + r=cur->regmap[hr]; + if(r>=0) { + assert(r < 64); + if((cur->u>>r)&1) { + if(i==0||((unneeded_reg[i-1]>>r)&1)) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); + if(i>0) { + // Don't evict the cycle count at entry points, otherwise the entry + // stub will have to write it. 
+ if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; + if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2; + for(j=10;j>=3;j--) + { + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { + for(hr=0;hr2) { + if(cur->regmap[hr]==r+64) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<2) { + if(cur->regmap[hr]==r) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<=0;j--) + { + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j) { + for(hr=0;hrregmap[hr]==r+64) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<>(reg&63))&1) { + if(reg>0) { + if(((dirty_pre&~dirty)>>hr)&1) { + if(reg>0&®<34) { + emit_storereg(reg,hr); + } + else if(reg>=64) { + assert(0); + } + } + } + } + } + } +} + void rlist() { int i; @@ -2039,7 +2374,96 @@ void shift_assemble(int i,struct regstat *i_regs) } #endif -void load_assemble(int i,struct regstat *i_regs) +enum { + MTYPE_8000 = 0, + MTYPE_8020, + MTYPE_0000, + MTYPE_A000, + MTYPE_1F80, +}; + +static int get_ptr_mem_type(u_int a) +{ + if(a < 0x00200000) { + if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0)) + // return wrong, must use memhandler for BIOS self-test to pass + // 007 does similar stuff from a00 mirror, weird stuff + return MTYPE_8000; + return MTYPE_0000; + } + if(0x1f800000 <= a && a < 0x1f801000) + return MTYPE_1F80; + if(0x80200000 <= a && a < 0x80800000) + return MTYPE_8020; + if(0xa0000000 <= a && a < 0xa0200000) + return MTYPE_A000; + return MTYPE_8000; +} + +static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) +{ + void *jaddr = NULL; + int type=0; + int mr=rs1[i]; + if(((smrv_strong|smrv_weak)>>mr)&1) { + type=get_ptr_mem_type(smrv[mr]); + //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type); + } + else { + // use the mirror we are running on + type=get_ptr_mem_type(start); + //printf("set nospec @%08x r%d 
%d\n", start+i*4, mr, type); + } + + if(type==MTYPE_8020) { // RAM 80200000+ mirror + emit_andimm(addr,~0x00e00000,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + type=0; + } + else if(type==MTYPE_0000) { // RAM 0 mirror + emit_orimm(addr,0x80000000,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + type=0; + } + else if(type==MTYPE_A000) { // RAM A mirror + emit_andimm(addr,~0x20000000,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + type=0; + } + else if(type==MTYPE_1F80) { // scratchpad + if (psxH == (void *)0x1f800000) { + emit_addimm(addr,-0x1f800000,HOST_TEMPREG); + emit_cmpimm(HOST_TEMPREG,0x1000); + jaddr=out; + emit_jc(0); + } + else { + // do the usual RAM check, jump will go to the right handler + type=0; + } + } + + if(type==0) + { + emit_cmpimm(addr,RAM_SIZE); + jaddr=out; + #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK + // Hint to branch predictor that the branch is unlikely to be taken + if(rs1[i]>=28) + emit_jno_unlikely(0); + else + #endif + emit_jno(0); + if(ram_offset!=0) { + emit_addimm(addr,ram_offset,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + } + } + + return jaddr; +} + +static void load_assemble(int i,struct regstat *i_regs) { int s,th,tl,addr; int offset; @@ -2480,12 +2904,236 @@ void storelr_assemble(int i,struct regstat *i_regs) } } -void c1ls_assemble(int i,struct regstat *i_regs) +static void cop0_assemble(int i,struct regstat *i_regs) +{ + if(opcode2[i]==0) // MFC0 + { + signed char t=get_reg(i_regs->regmap,rt1[i]); + u_int copr=(source[i]>>11)&0x1f; + //assert(t>=0); // Why does this happen? 
OOT is weird + if(t>=0&&rt1[i]!=0) { + emit_readword(®_cop0[copr],t); + } + } + else if(opcode2[i]==4) // MTC0 + { + signed char s=get_reg(i_regs->regmap,rs1[i]); + char copr=(source[i]>>11)&0x1f; + assert(s>=0); + wb_register(rs1[i],i_regs->regmap,i_regs->dirty); + if(copr==9||copr==11||copr==12||copr==13) { + emit_readword(&last_count,HOST_TEMPREG); + emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc + emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); + emit_writeword(HOST_CCREG,&Count); + } + // What a mess. The status register (12) can enable interrupts, + // so needs a special case to handle a pending interrupt. + // The interrupt must be taken immediately, because a subsequent + // instruction might disable interrupts again. + if(copr==12||copr==13) { + if (is_delayslot) { + // burn cycles to cause cc_interrupt, which will + // reschedule next_interupt. Relies on CCREG from above. + assem_debug("MTC0 DS %d\n", copr); + emit_writeword(HOST_CCREG,&last_count); + emit_movimm(0,HOST_CCREG); + emit_storereg(CCREG,HOST_CCREG); + emit_loadreg(rs1[i],1); + emit_movimm(copr,0); + emit_call(pcsx_mtc0_ds); + emit_loadreg(rs1[i],s); + return; + } + emit_movimm(start+i*4+4,HOST_TEMPREG); + emit_writeword(HOST_TEMPREG,&pcaddr); + emit_movimm(0,HOST_TEMPREG); + emit_writeword(HOST_TEMPREG,&pending_exception); + } + //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12); + //else + if(s==HOST_CCREG) + emit_loadreg(rs1[i],1); + else if(s!=1) + emit_mov(s,1); + emit_movimm(copr,0); + emit_call(pcsx_mtc0); + if(copr==9||copr==11||copr==12||copr==13) { + emit_readword(&Count,HOST_CCREG); + emit_readword(&next_interupt,HOST_TEMPREG); + emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG); + emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); + emit_writeword(HOST_TEMPREG,&last_count); + emit_storereg(CCREG,HOST_CCREG); + } + if(copr==12||copr==13) { + assert(!is_delayslot); + 
emit_readword(&pending_exception,14); + emit_test(14,14); + emit_jne(&do_interrupt); + } + emit_loadreg(rs1[i],s); + if(get_reg(i_regs->regmap,rs1[i]|64)>=0) + emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64)); + } + else + { + assert(opcode2[i]==0x10); + //if((source[i]&0x3f)==0x10) // RFE + { + emit_readword(&Status,0); + emit_andimm(0,0x3c,1); + emit_andimm(0,~0xf,0); + emit_orrshr_imm(1,2,0); + emit_writeword(0,&Status); + } + } +} + +static void cop1_unusable(int i,struct regstat *i_regs) +{ + // XXX: should just just do the exception instead + //if(!cop1_usable) + { + void *jaddr=out; + emit_jmp(0); + add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0); + } +} + +static void cop1_assemble(int i,struct regstat *i_regs) +{ + cop1_unusable(i, i_regs); +} + +static void c1ls_assemble(int i,struct regstat *i_regs) { cop1_unusable(i, i_regs); } -void c2ls_assemble(int i,struct regstat *i_regs) +// FP_STUB +static void do_cop1stub(int n) +{ + literal_pool(256); + assem_debug("do_cop1stub %x\n",start+stubs[n].a*4); + set_jump_target(stubs[n].addr, out); + int i=stubs[n].a; +// int rs=stubs[n].b; + struct regstat *i_regs=(struct regstat *)stubs[n].c; + int ds=stubs[n].d; + if(!ds) { + load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i); + //if(i_regs!=®s[i]) printf("oops: regs[i]=%x i_regs=%x",(int)®s[i],(int)i_regs); + } + //else {printf("fp exception in delay slot\n");} + wb_dirtys(i_regs->regmap_entry,i_regs->wasdirty); + if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); + emit_movimm(start+(i-ds)*4,EAX); // Get PC + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... 
+ emit_jmp(ds?fp_exception_ds:fp_exception); +} + +static void cop2_get_dreg(u_int copr,signed char tl,signed char temp) +{ + switch (copr) { + case 1: + case 3: + case 5: + case 8: + case 9: + case 10: + case 11: + emit_readword(®_cop2d[copr],tl); + emit_signextend16(tl,tl); + emit_writeword(tl,®_cop2d[copr]); // hmh + break; + case 7: + case 16: + case 17: + case 18: + case 19: + emit_readword(®_cop2d[copr],tl); + emit_andimm(tl,0xffff,tl); + emit_writeword(tl,®_cop2d[copr]); + break; + case 15: + emit_readword(®_cop2d[14],tl); // SXY2 + emit_writeword(tl,®_cop2d[copr]); + break; + case 28: + case 29: + emit_readword(®_cop2d[9],temp); + emit_testimm(temp,0x8000); // do we need this? + emit_andimm(temp,0xf80,temp); + emit_andne_imm(temp,0,temp); + emit_shrimm(temp,7,tl); + emit_readword(®_cop2d[10],temp); + emit_testimm(temp,0x8000); + emit_andimm(temp,0xf80,temp); + emit_andne_imm(temp,0,temp); + emit_orrshr_imm(temp,2,tl); + emit_readword(®_cop2d[11],temp); + emit_testimm(temp,0x8000); + emit_andimm(temp,0xf80,temp); + emit_andne_imm(temp,0,temp); + emit_orrshl_imm(temp,3,tl); + emit_writeword(tl,®_cop2d[copr]); + break; + default: + emit_readword(®_cop2d[copr],tl); + break; + } +} + +static void cop2_put_dreg(u_int copr,signed char sl,signed char temp) +{ + switch (copr) { + case 15: + emit_readword(®_cop2d[13],temp); // SXY1 + emit_writeword(sl,®_cop2d[copr]); + emit_writeword(temp,®_cop2d[12]); // SXY0 + emit_readword(®_cop2d[14],temp); // SXY2 + emit_writeword(sl,®_cop2d[14]); + emit_writeword(temp,®_cop2d[13]); // SXY1 + break; + case 28: + emit_andimm(sl,0x001f,temp); + emit_shlimm(temp,7,temp); + emit_writeword(temp,®_cop2d[9]); + emit_andimm(sl,0x03e0,temp); + emit_shlimm(temp,2,temp); + emit_writeword(temp,®_cop2d[10]); + emit_andimm(sl,0x7c00,temp); + emit_shrimm(temp,3,temp); + emit_writeword(temp,®_cop2d[11]); + emit_writeword(sl,®_cop2d[28]); + break; + case 30: + emit_movs(sl,temp); + emit_mvnmi(temp,temp); +#ifdef HAVE_ARMV5 + emit_clz(temp,temp); 
+#else + emit_movs(temp,HOST_TEMPREG); + emit_movimm(0,temp); + emit_jeq((int)out+4*4); + emit_addpl_imm(temp,1,temp); + emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG); + emit_jns((int)out-2*4); +#endif + emit_writeword(sl,®_cop2d[30]); + emit_writeword(temp,®_cop2d[31]); + break; + case 31: + break; + default: + emit_writeword(sl,®_cop2d[copr]); + break; + } +} + +static void c2ls_assemble(int i,struct regstat *i_regs) { int s,tl; int ar; @@ -2579,6 +3227,57 @@ void c2ls_assemble(int i,struct regstat *i_regs) } } +static void cop2_assemble(int i,struct regstat *i_regs) +{ + u_int copr=(source[i]>>11)&0x1f; + signed char temp=get_reg(i_regs->regmap,-1); + if (opcode2[i]==0) { // MFC2 + signed char tl=get_reg(i_regs->regmap,rt1[i]); + if(tl>=0&&rt1[i]!=0) + cop2_get_dreg(copr,tl,temp); + } + else if (opcode2[i]==4) { // MTC2 + signed char sl=get_reg(i_regs->regmap,rs1[i]); + cop2_put_dreg(copr,sl,temp); + } + else if (opcode2[i]==2) // CFC2 + { + signed char tl=get_reg(i_regs->regmap,rt1[i]); + if(tl>=0&&rt1[i]!=0) + emit_readword(®_cop2c[copr],tl); + } + else if (opcode2[i]==6) // CTC2 + { + signed char sl=get_reg(i_regs->regmap,rs1[i]); + switch(copr) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + emit_signextend16(sl,temp); + break; + case 31: + //value = value & 0x7ffff000; + //if (value & 0x7f87e000) value |= 0x80000000; + emit_shrimm(sl,12,temp); + emit_shlimm(temp,12,temp); + emit_testimm(temp,0x7f000000); + emit_testeqimm(temp,0x00870000); + emit_testeqimm(temp,0x0000e000); + emit_orrne_imm(temp,0x80000000,temp); + break; + default: + temp=sl; + break; + } + emit_writeword(temp,®_cop2c[copr]); + assert(sl>=0); + } +} + #ifndef multdiv_assemble void multdiv_assemble(int i,struct regstat *i_regs) { @@ -2648,6 +3347,98 @@ void intcall_assemble(int i,struct regstat *i_regs) emit_jmp(jump_intcall); } +static void speculate_mov(int rs,int rt) +{ + if(rt!=0) { + smrv_strong_next|=1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); + else 
if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]); + else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); + else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]); + else { + smrv_strong_next&=~(1<=0) { + if(get_final_value(hr,i,&value)) + smrv[rt1[i]]=value; + else smrv[rt1[i]]=constmap[i][hr]; + smrv_strong_next|=1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); + else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); + } + break; + case LOAD: + if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) { + // special case for BIOS + smrv[rt1[i]]=0xa0000000; + smrv_strong_next|=1<>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst); +#endif +} + void ds_assemble(int i,struct regstat *i_regs) { speculate_register_values(i); From be516ebe45e48044b599e9d9f9f2d296c3f3ee62 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 2 Nov 2021 23:08:49 +0000 Subject: [PATCH 056/597] drc: starting arm64 support mostly stubs only for now --- Makefile | 9 +- libpcsxcore/new_dynarec/assem_arm.c | 6 +- libpcsxcore/new_dynarec/assem_arm64.c | 1158 ++++++++++++++++++ libpcsxcore/new_dynarec/assem_arm64.h | 46 + libpcsxcore/new_dynarec/emu_if.c | 4 +- libpcsxcore/new_dynarec/emu_if.h | 2 + libpcsxcore/new_dynarec/linkage_arm.S | 2 +- libpcsxcore/new_dynarec/linkage_arm64.S | 267 ++++ libpcsxcore/new_dynarec/new_dynarec.c | 51 +- libpcsxcore/new_dynarec/new_dynarec.h | 2 +- libpcsxcore/new_dynarec/new_dynarec_config.h | 4 +- libpcsxcore/new_dynarec/pcsxmem.c | 32 +- 12 files changed, 1542 insertions(+), 41 deletions(-) create mode 100644 libpcsxcore/new_dynarec/assem_arm64.c create mode 100644 libpcsxcore/new_dynarec/assem_arm64.h create mode 100644 libpcsxcore/new_dynarec/linkage_arm64.S diff --git a/Makefile b/Makefile index f8d1dc17d..3f4b87df9 100644 --- a/Makefile +++ b/Makefile @@ -56,8 +56,15 @@ libpcsxcore/psxbios.o: CFLAGS += -Wno-nonnull # dynarec ifeq "$(USE_DYNAREC)" "1" -OBJS += libpcsxcore/new_dynarec/new_dynarec.o 
libpcsxcore/new_dynarec/linkage_arm.o +OBJS += libpcsxcore/new_dynarec/new_dynarec.o OBJS += libpcsxcore/new_dynarec/pcsxmem.o + ifeq "$(ARCH)" "arm" + OBJS += libpcsxcore/new_dynarec/linkage_arm.o + else ifeq "$(ARCH)" "aarch64" + OBJS += libpcsxcore/new_dynarec/linkage_arm64.o + else + $(error no dynarec support for architecture $(ARCH)) + endif else CFLAGS += -DDRC_DISABLE endif diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 4953faa03..d0b1bd794 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -55,7 +55,6 @@ extern int pcaddr; extern int pending_exception; extern int branch_target; extern uint64_t readmem_dword; -extern void *dynarec_local; extern u_int mini_ht[32][2]; void indirect_jump_indexed(); @@ -1141,6 +1140,11 @@ static void emit_jmpreg(u_int r) output_w32(0xe1a00000|rd_rn_rm(15,0,r)); } +static void emit_ret(void) +{ + emit_jmpreg(14); +} + static void emit_readword_indexed(int offset, int rs, int rt) { assert(offset>-4096&&offset<4096); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c new file mode 100644 index 000000000..f9c837eda --- /dev/null +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -0,0 +1,1158 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus/PCSX - assem_arm64.c * + * Copyright (C) 2009-2011 Ari64 * + * Copyright (C) 2010-2021 notaz * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#include "arm_features.h" + +#if defined(BASE_ADDR_FIXED) +#elif defined(BASE_ADDR_DYNAMIC) +u_char *translation_cache; +#else +u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); +#endif + +#define CALLER_SAVE_REGS 0x0007ffff + +#define unused __attribute__((unused)) + +extern int cycle_count; +extern int last_count; +extern int pcaddr; +extern int pending_exception; +extern int branch_target; +extern u_int mini_ht[32][2]; + +static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; + +//void indirect_jump_indexed(); +//void indirect_jump(); +void do_interrupt(); +//void jump_vaddr_r0(); + +void * const jump_vaddr_reg[32]; + +/* Linker */ + +static void set_jump_target(void *addr, void *target_) +{ + assert(0); +} + +// from a pointer to external jump stub (which was produced by emit_extjump2) +// find where the jumping insn is +static void *find_extjump_insn(void *stub) +{ + assert(0); + return NULL; +} + +// find where external branch is liked to using addr of it's stub: +// get address that insn one after stub loads (dyna_linker arg1), +// treat it as a pointer to branch insn, +// return addr where that branch jumps to +static void *get_pointer(void *stub) +{ + //printf("get_pointer(%x)\n",(int)stub); + assert(0); + return NULL; +} + +// Find the "clean" entry point from a "dirty" entry point +// by skipping past the call to verify_code +static void *get_clean_addr(void *addr) +{ + assert(0); + return NULL; +} + +static int verify_dirty(u_int *ptr) +{ + assert(0); + return 0; +} + +// This doesn't necessarily find all clean entry points, just +// guarantees that it's not dirty +static int isclean(void *addr) +{ + assert(0); + 
return 0; +} + +// get source that block at addr was compiled from (host pointers) +static void get_bounds(void *addr, u_char **start, u_char **end) +{ + assert(0); +} + +// Allocate a specific ARM register. +static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr) +{ + int n; + int dirty=0; + + // see if it's already allocated (and dealloc it) + for(n=0;nregmap[n]==reg) { + dirty=(cur->dirty>>n)&1; + cur->regmap[n]=-1; + } + } + + cur->regmap[hr]=reg; + cur->dirty&=~(1<dirty|=dirty<isconst&=~(1< 134217727) { + if ((uintptr_t)addr > 2) { + SysPrintf("%s: out of range: %08x\n", __func__, offset); + exit(1); + } + return 0; + } + return ((u_int)offset >> 2) & 0x01ffffff; +} + +static u_int genjmpcc(u_char *addr) +{ + intptr_t offset = addr - out; + if (offset < -1048576 || offset > 1048572) { + if ((uintptr_t)addr > 2) { + SysPrintf("%s: out of range: %08x\n", __func__, offset); + exit(1); + } + return 0; + } + return ((u_int)offset >> 2) & 0xfffff; +} + +static void emit_mov(u_int rs, u_int rt) +{ + assem_debug("mov %s,%s\n",regname[rt],regname[rs]); + assert(0); +} + +static void emit_movs(u_int rs, u_int rt) +{ + assem_debug("mov %s,%s\n",regname[rt],regname[rs]); + assert(0); +} + +static void emit_add(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_sbc(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_neg(u_int rs, u_int rt) +{ + assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]); + assert(0); +} + +static void emit_negs(u_int rs, u_int rt) +{ + assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]); + assert(0); +} + +static void emit_sub(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_subs(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("subs 
%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_zeroreg(u_int rt) +{ + assem_debug("mov %s,#0\n",regname[rt]); + assert(0); +} + +static void emit_movw(u_int imm,u_int rt) +{ + assert(imm<65536); + assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm); + assert(0); +} + +static void emit_movt(u_int imm,u_int rt) +{ + assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000); + assert(0); +} + +static void emit_movimm(u_int imm, u_int rt) +{ + assem_debug("mov %s,#%x\n", regname[rt], imm); + if ((imm & 0xffff0000) == 0) + output_w32(0x52800000 | imm16_rd(imm, rt)); + else if ((imm & 0xffff0000) == 0xffff0000) + assert(0); + else { + output_w32(0x52800000 | imm16_rd(imm & 0xffff, rt)); + output_w32(0x72a00000 | imm16_rd(imm >> 16, rt)); + } +} + +static void emit_loadreg(u_int r, u_int hr) +{ + assert(r < 64); + if (r == 0) + emit_zeroreg(hr); + else { + void *addr = ®[r]; + switch (r) { + case HIREG: addr = &hi; break; + case LOREG: addr = &lo; break; + case CCREG: addr = &cycle_count; break; + case CSREG: addr = &Status; break; + case INVCP: addr = &invc_ptr; break; + default: assert(r < 32); break; + } + uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; + assert(offset < 4096); + assem_debug("ldr %s,fp+%lx\n", regname[hr], offset); + assert(0); + } +} + +static void emit_storereg(u_int r, int hr) +{ + assert(r < 64); + void *addr = ®[r]; + switch (r) { + case HIREG: addr = &hi; break; + case LOREG: addr = &lo; break; + case CCREG: addr = &cycle_count; break; + default: assert(r < 32); break; + } + uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; + assert(offset < 4096); + assem_debug("str %s,fp+%lx\n", regname[hr], offset); + assert(0); +} + +static void emit_test(u_int rs, u_int rt) +{ + assem_debug("tst %s,%s\n",regname[rs],regname[rt]); + assert(0); +} + +static void emit_testimm(u_int rs,int imm) +{ + assem_debug("tst %s,#%d\n",regname[rs],imm); + assert(0); +} + +static 
void emit_testeqimm(u_int rs,int imm) +{ + assem_debug("tsteq %s,$%d\n",regname[rs],imm); + assert(0); +} + +static void emit_not(u_int rs,u_int rt) +{ + assem_debug("mvn %s,%s\n",regname[rt],regname[rs]); + assert(0); +} + +static void emit_mvnmi(u_int rs,u_int rt) +{ + assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]); + assert(0); +} + +static void emit_and(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_or(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt) +{ + assert(rs < 31); + assert(rt < 31); + assert(imm < 32); + assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm); + assert(0); +} + +static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) +{ + assert(rs < 31); + assert(rt < 31); + assert(imm < 32); + assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm); + assert(0); +} + +static void emit_or_and_set_flags(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_xor(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_addimm(u_int rs, uintptr_t imm, u_int rt) +{ + assert(rs < 31); + assert(rt < 31); + assert(0); +} + +static void emit_addimm_and_set_flags(int imm, u_int rt) +{ + assert(0); +} + +static void emit_addimm_no_flags(u_int imm,u_int rt) +{ + emit_addimm(rt,imm,rt); +} + +static void emit_addnop(u_int r) +{ + assert(r<16); + assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]); + assert(0); +} + +static void emit_adcimm(u_int rs,int imm,u_int rt) +{ + assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm); + assert(0); +} + +static void emit_rscimm(u_int rs,int imm,u_int rt) +{ + assem_debug("rsc 
%s,%s,#%d\n",regname[rt],regname[rs],imm); + assert(0); +} + +static void emit_addimm64_32(u_int rsh,u_int rsl,int imm,u_int rth,u_int rtl) +{ + assert(0); +} + +static void emit_andimm(u_int rs,int imm,u_int rt) +{ + assert(0); +} + +static void emit_orimm(u_int rs,int imm,u_int rt) +{ + assert(0); +} + +static void emit_xorimm(u_int rs,int imm,u_int rt) +{ + assert(0); +} + +static void emit_shlimm(u_int rs,u_int imm,u_int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm); + assert(0); +} + +static void emit_lsls_imm(u_int rs,int imm,u_int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm); + assert(0); +} + +static unused void emit_lslpls_imm(u_int rs,int imm,u_int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm); + assert(0); +} + +static void emit_shrimm(u_int rs,u_int imm,u_int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm); + assert(0); +} + +static void emit_sarimm(u_int rs,u_int imm,u_int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm); + assert(0); +} + +static void emit_rorimm(u_int rs,u_int imm,u_int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm); + assert(0); +} + +static void emit_signextend16(u_int rs, u_int rt) +{ + assem_debug("sxth %s,%s\n", regname[rt], regname[rs]); + assert(0); +} + +static void emit_shl(u_int rs,u_int shift,u_int rt) +{ + assert(rs < 31); + assert(rt < 31); + assert(shift < 16); + assert(0); +} + +static void emit_shr(u_int rs,u_int shift,u_int rt) +{ + assert(rs < 31); + assert(rt < 31); + assert(shift<16); + assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); + assert(0); +} + +static void emit_sar(u_int rs,u_int shift,u_int rt) +{ + assert(rs < 31); + assert(rt < 31); + 
assert(shift<16); + assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); + assert(0); +} + +static void emit_orrshl(u_int rs,u_int shift,u_int rt) +{ + assert(rs < 31); + assert(rt < 31); + assert(shift<16); + assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]); + assert(0); +} + +static void emit_orrshr(u_int rs,u_int shift,u_int rt) +{ + assert(rs < 31); + assert(rt < 31); + assert(shift<16); + assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]); + assert(0); +} + +static void emit_cmpimm(u_int rs,int imm) +{ + assert(0); +} + +static void emit_cmovne_imm(int imm,u_int rt) +{ + assem_debug("movne %s,#%d\n",regname[rt],imm); + assert(0); +} + +static void emit_cmovl_imm(int imm,u_int rt) +{ + assem_debug("movlt %s,#%d\n",regname[rt],imm); + assert(0); +} + +static void emit_cmovb_imm(int imm,u_int rt) +{ + assem_debug("movcc %s,#%d\n",regname[rt],imm); + assert(0); +} + +static void emit_cmovs_imm(int imm,u_int rt) +{ + assem_debug("movmi %s,#%d\n",regname[rt],imm); + assert(0); +} + +static void emit_cmovne_reg(u_int rs,u_int rt) +{ + assem_debug("movne %s,%s\n",regname[rt],regname[rs]); + assert(0); +} + +static void emit_cmovl_reg(u_int rs,u_int rt) +{ + assem_debug("movlt %s,%s\n",regname[rt],regname[rs]); + assert(0); +} + +static void emit_cmovs_reg(u_int rs,u_int rt) +{ + assem_debug("movmi %s,%s\n",regname[rt],regname[rs]); + assert(0); +} + +static void emit_slti32(u_int rs,int imm,u_int rt) +{ + if(rs!=rt) emit_zeroreg(rt); + emit_cmpimm(rs,imm); + if(rs==rt) emit_movimm(0,rt); + emit_cmovl_imm(1,rt); +} + +static void emit_sltiu32(u_int rs,int imm,u_int rt) +{ + if(rs!=rt) emit_zeroreg(rt); + emit_cmpimm(rs,imm); + if(rs==rt) emit_movimm(0,rt); + emit_cmovb_imm(1,rt); +} + +static void emit_cmp(u_int rs,u_int rt) +{ + assem_debug("cmp %s,%s\n",regname[rs],regname[rt]); + assert(0); +} + +static void emit_set_gz32(u_int rs, u_int rt) +{ + //assem_debug("set_gz32\n"); 
+ emit_cmpimm(rs,1); + emit_movimm(1,rt); + emit_cmovl_imm(0,rt); +} + +static void emit_set_nz32(u_int rs, u_int rt) +{ + //assem_debug("set_nz32\n"); + assert(0); +} + +static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt) +{ + //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); + if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt); + emit_cmp(rs1,rs2); + if(rs1==rt||rs2==rt) emit_movimm(0,rt); + emit_cmovl_imm(1,rt); +} + +static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt) +{ + //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); + if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt); + emit_cmp(rs1,rs2); + if(rs1==rt||rs2==rt) emit_movimm(0,rt); + emit_cmovb_imm(1,rt); +} + +#pragma GCC diagnostic ignored "-Wunused-variable" +static void emit_call(const void *a_) +{ + uintptr_t a = (uintptr_t)a_; + assem_debug("bl %p (%p+%lx)%s\n", a_, out, (u_char *)a_ - out, func_name(a)); + assert(0); +} + +static void emit_jmp(const void *a_) +{ + uintptr_t a = (uintptr_t)a_; + assem_debug("b %p (%p+%lx)%s\n", a_, out, (u_char *)a_ - out, func_name(a)); + assert(0); +} + +static void emit_jne(const void *a_) +{ + uintptr_t a = (uintptr_t)a_; + assem_debug("bne %p\n", a_); + assert(0); +} + +static void emit_jeq(int a) +{ + assem_debug("beq %x\n",a); + assert(0); +} + +static void emit_js(int a) +{ + assem_debug("bmi %x\n",a); + assert(0); +} + +static void emit_jns(int a) +{ + assem_debug("bpl %x\n",a); + assert(0); +} + +static void emit_jl(int a) +{ + assem_debug("blt %x\n",a); + assert(0); +} + +static void emit_jge(int a) +{ + assem_debug("bge %x\n",a); + assert(0); +} + +static void emit_jno(int a) +{ + assem_debug("bvc %x\n",a); + assert(0); +} + +static void emit_jc(int a) +{ + assem_debug("bcs %x\n",a); + assert(0); +} + +static void emit_jcc(void *a_) +{ + uintptr_t a = (uintptr_t)a_; + assem_debug("bcc %p\n", a_); + assert(0); +} + +static void emit_callreg(u_int r) +{ + assert(r < 31); + 
assem_debug("blx %s\n", regname[r]); + assert(0); +} + +static void emit_jmpreg(u_int r) +{ + assem_debug("mov pc,%s\n",regname[r]); + assert(0); +} + +static void emit_retreg(u_int r) +{ + assem_debug("ret %s\n", r == LR ? "" : regname[r]); + output_w32(0xd65f0000 | rm_rn_rd(0, r, 0)); +} + +static void emit_ret(void) +{ + emit_retreg(LR); +} + +static void emit_readword_indexed(int offset, u_int rs, u_int rt) +{ + assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset); + assert(0); +} + +static void emit_readword_dualindexedx4(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_ldrcc_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_ldrccb_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_ldrccsb_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_ldrcch_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_ldrccsh_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_movsbl_indexed(int offset, u_int rs, u_int rt) +{ + assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset); + assert(0); +} + +static void emit_movswl_indexed(int offset, u_int rs, u_int rt) +{ + assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset); + assert(0); +} + +static void emit_movzbl_indexed(int offset, u_int rs, u_int rt) +{ + assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset); + assert(0); +} + +static void emit_movzwl_indexed(int offset, u_int 
rs, u_int rt) +{ + assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset); + assert(0); +} + +static void emit_ldrd(int offset, u_int rs, u_int rt) +{ + assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset); + assert(0); +} + +static void emit_readword(void *addr, u_int rt) +{ + uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; + assert(offset<4096); + assem_debug("ldr %s,fp+%lx\n", regname[rt], offset); + assert(0); +} + +static void emit_writeword_indexed(u_int rt, int offset, u_int rs) +{ + assert(offset>-4096&&offset<4096); + assem_debug("str %s,%s+%x\n",regname[rt],regname[rs],offset); + assert(0); +} + +static void emit_writehword_indexed(u_int rt, int offset, u_int rs) +{ + assert(offset>-256&&offset<256); + assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset); + assert(0); +} + +static void emit_writebyte_indexed(u_int rt, int offset, u_int rs) +{ + assert(offset>-4096&&offset<4096); + assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset); + assert(0); +} + +static void emit_strcc_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_strccb_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_strcch_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assert(0); +} + +static void emit_writeword(u_int rt, void *addr) +{ + uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; + assert(offset<4096); + assem_debug("str %s,fp+%lx\n", regname[rt], offset); + assert(0); +} + +static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) +{ + assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); + assert(rs1<16); + assert(rs2<16); + assert(hi<16); + assert(lo<16); + assert(0); +} + +static void 
emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo) +{ + assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); + assert(rs1<16); + assert(rs2<16); + assert(hi<16); + assert(lo<16); + assert(0); +} + +static void emit_clz(u_int rs,u_int rt) +{ + assem_debug("clz %s,%s\n",regname[rt],regname[rs]); + assert(0); +} + +// Load 2 immediates optimizing for small code size +static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) +{ + assert(0); +} + +// Conditionally select one of two immediates, optimizing for small code size +// This will only be called if HAVE_CMOV_IMM is defined +static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) +{ + assert(0); +} + +// special case for checking invalid_code +static void emit_cmpmem_indexedsr12_reg(int base,u_int r,int imm) +{ + assert(imm<128&&imm>=0); + assert(r>=0&&r<16); + assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]); + assert(0); +} + +// Used to preload hash table entries +static unused void emit_prefetchreg(u_int r) +{ + assem_debug("pld %s\n",regname[r]); + assert(0); +} + +// Special case for mini_ht +static void emit_ldreq_indexed(u_int rs, u_int offset, u_int rt) +{ + assert(offset<4096); + assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset); + assert(0); +} + +static void emit_orrne_imm(u_int rs,int imm,u_int rt) +{ + assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm); + assert(0); +} + +static void emit_andne_imm(u_int rs,int imm,u_int rt) +{ + assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm); + assert(0); +} + +static unused void emit_addpl_imm(u_int rs,int imm,u_int rt) +{ + assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm); + assert(0); +} + +static void save_regs_all(u_int reglist) +{ + if(!reglist) return; + assert(0); +} + +static void restore_regs_all(u_int reglist) +{ + if(!reglist) return; + assert(0); +} + +// Save registers before function call +static void save_regs(u_int 
reglist) +{ + reglist &= CALLER_SAVE_REGS; // only save the caller-save registers + save_regs_all(reglist); +} + +// Restore registers after function call +static void restore_regs(u_int reglist) +{ + reglist &= CALLER_SAVE_REGS; + restore_regs_all(reglist); +} + +/* Stubs/epilogue */ + +static void literal_pool(int n) +{ + (void)literals; +} + +static void literal_pool_jumpover(int n) +{ +} + +static void emit_extjump2(u_char *addr, int target, void *linker) +{ + assert(0); +} + +static void emit_extjump(void *addr, int target) +{ + emit_extjump2(addr, target, dyna_linker); +} + +static void emit_extjump_ds(void *addr, int target) +{ + emit_extjump2(addr, target, dyna_linker_ds); +} + +// put rt_val into rt, potentially making use of rs with value rs_val +static void emit_movimm_from(u_int rs_val,u_int rs,u_int rt_val,u_int rt) +{ + assert(0); +} + +// return 1 if above function can do it's job cheaply +static int is_similar_value(u_int v1,u_int v2) +{ + assert(0); + return 0; +} + +//#include "pcsxmem.h" +//#include "pcsxmem_inline.c" + +static void do_readstub(int n) +{ + assem_debug("do_readstub %x\n",start+stubs[n].a*4); + assert(0); +} + +static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) +{ + assert(0); +} + +static void do_writestub(int n) +{ + assem_debug("do_writestub %x\n",start+stubs[n].a*4); + assert(0); +} + +static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) +{ + assert(0); +} + +static void do_unalignedwritestub(int n) +{ + assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4); + assert(0); +} + +static void do_invstub(int n) +{ + assert(0); +} + +void *do_dirty_stub(int i) +{ + assem_debug("do_dirty_stub %x\n",start+i*4); + // Careful about the code output here, verify_dirty needs to parse it. 
+ assert(0); + load_regs_entry(i); + return NULL; +} + +static void do_dirty_stub_ds() +{ + // Careful about the code output here, verify_dirty needs to parse it. + assert(0); +} + +/* Special assem */ + +#define shift_assemble shift_assemble_arm64 + +static void shift_assemble_arm64(int i,struct regstat *i_regs) +{ + assert(0); +} +#define loadlr_assemble loadlr_assemble_arm64 + +static void loadlr_assemble_arm64(int i,struct regstat *i_regs) +{ + assert(0); +} + +static void c2op_assemble(int i,struct regstat *i_regs) +{ + assert(0); +} + +static void multdiv_assemble_arm64(int i,struct regstat *i_regs) +{ + assert(0); +} +#define multdiv_assemble multdiv_assemble_arm64 + +static void do_preload_rhash(u_int r) { + // Don't need this for ARM. On x86, this puts the value 0xf8 into the + // register. On ARM the hash can be done with a single instruction (below) +} + +static void do_preload_rhtbl(u_int ht) { + emit_addimm(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht); +} + +static void do_rhash(u_int rs,u_int rh) { + emit_andimm(rs, 0xf8, rh); +} + +static void do_miniht_load(int ht,u_int rh) { + assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]); + assert(0); +} + +static void do_miniht_jump(u_int rs,u_int rh,int ht) { + emit_cmp(rh,rs); + emit_ldreq_indexed(ht,4,15); + //emit_jmp(jump_vaddr_reg[rs]); + assert(0); +} + +static void do_miniht_insert(u_int return_address,u_int rt,int temp) { + assert(0); +} + +static void mark_clear_cache(void *target) +{ + u_long offset = (u_char *)target - translation_cache; + u_int mask = 1u << ((offset >> 12) & 31); + if (!(needs_clear_cache[offset >> 17] & mask)) { + char *start = (char *)((u_long)target & ~4095ul); + start_tcache_write(start, start + 4096); + needs_clear_cache[offset >> 17] |= mask; + } +} + +// Clearing the cache is rather slow on ARM Linux, so mark the areas +// that need to be cleared, and then only clear these areas once. 
+static void do_clear_cache() +{ + int i,j; + for (i=0;i<(1<<(TARGET_SIZE_2-17));i++) + { + u_int bitmap=needs_clear_cache[i]; + if(bitmap) { + u_char *start, *end; + for(j=0;j<32;j++) + { + if(bitmap&(1<%u (%d)\n", psxRegs.pc, psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); - new_dyna_start(); + new_dyna_start(dynarec_local); evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); @@ -446,7 +446,7 @@ unsigned char *out; void *mem_rtab; void *scratch_buf_ptr; void new_dynarec_init() {} -void new_dyna_start() {} +void new_dyna_start(void *context) {} void new_dynarec_cleanup() {} void new_dynarec_clear_full() {} void invalidate_all_pages() {} diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index c18a64489..5783ad3f4 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -7,6 +7,8 @@ extern char invalid_code[0x100000]; #define EAX 0 #define ECX 1 +extern int dynarec_local[]; + /* same as psxRegs */ extern int reg[]; diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 5f2b6eea7..84a017817 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -655,7 +655,7 @@ invalidate_addr_call: FUNCTION(new_dyna_start): /* ip is stored to conform EABI alignment */ stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr} - load_varadr fp, dynarec_local + mov fp, r0 /* dynarec_local */ ldr r0, [fp, #LO_pcaddr] bl get_addr_ht ldr r1, [fp, #LO_next_interupt] diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S new file mode 100644 index 000000000..122078791 --- /dev/null +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -0,0 +1,267 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * linkage_arm.s for PCSX * + * Copyright (C) 2009-2011 Ari64 * + * Copyright (C) 2021 notaz * + * * + * 
This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#include "arm_features.h" +#include "new_dynarec_config.h" +#include "assem_arm64.h" +#include "linkage_offsets.h" + +.bss + .align 4 + .global dynarec_local + .type dynarec_local, %object + .size dynarec_local, LO_dynarec_local_size +dynarec_local: + .space LO_dynarec_local_size + +#define DRC_VAR_(name, vname, size_) \ + vname = dynarec_local + LO_##name; \ + .global vname; \ + .type vname, %object; \ + .size vname, size_ + +#define DRC_VAR(name, size_) \ + DRC_VAR_(name, ESYM(name), size_) + +DRC_VAR(next_interupt, 4) +DRC_VAR(cycle_count, 4) +DRC_VAR(last_count, 4) +DRC_VAR(pending_exception, 4) +DRC_VAR(stop, 4) +DRC_VAR(invc_ptr, 4) +DRC_VAR(address, 4) +DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) + +/* psxRegs */ +DRC_VAR(reg, 128) +DRC_VAR(lo, 4) +DRC_VAR(hi, 4) +DRC_VAR(reg_cop0, 128) +DRC_VAR(reg_cop2d, 128) +DRC_VAR(reg_cop2c, 128) +DRC_VAR(pcaddr, 4) +#DRC_VAR(code, 4) +#DRC_VAR(cycle, 4) +#DRC_VAR(interrupt, 4) +#DRC_VAR(intCycle, 256) + +DRC_VAR(rcnts, 7*4*4) +DRC_VAR(mem_rtab, 4) +DRC_VAR(mem_wtab, 4) +DRC_VAR(psxH_ptr, 4) +DRC_VAR(zeromem_ptr, 4) +DRC_VAR(inv_code_start, 4) +DRC_VAR(inv_code_end, 4) +DRC_VAR(branch_target, 
4) +DRC_VAR(scratch_buf_ptr, 4) +#DRC_VAR(align0, 12) /* unused/alignment */ +DRC_VAR(mini_ht, 256) +DRC_VAR(restore_candidate, 512) + + + .text + .align 2 + +/* r0 = virtual target address */ +/* r1 = instruction to patch */ +.macro dyna_linker_main + /* XXX: should be able to do better than this... */ + bl get_addr_ht + br x0 +.endm + + +FUNCTION(dyna_linker): + /* r0 = virtual target address */ + /* r1 = instruction to patch */ + dyna_linker_main + .size dyna_linker, .-dyna_linker + +FUNCTION(exec_pagefault): + /* r0 = instruction pointer */ + /* r1 = fault address */ + /* r2 = cause */ + bl abort + .size exec_pagefault, .-exec_pagefault + +/* Special dynamic linker for the case where a page fault + may occur in a branch delay slot */ +FUNCTION(dyna_linker_ds): + /* r0 = virtual target address */ + /* r1 = instruction to patch */ + dyna_linker_main + .size dyna_linker_ds, .-dyna_linker_ds + + .align 2 + +FUNCTION(jump_vaddr): + bl abort + .size jump_vaddr, .-jump_vaddr + + .align 2 + +FUNCTION(verify_code_ds): + bl abort +FUNCTION(verify_code_vm): +FUNCTION(verify_code): + /* r1 = source */ + /* r2 = target */ + /* r3 = length */ + bl abort + .size verify_code, .-verify_code + .size verify_code_vm, .-verify_code_vm + + .align 2 +FUNCTION(cc_interrupt): + bl abort + .size cc_interrupt, .-cc_interrupt + + .align 2 +FUNCTION(do_interrupt): + bl abort + .size do_interrupt, .-do_interrupt + + .align 2 +FUNCTION(fp_exception): + mov w2, #0x10000000 +0: + ldr w1, [fp, #LO_reg_cop0+48] /* Status */ + mov w3, #0x80000000 + str w0, [fp, #LO_reg_cop0+56] /* EPC */ + orr w1, w1, #2 + add w2, w2, #0x2c + str w1, [fp, #LO_reg_cop0+48] /* Status */ + str w2, [fp, #LO_reg_cop0+52] /* Cause */ + add w0, w3, #0x80 + bl get_addr_ht + br x0 + .size fp_exception, .-fp_exception + .align 2 +FUNCTION(fp_exception_ds): + mov w2, #0x90000000 /* Set high bit if delay slot */ + b 0b + .size fp_exception_ds, .-fp_exception_ds + + .align 2 +FUNCTION(jump_syscall): + ldr w1, [fp, 
#LO_reg_cop0+48] /* Status */ + mov w3, #0x80000000 + str w0, [fp, #LO_reg_cop0+56] /* EPC */ + orr w1, w1, #2 + mov w2, #0x20 + str w1, [fp, #LO_reg_cop0+48] /* Status */ + str w2, [fp, #LO_reg_cop0+52] /* Cause */ + add w0, w3, #0x80 + bl get_addr_ht + br x0 + .size jump_syscall, .-jump_syscall + .align 2 + + .align 2 +FUNCTION(jump_syscall_hle): + bl abort + + /* note: psxException might do recursive recompiler call from it's HLE code, + * so be ready for this */ +pcsx_return: + bl abort // w10 + ldr w1, [fp, #LO_next_interupt] + ldr w10, [fp, #LO_cycle] + ldr w0, [fp, #LO_pcaddr] + sub w10, w10, w1 + str w1, [fp, #LO_last_count] + bl get_addr_ht + br x0 + .size jump_syscall_hle, .-jump_syscall_hle + + .align 2 +FUNCTION(jump_hlecall): + bl abort + .size jump_hlecall, .-jump_hlecall + + .align 2 +FUNCTION(jump_intcall): + bl abort + .size jump_intcall, .-jump_intcall + + .align 2 +FUNCTION(new_dyna_start): + stp x29, x30, [sp, #-96]! // must be aligned by 16 + ldr w1, [x0, #LO_next_interupt] + ldr w2, [x0, #LO_cycle] + stp x19, x20, [sp, #16*1] + stp x21, x22, [sp, #16*2] + stp x23, x24, [sp, #16*3] + stp x25, x26, [sp, #16*4] + stp x27, x28, [sp, #16*5] + mov rFP, x0 + ldr w0, [rFP, #LO_pcaddr] + str w1, [rFP, #LO_last_count] + sub rCC, w2, w1 + bl get_addr_ht + br x0 + .size new_dyna_start, .-new_dyna_start + + .align 2 +FUNCTION(new_dyna_leave): + ldr w0, [rFP, #LO_last_count] + add rCC, rCC, w0 + str rCC, [rFP, #LO_cycle] + ldp x19, x20, [sp, #16*1] + ldp x21, x22, [sp, #16*2] + ldp x23, x24, [sp, #16*3] + ldp x25, x26, [sp, #16*4] + ldp x27, x28, [sp, #16*5] + ldp x29, x30, [sp], #96 + ret + .size new_dyna_leave, .-new_dyna_leave + +/* --------------------------------------- */ + +.align 2 + +FUNCTION(jump_handler_read8): + bl abort + +FUNCTION(jump_handler_read16): + bl abort + +FUNCTION(jump_handler_read32): + bl abort + +FUNCTION(jump_handler_write8): + bl abort + +FUNCTION(jump_handler_write16): + bl abort + +FUNCTION(jump_handler_write32): + bl abort + 
+FUNCTION(jump_handler_write_h): + bl abort + +FUNCTION(jump_handle_swl): + bl abort + +FUNCTION(jump_handle_swr): + bl abort + diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 987892f0b..0809a4aa4 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -57,6 +57,9 @@ static int sceBlock; #ifdef __arm__ #include "assem_arm.h" #endif +#ifdef __aarch64__ +#include "assem_arm64.h" +#endif #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 @@ -338,6 +341,8 @@ static void end_tcache_write(void *start, void *end) __clear_cache(start, end); #endif (void)len; +#else + __clear_cache(start, end); #endif mprotect_w_x(start, end, 1); @@ -834,6 +839,9 @@ static const char *func_name(intptr_t a) #ifdef __arm__ #include "assem_arm.c" #endif +#ifdef __aarch64__ +#include "assem_arm64.c" +#endif // Add virtual address mapping to linked list void ll_add(struct ll_entry **head,int vaddr,void *addr) @@ -1015,7 +1023,7 @@ static void invalidate_block_range(u_int block, u_int first, u_int last) for(first=page+1;first>21)&0x1f; - //if (op2 & 0x10) { + //if (op2 & 0x10) if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns if (gte_handlers[source[i]&0x3f]!=NULL) { if (gte_regnames[source[i]&0x3f]!=NULL) @@ -8893,7 +8908,7 @@ int new_recompile_block(int addr) break; case 3: // Clear jump_out - #ifdef __arm__ + #if defined(__arm__) || defined(__aarch64__) if((expirep&2047)==0) do_clear_cache(); #endif diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index ddc84a5a3..1bec5e1d5 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -14,7 +14,7 @@ extern int new_dynarec_hacks; void new_dynarec_init(); void new_dynarec_cleanup(); void new_dynarec_clear_full(); -void new_dyna_start(); +void new_dyna_start(void *context); int new_dynarec_save_blocks(void *save, int size); void 
new_dynarec_load_blocks(const void *save, int size); diff --git a/libpcsxcore/new_dynarec/new_dynarec_config.h b/libpcsxcore/new_dynarec/new_dynarec_config.h index fbd08ac23..321bfbf38 100644 --- a/libpcsxcore/new_dynarec/new_dynarec_config.h +++ b/libpcsxcore/new_dynarec/new_dynarec_config.h @@ -1,6 +1,8 @@ - +#ifdef __arm__ #define CORTEX_A8_BRANCH_PREDICTION_HACK 1 +#endif + #define USE_MINI_HT 1 //#define REG_PREFETCH 1 diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index 9376ff47a..bb471b6a9 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -22,27 +22,27 @@ //#define memprintf printf #define memprintf(...) -static u32 *mem_readtab; -static u32 *mem_writetab; -static u32 mem_iortab[(1+2+4) * 0x1000 / 4]; -static u32 mem_iowtab[(1+2+4) * 0x1000 / 4]; -static u32 mem_ffwtab[(1+2+4) * 0x1000 / 4]; -//static u32 mem_unmrtab[(1+2+4) * 0x1000 / 4]; -static u32 mem_unmwtab[(1+2+4) * 0x1000 / 4]; - -// When this is called in a loop, and 'h' is a function pointer, clang will crash. +static uintptr_t *mem_readtab; +static uintptr_t *mem_writetab; +static uintptr_t mem_iortab[(1+2+4) * 0x1000 / 4]; +static uintptr_t mem_iowtab[(1+2+4) * 0x1000 / 4]; +static uintptr_t mem_ffwtab[(1+2+4) * 0x1000 / 4]; +//static uintptr_t mem_unmrtab[(1+2+4) * 0x1000 / 4]; +static uintptr_t mem_unmwtab[(1+2+4) * 0x1000 / 4]; + +static #ifdef __clang__ -static __attribute__ ((noinline)) void map_item(u32 *out, const void *h, u32 flag) -#else -static void map_item(u32 *out, const void *h, u32 flag) +// When this is called in a loop, and 'h' is a function pointer, clang will crash. 
+__attribute__ ((noinline)) #endif +void map_item(uintptr_t *out, const void *h, uintptr_t flag) { - u32 hv = (u32)h; + uintptr_t hv = (uintptr_t)h; if (hv & 1) { SysPrintf("FATAL: %p has LSB set\n", h); abort(); } - *out = (hv >> 1) | (flag << 31); + *out = (hv >> 1) | (flag << (sizeof(hv) * 8 - 1)); } // size must be power of 2, at least 4k @@ -90,7 +90,7 @@ static void io_write_sio32(u32 value) sioWrite8((unsigned char)(value >> 24)); } -#ifndef DRC_DBG +#if !defined(DRC_DBG) && defined(__arm__) static void map_rcnt_rcount0(u32 mode) { @@ -306,7 +306,7 @@ void new_dyna_pcsx_mem_init(void) int i; // have to map these further to keep tcache close to .text - mem_readtab = psxMap(0x08000000, 0x200000 * 4, 0, MAP_TAG_LUTS); + mem_readtab = psxMap(0x08000000, 0x200000 * sizeof(mem_readtab[0]), 0, MAP_TAG_LUTS); if (mem_readtab == NULL) { SysPrintf("failed to map mem tables\n"); exit(1); From 687b45804b5c028dd5644bda85981c0235eb4d32 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 3 Nov 2021 23:37:59 +0000 Subject: [PATCH 057/597] drc: arm64 wip --- Makefile | 5 +- libpcsxcore/new_dynarec/assem_arm.c | 57 +--- libpcsxcore/new_dynarec/assem_arm64.c | 364 ++++++++++++---------- libpcsxcore/new_dynarec/assem_arm64.h | 7 + libpcsxcore/new_dynarec/linkage_arm.S | 19 +- libpcsxcore/new_dynarec/linkage_arm64.S | 22 +- libpcsxcore/new_dynarec/linkage_offsets.h | 27 +- libpcsxcore/new_dynarec/new_dynarec.c | 69 +++- 8 files changed, 314 insertions(+), 256 deletions(-) diff --git a/Makefile b/Makefile index 3f4b87df9..db5ab2cf1 100644 --- a/Makefile +++ b/Makefile @@ -60,8 +60,10 @@ OBJS += libpcsxcore/new_dynarec/new_dynarec.o OBJS += libpcsxcore/new_dynarec/pcsxmem.o ifeq "$(ARCH)" "arm" OBJS += libpcsxcore/new_dynarec/linkage_arm.o + libpcsxcore/new_dynarec/new_dynarec.o: libpcsxcore/new_dynarec/assem_arm.c else ifeq "$(ARCH)" "aarch64" OBJS += libpcsxcore/new_dynarec/linkage_arm64.o + libpcsxcore/new_dynarec/new_dynarec.o: libpcsxcore/new_dynarec/assem_arm64.c else $(error no 
dynarec support for architecture $(ARCH)) endif @@ -69,8 +71,7 @@ else CFLAGS += -DDRC_DISABLE endif OBJS += libpcsxcore/new_dynarec/emu_if.o -libpcsxcore/new_dynarec/new_dynarec.o: libpcsxcore/new_dynarec/assem_arm.c \ - libpcsxcore/new_dynarec/pcsxmem_inline.c +libpcsxcore/new_dynarec/new_dynarec.o: libpcsxcore/new_dynarec/pcsxmem_inline.c ifdef DRC_DBG libpcsxcore/new_dynarec/emu_if.o: CFLAGS += -D_FILE_OFFSET_BITS=64 CFLAGS += -DDRC_DBG diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index d0b1bd794..a80b0464f 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -49,14 +49,6 @@ u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); #pragma GCC diagnostic ignored "-Wunused-but-set-variable" #endif -extern int cycle_count; -extern int last_count; -extern int pcaddr; -extern int pending_exception; -extern int branch_target; -extern uint64_t readmem_dword; -extern u_int mini_ht[32][2]; - void indirect_jump_indexed(); void indirect_jump(); void do_interrupt(); @@ -1674,23 +1666,6 @@ static int is_similar_value(u_int v1,u_int v2) return 0; } -// trashes r2 -static void pass_args(int a0, int a1) -{ - if(a0==1&&a1==0) { - // must swap - emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0); - } - else if(a0!=0&&a1==0) { - emit_mov(a1,1); - if (a0>=0) emit_mov(a0,0); - } - else { - if(a0>=0&&a0!=0) emit_mov(a0,0); - if(a1>=0&&a1!=1) emit_mov(a1,1); - } -} - static void mov_loadtype_adj(enum stub_type type,int rs,int rt) { switch(type) { @@ -1787,40 +1762,14 @@ static void do_readstub(int n) emit_jmp(stubs[n].retaddr); // return address } -// return memhandler, or get directly accessable address and return 0 -static void *get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host) -{ - u_int l1,l2=0; - l1=((u_int *)table)[addr>>12]; - if((l1&(1<<31))==0) { - u_int v=l1<<1; - *addr_host=v+addr; - return NULL; - } - else { - l1<<=1; - 
if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB) - l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)]; - else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB) - l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2]; - else - l2=((u_int *)l1)[(addr&0xfff)/4]; - if((l2&(1<<31))==0) { - u_int v=l2<<1; - *addr_host=v+(addr&0xfff); - return NULL; - } - return (void *)(l2<<1); - } -} - static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { int rs=get_reg(regmap,target); int rt=get_reg(regmap,target); if(rs<0) rs=get_reg(regmap,-1); assert(rs>=0); - u_int host_addr=0,is_dynamic,far_call=0; + u_int is_dynamic,far_call=0; + uintptr_t host_addr = 0; void *handler; int cc=get_reg(regmap,CCREG); if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt)) @@ -1983,7 +1932,7 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, signed char int rt=get_reg(regmap,target); assert(rs>=0); assert(rt>=0); - u_int host_addr=0; + uintptr_t host_addr = 0; void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr); if (handler == NULL) { if(addr!=host_addr) diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index f9c837eda..df5b5aaa2 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -32,13 +32,6 @@ u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); #define unused __attribute__((unused)) -extern int cycle_count; -extern int last_count; -extern int pcaddr; -extern int pending_exception; -extern int branch_target; -extern u_int mini_ht[32][2]; - static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; //void indirect_jump_indexed(); @@ -141,13 +134,19 @@ static unused const char *regname[32] = { "r24", "r25", "r26", "r27", "r28", "fp", "lr", "sp" }; -#pragma GCC diagnostic ignored "-Wunused-function" static void output_w32(u_int word) { *((u_int *)out) = 
word; out += 4; } +static u_int rm_rd(u_int rm, u_int rd) +{ + assert(rm < 31); + assert(rd < 31); + return (rm << 16) | rd; +} + static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd) { assert(rm < 31); @@ -156,6 +155,12 @@ static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd) return (rm << 16) | (rn << 5) | rd; } +static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd) +{ + assert(imm6 <= 63); + return rm_rn_rd(rm, rn, rd) | (imm6 << 10); +} + static u_int imm16_rd(u_int imm16, u_int rd) { assert(imm16 < 0x10000); @@ -163,6 +168,15 @@ static u_int imm16_rd(u_int imm16, u_int rd) return (imm16 << 5) | rd; } +static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd) +{ + assert(imm12 < 0x1000); + assert(rn < 31); + assert(rd < 31); + return (imm12 << 10) | (rn << 5) | rd; +} + +#pragma GCC diagnostic ignored "-Wunused-function" static u_int genjmp(u_char *addr) { intptr_t offset = addr - out; @@ -191,20 +205,20 @@ static u_int genjmpcc(u_char *addr) static void emit_mov(u_int rs, u_int rt) { - assem_debug("mov %s,%s\n",regname[rt],regname[rs]); - assert(0); + assem_debug("mov %s,%s\n", regname[rt], regname[rs]); + output_w32(0x2a0003e0 | rm_rd(rs, rt)); } static void emit_movs(u_int rs, u_int rt) { - assem_debug("mov %s,%s\n",regname[rt],regname[rs]); - assert(0); + assem_debug("movs %s,%s\n", regname[rt], regname[rs]); + output_w32(0x31000000 | imm12_rn_rd(0, rs, rt)); } -static void emit_add(u_int rs1,u_int rs2,u_int rt) +static void emit_add(u_int rs1, u_int rs2, u_int rt) { - assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); + assem_debug("add %s, %s, %s\n", regname[rt], regname[rs1], regname[rs2]); + output_w32(0x0b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt)); } static void emit_sbc(u_int rs1,u_int rs2,u_int rt) @@ -225,10 +239,10 @@ static void emit_negs(u_int rs, u_int rt) assert(0); } -static void emit_sub(u_int rs1,u_int rs2,u_int rt) +static void emit_sub(u_int rs1, u_int rs2, u_int rt) { - assem_debug("sub 
%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); + assem_debug("sub %s, %s, %s\n", regname[rt], regname[rs1], regname[rs2]); + output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt)); } static void emit_subs(u_int rs1,u_int rs2,u_int rt) @@ -243,22 +257,9 @@ static void emit_zeroreg(u_int rt) assert(0); } -static void emit_movw(u_int imm,u_int rt) -{ - assert(imm<65536); - assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm); - assert(0); -} - -static void emit_movt(u_int imm,u_int rt) -{ - assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000); - assert(0); -} - static void emit_movimm(u_int imm, u_int rt) { - assem_debug("mov %s,#%x\n", regname[rt], imm); + assem_debug("mov %s,#%#x\n", regname[rt], imm); if ((imm & 0xffff0000) == 0) output_w32(0x52800000 | imm16_rd(imm, rt)); else if ((imm & 0xffff0000) == 0xffff0000) @@ -269,6 +270,17 @@ static void emit_movimm(u_int imm, u_int rt) } } +static void emit_readword(void *addr, u_int rt) +{ + uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; + if (!(offset & 3) && offset <= 16380) { + assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset); + output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt)); + } + else + assert(0); +} + static void emit_loadreg(u_int r, u_int hr) { assert(r < 64); @@ -284,14 +296,22 @@ static void emit_loadreg(u_int r, u_int hr) case INVCP: addr = &invc_ptr; break; default: assert(r < 32); break; } - uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; - assert(offset < 4096); - assem_debug("ldr %s,fp+%lx\n", regname[hr], offset); - assert(0); + emit_readword(addr, hr); } } -static void emit_storereg(u_int r, int hr) +static void emit_writeword(u_int rt, void *addr) +{ + uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; + if (!(offset & 3) && offset <= 16380) { + assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset); + output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt)); + } + else + assert(0); +} + 
+static void emit_storereg(u_int r, u_int hr) { assert(r < 64); void *addr = ®[r]; @@ -301,10 +321,7 @@ static void emit_storereg(u_int r, int hr) case CCREG: addr = &cycle_count; break; default: assert(r < 32); break; } - uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; - assert(offset < 4096); - assem_debug("str %s,fp+%lx\n", regname[hr], offset); - assert(0); + emit_writeword(hr, addr); } static void emit_test(u_int rs, u_int rt) @@ -315,7 +332,7 @@ static void emit_test(u_int rs, u_int rt) static void emit_testimm(u_int rs,int imm) { - assem_debug("tst %s,#%d\n",regname[rs],imm); + assem_debug("tst %s,#%#x\n", regname[rs], imm); assert(0); } @@ -381,9 +398,16 @@ static void emit_xor(u_int rs1,u_int rs2,u_int rt) static void emit_addimm(u_int rs, uintptr_t imm, u_int rt) { - assert(rs < 31); - assert(rt < 31); - assert(0); + if (imm < 4096) { + assem_debug("add %s,%s,%#lx\n", regname[rt], regname[rs], imm); + output_w32(0x11000000 | imm12_rn_rd(imm, rs, rt)); + } + else if (-imm < 4096) { + assem_debug("sub %s,%s,%#lx\n", regname[rt], regname[rs], imm); + output_w32(0x51000000 | imm12_rn_rd(imm, rs, rt)); + } + else + assert(0); } static void emit_addimm_and_set_flags(int imm, u_int rt) @@ -396,22 +420,15 @@ static void emit_addimm_no_flags(u_int imm,u_int rt) emit_addimm(rt,imm,rt); } -static void emit_addnop(u_int r) -{ - assert(r<16); - assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]); - assert(0); -} - static void emit_adcimm(u_int rs,int imm,u_int rt) { - assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm); + assem_debug("adc %s,%s,#%#x\n",regname[rt],regname[rs],imm); assert(0); } static void emit_rscimm(u_int rs,int imm,u_int rt) { - assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm); + assem_debug("rsc %s,%s,#%#x\n",regname[rt],regname[rs],imm); assert(0); } @@ -540,25 +557,25 @@ static void emit_cmpimm(u_int rs,int imm) static void emit_cmovne_imm(int imm,u_int rt) { - assem_debug("movne %s,#%d\n",regname[rt],imm); + 
assem_debug("movne %s,#%#x\n",regname[rt],imm); assert(0); } static void emit_cmovl_imm(int imm,u_int rt) { - assem_debug("movlt %s,#%d\n",regname[rt],imm); + assem_debug("movlt %s,#%#x\n",regname[rt],imm); assert(0); } static void emit_cmovb_imm(int imm,u_int rt) { - assem_debug("movcc %s,#%d\n",regname[rt],imm); + assem_debug("movcc %s,#%#x\n",regname[rt],imm); assert(0); } static void emit_cmovs_imm(int imm,u_int rt) { - assem_debug("movmi %s,#%d\n",regname[rt],imm); + assem_debug("movmi %s,#%#x\n",regname[rt],imm); assert(0); } @@ -634,14 +651,18 @@ static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt) emit_cmovb_imm(1,rt); } -#pragma GCC diagnostic ignored "-Wunused-variable" static void emit_call(const void *a_) { - uintptr_t a = (uintptr_t)a_; - assem_debug("bl %p (%p+%lx)%s\n", a_, out, (u_char *)a_ - out, func_name(a)); - assert(0); + intptr_t diff = (u_char *)a_ - out; + assem_debug("bl %p (%p+%lx)%s\n", a_, out, diff, func_name(a)); + assert(!(diff & 3)); + if (-134217728 <= diff && diff <= 134217727) + output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff)); + else + assert(0); } +#pragma GCC diagnostic ignored "-Wunused-variable" static void emit_jmp(const void *a_) { uintptr_t a = (uintptr_t)a_; @@ -735,42 +756,6 @@ static void emit_readword_indexed(int offset, u_int rs, u_int rt) assert(0); } -static void emit_readword_dualindexedx4(u_int rs1, u_int rs2, u_int rt) -{ - assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); -} - -static void emit_ldrcc_dualindexed(u_int rs1, u_int rs2, u_int rt) -{ - assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); -} - -static void emit_ldrccb_dualindexed(u_int rs1, u_int rs2, u_int rt) -{ - assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); -} - -static void emit_ldrccsb_dualindexed(u_int rs1, u_int rs2, u_int rt) -{ - assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); -} - 
-static void emit_ldrcch_dualindexed(u_int rs1, u_int rs2, u_int rt) -{ - assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); -} - -static void emit_ldrccsh_dualindexed(u_int rs1, u_int rs2, u_int rt) -{ - assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); -} - static void emit_movsbl_indexed(int offset, u_int rs, u_int rt) { assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -795,65 +780,31 @@ static void emit_movzwl_indexed(int offset, u_int rs, u_int rt) assert(0); } -static void emit_ldrd(int offset, u_int rs, u_int rt) -{ - assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset); - assert(0); -} - -static void emit_readword(void *addr, u_int rt) -{ - uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; - assert(offset<4096); - assem_debug("ldr %s,fp+%lx\n", regname[rt], offset); - assert(0); -} - static void emit_writeword_indexed(u_int rt, int offset, u_int rs) { - assert(offset>-4096&&offset<4096); - assem_debug("str %s,%s+%x\n",regname[rt],regname[rs],offset); - assert(0); + assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset); + if (!(offset & 3) && offset <= 16380) + output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt)); + else + assert(0); } static void emit_writehword_indexed(u_int rt, int offset, u_int rs) { - assert(offset>-256&&offset<256); - assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset); - assert(0); + assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname[rs], offset); + if (!(offset & 1) && offset <= 8190) + output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt)); + else + assert(0); } static void emit_writebyte_indexed(u_int rt, int offset, u_int rs) { - assert(offset>-4096&&offset<4096); - assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset); - assert(0); -} - -static void emit_strcc_dualindexed(u_int rs1, u_int rs2, u_int rt) -{ - assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - 
assert(0); -} - -static void emit_strccb_dualindexed(u_int rs1, u_int rs2, u_int rt) -{ - assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); -} - -static void emit_strcch_dualindexed(u_int rs1, u_int rs2, u_int rt) -{ - assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); -} - -static void emit_writeword(u_int rt, void *addr) -{ - uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; - assert(offset<4096); - assem_debug("str %s,fp+%lx\n", regname[rt], offset); - assert(0); + assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname[rs], offset); + if ((u_int)offset < 4096) + output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt)); + else + assert(0); } static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) @@ -915,52 +866,91 @@ static unused void emit_prefetchreg(u_int r) static void emit_ldreq_indexed(u_int rs, u_int offset, u_int rt) { assert(offset<4096); - assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset); + assem_debug("ldreq %s,[%s, #%#x]\n",regname[rt],regname[rs],offset); assert(0); } static void emit_orrne_imm(u_int rs,int imm,u_int rt) { - assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm); + assem_debug("orrne %s,%s,#%#x\n",regname[rt],regname[rs],imm); assert(0); } static void emit_andne_imm(u_int rs,int imm,u_int rt) { - assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm); + assem_debug("andne %s,%s,#%#x\n",regname[rt],regname[rs],imm); assert(0); } static unused void emit_addpl_imm(u_int rs,int imm,u_int rt) { - assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm); + assem_debug("addpl %s,%s,#%#x\n",regname[rt],regname[rs],imm); assert(0); } -static void save_regs_all(u_int reglist) +static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs) { - if(!reglist) return; - assert(0); + u_int op = 0xb9000000; + const char *ldst = is_st ? "st" : "ld"; + char rp = is64 ? 
'x' : 'w'; + assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs); + is64 = is64 ? 1 : 0; + assert((ofs & ((1 << (2+is64)) - 1)) == 0); + ofs = (ofs >> (2+is64)); + assert(ofs <= 0xfff); + if (!is_st) op |= 0x00400000; + if (is64) op |= 0x40000000; + output_w32(op | (ofs << 15) | imm12_rn_rd(ofs, rn, rt)); } -static void restore_regs_all(u_int reglist) +static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs) { - if(!reglist) return; - assert(0); + u_int op = 0x29000000; + const char *ldst = is_st ? "st" : "ld"; + char rp = is64 ? 'x' : 'w'; + assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs); + is64 = is64 ? 1 : 0; + assert((ofs & ((1 << (2+is64)) - 1)) == 0); + ofs = (ofs >> (2+is64)); + assert(-64 <= ofs && ofs <= 63); + ofs &= 0x7f; + if (!is_st) op |= 0x00400000; + if (is64) op |= 0x80000000; + output_w32(op | (ofs << 15) | rm_rn_rd(rt2, rn, rt1)); +} + +static void save_load_regs_all(int is_store, u_int reglist) +{ + int ofs = 0, c = 0; + u_int r, pair[2]; + for (r = 0; reglist; r++, reglist >>= 1) { + if (reglist & 1) + pair[c++] = r; + if (c == 2) { + emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs); + ofs += 8 * 2; + c = 0; + } + } + if (c) { + emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs); + ofs += 8; + } + assert(ofs <= SSP_CALLER_REGS); } // Save registers before function call static void save_regs(u_int reglist) { reglist &= CALLER_SAVE_REGS; // only save the caller-save registers - save_regs_all(reglist); + save_load_regs_all(1, reglist); } // Restore registers after function call static void restore_regs(u_int reglist) { reglist &= CALLER_SAVE_REGS; - restore_regs_all(reglist); + save_load_regs_all(0, reglist); } /* Stubs/epilogue */ @@ -990,16 +980,21 @@ static void emit_extjump_ds(void *addr, int target) } // put rt_val into rt, potentially making use of rs with value rs_val -static void emit_movimm_from(u_int rs_val,u_int rs,u_int rt_val,u_int rt) +static 
void emit_movimm_from(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt) { - assert(0); + intptr_t diff = rt_val - rs_val; + if (-4096 < diff && diff < 4096) + emit_addimm(rs, diff, rt); + else + // TODO: for inline_writestub, etc + assert(0); } // return 1 if above function can do it's job cheaply -static int is_similar_value(u_int v1,u_int v2) +static int is_similar_value(u_int v1, u_int v2) { - assert(0); - return 0; + int diff = v1 - v2; + return -4096 < diff && diff < 4096; } //#include "pcsxmem.h" @@ -1024,7 +1019,46 @@ static void do_writestub(int n) static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { - assert(0); + int rs = get_reg(regmap,-1); + int rt = get_reg(regmap,target); + assert(rs >= 0); + assert(rt >= 0); + uintptr_t host_addr = 0; + void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr); + if (handler == NULL) { + if (addr != host_addr) + emit_movimm_from(addr, rs, host_addr, rs); + switch(type) { + case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break; + case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break; + case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break; + default: assert(0); + } + return; + } + + // call a memhandler + save_regs(reglist); + //pass_args(rs, rt); + int cc = get_reg(regmap, CCREG); + assert(cc >= 0); + emit_addimm(cc, CLOCK_ADJUST(adj+1), 2); + //emit_movimm((uintptr_t)handler, 3); + // returns new cycle_count + + emit_readword(&last_count, HOST_TEMPREG); + emit_writeword(rs, &address); // some handlers still need it + emit_add(2, HOST_TEMPREG, 2); + emit_writeword(2, &Count); + emit_mov(1, 0); + emit_call(handler); + emit_readword(&next_interupt, 0); + emit_readword(&Count, 1); + emit_writeword(0, &last_count); + emit_sub(1, 0, cc); + + emit_addimm(cc,-CLOCK_ADJUST(adj+1),cc); + restore_regs(reglist); } static void do_unalignedwritestub(int n) diff --git a/libpcsxcore/new_dynarec/assem_arm64.h 
b/libpcsxcore/new_dynarec/assem_arm64.h index 8ba17ed1c..6789f178d 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.h +++ b/libpcsxcore/new_dynarec/assem_arm64.h @@ -17,6 +17,8 @@ #define ARG3_REG 2 #define ARG4_REG 3 +#define SP 30 + #define LR 30 #define HOST_TEMPREG LR @@ -28,6 +30,11 @@ #define HOST_CCREG 28 #define rCC w28 +// stack space +#define SSP_CALLEE_REGS (8*12) +#define SSP_CALLER_REGS (8*20) +#define SSP_ALL (SSP_CALLEE_REGS+SSP_CALLER_REGS) + #ifndef __ASSEMBLER__ extern char *invc_ptr; diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 84a017817..aa6002fc6 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -59,8 +59,9 @@ DRC_VAR(cycle_count, 4) DRC_VAR(last_count, 4) DRC_VAR(pending_exception, 4) DRC_VAR(stop, 4) -DRC_VAR(invc_ptr, 4) +DRC_VAR(branch_target, 4) DRC_VAR(address, 4) +@DRC_VAR(align0, 4) /* unused/alignment */ DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) /* psxRegs */ @@ -77,15 +78,15 @@ DRC_VAR(pcaddr, 4) @DRC_VAR(intCycle, 256) DRC_VAR(rcnts, 7*4*4) +DRC_VAR(inv_code_start, 4) +DRC_VAR(inv_code_end, 4) DRC_VAR(mem_rtab, 4) DRC_VAR(mem_wtab, 4) DRC_VAR(psxH_ptr, 4) DRC_VAR(zeromem_ptr, 4) -DRC_VAR(inv_code_start, 4) -DRC_VAR(inv_code_end, 4) -DRC_VAR(branch_target, 4) +DRC_VAR(invc_ptr, 4) DRC_VAR(scratch_buf_ptr, 4) -@DRC_VAR(align0, 12) /* unused/alignment */ +@DRC_VAR(align1, 8) /* unused/alignment */ DRC_VAR(mini_ht, 256) DRC_VAR(restore_candidate, 512) @@ -723,10 +724,10 @@ FUNCTION(jump_handler_read32): blx r3 ldr r0, [fp, #LO_next_interupt] - pop {r2, r3} + pop {r2, lr} str r0, [fp, #LO_last_count] sub r0, r2, r0 - bx r3 + bx lr .endm FUNCTION(jump_handler_write8): @@ -751,10 +752,10 @@ FUNCTION(jump_handler_write_h): blx r3 ldr r0, [fp, #LO_next_interupt] - pop {r2, r3} + pop {r2, lr} str r0, [fp, #LO_last_count] sub r0, r2, r0 - bx r3 + bx lr FUNCTION(jump_handle_swl): /* r0 = address, r1 = data, r2 = cycles */ diff --git 
a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 122078791..a8f2d6630 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -46,8 +46,9 @@ DRC_VAR(cycle_count, 4) DRC_VAR(last_count, 4) DRC_VAR(pending_exception, 4) DRC_VAR(stop, 4) -DRC_VAR(invc_ptr, 4) +DRC_VAR(branch_target, 4) DRC_VAR(address, 4) +#DRC_VAR(align0, 16) /* unused/alignment */ DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) /* psxRegs */ @@ -64,15 +65,15 @@ DRC_VAR(pcaddr, 4) #DRC_VAR(intCycle, 256) DRC_VAR(rcnts, 7*4*4) -DRC_VAR(mem_rtab, 4) -DRC_VAR(mem_wtab, 4) -DRC_VAR(psxH_ptr, 4) -DRC_VAR(zeromem_ptr, 4) DRC_VAR(inv_code_start, 4) DRC_VAR(inv_code_end, 4) -DRC_VAR(branch_target, 4) -DRC_VAR(scratch_buf_ptr, 4) -#DRC_VAR(align0, 12) /* unused/alignment */ +DRC_VAR(mem_rtab, 8) +DRC_VAR(mem_wtab, 8) +DRC_VAR(psxH_ptr, 8) +DRC_VAR(invc_ptr, 8) +DRC_VAR(zeromem_ptr, 8) +DRC_VAR(scratch_buf_ptr, 8) +#DRC_VAR(align1, 16) /* unused/alignment */ DRC_VAR(mini_ht, 256) DRC_VAR(restore_candidate, 512) @@ -202,9 +203,10 @@ FUNCTION(jump_intcall): bl abort .size jump_intcall, .-jump_intcall + /* stack must be aligned by 16, and include space for save_regs() use */ .align 2 FUNCTION(new_dyna_start): - stp x29, x30, [sp, #-96]! // must be aligned by 16 + stp x29, x30, [sp, #-SSP_ALL]! 
ldr w1, [x0, #LO_next_interupt] ldr w2, [x0, #LO_cycle] stp x19, x20, [sp, #16*1] @@ -230,7 +232,7 @@ FUNCTION(new_dyna_leave): ldp x23, x24, [sp, #16*3] ldp x25, x26, [sp, #16*4] ldp x27, x28, [sp, #16*5] - ldp x29, x30, [sp], #96 + ldp x29, x30, [sp], #SSP_ALL ret .size new_dyna_leave, .-new_dyna_leave diff --git a/libpcsxcore/new_dynarec/linkage_offsets.h b/libpcsxcore/new_dynarec/linkage_offsets.h index e36a75d16..24b8e66d2 100644 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ b/libpcsxcore/new_dynarec/linkage_offsets.h @@ -1,12 +1,15 @@ +#define PTRSZ __SIZEOF_POINTER__ + #define LO_next_interupt 64 #define LO_cycle_count (LO_next_interupt + 4) #define LO_last_count (LO_cycle_count + 4) #define LO_pending_exception (LO_last_count + 4) #define LO_stop (LO_pending_exception + 4) -#define LO_invc_ptr (LO_stop + 4) -#define LO_address (LO_invc_ptr + 4) -#define LO_psxRegs (LO_address + 4) +#define LO_branch_target (LO_stop + 4) +#define LO_address (LO_branch_target + 4) +#define LO_align0 (LO_address + 4) +#define LO_psxRegs (LO_align0 + 4) #define LO_reg (LO_psxRegs) #define LO_lo (LO_reg + 128) #define LO_hi (LO_lo + 4) @@ -22,16 +25,16 @@ #define LO_psxRegs_end (LO_intCycle + 256) #define LO_rcnts (LO_psxRegs_end) #define LO_rcnts_end (LO_rcnts + 7*4*4) -#define LO_mem_rtab (LO_rcnts_end) -#define LO_mem_wtab (LO_mem_rtab + 4) -#define LO_psxH_ptr (LO_mem_wtab + 4) -#define LO_zeromem_ptr (LO_psxH_ptr + 4) -#define LO_inv_code_start (LO_zeromem_ptr + 4) +#define LO_inv_code_start (LO_rcnts_end) #define LO_inv_code_end (LO_inv_code_start + 4) -#define LO_branch_target (LO_inv_code_end + 4) -#define LO_scratch_buf_ptr (LO_branch_target + 4) -#define LO_align0 (LO_scratch_buf_ptr + 4) -#define LO_mini_ht (LO_align0 + 12) +#define LO_mem_rtab (LO_inv_code_end + 4) +#define LO_mem_wtab (LO_mem_rtab + PTRSZ) +#define LO_psxH_ptr (LO_mem_wtab + PTRSZ) +#define LO_zeromem_ptr (LO_psxH_ptr + PTRSZ) +#define LO_invc_ptr (LO_zeromem_ptr + PTRSZ) +#define 
LO_scratch_buf_ptr (LO_invc_ptr + PTRSZ) +#define LO_align1 (LO_scratch_buf_ptr + PTRSZ) +#define LO_mini_ht (LO_align1 + PTRSZ*2) #define LO_restore_candidate (LO_mini_ht + 256) #define LO_dynarec_local_size (LO_restore_candidate + 512) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 0809a4aa4..1383b2f55 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -198,8 +198,14 @@ struct link_entry int new_dynarec_hacks; int new_dynarec_did_compile; + + extern int cycle_count; // ... until end of the timeslice, counts -N -> 0 + extern int last_count; // last absolute target, often = next_interupt + extern int pcaddr; + extern int pending_exception; + extern int branch_target; + extern u_int mini_ht[32][2]; extern u_char restore_candidate[512]; - extern int cycle_count; /* registers that may be allocated */ /* 1-31 gpr */ @@ -299,6 +305,9 @@ static void add_stub_r(enum stub_type type, void *addr, void *retaddr, int i, int addr_reg, struct regstat *i_regs, int ccadj, u_int reglist); static void add_to_linker(void *addr, u_int target, int ext); static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override); +static void *get_direct_memhandler(void *table, u_int addr, + enum stub_type type, uintptr_t *addr_host); +static void pass_args(int a0, int a1); static void mprotect_w_x(void *start, void *end, int is_x) { @@ -970,7 +979,7 @@ static void ll_kill_pointers(struct ll_entry *head,uintptr_t addr,int shift) { inv_debug("EXP: Kill pointer at %p (%x)\n",head->addr,head->vaddr); void *host_addr=find_extjump_insn(head->addr); - #ifdef __arm__ + #if defined(__arm__) || defined(__aarch64__) mark_clear_cache(host_addr); #endif set_jump_target(host_addr, head->addr); @@ -998,7 +1007,7 @@ void invalidate_page(u_int page) while(head!=NULL) { inv_debug("INVALIDATE: kill pointer to %x (%p)\n",head->vaddr,head->addr); void *host_addr=find_extjump_insn(head->addr); - #ifdef __arm__ + 
#if defined(__arm__) || defined(__aarch64__) mark_clear_cache(host_addr); #endif set_jump_target(host_addr, head->addr); @@ -2003,7 +2012,24 @@ void rlist() printf("\n"); } -void alu_assemble(int i,struct regstat *i_regs) +// trashes r2 +static void pass_args(int a0, int a1) +{ + if(a0==1&&a1==0) { + // must swap + emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0); + } + else if(a0!=0&&a1==0) { + emit_mov(a1,1); + if (a0>=0) emit_mov(a0,0); + } + else { + if(a0>=0&&a0!=0) emit_mov(a0,0); + if(a1>=0&&a1!=1) emit_mov(a1,1); + } +} + +static void alu_assemble(int i,struct regstat *i_regs) { if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU if(rt1[i]) { @@ -2471,6 +2497,34 @@ static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) return jaddr; } +// return memhandler, or get directly accessable address and return 0 +static void *get_direct_memhandler(void *table, u_int addr, + enum stub_type type, uintptr_t *addr_host) +{ + uintptr_t l1, l2 = 0; + l1 = ((uintptr_t *)table)[addr>>12]; + if ((l1 & (1ul << (sizeof(l1)*8-1))) == 0) { + uintptr_t v = l1 << 1; + *addr_host = v + addr; + return NULL; + } + else { + l1 <<= 1; + if (type == LOADB_STUB || type == LOADBU_STUB || type == STOREB_STUB) + l2 = ((uintptr_t *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)]; + else if (type == LOADH_STUB || type == LOADHU_STUB || type == STOREH_STUB) + l2=((uintptr_t *)l1)[0x1000/4 + (addr&0xfff)/2]; + else + l2=((uintptr_t *)l1)[(addr&0xfff)/4]; + if ((l2 & (1<<31)) == 0) { + uintptr_t v = l2 << 1; + *addr_host = v + (addr&0xfff); + return NULL; + } + return (void *)(l2 << 1); + } +} + static void load_assemble(int i,struct regstat *i_regs) { int s,th,tl,addr; @@ -6198,6 +6252,13 @@ static void new_dynarec_test(void) int ret[2]; size_t i; + // check structure linkage + if ((void *)reg != (void *)&psxRegs + || (u_char *)rcnts - (u_char *)reg != sizeof(psxRegs)) + { + SysPrintf("linkage_arm miscompilation/breakage detected.\n"); + } + SysPrintf("testing if we can run 
recompiled code...\n"); ((volatile u_int *)out)[0]++; // make cache dirty From 7c3a5182da4384e21a6ace037583fae399de5a02 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 6 Nov 2021 00:19:16 +0200 Subject: [PATCH 058/597] drc: some more general cleanup --- libpcsxcore/new_dynarec/assem_arm.c | 153 +++++------- libpcsxcore/new_dynarec/assem_arm.h | 7 - libpcsxcore/new_dynarec/assem_arm64.c | 49 ++-- libpcsxcore/new_dynarec/emu_if.h | 3 - libpcsxcore/new_dynarec/linkage_arm.S | 5 +- libpcsxcore/new_dynarec/linkage_arm64.S | 5 +- libpcsxcore/new_dynarec/new_dynarec.c | 303 ++++++------------------ 7 files changed, 159 insertions(+), 366 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index a80b0464f..45a2f0866 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -281,7 +281,6 @@ static int isclean(void *addr) if((*ptr&0xFF000000)!=0xeb000000) ptr++; if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0; - if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0; if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0; return 1; } @@ -415,12 +414,11 @@ static void genimm_checked(u_int imm,u_int *encoded) static u_int genjmp(u_int addr) { - int offset=addr-(int)out-8; - if(offset<-33554432||offset>=33554432) { - if (addr>2) { - SysPrintf("genjmp: out of range: %08x\n", offset); - exit(1); - } + if (addr < 3) return 0; // a branch that will be patched later + int offset = addr-(int)out-8; + if (offset < -33554432 || offset >= 33554432) { + SysPrintf("genjmp: out of range: %08x\n", offset); + abort(); return 0; } return ((u_int)offset>>2)&0xffffff; @@ -444,12 +442,6 @@ static void emit_add(int rs1,int rs2,int rt) output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2)); } -static void emit_adds(int rs1,int rs2,int rt) -{ - assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - 
output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2)); -} - static void emit_adcs(int rs1,int rs2,int rt) { assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); @@ -538,12 +530,15 @@ static void emit_loadreg(int r, int hr) if((r&63)==0) emit_zeroreg(hr); else { - int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4); - if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4); - if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4); - if(r==CCREG) addr=(int)&cycle_count; - if(r==CSREG) addr=(int)&Status; - if(r==INVCP) addr=(int)&invc_ptr; + int addr = (int)&psxRegs.GPR.r[r]; + switch (r) { + //case HIREG: addr = &hi; break; + //case LOREG: addr = &lo; break; + case CCREG: addr = (int)&cycle_count; break; + case CSREG: addr = (int)&Status; break; + case INVCP: addr = (int)&invc_ptr; break; + default: assert(r < 34); break; + } u_int offset = addr-(u_int)&dynarec_local; assert(offset<4096); assem_debug("ldr %s,fp+%d\n",regname[hr],offset); @@ -558,10 +553,13 @@ static void emit_storereg(int r, int hr) assert(0); return; } - int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4); - if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4); - if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4); - if(r==CCREG) addr=(int)&cycle_count; + int addr = (int)&psxRegs.GPR.r[r]; + switch (r) { + //case HIREG: addr = &hi; break; + //case LOREG: addr = &lo; break; + case CCREG: addr = (int)&cycle_count; break; + default: assert(r < 34); break; + } u_int offset = addr-(u_int)&dynarec_local; assert(offset<4096); assem_debug("str %s,fp+%d\n",regname[hr],offset); @@ -714,23 +712,6 @@ static void emit_addnop(u_int r) output_w32(0xe2800000|rd_rn_rm(r,r,0)); } -static void emit_adcimm(u_int rs,int imm,u_int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval); -} - -static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl) -{ - // TODO: if(genimm(imm,&armval)) ... 
- // else - emit_movimm(imm,HOST_TEMPREG); - emit_adds(HOST_TEMPREG,rsl,rtl); - emit_adcimm(rsh,0,rth); -} - static void emit_andimm(int rs,int imm,int rt) { u_int armval; @@ -1062,56 +1043,63 @@ static void emit_jne(const void *a_) output_w32(0x1a000000|offset); } -static void emit_jeq(int a) +static void emit_jeq(const void *a_) { + int a = (int)a_; assem_debug("beq %x\n",a); u_int offset=genjmp(a); output_w32(0x0a000000|offset); } -static void emit_js(int a) +static void emit_js(const void *a_) { + int a = (int)a_; assem_debug("bmi %x\n",a); u_int offset=genjmp(a); output_w32(0x4a000000|offset); } -static void emit_jns(int a) +static void emit_jns(const void *a_) { + int a = (int)a_; assem_debug("bpl %x\n",a); u_int offset=genjmp(a); output_w32(0x5a000000|offset); } -static void emit_jl(int a) +static void emit_jl(const void *a_) { + int a = (int)a_; assem_debug("blt %x\n",a); u_int offset=genjmp(a); output_w32(0xba000000|offset); } -static void emit_jge(int a) +static void emit_jge(const void *a_) { + int a = (int)a_; assem_debug("bge %x\n",a); u_int offset=genjmp(a); output_w32(0xaa000000|offset); } -static void emit_jno(int a) +static void emit_jno(const void *a_) { + int a = (int)a_; assem_debug("bvc %x\n",a); u_int offset=genjmp(a); output_w32(0x7a000000|offset); } -static void emit_jc(int a) +static void emit_jc(const void *a_) { + int a = (int)a_; assem_debug("bcs %x\n",a); u_int offset=genjmp(a); output_w32(0x2a000000|offset); } -static void emit_jcc(void *a_) +static void emit_jcc(const void *a_) { int a = (int)a_; assem_debug("bcc %x\n",a); @@ -1589,7 +1577,8 @@ static void literal_pool_jumpover(int n) set_jump_target(jaddr, out); } -static void emit_extjump2(u_char *addr, int target, void *linker) +// parsed by get_pointer, find_extjump_insn +static void emit_extjump2(u_char *addr, u_int target, void *linker) { u_char *ptr=(u_char *)addr; assert((ptr[3]&0x0e)==0xa); @@ -1612,16 +1601,6 @@ static void emit_extjump2(u_char *addr, int target, void 
*linker) emit_jmp(linker); } -static void emit_extjump(void *addr, int target) -{ - emit_extjump2(addr, target, dyna_linker); -} - -static void emit_extjump_ds(void *addr, int target) -{ - emit_extjump2(addr, target, dyna_linker_ds); -} - // put rt_val into rt, potentially making use of rs with value rs_val static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt) { @@ -2062,24 +2041,28 @@ static void do_invstub(int n) emit_jmp(stubs[n].retaddr); // return address } -void *do_dirty_stub(int i) +// this output is parsed by verify_dirty, get_bounds +static void do_dirty_stub_emit_args(u_int arg0) { - assem_debug("do_dirty_stub %x\n",start+i*4); - u_int addr=(u_int)source; - // Careful about the code output here, verify_dirty needs to parse it. #ifndef HAVE_ARMV7 - emit_loadlp(addr,1); - emit_loadlp((int)copy,2); - emit_loadlp(slen*4,3); + emit_loadlp((int)source, 1); + emit_loadlp((int)copy, 2); + emit_loadlp(slen*4, 3); #else - emit_movw(addr&0x0000FFFF,1); - emit_movw(((u_int)copy)&0x0000FFFF,2); - emit_movt(addr&0xFFFF0000,1); - emit_movt(((u_int)copy)&0xFFFF0000,2); - emit_movw(slen*4,3); + emit_movw(((u_int)source)&0x0000FFFF, 1); + emit_movw(((u_int)copy)&0x0000FFFF, 2); + emit_movt(((u_int)source)&0xFFFF0000, 1); + emit_movt(((u_int)copy)&0xFFFF0000, 2); + emit_movw(slen*4, 3); #endif - emit_movimm(start+i*4,0); - emit_call((int)start<(int)0xC0000000?&verify_code:&verify_code_vm); + emit_movimm(arg0, 0); +} + +static void *do_dirty_stub(int i) +{ + assem_debug("do_dirty_stub %x\n",start+i*4); + do_dirty_stub_emit_args(start + i*4); + emit_call(verify_code); void *entry = out; load_regs_entry(i); if (entry == out) @@ -2090,20 +2073,8 @@ void *do_dirty_stub(int i) static void do_dirty_stub_ds() { - // Careful about the code output here, verify_dirty needs to parse it. 
- #ifndef HAVE_ARMV7 - emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1); - emit_loadlp((int)copy,2); - emit_loadlp(slen*4,3); - #else - emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1); - emit_movw(((u_int)copy)&0x0000FFFF,2); - emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1); - emit_movt(((u_int)copy)&0xFFFF0000,2); - emit_movw(slen*4,3); - #endif - emit_movimm(start+1,0); - emit_call(&verify_code_ds); + do_dirty_stub_emit_args(start + 1); + emit_call(verify_code_ds); } /* Special assem */ @@ -2512,7 +2483,7 @@ static void multdiv_assemble_arm(int i,struct regstat *i_regs) emit_negmi(quotient,quotient); // .. quotient and .. emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump) emit_movs(d2,HOST_TEMPREG); - emit_jeq((int)out+52); // Division by zero + emit_jeq(out+52); // Division by zero emit_negsmi(HOST_TEMPREG,HOST_TEMPREG); #ifdef HAVE_ARMV5 emit_clz(HOST_TEMPREG,quotient); @@ -2521,7 +2492,7 @@ static void multdiv_assemble_arm(int i,struct regstat *i_regs) emit_movimm(0,quotient); emit_addpl_imm(quotient,1,quotient); emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG); - emit_jns((int)out-2*4); + emit_jns(out-2*4); #endif emit_orimm(quotient,1<<31,quotient); emit_shr(quotient,quotient,quotient); @@ -2548,7 +2519,7 @@ static void multdiv_assemble_arm(int i,struct regstat *i_regs) emit_mov(d1,remainder); emit_movimm(0xffffffff,quotient); // div0 case emit_test(d2,d2); - emit_jeq((int)out+40); // Division by zero + emit_jeq(out+40); // Division by zero #ifdef HAVE_ARMV5 emit_clz(d2,HOST_TEMPREG); emit_movimm(1<<31,quotient); @@ -2557,7 +2528,7 @@ static void multdiv_assemble_arm(int i,struct regstat *i_regs) emit_movimm(0,HOST_TEMPREG); emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG); emit_lslpls_imm(d2,1,d2); - emit_jns((int)out-2*4); + emit_jns(out-2*4); emit_movimm(1<<31,quotient); #endif emit_shr(quotient,HOST_TEMPREG,quotient); diff --git 
a/libpcsxcore/new_dynarec/assem_arm.h b/libpcsxcore/new_dynarec/assem_arm.h index 2457bb11a..bbaf5b9e9 100644 --- a/libpcsxcore/new_dynarec/assem_arm.h +++ b/libpcsxcore/new_dynarec/assem_arm.h @@ -8,17 +8,10 @@ #define HAVE_CONDITIONAL_CALL 1 #define RAM_SIZE 0x200000 -#define REG_SHIFT 2 - /* ARM calling convention: r0-r3, r12: caller-save r4-r11: callee-save */ -#define ARG1_REG 0 -#define ARG2_REG 1 -#define ARG3_REG 2 -#define ARG4_REG 3 - /* GCC register naming convention: r10 = sl (base) r11 = fp (frame pointer) diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index df5b5aaa2..fabd7dbae 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -287,14 +287,14 @@ static void emit_loadreg(u_int r, u_int hr) if (r == 0) emit_zeroreg(hr); else { - void *addr = &reg[r]; + void *addr = &psxRegs.GPR.r[r]; switch (r) { - case HIREG: addr = &hi; break; - case LOREG: addr = &lo; break; + //case HIREG: addr = &hi; break; + //case LOREG: addr = &lo; break; case CCREG: addr = &cycle_count; break; case CSREG: addr = &Status; break; case INVCP: addr = &invc_ptr; break; - default: assert(r < 32); break; + default: assert(r < 34); break; } emit_readword(addr, hr); } @@ -314,12 +314,12 @@ static void emit_writeword(u_int rt, void *addr) static void emit_storereg(u_int r, u_int hr) { assert(r < 64); - void *addr = &reg[r]; + void *addr = &psxRegs.GPR.r[r]; switch (r) { - case HIREG: addr = &hi; break; - case LOREG: addr = &lo; break; + //case HIREG: addr = &hi; break; + //case LOREG: addr = &lo; break; case CCREG: addr = &cycle_count; break; - default: assert(r < 32); break; + default: assert(r < 34); break; } emit_writeword(hr, addr); } @@ -677,52 +677,51 @@ static void emit_jne(const void *a_) assert(0); } -static void emit_jeq(int a) +static void emit_jeq(const void *a) { - assem_debug("beq %x\n",a); + assem_debug("beq %p\n",a); assert(0); } -static void emit_js(int a) +static void emit_js(const 
void *a) { - assem_debug("bmi %x\n",a); + assem_debug("bmi %p\n",a); assert(0); } -static void emit_jns(int a) +static void emit_jns(const void *a) { - assem_debug("bpl %x\n",a); + assem_debug("bpl %p\n",a); assert(0); } -static void emit_jl(int a) +static void emit_jl(const void *a) { - assem_debug("blt %x\n",a); + assem_debug("blt %p\n",a); assert(0); } -static void emit_jge(int a) +static void emit_jge(const void *a) { - assem_debug("bge %x\n",a); + assem_debug("bge %p\n",a); assert(0); } -static void emit_jno(int a) +static void emit_jno(const void *a) { - assem_debug("bvc %x\n",a); + assem_debug("bvc %p\n",a); assert(0); } -static void emit_jc(int a) +static void emit_jc(const void *a) { - assem_debug("bcs %x\n",a); + assem_debug("bcs %p\n",a); assert(0); } -static void emit_jcc(void *a_) +static void emit_jcc(const void *a) { - uintptr_t a = (uintptr_t)a_; - assem_debug("bcc %p\n", a_); + assem_debug("bcc %p\n", a); assert(0); } diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index 5783ad3f4..a6846e2f8 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -9,9 +9,6 @@ extern char invalid_code[0x100000]; extern int dynarec_local[]; -/* same as psxRegs */ -extern int reg[]; - /* same as psxRegs.GPR.n.* */ extern int hi, lo; diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index aa6002fc6..778a67f0a 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -65,7 +65,7 @@ DRC_VAR(address, 4) DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) /* psxRegs */ -DRC_VAR(reg, 128) +@DRC_VAR(reg, 128) DRC_VAR(lo, 4) DRC_VAR(hi, 4) DRC_VAR(reg_cop0, 128) @@ -394,7 +394,6 @@ FUNCTION(jump_vaddr): FUNCTION(verify_code_ds): str r8, [fp, #LO_branch_target] -FUNCTION(verify_code_vm): FUNCTION(verify_code): /* r1 = source */ /* r2 = target */ @@ -429,7 +428,7 @@ FUNCTION(verify_code): bl get_addr mov pc, r0 .size verify_code, 
.-verify_code - .size verify_code_vm, .-verify_code_vm + .size verify_code_ds, .-verify_code_ds .align 2 FUNCTION(cc_interrupt): diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index a8f2d6630..397874c89 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -52,7 +52,7 @@ DRC_VAR(address, 4) DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) /* psxRegs */ -DRC_VAR(reg, 128) +#DRC_VAR(reg, 128) DRC_VAR(lo, 4) DRC_VAR(hi, 4) DRC_VAR(reg_cop0, 128) @@ -121,14 +121,13 @@ FUNCTION(jump_vaddr): FUNCTION(verify_code_ds): bl abort -FUNCTION(verify_code_vm): FUNCTION(verify_code): /* r1 = source */ /* r2 = target */ /* r3 = length */ bl abort .size verify_code, .-verify_code - .size verify_code_vm, .-verify_code_vm + .size verify_code_ds, .-verify_code_ds .align 2 FUNCTION(cc_interrupt): diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 1383b2f55..7c59a7e8a 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -147,8 +147,6 @@ struct link_entry static u_char rs2[MAXBLOCK]; static u_char rt1[MAXBLOCK]; static u_char rt2[MAXBLOCK]; - static u_char us1[MAXBLOCK]; - static u_char us2[MAXBLOCK]; static u_char dep1[MAXBLOCK]; static u_char dep2[MAXBLOCK]; static u_char lt1[MAXBLOCK]; @@ -209,8 +207,8 @@ struct link_entry /* registers that may be allocated */ /* 1-31 gpr */ -#define HIREG 32 // hi -#define LOREG 33 // lo +#define LOREG 32 // lo +#define HIREG 33 // hi //#define FSREG 34 // FPU status (FCSR) #define CSREG 35 // Coprocessor status #define CCREG 36 // Cycle count @@ -269,6 +267,9 @@ struct link_entry #define NOTTAKEN 2 #define NULLDS 3 +#define DJT_1 (void *)1l // no function, just a label in assem_debug log +#define DJT_2 (void *)2l + // asm linkage int new_recompile_block(int addr); void *get_addr_ht(u_int vaddr); @@ -278,7 +279,6 @@ void remove_hash(int vaddr); void dyna_linker(); void 
dyna_linker_ds(); void verify_code(); -void verify_code_vm(); void verify_code_ds(); void cc_interrupt(); void fp_exception(); @@ -537,10 +537,6 @@ void set_const(struct regstat *cur,signed char reg,uint64_t value) cur->isconst|=1<regmap[hr]^64)==reg) { - cur->isconst|=1<>32; - } } } @@ -577,7 +573,7 @@ uint64_t get_const(struct regstat *cur,signed char reg) } } SysPrintf("Unknown constant in r%d\n",reg); - exit(1); + abort(); } // Least soon needed registers @@ -817,7 +813,6 @@ static const struct { FUNCNAME(jump_handler_write16), FUNCNAME(jump_handler_write32), FUNCNAME(invalidate_addr), - FUNCNAME(verify_code_vm), FUNCNAME(verify_code), FUNCNAME(jump_hlecall), FUNCNAME(jump_syscall_hle), @@ -1316,16 +1311,6 @@ static void alloc_reg(struct regstat *cur,int i,signed char reg) for(r=1;r<=MAXREG;r++) { if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { @@ -1345,14 +1330,6 @@ static void alloc_reg(struct regstat *cur,int i,signed char reg) for(r=1;r<=MAXREG;r++) { if(hsn[r]==j) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { cur->regmap[hr]=reg; @@ -1364,7 +1341,7 @@ static void alloc_reg(struct regstat *cur,int i,signed char reg) } } } - SysPrintf("This shouldn't happen (alloc_reg)");exit(1); + SysPrintf("This shouldn't happen (alloc_reg)");abort(); } // Allocate a temporary register. 
This is done without regard to @@ -1427,16 +1404,6 @@ static void alloc_reg_temp(struct regstat *cur,int i,signed char reg) for(r=1;r<=MAXREG;r++) { if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { - for(hr=0;hr2) { - if(cur->regmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<2) { if(cur->regmap[hr]==r) { @@ -1456,14 +1423,6 @@ static void alloc_reg_temp(struct regstat *cur,int i,signed char reg) for(r=1;r<=MAXREG;r++) { if(hsn[r]==j) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { cur->regmap[hr]=reg; @@ -1475,7 +1434,7 @@ static void alloc_reg_temp(struct regstat *cur,int i,signed char reg) } } } - SysPrintf("This shouldn't happen");exit(1); + SysPrintf("This shouldn't happen");abort(); } static void mov_alloc(struct regstat *current,int i) @@ -1858,7 +1817,7 @@ void delayslot_alloc(struct regstat *current,int i) case SYSCALL: case HLECALL: case SPAN: - assem_debug("jump in the delay slot. this shouldn't happen.\n");//exit(1); + assem_debug("jump in the delay slot. 
this shouldn't happen.\n");//abort(); SysPrintf("Disabled speculative precompilation\n"); stop_after_jal=1; break; @@ -2003,15 +1962,6 @@ static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int } } -void rlist() -{ - int i; - printf("TRACE: "); - for(i=0;i<32;i++) - printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]); - printf("\n"); -} - // trashes r2 static void pass_args(int a0, int a1) { @@ -2212,24 +2162,15 @@ void imm16_assemble(int i,struct regstat *i_regs) } if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU if(rt1[i]) { - signed char sh,sl,th,tl; - th=get_reg(i_regs->regmap,rt1[i]|64); + signed char sl,tl; tl=get_reg(i_regs->regmap,rt1[i]); - sh=get_reg(i_regs->regmap,rs1[i]|64); sl=get_reg(i_regs->regmap,rs1[i]); if(tl>=0) { if(rs1[i]) { - assert(sh>=0); assert(sl>=0); - if(th>=0) { - emit_addimm64_32(sh,sl,imm[i],th,tl); - } - else { - emit_addimm(sl,imm[i],tl); - } + emit_addimm(sl,imm[i],tl); } else { emit_movimm(imm[i],tl); - if(th>=0) emit_movimm(((signed int)imm[i])>>31,th); } } } @@ -2276,10 +2217,8 @@ void imm16_assemble(int i,struct regstat *i_regs) } else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI if(rt1[i]) { - signed char sh,sl,th,tl; - th=get_reg(i_regs->regmap,rt1[i]|64); + signed char sl,tl; tl=get_reg(i_regs->regmap,rt1[i]); - sh=get_reg(i_regs->regmap,rs1[i]|64); sl=get_reg(i_regs->regmap,rs1[i]); if(tl>=0 && !((i_regs->isconst>>tl)&1)) { if(opcode[i]==0x0c) //ANDI @@ -2297,7 +2236,6 @@ void imm16_assemble(int i,struct regstat *i_regs) } else emit_zeroreg(tl); - if(th>=0) emit_zeroreg(th); } else { @@ -2305,13 +2243,6 @@ void imm16_assemble(int i,struct regstat *i_regs) if(sl<0) { if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl); } - if(th>=0) { - if(sh<0) { - emit_loadreg(rs1[i]|64,th); - }else{ - emit_mov(sh,th); - } - } if(opcode[i]==0x0d) { // ORI if(sl<0) { emit_orimm(tl,imm[i],tl); @@ -2335,7 +2266,6 @@ void imm16_assemble(int i,struct regstat *i_regs) } else { 
emit_movimm(imm[i],tl); - if(th>=0) emit_zeroreg(th); } } } @@ -2404,7 +2334,7 @@ void shiftimm_assemble(int i,struct regstat *i_regs) void shift_assemble(int i,struct regstat *i_regs) { printf("Need shift_assemble for this architecture.\n"); - exit(1); + abort(); } #endif @@ -2527,13 +2457,12 @@ static void *get_direct_memhandler(void *table, u_int addr, static void load_assemble(int i,struct regstat *i_regs) { - int s,th,tl,addr; + int s,tl,addr; int offset; void *jaddr=0; int memtarget=0,c=0; int fastload_reg_override=0; u_int hr,reglist=0; - th=get_reg(i_regs->regmap,rt1[i]|64); tl=get_reg(i_regs->regmap,rt1[i]); s=get_reg(i_regs->regmap,rs1[i]); offset=imm[i]; @@ -2566,7 +2495,6 @@ static void load_assemble(int i,struct regstat *i_regs) //if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset); assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O reglist&=~(1<=0) reglist&=~(1<regmap,rt1[i],ccadj[i],reglist); } if (opcode[i]==0x27) { // LWU - assert(th>=0); - if(!c||memtarget) { - if(!dummy) { - int a=addr; - if(fastload_reg_override) a=fastload_reg_override; - emit_readword_indexed(0,a,tl); - } - if(jaddr) - add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); - } - else { - inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); - } - emit_zeroreg(th); + assert(0); } if (opcode[i]==0x37) { // LD assert(0); @@ -2680,7 +2595,7 @@ static void load_assemble(int i,struct regstat *i_regs) void loadlr_assemble(int i,struct regstat *i_regs) { printf("Need loadlr_assemble for this architecture.\n"); - exit(1); + abort(); } #endif @@ -3036,8 +2951,6 @@ static void cop0_assemble(int i,struct regstat *i_regs) emit_jne(&do_interrupt); } emit_loadreg(rs1[i],s); - if(get_reg(i_regs->regmap,rs1[i]|64)>=0) - emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64)); } else { @@ -3344,33 +3257,27 @@ static void cop2_assemble(int i,struct regstat *i_regs) void multdiv_assemble(int 
i,struct regstat *i_regs) { printf("Need multdiv_assemble for this architecture.\n"); - exit(1); + abort(); } #endif -void mov_assemble(int i,struct regstat *i_regs) +static void mov_assemble(int i,struct regstat *i_regs) { //if(opcode2[i]==0x10||opcode2[i]==0x12) { // MFHI/MFLO //if(opcode2[i]==0x11||opcode2[i]==0x13) { // MTHI/MTLO if(rt1[i]) { - signed char sh,sl,th,tl; - th=get_reg(i_regs->regmap,rt1[i]|64); + signed char sl,tl; tl=get_reg(i_regs->regmap,rt1[i]); //assert(tl>=0); if(tl>=0) { - sh=get_reg(i_regs->regmap,rs1[i]|64); sl=get_reg(i_regs->regmap,rs1[i]); if(sl>=0) emit_mov(sl,tl); else emit_loadreg(rs1[i],tl); - if(th>=0) { - if(sh>=0) emit_mov(sh,th); - else emit_loadreg(rs1[i]|64,th); - } } } } -void syscall_assemble(int i,struct regstat *i_regs) +static void syscall_assemble(int i,struct regstat *i_regs) { signed char ccreg=get_reg(i_regs->regmap,CCREG); assert(ccreg==HOST_CCREG); @@ -3381,7 +3288,7 @@ void syscall_assemble(int i,struct regstat *i_regs) emit_jmp(jump_syscall_hle); // XXX } -void hlecall_assemble(int i,struct regstat *i_regs) +static void hlecall_assemble(int i,struct regstat *i_regs) { extern void psxNULL(); signed char ccreg=get_reg(i_regs->regmap,CCREG); @@ -3398,7 +3305,7 @@ void hlecall_assemble(int i,struct regstat *i_regs) emit_jmp(jump_hlecall); } -void intcall_assemble(int i,struct regstat *i_regs) +static void intcall_assemble(int i,struct regstat *i_regs) { signed char ccreg=get_reg(i_regs->regmap,CCREG); assert(ccreg==HOST_CCREG); @@ -3501,7 +3408,7 @@ static void speculate_register_values(int i) #endif } -void ds_assemble(int i,struct regstat *i_regs) +static void ds_assemble(int i,struct regstat *i_regs) { speculate_register_values(i); is_delayslot=1; @@ -4131,7 +4038,7 @@ static void drc_dbg_emit_do_cmp(int i) #endif // Used when a branch jumps into the delay slot of another branch -void ds_assemble_entry(int i) +static void ds_assemble_entry(int i) { int t=(ba[i]-start)>>2; if (!instr_addr[t]) @@ -4200,6 +4107,16 @@ 
void ds_assemble_entry(int i) emit_jmp(0); } +static void emit_extjump(void *addr, u_int target) +{ + emit_extjump2(addr, target, dyna_linker); +} + +static void emit_extjump_ds(void *addr, u_int target) +{ + emit_extjump2(addr, target, dyna_linker_ds); +} + void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) { int count; @@ -4428,7 +4345,7 @@ static void do_ccstub(int n) } emit_writeword(r,&pcaddr); } - else {SysPrintf("Unknown branch type in do_ccstub\n");exit(1);} + else {SysPrintf("Unknown branch type in do_ccstub\n");abort();} } // Update cycle count assert(branch_regs[i].regmap[HOST_CCREG]==CCREG||branch_regs[i].regmap[HOST_CCREG]==-1); @@ -4501,7 +4418,7 @@ static void ujump_assemble_write_ra(int i) } } -void ujump_assemble(int i,struct regstat *i_regs) +static void ujump_assemble(int i,struct regstat *i_regs) { int ra_done=0; if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); @@ -4571,7 +4488,7 @@ static void rjump_assemble_write_ra(int i) #endif } -void rjump_assemble(int i,struct regstat *i_regs) +static void rjump_assemble(int i,struct regstat *i_regs) { int temp; int rs,cc; @@ -4668,7 +4585,7 @@ void rjump_assemble(int i,struct regstat *i_regs) #endif } -void cjump_assemble(int i,struct regstat *i_regs) +static void cjump_assemble(int i,struct regstat *i_regs) { signed char *i_regmap=i_regs->regmap; int cc; @@ -4770,7 +4687,7 @@ void cjump_assemble(int i,struct regstat *i_regs) else emit_test(s1l,s1l); if(invert){ nottaken=out; - emit_jne((void *)1l); + emit_jne(DJT_1); }else{ add_to_linker(out,ba[i],internal); emit_jeq(0); @@ -4782,7 +4699,7 @@ void cjump_assemble(int i,struct regstat *i_regs) else emit_test(s1l,s1l); if(invert){ nottaken=out; - emit_jeq(1); + emit_jeq(DJT_1); }else{ add_to_linker(out,ba[i],internal); emit_jne(0); @@ -4793,7 +4710,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_cmpimm(s1l,1); if(invert){ nottaken=out; - emit_jge(1); + emit_jge(DJT_1); }else{ add_to_linker(out,ba[i],internal); 
emit_jl(0); @@ -4804,7 +4721,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_cmpimm(s1l,1); if(invert){ nottaken=out; - emit_jl(1); + emit_jl(DJT_1); }else{ add_to_linker(out,ba[i],internal); emit_jge(0); @@ -4864,26 +4781,26 @@ void cjump_assemble(int i,struct regstat *i_regs) if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); nottaken=out; - emit_jne((void *)2l); + emit_jne(DJT_2); } if((opcode[i]&0x2f)==5) // BNE { if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); nottaken=out; - emit_jeq(2); + emit_jeq(DJT_2); } if((opcode[i]&0x2f)==6) // BLEZ { emit_cmpimm(s1l,1); nottaken=out; - emit_jge(2); + emit_jge(DJT_2); } if((opcode[i]&0x2f)==7) // BGTZ { emit_cmpimm(s1l,1); nottaken=out; - emit_jl(2); + emit_jl(DJT_2); } } // if(!unconditional) int adj; @@ -4957,7 +4874,7 @@ void cjump_assemble(int i,struct regstat *i_regs) } } -void sjump_assemble(int i,struct regstat *i_regs) +static void sjump_assemble(int i,struct regstat *i_regs) { signed char *i_regmap=i_regs->regmap; int cc; @@ -5063,7 +4980,7 @@ void sjump_assemble(int i,struct regstat *i_regs) emit_test(s1l,s1l); if(invert){ nottaken=out; - emit_jns(1); + emit_jns(DJT_1); }else{ add_to_linker(out,ba[i],internal); emit_js(0); @@ -5074,7 +4991,7 @@ void sjump_assemble(int i,struct regstat *i_regs) emit_test(s1l,s1l); if(invert){ nottaken=out; - emit_js(1); + emit_js(DJT_1); }else{ add_to_linker(out,ba[i],internal); emit_jns(0); @@ -5143,13 +5060,13 @@ void sjump_assemble(int i,struct regstat *i_regs) { emit_test(s1l,s1l); nottaken=out; - emit_jns(1); + emit_jns(DJT_1); } if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL { emit_test(s1l,s1l); nottaken=out; - emit_js(1); + emit_js(DJT_1); } } // if(!unconditional) int adj; @@ -6253,10 +6170,9 @@ static void new_dynarec_test(void) size_t i; // check structure linkage - if ((void *)reg != (void *)&psxRegs - || (u_char *)rcnts - (u_char *)reg != sizeof(psxRegs)) + if ((u_char *)rcnts - (u_char *)&psxRegs != sizeof(psxRegs)) { - 
SysPrintf("linkage_arm miscompilation/breakage detected.\n"); + SysPrintf("linkage_arm* miscompilation/breakage detected.\n"); } SysPrintf("testing if we can run recompiled code...\n"); @@ -6518,7 +6434,7 @@ int new_recompile_block(int addr) } start = (u_int)addr&~3; - //assert(((u_int)addr&1)==0); + //assert(((u_int)addr&1)==0); // start-in-delay-slot flag new_dynarec_did_compile=1; if (Config.HLE && start == 0x80001000) // hlecall { @@ -6539,7 +6455,7 @@ int new_recompile_block(int addr) source = get_source_start(start, &pagelimit); if (source == NULL) { SysPrintf("Compile at bogus memory address: %08x\n", addr); - exit(1); + abort(); } /* Pass 1: disassemble */ @@ -6750,8 +6666,6 @@ int new_recompile_block(int addr) opcode2[i]=op2; /* Get registers/immediates */ lt1[i]=0; - us1[i]=0; - us2[i]=0; dep1[i]=0; dep2[i]=0; gte_rs[i]=gte_rt[i]=0; @@ -6770,7 +6684,6 @@ int new_recompile_block(int addr) rt1[i]=0; rt2[i]=0; imm[i]=(short)source[i]; - if(op==0x2c||op==0x2d||op==0x3f) us1[i]=rs2[i]; // 64-bit SDL/SDR/SD break; case LOADLR: // LWL/LWR only load part of the register, @@ -6780,7 +6693,6 @@ int new_recompile_block(int addr) rt1[i]=(source[i]>>16)&0x1f; rt2[i]=0; imm[i]=(short)source[i]; - if(op==0x1a||op==0x1b) us1[i]=rs2[i]; // LDR/LDL if(op==0x26) dep1[i]=rt1[i]; // LWR break; case IMM16: @@ -6794,8 +6706,6 @@ int new_recompile_block(int addr) }else{ imm[i]=(short)source[i]; } - if(op==0x18||op==0x19) us1[i]=rs1[i]; // DADDI/DADDIU - if(op==0x0a||op==0x0b) us1[i]=rs1[i]; // SLTI/SLTIU if(op==0x0d||op==0x0e) dep1[i]=rs1[i]; // ORI/XORI break; case UJUMP: @@ -6828,8 +6738,6 @@ int new_recompile_block(int addr) if(op&2) { // BGTZ/BLEZ rs2[i]=0; } - us1[i]=rs1[i]; - us2[i]=rs2[i]; likely[i]=op>>4; break; case SJUMP: @@ -6837,7 +6745,6 @@ int new_recompile_block(int addr) rs2[i]=CCREG; rt1[i]=0; rt2[i]=0; - us1[i]=rs1[i]; if(op2&0x10) { // BxxAL rt1[i]=31; // NOTE: If the branch is not taken, r31 is still overwritten @@ -6849,10 +6756,7 @@ int 
new_recompile_block(int addr) rs2[i]=(source[i]>>16)&0x1f; // subtract amount rt1[i]=(source[i]>>11)&0x1f; // destination rt2[i]=0; - if(op2==0x2a||op2==0x2b) { // SLT/SLTU - us1[i]=rs1[i];us2[i]=rs2[i]; - } - else if(op2>=0x24&&op2<=0x27) { // AND/OR/XOR/NOR + if(op2>=0x24&&op2<=0x27) { // AND/OR/XOR/NOR dep1[i]=rs1[i];dep2[i]=rs2[i]; } else if(op2>=0x2c&&op2<=0x2f) { // DADD/DSUB @@ -6864,9 +6768,6 @@ int new_recompile_block(int addr) rs2[i]=(source[i]>>16)&0x1f; // divisor rt1[i]=HIREG; rt2[i]=LOREG; - if (op2>=0x1c&&op2<=0x1f) { // DMULT/DMULTU/DDIV/DDIVU - us1[i]=rs1[i];us2[i]=rs2[i]; - } break; case MOV: rs1[i]=0; @@ -6886,8 +6787,6 @@ int new_recompile_block(int addr) rs2[i]=(source[i]>>21)&0x1f; // shift amount rt1[i]=(source[i]>>11)&0x1f; // destination rt2[i]=0; - // DSLLV/DSRLV/DSRAV are 64-bit - if(op2>=0x14&&op2<=0x17) us1[i]=rs1[i]; break; case SHIFTIMM: rs1[i]=(source[i]>>16)&0x1f; @@ -6897,8 +6796,6 @@ int new_recompile_block(int addr) imm[i]=(source[i]>>6)&0x1f; // DSxx32 instructions if(op2>=0x3c) imm[i]|=0x20; - // DSLL/DSRL/DSRA/DSRA32/DSRL32 but not DSLL32 require 64-bit source - if(op2>=0x38&&op2!=0x3c) us1[i]=rs1[i]; break; case COP0: rs1[i]=0; @@ -6917,7 +6814,6 @@ int new_recompile_block(int addr) rt2[i]=0; if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC1/DMFC1/CFC1 if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC1/DMTC1/CTC1 - if(op2==5) us1[i]=rs1[i]; // DMTC1 rs2[i]=CSREG; break; case COP2: @@ -7110,23 +7006,6 @@ int new_recompile_block(int addr) current.isconst=0; current.waswritten=0; } - if(i>1) - { - if((opcode[i-2]&0x2f)==0x05) // BNE/BNEL - { - if(rs1[i-2]==0||rs2[i-2]==0) - { - if(rs1[i-2]) { - int hr=get_reg(current.regmap,rs1[i-2]|64); - if(hr>=0) current.regmap[hr]=-1; - } - if(rs2[i-2]) { - int hr=get_reg(current.regmap,rs2[i-2]|64); - if(hr>=0) current.regmap[hr]=-1; - } - } - } - } memcpy(regmap_pre[i],current.regmap,sizeof(current.regmap)); regs[i].wasconst=current.isconst; @@ -7144,7 +7023,7 @@ int new_recompile_block(int addr) 
current.u=branch_unneeded_reg[i]&~((1LL<>r)&1) { regs[i].regmap_entry[hr]=-1; regs[i].regmap[hr]=-1; @@ -7186,10 +7065,6 @@ int new_recompile_block(int addr) //current.regmap[hr]=-1; }else regs[i].regmap_entry[hr]=r; - } - else { - assert(0); - } } } else { // First instruction expects CCREG to be allocated @@ -7463,7 +7338,8 @@ int new_recompile_block(int addr) regs[i].regmap_entry[hr]=0; } else - if(r<64){ + { + assert(r<64); if((current.u>>r)&1) { regs[i].regmap_entry[hr]=-1; //regs[i].regmap[hr]=-1; @@ -7471,9 +7347,6 @@ int new_recompile_block(int addr) }else regs[i].regmap_entry[hr]=r; } - else { - assert(0); - } } } else { // Branches expect CCREG to be allocated at the target @@ -7798,12 +7671,8 @@ int new_recompile_block(int addr) if(rt1[i+1]&&rt1[i+1]==(regs[i].regmap[hr]&63)) nr&=~(1<0&&!bt[i]&&((regs[i].wasdirty>>hr)&1)) { - if((regmap_pre[i][hr]>0&®map_pre[i][hr]<64&&!((unneeded_reg[i]>>regmap_pre[i][hr])&1))) { + if((regmap_pre[i][hr]>0&&!((unneeded_reg[i]>>regmap_pre[i][hr])&1))) { if(rt1[i-1]==(regmap_pre[i][hr]&63)) nr|=1<0&®s[i].regmap_entry[hr]<64&&!((unneeded_reg[i]>>regs[i].regmap_entry[hr])&1))) { + if((regs[i].regmap_entry[hr]>0&&!((unneeded_reg[i]>>regs[i].regmap_entry[hr])&1))) { if(rt1[i-1]==(regs[i].regmap_entry[hr]&63)) nr|=1<=0||get_reg(branch_regs[i].regmap,rt1[i+1]|64)>=0) - { - d1=dep1[i+1]; - d2=dep2[i+1]; - } + int map=0,temp=0; if(itype[i+1]==STORE || itype[i+1]==STORELR || (opcode[i+1]&0x3b)==0x39 || (opcode[i+1]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2 map=INVCP; @@ -7914,8 +7774,6 @@ int new_recompile_block(int addr) if((regs[i].regmap[hr]&63)!=rs1[i] && (regs[i].regmap[hr]&63)!=rs2[i] && (regs[i].regmap[hr]&63)!=rt1[i] && (regs[i].regmap[hr]&63)!=rt2[i] && (regs[i].regmap[hr]&63)!=rt1[i+1] && (regs[i].regmap[hr]&63)!=rt2[i+1] && - (regs[i].regmap[hr]^64)!=us1[i+1] && (regs[i].regmap[hr]^64)!=us2[i+1] && - (regs[i].regmap[hr]^64)!=d1 && (regs[i].regmap[hr]^64)!=d2 && regs[i].regmap[hr]!=rs1[i+1] && regs[i].regmap[hr]!=rs2[i+1] 
&& (regs[i].regmap[hr]&63)!=temp && regs[i].regmap[hr]!=PTEMP && regs[i].regmap[hr]!=RHASH && regs[i].regmap[hr]!=RHTBL && @@ -7927,8 +7785,6 @@ int new_recompile_block(int addr) if((branch_regs[i].regmap[hr]&63)!=rs1[i] && (branch_regs[i].regmap[hr]&63)!=rs2[i] && (branch_regs[i].regmap[hr]&63)!=rt1[i] && (branch_regs[i].regmap[hr]&63)!=rt2[i] && (branch_regs[i].regmap[hr]&63)!=rt1[i+1] && (branch_regs[i].regmap[hr]&63)!=rt2[i+1] && - (branch_regs[i].regmap[hr]^64)!=us1[i+1] && (branch_regs[i].regmap[hr]^64)!=us2[i+1] && - (branch_regs[i].regmap[hr]^64)!=d1 && (branch_regs[i].regmap[hr]^64)!=d2 && branch_regs[i].regmap[hr]!=rs1[i+1] && branch_regs[i].regmap[hr]!=rs2[i+1] && (branch_regs[i].regmap[hr]&63)!=temp && branch_regs[i].regmap[hr]!=PTEMP && branch_regs[i].regmap[hr]!=RHASH && branch_regs[i].regmap[hr]!=RHTBL && @@ -7952,12 +7808,7 @@ int new_recompile_block(int addr) // Non-branch if(i>0) { - int d1=0,d2=0,map=-1,temp=-1; - if(get_reg(regs[i].regmap,rt1[i]|64)>=0) - { - d1=dep1[i]; - d2=dep2[i]; - } + int map=-1,temp=-1; if(itype[i]==STORE || itype[i]==STORELR || (opcode[i]&0x3b)==0x39 || (opcode[i]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2 map=INVCP; @@ -7966,8 +7817,6 @@ int new_recompile_block(int addr) itype[i]==C1LS || itype[i]==C2LS) temp=FTEMP; if((regs[i].regmap[hr]&63)!=rt1[i] && (regs[i].regmap[hr]&63)!=rt2[i] && - (regs[i].regmap[hr]^64)!=us1[i] && (regs[i].regmap[hr]^64)!=us2[i] && - (regs[i].regmap[hr]^64)!=d1 && (regs[i].regmap[hr]^64)!=d2 && regs[i].regmap[hr]!=rs1[i] && regs[i].regmap[hr]!=rs2[i] && (regs[i].regmap[hr]&63)!=temp && regs[i].regmap[hr]!=map && (itype[i]!=SPAN||regs[i].regmap[hr]!=CCREG)) @@ -8017,12 +7866,7 @@ int new_recompile_block(int addr) if(t<2||(itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||rt1[t-2]!=31) // call/ret assumes no registers allocated for(hr=0;hr64) { - if(!((regs[i].dirty>>hr)&1)) - f_regmap[hr]=regs[i].regmap[hr]; - else f_regmap[hr]=-1; - } - else if(regs[i].regmap[hr]>=0) { + if(regs[i].regmap[hr]>=0) { 
if(f_regmap[hr]!=regs[i].regmap[hr]) { // dealloc old register int n; @@ -8034,12 +7878,7 @@ int new_recompile_block(int addr) f_regmap[hr]=regs[i].regmap[hr]; } } - if(branch_regs[i].regmap[hr]>64) { - if(!((branch_regs[i].dirty>>hr)&1)) - f_regmap[hr]=branch_regs[i].regmap[hr]; - else f_regmap[hr]=-1; - } - else if(branch_regs[i].regmap[hr]>=0) { + if(branch_regs[i].regmap[hr]>=0) { if(f_regmap[hr]!=branch_regs[i].regmap[hr]) { // dealloc old register int n; @@ -8222,11 +8061,7 @@ int new_recompile_block(int addr) for(hr=0;hr64) { - if(!((regs[i].dirty>>hr)&1)) - f_regmap[hr]=regs[i].regmap[hr]; - } - else if(regs[i].regmap[hr]>=0) { + if(regs[i].regmap[hr]>=0) { if(f_regmap[hr]!=regs[i].regmap[hr]) { // dealloc old register int n; @@ -8504,6 +8339,7 @@ int new_recompile_block(int addr) #ifdef __arm__ printf("pre: r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d\n",regmap_pre[i][0],regmap_pre[i][1],regmap_pre[i][2],regmap_pre[i][3],regmap_pre[i][4],regmap_pre[i][5],regmap_pre[i][6],regmap_pre[i][7],regmap_pre[i][8],regmap_pre[i][9],regmap_pre[i][10],regmap_pre[i][12]); #endif + #if defined(__i386__) || defined(__x86_64__) printf("needs: "); if(needed_reg[i]&1) printf("eax "); if((needed_reg[i]>>1)&1) printf("ecx "); @@ -8513,7 +8349,6 @@ int new_recompile_block(int addr) if((needed_reg[i]>>6)&1) printf("esi "); if((needed_reg[i]>>7)&1) printf("edi "); printf("\n"); - #if defined(__i386__) || defined(__x86_64__) printf("entry: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7]); printf("dirty: "); if(regs[i].wasdirty&1) printf("eax "); @@ -8580,7 +8415,7 @@ int new_recompile_block(int addr) if((regs[i].isconst>>6)&1) printf("esi=%x ",(u_int)constmap[i][6]); if((regs[i].isconst>>7)&1) printf("edi=%x ",(u_int)constmap[i][7]); #endif - #ifdef __arm__ + #if defined(__arm__) || 
defined(__aarch64__) int r; for (r = 0; r < ARRAY_SIZE(constmap[i]); r++) if ((regs[i].isconst >> r) & 1) From d1e4ebd9988a9a5d9fb38b89f19e24b9ab6029d7 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 8 Nov 2021 22:26:05 +0000 Subject: [PATCH 059/597] drc: arm64 wip --- libpcsxcore/new_dynarec/assem_arm.c | 70 +- libpcsxcore/new_dynarec/assem_arm64.c | 1277 ++++++++++++----- libpcsxcore/new_dynarec/assem_arm64.h | 12 +- libpcsxcore/new_dynarec/linkage_arm.S | 8 - libpcsxcore/new_dynarec/linkage_arm64.S | 139 +- libpcsxcore/new_dynarec/linkage_offsets.h | 2 +- libpcsxcore/new_dynarec/new_dynarec.c | 140 +- libpcsxcore/new_dynarec/patches/trace_drc_chk | 183 +-- libpcsxcore/new_dynarec/pcsxmem_inline.c | 4 + 9 files changed, 1301 insertions(+), 534 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 45a2f0866..ed00103fd 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -424,6 +424,13 @@ static u_int genjmp(u_int addr) return ((u_int)offset>>2)&0xffffff; } +static unused void emit_breakpoint(void) +{ + assem_debug("bkpt #0\n"); + //output_w32(0xe1200070); + output_w32(0xe7f001f0); +} + static void emit_mov(int rs,int rt) { assem_debug("mov %s,%s\n",regname[rt],regname[rs]); @@ -1022,7 +1029,7 @@ static void emit_set_if_carry32(int rs1, int rs2, int rt) static void emit_call(const void *a_) { int a = (int)a_; - assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a)); + assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_)); u_int offset=genjmp(a); output_w32(0xeb000000|offset); } @@ -1030,7 +1037,7 @@ static void emit_call(const void *a_) static void emit_jmp(const void *a_) { int a = (int)a_; - assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a)); + assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_)); u_int offset=genjmp(a); output_w32(0xea000000|offset); } @@ -1380,21 +1387,6 @@ static void emit_rsbimm(int 
rs, int imm, int rt) output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval); } -// Load 2 immediates optimizing for small code size -static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) -{ - emit_movimm(imm1,rt1); - u_int armval; - if(genimm(imm2-imm1,&armval)) { - assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1); - output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval); - }else if(genimm(imm1-imm2,&armval)) { - assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2); - output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval); - } - else emit_movimm(imm2,rt2); -} - // Conditionally select one of two immediates, optimizing for small code size // This will only be called if HAVE_CMOV_IMM is defined static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) @@ -1601,6 +1593,13 @@ static void emit_extjump2(u_char *addr, u_int target, void *linker) emit_jmp(linker); } +static void check_extjump2(void *src) +{ + u_int *ptr = src; + assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs] + (void)ptr; +} + // put rt_val into rt, potentially making use of rs with value rs_val static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt) { @@ -2029,19 +2028,7 @@ static void do_unalignedwritestub(int n) #endif } -static void do_invstub(int n) -{ - literal_pool(20); - u_int reglist=stubs[n].a; - set_jump_target(stubs[n].addr, out); - save_regs(reglist); - if(stubs[n].b!=0) emit_mov(stubs[n].b,0); - emit_call(&invalidate_addr); - restore_regs(reglist); - emit_jmp(stubs[n].retaddr); // return address -} - -// this output is parsed by verify_dirty, get_bounds +// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr static void do_dirty_stub_emit_args(u_int arg0) { #ifndef HAVE_ARMV7 @@ -2196,7 +2183,7 @@ static void loadlr_assemble_arm(int i,struct regstat *i_regs) int offset; void *jaddr=0; int memtarget=0,c=0; - int fastload_reg_override=0; + int fastio_reg_override=-1; u_int hr,reglist=0; 
tl=get_reg(i_regs->regmap,rt1[i]); s=get_reg(i_regs->regmap,rs1[i]); @@ -2224,12 +2211,13 @@ static void loadlr_assemble_arm(int i,struct regstat *i_regs) }else{ emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR } - jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override); + jaddr=emit_fastpath_cmp_jump(i,temp2,&fastio_reg_override); } else { if(ram_offset&&memtarget) { + host_tempreg_acquire(); emit_addimm(temp2,ram_offset,HOST_TEMPREG); - fastload_reg_override=HOST_TEMPREG; + fastio_reg_override=HOST_TEMPREG; } if (opcode[i]==0x22||opcode[i]==0x26) { emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR @@ -2240,8 +2228,9 @@ static void loadlr_assemble_arm(int i,struct regstat *i_regs) if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR if(!c||memtarget) { int a=temp2; - if(fastload_reg_override) a=fastload_reg_override; + if(fastio_reg_override>=0) a=fastio_reg_override; emit_readword_indexed(0,a,temp2); + if(fastio_reg_override==HOST_TEMPREG) host_tempreg_release(); if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist); } else @@ -2555,6 +2544,11 @@ static void multdiv_assemble_arm(int i,struct regstat *i_regs) } #define multdiv_assemble multdiv_assemble_arm +static void do_jump_vaddr(int rs) +{ + emit_jmp(jump_vaddr_reg[rs]); +} + static void do_preload_rhash(int r) { // Don't need this for ARM. On x86, this puts the value 0xf8 into the // register. 
On ARM the hash can be done with a single instruction (below) @@ -2577,11 +2571,11 @@ static void do_miniht_jump(int rs,int rh,int ht) { emit_cmp(rh,rs); emit_ldreq_indexed(ht,4,15); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - emit_mov(rs,7); - emit_jmp(jump_vaddr_reg[7]); - #else - emit_jmp(jump_vaddr_reg[rs]); + if(rs!=7) + emit_mov(rs,7); + rs=7; #endif + do_jump_vaddr(rs); } static void do_miniht_insert(u_int return_address,int rt,int temp) { diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index fabd7dbae..a0c628b58 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1,7 +1,8 @@ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Mupen64plus/PCSX - assem_arm64.c * * Copyright (C) 2009-2011 Ari64 * - * Copyright (C) 2010-2021 notaz * + * Copyright (C) 2009-2018 Gillou68310 * + * Copyright (C) 2021 notaz * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -27,33 +28,50 @@ u_char *translation_cache; #else u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); #endif +static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; #define CALLER_SAVE_REGS 0x0007ffff #define unused __attribute__((unused)) -static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; - -//void indirect_jump_indexed(); -//void indirect_jump(); -void do_interrupt(); -//void jump_vaddr_r0(); - -void * const jump_vaddr_reg[32]; +void do_memhandler_pre(); +void do_memhandler_post(); /* Linker */ - -static void set_jump_target(void *addr, void *target_) +static void set_jump_target(void *addr, void *target) { - assert(0); + u_int *ptr = addr; + intptr_t offset = (u_char *)target - (u_char *)addr; + + if((*ptr&0xFC000000)==0x14000000) { + assert(offset>=-134217728LL&&offset<134217728LL); + *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff); + } + else 
if((*ptr&0xff000000)==0x54000000) { + // Conditional branch are limited to +/- 1MB + // block max size is 256k so branching beyond the +/- 1MB limit + // should only happen when jumping to an already compiled block (see add_link) + // a workaround would be to do a trampoline jump via a stub at the end of the block + assert(offset>=-1048576LL&&offset<1048576LL); + *ptr=(*ptr&0xFF00000F)|(((offset>>2)&0x7ffff)<<5); + } + else if((*ptr&0x9f000000)==0x10000000) { //adr + // generated by do_miniht_insert + assert(offset>=-1048576LL&&offset<1048576LL); + *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5; + } + else + assert(0); // should not happen } // from a pointer to external jump stub (which was produced by emit_extjump2) // find where the jumping insn is static void *find_extjump_insn(void *stub) { - assert(0); - return NULL; + int *ptr = (int *)stub + 2; + assert((*ptr&0x9f000000) == 0x10000000); // adr + int offset = (((signed int)(*ptr<<8)>>13)<<2)|((*ptr>>29)&0x3); + return ptr + offset / 4; } // find where external branch is liked to using addr of it's stub: @@ -62,9 +80,9 @@ static void *find_extjump_insn(void *stub) // return addr where that branch jumps to static void *get_pointer(void *stub) { - //printf("get_pointer(%x)\n",(int)stub); - assert(0); - return NULL; + int *i_ptr = find_extjump_insn(stub); + assert((*i_ptr&0xfc000000) == 0x14000000); // b + return (u_char *)i_ptr+(((signed int)(*i_ptr<<6)>>6)<<2); } // Find the "clean" entry point from a "dirty" entry point @@ -81,18 +99,36 @@ static int verify_dirty(u_int *ptr) return 0; } -// This doesn't necessarily find all clean entry points, just -// guarantees that it's not dirty static int isclean(void *addr) { - assert(0); - return 0; + u_int *ptr = addr; + return (*ptr >> 24) != 0x58; // the only place ldr (literal) is used +} + +static uint64_t get_from_ldr_literal(const u_int *i) +{ + signed int ofs; + assert((i[0] & 0xff000000) == 0x58000000); + ofs = i[0] << 8; + ofs >>= 5+8; + 
return *(uint64_t *)(i + ofs); +} + +static uint64_t get_from_movz(const u_int *i) +{ + assert((i[0] & 0x7fe00000) == 0x52800000); + return (i[0] >> 5) & 0xffff; } // get source that block at addr was compiled from (host pointers) static void get_bounds(void *addr, u_char **start, u_char **end) { - assert(0); + const u_int *ptr = addr; + assert((ptr[0] & 0xff00001f) == 0x58000001); // ldr x1, source + assert((ptr[1] & 0xff00001f) == 0x58000002); // ldr x2, copy + assert((ptr[2] & 0xffe0001f) == 0x52800003); // movz w3, #slen*4 + *start = (u_char *)get_from_ldr_literal(&ptr[0]); + *end = *start + get_from_movz(&ptr[2]); } // Allocate a specific ARM register. @@ -128,10 +164,27 @@ static void alloc_cc(struct regstat *cur,int i) /* Assembler */ static unused const char *regname[32] = { - "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", - "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" - "ip0", "ip1", "r18", "r19", "r20", "r21", "r22", "r23" - "r24", "r25", "r26", "r27", "r28", "fp", "lr", "sp" + "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", + "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", + "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23", + "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp" +}; + +static unused const char *regname64[32] = { + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp" +}; + +enum { + COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC, + COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV +}; + +static unused const char *condname[16] = { + "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv" }; static void output_w32(u_int word) @@ -140,21 +193,38 @@ static void output_w32(u_int word) out += 4; } +static void output_w64(uint64_t dword) +{ + *((uint64_t *)out) = dword; + out+=8; 
+} + +/* static u_int rm_rd(u_int rm, u_int rd) { assert(rm < 31); assert(rd < 31); return (rm << 16) | rd; } +*/ static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd) { - assert(rm < 31); - assert(rn < 31); - assert(rd < 31); + assert(rm < 32); + assert(rn < 32); + assert(rd < 32); return (rm << 16) | (rn << 5) | rd; } +static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt) +{ + assert(imm7 < 0x80); + assert(rt2 < 31); + assert(rn < 32); + assert(rt < 31); + return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt; +} + static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd) { assert(imm6 <= 63); @@ -171,102 +241,217 @@ static u_int imm16_rd(u_int imm16, u_int rd) static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd) { assert(imm12 < 0x1000); + assert(rn < 32); + assert(rd < 32); + return (imm12 << 10) | (rn << 5) | rd; +} + +static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd) +{ + assert(imm9 < 0x200); assert(rn < 31); assert(rd < 31); - return (imm12 << 10) | (rn << 5) | rd; + return (imm9 << 12) | (rn << 5) | rd; } -#pragma GCC diagnostic ignored "-Wunused-function" -static u_int genjmp(u_char *addr) +static u_int imm19_rt(u_int imm19, u_int rt) +{ + assert(imm19 < 0x80000); + assert(rt < 31); + return (imm19 << 5) | rt; +} + +static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd) +{ + assert(n < 2); + assert(immr < 0x40); + assert(imms < 0x40); + assert(rn < 32); + assert(rd < 32); + return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd; +} + +static u_int genjmp(const u_char *addr) { intptr_t offset = addr - out; + if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later if (offset < -134217728 || offset > 134217727) { - if ((uintptr_t)addr > 2) { - SysPrintf("%s: out of range: %08x\n", __func__, offset); - exit(1); - } + SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset); + abort(); return 0; } - return ((u_int)offset >> 2) & 0x01ffffff; + return ((u_int)offset 
>> 2) & 0x03ffffff; } -static u_int genjmpcc(u_char *addr) +static u_int genjmpcc(const u_char *addr) { intptr_t offset = addr - out; + if ((uintptr_t)addr < 3) return 0; if (offset < -1048576 || offset > 1048572) { - if ((uintptr_t)addr > 2) { - SysPrintf("%s: out of range: %08x\n", __func__, offset); - exit(1); - } + SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset); + abort(); + return 0; + } + return ((u_int)offset >> 2) & 0x7ffff; +} + +static uint32_t is_mask(u_int value) +{ + return value && ((value + 1) & value) == 0; +} + +// This function returns true if the argument contains a +// non-empty sequence of ones (possibly rotated) with the remainder zero. +static uint32_t is_rotated_mask(u_int value) +{ + if (value == 0) return 0; + if (is_mask((value - 1) | value)) + return 1; + return is_mask((~value - 1) | ~value); +} + +static void gen_logical_imm(u_int value, u_int *immr, u_int *imms) +{ + int lzeros, tzeros, ones; + assert(value != 0); + if (is_mask((value - 1) | value)) { + lzeros = __builtin_clz(value); + tzeros = __builtin_ctz(value); + ones = 32 - lzeros - tzeros; + *immr = (32 - tzeros) & 31; + *imms = ones - 1; + return; } - return ((u_int)offset >> 2) & 0xfffff; + value = ~value; + if (is_mask((value - 1) | value)) { + lzeros = __builtin_clz(value); + tzeros = __builtin_ctz(value); + ones = 32 - lzeros - tzeros; + *immr = 31 - tzeros; + *imms = 31 - ones; + return; + } + assert(0); } static void emit_mov(u_int rs, u_int rt) { assem_debug("mov %s,%s\n", regname[rt], regname[rs]); - output_w32(0x2a0003e0 | rm_rd(rs, rt)); + output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt)); +} + +static void emit_mov64(u_int rs, u_int rt) +{ + assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]); + output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt)); } static void emit_movs(u_int rs, u_int rt) { + assert(0); // misleading assem_debug("movs %s,%s\n", regname[rt], regname[rs]); output_w32(0x31000000 | imm12_rn_rd(0, rs, rt)); } static void emit_add(u_int 
rs1, u_int rs2, u_int rt) { - assem_debug("add %s, %s, %s\n", regname[rt], regname[rs1], regname[rs2]); - output_w32(0x0b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt)); + assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]); + output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt)); } -static void emit_sbc(u_int rs1,u_int rs2,u_int rt) +static void emit_add64(u_int rs1, u_int rs2, u_int rt) { - assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); + assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]); + output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt)); } -static void emit_neg(u_int rs, u_int rt) +#pragma GCC diagnostic ignored "-Wunused-function" +static void emit_adds(u_int rs1, u_int rs2, u_int rt) { - assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]); - assert(0); + assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt)); } -static void emit_negs(u_int rs, u_int rt) +static void emit_adds64(u_int rs1, u_int rs2, u_int rt) { - assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]); - assert(0); + assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt)); +} + +static void emit_neg(u_int rs, u_int rt) +{ + assem_debug("neg %s,%s\n",regname[rt],regname[rs]); + output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt)); } static void emit_sub(u_int rs1, u_int rs2, u_int rt) { - assem_debug("sub %s, %s, %s\n", regname[rt], regname[rs1], regname[rs2]); + assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]); output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt)); } -static void emit_subs(u_int rs1,u_int rs2,u_int rt) +static void emit_movz(u_int imm, u_int rt) { - assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); + assem_debug("movz %s,#%#x\n", regname[rt], imm); + output_w32(0x52800000 | imm16_rd(imm, rt)); +} + +static void emit_movz_lsl16(u_int 
imm, u_int rt) +{ + assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm); + output_w32(0x52a00000 | imm16_rd(imm, rt)); +} + +static void emit_movn(u_int imm, u_int rt) +{ + assem_debug("movn %s,#%#x\n", regname[rt], imm); + output_w32(0x12800000 | imm16_rd(imm, rt)); +} + +static void emit_movn_lsl16(u_int imm,u_int rt) +{ + assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm); + output_w32(0x12a00000 | imm16_rd(imm, rt)); +} + +static void emit_movk(u_int imm,u_int rt) +{ + assem_debug("movk %s,#%#x\n", regname[rt], imm); + output_w32(0x72800000 | imm16_rd(imm, rt)); +} + +static void emit_movk_lsl16(u_int imm,u_int rt) +{ + assert(imm<65536); + assem_debug("movk %s, #%#x, lsl #16\n", regname[rt], imm); + output_w32(0x72a00000 | imm16_rd(imm, rt)); } static void emit_zeroreg(u_int rt) { - assem_debug("mov %s,#0\n",regname[rt]); - assert(0); + emit_movz(0, rt); } static void emit_movimm(u_int imm, u_int rt) { - assem_debug("mov %s,#%#x\n", regname[rt], imm); - if ((imm & 0xffff0000) == 0) - output_w32(0x52800000 | imm16_rd(imm, rt)); - else if ((imm & 0xffff0000) == 0xffff0000) - assert(0); + if (imm < 65536) + emit_movz(imm, rt); + else if ((~imm) < 65536) + emit_movn(~imm, rt); + else if ((imm&0xffff) == 0) + emit_movz_lsl16(imm >> 16, rt); + else if (((~imm)&0xffff) == 0) + emit_movn_lsl16(~imm >> 16, rt); + else if (is_rotated_mask(imm)) { + u_int immr, imms; + gen_logical_imm(imm, &immr, &imms); + assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm); + output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt)); + } else { - output_w32(0x52800000 | imm16_rd(imm & 0xffff, rt)); - output_w32(0x72a00000 | imm16_rd(imm >> 16, rt)); + emit_movz(imm & 0xffff, rt); + emit_movk_lsl16(imm >> 16, rt); } } @@ -281,8 +466,20 @@ static void emit_readword(void *addr, u_int rt) assert(0); } +static void emit_readdword(void *addr, u_int rt) +{ + uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; + if (!(offset & 7) && offset <= 32760) { + 
assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset); + output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt)); + } + else + assert(0); +} + static void emit_loadreg(u_int r, u_int hr) { + int is64 = 0; assert(r < 64); if (r == 0) emit_zeroreg(hr); @@ -293,10 +490,13 @@ static void emit_loadreg(u_int r, u_int hr) //case LOREG: addr = &lo; break; case CCREG: addr = &cycle_count; break; case CSREG: addr = &Status; break; - case INVCP: addr = &invc_ptr; break; + case INVCP: addr = &invc_ptr; is64 = 1; break; default: assert(r < 34); break; } - emit_readword(addr, hr); + if (is64) + emit_readdword(addr, hr); + else + emit_readword(addr, hr); } } @@ -311,6 +511,17 @@ static void emit_writeword(u_int rt, void *addr) assert(0); } +static void emit_writedword(u_int rt, void *addr) +{ + uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; + if (!(offset & 7) && offset <= 32760) { + assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset); + output_w32(0xf9000000 | imm12_rn_rd(offset >> 2, FP, rt)); + } + else + assert(0); +} + static void emit_storereg(u_int r, u_int hr) { assert(r < 64); @@ -326,93 +537,113 @@ static void emit_storereg(u_int r, u_int hr) static void emit_test(u_int rs, u_int rt) { - assem_debug("tst %s,%s\n",regname[rs],regname[rt]); - assert(0); + assem_debug("tst %s,%s\n", regname[rs], regname[rt]); + output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR)); } -static void emit_testimm(u_int rs,int imm) +static void emit_testimm(u_int rs, u_int imm) { + u_int immr, imms; assem_debug("tst %s,#%#x\n", regname[rs], imm); - assert(0); + assert(is_rotated_mask(imm)); // good enough for PCSX + gen_logical_imm(imm, &immr, &imms); + output_w32(0xb9000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR)); } static void emit_testeqimm(u_int rs,int imm) { assem_debug("tsteq %s,$%d\n",regname[rs],imm); - assert(0); + assert(0); // TODO eliminate emit_testeqimm } static void emit_not(u_int rs,u_int rt) { assem_debug("mvn %s,%s\n",regname[rt],regname[rs]); 
- assert(0); + output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt)); } static void emit_mvnmi(u_int rs,u_int rt) { assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]); - assert(0); + assert(0); // eliminate } static void emit_and(u_int rs1,u_int rs2,u_int rt) { assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); + output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt)); } static void emit_or(u_int rs1,u_int rs2,u_int rt) { assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); + output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt)); } static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt) { - assert(rs < 31); - assert(rt < 31); - assert(imm < 32); assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm); - assert(0); + output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt)); } static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) { - assert(rs < 31); - assert(rt < 31); - assert(imm < 32); assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm); - assert(0); -} - -static void emit_or_and_set_flags(u_int rs1,u_int rs2,u_int rt) -{ - assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); + output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt)); } static void emit_xor(u_int rs1,u_int rs2,u_int rt) { assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - assert(0); + output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt)); } -static void emit_addimm(u_int rs, uintptr_t imm, u_int rt) +static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt) { + unused const char *st = s ? "s" : ""; + s = s ? 0x20000000 : 0; + is64 = is64 ? 
0x80000000 : 0; if (imm < 4096) { - assem_debug("add %s,%s,%#lx\n", regname[rt], regname[rs], imm); - output_w32(0x11000000 | imm12_rn_rd(imm, rs, rt)); + assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm); + output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt)); } else if (-imm < 4096) { - assem_debug("sub %s,%s,%#lx\n", regname[rt], regname[rs], imm); - output_w32(0x51000000 | imm12_rn_rd(imm, rs, rt)); + assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm); + output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt)); + } + else if (imm < 16777216) { + assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000); + output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt)); + if ((imm & 0xfff) || s) { + assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff); + output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rt, rt)); + } + } + else if (-imm < 16777216) { + assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000); + output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt)); + if ((imm & 0xfff) || s) { + assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff); + output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt)); + } } else assert(0); } +static void emit_addimm(u_int rs, uintptr_t imm, u_int rt) +{ + emit_addimm_s(0, 0, rs, imm, rt); +} + +static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt) +{ + emit_addimm_s(0, 1, rs, imm, rt); +} + static void emit_addimm_and_set_flags(int imm, u_int rt) { - assert(0); + emit_addimm_s(1, 0, rt, imm, rt); } static void emit_addimm_no_flags(u_int imm,u_int rt) @@ -420,181 +651,195 @@ static void emit_addimm_no_flags(u_int imm,u_int rt) emit_addimm(rt,imm,rt); } -static void emit_adcimm(u_int rs,int imm,u_int rt) +static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt) { - assem_debug("adc %s,%s,#%#x\n",regname[rt],regname[rs],imm); - assert(0); + const char *names[] = { "and", 
"orr", "eor", "ands" }; + const char *name = names[op]; + u_int immr, imms; + op = op << 29; + if (is_rotated_mask(imm)) { + gen_logical_imm(imm, &immr, &imms); + assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm); + output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt)); + } + else { + if (rs == HOST_TEMPREG || rt != HOST_TEMPREG) + host_tempreg_acquire(); + emit_movimm(imm, HOST_TEMPREG); + assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]); + output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt)); + if (rs == HOST_TEMPREG || rt != HOST_TEMPREG) + host_tempreg_release(); + } + (void)name; } -static void emit_rscimm(u_int rs,int imm,u_int rt) +static void emit_andimm(u_int rs, u_int imm, u_int rt) { - assem_debug("rsc %s,%s,#%#x\n",regname[rt],regname[rs],imm); - assert(0); + if (imm == 0) + emit_zeroreg(rt); + else + emit_logicop_imm(0, rs, imm, rt); } -static void emit_addimm64_32(u_int rsh,u_int rsl,int imm,u_int rth,u_int rtl) +static void emit_orimm(u_int rs, u_int imm, u_int rt) { - assert(0); + if (imm == 0) { + if (rs != rt) + emit_mov(rs, rt); + } + else + emit_logicop_imm(1, rs, imm, rt); } -static void emit_andimm(u_int rs,int imm,u_int rt) +static void emit_xorimm(u_int rs, u_int imm, u_int rt) { - assert(0); + if (imm == 0) { + if (rs != rt) + emit_mov(rs, rt); + } + else + emit_logicop_imm(2, rs, imm, rt); } -static void emit_orimm(u_int rs,int imm,u_int rt) +static void emit_sbfm(u_int rs,u_int imm,u_int rt) { - assert(0); + assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt)); } -static void emit_xorimm(u_int rs,int imm,u_int rt) +static void emit_ubfm(u_int rs,u_int imm,u_int rt) { - assert(0); + assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt)); } static void emit_shlimm(u_int rs,u_int imm,u_int rt) { - assert(imm>0); - 
assert(imm<32); assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm); - assert(0); -} - -static void emit_lsls_imm(u_int rs,int imm,u_int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm); - assert(0); + output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt)); } static unused void emit_lslpls_imm(u_int rs,int imm,u_int rt) { - assert(imm>0); - assert(imm<32); - assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm); - assert(0); + assert(0); // eliminate } static void emit_shrimm(u_int rs,u_int imm,u_int rt) { - assert(imm>0); - assert(imm<32); assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm); - assert(0); + output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt)); } static void emit_sarimm(u_int rs,u_int imm,u_int rt) { - assert(imm>0); - assert(imm<32); assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm); - assert(0); + output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt)); } static void emit_rorimm(u_int rs,u_int imm,u_int rt) { - assert(imm>0); - assert(imm<32); - assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm); - assert(0); + assem_debug("ror %s,%s,#%d",regname[rt],regname[rs],imm); + output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt)); } static void emit_signextend16(u_int rs, u_int rt) { assem_debug("sxth %s,%s\n", regname[rt], regname[rs]); - assert(0); + output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt)); } -static void emit_shl(u_int rs,u_int shift,u_int rt) +static void emit_shl(u_int rs,u_int rshift,u_int rt) { - assert(rs < 31); - assert(rt < 31); - assert(shift < 16); - assert(0); + assem_debug("lsl %s,%s,%s",regname[rt],regname[rs],regname[rshift]); + output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt)); } -static void emit_shr(u_int rs,u_int shift,u_int rt) +static void emit_shr(u_int rs,u_int rshift,u_int rt) { - assert(rs < 31); - assert(rt < 31); - assert(shift<16); - assem_debug("lsr 
%s,%s,%s\n",regname[rt],regname[rs],regname[shift]); - assert(0); -} - -static void emit_sar(u_int rs,u_int shift,u_int rt) -{ - assert(rs < 31); - assert(rt < 31); - assert(shift<16); - assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); - assert(0); + assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]); + output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt)); } -static void emit_orrshl(u_int rs,u_int shift,u_int rt) +static void emit_sar(u_int rs,u_int rshift,u_int rt) { - assert(rs < 31); - assert(rt < 31); - assert(shift<16); - assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]); - assert(0); + assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]); + output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt)); } -static void emit_orrshr(u_int rs,u_int shift,u_int rt) +static void emit_cmpimm(u_int rs, u_int imm) { - assert(rs < 31); - assert(rt < 31); - assert(shift<16); - assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]); - assert(0); + if (imm < 4096) { + assem_debug("cmp %s,%#x\n", regname[rs], imm); + output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR)); + } + else if (-imm < 4096) { + assem_debug("cmn %s,%#x\n", regname[rs], imm); + output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR)); + } + else if (imm < 16777216 && !(imm & 0xfff)) { + assem_debug("cmp %s,#%#x,lsl #12\n", regname[rs], imm >> 12); + output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR)); + } + else { + host_tempreg_acquire(); + emit_movimm(imm, HOST_TEMPREG); + assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]); + output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR)); + host_tempreg_release(); + } } -static void emit_cmpimm(u_int rs,int imm) +static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt) { - assert(0); + assert(imm == 0 || imm == 1); + assert(cond0 < 0x10); + assert(cond1 < 0x10); + if (imm) { + assem_debug("csinc 
%s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]); + output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt)); + } else { + assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]); + output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt)); + } } -static void emit_cmovne_imm(int imm,u_int rt) +static void emit_cmovne_imm(u_int imm,u_int rt) { - assem_debug("movne %s,#%#x\n",regname[rt],imm); - assert(0); + emit_cmov_imm(COND_NE, COND_EQ, imm, rt); } -static void emit_cmovl_imm(int imm,u_int rt) +static void emit_cmovl_imm(u_int imm,u_int rt) { - assem_debug("movlt %s,#%#x\n",regname[rt],imm); - assert(0); + emit_cmov_imm(COND_LT, COND_GE, imm, rt); } static void emit_cmovb_imm(int imm,u_int rt) { - assem_debug("movcc %s,#%#x\n",regname[rt],imm); - assert(0); + emit_cmov_imm(COND_CC, COND_CS, imm, rt); } static void emit_cmovs_imm(int imm,u_int rt) { - assem_debug("movmi %s,#%#x\n",regname[rt],imm); - assert(0); + emit_cmov_imm(COND_MI, COND_PL, imm, rt); } static void emit_cmovne_reg(u_int rs,u_int rt) { - assem_debug("movne %s,%s\n",regname[rt],regname[rs]); - assert(0); + assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]); + output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt)); } static void emit_cmovl_reg(u_int rs,u_int rt) { - assem_debug("movlt %s,%s\n",regname[rt],regname[rs]); - assert(0); + assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]); + output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt)); } static void emit_cmovs_reg(u_int rs,u_int rt) { - assem_debug("movmi %s,%s\n",regname[rt],regname[rs]); - assert(0); + assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]); + output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt)); } static void emit_slti32(u_int rs,int imm,u_int rt) @@ -616,7 +861,7 @@ static void emit_sltiu32(u_int rs,int imm,u_int rt) static void emit_cmp(u_int rs,u_int rt) { assem_debug("cmp 
%s,%s\n",regname[rs],regname[rt]); - assert(0); + output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR)); } static void emit_set_gz32(u_int rs, u_int rt) @@ -630,7 +875,9 @@ static void emit_set_gz32(u_int rs, u_int rt) static void emit_set_nz32(u_int rs, u_int rt) { //assem_debug("set_nz32\n"); - assert(0); + if(rs!=rt) emit_mov(rs,rt); + emit_test(rs,rs); + emit_cmovne_imm(1,rt); } static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt) @@ -651,10 +898,10 @@ static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt) emit_cmovb_imm(1,rt); } -static void emit_call(const void *a_) +static void emit_call(const void *a) { - intptr_t diff = (u_char *)a_ - out; - assem_debug("bl %p (%p+%lx)%s\n", a_, out, diff, func_name(a)); + intptr_t diff = (u_char *)a - out; + assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a)); assert(!(diff & 3)); if (-134217728 <= diff && diff <= 134217727) output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff)); @@ -662,85 +909,85 @@ static void emit_call(const void *a_) assert(0); } -#pragma GCC diagnostic ignored "-Wunused-variable" -static void emit_jmp(const void *a_) +static void emit_jmp(const void *a) { - uintptr_t a = (uintptr_t)a_; - assem_debug("b %p (%p+%lx)%s\n", a_, out, (u_char *)a_ - out, func_name(a)); - assert(0); + assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a)); + u_int offset = genjmp(a); + output_w32(0x14000000 | offset); } -static void emit_jne(const void *a_) +static void emit_jne(const void *a) { - uintptr_t a = (uintptr_t)a_; - assem_debug("bne %p\n", a_); - assert(0); + assem_debug("bne %p\n", a); + u_int offset = genjmpcc(a); + output_w32(0x54000000 | (offset << 5) | COND_NE); } static void emit_jeq(const void *a) { - assem_debug("beq %p\n",a); - assert(0); + assem_debug("beq %p\n", a); + u_int offset = genjmpcc(a); + output_w32(0x54000000 | (offset << 5) | COND_EQ); } static void emit_js(const void *a) { - assem_debug("bmi %p\n",a); - assert(0); + assem_debug("bmi %p\n", a); + u_int 
offset = genjmpcc(a); + output_w32(0x54000000 | (offset << 5) | COND_MI); } static void emit_jns(const void *a) { - assem_debug("bpl %p\n",a); - assert(0); + assem_debug("bpl %p\n", a); + u_int offset = genjmpcc(a); + output_w32(0x54000000 | (offset << 5) | COND_PL); } static void emit_jl(const void *a) { - assem_debug("blt %p\n",a); - assert(0); + assem_debug("blt %p\n", a); + u_int offset = genjmpcc(a); + output_w32(0x54000000 | (offset << 5) | COND_LT); } static void emit_jge(const void *a) { - assem_debug("bge %p\n",a); - assert(0); + assem_debug("bge %p\n", a); + u_int offset = genjmpcc(a); + output_w32(0x54000000 | (offset << 5) | COND_GE); } static void emit_jno(const void *a) { - assem_debug("bvc %p\n",a); - assert(0); + assem_debug("bvc %p\n", a); + u_int offset = genjmpcc(a); + output_w32(0x54000000 | (offset << 5) | COND_VC); } static void emit_jc(const void *a) { - assem_debug("bcs %p\n",a); - assert(0); + assem_debug("bcs %p\n", a); + u_int offset = genjmpcc(a); + output_w32(0x54000000 | (offset << 5) | COND_CS); } static void emit_jcc(const void *a) { assem_debug("bcc %p\n", a); - assert(0); -} - -static void emit_callreg(u_int r) -{ - assert(r < 31); - assem_debug("blx %s\n", regname[r]); - assert(0); + u_int offset = genjmpcc(a); + output_w32(0x54000000 | (offset << 5) | COND_CC); } static void emit_jmpreg(u_int r) { - assem_debug("mov pc,%s\n",regname[r]); - assert(0); + assem_debug("br %s", regname64[r]); + output_w32(0xd61f0000 | rm_rn_rd(0, r, 0)); } static void emit_retreg(u_int r) { - assem_debug("ret %s\n", r == LR ? "" : regname[r]); + assem_debug("ret %s\n", r == LR ? 
"" : regname64[r]); output_w32(0xd65f0000 | rm_rn_rd(0, r, 0)); } @@ -749,34 +996,101 @@ static void emit_ret(void) emit_retreg(LR); } +static void emit_adr(void *addr, u_int rt) +{ + intptr_t offset = (u_char *)addr - out; + assert(-1048576 <= offset && offset < 1048576); + assem_debug("adr x%d,#%#lx\n", rt, offset); + output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt); +} + static void emit_readword_indexed(int offset, u_int rs, u_int rt) { - assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset); - assert(0); + assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset); + assert(-256 <= offset && offset < 256); + output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt)); +} + +static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]); + output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt)); +} + +static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]); + output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt)); +} + +static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]); + output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt)); +} + +static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]); + output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt)); +} + +static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]); + output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt)); +} + +static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]); + output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt)); +} + +static void 
emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]); + output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt)); +} + +static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]); + output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt)); +} + +static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]); + output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt)); } static void emit_movsbl_indexed(int offset, u_int rs, u_int rt) { - assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset); - assert(0); + assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset); + assert(-256 <= offset && offset < 256); + output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt)); } static void emit_movswl_indexed(int offset, u_int rs, u_int rt) { - assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset); - assert(0); + assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset); + assert(-256 <= offset && offset < 256); + output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt)); } static void emit_movzbl_indexed(int offset, u_int rs, u_int rt) { - assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset); - assert(0); + assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset); + assert(-256 <= offset && offset < 256); + output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt)); } static void emit_movzwl_indexed(int offset, u_int rs, u_int rt) { - assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset); - assert(0); + assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset); + assert(-256 <= offset && offset < 256); + output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt)); } static void emit_writeword_indexed(u_int rt, int offset, u_int rs) @@ -832,41 +1146,15 @@ static void 
emit_clz(u_int rs,u_int rt) assert(0); } -// Load 2 immediates optimizing for small code size -static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) -{ - assert(0); -} - -// Conditionally select one of two immediates, optimizing for small code size -// This will only be called if HAVE_CMOV_IMM is defined -static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) -{ - assert(0); -} - // special case for checking invalid_code -static void emit_cmpmem_indexedsr12_reg(int base,u_int r,int imm) -{ - assert(imm<128&&imm>=0); - assert(r>=0&&r<16); - assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]); - assert(0); -} - -// Used to preload hash table entries -static unused void emit_prefetchreg(u_int r) +static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm) { - assem_debug("pld %s\n",regname[r]); - assert(0); -} - -// Special case for mini_ht -static void emit_ldreq_indexed(u_int rs, u_int offset, u_int rt) -{ - assert(offset<4096); - assem_debug("ldreq %s,[%s, #%#x]\n",regname[rt],regname[rs],offset); - assert(0); + host_tempreg_acquire(); + emit_shrimm(r, 12, HOST_TEMPREG); + assem_debug("ldrb %s,[%s,%s]",regname[HOST_TEMPREG],regname64[rbase],regname64[HOST_TEMPREG]); + output_w32(0x38606800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG)); + emit_cmpimm(HOST_TEMPREG, imm); + host_tempreg_release(); } static void emit_orrne_imm(u_int rs,int imm,u_int rt) @@ -887,26 +1175,30 @@ static unused void emit_addpl_imm(u_int rs,int imm,u_int rt) assert(0); } +static void emit_loadlp_ofs(u_int ofs, u_int rt) +{ + output_w32(0x58000000 | imm19_rt(ofs, rt)); +} + static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs) { u_int op = 0xb9000000; - const char *ldst = is_st ? "st" : "ld"; - char rp = is64 ? 'x' : 'w'; + unused const char *ldst = is_st ? "st" : "ld"; + unused char rp = is64 ? 'x' : 'w'; assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs); is64 = is64 ? 
1 : 0; assert((ofs & ((1 << (2+is64)) - 1)) == 0); ofs = (ofs >> (2+is64)); - assert(ofs <= 0xfff); if (!is_st) op |= 0x00400000; if (is64) op |= 0x40000000; - output_w32(op | (ofs << 15) | imm12_rn_rd(ofs, rn, rt)); + output_w32(op | imm12_rn_rd(ofs, rn, rt)); } static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs) { u_int op = 0x29000000; - const char *ldst = is_st ? "st" : "ld"; - char rp = is64 ? 'x' : 'w'; + unused const char *ldst = is_st ? "st" : "ld"; + unused char rp = is64 ? 'x' : 'w'; assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs); is64 = is64 ? 1 : 0; assert((ofs & ((1 << (2+is64)) - 1)) == 0); @@ -915,7 +1207,7 @@ static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs &= 0x7f; if (!is_st) op |= 0x00400000; if (is64) op |= 0x80000000; - output_w32(op | (ofs << 15) | rm_rn_rd(rt2, rn, rt1)); + output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1)); } static void save_load_regs_all(int is_store, u_int reglist) @@ -963,57 +1255,323 @@ static void literal_pool_jumpover(int n) { } -static void emit_extjump2(u_char *addr, int target, void *linker) +// parsed by get_pointer, find_extjump_insn +static void emit_extjump2(u_char *addr, u_int target, void *linker) { - assert(0); -} + assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond -static void emit_extjump(void *addr, int target) -{ - emit_extjump2(addr, target, dyna_linker); + emit_movz(target & 0xffff, 0); + emit_movk_lsl16(target >> 16, 0); + + // addr is in the current recompiled block (max 256k) + // offset shouldn't exceed +/-1MB + emit_adr(addr, 1); + emit_jmp(linker); } -static void emit_extjump_ds(void *addr, int target) +static void check_extjump2(void *src) { - emit_extjump2(addr, target, dyna_linker_ds); + u_int *ptr = src; + assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val + (void)ptr; } // put rt_val into rt, potentially making use of rs with value rs_val -static void 
emit_movimm_from(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt) +static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt) { - intptr_t diff = rt_val - rs_val; - if (-4096 < diff && diff < 4096) + int diff = rt_val - rs_val; + if ((-4096 <= diff && diff < 4096) + || (-16777216 <= diff && diff < 16777216 && !(diff & 0xfff))) emit_addimm(rs, diff, rt); + else if (is_rotated_mask(rs_val ^ rt_val)) + emit_xorimm(rs, rs_val ^ rt_val, rt); else - // TODO: for inline_writestub, etc - assert(0); + emit_movimm(rt_val, rt); } -// return 1 if above function can do it's job cheaply +// return 1 if the above function can do it's job cheaply static int is_similar_value(u_int v1, u_int v2) { int diff = v1 - v2; - return -4096 < diff && diff < 4096; + return (-4096 <= diff && diff < 4096) + || (-16777216 <= diff && diff < 16777216 && !(diff & 0xfff)) + || is_rotated_mask(v1 ^ v2); +} + +// trashes r2 +static void pass_args64(u_int a0, u_int a1) +{ + if(a0==1&&a1==0) { + // must swap + emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0); + } + else if(a0!=0&&a1==0) { + emit_mov64(a1,1); + if (a0>=0) emit_mov64(a0,0); + } + else { + if(a0>=0&&a0!=0) emit_mov64(a0,0); + if(a1>=0&&a1!=1) emit_mov64(a1,1); + } } -//#include "pcsxmem.h" +static void loadstore_extend(enum stub_type type, u_int rs, u_int rt) +{ + switch(type) { + case LOADB_STUB: emit_sbfm(rs, 7, rt); break; + case LOADBU_STUB: + case STOREB_STUB: emit_ubfm(rs, 7, rt); break; + case LOADH_STUB: emit_sbfm(rs, 15, rt); break; + case LOADHU_STUB: + case STOREH_STUB: emit_ubfm(rs, 15, rt); break; + case LOADW_STUB: + case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break; + default: assert(0); + } +} + +#include "pcsxmem.h" //#include "pcsxmem_inline.c" static void do_readstub(int n) { assem_debug("do_readstub %x\n",start+stubs[n].a*4); - assert(0); + set_jump_target(stubs[n].addr, out); + enum stub_type type = stubs[n].type; + int i = stubs[n].a; + int rs = stubs[n].b; + const struct regstat *i_regs 
= (void *)stubs[n].c; + u_int reglist = stubs[n].e; + const signed char *i_regmap = i_regs->regmap; + int rt; + if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) { + rt=get_reg(i_regmap,FTEMP); + }else{ + rt=get_reg(i_regmap,rt1[i]); + } + assert(rs>=0); + int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0; + void *restore_jump = NULL, *handler_jump = NULL; + reglist|=(1<=0&&rt1[i]!=0) + reglist&=~(1<=0&&rt1[i]!=0)) { + switch(type) { + case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break; + case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break; + case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break; + case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break; + case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break; + default: assert(0); + } + } + if(regs_saved) { + restore_jump=out; + emit_jmp(0); // jump to reg restore + } + else + emit_jmp(stubs[n].retaddr); // return address + set_jump_target(handler_jump, out); + + if(!regs_saved) + save_regs(reglist); + void *handler=NULL; + if(type==LOADB_STUB||type==LOADBU_STUB) + handler=jump_handler_read8; + if(type==LOADH_STUB||type==LOADHU_STUB) + handler=jump_handler_read16; + if(type==LOADW_STUB) + handler=jump_handler_read32; + assert(handler); + pass_args64(rs,temp2); + int cc=get_reg(i_regmap,CCREG); + if(cc<0) + emit_loadreg(CCREG,2); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2); + emit_call(handler); + // (no cycle reload after read) + if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { + loadstore_extend(type,0,rt); + } + if(restore_jump) + set_jump_target(restore_jump, out); + restore_regs(reglist); + emit_jmp(stubs[n].retaddr); } static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { - assert(0); + int rs=get_reg(regmap,target); + int rt=get_reg(regmap,target); + if(rs<0) rs=get_reg(regmap,-1); + assert(rs>=0); + u_int is_dynamic=0; + uintptr_t host_addr = 0; + void *handler; + int 
cc=get_reg(regmap,CCREG); + //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt)) + // return; + handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr); + if (handler == NULL) { + if(rt<0||rt1[i]==0) + return; + if (addr != host_addr) { + if (host_addr >= 0x100000000ull) + abort(); // ROREG not implemented + emit_movimm_from(addr, rs, host_addr, rs); + } + switch(type) { + case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break; + case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break; + case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break; + case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break; + case LOADW_STUB: emit_readword_indexed(0,rs,rt); break; + default: assert(0); + } + return; + } + is_dynamic=pcsxmem_is_handler_dynamic(addr); + if(is_dynamic) { + if(type==LOADB_STUB||type==LOADBU_STUB) + handler=jump_handler_read8; + if(type==LOADH_STUB||type==LOADHU_STUB) + handler=jump_handler_read16; + if(type==LOADW_STUB) + handler=jump_handler_read32; + } + + // call a memhandler + if(rt>=0&&rt1[i]!=0) + reglist&=~(1<=0&&rt1[i]!=0) + loadstore_extend(type, 0, rt); + restore_regs(reglist); } static void do_writestub(int n) { assem_debug("do_writestub %x\n",start+stubs[n].a*4); - assert(0); + set_jump_target(stubs[n].addr, out); + enum stub_type type=stubs[n].type; + int i=stubs[n].a; + int rs=stubs[n].b; + struct regstat *i_regs=(struct regstat *)stubs[n].c; + u_int reglist=stubs[n].e; + signed char *i_regmap=i_regs->regmap; + int rt,r; + if(itype[i]==C1LS||itype[i]==C2LS) { + rt=get_reg(i_regmap,r=FTEMP); + }else{ + rt=get_reg(i_regmap,r=rs2[i]); + } + assert(rs>=0); + assert(rt>=0); + int rtmp,temp=-1,temp2,regs_saved=0; + void *restore_jump = NULL, *handler_jump = NULL; + int reglist2=reglist|(1<= 0x100000000ull) + abort(); // ROREG not implemented emit_movimm_from(addr, rs, host_addr, rs); - switch(type) { + } + switch (type) { case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break; case STOREH_STUB: emit_writehword_indexed(rt, 0, 
rs); break; case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break; @@ -1038,25 +1599,20 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, signed char // call a memhandler save_regs(reglist); - //pass_args(rs, rt); - int cc = get_reg(regmap, CCREG); - assert(cc >= 0); - emit_addimm(cc, CLOCK_ADJUST(adj+1), 2); - //emit_movimm((uintptr_t)handler, 3); - // returns new cycle_count - - emit_readword(&last_count, HOST_TEMPREG); emit_writeword(rs, &address); // some handlers still need it - emit_add(2, HOST_TEMPREG, 2); - emit_writeword(2, &Count); - emit_mov(1, 0); + loadstore_extend(type, rt, 0); + int cc, cc_use; + cc = cc_use = get_reg(regmap, CCREG); + if (cc < 0) + emit_loadreg(CCREG, (cc_use = 2)); + emit_addimm(cc_use, CLOCK_ADJUST(adj+1), 2); + + emit_call(do_memhandler_pre); emit_call(handler); - emit_readword(&next_interupt, 0); - emit_readword(&Count, 1); - emit_writeword(0, &last_count); - emit_sub(1, 0, cc); - - emit_addimm(cc,-CLOCK_ADJUST(adj+1),cc); + emit_call(do_memhandler_post); + emit_addimm(0, -CLOCK_ADJUST(adj+1), cc_use); + if (cc < 0) + emit_storereg(CCREG, cc_use); restore_regs(reglist); } @@ -1066,24 +1622,56 @@ static void do_unalignedwritestub(int n) assert(0); } -static void do_invstub(int n) +static void set_loadlp(u_int *loadl, void *lit) { - assert(0); + uintptr_t ofs = (u_char *)lit - (u_char *)loadl; + assert((*loadl & ~0x1f) == 0x58000000); + assert((ofs & 3) == 0); + assert(ofs < 0x100000); + *loadl |= (ofs >> 2) << 5; +} + +// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr +static void do_dirty_stub_emit_args(u_int arg0) +{ + assert(slen <= MAXBLOCK); + emit_loadlp_ofs(0, 1); // ldr x1, source + emit_loadlp_ofs(0, 2); // ldr x2, copy + emit_movz(slen*4, 3); + emit_movz(arg0 & 0xffff, 0); + emit_movk_lsl16(arg0 >> 16, 0); +} + +static void do_dirty_stub_emit_literals(u_int *loadlps) +{ + set_loadlp(&loadlps[0], out); + output_w64((uintptr_t)source); + set_loadlp(&loadlps[1], out); 
+ output_w64((uintptr_t)copy); } -void *do_dirty_stub(int i) +static void *do_dirty_stub(int i) { assem_debug("do_dirty_stub %x\n",start+i*4); - // Careful about the code output here, verify_dirty needs to parse it. - assert(0); + u_int *loadlps = (void *)out; + do_dirty_stub_emit_args(start + i*4); + emit_call(verify_code); + void *entry = out; load_regs_entry(i); - return NULL; + if (entry == out) + entry = instr_addr[i]; + emit_jmp(instr_addr[i]); + do_dirty_stub_emit_literals(loadlps); + return entry; } static void do_dirty_stub_ds() { - // Careful about the code output here, verify_dirty needs to parse it. - assert(0); + do_dirty_stub_emit_args(start + 1); + u_int *loadlps = (void *)out; + emit_call(verify_code_ds); + emit_jmp(out + 8*2); + do_dirty_stub_emit_literals(loadlps); } /* Special assem */ @@ -1112,33 +1700,52 @@ static void multdiv_assemble_arm64(int i,struct regstat *i_regs) } #define multdiv_assemble multdiv_assemble_arm64 +static void do_jump_vaddr(u_int rs) +{ + if (rs != 0) + emit_mov(rs, 0); + emit_call(get_addr_ht); + emit_jmpreg(0); +} + static void do_preload_rhash(u_int r) { // Don't need this for ARM. On x86, this puts the value 0xf8 into the // register. 
On ARM the hash can be done with a single instruction (below) } static void do_preload_rhtbl(u_int ht) { - emit_addimm(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht); + emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht); } static void do_rhash(u_int rs,u_int rh) { emit_andimm(rs, 0xf8, rh); } -static void do_miniht_load(int ht,u_int rh) { - assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]); - assert(0); +static void do_miniht_load(int ht, u_int rh) { + emit_add64(ht, rh, ht); + emit_ldst(0, 0, rh, ht, 0); } -static void do_miniht_jump(u_int rs,u_int rh,int ht) { - emit_cmp(rh,rs); - emit_ldreq_indexed(ht,4,15); - //emit_jmp(jump_vaddr_reg[rs]); - assert(0); +static void do_miniht_jump(u_int rs, u_int rh, u_int ht) { + emit_cmp(rh, rs); + void *jaddr = out; + emit_jeq(0); + do_jump_vaddr(rs); + + set_jump_target(jaddr, out); + assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]); + output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht)); + emit_jmpreg(ht); } +// parsed by set_jump_target? 
static void do_miniht_insert(u_int return_address,u_int rt,int temp) { - assert(0); + emit_movz_lsl16((return_address>>16)&0xffff,rt); + emit_movk(return_address&0xffff,rt); + add_to_linker(out,return_address,1); + emit_adr(out,temp); + emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]); + emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]); } static void mark_clear_cache(void *target) diff --git a/libpcsxcore/new_dynarec/assem_arm64.h b/libpcsxcore/new_dynarec/assem_arm64.h index 6789f178d..fe12ad75c 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.h +++ b/libpcsxcore/new_dynarec/assem_arm64.h @@ -3,21 +3,15 @@ #define EXCLUDE_REG -1 #define HOST_IMM8 1 -#define HAVE_CMOV_IMM 1 #define RAM_SIZE 0x200000 -//#define REG_SHIFT 2 - /* calling convention: r0 -r17: caller-save r19-r29: callee-save */ -#define ARG1_REG 0 -#define ARG2_REG 1 -#define ARG3_REG 2 -#define ARG4_REG 3 - -#define SP 30 +#define SP 31 +#define WZR SP +#define XZR SP #define LR 30 #define HOST_TEMPREG LR diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 778a67f0a..fcb4e1a74 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -476,14 +476,6 @@ FUNCTION(cc_interrupt): b .E1 .size cc_interrupt, .-cc_interrupt - .align 2 -FUNCTION(do_interrupt): - ldr r0, [fp, #LO_pcaddr] - bl get_addr_ht - add r10, r10, #2 - mov pc, r0 - .size do_interrupt, .-do_interrupt - .align 2 FUNCTION(fp_exception): mov r2, #0x10000000 diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 397874c89..060ac48aa 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -84,7 +84,7 @@ DRC_VAR(restore_candidate, 512) /* r0 = virtual target address */ /* r1 = instruction to patch */ .macro dyna_linker_main - /* XXX: should be able to do better than this... */ + /* XXX TODO: should be able to do better than this... 
*/ bl get_addr_ht br x0 .endm @@ -113,12 +113,6 @@ FUNCTION(dyna_linker_ds): .align 2 -FUNCTION(jump_vaddr): - bl abort - .size jump_vaddr, .-jump_vaddr - - .align 2 - FUNCTION(verify_code_ds): bl abort FUNCTION(verify_code): @@ -131,14 +125,49 @@ FUNCTION(verify_code): .align 2 FUNCTION(cc_interrupt): - bl abort + ldr w0, [rFP, #LO_last_count] + mov w2, #0x1fc + add rCC, w0, rCC + str wzr, [rFP, #LO_pending_exception] + and w2, w2, rCC, lsr #17 + add x3, rFP, #LO_restore_candidate + str rCC, [rFP, #LO_cycle] /* PCSX cycles */ +# str rCC, [rFP, #LO_reg_cop0+36] /* Count */ + ldr w19, [x3, w2, uxtw] + mov x21, lr + cbnz w19, 4f +1: + bl gen_interupt + mov lr, x21 + ldr rCC, [rFP, #LO_cycle] + ldr w0, [rFP, #LO_next_interupt] + ldr w1, [rFP, #LO_pending_exception] + ldr w2, [rFP, #LO_stop] + str w0, [rFP, #LO_last_count] + sub rCC, rCC, w0 + cbnz w2, new_dyna_leave + cbnz w1, 2f + ret +2: + ldr w0, [rFP, #LO_pcaddr] + bl get_addr_ht + br x0 +4: + /* Move 'dirty' blocks to the 'clean' list */ + lsl w20, w2, #3 + str wzr, [x3, w2, uxtw] +5: + mov w0, w20 + add w20, w20, #1 + tbz w19, #0, 6f + bl clean_blocks +6: + lsr w19, w19, #1 + tst w20, #31 + bne 5b + b 1b .size cc_interrupt, .-cc_interrupt - .align 2 -FUNCTION(do_interrupt): - bl abort - .size do_interrupt, .-do_interrupt - .align 2 FUNCTION(fp_exception): mov w2, #0x10000000 @@ -239,26 +268,94 @@ FUNCTION(new_dyna_leave): .align 2 +.macro memhandler_pre + /* w0 = adddr/data, x1 = rhandler, w2 = cycles, x3 = whandler */ + ldr w4, [rFP, #LO_last_count] + add w4, w4, w2 + str w4, [rFP, #LO_cycle] +.endm + +.macro memhandler_post + ldr w2, [rFP, #LO_next_interupt] + ldr w1, [rFP, #LO_cycle] + sub w0, w1, w2 + str w2, [rFP, #LO_last_count] +.endm + +FUNCTION(do_memhandler_pre): + memhandler_pre + ret + +FUNCTION(do_memhandler_post): + memhandler_post + ret + +.macro pcsx_read_mem readop tab_shift + /* w0 = address, x1 = handler_tab, w2 = cycles */ + stp xzr, x30, [sp, #-16]! 
+ ubfm w4, w0, #\tab_shift, #11 + ldr x3, [x1, w4, uxtw #3] + adds x3, x3, x3 + bcs 0f + \readop w0, [x3, w4, uxtw #\tab_shift] + ret +0: + memhandler_pre + blr x3 +.endm + FUNCTION(jump_handler_read8): - bl abort + add x1, x1, #0x1000/4*4 + 0x1000/2*4 /* shift to r8 part */ + pcsx_read_mem ldrb, 0 + b handler_read_end FUNCTION(jump_handler_read16): - bl abort + add x1, x1, #0x1000/4*4 /* shift to r16 part */ + pcsx_read_mem ldrh, 1 + b handler_read_end FUNCTION(jump_handler_read32): - bl abort + pcsx_read_mem ldr, 2 + +handler_read_end: + ldp xzr, x30, [sp], #16 + ret + +.macro pcsx_write_mem wrtop movop tab_shift + /* w0 = address, w1 = data, w2 = cycles, x3 = handler_tab */ + stp xzr, x30, [sp, #-16]! + ubfm w4, w0, #\tab_shift, #11 + ldr x3, [x3, w4, uxtw #3] + str w0, [rFP, #LO_address] /* some handlers still need it... */ + adds x3, x3, x3 +# str lr, [rFP, #0] + bcs 0f + mov w0, w2 /* cycle return */ + \wrtop w1, [x3, w4, uxtw #\tab_shift] + ret +0: + \movop w0, w1 + memhandler_pre + blr x3 +.endm FUNCTION(jump_handler_write8): - bl abort + add x3, x3, #0x1000/4*4 + 0x1000/2*4 /* shift to r8 part */ + pcsx_write_mem strb uxtb 0 + b handler_write_end FUNCTION(jump_handler_write16): - bl abort + add x3, x3, #0x1000/4*4 /* shift to r16 part */ + pcsx_write_mem strh uxth 1 + b handler_write_end FUNCTION(jump_handler_write32): - bl abort + pcsx_write_mem str mov 2 -FUNCTION(jump_handler_write_h): - bl abort +handler_write_end: + memhandler_post + ldp xzr, x30, [sp], #16 + ret FUNCTION(jump_handle_swl): bl abort diff --git a/libpcsxcore/new_dynarec/linkage_offsets.h b/libpcsxcore/new_dynarec/linkage_offsets.h index 24b8e66d2..82d27bd41 100644 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ b/libpcsxcore/new_dynarec/linkage_offsets.h @@ -35,7 +35,7 @@ #define LO_scratch_buf_ptr (LO_invc_ptr + PTRSZ) #define LO_align1 (LO_scratch_buf_ptr + PTRSZ) #define LO_mini_ht (LO_align1 + PTRSZ*2) -#define LO_restore_candidate (LO_mini_ht + 256) +#define 
LO_restore_candidate (LO_mini_ht + PTRSZ*32*2) #define LO_dynarec_local_size (LO_restore_candidate + 512) #define LO_cop2_to_scratch_buf (LO_scratch_buf_ptr - LO_reg_cop2d) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 7c59a7e8a..081532981 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -38,6 +38,7 @@ static int sceBlock; #include "../psxhle.h" //emulator interface #include "emu_if.h" //emulator interface +#define noinline __attribute__((noinline,noclone)) #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #endif @@ -202,7 +203,7 @@ struct link_entry extern int pcaddr; extern int pending_exception; extern int branch_target; - extern u_int mini_ht[32][2]; + extern uintptr_t mini_ht[32][2]; extern u_char restore_candidate[512]; /* registers that may be allocated */ @@ -421,7 +422,7 @@ static int doesnt_expire_soon(void *tcaddr) // Get address from virtual address // This is called from the recompiled JR/JALR instructions -void *get_addr(u_int vaddr) +void noinline *get_addr(u_int vaddr) { u_int page=get_page(vaddr); u_int vpage=get_vpage(vaddr); @@ -489,7 +490,7 @@ void clear_all_regs(signed char regmap[]) for (hr=0;hr %x (%d)\n",src,vaddr,page); - int *ptr=(int *)(src+4); - assert((*ptr&0x0fff0000)==0x059f0000); - (void)ptr; + check_extjump2(src); ll_add(jump_out+page,vaddr,src); //void *ptr=get_pointer(src); //inv_debug("add_link: Pointer is to %p\n",ptr); @@ -1905,7 +1935,7 @@ static void pagespan_alloc(struct regstat *current,int i) static void add_stub(enum stub_type type, void *addr, void *retaddr, u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e) { - assert(a < ARRAY_SIZE(stubs)); + assert(stubcount < ARRAY_SIZE(stubs)); stubs[stubcount].type = type; stubs[stubcount].addr = addr; stubs[stubcount].retaddr = retaddr; @@ -2380,24 +2410,29 @@ static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) } if(type==MTYPE_8020) { // 
RAM 80200000+ mirror + host_tempreg_acquire(); emit_andimm(addr,~0x00e00000,HOST_TEMPREG); addr=*addr_reg_override=HOST_TEMPREG; type=0; } else if(type==MTYPE_0000) { // RAM 0 mirror + host_tempreg_acquire(); emit_orimm(addr,0x80000000,HOST_TEMPREG); addr=*addr_reg_override=HOST_TEMPREG; type=0; } else if(type==MTYPE_A000) { // RAM A mirror + host_tempreg_acquire(); emit_andimm(addr,~0x20000000,HOST_TEMPREG); addr=*addr_reg_override=HOST_TEMPREG; type=0; } else if(type==MTYPE_1F80) { // scratchpad if (psxH == (void *)0x1f800000) { + host_tempreg_acquire(); emit_addimm(addr,-0x1f800000,HOST_TEMPREG); emit_cmpimm(HOST_TEMPREG,0x1000); + host_tempreg_release(); jaddr=out; emit_jc(0); } @@ -2419,6 +2454,7 @@ static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) #endif emit_jno(0); if(ram_offset!=0) { + host_tempreg_acquire(); emit_addimm(addr,ram_offset,HOST_TEMPREG); addr=*addr_reg_override=HOST_TEMPREG; } @@ -2461,7 +2497,7 @@ static void load_assemble(int i,struct regstat *i_regs) int offset; void *jaddr=0; int memtarget=0,c=0; - int fastload_reg_override=0; + int fastio_reg_override=-1; u_int hr,reglist=0; tl=get_reg(i_regs->regmap,rt1[i]); s=get_reg(i_regs->regmap,rs1[i]); @@ -2501,12 +2537,13 @@ static void load_assemble(int i,struct regstat *i_regs) if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) #endif { - jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override); + jaddr=emit_fastpath_cmp_jump(i,addr,&fastio_reg_override); } } else if(ram_offset&&memtarget) { + host_tempreg_acquire(); emit_addimm(addr,ram_offset,HOST_TEMPREG); - fastload_reg_override=HOST_TEMPREG; + fastio_reg_override=HOST_TEMPREG; } int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg if (opcode[i]==0x20) { // LB @@ -2515,7 +2552,7 @@ static void load_assemble(int i,struct regstat *i_regs) { int x=0,a=tl; if(!c) a=addr; - if(fastload_reg_override) a=fastload_reg_override; + if(fastio_reg_override>=0) 
a=fastio_reg_override; emit_movsbl_indexed(x,a,tl); } @@ -2531,7 +2568,7 @@ static void load_assemble(int i,struct regstat *i_regs) if(!dummy) { int x=0,a=tl; if(!c) a=addr; - if(fastload_reg_override) a=fastload_reg_override; + if(fastio_reg_override>=0) a=fastio_reg_override; emit_movswl_indexed(x,a,tl); } if(jaddr) @@ -2544,7 +2581,7 @@ static void load_assemble(int i,struct regstat *i_regs) if(!c||memtarget) { if(!dummy) { int a=addr; - if(fastload_reg_override) a=fastload_reg_override; + if(fastio_reg_override>=0) a=fastio_reg_override; emit_readword_indexed(0,a,tl); } if(jaddr) @@ -2558,7 +2595,7 @@ static void load_assemble(int i,struct regstat *i_regs) if(!dummy) { int x=0,a=tl; if(!c) a=addr; - if(fastload_reg_override) a=fastload_reg_override; + if(fastio_reg_override>=0) a=fastio_reg_override; emit_movzbl_indexed(x,a,tl); } @@ -2573,7 +2610,7 @@ static void load_assemble(int i,struct regstat *i_regs) if(!dummy) { int x=0,a=tl; if(!c) a=addr; - if(fastload_reg_override) a=fastload_reg_override; + if(fastio_reg_override>=0) a=fastio_reg_override; emit_movzwl_indexed(x,a,tl); } if(jaddr) @@ -2589,6 +2626,8 @@ static void load_assemble(int i,struct regstat *i_regs) assert(0); } } + if (fastio_reg_override == HOST_TEMPREG) + host_tempreg_release(); } #ifndef loadlr_assemble @@ -2608,7 +2647,7 @@ void store_assemble(int i,struct regstat *i_regs) enum stub_type type; int memtarget=0,c=0; int agr=AGEN1+(i&1); - int faststore_reg_override=0; + int fastio_reg_override=-1; u_int hr,reglist=0; tl=get_reg(i_regs->regmap,rs2[i]); s=get_reg(i_regs->regmap,rs1[i]); @@ -2630,18 +2669,19 @@ void store_assemble(int i,struct regstat *i_regs) if(offset||s<0||c) addr=temp; else addr=s; if(!c) { - jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override); + jaddr=emit_fastpath_cmp_jump(i,addr,&fastio_reg_override); } else if(ram_offset&&memtarget) { + host_tempreg_acquire(); emit_addimm(addr,ram_offset,HOST_TEMPREG); - faststore_reg_override=HOST_TEMPREG; + 
fastio_reg_override=HOST_TEMPREG; } if (opcode[i]==0x28) { // SB if(!c||memtarget) { int x=0,a=temp; if(!c) a=addr; - if(faststore_reg_override) a=faststore_reg_override; + if(fastio_reg_override>=0) a=fastio_reg_override; emit_writebyte_indexed(tl,x,a); } type=STOREB_STUB; @@ -2650,7 +2690,7 @@ void store_assemble(int i,struct regstat *i_regs) if(!c||memtarget) { int x=0,a=temp; if(!c) a=addr; - if(faststore_reg_override) a=faststore_reg_override; + if(fastio_reg_override>=0) a=fastio_reg_override; emit_writehword_indexed(tl,x,a); } type=STOREH_STUB; @@ -2658,7 +2698,7 @@ void store_assemble(int i,struct regstat *i_regs) if (opcode[i]==0x2B) { // SW if(!c||memtarget) { int a=addr; - if(faststore_reg_override) a=faststore_reg_override; + if(fastio_reg_override>=0) a=fastio_reg_override; emit_writeword_indexed(tl,0,a); } type=STOREW_STUB; @@ -2667,6 +2707,8 @@ void store_assemble(int i,struct regstat *i_regs) assert(0); type=STORED_STUB; } + if(fastio_reg_override==HOST_TEMPREG) + host_tempreg_release(); if(jaddr) { // PCSX store handlers don't check invcode again reglist|=1<regmap==regs[i].regmap); // not delay slot @@ -2712,7 +2755,9 @@ void store_assemble(int i,struct regstat *i_regs) wb_dirtys(regs[i].regmap_entry,regs[i].wasdirty); emit_movimm(start+i*4+4,0); emit_writeword(0,&pcaddr); - emit_jmp(do_interrupt); + emit_addimm(HOST_CCREG,2,HOST_CCREG); + emit_call(get_addr_ht); + emit_jmpreg(0); } } } @@ -2948,7 +2993,13 @@ static void cop0_assemble(int i,struct regstat *i_regs) assert(!is_delayslot); emit_readword(&pending_exception,14); emit_test(14,14); - emit_jne(&do_interrupt); + void *jaddr = out; + emit_jeq(0); + emit_readword(&pcaddr, 0); + emit_addimm(HOST_CCREG,2,HOST_CCREG); + emit_call(get_addr_ht); + emit_jmpreg(0); + set_jump_target(jaddr, out); } emit_loadreg(rs1[i],s); } @@ -3117,7 +3168,7 @@ static void c2ls_assemble(int i,struct regstat *i_regs) void *jaddr2=NULL; enum stub_type type; int agr=AGEN1+(i&1); - int fastio_reg_override=0; + int 
fastio_reg_override=-1; u_int hr,reglist=0; u_int copr=(source[i]>>16)&0x1f; s=get_reg(i_regs->regmap,rs1[i]); @@ -3161,12 +3212,13 @@ static void c2ls_assemble(int i,struct regstat *i_regs) jaddr2=emit_fastpath_cmp_jump(i,ar,&fastio_reg_override); } else if(ram_offset&&memtarget) { + host_tempreg_acquire(); emit_addimm(ar,ram_offset,HOST_TEMPREG); fastio_reg_override=HOST_TEMPREG; } if (opcode[i]==0x32) { // LWC2 int a=ar; - if(fastio_reg_override) a=fastio_reg_override; + if(fastio_reg_override>=0) a=fastio_reg_override; emit_readword_indexed(0,a,tl); } if (opcode[i]==0x3a) { // SWC2 @@ -3174,10 +3226,12 @@ static void c2ls_assemble(int i,struct regstat *i_regs) if(!offset&&!c&&s>=0) emit_mov(s,ar); #endif int a=ar; - if(fastio_reg_override) a=fastio_reg_override; + if(fastio_reg_override>=0) a=fastio_reg_override; emit_writeword_indexed(tl,0,a); } } + if(fastio_reg_override==HOST_TEMPREG) + host_tempreg_release(); if(jaddr2) add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj[i],reglist); if(opcode[i]==0x3a) // SWC2 @@ -3198,7 +3252,9 @@ static void c2ls_assemble(int i,struct regstat *i_regs) #endif } if (opcode[i]==0x32) { // LWC2 + host_tempreg_acquire(); cop2_put_dreg(copr,tl,HOST_TEMPREG); + host_tempreg_release(); } } @@ -4117,6 +4173,13 @@ static void emit_extjump_ds(void *addr, u_int target) emit_extjump2(addr, target, dyna_linker_ds); } +// Load 2 immediates optimizing for small code size +static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) +{ + emit_movimm(imm1,rt1); + emit_movimm_from(imm1,rt1,imm2,rt2); +} + void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) { int count; @@ -4172,7 +4235,7 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) static void do_ccstub(int n) { literal_pool(256); - assem_debug("do_ccstub %lx\n",start+stubs[n].b*4); + assem_debug("do_ccstub %x\n",start+(u_int)stubs[n].b*4); set_jump_target(stubs[n].addr, out); int i=stubs[n].b; if(stubs[n].d==NULLDS) { 
@@ -4371,7 +4434,10 @@ static void do_ccstub(int n) }else{ load_all_regs(branch_regs[i].regmap); } - emit_jmp(stubs[n].retaddr); + if (stubs[n].retaddr) + emit_jmp(stubs[n].retaddr); + else + do_jump_vaddr(stubs[n].e); } static void add_to_linker(void *addr, u_int target, int ext) @@ -4564,7 +4630,7 @@ static void rjump_assemble(int i,struct regstat *i_regs) //if(adj) emit_addimm(cc,2*(ccadj[i]+2-adj),cc); // ??? - Shouldn't happen //assert(adj==0); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); - add_stub(CC_STUB,out,jump_vaddr_reg[rs],0,i,-1,TAKEN,0); + add_stub(CC_STUB,out,NULL,0,i,-1,TAKEN,rs); if(itype[i+1]==COP0&&(source[i+1]&0x3f)==0x10) // special case for RFE emit_jmp(0); @@ -4578,7 +4644,7 @@ static void rjump_assemble(int i,struct regstat *i_regs) else #endif { - emit_jmp(jump_vaddr_reg[rs]); + do_jump_vaddr(rs); } #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(rt1[i]!=31&&i>16)==0x1000) literal_pool(1024); else -@@ -10256,7 +10277,7 @@ int new_recompile_block(int addr) +@@ -8767,7 +8786,7 @@ int new_recompile_block(int addr) } } // External Branch Targets (jump_in) @@ -256,7 +267,7 @@ index 6d7069d..586a6db 100644 for(i=0;i> 4) & 3; emit_readword(&rcnts[t].mode, rt); + host_tempreg_acquire(); emit_andimm(rt, ~0x1800, HOST_TEMPREG); emit_writeword(HOST_TEMPREG, &rcnts[t].mode); + host_tempreg_release(); mov_loadtype_adj(type, rt, rt); goto hit; } From afec9d44d1170fd6391528f4985211ffb00e8bea Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 10 Nov 2021 23:44:25 +0200 Subject: [PATCH 060/597] drc: adjust bogus looking check not really sure what's going on, but at the start of "Pass 3 - Register allocation" zero reg allocations are removed, so "regmap_pre[i+1][hr] != regs[i].regmap[hr]" assert will not hold. 
--- libpcsxcore/new_dynarec/new_dynarec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 081532981..f2dbb86a1 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -166,7 +166,7 @@ struct link_entry static char ooo[MAXBLOCK]; static uint64_t unneeded_reg[MAXBLOCK]; static uint64_t branch_unneeded_reg[MAXBLOCK]; - static signed char regmap_pre[MAXBLOCK][HOST_REGS]; + static signed char regmap_pre[MAXBLOCK][HOST_REGS]; // pre-instruction i? static uint64_t current_constmap[HOST_REGS]; static uint64_t constmap[MAXBLOCK][HOST_REGS]; static struct regstat regs[MAXBLOCK]; @@ -7891,7 +7891,7 @@ int new_recompile_block(int addr) { if(i0) if(regmap_pre[i+1][hr]!=regs[i].regmap[hr]) { SysPrintf("fail: %x (%d %d!=%d)\n",start+i*4,hr,regmap_pre[i+1][hr],regs[i].regmap[hr]); From 3968e69e7fa8f9cb0d44ac79477d5929b9649271 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 11 Nov 2021 02:32:02 +0200 Subject: [PATCH 061/597] drc: something works on arm64 --- frontend/menu.c | 2 + libpcsxcore/gte.c | 7 +- libpcsxcore/new_dynarec/assem_arm.c | 357 ++-------- libpcsxcore/new_dynarec/assem_arm64.c | 623 +++++++++++++----- libpcsxcore/new_dynarec/emu_if.c | 4 +- libpcsxcore/new_dynarec/linkage_arm.S | 38 +- libpcsxcore/new_dynarec/linkage_arm64.S | 111 ++-- libpcsxcore/new_dynarec/new_dynarec.c | 370 ++++++++--- libpcsxcore/new_dynarec/patches/trace_drc_chk | 81 +-- libpcsxcore/psxinterpreter.c | 3 +- libpcsxcore/psxinterpreter.h | 4 + 11 files changed, 885 insertions(+), 715 deletions(-) create mode 100644 libpcsxcore/psxinterpreter.h diff --git a/frontend/menu.c b/frontend/menu.c index 37956ffe8..0dbaa400f 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1590,8 +1590,10 @@ static const char h_cfg_nodrc[] = "Disable dynamic recompiler and use interpret "Might be useful to overcome some dynarec bugs"; static const char h_cfg_shacks[] = 
"Breaks games but may give better performance\n" "must reload game for any change to take effect"; +#ifdef ICACHE_EMULATION static const char h_cfg_icache[] = "Allows you to play the F1 games.\n" "Note: This breaks the PAL version of Spyro 2."; +#endif static menu_entry e_menu_adv_options[] = { diff --git a/libpcsxcore/gte.c b/libpcsxcore/gte.c index 97a4ccd0d..e05f33d25 100644 --- a/libpcsxcore/gte.c +++ b/libpcsxcore/gte.c @@ -322,9 +322,10 @@ static inline void MTC2(u32 value, int reg) { case 28: gteIRGB = value; - gteIR1 = (value & 0x1f) << 7; - gteIR2 = (value & 0x3e0) << 2; - gteIR3 = (value & 0x7c00) >> 3; + // not gteIR1 etc. just to be consistent with dynarec + regs->CP2D.n.ir1 = (value & 0x1f) << 7; + regs->CP2D.n.ir2 = (value & 0x3e0) << 2; + regs->CP2D.n.ir3 = (value & 0x7c00) >> 3; break; case 30: diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index ed00103fd..c61145643 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -237,7 +237,7 @@ static void *get_clean_addr(void *addr) return ptr; } -static int verify_dirty(u_int *ptr) +static int verify_dirty(const u_int *ptr) { #ifndef HAVE_ARMV7 u_int offset; @@ -601,12 +601,6 @@ static void emit_not(int rs,int rt) output_w32(0xe1e00000|rd_rn_rm(rt,0,rs)); } -static void emit_mvnmi(int rs,int rt) -{ - assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]); - output_w32(0x41e00000|rd_rn_rm(rt,0,rs)); -} - static void emit_and(u_int rs1,u_int rs2,u_int rt) { assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); @@ -643,6 +637,12 @@ static void emit_xor(u_int rs1,u_int rs2,u_int rt) output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2)); } +static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt) +{ + assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm); + output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7)); +} + static void emit_addimm(u_int rs,int imm,u_int rt) { assert(rs<16); @@ -888,7 
+888,7 @@ static void emit_sar(u_int rs,u_int shift,u_int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8)); } -static void emit_orrshl(u_int rs,u_int shift,u_int rt) +static unused void emit_orrshl(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -897,7 +897,7 @@ static void emit_orrshl(u_int rs,u_int shift,u_int rt) output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8)); } -static void emit_orrshr(u_int rs,u_int shift,u_int rt) +static unused void emit_orrshr(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -952,6 +952,14 @@ static void emit_cmovb_imm(int imm,int rt) output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval); } +static void emit_cmovae_imm(int imm,int rt) +{ + assem_debug("movcs %s,#%d\n",regname[rt],imm); + u_int armval; + genimm_checked(imm,&armval); + output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval); +} + static void emit_cmovne_reg(int rs,int rt) { assem_debug("movne %s,%s\n",regname[rt],regname[rs]); @@ -1114,7 +1122,7 @@ static void emit_jcc(const void *a_) output_w32(0x3a000000|offset); } -static void emit_callreg(u_int r) +static unused void emit_callreg(u_int r) { assert(r<15); assem_debug("blx %s\n",regname[r]); @@ -1379,7 +1387,7 @@ static void emit_teq(int rs, int rt) output_w32(0xe1300000|rd_rn_rm(0,rs,rt)); } -static void emit_rsbimm(int rs, int imm, int rt) +static unused void emit_rsbimm(int rs, int imm, int rt) { u_int armval; genimm_checked(imm,&armval); @@ -1940,94 +1948,6 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, signed char restore_regs(reglist); } -static void do_unalignedwritestub(int n) -{ - assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4); - literal_pool(256); - set_jump_target(stubs[n].addr, out); - - int i=stubs[n].a; - struct regstat *i_regs=(struct regstat *)stubs[n].c; - int addr=stubs[n].b; - u_int reglist=stubs[n].e; - signed char *i_regmap=i_regs->regmap; - int temp2=get_reg(i_regmap,FTEMP); - int rt; - rt=get_reg(i_regmap,rs2[i]); - 
assert(rt>=0); - assert(addr>=0); - assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented - reglist|=(1<regmap,rt1[i]); - s=get_reg(i_regs->regmap,rs1[i]); - shift=get_reg(i_regs->regmap,rs2[i]); - if(t>=0){ - if(rs1[i]==0) - { - emit_zeroreg(t); - } - else if(rs2[i]==0) - { - assert(s>=0); - if(s!=t) emit_mov(s,t); - } - else - { - emit_andimm(shift,31,HOST_TEMPREG); - if(opcode2[i]==4) // SLLV - { - emit_shl(s,HOST_TEMPREG,t); - } - if(opcode2[i]==6) // SRLV - { - emit_shr(s,HOST_TEMPREG,t); - } - if(opcode2[i]==7) // SRAV - { - emit_sar(s,HOST_TEMPREG,t); - } - } - } - } else { // DSLLV/DSRLV/DSRAV - signed char sh,sl,th,tl,shift; - th=get_reg(i_regs->regmap,rt1[i]|64); - tl=get_reg(i_regs->regmap,rt1[i]); - sh=get_reg(i_regs->regmap,rs1[i]|64); - sl=get_reg(i_regs->regmap,rs1[i]); - shift=get_reg(i_regs->regmap,rs2[i]); - if(tl>=0){ - if(rs1[i]==0) - { - emit_zeroreg(tl); - if(th>=0) emit_zeroreg(th); - } - else if(rs2[i]==0) - { - assert(sl>=0); - if(sl!=tl) emit_mov(sl,tl); - if(th>=0&&sh!=th) emit_mov(sh,th); - } - else - { - // FIXME: What if shift==tl ? 
- assert(shift!=tl); - int temp=get_reg(i_regs->regmap,-1); - int real_th=th; - if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register - assert(sl>=0); - assert(sh>=0); - emit_andimm(shift,31,HOST_TEMPREG); - if(opcode2[i]==0x14) // DSLLV - { - if(th>=0) emit_shl(sh,HOST_TEMPREG,th); - emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); - emit_orrshr(sl,HOST_TEMPREG,th); - emit_andimm(shift,31,HOST_TEMPREG); - emit_testimm(shift,32); - emit_shl(sl,HOST_TEMPREG,tl); - if(th>=0) emit_cmovne_reg(tl,th); - emit_cmovne_imm(0,tl); - } - if(opcode2[i]==0x16) // DSRLV - { - assert(th>=0); - emit_shr(sl,HOST_TEMPREG,tl); - emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); - emit_orrshl(sh,HOST_TEMPREG,tl); - emit_andimm(shift,31,HOST_TEMPREG); - emit_testimm(shift,32); - emit_shr(sh,HOST_TEMPREG,th); - emit_cmovne_reg(th,tl); - if(real_th>=0) emit_cmovne_imm(0,th); - } - if(opcode2[i]==0x17) // DSRAV - { - assert(th>=0); - emit_shr(sl,HOST_TEMPREG,tl); - emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); - if(real_th>=0) { - assert(temp>=0); - emit_sarimm(th,31,temp); - } - emit_orrshl(sh,HOST_TEMPREG,tl); - emit_andimm(shift,31,HOST_TEMPREG); - emit_testimm(shift,32); - emit_sar(sh,HOST_TEMPREG,th); - emit_cmovne_reg(th,tl); - if(real_th>=0) emit_cmovne_reg(temp,th); - } - } - } - } - } -} -#define shift_assemble shift_assemble_arm - -static void loadlr_assemble_arm(int i,struct regstat *i_regs) -{ - int s,tl,temp,temp2,addr; - int offset; - void *jaddr=0; - int memtarget=0,c=0; - int fastio_reg_override=-1; - u_int hr,reglist=0; - tl=get_reg(i_regs->regmap,rt1[i]); - s=get_reg(i_regs->regmap,rs1[i]); - temp=get_reg(i_regs->regmap,-1); - temp2=get_reg(i_regs->regmap,FTEMP); - addr=get_reg(i_regs->regmap,AGEN1+(i&1)); - assert(addr<0); - offset=imm[i]; - for(hr=0;hrregmap[hr]>=0) reglist|=1<=0) { - c=(i_regs->wasconst>>s)&1; - if(c) { - memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - } - } - if(!c) { - emit_shlimm(addr,3,temp); - if 
(opcode[i]==0x22||opcode[i]==0x26) { - emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR - }else{ - emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR - } - jaddr=emit_fastpath_cmp_jump(i,temp2,&fastio_reg_override); - } - else { - if(ram_offset&&memtarget) { - host_tempreg_acquire(); - emit_addimm(temp2,ram_offset,HOST_TEMPREG); - fastio_reg_override=HOST_TEMPREG; - } - if (opcode[i]==0x22||opcode[i]==0x26) { - emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR - }else{ - emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR - } - } - if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR - if(!c||memtarget) { - int a=temp2; - if(fastio_reg_override>=0) a=fastio_reg_override; - emit_readword_indexed(0,a,temp2); - if(fastio_reg_override==HOST_TEMPREG) host_tempreg_release(); - if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist); - } - else - inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist); - if(rt1[i]) { - assert(tl>=0); - emit_andimm(temp,24,temp); -#ifdef BIG_ENDIAN_MIPS - if (opcode[i]==0x26) // LWR -#else - if (opcode[i]==0x22) // LWL -#endif - emit_xorimm(temp,24,temp); - emit_movimm(-1,HOST_TEMPREG); - if (opcode[i]==0x26) { - emit_shr(temp2,temp,temp2); - emit_bic_lsr(tl,HOST_TEMPREG,temp,tl); - }else{ - emit_shl(temp2,temp,temp2); - emit_bic_lsl(tl,HOST_TEMPREG,temp,tl); - } - emit_or(temp2,tl,tl); - } - //emit_storereg(rt1[i],tl); // DEBUG - } - if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR - assert(0); - } -} -#define loadlr_assemble loadlr_assemble_arm - static void c2op_prologue(u_int op,u_int reglist) { save_regs_all(reglist); #ifdef PCNT emit_movimm(op,0); - emit_call((int)pcnt_gte_start); + emit_call(pcnt_gte_start); #endif emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs } @@ -2276,7 +2000,7 @@ static void c2op_epilogue(u_int op,u_int reglist) { #ifdef PCNT emit_movimm(op,0); - emit_call((int)pcnt_gte_end); + emit_call(pcnt_gte_end); 
#endif restore_regs_all(reglist); } @@ -2419,6 +2143,45 @@ static void c2op_assemble(int i,struct regstat *i_regs) } } +static void c2op_ctc2_31_assemble(signed char sl, signed char temp) +{ + //value = value & 0x7ffff000; + //if (value & 0x7f87e000) value |= 0x80000000; + emit_shrimm(sl,12,temp); + emit_shlimm(temp,12,temp); + emit_testimm(temp,0x7f000000); + emit_testeqimm(temp,0x00870000); + emit_testeqimm(temp,0x0000e000); + emit_orrne_imm(temp,0x80000000,temp); +} + +static void do_mfc2_31_one(u_int copr,signed char temp) +{ + emit_readword(®_cop2d[copr],temp); + emit_testimm(temp,0x8000); // do we need this? + emit_andne_imm(temp,0,temp); + emit_cmpimm(temp,0xf80); + emit_andimm(temp,0xf80,temp); + emit_cmovae_imm(0xf80,temp); +} + +static void c2op_mfc2_29_assemble(signed char tl, signed char temp) +{ + if (temp < 0) { + host_tempreg_acquire(); + temp = HOST_TEMPREG; + } + do_mfc2_31_one(9,temp); + emit_shrimm(temp,7,tl); + do_mfc2_31_one(10,temp); + emit_orrshr_imm(temp,2,tl); + do_mfc2_31_one(11,temp); + emit_orrshl_imm(temp,3,tl); + emit_writeword(tl,®_cop2d[29]); + if (temp == HOST_TEMPREG) + host_tempreg_release(); +} + static void multdiv_assemble_arm(int i,struct regstat *i_regs) { // case 0x18: MULT diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index a0c628b58..27f9141d3 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -20,6 +20,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#include "pcnt.h" #include "arm_features.h" #if defined(BASE_ADDR_FIXED) @@ -43,25 +44,26 @@ static void set_jump_target(void *addr, void *target) u_int *ptr = addr; intptr_t offset = (u_char *)target - (u_char *)addr; - if((*ptr&0xFC000000)==0x14000000) { + if ((*ptr&0xFC000000) == 0x14000000) { // b assert(offset>=-134217728LL&&offset<134217728LL); *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff); } - else if((*ptr&0xff000000)==0x54000000) { + else if ((*ptr&0xff000000) == 0x54000000 // b.cond + || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz // Conditional branch are limited to +/- 1MB // block max size is 256k so branching beyond the +/- 1MB limit // should only happen when jumping to an already compiled block (see add_link) // a workaround would be to do a trampoline jump via a stub at the end of the block - assert(offset>=-1048576LL&&offset<1048576LL); + assert(-1048576 <= offset && offset < 1048576); *ptr=(*ptr&0xFF00000F)|(((offset>>2)&0x7ffff)<<5); } - else if((*ptr&0x9f000000)==0x10000000) { //adr + else if((*ptr&0x9f000000)==0x10000000) { // adr // generated by do_miniht_insert assert(offset>=-1048576LL&&offset<1048576LL); *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5; } else - assert(0); // should not happen + abort(); // should not happen } // from a pointer to external jump stub (which was produced by emit_extjump2) @@ -75,62 +77,21 @@ static void *find_extjump_insn(void *stub) } // find where external branch is liked to using addr of it's stub: -// get address that insn one after stub loads (dyna_linker arg1), +// get address that the stub loads (dyna_linker arg1), // treat it as a pointer to branch insn, // return addr where that branch jumps to static void *get_pointer(void *stub) { int *i_ptr = find_extjump_insn(stub); - assert((*i_ptr&0xfc000000) == 0x14000000); // b - return (u_char *)i_ptr+(((signed int)(*i_ptr<<6)>>6)<<2); -} - -// Find the 
"clean" entry point from a "dirty" entry point -// by skipping past the call to verify_code -static void *get_clean_addr(void *addr) -{ + if ((*i_ptr&0xfc000000) == 0x14000000) // b + return i_ptr + ((signed int)(*i_ptr<<6)>>6); + if ((*i_ptr&0xff000000) == 0x54000000 // b.cond + || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz + return i_ptr + ((signed int)(*i_ptr<<8)>>13); assert(0); return NULL; } -static int verify_dirty(u_int *ptr) -{ - assert(0); - return 0; -} - -static int isclean(void *addr) -{ - u_int *ptr = addr; - return (*ptr >> 24) != 0x58; // the only place ldr (literal) is used -} - -static uint64_t get_from_ldr_literal(const u_int *i) -{ - signed int ofs; - assert((i[0] & 0xff000000) == 0x58000000); - ofs = i[0] << 8; - ofs >>= 5+8; - return *(uint64_t *)(i + ofs); -} - -static uint64_t get_from_movz(const u_int *i) -{ - assert((i[0] & 0x7fe00000) == 0x52800000); - return (i[0] >> 5) & 0xffff; -} - -// get source that block at addr was compiled from (host pointers) -static void get_bounds(void *addr, u_char **start, u_char **end) -{ - const u_int *ptr = addr; - assert((ptr[0] & 0xff00001f) == 0x58000001); // ldr x1, source - assert((ptr[1] & 0xff00001f) == 0x58000002); // ldr x2, copy - assert((ptr[2] & 0xffe0001f) == 0x52800003); // movz w3, #slen*4 - *start = (u_char *)get_from_ldr_literal(&ptr[0]); - *end = *start + get_from_movz(&ptr[2]); -} - // Allocate a specific ARM register. 
static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr) { @@ -208,6 +169,13 @@ static u_int rm_rd(u_int rm, u_int rd) } */ +static u_int rn_rd(u_int rn, u_int rd) +{ + assert(rn < 31); + assert(rd < 31); + return (rn << 5) | rd; +} + static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd) { assert(rm < 32); @@ -216,6 +184,12 @@ static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd) return (rm << 16) | (rn << 5) | rd; } +static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd) +{ + assert(ra < 32); + return rm_rn_rd(rm, rn, rd) | (ra << 10); +} + static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt) { assert(imm7 < 0x80); @@ -304,7 +278,7 @@ static uint32_t is_mask(u_int value) // non-empty sequence of ones (possibly rotated) with the remainder zero. static uint32_t is_rotated_mask(u_int value) { - if (value == 0) + if (value == 0 || value == ~0) return 0; if (is_mask((value - 1) | value)) return 1; @@ -328,11 +302,11 @@ static void gen_logical_imm(u_int value, u_int *immr, u_int *imms) lzeros = __builtin_clz(value); tzeros = __builtin_ctz(value); ones = 32 - lzeros - tzeros; - *immr = 31 - tzeros; + *immr = lzeros; *imms = 31 - ones; return; } - assert(0); + abort(); } static void emit_mov(u_int rs, u_int rt) @@ -347,13 +321,6 @@ static void emit_mov64(u_int rs, u_int rt) output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt)); } -static void emit_movs(u_int rs, u_int rt) -{ - assert(0); // misleading - assem_debug("movs %s,%s\n", regname[rt], regname[rs]); - output_w32(0x31000000 | imm12_rn_rd(0, rs, rt)); -} - static void emit_add(u_int rs1, u_int rs2, u_int rt) { assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]); @@ -366,16 +333,9 @@ static void emit_add64(u_int rs1, u_int rs2, u_int rt) output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt)); } -#pragma GCC diagnostic ignored "-Wunused-function" -static void emit_adds(u_int rs1, u_int rs2, u_int rt) -{ - assem_debug("adds 
%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt)); -} - static void emit_adds64(u_int rs1, u_int rs2, u_int rt) { - assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]); output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt)); } @@ -391,6 +351,12 @@ static void emit_sub(u_int rs1, u_int rs2, u_int rt) output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt)); } +static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) +{ + assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift); + output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt)); +} + static void emit_movz(u_int imm, u_int rt) { assem_debug("movz %s,#%#x\n", regname[rt], imm); @@ -424,7 +390,7 @@ static void emit_movk(u_int imm,u_int rt) static void emit_movk_lsl16(u_int imm,u_int rt) { assert(imm<65536); - assem_debug("movk %s, #%#x, lsl #16\n", regname[rt], imm); + assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm); output_w32(0x72a00000 | imm16_rd(imm, rt)); } @@ -463,7 +429,7 @@ static void emit_readword(void *addr, u_int rt) output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt)); } else - assert(0); + abort(); } static void emit_readdword(void *addr, u_int rt) @@ -473,6 +439,17 @@ static void emit_readdword(void *addr, u_int rt) assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset); output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt)); } + else + abort(); +} + +static void emit_readshword(void *addr, u_int rt) +{ + uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; + if (!(offset & 1) && offset <= 8190) { + assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset); + output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt)); + } else assert(0); } @@ -516,10 +493,10 @@ static void emit_writedword(u_int rt, void *addr) uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; if 
(!(offset & 7) && offset <= 32760) { assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset); - output_w32(0xf9000000 | imm12_rn_rd(offset >> 2, FP, rt)); + output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt)); } else - assert(0); + abort(); } static void emit_storereg(u_int r, u_int hr) @@ -547,13 +524,7 @@ static void emit_testimm(u_int rs, u_int imm) assem_debug("tst %s,#%#x\n", regname[rs], imm); assert(is_rotated_mask(imm)); // good enough for PCSX gen_logical_imm(imm, &immr, &imms); - output_w32(0xb9000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR)); -} - -static void emit_testeqimm(u_int rs,int imm) -{ - assem_debug("tsteq %s,$%d\n",regname[rs],imm); - assert(0); // TODO eliminate emit_testeqimm + output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR)); } static void emit_not(u_int rs,u_int rt) @@ -562,12 +533,6 @@ static void emit_not(u_int rs,u_int rt) output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt)); } -static void emit_mvnmi(u_int rs,u_int rt) -{ - assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]); - assert(0); // eliminate -} - static void emit_and(u_int rs1,u_int rs2,u_int rt) { assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); @@ -580,6 +545,12 @@ static void emit_or(u_int rs1,u_int rs2,u_int rt) output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt)); } +static void emit_bic(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt)); +} + static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt) { assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm); @@ -592,12 +563,24 @@ static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt)); } +static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt) +{ + assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm); + output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt)); +} + static void 
emit_xor(u_int rs1,u_int rs2,u_int rt) { assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt)); } +static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt) +{ + assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm); + output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt)); +} + static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt) { unused const char *st = s ? "s" : ""; @@ -608,7 +591,7 @@ static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt)); } else if (-imm < 4096) { - assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm); + assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm); output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt)); } else if (imm < 16777216) { @@ -616,7 +599,7 @@ static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt)); if ((imm & 0xfff) || s) { assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff); - output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rt, rt)); + output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt)); } } else if (-imm < 16777216) { @@ -628,7 +611,7 @@ static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt } } else - assert(0); + abort(); } static void emit_addimm(u_int rs, uintptr_t imm, u_int rt) @@ -720,15 +703,16 @@ static void emit_shlimm(u_int rs,u_int imm,u_int rt) output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt)); } -static unused void emit_lslpls_imm(u_int rs,int imm,u_int rt) +static void emit_shrimm(u_int rs,u_int imm,u_int rt) { - assert(0); // eliminate + assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt)); } -static void 
emit_shrimm(u_int rs,u_int imm,u_int rt) +static void emit_shrimm64(u_int rs,u_int imm,u_int rt) { assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt)); + output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt)); } static void emit_sarimm(u_int rs,u_int imm,u_int rt) @@ -739,7 +723,7 @@ static void emit_sarimm(u_int rs,u_int imm,u_int rt) static void emit_rorimm(u_int rs,u_int imm,u_int rt) { - assem_debug("ror %s,%s,#%d",regname[rt],regname[rs],imm); + assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm); output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt)); } @@ -751,7 +735,7 @@ static void emit_signextend16(u_int rs, u_int rt) static void emit_shl(u_int rs,u_int rshift,u_int rt) { - assem_debug("lsl %s,%s,%s",regname[rt],regname[rs],regname[rshift]); + assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]); output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt)); } @@ -778,7 +762,7 @@ static void emit_cmpimm(u_int rs, u_int imm) output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR)); } else if (imm < 16777216 && !(imm & 0xfff)) { - assem_debug("cmp %s,#%#x,lsl #12\n", regname[rs], imm >> 12); + assem_debug("cmp %s,#%#x\n", regname[rs], imm); output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR)); } else { @@ -819,9 +803,10 @@ static void emit_cmovb_imm(int imm,u_int rt) emit_cmov_imm(COND_CC, COND_CS, imm, rt); } -static void emit_cmovs_imm(int imm,u_int rt) +static void emit_cmoveq_reg(u_int rs,u_int rt) { - emit_cmov_imm(COND_MI, COND_PL, imm, rt); + assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]); + output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt)); } static void emit_cmovne_reg(u_int rs,u_int rt) @@ -842,6 +827,12 @@ static void emit_cmovs_reg(u_int rs,u_int rt) output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt)); } +static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("csinv 
%s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt)); +} + static void emit_slti32(u_int rs,int imm,u_int rt) { if(rs!=rt) emit_zeroreg(rt); @@ -906,7 +897,7 @@ static void emit_call(const void *a) if (-134217728 <= diff && diff <= 134217727) output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff)); else - assert(0); + abort(); } static void emit_jmp(const void *a) @@ -972,16 +963,23 @@ static void emit_jc(const void *a) output_w32(0x54000000 | (offset << 5) | COND_CS); } -static void emit_jcc(const void *a) +static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r) { - assem_debug("bcc %p\n", a); + assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a); u_int offset = genjmpcc(a); - output_w32(0x54000000 | (offset << 5) | COND_CC); + is64 = is64 ? 0x80000000 : 0; + isnz = isnz ? 0x01000000 : 0; + output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r)); +} + +static void emit_cbz(const void *a, u_int r) +{ + emit_cb(0, 0, a, r); } static void emit_jmpreg(u_int r) { - assem_debug("br %s", regname64[r]); + assem_debug("br %s\n", regname64[r]); output_w32(0xd61f0000 | rm_rn_rd(0, r, 0)); } @@ -1000,10 +998,21 @@ static void emit_adr(void *addr, u_int rt) { intptr_t offset = (u_char *)addr - out; assert(-1048576 <= offset && offset < 1048576); + assert(rt < 31); assem_debug("adr x%d,#%#lx\n", rt, offset); output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt); } +static void emit_adrp(void *addr, u_int rt) +{ + intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl); + assert(-4294967296l <= offset && offset < 4294967296l); + assert(rt < 31); + offset >>= 12; + assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset); + output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt); +} + static void emit_readword_indexed(int offset, u_int rs, u_int rt) { assem_debug("ldur 
%s,[%s+%#x]\n",regname[rt],regname64[rs],offset); @@ -1095,55 +1104,80 @@ static void emit_movzwl_indexed(int offset, u_int rs, u_int rt) static void emit_writeword_indexed(u_int rt, int offset, u_int rs) { - assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset); - if (!(offset & 3) && offset <= 16380) + if (!(offset & 3) && (u_int)offset <= 16380) { + assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset); output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt)); + } + else if (-256 <= offset && offset < 256) { + assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset); + output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt)); + } else assert(0); } static void emit_writehword_indexed(u_int rt, int offset, u_int rs) { - assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname[rs], offset); - if (!(offset & 1) && offset <= 8190) + if (!(offset & 1) && (u_int)offset <= 8190) { + assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset); output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt)); + } + else if (-256 <= offset && offset < 256) { + assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset); + output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt)); + } else assert(0); } static void emit_writebyte_indexed(u_int rt, int offset, u_int rs) { - assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname[rs], offset); - if ((u_int)offset < 4096) + if ((u_int)offset < 4096) { + assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset); output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt)); + } + else if (-256 <= offset && offset < 256) { + assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset); + output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt)); + } else assert(0); } -static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) +static void emit_umull(u_int rs1, u_int rs2, u_int rt) { - assem_debug("umull %s, %s, %s, 
%s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); - assert(rs1<16); - assert(rs2<16); - assert(hi<16); - assert(lo<16); - assert(0); + assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]); + output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt)); } -static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo) +static void emit_smull(u_int rs1, u_int rs2, u_int rt) { - assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); - assert(rs1<16); - assert(rs2<16); - assert(hi<16); - assert(lo<16); - assert(0); + assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]); + output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt)); +} + +static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt) +{ + assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]); + output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt)); } -static void emit_clz(u_int rs,u_int rt) +static void emit_sdiv(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt)); +} + +static void emit_udiv(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt)); +} + +static void emit_clz(u_int rs, u_int rt) { assem_debug("clz %s,%s\n",regname[rt],regname[rs]); - assert(0); + output_w32(0x5ac01000 | rn_rd(rs, rt)); } // special case for checking invalid_code @@ -1151,28 +1185,23 @@ static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm) { host_tempreg_acquire(); emit_shrimm(r, 12, HOST_TEMPREG); - assem_debug("ldrb %s,[%s,%s]",regname[HOST_TEMPREG],regname64[rbase],regname64[HOST_TEMPREG]); - output_w32(0x38606800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG)); + assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]); + output_w32(0x38604800 | 
rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG)); emit_cmpimm(HOST_TEMPREG, imm); host_tempreg_release(); } -static void emit_orrne_imm(u_int rs,int imm,u_int rt) +// special for loadlr_assemble, rs2 is destroyed +static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) { - assem_debug("orrne %s,%s,#%#x\n",regname[rt],regname[rs],imm); - assert(0); + emit_shl(rs2, shift, rs2); + emit_bic(rs1, rs2, rt); } -static void emit_andne_imm(u_int rs,int imm,u_int rt) +static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) { - assem_debug("andne %s,%s,#%#x\n",regname[rt],regname[rs],imm); - assert(0); -} - -static unused void emit_addpl_imm(u_int rs,int imm,u_int rt) -{ - assem_debug("addpl %s,%s,#%#x\n",regname[rt],regname[rs],imm); - assert(0); + emit_shr(rs2, shift, rs2); + emit_bic(rs1, rs2, rt); } static void emit_loadlp_ofs(u_int ofs, u_int rt) @@ -1280,9 +1309,11 @@ static void check_extjump2(void *src) static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt) { int diff = rt_val - rs_val; - if ((-4096 <= diff && diff < 4096) - || (-16777216 <= diff && diff < 16777216 && !(diff & 0xfff))) + if ((-4096 < diff && diff < 4096) + || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))) emit_addimm(rs, diff, rt); + else if (rt_val == ~rs_val) + emit_not(rs, rt); else if (is_rotated_mask(rs_val ^ rt_val)) emit_xorimm(rs, rs_val ^ rt_val, rt); else @@ -1293,8 +1324,9 @@ static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt) static int is_similar_value(u_int v1, u_int v2) { int diff = v1 - v2; - return (-4096 <= diff && diff < 4096) - || (-16777216 <= diff && diff < 16777216 && !(diff & 0xfff)) + return (-4096 < diff && diff < 4096) + || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)) + || v1 == ~v2 || is_rotated_mask(v1 ^ v2); } @@ -1326,7 +1358,7 @@ static void loadstore_extend(enum stub_type type, u_int rs, u_int rt) case STOREH_STUB: emit_ubfm(rs, 15, rt); break; case LOADW_STUB: case STOREW_STUB: if 
(rs != rt) emit_mov(rs, rt); break; - default: assert(0); + default: assert(0); } } @@ -1381,7 +1413,7 @@ static void do_readstub(int n) case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break; case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break; case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break; - default: assert(0); + default: assert(0); } } if(regs_saved) { @@ -1470,8 +1502,11 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, signed char if(cc<0) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); - if(is_dynamic) - emit_readdword(&mem_rtab,1); + if(is_dynamic) { + uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1; + emit_adrp((void *)l1, 1); + emit_addimm64(1, l1 & 0xfff, 1); + } else emit_call(do_memhandler_pre); @@ -1551,7 +1586,7 @@ static void do_writestub(int n) case STOREB_STUB: handler=jump_handler_write8; break; case STOREH_STUB: handler=jump_handler_write16; break; case STOREW_STUB: handler=jump_handler_write32; break; - default: assert(0); + default: assert(0); } assert(handler); pass_args(rs,rt); @@ -1616,10 +1651,36 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, signed char restore_regs(reglist); } -static void do_unalignedwritestub(int n) +static int verify_code_arm64(const void *source, const void *copy, u_int size) { - assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4); - assert(0); + int ret = memcmp(source, copy, size); + //printf("%s %p,%#x = %d\n", __func__, source, size, ret); + return ret; +} + +// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr +static void do_dirty_stub_base(u_int vaddr) +{ + assert(slen <= MAXBLOCK); + emit_loadlp_ofs(0, 0); // ldr x1, source + emit_loadlp_ofs(0, 1); // ldr x2, copy + emit_movz(slen*4, 2); + emit_call(verify_code_arm64); + void *jmp = out; + emit_cbz(0, 0); + emit_movz(vaddr & 0xffff, 0); + emit_movk_lsl16(vaddr >> 16, 0); + emit_call(get_addr); + emit_jmpreg(0); + 
set_jump_target(jmp, out); +} + +static void assert_dirty_stub(const u_int *ptr) +{ + assert((ptr[0] & 0xff00001f) == 0x58000000); // ldr x0, source + assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy + assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #slen*4 + assert( ptr[8] == 0xd61f0000); // br x0 } static void set_loadlp(u_int *loadl, void *lit) @@ -1631,17 +1692,6 @@ static void set_loadlp(u_int *loadl, void *lit) *loadl |= (ofs >> 2) << 5; } -// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr -static void do_dirty_stub_emit_args(u_int arg0) -{ - assert(slen <= MAXBLOCK); - emit_loadlp_ofs(0, 1); // ldr x1, source - emit_loadlp_ofs(0, 2); // ldr x2, copy - emit_movz(slen*4, 3); - emit_movz(arg0 & 0xffff, 0); - emit_movk_lsl16(arg0 >> 16, 0); -} - static void do_dirty_stub_emit_literals(u_int *loadlps) { set_loadlp(&loadlps[0], out); @@ -1654,8 +1704,7 @@ static void *do_dirty_stub(int i) { assem_debug("do_dirty_stub %x\n",start+i*4); u_int *loadlps = (void *)out; - do_dirty_stub_emit_args(start + i*4); - emit_call(verify_code); + do_dirty_stub_base(start + i*4); void *entry = out; load_regs_entry(i); if (entry == out) @@ -1665,38 +1714,258 @@ static void *do_dirty_stub(int i) return entry; } -static void do_dirty_stub_ds() +static void do_dirty_stub_ds(void) { - do_dirty_stub_emit_args(start + 1); u_int *loadlps = (void *)out; - emit_call(verify_code_ds); + do_dirty_stub_base(start + 1); + void *lit_jumpover = out; emit_jmp(out + 8*2); do_dirty_stub_emit_literals(loadlps); + set_jump_target(lit_jumpover, out); } -/* Special assem */ +static uint64_t get_from_ldr_literal(const u_int *i) +{ + signed int ofs; + assert((i[0] & 0xff000000) == 0x58000000); + ofs = i[0] << 8; + ofs >>= 5+8; + return *(uint64_t *)(i + ofs); +} -#define shift_assemble shift_assemble_arm64 +static uint64_t get_from_movz(const u_int *i) +{ + assert((i[0] & 0x7fe00000) == 0x52800000); + return (i[0] >> 5) & 0xffff; +} -static void 
shift_assemble_arm64(int i,struct regstat *i_regs) +static void *get_clean_addr(u_int *addr) { - assert(0); + assert_dirty_stub(addr); + return addr + 9; } -#define loadlr_assemble loadlr_assemble_arm64 -static void loadlr_assemble_arm64(int i,struct regstat *i_regs) +static int verify_dirty(const u_int *ptr) { - assert(0); + const void *source, *copy; + u_int len; + assert_dirty_stub(ptr); + source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x0, source + copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy + len = get_from_movz(&ptr[2]); // movz w2, #slen*4 + return !memcmp(source, copy, len); +} + +static int isclean(void *addr) +{ + const u_int *ptr = addr; + if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used + assert_dirty_stub(ptr); + return 0; + } + return 1; +} + +// get source that block at addr was compiled from (host pointers) +static void get_bounds(void *addr, u_char **start, u_char **end) +{ + const u_int *ptr = addr; + assert_dirty_stub(ptr); + *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x0, source + *end = *start + get_from_movz(&ptr[2]); // movz w2, #slen*4 +} + +/* Special assem */ + +static void c2op_prologue(u_int op,u_int reglist) +{ + save_load_regs_all(1, reglist); +#ifdef PCNT + emit_movimm(op, 0); + emit_call(pcnt_gte_start); +#endif + // pointer to cop2 regs + emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); +} + +static void c2op_epilogue(u_int op,u_int reglist) +{ +#ifdef PCNT + emit_movimm(op, 0); + emit_call(pcnt_gte_end); +#endif + save_load_regs_all(0, reglist); } static void c2op_assemble(int i,struct regstat *i_regs) { - assert(0); + u_int c2op=source[i]&0x3f; + u_int hr,reglist_full=0,reglist; + int need_flags,need_ir; + for(hr=0;hr<HOST_REGS;hr++) { + if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr; + } + reglist=reglist_full&CALLER_SAVE_REGS; + + if (gte_handlers[c2op]!=NULL) { + need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works + need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
+ assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n", + source[i],gte_unneeded[i+1],need_flags,need_ir); + if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS) + need_flags=0; + //int shift = (source[i] >> 19) & 1; + //int lm = (source[i] >> 10) & 1; + switch(c2op) { + default: + (void)need_ir; + c2op_prologue(c2op,reglist); + emit_movimm(source[i],1); // opcode + emit_writeword(1,&psxRegs.code); + emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]); + break; + } + c2op_epilogue(c2op,reglist); + } +} + +static void c2op_ctc2_31_assemble(signed char sl, signed char temp) +{ + //value = value & 0x7ffff000; + //if (value & 0x7f87e000) value |= 0x80000000; + emit_andimm(sl, 0x7fffe000, temp); + emit_testimm(temp, 0xff87ffff); + emit_andimm(sl, 0x7ffff000, temp); + host_tempreg_acquire(); + emit_orimm(temp, 0x80000000, HOST_TEMPREG); + emit_cmovne_reg(HOST_TEMPREG, temp); + host_tempreg_release(); + assert(0); // testing needed +} + +static void do_mfc2_31_one(u_int copr,signed char temp) +{ + emit_readshword(&reg_cop2d[copr],temp); + emit_bicsar_imm(temp,31,temp); + emit_cmpimm(temp,0xf80); + emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0; + emit_andimm(temp,0xf80,temp); +} + +static void c2op_mfc2_29_assemble(signed char tl, signed char temp) +{ + if (temp < 0) { + host_tempreg_acquire(); + temp = HOST_TEMPREG; + } + do_mfc2_31_one(9,temp); + emit_shrimm(temp,7,tl); + do_mfc2_31_one(10,temp); + emit_orrshr_imm(temp,2,tl); + do_mfc2_31_one(11,temp); + emit_orrshl_imm(temp,3,tl); + emit_writeword(tl,&reg_cop2d[29]); + + if (temp == HOST_TEMPREG) + host_tempreg_release(); } static void multdiv_assemble_arm64(int i,struct regstat *i_regs) { - assert(0); + // case 0x18: MULT + // case 0x19: MULTU + // case 0x1A: DIV + // case 0x1B: DIVU + if(rs1[i]&&rs2[i]) + { + switch(opcode2[i]) + { + case 0x18: // MULT + case 0x19: // MULTU + { + signed char m1=get_reg(i_regs->regmap,rs1[i]); + signed char m2=get_reg(i_regs->regmap,rs2[i]); + signed char
hi=get_reg(i_regs->regmap,HIREG); + signed char lo=get_reg(i_regs->regmap,LOREG); + assert(m1>=0); + assert(m2>=0); + assert(hi>=0); + assert(lo>=0); + + if(opcode2[i]==0x18) // MULT + emit_smull(m1,m2,hi); + else // MULTU + emit_umull(m1,m2,hi); + + emit_mov(hi,lo); + emit_shrimm64(hi,32,hi); + break; + } + case 0x1A: // DIV + case 0x1B: // DIVU + { + signed char numerator=get_reg(i_regs->regmap,rs1[i]); + signed char denominator=get_reg(i_regs->regmap,rs2[i]); + signed char quotient=get_reg(i_regs->regmap,LOREG); + signed char remainder=get_reg(i_regs->regmap,HIREG); + assert(numerator>=0); + assert(denominator>=0); + assert(quotient>=0); + assert(remainder>=0); + + if (opcode2[i] == 0x1A) // DIV + emit_sdiv(numerator,denominator,quotient); + else // DIVU + emit_udiv(numerator,denominator,quotient); + emit_msub(quotient,denominator,numerator,remainder); + + // div 0 quotient (remainder is already correct) + host_tempreg_acquire(); + if (opcode2[i] == 0x1A) // DIV + emit_sub_asrimm(0,numerator,31,HOST_TEMPREG); + else + emit_movimm(~0,HOST_TEMPREG); + emit_test(denominator,denominator); + emit_cmoveq_reg(HOST_TEMPREG,quotient); + host_tempreg_release(); + break; + } + default: + assert(0); + } + } + else + { + signed char hr=get_reg(i_regs->regmap,HIREG); + signed char lr=get_reg(i_regs->regmap,LOREG); + if ((opcode2[i]==0x1A || opcode2[i]==0x1B) && rs2[i]==0) // div 0 + { + if (rs1[i]) { + signed char numerator = get_reg(i_regs->regmap, rs1[i]); + assert(numerator >= 0); + if (hr >= 0) + emit_mov(numerator,hr); + if (lr >= 0) { + if (opcode2[i] == 0x1A) // DIV + emit_sub_asrimm(0,numerator,31,lr); + else + emit_movimm(~0,lr); + } + } + else { + if (hr >= 0) emit_zeroreg(hr); + if (lr >= 0) emit_movimm(~0,lr); + } + } + else + { + // Multiply by zero is zero. 
+ if (hr >= 0) emit_zeroreg(hr); + if (lr >= 0) emit_zeroreg(lr); + } + } } #define multdiv_assemble multdiv_assemble_arm64 diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 62b9176b3..2df259b5c 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -338,7 +338,7 @@ static int ari64_init() scratch_buf_ptr = scratch_buf; SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n"); - SysPrintf("%08x/%08x/%08x/%08x/%08x\n", + SysPrintf("%p/%p/%p/%p/%p\n", psxM, psxH, psxR, mem_rtab, out); return 0; @@ -657,6 +657,8 @@ void do_insn_cmp(void) if (allregs_p[i] != allregs_e[i]) { miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle); bad++; + if (i > 32+2) + goto end; } } diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index fcb4e1a74..bbc52c3c0 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -32,8 +32,6 @@ #define get_addr_ht ESYM(get_addr_ht) #define clean_blocks ESYM(clean_blocks) #define gen_interupt ESYM(gen_interupt) -#define psxException ESYM(psxException) -#define execI ESYM(execI) #define invalidate_addr ESYM(invalidate_addr) #endif @@ -393,7 +391,7 @@ FUNCTION(jump_vaddr): .align 2 FUNCTION(verify_code_ds): - str r8, [fp, #LO_branch_target] + str r8, [fp, #LO_branch_target] @ preserve HOST_BTREG? 
FUNCTION(verify_code): /* r1 = source */ /* r2 = target */ @@ -512,19 +510,9 @@ FUNCTION(jump_syscall): .size jump_syscall, .-jump_syscall .align 2 - .align 2 -FUNCTION(jump_syscall_hle): - str r0, [fp, #LO_pcaddr] /* PC must be set to EPC for psxException */ - ldr r2, [fp, #LO_last_count] - mov r1, #0 /* in delay slot */ - add r2, r2, r10 - mov r0, #0x20 /* cause */ - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ - bl psxException - /* note: psxException might do recursive recompiler call from it's HLE code, * so be ready for this */ -pcsx_return: +FUNCTION(jump_to_new_pc): ldr r1, [fp, #LO_next_interupt] ldr r10, [fp, #LO_cycle] ldr r0, [fp, #LO_pcaddr] @@ -532,27 +520,7 @@ pcsx_return: str r1, [fp, #LO_last_count] bl get_addr_ht mov pc, r0 - .size jump_syscall_hle, .-jump_syscall_hle - - .align 2 -FUNCTION(jump_hlecall): - ldr r2, [fp, #LO_last_count] - str r0, [fp, #LO_pcaddr] - add r2, r2, r10 - adr lr, pcsx_return - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ - bx r1 - .size jump_hlecall, .-jump_hlecall - - .align 2 -FUNCTION(jump_intcall): - ldr r2, [fp, #LO_last_count] - str r0, [fp, #LO_pcaddr] - add r2, r2, r10 - adr lr, pcsx_return - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ - b execI - .size jump_hlecall, .-jump_hlecall + .size jump_to_new_pc, .-jump_to_new_pc .align 2 FUNCTION(new_dyna_leave): diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 060ac48aa..444545ca3 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -111,18 +111,6 @@ FUNCTION(dyna_linker_ds): dyna_linker_main .size dyna_linker_ds, .-dyna_linker_ds - .align 2 - -FUNCTION(verify_code_ds): - bl abort -FUNCTION(verify_code): - /* r1 = source */ - /* r2 = target */ - /* r3 = length */ - bl abort - .size verify_code, .-verify_code - .size verify_code_ds, .-verify_code_ds - .align 2 FUNCTION(cc_interrupt): ldr w0, [rFP, #LO_last_count] @@ -204,32 +192,17 @@ FUNCTION(jump_syscall): 
.size jump_syscall, .-jump_syscall .align 2 - .align 2 -FUNCTION(jump_syscall_hle): - bl abort - /* note: psxException might do recursive recompiler call from it's HLE code, * so be ready for this */ -pcsx_return: - bl abort // w10 +FUNCTION(jump_to_new_pc): ldr w1, [fp, #LO_next_interupt] - ldr w10, [fp, #LO_cycle] + ldr rCC, [fp, #LO_cycle] ldr w0, [fp, #LO_pcaddr] - sub w10, w10, w1 + sub rCC, rCC, w1 str w1, [fp, #LO_last_count] bl get_addr_ht br x0 - .size jump_syscall_hle, .-jump_syscall_hle - - .align 2 -FUNCTION(jump_hlecall): - bl abort - .size jump_hlecall, .-jump_hlecall - - .align 2 -FUNCTION(jump_intcall): - bl abort - .size jump_intcall, .-jump_intcall + .size jump_to_new_pc, .-jump_to_new_pc /* stack must be aligned by 16, and include space for save_regs() use */ .align 2 @@ -292,7 +265,6 @@ FUNCTION(do_memhandler_post): .macro pcsx_read_mem readop tab_shift /* w0 = address, x1 = handler_tab, w2 = cycles */ - stp xzr, x30, [sp, #-16]! ubfm w4, w0, #\tab_shift, #11 ldr x3, [x1, w4, uxtw #3] adds x3, x3, x3 @@ -300,17 +272,18 @@ FUNCTION(do_memhandler_post): \readop w0, [x3, w4, uxtw #\tab_shift] ret 0: + stp xzr, x30, [sp, #-16]! memhandler_pre blr x3 .endm FUNCTION(jump_handler_read8): - add x1, x1, #0x1000/4*4 + 0x1000/2*4 /* shift to r8 part */ + add x1, x1, #0x1000/4*8 + 0x1000/2*8 /* shift to r8 part */ pcsx_read_mem ldrb, 0 b handler_read_end FUNCTION(jump_handler_read16): - add x1, x1, #0x1000/4*4 /* shift to r16 part */ + add x1, x1, #0x1000/4*8 /* shift to r16 part */ pcsx_read_mem ldrh, 1 b handler_read_end @@ -323,29 +296,28 @@ handler_read_end: .macro pcsx_write_mem wrtop movop tab_shift /* w0 = address, w1 = data, w2 = cycles, x3 = handler_tab */ - stp xzr, x30, [sp, #-16]! ubfm w4, w0, #\tab_shift, #11 ldr x3, [x3, w4, uxtw #3] - str w0, [rFP, #LO_address] /* some handlers still need it... */ adds x3, x3, x3 -# str lr, [rFP, #0] bcs 0f mov w0, w2 /* cycle return */ \wrtop w1, [x3, w4, uxtw #\tab_shift] ret 0: + stp xzr, x30, [sp, #-16]! 
+ str w0, [rFP, #LO_address] /* some handlers still need it... */ \movop w0, w1 memhandler_pre blr x3 .endm FUNCTION(jump_handler_write8): - add x3, x3, #0x1000/4*4 + 0x1000/2*4 /* shift to r8 part */ + add x3, x3, #0x1000/4*8 + 0x1000/2*8 /* shift to r8 part */ pcsx_write_mem strb uxtb 0 b handler_write_end FUNCTION(jump_handler_write16): - add x3, x3, #0x1000/4*4 /* shift to r16 part */ + add x3, x3, #0x1000/4*8 /* shift to r16 part */ pcsx_write_mem strh uxth 1 b handler_write_end @@ -358,8 +330,69 @@ handler_write_end: ret FUNCTION(jump_handle_swl): + /* w0 = address, w1 = data, w2 = cycles */ + ldr x3, [fp, #LO_mem_wtab] + mov w4, w0, lsr #12 + ldr x3, [x3, w4, uxtw #3] + adds x3, x3, x3 + bcs 4f + add x3, x0, x3 + mov w0, w2 + tbz x3, #1, 10f // & 2 + tbz x3, #0, 2f // & 1 +3: + stur w1, [x3, #-3] + ret +2: + lsr w2, w1, #8 + lsr w1, w1, #24 + sturh w2, [x3, #-2] + strb w1, [x3] + ret +10: + tbz x3, #0, 0f // & 1 +1: + lsr w1, w1, #16 + sturh w1, [x3, #-1] + ret +0: + lsr w2, w1, #24 + strb w2, [x3] + ret +4: + mov w0, w2 // todo bl abort + ret FUNCTION(jump_handle_swr): + /* w0 = address, w1 = data, w2 = cycles */ + ldr x3, [fp, #LO_mem_wtab] + mov w4, w0, lsr #12 + ldr x3, [x3, w4, uxtw #3] + adds x3, x3, x3 + bcs 4f + add x3, x0, x3 + mov w0, w2 + tbz x3, #1, 10f // & 2 + tbz x3, #0, 2f // & 1 +3: + strb w1, [x3] + ret +2: + strh w1, [x3] + ret +10: + tbz x3, #0, 0f // & 1 +1: + lsr w2, w1, #8 + strb w1, [x3] + sturh w2, [x3, #1] + ret +0: + str w1, [x3] + ret +4: + mov w0, w2 // todo bl abort + ret diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index f2dbb86a1..9ce1f069a 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -35,7 +35,8 @@ static int sceBlock; #endif #include "new_dynarec_config.h" -#include "../psxhle.h" //emulator interface +#include "../psxhle.h" +#include "../psxinterpreter.h" #include "emu_if.h" //emulator interface #define noinline 
__attribute__((noinline,noclone)) @@ -272,7 +273,7 @@ struct link_entry #define DJT_2 (void *)2l // asm linkage -int new_recompile_block(int addr); +int new_recompile_block(u_int addr); void *get_addr_ht(u_int vaddr); void invalidate_block(u_int block); void invalidate_addr(u_int addr); @@ -284,9 +285,7 @@ void verify_code_ds(); void cc_interrupt(); void fp_exception(); void fp_exception_ds(); -void jump_syscall_hle(); -void jump_hlecall(); -void jump_intcall(); +void jump_to_new_pc(); void new_dyna_leave(); // Needed by assembler @@ -298,7 +297,7 @@ static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]); static void load_regs_entry(int t); static void load_all_consts(signed char regmap[],u_int dirty,int i); -static int verify_dirty(u_int *ptr); +static int verify_dirty(const u_int *ptr); static int get_final_value(int hr, int i, int *value); static void add_stub(enum stub_type type, void *addr, void *retaddr, u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e); @@ -832,13 +831,14 @@ static const struct { FUNCNAME(jump_handler_write16), FUNCNAME(jump_handler_write32), FUNCNAME(invalidate_addr), - FUNCNAME(verify_code), - FUNCNAME(jump_hlecall), - FUNCNAME(jump_syscall_hle), + FUNCNAME(jump_to_new_pc), FUNCNAME(new_dyna_leave), FUNCNAME(pcsx_mtc0), FUNCNAME(pcsx_mtc0_ds), FUNCNAME(do_insn_cmp), +#ifdef __arm__ + FUNCNAME(verify_code), +#endif }; static const char *func_name(const void *a) @@ -2361,11 +2361,44 @@ void shiftimm_assemble(int i,struct regstat *i_regs) } #ifndef shift_assemble -void shift_assemble(int i,struct regstat *i_regs) +static void shift_assemble(int i,struct regstat *i_regs) { - printf("Need shift_assemble for this architecture.\n"); - abort(); + signed char s,t,shift; + if (rt1[i] == 0) + return; + assert(opcode2[i]<=0x07); // SLLV/SRLV/SRAV + t = get_reg(i_regs->regmap, rt1[i]); + s = get_reg(i_regs->regmap, rs1[i]); + shift = get_reg(i_regs->regmap, rs2[i]); + if (t < 0) + return; + + if(rs1[i]==0) + emit_zeroreg(t); 
+ else if(rs2[i]==0) { + assert(s>=0); + if(s!=t) emit_mov(s,t); + } + else { + host_tempreg_acquire(); + emit_andimm(shift,31,HOST_TEMPREG); + switch(opcode2[i]) { + case 4: // SLLV + emit_shl(s,HOST_TEMPREG,t); + break; + case 6: // SRLV + emit_shr(s,HOST_TEMPREG,t); + break; + case 7: // SRAV + emit_sar(s,HOST_TEMPREG,t); + break; + default: + assert(0); + } + host_tempreg_release(); + } } + #endif enum { @@ -2430,7 +2463,7 @@ static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) else if(type==MTYPE_1F80) { // scratchpad if (psxH == (void *)0x1f800000) { host_tempreg_acquire(); - emit_addimm(addr,-0x1f800000,HOST_TEMPREG); + emit_xorimm(addr,0x1f800000,HOST_TEMPREG); emit_cmpimm(HOST_TEMPREG,0x1000); host_tempreg_release(); jaddr=out; @@ -2631,10 +2664,86 @@ static void load_assemble(int i,struct regstat *i_regs) } #ifndef loadlr_assemble -void loadlr_assemble(int i,struct regstat *i_regs) +static void loadlr_assemble(int i,struct regstat *i_regs) { - printf("Need loadlr_assemble for this architecture.\n"); - abort(); + int s,tl,temp,temp2,addr; + int offset; + void *jaddr=0; + int memtarget=0,c=0; + int fastio_reg_override=-1; + u_int hr,reglist=0; + tl=get_reg(i_regs->regmap,rt1[i]); + s=get_reg(i_regs->regmap,rs1[i]); + temp=get_reg(i_regs->regmap,-1); + temp2=get_reg(i_regs->regmap,FTEMP); + addr=get_reg(i_regs->regmap,AGEN1+(i&1)); + assert(addr<0); + offset=imm[i]; + for(hr=0;hrregmap[hr]>=0) reglist|=1<=0) { + c=(i_regs->wasconst>>s)&1; + if(c) { + memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; + } + } + if(!c) { + emit_shlimm(addr,3,temp); + if (opcode[i]==0x22||opcode[i]==0x26) { + emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR + }else{ + emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR + } + jaddr=emit_fastpath_cmp_jump(i,temp2,&fastio_reg_override); + } + else { + if(ram_offset&&memtarget) { + host_tempreg_acquire(); + emit_addimm(temp2,ram_offset,HOST_TEMPREG); + fastio_reg_override=HOST_TEMPREG; + 
} + if (opcode[i]==0x22||opcode[i]==0x26) { + emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR + }else{ + emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR + } + } + if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR + if(!c||memtarget) { + int a=temp2; + if(fastio_reg_override>=0) a=fastio_reg_override; + emit_readword_indexed(0,a,temp2); + if(fastio_reg_override==HOST_TEMPREG) host_tempreg_release(); + if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist); + } + else + inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist); + if(rt1[i]) { + assert(tl>=0); + emit_andimm(temp,24,temp); + if (opcode[i]==0x22) // LWL + emit_xorimm(temp,24,temp); + host_tempreg_acquire(); + emit_movimm(-1,HOST_TEMPREG); + if (opcode[i]==0x26) { + emit_shr(temp2,temp,temp2); + emit_bic_lsr(tl,HOST_TEMPREG,temp,tl); + }else{ + emit_shl(temp2,temp,temp2); + emit_bic_lsl(tl,HOST_TEMPREG,temp,tl); + } + host_tempreg_release(); + emit_or(temp2,tl,tl); + } + //emit_storereg(rt1[i],tl); // DEBUG + } + if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR + assert(0); + } } #endif @@ -2746,7 +2855,7 @@ void store_assemble(int i,struct regstat *i_regs) } // basic current block modification detection.. 
// not looking back as that should be in mips cache already - // (note: doesn't seem to trigger, migh be broken) + // (see Spyro2 title->attract mode) if(c&&start+i*4regmap==regs[i].regmap); // not delay slot @@ -2762,7 +2871,7 @@ void store_assemble(int i,struct regstat *i_regs) } } -void storelr_assemble(int i,struct regstat *i_regs) +static void storelr_assemble(int i,struct regstat *i_regs) { int s,tl; int temp; @@ -2802,7 +2911,8 @@ void storelr_assemble(int i,struct regstat *i_regs) emit_jmp(0); } } - emit_addimm_no_flags(ram_offset,temp); + if(ram_offset) + emit_addimm_no_flags(ram_offset,temp); if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR assert(0); @@ -2819,15 +2929,11 @@ void storelr_assemble(int i,struct regstat *i_regs) if (opcode[i]==0x2A) { // SWL emit_writeword_indexed(tl,0,temp); } - if (opcode[i]==0x2E) { // SWR + else if (opcode[i]==0x2E) { // SWR emit_writebyte_indexed(tl,3,temp); } - if (opcode[i]==0x2C) { // SDL - assert(0); - } - if (opcode[i]==0x2D) { // SDR + else assert(0); - } done0=out; emit_jmp(0); // 1 @@ -2840,16 +2946,10 @@ void storelr_assemble(int i,struct regstat *i_regs) emit_writebyte_indexed(tl,1,temp); if(rs2[i]) emit_rorimm(tl,8,tl); } - if (opcode[i]==0x2E) { // SWR + else if (opcode[i]==0x2E) { // SWR // Write two lsb into two most significant bytes emit_writehword_indexed(tl,1,temp); } - if (opcode[i]==0x2C) { // SDL - assert(0); - } - if (opcode[i]==0x2D) { // SDR - assert(0); - } done1=out; emit_jmp(0); // 2 @@ -2863,19 +2963,13 @@ void storelr_assemble(int i,struct regstat *i_regs) emit_writehword_indexed(tl,-2,temp); if(rs2[i]) emit_rorimm(tl,16,tl); } - if (opcode[i]==0x2E) { // SWR + else if (opcode[i]==0x2E) { // SWR // Write 3 lsb into three most significant bytes emit_writebyte_indexed(tl,-1,temp); if(rs2[i]) emit_rorimm(tl,8,tl); emit_writehword_indexed(tl,0,temp); if(rs2[i]) emit_rorimm(tl,24,tl); } - if (opcode[i]==0x2C) { // SDL - assert(0); - } - if (opcode[i]==0x2D) { // SDR - assert(0); - } done2=out; 
emit_jmp(0); // 3 @@ -2886,25 +2980,13 @@ void storelr_assemble(int i,struct regstat *i_regs) emit_writebyte_indexed(tl,-3,temp); if(rs2[i]) emit_rorimm(tl,8,tl); } - if (opcode[i]==0x2E) { // SWR + else if (opcode[i]==0x2E) { // SWR // Write entire word emit_writeword_indexed(tl,-3,temp); } - if (opcode[i]==0x2C) { // SDL - assert(0); - } - if (opcode[i]==0x2D) { // SDR - assert(0); - } set_jump_target(done0, out); set_jump_target(done1, out); set_jump_target(done2, out); - if (opcode[i]==0x2C) { // SDL - assert(0); - } - if (opcode[i]==0x2D) { // SDR - assert(0); - } if(!c||!memtarget) add_stub_r(STORELR_STUB,jaddr,out,i,temp,i_regs,ccadj[i],reglist); if(!(i_regs->waswritten&(1<=0); if (opcode[i]==0x3a) { // SWC2 - cop2_get_dreg(copr,tl,HOST_TEMPREG); + cop2_get_dreg(copr,tl,-1); type=STOREW_STUB; } else @@ -3291,14 +3357,7 @@ static void cop2_assemble(int i,struct regstat *i_regs) emit_signextend16(sl,temp); break; case 31: - //value = value & 0x7ffff000; - //if (value & 0x7f87e000) value |= 0x80000000; - emit_shrimm(sl,12,temp); - emit_shlimm(temp,12,temp); - emit_testimm(temp,0x7f000000); - emit_testeqimm(temp,0x00870000); - emit_testeqimm(temp,0x0000e000); - emit_orrne_imm(temp,0x80000000,temp); + c2op_ctc2_31_assemble(sl,temp); break; default: temp=sl; @@ -3309,6 +3368,90 @@ static void cop2_assemble(int i,struct regstat *i_regs) } } +static void do_unalignedwritestub(int n) +{ + assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4); + literal_pool(256); + set_jump_target(stubs[n].addr, out); + + int i=stubs[n].a; + struct regstat *i_regs=(struct regstat *)stubs[n].c; + int addr=stubs[n].b; + u_int reglist=stubs[n].e; + signed char *i_regmap=i_regs->regmap; + int temp2=get_reg(i_regmap,FTEMP); + int rt; + rt=get_reg(i_regmap,rs2[i]); + assert(rt>=0); + assert(addr>=0); + assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented + reglist|=(1<regmap,CCREG); assert(ccreg==HOST_CCREG); assert(!is_delayslot); (void)ccreg; - 
emit_movimm(start+i*4,EAX); // Get PC - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... - emit_jmp(jump_syscall_hle); // XXX + + emit_movimm(pc,3); // Get PC + emit_readword(&last_count,2); + emit_writeword(3,&psxRegs.pc); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX + emit_add(2,HOST_CCREG,2); + emit_writeword(2,&psxRegs.cycle); + emit_call(func); + emit_jmp(jump_to_new_pc); +} + +static void syscall_assemble(int i,struct regstat *i_regs) +{ + emit_movimm(0x20,0); // cause code + emit_movimm(0,1); // not in delay slot + call_c_cpu_handler(i,i_regs,start+i*4,psxException); } static void hlecall_assemble(int i,struct regstat *i_regs) { - extern void psxNULL(); - signed char ccreg=get_reg(i_regs->regmap,CCREG); - assert(ccreg==HOST_CCREG); - assert(!is_delayslot); - (void)ccreg; - emit_movimm(start+i*4+4,0); // Get PC + void *hlefunc = psxNULL; uint32_t hleCode = source[i] & 0x03ffffff; - if (hleCode >= ARRAY_SIZE(psxHLEt)) - emit_movimm((uintptr_t)psxNULL,1); - else - emit_movimm((uintptr_t)psxHLEt[hleCode],1); - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX - emit_jmp(jump_hlecall); + if (hleCode < ARRAY_SIZE(psxHLEt)) + hlefunc = psxHLEt[hleCode]; + + call_c_cpu_handler(i,i_regs,start+i*4+4,hlefunc); } static void intcall_assemble(int i,struct regstat *i_regs) { - signed char ccreg=get_reg(i_regs->regmap,CCREG); - assert(ccreg==HOST_CCREG); - assert(!is_delayslot); - (void)ccreg; - emit_movimm(start+i*4,0); // Get PC - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_jmp(jump_intcall); + call_c_cpu_handler(i,i_regs,start+i*4,execI); } static void speculate_mov(int rs,int rt) @@ -4075,7 +4219,7 @@ static int match_bt(signed char i_regmap[],uint64_t i_dirty,int addr) static void drc_dbg_emit_do_cmp(int i) { extern void do_insn_cmp(); - extern int cycle; + //extern int cycle; u_int hr,reglist=0; for(hr=0;hr(ba[i]-start)>>2) 
invert=1; #endif + #ifdef __aarch64__ + invert=1; // because of near cond. branches + #endif if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); @@ -4956,6 +5104,9 @@ static void sjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; #endif + #ifdef __aarch64__ + invert=1; // because of near cond. branches + #endif //if(opcode2[i]>=0x10) return; // FIXME (BxxZAL) //assert(opcode2[i]<0x10||rs1[i]==0); // FIXME (BxxZAL) @@ -6484,7 +6635,7 @@ void new_dynarec_load_blocks(const void *save, int size) memcpy(&psxRegs.GPR, regs_save, sizeof(regs_save)); } -int new_recompile_block(int addr) +int new_recompile_block(u_int addr) { u_int pagelimit = 0; u_int state_rflags = 0; @@ -7906,8 +8057,8 @@ int new_recompile_block(int addr) } } } - } - } + } // if needed + } // for hr } /* Pass 5 - Pre-allocate registers */ @@ -8533,7 +8684,7 @@ int new_recompile_block(int addr) void *instr_addr0_override = NULL; if (start == 0x80030000) { - // nasty hack for fastbios thing + // nasty hack for the fastbios thing // override block entry to this code instr_addr0_override = out; emit_movimm(start,0); @@ -8543,7 +8694,12 @@ int new_recompile_block(int addr) emit_writeword(0,&pcaddr); emit_writeword(0,&address); emit_cmp(0,1); + #ifdef __aarch64__ + emit_jeq(out + 4*2); + emit_jmp(new_dyna_leave); + #else emit_jne(new_dyna_leave); + #endif } for(i=0;i>16)==0x1000) literal_pool(1024); else -@@ -8767,7 +8786,7 @@ int new_recompile_block(int addr) +@@ -8950,7 +8969,7 @@ int new_recompile_block(int addr) } } // External Branch Targets (jump_in) @@ -267,7 +240,7 @@ index e7b55b6..caa06d0 100644 for(i=0;i Date: Sun, 14 Nov 2021 01:14:55 +0200 Subject: [PATCH 062/597] drc: new far call mechanism somewhat inspired by mupen64plus, but a bit different --- Makefile | 4 +- configure | 2 +- libpcsxcore/new_dynarec/assem_arm.c | 82 +++++++-------- libpcsxcore/new_dynarec/assem_arm.h | 20 +--- libpcsxcore/new_dynarec/assem_arm64.c | 55 
+++++----- libpcsxcore/new_dynarec/assem_arm64.h | 17 ++-- libpcsxcore/new_dynarec/new_dynarec.c | 140 ++++++++++++++++++-------- 7 files changed, 186 insertions(+), 134 deletions(-) diff --git a/Makefile b/Makefile index db5ab2cf1..0db94f777 100644 --- a/Makefile +++ b/Makefile @@ -76,8 +76,8 @@ ifdef DRC_DBG libpcsxcore/new_dynarec/emu_if.o: CFLAGS += -D_FILE_OFFSET_BITS=64 CFLAGS += -DDRC_DBG endif -ifeq "$(DRC_CACHE_BASE)" "1" -libpcsxcore/new_dynarec/%.o: CFLAGS += -DBASE_ADDR_FIXED=1 +ifeq "$(BASE_ADDR_DYNAMIC)" "1" +libpcsxcore/new_dynarec/%.o: CFLAGS += -DBASE_ADDR_DYNAMIC=1 endif # spu diff --git a/configure b/configure index 20ff1d53d..b053a7a88 100755 --- a/configure +++ b/configure @@ -584,7 +584,7 @@ if [ "$enable_dynarec" = "yes" ]; then echo "USE_DYNAREC = 1" >> $config_mak fi if [ "$drc_cache_base" = "yes" ]; then - echo "DRC_CACHE_BASE = 1" >> $config_mak + echo "BASE_ADDR_DYNAMIC = 1" >> $config_mak fi if [ "$have_c64x_dsp" = "yes" ]; then echo "HAVE_C64_TOOLS = 1" >> $config_mak diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index c61145643..62038a208 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -1,7 +1,7 @@ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Mupen64plus/PCSX - assem_arm.c * * Copyright (C) 2009-2011 Ari64 * - * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas * + * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -28,13 +28,6 @@ #include "pcnt.h" #include "arm_features.h" -#if defined(BASE_ADDR_FIXED) -#elif defined(BASE_ADDR_DYNAMIC) -u_char *translation_cache; -#else -u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); -#endif - #ifndef __MACH__ #define CALLER_SAVE_REGS 0x100f #else @@ -1034,6 +1027,12 @@ static void 
emit_set_if_carry32(int rs1, int rs2, int rt) emit_cmovb_imm(1,rt); } +static int can_jump_or_call(const void *a) +{ + intptr_t offset = (u_char *)a - out - 8; + return (-33554432 <= offset && offset < 33554432); +} + static void emit_call(const void *a_) { int a = (int)a_; @@ -1598,7 +1597,7 @@ static void emit_extjump2(u_char *addr, u_int target, void *linker) emit_writeword(ECX,&last_count); #endif //DEBUG < - emit_jmp(linker); + emit_far_jump(linker); } static void check_extjump2(void *src) @@ -1738,7 +1737,7 @@ static void do_readstub(int n) if(cc<0) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2); - emit_call(handler); + emit_far_call(handler); if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { mov_loadtype_adj(type,0,rt); } @@ -1754,7 +1753,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, signed char int rt=get_reg(regmap,target); if(rs<0) rs=get_reg(regmap,-1); assert(rs>=0); - u_int is_dynamic,far_call=0; + u_int is_dynamic; uintptr_t host_addr = 0; void *handler; int cc=get_reg(regmap,CCREG); @@ -1794,12 +1793,6 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, signed char emit_movimm(addr,0); else if(rs!=0) emit_mov(rs,0); - int offset=(u_char *)handler-out-8; - if(offset<-33554432||offset>=33554432) { - // unreachable memhandler, a plugin func perhaps - emit_movimm((u_int)handler,12); - far_call=1; - } if(cc<0) emit_loadreg(CCREG,2); if(is_dynamic) { @@ -1813,10 +1806,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, signed char emit_writeword(2,&Count); } - if(far_call) - emit_callreg(12); - else - emit_call(handler); + emit_far_call(handler); if(rt>=0&&rt1[i]!=0) { switch(type) { @@ -1902,7 +1892,7 @@ static void do_writestub(int n) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2); // returns new cycle_count - emit_call(handler); + emit_far_call(handler); emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc); 
if(cc<0) emit_storereg(CCREG,2); @@ -1941,7 +1931,7 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, signed char emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); emit_movimm((u_int)handler,3); // returns new cycle_count - emit_call(jump_handler_write_h); + emit_far_call(jump_handler_write_h); emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); @@ -1969,7 +1959,7 @@ static void *do_dirty_stub(int i) { assem_debug("do_dirty_stub %x\n",start+i*4); do_dirty_stub_emit_args(start + i*4); - emit_call(verify_code); + emit_far_call(verify_code); void *entry = out; load_regs_entry(i); if (entry == out) @@ -1981,7 +1971,7 @@ static void *do_dirty_stub(int i) static void do_dirty_stub_ds() { do_dirty_stub_emit_args(start + 1); - emit_call(verify_code_ds); + emit_far_call(verify_code_ds); } /* Special assem */ @@ -1991,7 +1981,7 @@ static void c2op_prologue(u_int op,u_int reglist) save_regs_all(reglist); #ifdef PCNT emit_movimm(op,0); - emit_call(pcnt_gte_start); + emit_far_call(pcnt_gte_start); #endif emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs } @@ -2000,7 +1990,7 @@ static void c2op_epilogue(u_int op,u_int reglist) { #ifdef PCNT emit_movimm(op,0); - emit_call(pcnt_gte_end); + emit_far_call(pcnt_gte_end); #endif restore_regs_all(reglist); } @@ -2008,19 +1998,19 @@ static void c2op_epilogue(u_int op,u_int reglist) static void c2op_call_MACtoIR(int lm,int need_flags) { if(need_flags) - emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0); + emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0); else - emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf); + emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf); } static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) { - emit_call(func); + emit_far_call(func); // func is C code and trashes r0 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); if(need_flags||need_ir) c2op_call_MACtoIR(lm,need_flags); - 
emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf); + emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf); } static void c2op_assemble(int i,struct regstat *i_regs) @@ -2070,17 +2060,17 @@ static void c2op_assemble(int i,struct regstat *i_regs) emit_readword(&zeromem_ptr,7); #ifdef __ARM_NEON__ emit_movimm(source[i],1); // opcode - emit_call(gteMVMVA_part_neon); + emit_far_call(gteMVMVA_part_neon); if(need_flags) { emit_movimm(lm,1); - emit_call(gteMACtoIR_flags_neon); + emit_far_call(gteMACtoIR_flags_neon); } #else if(cv==3&&shift) - emit_call((int)gteMVMVA_part_cv3sh12_arm); + emit_far_call((int)gteMVMVA_part_cv3sh12_arm); else { emit_movimm(shift,1); - emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm)); + emit_far_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm)); } if(need_flags||need_ir) c2op_call_MACtoIR(lm,need_flags); @@ -2089,13 +2079,13 @@ static void c2op_assemble(int i,struct regstat *i_regs) c2op_prologue(c2op,reglist); emit_movimm(source[i],1); // opcode emit_writeword(1,&psxRegs.code); - emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); + emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]); #endif break; } case GTE_OP: c2op_prologue(c2op,reglist); - emit_call(shift?gteOP_part_shift:gteOP_part_noshift); + emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift); if(need_flags||need_ir) { emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); c2op_call_MACtoIR(lm,need_flags); @@ -2111,7 +2101,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) break; case GTE_SQR: c2op_prologue(c2op,reglist); - emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift); + emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift); if(need_flags||need_ir) { emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); c2op_call_MACtoIR(lm,need_flags); @@ -2136,7 +2126,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) emit_movimm(source[i],1); // opcode 
emit_writeword(1,&psxRegs.code); #endif - emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]); + emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]); break; } c2op_epilogue(c2op,reglist); @@ -2309,7 +2299,7 @@ static void multdiv_assemble_arm(int i,struct regstat *i_regs) static void do_jump_vaddr(int rs) { - emit_jmp(jump_vaddr_reg[rs]); + emit_far_jump(jump_vaddr_reg[rs]); } static void do_preload_rhash(int r) { @@ -2402,7 +2392,17 @@ static void do_clear_cache() } // CPU-architecture-specific initialization -static void arch_init() { +static void arch_init(void) +{ + uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8; + struct tramp_insns *ops = ndrc->tramp.ops; + size_t i; + assert(!(diff & 3)); + assert(diff < 0x1000); + start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); + for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) + ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val] + end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); } // vim:shiftwidth=2:expandtab diff --git a/libpcsxcore/new_dynarec/assem_arm.h b/libpcsxcore/new_dynarec/assem_arm.h index bbaf5b9e9..6b3c672c8 100644 --- a/libpcsxcore/new_dynarec/assem_arm.h +++ b/libpcsxcore/new_dynarec/assem_arm.h @@ -31,18 +31,8 @@ extern char *invc_ptr; #define TARGET_SIZE_2 24 // 2^24 = 16 megabytes -// Code generator target address -#if defined(BASE_ADDR_FIXED) - // "round" address helpful for debug - // this produces best code, but not many platforms allow it, - // only use if you are sure this range is always free - #define BASE_ADDR_ 0x1000000 - #define translation_cache (u_char *)BASE_ADDR_ -#elif defined(BASE_ADDR_DYNAMIC) - // for platforms that can't just use .bss buffer, like vita - // otherwise better to use the next option for closer branches - extern u_char *translation_cache; -#else - // using a static buffer in .bss - extern u_char translation_cache[1 << TARGET_SIZE_2]; -#endif +struct tramp_insns +{ + 
u_int ldrpc; +}; + diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 27f9141d3..5483da15e 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -23,14 +23,6 @@ #include "pcnt.h" #include "arm_features.h" -#if defined(BASE_ADDR_FIXED) -#elif defined(BASE_ADDR_DYNAMIC) -u_char *translation_cache; -#else -u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); -#endif -static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; - #define CALLER_SAVE_REGS 0x0007ffff #define unused __attribute__((unused)) @@ -889,6 +881,12 @@ static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt) emit_cmovb_imm(1,rt); } +static int can_jump_or_call(const void *a) +{ + intptr_t diff = (u_char *)a - out; + return (-134217728 <= diff && diff <= 134217727); +} + static void emit_call(const void *a) { intptr_t diff = (u_char *)a - out; @@ -1295,7 +1293,7 @@ static void emit_extjump2(u_char *addr, u_int target, void *linker) // addr is in the current recompiled block (max 256k) // offset shouldn't exceed +/-1MB emit_adr(addr, 1); - emit_jmp(linker); + emit_far_jump(linker); } static void check_extjump2(void *src) @@ -1439,7 +1437,7 @@ static void do_readstub(int n) if(cc<0) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2); - emit_call(handler); + emit_far_call(handler); // (no cycle reload after read) if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { loadstore_extend(type,0,rt); @@ -1508,9 +1506,9 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, signed char emit_addimm64(1, l1 & 0xfff, 1); } else - emit_call(do_memhandler_pre); + emit_far_call(do_memhandler_pre); - emit_call(handler); + emit_far_call(handler); // (no cycle reload after read) if(rt>=0&&rt1[i]!=0) @@ -1599,7 +1597,7 @@ static void do_writestub(int n) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2); // returns new 
cycle_count - emit_call(handler); + emit_far_call(handler); emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); @@ -1642,9 +1640,9 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, signed char emit_loadreg(CCREG, (cc_use = 2)); emit_addimm(cc_use, CLOCK_ADJUST(adj+1), 2); - emit_call(do_memhandler_pre); - emit_call(handler); - emit_call(do_memhandler_post); + emit_far_call(do_memhandler_pre); + emit_far_call(handler); + emit_far_call(do_memhandler_post); emit_addimm(0, -CLOCK_ADJUST(adj+1), cc_use); if (cc < 0) emit_storereg(CCREG, cc_use); @@ -1665,12 +1663,12 @@ static void do_dirty_stub_base(u_int vaddr) emit_loadlp_ofs(0, 0); // ldr x1, source emit_loadlp_ofs(0, 1); // ldr x2, copy emit_movz(slen*4, 2); - emit_call(verify_code_arm64); + emit_far_call(verify_code_arm64); void *jmp = out; emit_cbz(0, 0); emit_movz(vaddr & 0xffff, 0); emit_movk_lsl16(vaddr >> 16, 0); - emit_call(get_addr); + emit_far_call(get_addr); emit_jmpreg(0); set_jump_target(jmp, out); } @@ -1784,7 +1782,7 @@ static void c2op_prologue(u_int op,u_int reglist) save_load_regs_all(1, reglist); #ifdef PCNT emit_movimm(op, 0); - emit_call(pcnt_gte_start); + emit_far_call(pcnt_gte_start); #endif // pointer to cop2 regs emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); @@ -1794,7 +1792,7 @@ static void c2op_epilogue(u_int op,u_int reglist) { #ifdef PCNT emit_movimm(op, 0); - emit_call(pcnt_gte_end); + emit_far_call(pcnt_gte_end); #endif save_load_regs_all(0, reglist); } @@ -1824,7 +1822,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) c2op_prologue(c2op,reglist); emit_movimm(source[i],1); // opcode emit_writeword(1,&psxRegs.code); - emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]); + emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]); break; } c2op_epilogue(c2op,reglist); @@ -1973,7 +1971,7 @@ static void do_jump_vaddr(u_int rs) { if (rs != 0) emit_mov(rs, 0); - 
emit_call(get_addr_ht); + emit_far_call(get_addr_ht); emit_jmpreg(0); } @@ -2061,7 +2059,18 @@ static void do_clear_cache() } // CPU-architecture-specific initialization -static void arch_init() { +static void arch_init(void) +{ + uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops; + struct tramp_insns *ops = ndrc->tramp.ops; + size_t i; + assert(!(diff & 3)); + start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); + for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) { + ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val] + ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17 + } + end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); } // vim:shiftwidth=2:expandtab diff --git a/libpcsxcore/new_dynarec/assem_arm64.h b/libpcsxcore/new_dynarec/assem_arm64.h index fe12ad75c..1360bfadf 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.h +++ b/libpcsxcore/new_dynarec/assem_arm64.h @@ -29,19 +29,18 @@ #define SSP_CALLER_REGS (8*20) #define SSP_ALL (SSP_CALLEE_REGS+SSP_CALLER_REGS) +#define TARGET_SIZE_2 24 // 2^24 = 16 megabytes + #ifndef __ASSEMBLER__ extern char *invc_ptr; -#define TARGET_SIZE_2 24 // 2^24 = 16 megabytes +struct tramp_insns +{ + u_int ldr; + u_int br; +}; -// Code generator target address -#if defined(BASE_ADDR_DYNAMIC) - // for platforms that can't just use .bss buffer (are there any on arm64?) 
- extern u_char *translation_cache; -#else - // using a static buffer in .bss - extern u_char translation_cache[1 << TARGET_SIZE_2]; -#endif +static void clear_cache_arm64(char *start, char *end); #endif // !__ASSEMBLY__ diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 9ce1f069a..cfeddc297 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -66,6 +66,23 @@ static int sceBlock; #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 +struct ndrc_mem +{ + u_char translation_cache[1 << TARGET_SIZE_2]; + struct + { + struct tramp_insns ops[2048 / sizeof(struct tramp_insns)]; + const void *f[2048 / sizeof(void *)]; + } tramp; +}; + +#ifdef BASE_ADDR_DYNAMIC +static struct ndrc_mem *ndrc; +#else +static struct ndrc_mem ndrc_ __attribute__((aligned(4096))); +static struct ndrc_mem *ndrc = &ndrc_; +#endif + // stubs enum stub_type { CC_STUB = 1, @@ -308,6 +325,8 @@ static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override); static void *get_direct_memhandler(void *table, u_int addr, enum stub_type type, uintptr_t *addr_host); static void pass_args(int a0, int a1); +static void emit_far_jump(const void *f); +static void emit_far_call(const void *f); static void mprotect_w_x(void *start, void *end, int is_x) { @@ -360,8 +379,8 @@ static void end_tcache_write(void *start, void *end) static void *start_block(void) { u_char *end = out + MAX_OUTPUT_BLOCK_SIZE; - if (end > translation_cache + (1< ndrc->translation_cache + sizeof(ndrc->translation_cache)) + end = ndrc->translation_cache + sizeof(ndrc->translation_cache); start_tcache_write(out, end); return out; } @@ -866,6 +885,48 @@ static const char *func_name(const void *a) #include "assem_arm64.c" #endif +static void *get_trampoline(const void *f) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(ndrc->tramp.f); i++) { + if (ndrc->tramp.f[i] == f || ndrc->tramp.f[i] == NULL) + break; + } + if (i == 
ARRAY_SIZE(ndrc->tramp.f)) { + SysPrintf("trampoline table is full, last func %p\n", f); + abort(); + } + if (ndrc->tramp.f[i] == NULL) { + start_tcache_write(&ndrc->tramp.f[i], &ndrc->tramp.f[i + 1]); + ndrc->tramp.f[i] = f; + end_tcache_write(&ndrc->tramp.f[i], &ndrc->tramp.f[i + 1]); + } + return &ndrc->tramp.ops[i]; +} + +static void emit_far_jump(const void *f) +{ + if (can_jump_or_call(f)) { + emit_jmp(f); + return; + } + + f = get_trampoline(f); + emit_jmp(f); +} + +static void emit_far_call(const void *f) +{ + if (can_jump_or_call(f)) { + emit_call(f); + return; + } + + f = get_trampoline(f); + emit_call(f); +} + // Add virtual address mapping to linked list void ll_add(struct ll_entry **head,int vaddr,void *addr) { @@ -1167,7 +1228,7 @@ static void do_invstub(int n) set_jump_target(stubs[n].addr, out); save_regs(reglist); if(stubs[n].b!=0) emit_mov(stubs[n].b,0); - emit_call(invalidate_addr); + emit_far_call(invalidate_addr); restore_regs(reglist); emit_jmp(stubs[n].retaddr); // return address } @@ -2865,7 +2926,7 @@ void store_assemble(int i,struct regstat *i_regs) emit_movimm(start+i*4+4,0); emit_writeword(0,&pcaddr); emit_addimm(HOST_CCREG,2,HOST_CCREG); - emit_call(get_addr_ht); + emit_far_call(get_addr_ht); emit_jmpreg(0); } } @@ -3046,7 +3107,7 @@ static void cop0_assemble(int i,struct regstat *i_regs) emit_storereg(CCREG,HOST_CCREG); emit_loadreg(rs1[i],1); emit_movimm(copr,0); - emit_call(pcsx_mtc0_ds); + emit_far_call(pcsx_mtc0_ds); emit_loadreg(rs1[i],s); return; } @@ -3055,14 +3116,12 @@ static void cop0_assemble(int i,struct regstat *i_regs) emit_movimm(0,HOST_TEMPREG); emit_writeword(HOST_TEMPREG,&pending_exception); } - //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12); - //else if(s==HOST_CCREG) emit_loadreg(rs1[i],1); else if(s!=1) emit_mov(s,1); emit_movimm(copr,0); - emit_call(pcsx_mtc0); + emit_far_call(pcsx_mtc0); if(copr==9||copr==11||copr==12||copr==13) { emit_readword(&Count,HOST_CCREG); 
emit_readword(&next_interupt,HOST_TEMPREG); @@ -3079,7 +3138,7 @@ static void cop0_assemble(int i,struct regstat *i_regs) emit_jeq(0); emit_readword(&pcaddr, 0); emit_addimm(HOST_CCREG,2,HOST_CCREG); - emit_call(get_addr_ht); + emit_far_call(get_addr_ht); emit_jmpreg(0); set_jump_target(jaddr, out); } @@ -3139,7 +3198,7 @@ static void do_cop1stub(int n) if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); emit_movimm(start+(i-ds)*4,EAX); // Get PC emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... - emit_jmp(ds?fp_exception_ds:fp_exception); + emit_far_jump(ds?fp_exception_ds:fp_exception); } static void cop2_get_dreg(u_int copr,signed char tl,signed char temp) @@ -3396,7 +3455,7 @@ static void do_unalignedwritestub(int n) if(cc<0) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2); - emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr)); + emit_far_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr)); emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); @@ -3490,8 +3549,8 @@ static void call_c_cpu_handler(int i, const struct regstat *i_regs, u_int pc, vo emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX emit_add(2,HOST_CCREG,2); emit_writeword(2,&psxRegs.cycle); - emit_call(func); - emit_jmp(jump_to_new_pc); + emit_far_call(func); + emit_far_jump(jump_to_new_pc); } static void syscall_assemble(int i,struct regstat *i_regs) @@ -4227,7 +4286,7 @@ static void drc_dbg_emit_do_cmp(int i) save_regs(reglist); emit_movimm(start+i*4,0); emit_writeword(0,&pcaddr); - emit_call(do_insn_cmp); + emit_far_call(do_insn_cmp); //emit_readword(&cycle,0); //emit_addimm(0,2,0); //emit_writeword(0,&cycle); @@ -4558,7 +4617,7 @@ static void do_ccstub(int n) // Update cycle count assert(branch_regs[i].regmap[HOST_CCREG]==CCREG||branch_regs[i].regmap[HOST_CCREG]==-1); if(stubs[n].a) 
emit_addimm(HOST_CCREG,CLOCK_ADJUST((signed int)stubs[n].a),HOST_CCREG); - emit_call(cc_interrupt); + emit_far_call(cc_interrupt); if(stubs[n].a) emit_addimm(HOST_CCREG,-CLOCK_ADJUST((signed int)stubs[n].a),HOST_CCREG); if(stubs[n].d==TAKEN) { if(internal_branch(ba[i])) @@ -6398,7 +6457,7 @@ static void new_dynarec_test(void) ((volatile u_int *)out)[0]++; // make cache dirty for (i = 0; i < ARRAY_SIZE(ret); i++) { - out = translation_cache; + out = ndrc->translation_cache; beginning = start_block(); emit_movimm(DRC_TEST_VAL + i, 0); // test emit_ret(); @@ -6412,7 +6471,7 @@ static void new_dynarec_test(void) SysPrintf("test passed.\n"); else SysPrintf("test failed, will likely crash soon (r=%08x %08x)\n", ret[0], ret[1]); - out = translation_cache; + out = ndrc->translation_cache; } // clear the state completely, instead of just marking @@ -6420,7 +6479,7 @@ static void new_dynarec_test(void) void new_dynarec_clear_full() { int n; - out = translation_cache; + out = ndrc->translation_cache; memset(invalid_code,1,sizeof(invalid_code)); memset(hash_table,0xff,sizeof(hash_table)); memset(mini_ht,-1,sizeof(mini_ht)); @@ -6442,30 +6501,24 @@ void new_dynarec_init() { SysPrintf("Init new dynarec\n"); - // allocate/prepare a buffer for translation cache - // see assem_arm.h for some explanation -#if defined(BASE_ADDR_FIXED) - if (mmap(translation_cache, 1 << TARGET_SIZE_2, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0) != translation_cache) { - SysPrintf("mmap() failed: %s\n", strerror(errno)); - SysPrintf("disable BASE_ADDR_FIXED and recompile\n"); - abort(); - } -#elif defined(BASE_ADDR_DYNAMIC) +#ifdef BASE_ADDR_DYNAMIC #ifdef VITA sceBlock = sceKernelAllocMemBlockForVM("code", 1 << TARGET_SIZE_2); if (sceBlock < 0) SysPrintf("sceKernelAllocMemBlockForVM failed\n"); - int ret = sceKernelGetMemBlockBase(sceBlock, (void **)&translation_cache); + int ret = sceKernelGetMemBlockBase(sceBlock, (void **)&ndrc); if (ret < 0) 
SysPrintf("sceKernelGetMemBlockBase failed\n"); #else - translation_cache = mmap (NULL, 1 << TARGET_SIZE_2, + uintptr_t desired_addr = 0; + #ifdef __ELF__ + extern char _end; + desired_addr = ((uintptr_t)&_end + 0xffffff) & ~0xffffffl; + #endif + ndrc = mmap((void *)desired_addr, sizeof(*ndrc), PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (translation_cache == MAP_FAILED) { + if (ndrc == MAP_FAILED) { SysPrintf("mmap() failed: %s\n", strerror(errno)); abort(); } @@ -6473,11 +6526,12 @@ void new_dynarec_init() #else #ifndef NO_WRITE_EXEC // not all systems allow execute in data segment by default - if (mprotect(translation_cache, 1<translation_cache) + sizeof(ndrc->tramp.ops), + PROT_READ | PROT_WRITE | PROT_EXEC) != 0) SysPrintf("mprotect() failed: %s\n", strerror(errno)); #endif #endif - out = translation_cache; + out = ndrc->translation_cache; cycle_multiplier=200; new_dynarec_clear_full(); #ifdef HOST_IMM8 @@ -6496,12 +6550,12 @@ void new_dynarec_init() void new_dynarec_cleanup() { int n; -#if defined(BASE_ADDR_FIXED) || defined(BASE_ADDR_DYNAMIC) +#ifdef BASE_ADDR_DYNAMIC #ifdef VITA sceKernelFreeMemBlock(sceBlock); sceBlock = -1; #else - if (munmap(translation_cache, 1<>12]=0; emit_movimm(start,0); emit_writeword(0,&pcaddr); - emit_jmp(new_dyna_leave); + emit_far_jump(new_dyna_leave); literal_pool(0); end_block(beginning); ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning); @@ -8696,7 +8750,7 @@ int new_recompile_block(u_int addr) emit_cmp(0,1); #ifdef __aarch64__ emit_jeq(out + 4*2); - emit_jmp(new_dyna_leave); + emit_far_jump(new_dyna_leave); #else emit_jne(new_dyna_leave); #endif @@ -8968,8 +9022,8 @@ int new_recompile_block(u_int addr) // If we're within 256K of the end of the buffer, // start over from the beginning. (Is 256K enough?) 
- if (out > translation_cache+(1< ndrc->translation_cache + sizeof(ndrc->translation_cache) - MAX_OUTPUT_BLOCK_SIZE) + out = ndrc->translation_cache; // Trap writes to any of the pages we compiled for(i=start>>12;i<=(start+slen*4)>>12;i++) { @@ -8986,11 +9040,11 @@ int new_recompile_block(u_int addr) /* Pass 10 - Free memory by expiring oldest blocks */ - int end=(((out-translation_cache)>>(TARGET_SIZE_2-16))+16384)&65535; + int end=(((out-ndrc->translation_cache)>>(TARGET_SIZE_2-16))+16384)&65535; while(expirep!=end) { int shift=TARGET_SIZE_2-3; // Divide into 8 blocks - uintptr_t base=(uintptr_t)translation_cache+((expirep>>13)<translation_cache+((expirep>>13)<>11)&3) { From 919981d0bca7a0898133362a91405395678612e3 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 14 Nov 2021 01:20:38 +0200 Subject: [PATCH 063/597] drc: update cache flushing as of now the arm64 __clear_cache workaround is still needed --- libpcsxcore/new_dynarec/assem_arm.c | 43 ---------------- libpcsxcore/new_dynarec/assem_arm64.c | 72 +++++++++++++------------- libpcsxcore/new_dynarec/new_dynarec.c | 74 ++++++++++++++++++++------- libpcsxcore/new_dynarec/new_dynarec.h | 8 +-- 4 files changed, 95 insertions(+), 102 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 62038a208..f9333f2df 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -2348,49 +2348,6 @@ static void do_miniht_insert(u_int return_address,int rt,int temp) { #endif } -static void mark_clear_cache(void *target) -{ - u_long offset = (u_char *)target - translation_cache; - u_int mask = 1u << ((offset >> 12) & 31); - if (!(needs_clear_cache[offset >> 17] & mask)) { - char *start = (char *)((u_long)target & ~4095ul); - start_tcache_write(start, start + 4096); - needs_clear_cache[offset >> 17] |= mask; - } -} - -// Clearing the cache is rather slow on ARM Linux, so mark the areas -// that need to be cleared, and then only clear these areas 
once. -static void do_clear_cache() -{ - int i,j; - for (i=0;i<(1<<(TARGET_SIZE_2-17));i++) - { - u_int bitmap=needs_clear_cache[i]; - if(bitmap) { - u_char *start, *end; - for(j=0;j<32;j++) - { - if(bitmap&(1<>3][0]); } -static void mark_clear_cache(void *target) +static void clear_cache_arm64(char *start, char *end) { - u_long offset = (u_char *)target - translation_cache; - u_int mask = 1u << ((offset >> 12) & 31); - if (!(needs_clear_cache[offset >> 17] & mask)) { - char *start = (char *)((u_long)target & ~4095ul); - start_tcache_write(start, start + 4096); - needs_clear_cache[offset >> 17] |= mask; + // Don't rely on GCC's __clear_cache implementation, as it caches + // icache/dcache cache line sizes, that can vary between cores on + // big.LITTLE architectures. + uint64_t addr, ctr_el0; + static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff; + size_t isize, dsize; + + __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0)); + isize = 4 << ((ctr_el0 >> 0) & 0xf); + dsize = 4 << ((ctr_el0 >> 16) & 0xf); + + // use the global minimum cache line size + icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize; + dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize; + + /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is + not required for instruction to data coherence. */ + if ((ctr_el0 & (1 << 28)) == 0x0) { + addr = (uint64_t)start & ~(uint64_t)(dsize - 1); + for (; addr < (uint64_t)end; addr += dsize) + // use "civac" instead of "cvau", as this is the suggested workaround for + // Cortex-A53 errata 819472, 826319, 827319 and 824069. + __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory"); } -} + __asm__ volatile("dsb ish" : : : "memory"); -// Clearing the cache is rather slow on ARM Linux, so mark the areas -// that need to be cleared, and then only clear these areas once. 
-static void do_clear_cache() -{ - int i,j; - for (i=0;i<(1<<(TARGET_SIZE_2-17));i++) - { - u_int bitmap=needs_clear_cache[i]; - if(bitmap) { - u_char *start, *end; - for(j=0;j<32;j++) - { - if(bitmap&(1<translation_cache; + u_int mask = 1u << ((offset >> 12) & 31); + if (!(needs_clear_cache[offset >> 17] & mask)) { + char *start = (char *)((uintptr_t)target & ~4095l); + start_tcache_write(start, start + 4095); + needs_clear_cache[offset >> 17] |= mask; + } +} + +// Clearing the cache is rather slow on ARM Linux, so mark the areas +// that need to be cleared, and then only clear these areas once. +static void do_clear_cache(void) +{ + int i, j; + for (i = 0; i < (1<<(TARGET_SIZE_2-17)); i++) + { + u_int bitmap = needs_clear_cache[i]; + if (!bitmap) + continue; + for (j = 0; j < 32; j++) + { + u_char *start, *end; + if (!(bitmap & (1<translation_cache + i*131072 + j*4096; + end = start + 4095; + for (j++; j < 32; j++) { + if (!(bitmap & (1<addr,head->vaddr); void *host_addr=find_extjump_insn(head->addr); - #if defined(__arm__) || defined(__aarch64__) - mark_clear_cache(host_addr); - #endif + mark_clear_cache(host_addr); set_jump_target(host_addr, head->addr); } head=head->next; @@ -1082,9 +1125,7 @@ static void invalidate_page(u_int page) while(head!=NULL) { inv_debug("INVALIDATE: kill pointer to %x (%p)\n",head->vaddr,head->addr); void *host_addr=find_extjump_insn(head->addr); - #if defined(__arm__) || defined(__aarch64__) - mark_clear_cache(host_addr); - #endif + mark_clear_cache(host_addr); set_jump_target(host_addr, head->addr); next=head->next; free(head); @@ -1107,9 +1148,7 @@ static void invalidate_block_range(u_int block, u_int first, u_int last) for(first=page+1;firsttranslation_cache; @@ -6497,7 +6537,7 @@ void new_dynarec_clear_full() for(n=0;n<4096;n++) ll_clear(jump_dirty+n); } -void new_dynarec_init() +void new_dynarec_init(void) { SysPrintf("Init new dynarec\n"); @@ -6547,7 +6587,7 @@ void new_dynarec_init() SysPrintf("warning: RAM is not directly 
mapped, performance will suffer\n"); } -void new_dynarec_cleanup() +void new_dynarec_cleanup(void) { int n; #ifdef BASE_ADDR_DYNAMIC @@ -9082,10 +9122,8 @@ int new_recompile_block(u_int addr) break; case 3: // Clear jump_out - #if defined(__arm__) || defined(__aarch64__) if((expirep&2047)==0) do_clear_cache(); - #endif ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift); ll_remove_matching_addrs(jump_out+2048+(expirep&2047),base,shift); break; diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index 1bec5e1d5..a19bff0b7 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -11,12 +11,12 @@ extern int cycle_multiplier; // 100 for 1.0 #define NDHACK_GTE_NO_FLAGS (1<<2) extern int new_dynarec_hacks; -void new_dynarec_init(); -void new_dynarec_cleanup(); -void new_dynarec_clear_full(); +void new_dynarec_init(void); +void new_dynarec_cleanup(void); +void new_dynarec_clear_full(void); void new_dyna_start(void *context); int new_dynarec_save_blocks(void *save, int size); void new_dynarec_load_blocks(const void *save, int size); -void invalidate_all_pages(); +void invalidate_all_pages(void); void invalidate_block(unsigned int block); From 55a2d0dc822079287304932ad20f276f03735cd0 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 14 Nov 2021 02:28:50 +0200 Subject: [PATCH 064/597] enable arm64 dynarec --- configure | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configure b/configure index b053a7a88..37a5294b5 100755 --- a/configure +++ b/configure @@ -287,6 +287,8 @@ arm*) echo " CFLAGS=-march=armv7-a ./configure ..." 
fi ;; +aarch64) + ;; *) # dynarec only available on ARM enable_dynarec="no" From 5d53bb85b964495d5aa08f142abfad980f14f2d1 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 14 Nov 2021 22:23:12 +0200 Subject: [PATCH 065/597] try to add armhf and arm64 ci build --- .github/extract-foreign-all.sh | 37 ++++++++++++++++++++++++++++ .github/extract-foreign.sh | 11 +++++++++ .github/workflows/ci-linux-arm64.yml | 22 +++++++++++++++++ .github/workflows/ci-linux-armhf.yml | 22 +++++++++++++++++ .github/workflows/ci-linux.yml | 2 +- configure | 1 + 6 files changed, 94 insertions(+), 1 deletion(-) create mode 100755 .github/extract-foreign-all.sh create mode 100755 .github/extract-foreign.sh create mode 100644 .github/workflows/ci-linux-arm64.yml create mode 100644 .github/workflows/ci-linux-armhf.yml diff --git a/.github/extract-foreign-all.sh b/.github/extract-foreign-all.sh new file mode 100755 index 000000000..1296ce634 --- /dev/null +++ b/.github/extract-foreign-all.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +ARCH_="$1" +.github/extract-foreign.sh "zlib1g-dev:${ARCH_}" +.github/extract-foreign.sh "zlib1g:${ARCH_}" +.github/extract-foreign.sh "libpng-dev:${ARCH_}" +.github/extract-foreign.sh "libpng[0-9]*:${ARCH_}" +.github/extract-foreign.sh "libsdl1.2-dev:${ARCH_}" +.github/extract-foreign.sh "libsdl1.2debian:${ARCH_}" +# endless libsdl deps +.github/extract-foreign.sh "libasound[2-9]:${ARCH_}" +.github/extract-foreign.sh "libpulse[0-9]:${ARCH_}" +.github/extract-foreign.sh "libx11-[6-9]:${ARCH_}" +.github/extract-foreign.sh "libx11-dev:${ARCH_}" +.github/extract-foreign.sh "x11proto-dev" +.github/extract-foreign.sh "libxext[6-9]:${ARCH_}" +.github/extract-foreign.sh "libxcb1:${ARCH_}" +.github/extract-foreign.sh "libxau[6-9]:${ARCH_}" +.github/extract-foreign.sh "libxdmcp[6-9]:${ARCH_}" +.github/extract-foreign.sh "libcaca[0-9]:${ARCH_}" +.github/extract-foreign.sh "libdbus-[0-9]*:${ARCH_}" +.github/extract-foreign.sh "libslang[2-9]:${ARCH_}" +.github/extract-foreign.sh 
"libncursesw[6-9]:${ARCH_}" +.github/extract-foreign.sh "libtinfo[6-9]:${ARCH_}" +.github/extract-foreign.sh "libsystemd[0-9]:${ARCH_}" +.github/extract-foreign.sh "libwrap[0-9]:${ARCH_}" +.github/extract-foreign.sh "libsndfile[0-9]:${ARCH_}" +.github/extract-foreign.sh "libasyncns[0-9]:${ARCH_}" +.github/extract-foreign.sh "libbsd[0-9]:${ARCH_}" +.github/extract-foreign.sh "liblzma[0-9]:${ARCH_}" +.github/extract-foreign.sh "liblz[4-9]-*:${ARCH_}" +.github/extract-foreign.sh "libgcrypt[0-9]*:${ARCH_}" +.github/extract-foreign.sh "libflac[0-9]:${ARCH_}" +.github/extract-foreign.sh "libogg[0-9]:${ARCH_}" +.github/extract-foreign.sh "libvorbis[0-9]*:${ARCH_}" +.github/extract-foreign.sh "libvorbisenc[2-9]:${ARCH_}" +.github/extract-foreign.sh "libgpg-error[0-9]:${ARCH_}" diff --git a/.github/extract-foreign.sh b/.github/extract-foreign.sh new file mode 100755 index 000000000..69aefa448 --- /dev/null +++ b/.github/extract-foreign.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +url_base="http://ports.ubuntu.com/ubuntu-ports/" +paths=`apt-cache show "$@" | grep '^Filename:' | awk '{print $2}'` +for p in $paths; do + base=`basename $p` + wget -nv "${url_base}${p}" + echo "exctacting $base" + dpkg-deb -x "$base" . 
+done diff --git a/.github/workflows/ci-linux-arm64.yml b/.github/workflows/ci-linux-arm64.yml new file mode 100644 index 000000000..de2f84cee --- /dev/null +++ b/.github/workflows/ci-linux-arm64.yml @@ -0,0 +1,22 @@ +name: CI (Linux arm64) +on: [push, pull_request] +jobs: + build-linux: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - name: Install dependencies + run: | + whoami + sudo bash -c 'echo ''deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports focal main restricted universe'' > /etc/apt/sources.list.d/arm64.list' + sudo sed -i 's/^deb \([^[]\)/deb [arch=amd64] \1/' /etc/apt/sources.list + sudo dpkg --add-architecture arm64 + sudo apt-get update -qq + sudo apt-get install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu + .github/extract-foreign-all.sh arm64 + - name: configure + run: DUMP_CONFIG_LOG=1 CROSS_COMPILE=aarch64-linux-gnu- PATH=$PATH:usr/bin CFLAGS='-Iusr/include/ -Iusr/include/SDL' LDFLAGS='-Lusr/lib/aarch64-linux-gnu/ -Llib/aarch64-linux-gnu/ -Wl,-rpath-link=lib/aarch64-linux-gnu/,-rpath-link=usr/lib/aarch64-linux-gnu/,-rpath-link=usr/lib/aarch64-linux-gnu/pulseaudio/' ./configure + - name: make + run: make diff --git a/.github/workflows/ci-linux-armhf.yml b/.github/workflows/ci-linux-armhf.yml new file mode 100644 index 000000000..0842bf36d --- /dev/null +++ b/.github/workflows/ci-linux-armhf.yml @@ -0,0 +1,22 @@ +name: CI (Linux armhf) +on: [push, pull_request] +jobs: + build-linux: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - name: Install dependencies + run: | + whoami + sudo bash -c 'echo ''deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports focal main restricted universe'' > /etc/apt/sources.list.d/armhf.list' + sudo sed -i 's/^deb \([^[]\)/deb [arch=amd64] \1/' /etc/apt/sources.list + sudo dpkg --add-architecture armhf + sudo apt-get update -qq + sudo apt-get install -y gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf + 
.github/extract-foreign-all.sh armhf + - name: configure + run: DUMP_CONFIG_LOG=1 CROSS_COMPILE=arm-linux-gnueabihf- PATH=$PATH:usr/bin CFLAGS='-Iusr/include/ -Iusr/include/SDL' LDFLAGS='-Lusr/lib/arm-linux-gnueabihf/ -Llib/arm-linux-gnueabihf/ -Wl,-rpath-link=lib/arm-linux-gnueabihf/,-rpath-link=usr/lib/arm-linux-gnueabihf/,-rpath-link=usr/lib/arm-linux-gnueabihf/pulseaudio/' ./configure + - name: make + run: make diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml index 438b0552e..7ab7d3603 100644 --- a/.github/workflows/ci-linux.yml +++ b/.github/workflows/ci-linux.yml @@ -12,6 +12,6 @@ jobs: sudo apt-get update -qq sudo apt-get install -y libsdl1.2-dev libasound2-dev libpng-dev libz-dev - name: configure - run: ./configure + run: DUMP_CONFIG_LOG=1 ./configure - name: make run: make diff --git a/configure b/configure index 37a5294b5..16f510c7e 100755 --- a/configure +++ b/configure @@ -77,6 +77,7 @@ config_mak="config.mak" fail() { echo "$@" + if test -n "$DUMP_CONFIG_LOG"; then cat config.log; fi exit 1 } From 608e6741ebc7861051e78f2aa5e500f4729573cc Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 14 Nov 2021 23:39:22 +0200 Subject: [PATCH 066/597] update libpicofe --- frontend/libpicofe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/libpicofe b/frontend/libpicofe index c668921a4..a8b4c53d7 160000 --- a/frontend/libpicofe +++ b/frontend/libpicofe @@ -1 +1 @@ -Subproject commit c668921a45b7a7f5f548d0e09836f143f56b4ae0 +Subproject commit a8b4c53d7795e4d448d88b0b8222549ede78622a From 06e425d77eb576e3d05ecc5ec0be51b00ca24f57 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 15 Nov 2021 00:23:06 +0200 Subject: [PATCH 067/597] drc: handle a corner case with SLT --- libpcsxcore/new_dynarec/new_dynarec.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index ef9bec7b8..ceba0e744 100644 --- 
a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -2157,10 +2157,11 @@ static void alu_assemble(int i,struct regstat *i_regs) s2l=get_reg(i_regs->regmap,rs2[i]); if(rs2[i]==0) // rx=0); - if(opcode2[i]==0x2a) // SLT + if(opcode2[i]==0x2a&&rs1[i]!=0) { // SLT + assert(s1l>=0); emit_shrimm(s1l,31,t); - else // SLTU (unsigned can not be less than zero) + } + else // SLTU (unsigned can not be less than zero, 0<0) emit_zeroreg(t); } else if(rs1[i]==0) // r0 Date: Mon, 15 Nov 2021 21:09:47 +0200 Subject: [PATCH 068/597] drc: adjust constants, 32bit is enough --- libpcsxcore/new_dynarec/new_dynarec.c | 47 +++++++++++++------ libpcsxcore/new_dynarec/patches/trace_drc_chk | 37 ++++++++------- libpcsxcore/new_dynarec/patches/trace_intr | 28 +++++++---- 3 files changed, 72 insertions(+), 40 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index ceba0e744..a1d7f6a4d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -185,8 +185,10 @@ struct link_entry static uint64_t unneeded_reg[MAXBLOCK]; static uint64_t branch_unneeded_reg[MAXBLOCK]; static signed char regmap_pre[MAXBLOCK][HOST_REGS]; // pre-instruction i? 
- static uint64_t current_constmap[HOST_REGS]; - static uint64_t constmap[MAXBLOCK][HOST_REGS]; + // contains 'real' consts at [i] insn, but may differ from what's actually + // loaded in host reg as 'final' value is always loaded, see get_final_value() + static uint32_t current_constmap[HOST_REGS]; + static uint32_t constmap[MAXBLOCK][HOST_REGS]; static struct regstat regs[MAXBLOCK]; static struct regstat branch_regs[MAXBLOCK]; static signed char minimum_free_regs[MAXBLOCK]; @@ -592,7 +594,7 @@ void dirty_reg(struct regstat *cur,signed char reg) } } -void set_const(struct regstat *cur,signed char reg,uint64_t value) +static void set_const(struct regstat *cur, signed char reg, uint32_t value) { int hr; if(!reg) return; @@ -604,7 +606,7 @@ void set_const(struct regstat *cur,signed char reg,uint64_t value) } } -void clear_const(struct regstat *cur,signed char reg) +static void clear_const(struct regstat *cur, signed char reg) { int hr; if(!reg) return; @@ -615,7 +617,7 @@ void clear_const(struct regstat *cur,signed char reg) } } -int is_const(struct regstat *cur,signed char reg) +static int is_const(struct regstat *cur, signed char reg) { int hr; if(reg<0) return 0; @@ -627,7 +629,8 @@ int is_const(struct regstat *cur,signed char reg) } return 0; } -uint64_t get_const(struct regstat *cur,signed char reg) + +static uint32_t get_const(struct regstat *cur, signed char reg) { int hr; if(!reg) return 0; @@ -1717,7 +1720,7 @@ static void imm16_alloc(struct regstat *current,int i) else clear_const(current,rt1[i]); } else { - set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI + set_const(current,rt1[i],imm[i]<<16); // LUI } dirty_reg(current,rt1[i]); } @@ -4322,9 +4325,24 @@ static void drc_dbg_emit_do_cmp(int i) //extern int cycle; u_int hr,reglist=0; - for(hr=0;hr=0) reglist|=1< 0 && !bt[i]) { + for (hr = 0; hr < HOST_REGS; hr++) { + int reg = regs[i-1].regmap[hr]; + if (hr == EXCLUDE_REG || reg < 0) + continue; + if (!((regs[i-1].isconst >> hr) & 1)) + 
continue; + if (i > 1 && reg == regs[i-2].regmap[hr] && constmap[i-1][hr] == constmap[i-2][hr]) + continue; + emit_movimm(constmap[i-1][hr],0); + emit_storereg(reg, 0); + } + } emit_movimm(start+i*4,0); emit_writeword(0,&pcaddr); emit_far_call(do_insn_cmp); @@ -4333,6 +4351,7 @@ static void drc_dbg_emit_do_cmp(int i) //emit_writeword(0,&cycle); (void)get_reg2; restore_regs(reglist); + assem_debug("\\\\do_insn_cmp\n"); } #else #define drc_dbg_emit_do_cmp(x) @@ -7697,7 +7716,7 @@ int new_recompile_block(u_int addr) dirty_reg(&branch_regs[i-1],31); } memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); break; case RJUMP: memcpy(&branch_regs[i-1],¤t,sizeof(current)); @@ -7718,7 +7737,7 @@ int new_recompile_block(u_int addr) } #endif memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); break; case CJUMP: if((opcode[i-1]&0x3E)==4) // BEQ/BNE @@ -7745,7 +7764,7 @@ int new_recompile_block(u_int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); } else if((opcode[i-1]&0x3E)==6) // BLEZ/BGTZ @@ -7770,7 +7789,7 @@ int new_recompile_block(u_int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); } else // Alloc the delay slot in case the branch is taken @@ -7824,7 +7843,7 @@ int new_recompile_block(u_int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; 
memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); } else // Alloc the delay slot in case the branch is taken @@ -7921,7 +7940,7 @@ int new_recompile_block(u_int addr) if(!is_ds[i]) { regs[i].dirty=current.dirty; regs[i].isconst=current.isconst; - memcpy(constmap[i],current_constmap,sizeof(current_constmap)); + memcpy(constmap[i],current_constmap,sizeof(constmap[i])); } for(hr=0;hr=0) { diff --git a/libpcsxcore/new_dynarec/patches/trace_drc_chk b/libpcsxcore/new_dynarec/patches/trace_drc_chk index e09af7ac5..eca104d6b 100644 --- a/libpcsxcore/new_dynarec/patches/trace_drc_chk +++ b/libpcsxcore/new_dynarec/patches/trace_drc_chk @@ -48,7 +48,7 @@ index bbc52c3..83c5b08 100644 ldr r0, [fp, #LO_next_interupt] diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S -index 698bd78..798abea 100644 +index 444545c..031cee2 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -119,7 +119,7 @@ FUNCTION(cc_interrupt): @@ -79,7 +79,7 @@ index 698bd78..798abea 100644 .macro memhandler_post diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c -index 1452db3..8200e44 100644 +index a1d7f6a..3960f3b 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -44,10 +44,10 @@ static int sceBlock; @@ -96,7 +96,7 @@ index 1452db3..8200e44 100644 #define inv_debug(...) 
#ifdef __i386__ -@@ -423,6 +423,9 @@ static int doesnt_expire_soon(void *tcaddr) +@@ -489,6 +489,9 @@ static int doesnt_expire_soon(void *tcaddr) // This is called from the recompiled JR/JALR instructions void noinline *get_addr(u_int vaddr) { @@ -106,7 +106,7 @@ index 1452db3..8200e44 100644 u_int page=get_page(vaddr); u_int vpage=get_vpage(vaddr); struct ll_entry *head; -@@ -4393,13 +4396,15 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) +@@ -4485,13 +4492,15 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) } emit_addimm_and_set_flags(cycles,HOST_CCREG); jaddr=out; @@ -124,7 +124,7 @@ index 1452db3..8200e44 100644 } add_stub(CC_STUB,jaddr,idle?idle:out,(*adj==0||invert||idle)?0:(count+2),i,addr,taken,0); } -@@ -4807,7 +4812,8 @@ static void rjump_assemble(int i,struct regstat *i_regs) +@@ -4899,7 +4908,8 @@ static void rjump_assemble(int i,struct regstat *i_regs) // special case for RFE emit_jmp(0); else @@ -134,7 +134,7 @@ index 1452db3..8200e44 100644 //load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,-1); #ifdef USE_MINI_HT if(rs1[i]==31) { -@@ -4912,7 +4918,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) +@@ -5004,7 +5014,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) else if(nop) { emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); void *jaddr=out; @@ -144,7 +144,7 @@ index 1452db3..8200e44 100644 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); } else { -@@ -5099,7 +5106,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) +@@ -5191,7 +5202,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) emit_loadreg(CCREG,HOST_CCREG); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); void *jaddr=out; @@ -154,7 +154,7 @@ index 1452db3..8200e44 100644 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); emit_storereg(CCREG,HOST_CCREG); } -@@ -5108,7 +5116,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) +@@ 
-5200,7 +5212,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) assert(cc==HOST_CCREG); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); void *jaddr=out; @@ -164,7 +164,7 @@ index 1452db3..8200e44 100644 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); } } -@@ -5210,7 +5219,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) +@@ -5302,7 +5315,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) else if(nevertaken) { emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); void *jaddr=out; @@ -174,7 +174,7 @@ index 1452db3..8200e44 100644 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); } else { -@@ -5366,7 +5376,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) +@@ -5458,7 +5472,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) emit_loadreg(CCREG,HOST_CCREG); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); void *jaddr=out; @@ -184,7 +184,7 @@ index 1452db3..8200e44 100644 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); emit_storereg(CCREG,HOST_CCREG); } -@@ -5375,7 +5386,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) +@@ -5467,7 +5482,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) assert(cc==HOST_CCREG); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); void *jaddr=out; @@ -194,7 +194,7 @@ index 1452db3..8200e44 100644 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); } } -@@ -5863,7 +5875,7 @@ void unneeded_registers(int istart,int iend,int r) +@@ -5955,7 +5971,7 @@ void unneeded_registers(int istart,int iend,int r) // R0 is always unneeded u|=1; // Save it @@ -203,7 +203,7 @@ index 1452db3..8200e44 100644 gte_unneeded[i]=gte_u; /* printf("ur (%d,%d) %x: ",istart,iend,start+i*4); -@@ -8387,6 +8399,7 @@ int new_recompile_block(int addr) +@@ -8474,6 +8491,7 @@ int new_recompile_block(u_int addr) // This allocates registers (if possible) one instruction prior // to use, which can avoid a 
load-use penalty on certain CPUs. @@ -211,7 +211,7 @@ index 1452db3..8200e44 100644 for(i=0;i>16)==0x1000) literal_pool(1024); else -@@ -8950,7 +8969,7 @@ int new_recompile_block(int addr) +@@ -9037,7 +9062,7 @@ int new_recompile_block(u_int addr) } } // External Branch Targets (jump_in) @@ -240,7 +241,7 @@ index 1452db3..8200e44 100644 for(i=0;i> 26; switch (tmp) { -@@ -547,13 +548,15 @@ static void doBranch(u32 tar) { +@@ -546,13 +547,15 @@ static void doBranch(u32 tar) { } break; } @@ -111,7 +123,7 @@ index 02e00a9..a007dc5 100644 } /********************************************************* -@@ -636,12 +639,13 @@ void psxMULTU() { +@@ -635,12 +638,13 @@ void psxMULTU() { psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff); } @@ -127,7 +139,7 @@ index 02e00a9..a007dc5 100644 void psxBGEZ() { RepZBranchi32(>=) } // Branch if Rs >= 0 void psxBGEZAL() { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link -@@ -711,7 +715,7 @@ void psxRFE() { +@@ -710,7 +714,7 @@ void psxRFE() { * Register branch logic * * Format: OP rs, rt, offset * *********************************************************/ @@ -136,7 +148,7 @@ index 02e00a9..a007dc5 100644 void psxBEQ() { RepBranchi32(==) } // Branch if Rs == Rt void psxBNE() { RepBranchi32(!=) } // Branch if Rs != Rt -@@ -895,6 +899,9 @@ void MTC0(int reg, u32 val) { +@@ -894,6 +898,9 @@ void MTC0(int reg, u32 val) { case 12: // Status psxRegs.CP0.r[12] = val; psxTestSWInts(); @@ -146,7 +158,7 @@ index 02e00a9..a007dc5 100644 break; case 13: // Cause -@@ -1057,6 +1064,23 @@ void intExecuteBlock() { +@@ -1056,6 +1063,23 @@ void intExecuteBlock() { while (!branch2) execI(); } From bb4f300c387365b819531cdec63ef17473f37817 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 16 Nov 2021 02:54:03 +0200 Subject: [PATCH 069/597] drc: adjust timing to be closer to the interpreter --- libpcsxcore/new_dynarec/assem_arm.c | 16 ++++++++-------- libpcsxcore/new_dynarec/assem_arm64.c | 14 +++++++------- libpcsxcore/new_dynarec/new_dynarec.c | 2 ++ 3 
files changed, 17 insertions(+), 15 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index f9333f2df..a4d418fd2 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -1736,7 +1736,7 @@ static void do_readstub(int n) int cc=get_reg(i_regmap,CCREG); if(cc<0) emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2); emit_far_call(handler); if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { mov_loadtype_adj(type,0,rt); @@ -1757,7 +1757,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, signed char uintptr_t host_addr = 0; void *handler; int cc=get_reg(regmap,CCREG); - if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt)) + if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt)) return; handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr); if (handler == NULL) { @@ -1797,11 +1797,11 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, signed char emit_loadreg(CCREG,2); if(is_dynamic) { emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2); } else { emit_readword(&last_count,3); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2); emit_add(2,3,2); emit_writeword(2,&Count); } @@ -1890,10 +1890,10 @@ static void do_writestub(int n) int cc=get_reg(i_regmap,CCREG); if(cc<0) emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2); // returns new cycle_count emit_far_call(handler); - emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc); + emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); if(restore_jump) @@ -1928,11 +1928,11 @@ static void inline_writestub(enum 
stub_type type, int i, u_int addr, signed char int cc=get_reg(regmap,CCREG); if(cc<0) emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2); emit_movimm((u_int)handler,3); // returns new cycle_count emit_far_call(jump_handler_write_h); - emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc); + emit_addimm(0,-CLOCK_ADJUST(adj),cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); restore_regs(reglist); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index ef87b293c..6380b2723 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1436,7 +1436,7 @@ static void do_readstub(int n) int cc=get_reg(i_regmap,CCREG); if(cc<0) emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2); emit_far_call(handler); // (no cycle reload after read) if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { @@ -1458,7 +1458,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, signed char uintptr_t host_addr = 0; void *handler; int cc=get_reg(regmap,CCREG); - //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt)) + //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt)) // return; handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr); if (handler == NULL) { @@ -1499,7 +1499,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, signed char emit_mov(rs,0); if(cc<0) emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2); if(is_dynamic) { uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1; emit_adrp((void *)l1, 1); @@ -1595,10 +1595,10 @@ static void do_writestub(int n) int cc=get_reg(i_regmap,CCREG); if(cc<0) emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2); + 
emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2); // returns new cycle_count emit_far_call(handler); - emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc); + emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); if(restore_jump) @@ -1638,12 +1638,12 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, signed char cc = cc_use = get_reg(regmap, CCREG); if (cc < 0) emit_loadreg(CCREG, (cc_use = 2)); - emit_addimm(cc_use, CLOCK_ADJUST(adj+1), 2); + emit_addimm(cc_use, CLOCK_ADJUST(adj), 2); emit_far_call(do_memhandler_pre); emit_far_call(handler); emit_far_call(do_memhandler_post); - emit_addimm(0, -CLOCK_ADJUST(adj+1), cc_use); + emit_addimm(0, -CLOCK_ADJUST(adj), cc_use); if (cc < 0) emit_storereg(CCREG, cc_use); restore_regs(reglist); diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index a1d7f6a4d..4ec3c53ce 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -4478,11 +4478,13 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) else if(*adj==0||invert) { int cycles=CLOCK_ADJUST(count+2); // faster loop HACK +#if 0 if (t&&*adj) { int rel=t-i; if(-NO_CYCLE_PENALTY_THR Date: Wed, 17 Nov 2021 01:35:43 +0200 Subject: [PATCH 070/597] drc: add a timing hack for Internal Section --- libpcsxcore/database.c | 12 +++++++++++- libpcsxcore/new_dynarec/new_dynarec.c | 20 ++++++++++++++++---- libpcsxcore/new_dynarec/new_dynarec.h | 2 ++ 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index f383e3616..ac19d5760 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -1,6 +1,6 @@ #include "misc.h" -#include "../plugins/dfsound/spu_config.h" #include "sio.h" +#include "new_dynarec/new_dynarec.h" /* It's duplicated from emu_if.c */ #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) @@ -33,4 +33,14 @@ void Apply_Hacks_Cdrom() 
McdDisable[1] = 1; } } + + /* Dynarec game-specific hacks */ + new_dynarec_hacks &= ~NDHACK_OVERRIDE_CYCLE_M; + + /* Internal Section is fussy about timings */ + if (strcmp(CdromId, "SLPS01868") == 0) + { + cycle_multiplier_override = 200; + new_dynarec_hacks |= NDHACK_OVERRIDE_CYCLE_M; + } } diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 4ec3c53ce..4e09592d0 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -442,11 +442,14 @@ static void do_clear_cache(void) #define NO_CYCLE_PENALTY_THR 12 int cycle_multiplier; // 100 for 1.0 +int cycle_multiplier_override; static int CLOCK_ADJUST(int x) { + int m = cycle_multiplier_override + ? cycle_multiplier_override : cycle_multiplier; int s=(x>>31)|1; - return (x * cycle_multiplier + s * 50) / 100; + return (x * m + s * 50) / 100; } static u_int get_page(u_int vaddr) @@ -6631,16 +6634,25 @@ void new_dynarec_cleanup(void) static u_int *get_source_start(u_int addr, u_int *limit) { + if (!(new_dynarec_hacks & NDHACK_OVERRIDE_CYCLE_M)) + cycle_multiplier_override = 0; + if (addr < 0x00200000 || - (0xa0000000 <= addr && addr < 0xa0200000)) { + (0xa0000000 <= addr && addr < 0xa0200000)) + { // used for BIOS calls mostly? *limit = (addr&0xa0000000)|0x00200000; return (u_int *)(rdram + (addr&0x1fffff)); } else if (!Config.HLE && ( /* (0x9fc00000 <= addr && addr < 0x9fc80000) ||*/ - (0xbfc00000 <= addr && addr < 0xbfc80000))) { - // BIOS + (0xbfc00000 <= addr && addr < 0xbfc80000))) + { + // BIOS. 
The multiplier should be much higher as it's uncached 8bit mem, + // but timings in PCSX are too tied to the interpreter's BIAS + if (!(new_dynarec_hacks & NDHACK_OVERRIDE_CYCLE_M)) + cycle_multiplier_override = 200; + *limit = (addr & 0xfff00000) | 0x80000; return (u_int *)((u_char *)psxR + (addr&0x7ffff)); } diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index a19bff0b7..bab337938 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -5,10 +5,12 @@ extern int pending_exception; extern int stop; extern int new_dynarec_did_compile; extern int cycle_multiplier; // 100 for 1.0 +extern int cycle_multiplier_override; #define NDHACK_NO_SMC_CHECK (1<<0) #define NDHACK_GTE_UNNEEDED (1<<1) #define NDHACK_GTE_NO_FLAGS (1<<2) +#define NDHACK_OVERRIDE_CYCLE_M (1<<3) extern int new_dynarec_hacks; void new_dynarec_init(void); From 17ce04ccbdb6f29bdfebfeb0636cb06e65a4244d Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 17 Nov 2021 01:48:45 +0200 Subject: [PATCH 071/597] fix x86 build forgot a dummy variable, whoops --- libpcsxcore/new_dynarec/emu_if.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 2df259b5c..65bb3f101 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -438,6 +438,7 @@ int pending_exception, stop; unsigned int next_interupt; int new_dynarec_did_compile; int cycle_multiplier; +int cycle_multiplier_override; int new_dynarec_hacks; void *psxH_ptr; void *zeromem_ptr; From e43c9382b0791e28aa2fedc19f502a8e3e9a7dbb Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 19 Nov 2021 00:33:04 +0200 Subject: [PATCH 072/597] fix some crashes when loading bad state --- libpcsxcore/psxcounters.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index bd0f09b99..b2cc07b2f 100644 --- a/libpcsxcore/psxcounters.c +++ 
b/libpcsxcore/psxcounters.c @@ -502,13 +502,16 @@ s32 psxRcntFreeze( void *f, s32 Mode ) if (Mode == 0) { // don't trust things from a savestate + rcnts[3].rate = 1; for( i = 0; i < CounterQuantity; ++i ) { _psxRcntWmode( i, rcnts[i].mode ); count = (psxRegs.cycle - rcnts[i].cycleStart) / rcnts[i].rate; _psxRcntWcount( i, count ); } - hsync_steps = (psxRegs.cycle - rcnts[3].cycleStart) / rcnts[3].target; + hsync_steps = 0; + if (rcnts[3].target) + hsync_steps = (psxRegs.cycle - rcnts[3].cycleStart) / rcnts[3].target; psxRcntSet(); base_cycle = 0; From d62c125afc816c30a81f38e7dce75e80940c11e1 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 19 Nov 2021 00:39:26 +0200 Subject: [PATCH 073/597] drc: use a separate var for game hacks because config save/load can clear the main var --- libpcsxcore/database.c | 4 ++-- libpcsxcore/new_dynarec/assem_arm.c | 2 +- libpcsxcore/new_dynarec/assem_arm64.c | 2 +- libpcsxcore/new_dynarec/emu_if.c | 1 + libpcsxcore/new_dynarec/new_dynarec.c | 15 +++++++++------ libpcsxcore/new_dynarec/new_dynarec.h | 1 + 6 files changed, 15 insertions(+), 10 deletions(-) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index ac19d5760..108ccc695 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -35,12 +35,12 @@ void Apply_Hacks_Cdrom() } /* Dynarec game-specific hacks */ - new_dynarec_hacks &= ~NDHACK_OVERRIDE_CYCLE_M; + new_dynarec_hacks_pergame = 0; /* Internal Section is fussy about timings */ if (strcmp(CdromId, "SLPS01868") == 0) { cycle_multiplier_override = 200; - new_dynarec_hacks |= NDHACK_OVERRIDE_CYCLE_M; + new_dynarec_hacks_pergame |= NDHACK_OVERRIDE_CYCLE_M; } } diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index a4d418fd2..9fe13a131 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -1585,7 +1585,7 @@ static void emit_extjump2(u_char *addr, u_int target, void *linker) emit_loadlp(target,0); emit_loadlp((u_int)addr,1); - 
assert(addr>=translation_cache&&addr<(translation_cache+(1<=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000)); //DEBUG > #ifdef DEBUG_CYCLE_COUNT diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 6380b2723..303dcf00c 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1812,7 +1812,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00; assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n", source[i],gte_unneeded[i+1],need_flags,need_ir); - if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS) + if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS)) need_flags=0; //int shift = (source[i] >> 19) & 1; //int lm = (source[i] >> 10) & 1; diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 65bb3f101..60f005fd7 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -439,6 +439,7 @@ unsigned int next_interupt; int new_dynarec_did_compile; int cycle_multiplier; int cycle_multiplier_override; +int new_dynarec_hacks_pergame; int new_dynarec_hacks; void *psxH_ptr; void *zeromem_ptr; diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 4e09592d0..c0ef579e3 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -216,8 +216,11 @@ struct link_entry #endif int new_dynarec_hacks; + int new_dynarec_hacks_pergame; int new_dynarec_did_compile; + #define HACK_ENABLED(x) ((new_dynarec_hacks | new_dynarec_hacks_pergame) & (x)) + extern int cycle_count; // ... 
until end of the timeslice, counts -N -> 0 extern int last_count; // last absolute target, often = next_interupt extern int pcaddr; @@ -2932,7 +2935,7 @@ void store_assemble(int i,struct regstat *i_regs) add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj[i],reglist); jaddr=0; } - if(!(i_regs->waswritten&(1<waswritten&(1<waswritten&(1<waswritten&(1<regmap,INVCP); @@ -3407,7 +3410,7 @@ static void c2ls_assemble(int i,struct regstat *i_regs) if(jaddr2) add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj[i],reglist); if(opcode[i]==0x3a) // SWC2 - if(!(i_regs->waswritten&(1<waswritten&(1<regmap,INVCP); assert(ir>=0); @@ -5794,7 +5797,7 @@ void unneeded_registers(int istart,int iend,int r) uint64_t u,gte_u,b,gte_b; uint64_t temp_u,temp_gte_u=0; uint64_t gte_u_unknown=0; - if(new_dynarec_hacks&NDHACK_GTE_UNNEEDED) + if (HACK_ENABLED(NDHACK_GTE_UNNEEDED)) gte_u_unknown=~0ll; if(iend==slen-1) { u=1; @@ -6634,7 +6637,7 @@ void new_dynarec_cleanup(void) static u_int *get_source_start(u_int addr, u_int *limit) { - if (!(new_dynarec_hacks & NDHACK_OVERRIDE_CYCLE_M)) + if (!HACK_ENABLED(NDHACK_OVERRIDE_CYCLE_M)) cycle_multiplier_override = 0; if (addr < 0x00200000 || @@ -6650,7 +6653,7 @@ static u_int *get_source_start(u_int addr, u_int *limit) { // BIOS. 
The multiplier should be much higher as it's uncached 8bit mem, // but timings in PCSX are too tied to the interpreter's BIAS - if (!(new_dynarec_hacks & NDHACK_OVERRIDE_CYCLE_M)) + if (!HACK_ENABLED(NDHACK_OVERRIDE_CYCLE_M)) cycle_multiplier_override = 200; *limit = (addr & 0xfff00000) | 0x80000; diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index bab337938..bfb48838f 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -12,6 +12,7 @@ extern int cycle_multiplier_override; #define NDHACK_GTE_NO_FLAGS (1<<2) #define NDHACK_OVERRIDE_CYCLE_M (1<<3) extern int new_dynarec_hacks; +extern int new_dynarec_hacks_pergame; void new_dynarec_init(void); void new_dynarec_cleanup(void); From 2d3c4d02ae7877c8b28c3f8e7578dfc0f98bacd6 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 19 Nov 2021 23:36:28 +0200 Subject: [PATCH 074/597] frontend: force full recompile on drc setting change Libretro port should do the same, but here it outdated and nobody cared updating it here. 
--- frontend/main.c | 2 +- frontend/menu.c | 12 ++++++++++-- frontend/menu.h | 2 +- libpcsxcore/new_dynarec/emu_if.c | 3 ++- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/frontend/main.c b/frontend/main.c index 4631618e5..3ec252f0c 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -634,7 +634,7 @@ int main(int argc, char *argv[]) } if (ready_to_go) { - menu_prepare_emu(); + menu_prepare_emu(0); // If a state has been specified, then load that if (loadst) { diff --git a/frontend/menu.c b/frontend/menu.c index 0dbaa400f..f3049d92a 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1556,6 +1556,7 @@ static const char h_cfg_psxclk[] = "Over/under-clock the PSX, default is " DEFA static const char h_cfg_nosmc[] = "Will cause crashes when loading, break memcards"; static const char h_cfg_gteunn[] = "May cause graphical glitches"; static const char h_cfg_gteflgs[] = "Will cause graphical glitches"; +static const char h_cfg_gtestll[] = "Some games will run too fast"; static menu_entry e_menu_speed_hacks[] = { @@ -2329,8 +2330,11 @@ static void menu_leave_emu(void); void menu_loop(void) { + int cycle_multiplier_old = cycle_multiplier; + int ndrc_hacks_old = new_dynarec_hacks; static int warned_about_bios = 0; static int sel = 0; + int ndrc_changed; menu_leave_emu(); @@ -2365,7 +2369,9 @@ void menu_loop(void) in_set_config_int(0, IN_CFG_BLOCKING, 0); - menu_prepare_emu(); + ndrc_changed = cycle_multiplier_old != cycle_multiplier + || ndrc_hacks_old != new_dynarec_hacks; + menu_prepare_emu(ndrc_changed); } static int qsort_strcmp(const void *p1, const void *p2) @@ -2617,7 +2623,7 @@ static void menu_leave_emu(void) cpu_clock = plat_target_cpu_clock_get(); } -void menu_prepare_emu(void) +void menu_prepare_emu(int ndrc_config_changed) { R3000Acpu *prev_cpu = psxCpu; @@ -2634,6 +2640,8 @@ void menu_prepare_emu(void) // note that this does not really reset, just clears drc caches psxCpu->Reset(); } + else if (ndrc_config_changed) + 
new_dynarec_clear_full(); // core doesn't care about Config.Cdda changes, // so handle them manually here diff --git a/frontend/menu.h b/frontend/menu.h index 81cd1baf5..9defc1ea8 100644 --- a/frontend/menu.h +++ b/frontend/menu.h @@ -1,5 +1,5 @@ void menu_init(void); -void menu_prepare_emu(void); +void menu_prepare_emu(int ndrc_config_changed); void menu_loop(void); void menu_finish(void); diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 60f005fd7..f660e7f11 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -184,7 +184,8 @@ void new_dyna_freeze(void *f, int mode) if (bytes != size) return; - new_dynarec_load_blocks(addrs, size); + if (psxCpu != &psxInt) + new_dynarec_load_blocks(addrs, size); } //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded"); From 07cd0bc4d90b222cffc4299f15dd31c265d80a92 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 19 Nov 2021 23:33:27 +0200 Subject: [PATCH 075/597] drc: use helpers for jump checks most not converted, maybe another day --- libpcsxcore/new_dynarec/new_dynarec.c | 64 ++++++++++++++++----------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index c0ef579e3..964c07ba7 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -455,6 +455,18 @@ static int CLOCK_ADJUST(int x) return (x * m + s * 50) / 100; } +// is the op an unconditional jump? 
+static int is_ujump(int i) +{ + return itype[i] == UJUMP || itype[i] == RJUMP + || (source[i] >> 16) == 0x1000; // beq r0, r0, offset // b offset +} + +static int is_jump(int i) +{ + return itype[i] == RJUMP || itype[i] == UJUMP || itype[i] == CJUMP || itype[i] == SJUMP; +} + static u_int get_page(u_int vaddr) { u_int page=vaddr&~0xe0000000; @@ -662,7 +674,7 @@ void lsn(u_char hsn[], int i, int *preferred_reg) j=slen-i-1; break; } - if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000) + if (is_ujump(i+j)) { // Don't go past an unconditonal jump j++; @@ -710,7 +722,7 @@ void lsn(u_char hsn[], int i, int *preferred_reg) // TODO: preferred register based on backward branch } // Delay slot should preferably not overwrite branch conditions or cycle count - if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP)) { + if (i > 0 && is_jump(i-1)) { if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1; if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1; hsn[CCREG]=1; @@ -745,7 +757,7 @@ int needed_again(int r, int i) int b=-1; int rn=10; - if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) + if (i > 0 && is_ujump(i-1)) { if(ba[i-1]start+slen*4-4) return 0; // Don't need any registers if exiting the block @@ -756,7 +768,7 @@ int needed_again(int r, int i) j=slen-i-1; break; } - if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000) + if (is_ujump(i+j)) { // Don't go past an unconditonal jump j++; @@ -812,7 +824,7 @@ int loop_reg(int i, int r, int hr) j=slen-i-1; break; } - if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000) + if (is_ujump(i+j)) { // Don't go past an unconditonal jump j++; @@ -5848,7 +5860,7 @@ void unneeded_registers(int istart,int iend,int r) bt[(ba[i]-start)>>2]=1; if(ba[i]<=start+i*4) { // Backward branch - if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) + if(is_ujump(i)) { // Unconditional branch temp_u=1; @@ -5893,7 +5905,7 @@ void 
unneeded_registers(int istart,int iend,int r) gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown; } } /*else*/ if(1) { - if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) + if (is_ujump(i)) { // Unconditional branch u=unneeded_reg[(ba[i]-start)>>2]; @@ -6003,7 +6015,7 @@ void clean_registers(int istart,int iend,int wr) if(ba[i]=(start+slen*4)) { // Branch out of this block, flush all regs - if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) + if (is_ujump(i)) { // Unconditional branch will_dirty_i=0; @@ -6083,7 +6095,7 @@ void clean_registers(int istart,int iend,int wr) // Internal branch if(ba[i]<=start+i*4) { // Backward branch - if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) + if (is_ujump(i)) { // Unconditional branch temp_will_dirty=0; @@ -6180,7 +6192,7 @@ void clean_registers(int istart,int iend,int wr) } /*else*/ if(1) { - if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) + if (is_ujump(i)) { // Unconditional branch will_dirty_i=0; @@ -6347,7 +6359,7 @@ void clean_registers(int istart,int iend,int wr) regs[i].dirty&=wont_dirty_i; if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP) { - if(i>16)!=0x1000) { + if (i < iend-1 && !is_ujump(i)) { for(r=0;r>14); else ba[i]=-1; - if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP)) { + if (i > 0 && is_jump(i-1)) { int do_in_intrp=0; // branch in delay slot? 
if(type==RJUMP||type==UJUMP||type==CJUMP||type==SJUMP) { @@ -7256,7 +7268,7 @@ int new_recompile_block(u_int addr) bt[t+1]=1; // expected return from interpreter } else if(i>=2&&rt1[i-2]==2&&rt1[i]==2&&rs1[i]!=2&&rs2[i]!=2&&rs1[i-1]!=2&&rs2[i-1]!=2&& - !(i>=3&&(itype[i-3]==RJUMP||itype[i-3]==UJUMP||itype[i-3]==CJUMP||itype[i-3]==SJUMP))) { + !(i>=3&&is_jump(i-3))) { // v0 overwrite like this is a sign of trouble, bail out SysPrintf("v0 overwrite @%08x (%08x)\n", addr + i*4, addr); do_in_intrp=1; @@ -7272,7 +7284,7 @@ int new_recompile_block(u_int addr) } } /* Is this the end of the block? */ - if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) { + if (i > 0 && is_ujump(i-1)) { if(rt1[i-1]==0) { // Continue past subroutine call (JAL) done=2; } @@ -7884,7 +7896,7 @@ int new_recompile_block(u_int addr) break; } - if(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000) + if (is_ujump(i-1)) { if(rt1[i-1]==31) // JAL/JALR { @@ -7998,7 +8010,7 @@ int new_recompile_block(u_int addr) } } // Conditional branch may need registers for following instructions - if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000) + if (!is_ujump(i)) { if(i>16)!=0x1000) + if (!is_ujump(i)) { if(likely[i]) { regs[i].regmap[hr]=-1; @@ -8143,7 +8155,7 @@ int new_recompile_block(u_int addr) { branch_regs[i].regmap[hr]=-1; branch_regs[i].regmap_entry[hr]=-1; - if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000) + if (!is_ujump(i)) { if(!likely[i]&&i>16)!=0x1000) { + if (!is_ujump(i)) { regmap_pre[i+2][hr]=f_regmap[hr]; regs[i+2].wasdirty&=~(1<>16)!=0x1000) { + if (!is_ujump(k)) { regmap_pre[k+2][hr]=f_regmap[hr]; regs[k+2].wasdirty&=~(1<>16)==0x1000) + if (is_ujump(j)) { // Stop on unconditional branch break; @@ -8843,7 +8855,7 @@ int new_recompile_block(u_int addr) } else { speculate_register_values(i); #ifndef DESTRUCTIVE_WRITEBACK - if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000)) + if (i < 2 || !is_ujump(i-2)) { 
wb_valid(regmap_pre[i],regs[i].regmap_entry,dirty_pre,regs[i].wasdirty,unneeded_reg[i]); } @@ -8854,7 +8866,7 @@ int new_recompile_block(u_int addr) } #endif // write back - if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000)) + if (i < 2 || !is_ujump(i-2)) { wb_invalidate(regmap_pre[i],regs[i].regmap_entry,regs[i].wasdirty,unneeded_reg[i]); loop_preload(regmap_pre[i],regs[i].regmap_entry); @@ -8946,17 +8958,17 @@ int new_recompile_block(u_int addr) case SPAN: pagespan_assemble(i,®s[i]);break; } - if(itype[i]==UJUMP||itype[i]==RJUMP||(source[i]>>16)==0x1000) + if (is_ujump(i)) literal_pool(1024); else literal_pool_jumpover(256); } } - //assert(itype[i-2]==UJUMP||itype[i-2]==RJUMP||(source[i-2]>>16)==0x1000); + //assert(is_ujump(i-2)); // If the block did not end with an unconditional branch, // add a jump to the next instruction. if(i>1) { - if(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000&&itype[i-1]!=SPAN) { + if(!is_ujump(i-2)&&itype[i-1]!=SPAN) { assert(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP); assert(i==slen); if(itype[i-2]!=CJUMP&&itype[i-2]!=SJUMP) { From 81dbbf4cbb16fc6c9a82a5b91e102c8005c5726a Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 19 Nov 2021 00:52:31 +0200 Subject: [PATCH 076/597] drc/gte: add some stall handling --- frontend/menu.c | 1 + libpcsxcore/gte.c | 27 +++++ libpcsxcore/gte.h | 5 + libpcsxcore/misc.c | 7 +- libpcsxcore/new_dynarec/assem_arm.c | 57 +++++---- libpcsxcore/new_dynarec/assem_arm.h | 1 - libpcsxcore/new_dynarec/assem_arm64.c | 13 +- libpcsxcore/new_dynarec/assem_arm64.h | 1 - libpcsxcore/new_dynarec/emu_if.c | 9 -- libpcsxcore/new_dynarec/emu_if.h | 1 - libpcsxcore/new_dynarec/linkage_arm.S | 13 ++ libpcsxcore/new_dynarec/linkage_arm64.S | 40 ++++--- libpcsxcore/new_dynarec/linkage_offsets.h | 9 +- libpcsxcore/new_dynarec/new_dynarec.c | 137 ++++++++++++++-------- libpcsxcore/new_dynarec/new_dynarec.h | 1 + libpcsxcore/psxinterpreter.c | 5 +- 
libpcsxcore/r3000a.h | 4 + 17 files changed, 216 insertions(+), 115 deletions(-) diff --git a/frontend/menu.c b/frontend/menu.c index f3049d92a..1d21dacf2 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1564,6 +1564,7 @@ static menu_entry e_menu_speed_hacks[] = mee_onoff_h ("Disable SMC checks", 0, new_dynarec_hacks, NDHACK_NO_SMC_CHECK, h_cfg_nosmc), mee_onoff_h ("Assume GTE regs unneeded", 0, new_dynarec_hacks, NDHACK_GTE_UNNEEDED, h_cfg_gteunn), mee_onoff_h ("Disable GTE flags", 0, new_dynarec_hacks, NDHACK_GTE_NO_FLAGS, h_cfg_gteflgs), + mee_onoff_h ("Disable GTE stalls", 0, new_dynarec_hacks, NDHACK_GTE_NO_STALL, h_cfg_gtestll), mee_end, }; diff --git a/libpcsxcore/gte.c b/libpcsxcore/gte.c index e05f33d25..d34282253 100644 --- a/libpcsxcore/gte.c +++ b/libpcsxcore/gte.c @@ -275,6 +275,32 @@ INLINE u32 DIVIDE(u16 n, u16 d) { #ifndef FLAGLESS +const char gte_cycletab[64] = { + /* 1 2 3 4 5 6 7 8 9 a b c d e f */ + 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0, + 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0, + 30, 0, 0, 0, 0, 0, 0, 0, 5, 8, 17, 0, 0, 5, 6, 0, + 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39, +}; + +// warning: called by the dynarec +int gteCheckStallRaw(u32 op_cycles, psxRegisters *regs) { + u32 left = regs->gteBusyCycle - regs->cycle; + int stall = 0; + + if (left <= 44) { + //printf("c %2u stall %2u %u\n", op_cycles, left, regs->cycle); + regs->cycle = regs->gteBusyCycle; + stall = left; + } + regs->gteBusyCycle = regs->cycle + op_cycles; + return stall; +} + +void gteCheckStall(u32 op) { + gteCheckStallRaw(gte_cycletab[op], &psxRegs); +} + static inline u32 MFC2(int reg) { psxCP2Regs *regs = &psxRegs.CP2; switch (reg) { @@ -403,6 +429,7 @@ void gteLWC2() { } void gteSWC2() { + gteCheckStall(0); psxMemWrite32(_oB_, MFC2(_Rt_)); } diff --git a/libpcsxcore/gte.h b/libpcsxcore/gte.h index 764622633..9ad73d57a 100644 --- a/libpcsxcore/gte.h +++ b/libpcsxcore/gte.h @@ -67,6 +67,11 @@ extern "C" { struct psxCP2Regs; +extern const 
char gte_cycletab[64]; + +int gteCheckStallRaw(u32 op_cycles, psxRegisters *regs); +void gteCheckStall(u32 op); + void gteMFC2(); void gteCFC2(); void gteMTC2(); diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index cd16c41a9..02d1761b5 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -21,6 +21,7 @@ * Miscellaneous functions, including savestates and CD-ROM loading. */ +#include #include "misc.h" #include "cdrom.h" #include "mdec.h" @@ -602,7 +603,8 @@ int SaveState(const char *file) { SaveFuncs.write(f, psxM, 0x00200000); SaveFuncs.write(f, psxR, 0x00080000); SaveFuncs.write(f, psxH, 0x00010000); - SaveFuncs.write(f, (void *)&psxRegs, sizeof(psxRegs)); + // only partial save of psxRegisters to maintain savestate compat + SaveFuncs.write(f, &psxRegs, offsetof(psxRegisters, gteBusyCycle)); // gpu gpufP = (GPUFreeze_t *)malloc(sizeof(GPUFreeze_t)); @@ -666,7 +668,8 @@ int LoadState(const char *file) { SaveFuncs.read(f, psxM, 0x00200000); SaveFuncs.read(f, psxR, 0x00080000); SaveFuncs.read(f, psxH, 0x00010000); - SaveFuncs.read(f, (void *)&psxRegs, sizeof(psxRegs)); + SaveFuncs.read(f, &psxRegs, offsetof(psxRegisters, gteBusyCycle)); + psxRegs.gteBusyCycle = psxRegs.cycle; if (Config.HLE) psxBiosFreeze(0); diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 9fe13a131..278033780 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -19,7 +19,6 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -#include "../gte.h" #define FLAGLESS #include "../gte.h" #undef FLAGLESS @@ -1674,9 +1673,9 @@ static void do_readstub(int n) enum stub_type type=stubs[n].type; int i=stubs[n].a; int rs=stubs[n].b; - struct regstat *i_regs=(struct regstat *)stubs[n].c; + const struct regstat *i_regs=(struct regstat *)stubs[n].c; u_int reglist=stubs[n].e; - signed char *i_regmap=i_regs->regmap; + const signed char *i_regmap=i_regs->regmap; int rt; if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) { rt=get_reg(i_regmap,FTEMP); @@ -1747,7 +1746,8 @@ static void do_readstub(int n) emit_jmp(stubs[n].retaddr); // return address } -static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) +static void inline_readstub(enum stub_type type, int i, u_int addr, + const signed char regmap[], int target, int adj, u_int reglist) { int rs=get_reg(regmap,target); int rt=get_reg(regmap,target); @@ -1829,9 +1829,9 @@ static void do_writestub(int n) enum stub_type type=stubs[n].type; int i=stubs[n].a; int rs=stubs[n].b; - struct regstat *i_regs=(struct regstat *)stubs[n].c; + const struct regstat *i_regs=(struct regstat *)stubs[n].c; u_int reglist=stubs[n].e; - signed char *i_regmap=i_regs->regmap; + const signed char *i_regmap=i_regs->regmap; int rt,r; if(itype[i]==C1LS||itype[i]==C2LS) { rt=get_reg(i_regmap,r=FTEMP); @@ -1902,7 +1902,8 @@ static void do_writestub(int n) emit_jmp(stubs[n].retaddr); } -static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) +static void inline_writestub(enum stub_type type, int i, u_int addr, + const signed char regmap[], int target, int adj, u_int reglist) { int rs=get_reg(regmap,-1); int rt=get_reg(regmap,target); @@ -1976,14 +1977,15 @@ static void do_dirty_stub_ds() /* Special assem */ -static void c2op_prologue(u_int op,u_int reglist) +static 
void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist) { save_regs_all(reglist); + cop2_call_stall_check(op, i, i_regs, 0); #ifdef PCNT - emit_movimm(op,0); + emit_movimm(op, 0); emit_far_call(pcnt_gte_start); #endif - emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs + emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs } static void c2op_epilogue(u_int op,u_int reglist) @@ -2013,22 +2015,19 @@ static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf); } -static void c2op_assemble(int i,struct regstat *i_regs) +static void c2op_assemble(int i, const struct regstat *i_regs) { - u_int c2op=source[i]&0x3f; - u_int hr,reglist_full=0,reglist; - int need_flags,need_ir; - for(hr=0;hrregmap[hr]>=0) reglist_full|=1<regmap); + u_int reglist = reglist_full & CALLER_SAVE_REGS; + int need_flags, need_ir; if (gte_handlers[c2op]!=NULL) { need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00; assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n", source[i],gte_unneeded[i+1],need_flags,need_ir); - if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS) + if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS)) need_flags=0; int shift = (source[i] >> 19) & 1; int lm = (source[i] >> 10) & 1; @@ -2040,7 +2039,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) int cv = (source[i] >> 13) & 3; int mx = (source[i] >> 17) & 3; reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7} - c2op_prologue(c2op,reglist); + c2op_prologue(c2op,i,i_regs,reglist); /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */ if(v<3) emit_ldrd(v*8,0,4); @@ -2076,7 +2075,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) c2op_call_MACtoIR(lm,need_flags); #endif #else /* if not HAVE_ARMV5 */ - c2op_prologue(c2op,reglist); + c2op_prologue(c2op,i,i_regs,reglist); 
emit_movimm(source[i],1); // opcode emit_writeword(1,&psxRegs.code); emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]); @@ -2084,7 +2083,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) break; } case GTE_OP: - c2op_prologue(c2op,reglist); + c2op_prologue(c2op,i,i_regs,reglist); emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift); if(need_flags||need_ir) { emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); @@ -2092,15 +2091,15 @@ static void c2op_assemble(int i,struct regstat *i_regs) } break; case GTE_DPCS: - c2op_prologue(c2op,reglist); + c2op_prologue(c2op,i,i_regs,reglist); c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags); break; case GTE_INTPL: - c2op_prologue(c2op,reglist); + c2op_prologue(c2op,i,i_regs,reglist); c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags); break; case GTE_SQR: - c2op_prologue(c2op,reglist); + c2op_prologue(c2op,i,i_regs,reglist); emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift); if(need_flags||need_ir) { emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); @@ -2108,20 +2107,20 @@ static void c2op_assemble(int i,struct regstat *i_regs) } break; case GTE_DCPL: - c2op_prologue(c2op,reglist); + c2op_prologue(c2op,i,i_regs,reglist); c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags); break; case GTE_GPF: - c2op_prologue(c2op,reglist); + c2op_prologue(c2op,i,i_regs,reglist); c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags); break; case GTE_GPL: - c2op_prologue(c2op,reglist); + c2op_prologue(c2op,i,i_regs,reglist); c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags); break; #endif default: - c2op_prologue(c2op,reglist); + c2op_prologue(c2op,i,i_regs,reglist); #ifdef DRC_DBG emit_movimm(source[i],1); // opcode emit_writeword(1,&psxRegs.code); diff --git a/libpcsxcore/new_dynarec/assem_arm.h b/libpcsxcore/new_dynarec/assem_arm.h 
index 6b3c672c8..9b3a1e10b 100644 --- a/libpcsxcore/new_dynarec/assem_arm.h +++ b/libpcsxcore/new_dynarec/assem_arm.h @@ -6,7 +6,6 @@ #define HOST_IMM8 1 #define HAVE_CMOV_IMM 1 #define HAVE_CONDITIONAL_CALL 1 -#define RAM_SIZE 0x200000 /* ARM calling convention: r0-r3, r12: caller-save diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 303dcf00c..e7df2b025 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1448,7 +1448,8 @@ static void do_readstub(int n) emit_jmp(stubs[n].retaddr); } -static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) +static void inline_readstub(enum stub_type type, int i, u_int addr, + const signed char regmap[], int target, int adj, u_int reglist) { int rs=get_reg(regmap,target); int rt=get_reg(regmap,target); @@ -1607,7 +1608,8 @@ static void do_writestub(int n) emit_jmp(stubs[n].retaddr); } -static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) +static void inline_writestub(enum stub_type type, int i, u_int addr, + const signed char regmap[], int target, int adj, u_int reglist) { int rs = get_reg(regmap,-1); int rt = get_reg(regmap,target); @@ -1777,9 +1779,10 @@ static void get_bounds(void *addr, u_char **start, u_char **end) /* Special assem */ -static void c2op_prologue(u_int op,u_int reglist) +static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist) { save_load_regs_all(1, reglist); + cop2_call_stall_check(op, i, i_regs, 0); #ifdef PCNT emit_movimm(op, 0); emit_far_call(pcnt_gte_start); @@ -1797,7 +1800,7 @@ static void c2op_epilogue(u_int op,u_int reglist) save_load_regs_all(0, reglist); } -static void c2op_assemble(int i,struct regstat *i_regs) +static void c2op_assemble(int i, const struct regstat *i_regs) { u_int c2op=source[i]&0x3f; u_int hr,reglist_full=0,reglist; 
@@ -1819,7 +1822,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) switch(c2op) { default: (void)need_ir; - c2op_prologue(c2op,reglist); + c2op_prologue(c2op, i, i_regs, reglist); emit_movimm(source[i],1); // opcode emit_writeword(1,&psxRegs.code); emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]); diff --git a/libpcsxcore/new_dynarec/assem_arm64.h b/libpcsxcore/new_dynarec/assem_arm64.h index 1360bfadf..1aeee0b93 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.h +++ b/libpcsxcore/new_dynarec/assem_arm64.h @@ -3,7 +3,6 @@ #define EXCLUDE_REG -1 #define HOST_IMM8 1 -#define RAM_SIZE 0x200000 /* calling convention: r0 -r17: caller-save diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index f660e7f11..0d6e58d3d 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -218,15 +218,6 @@ const char *gte_regnames[64] = { NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38 }; -/* from gte.txt.. not sure if this is any good. 
*/ -const char gte_cycletab[64] = { - /* 1 2 3 4 5 6 7 8 9 a b c d e f */ - 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0, - 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0, - 30, 0, 0, 0, 0, 0, 0, 0, 5, 8, 17, 0, 0, 5, 6, 0, - 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39, -}; - #define GCBIT(x) \ (1ll << (32+x)) #define GDBIT(x) \ diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index a6846e2f8..36cc275fd 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -52,7 +52,6 @@ extern int reg_cop2d[], reg_cop2c[]; extern void *gte_handlers[64]; extern void *gte_handlers_nf[64]; extern const char *gte_regnames[64]; -extern const char gte_cycletab[64]; extern const uint64_t gte_reg_reads[64]; extern const uint64_t gte_reg_writes[64]; diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index bbc52c3c0..f18488ce6 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -33,6 +33,7 @@ #define clean_blocks ESYM(clean_blocks) #define gen_interupt ESYM(gen_interupt) #define invalidate_addr ESYM(invalidate_addr) +#define gteCheckStallRaw ESYM(gteCheckStallRaw) #endif .bss @@ -820,4 +821,16 @@ FUNCTION(rcnt2_read_count_m1): lsr r0, #16 @ /= 8 bx lr +FUNCTION(call_gteStall): + /* r0 = op_cycles, r1 = cycles */ + ldr r2, [fp, #LO_last_count] + str lr, [fp, #LO_saved_lr] + add r1, r1, r2 + str r1, [fp, #LO_cycle] + add r1, fp, #LO_psxRegs + bl gteCheckStallRaw + ldr lr, [fp, #LO_saved_lr] + add r10, r10, r0 + bx lr + @ vim:filetype=armasm diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 444545ca3..249fecbcb 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -160,13 +160,13 @@ FUNCTION(cc_interrupt): FUNCTION(fp_exception): mov w2, #0x10000000 0: - ldr w1, [fp, #LO_reg_cop0+48] /* Status */ + ldr w1, [rFP, #LO_reg_cop0+48] /* 
Status */ mov w3, #0x80000000 - str w0, [fp, #LO_reg_cop0+56] /* EPC */ + str w0, [rFP, #LO_reg_cop0+56] /* EPC */ orr w1, w1, #2 add w2, w2, #0x2c - str w1, [fp, #LO_reg_cop0+48] /* Status */ - str w2, [fp, #LO_reg_cop0+52] /* Cause */ + str w1, [rFP, #LO_reg_cop0+48] /* Status */ + str w2, [rFP, #LO_reg_cop0+52] /* Cause */ add w0, w3, #0x80 bl get_addr_ht br x0 @@ -179,13 +179,13 @@ FUNCTION(fp_exception_ds): .align 2 FUNCTION(jump_syscall): - ldr w1, [fp, #LO_reg_cop0+48] /* Status */ + ldr w1, [rFP, #LO_reg_cop0+48] /* Status */ mov w3, #0x80000000 - str w0, [fp, #LO_reg_cop0+56] /* EPC */ + str w0, [rFP, #LO_reg_cop0+56] /* EPC */ orr w1, w1, #2 mov w2, #0x20 - str w1, [fp, #LO_reg_cop0+48] /* Status */ - str w2, [fp, #LO_reg_cop0+52] /* Cause */ + str w1, [rFP, #LO_reg_cop0+48] /* Status */ + str w2, [rFP, #LO_reg_cop0+52] /* Cause */ add w0, w3, #0x80 bl get_addr_ht br x0 @@ -195,11 +195,11 @@ FUNCTION(jump_syscall): /* note: psxException might do recursive recompiler call from it's HLE code, * so be ready for this */ FUNCTION(jump_to_new_pc): - ldr w1, [fp, #LO_next_interupt] - ldr rCC, [fp, #LO_cycle] - ldr w0, [fp, #LO_pcaddr] + ldr w1, [rFP, #LO_next_interupt] + ldr rCC, [rFP, #LO_cycle] + ldr w0, [rFP, #LO_pcaddr] sub rCC, rCC, w1 - str w1, [fp, #LO_last_count] + str w1, [rFP, #LO_last_count] bl get_addr_ht br x0 .size jump_to_new_pc, .-jump_to_new_pc @@ -331,7 +331,7 @@ handler_write_end: FUNCTION(jump_handle_swl): /* w0 = address, w1 = data, w2 = cycles */ - ldr x3, [fp, #LO_mem_wtab] + ldr x3, [rFP, #LO_mem_wtab] mov w4, w0, lsr #12 ldr x3, [x3, w4, uxtw #3] adds x3, x3, x3 @@ -366,7 +366,7 @@ FUNCTION(jump_handle_swl): FUNCTION(jump_handle_swr): /* w0 = address, w1 = data, w2 = cycles */ - ldr x3, [fp, #LO_mem_wtab] + ldr x3, [rFP, #LO_mem_wtab] mov w4, w0, lsr #12 ldr x3, [x3, w4, uxtw #3] adds x3, x3, x3 @@ -396,3 +396,15 @@ FUNCTION(jump_handle_swr): bl abort ret +FUNCTION(call_gteStall): + /* w0 = op_cycles, w1 = cycles */ + ldr w2, [rFP, 
#LO_last_count] + str lr, [rFP, #LO_saved_lr] + add w1, w1, w2 + str w1, [rFP, #LO_cycle] + add x1, rFP, #LO_psxRegs + bl gteCheckStallRaw + ldr lr, [rFP, #LO_saved_lr] + add rCC, rCC, w0 + ret + diff --git a/libpcsxcore/new_dynarec/linkage_offsets.h b/libpcsxcore/new_dynarec/linkage_offsets.h index 82d27bd41..4c75e6c01 100644 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ b/libpcsxcore/new_dynarec/linkage_offsets.h @@ -22,7 +22,9 @@ #define LO_cycle (LO_code + 4) #define LO_interrupt (LO_cycle + 4) #define LO_intCycle (LO_interrupt + 4) -#define LO_psxRegs_end (LO_intCycle + 256) +#define LO_gteBusyCycle (LO_intCycle + 256) +#define LO_psxRegs_reserved (LO_gteBusyCycle + 4) +#define LO_psxRegs_end (LO_psxRegs_reserved + 4*3) #define LO_rcnts (LO_psxRegs_end) #define LO_rcnts_end (LO_rcnts + 7*4*4) #define LO_inv_code_start (LO_rcnts_end) @@ -33,8 +35,9 @@ #define LO_zeromem_ptr (LO_psxH_ptr + PTRSZ) #define LO_invc_ptr (LO_zeromem_ptr + PTRSZ) #define LO_scratch_buf_ptr (LO_invc_ptr + PTRSZ) -#define LO_align1 (LO_scratch_buf_ptr + PTRSZ) -#define LO_mini_ht (LO_align1 + PTRSZ*2) +#define LO_saved_lr (LO_scratch_buf_ptr + PTRSZ) +#define LO_align1 (LO_saved_lr + PTRSZ) +#define LO_mini_ht (LO_align1 + PTRSZ) #define LO_restore_candidate (LO_mini_ht + PTRSZ*32*2) #define LO_dynarec_local_size (LO_restore_candidate + 512) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 964c07ba7..1a91c3a1b 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -37,7 +37,8 @@ static int sceBlock; #include "new_dynarec_config.h" #include "../psxhle.h" #include "../psxinterpreter.h" -#include "emu_if.h" //emulator interface +#include "../gte.h" +#include "emu_if.h" // emulator interface #define noinline __attribute__((noinline,noclone)) #ifndef ARRAY_SIZE @@ -63,6 +64,7 @@ static int sceBlock; #include "assem_arm64.h" #endif +#define RAM_SIZE 0x200000 #define MAXBLOCK 4096 #define 
MAX_OUTPUT_BLOCK_SIZE 262144 @@ -308,6 +310,7 @@ void cc_interrupt(); void fp_exception(); void fp_exception_ds(); void jump_to_new_pc(); +void call_gteStall(); void new_dyna_leave(); // Needed by assembler @@ -318,17 +321,19 @@ static void load_all_regs(signed char i_regmap[]); static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]); static void load_regs_entry(int t); static void load_all_consts(signed char regmap[],u_int dirty,int i); +static u_int get_host_reglist(const signed char *regmap); static int verify_dirty(const u_int *ptr); static int get_final_value(int hr, int i, int *value); static void add_stub(enum stub_type type, void *addr, void *retaddr, u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e); static void add_stub_r(enum stub_type type, void *addr, void *retaddr, - int i, int addr_reg, struct regstat *i_regs, int ccadj, u_int reglist); + int i, int addr_reg, const struct regstat *i_regs, int ccadj, u_int reglist); static void add_to_linker(void *addr, u_int target, int ext); static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override); static void *get_direct_memhandler(void *table, u_int addr, enum stub_type type, uintptr_t *addr_host); +static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist); static void pass_args(int a0, int a1); static void emit_far_jump(const void *f); static void emit_far_call(const void *f); @@ -917,6 +922,7 @@ static const struct { FUNCNAME(jump_handler_write32), FUNCNAME(invalidate_addr), FUNCNAME(jump_to_new_pc), + FUNCNAME(call_gteStall), FUNCNAME(new_dyna_leave), FUNCNAME(pcsx_mtc0), FUNCNAME(pcsx_mtc0_ds), @@ -1918,19 +1924,19 @@ void cop0_alloc(struct regstat *current,int i) minimum_free_regs[i]=HOST_REGS; } -static void cop12_alloc(struct regstat *current,int i) +static void cop2_alloc(struct regstat *current,int i) { - alloc_reg(current,i,CSREG); // Load status - if(opcode2[i]<3) // MFC1/CFC1 + if (opcode2[i] < 3) // MFC2/CFC2 { + 
alloc_cc(current,i); // for stalls + dirty_reg(current,CCREG); if(rt1[i]){ clear_const(current,rt1[i]); alloc_reg(current,i,rt1[i]); dirty_reg(current,rt1[i]); } - alloc_reg_temp(current,i,-1); } - else if(opcode2[i]>3) // MTC1/CTC1 + else if (opcode2[i] > 3) // MTC2/CTC2 { if(rs1[i]){ clear_const(current,rs1[i]); @@ -1940,13 +1946,15 @@ static void cop12_alloc(struct regstat *current,int i) current->u&=~1LL; alloc_reg(current,i,0); } - alloc_reg_temp(current,i,-1); } + alloc_reg_temp(current,i,-1); minimum_free_regs[i]=1; } void c2op_alloc(struct regstat *current,int i) { + alloc_cc(current,i); // for stalls + dirty_reg(current,CCREG); alloc_reg_temp(current,i,-1); } @@ -2003,8 +2011,9 @@ void delayslot_alloc(struct regstat *current,int i) cop0_alloc(current,i); break; case COP1: + break; case COP2: - cop12_alloc(current,i); + cop2_alloc(current,i); break; case C1LS: c1ls_alloc(current,i); @@ -2070,7 +2079,7 @@ static void add_stub(enum stub_type type, void *addr, void *retaddr, } static void add_stub_r(enum stub_type type, void *addr, void *retaddr, - int i, int addr_reg, struct regstat *i_regs, int ccadj, u_int reglist) + int i, int addr_reg, const struct regstat *i_regs, int ccadj, u_int reglist) { add_stub(type, addr, retaddr, i, addr_reg, (uintptr_t)i_regs, ccadj, reglist); } @@ -2647,20 +2656,36 @@ static void *get_direct_memhandler(void *table, u_int addr, } } -static void load_assemble(int i,struct regstat *i_regs) +static u_int get_host_reglist(const signed char *regmap) +{ + u_int reglist = 0, hr; + for (hr = 0; hr < HOST_REGS; hr++) { + if (hr != EXCLUDE_REG && regmap[hr] >= 0) + reglist |= 1 << hr; + } + return reglist; +} + +static u_int reglist_exclude(u_int reglist, int r1, int r2) +{ + if (r1 >= 0) + reglist &= ~(1u << r1); + if (r2 >= 0) + reglist &= ~(1u << r2); + return reglist; +} + +static void load_assemble(int i, const struct regstat *i_regs) { int s,tl,addr; int offset; void *jaddr=0; int memtarget=0,c=0; int fastio_reg_override=-1; - u_int 
hr,reglist=0; + u_int reglist=get_host_reglist(i_regs->regmap); tl=get_reg(i_regs->regmap,rt1[i]); s=get_reg(i_regs->regmap,rs1[i]); offset=imm[i]; - for(hr=0;hrregmap[hr]>=0) reglist|=1<regmap[HOST_CCREG]==CCREG) reglist&=~(1<=0) { c=(i_regs->wasconst>>s)&1; @@ -2787,14 +2812,14 @@ static void load_assemble(int i,struct regstat *i_regs) } #ifndef loadlr_assemble -static void loadlr_assemble(int i,struct regstat *i_regs) +static void loadlr_assemble(int i, const struct regstat *i_regs) { int s,tl,temp,temp2,addr; int offset; void *jaddr=0; int memtarget=0,c=0; int fastio_reg_override=-1; - u_int hr,reglist=0; + u_int reglist=get_host_reglist(i_regs->regmap); tl=get_reg(i_regs->regmap,rt1[i]); s=get_reg(i_regs->regmap,rs1[i]); temp=get_reg(i_regs->regmap,-1); @@ -2802,9 +2827,6 @@ static void loadlr_assemble(int i,struct regstat *i_regs) addr=get_reg(i_regs->regmap,AGEN1+(i&1)); assert(addr<0); offset=imm[i]; - for(hr=0;hrregmap[hr]>=0) reglist|=1<regmap); tl=get_reg(i_regs->regmap,rs2[i]); s=get_reg(i_regs->regmap,rs1[i]); temp=get_reg(i_regs->regmap,agr); @@ -2894,9 +2916,6 @@ void store_assemble(int i,struct regstat *i_regs) } assert(tl>=0); assert(temp>=0); - for(hr=0;hrregmap[hr]>=0) reglist|=1<regmap[HOST_CCREG]==CCREG) reglist&=~(1<regmap); tl=get_reg(i_regs->regmap,rs2[i]); s=get_reg(i_regs->regmap,rs1[i]); temp=get_reg(i_regs->regmap,agr); @@ -3017,9 +3036,6 @@ static void storelr_assemble(int i,struct regstat *i_regs) } } assert(tl>=0); - for(hr=0;hrregmap[hr]>=0) reglist|=1<=0); if(!c) { emit_cmpimm(s<0||offset?temp:s,RAM_SIZE); @@ -3263,6 +3279,25 @@ static void do_cop1stub(int n) emit_far_jump(ds?fp_exception_ds:fp_exception); } +// assumes callee-save regs are already saved +static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist) +{ + if (HACK_ENABLED(NDHACK_GTE_NO_STALL)) + return; + //assert(get_reg(i_regs->regmap, CCREG) == HOST_CCREG); + if (get_reg(i_regs->regmap, CCREG) != HOST_CCREG) { + // happens 
occasionally... cc evicted? Don't bother then + //printf("no cc %08x\n", start + i*4); + return; + } + assem_debug("cop2_call_stall_check\n"); + save_regs(reglist); + emit_movimm(gte_cycletab[op], 0); + emit_addimm(HOST_CCREG, CLOCK_ADJUST(ccadj[i]), 1); + emit_far_call(call_gteStall); + restore_regs(reglist); +} + static void cop2_get_dreg(u_int copr,signed char tl,signed char temp) { switch (copr) { @@ -3346,7 +3381,7 @@ static void cop2_put_dreg(u_int copr,signed char sl,signed char temp) } } -static void c2ls_assemble(int i,struct regstat *i_regs) +static void c2ls_assemble(int i, const struct regstat *i_regs) { int s,tl; int ar; @@ -3356,7 +3391,7 @@ static void c2ls_assemble(int i,struct regstat *i_regs) enum stub_type type; int agr=AGEN1+(i&1); int fastio_reg_override=-1; - u_int hr,reglist=0; + u_int reglist=get_host_reglist(i_regs->regmap); u_int copr=(source[i]>>16)&0x1f; s=get_reg(i_regs->regmap,rs1[i]); tl=get_reg(i_regs->regmap,FTEMP); @@ -3364,9 +3399,6 @@ static void c2ls_assemble(int i,struct regstat *i_regs) assert(rs1[i]>0); assert(tl>=0); - for(hr=0;hrregmap[hr]>=0) reglist|=1<regmap[HOST_CCREG]==CCREG) reglist&=~(1<=0); if (opcode[i]==0x3a) { // SWC2 + cop2_call_stall_check(0, i, i_regs, reglist_exclude(reglist, tl, -1)); cop2_get_dreg(copr,tl,-1); type=STOREW_STUB; } @@ -3445,10 +3478,18 @@ static void c2ls_assemble(int i,struct regstat *i_regs) } } -static void cop2_assemble(int i,struct regstat *i_regs) +static void cop2_assemble(int i, const struct regstat *i_regs) { - u_int copr=(source[i]>>11)&0x1f; - signed char temp=get_reg(i_regs->regmap,-1); + u_int copr = (source[i]>>11) & 0x1f; + signed char temp = get_reg(i_regs->regmap, -1); + + if (opcode2[i] == 0 || opcode2[i] == 2) { // MFC2/CFC2 + if (!HACK_ENABLED(NDHACK_GTE_NO_STALL)) { + signed char tl = get_reg(i_regs->regmap, rt1[i]); + u_int reglist = reglist_exclude(get_host_reglist(i_regs->regmap), tl, temp); + cop2_call_stall_check(0, i, i_regs, reglist); + } + } if (opcode2[i]==0) { 
// MFC2 signed char tl=get_reg(i_regs->regmap,rt1[i]); if(tl>=0&&rt1[i]!=0) @@ -4341,11 +4382,9 @@ static void drc_dbg_emit_do_cmp(int i) { extern void do_insn_cmp(); //extern int cycle; - u_int hr,reglist=0; + u_int hr, reglist = get_host_reglist(regs[i].regmap); assem_debug("//do_insn_cmp %08x\n", start+i*4); - for (hr = 0; hr < HOST_REGS; hr++) - if(regs[i].regmap[hr]>=0) reglist|=1< 0 && !bt[i]) { @@ -7651,8 +7690,9 @@ int new_recompile_block(u_int addr) cop0_alloc(¤t,i); break; case COP1: + break; case COP2: - cop12_alloc(¤t,i); + cop2_alloc(¤t,i); break; case C1LS: c1ls_alloc(¤t,i); @@ -7945,12 +7985,10 @@ int new_recompile_block(u_int addr) #if !defined(DRC_DBG) else if(itype[i]==C2OP&>e_cycletab[source[i]&0x3f]>2) { - // GTE runs in parallel until accessed, divide by 2 for a rough guess - cc+=gte_cycletab[source[i]&0x3f]/2; - } - else if(/*itype[i]==LOAD||itype[i]==STORE||*/itype[i]==C1LS) // load,store causes weird timing issues - { - cc+=2; // 2 cycle penalty (after CLOCK_DIVIDER) + // this should really be removed since the real stalls have been implemented, + // but doing so causes sizeable perf regression against the older version + u_int gtec = gte_cycletab[source[i] & 0x3f]; + cc += HACK_ENABLED(NDHACK_GTE_NO_STALL) ? gtec/2 : 2; } else if(i>1&&itype[i]==STORE&&itype[i-1]==STORE&&itype[i-2]==STORE&&!bt[i]) { @@ -7958,7 +7996,8 @@ int new_recompile_block(u_int addr) } else if(itype[i]==C2LS) { - cc+=4; + // same as with C2OP + cc += HACK_ENABLED(NDHACK_GTE_NO_STALL) ? 
4 : 2; } #endif else diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index bfb48838f..bff1c1641 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -11,6 +11,7 @@ extern int cycle_multiplier_override; #define NDHACK_GTE_UNNEEDED (1<<1) #define NDHACK_GTE_NO_FLAGS (1<<2) #define NDHACK_OVERRIDE_CYCLE_M (1<<3) +#define NDHACK_GTE_NO_STALL (1<<4) extern int new_dynarec_hacks; extern int new_dynarec_hacks_pergame; diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index 61c60edb4..b171b0a6f 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -934,7 +934,10 @@ void psxCOP0() { } void psxCOP2() { - psxCP2[_Funct_]((struct psxCP2Regs *)&psxRegs.CP2D); + u32 f = _Funct_; + if (f != 0 || _Rs_ < 4) // not MTC2/CTC2 + gteCheckStall(f); + psxCP2[f]((struct psxCP2Regs *)&psxRegs.CP2D); } void psxBASIC(struct psxCP2Regs *regs) { diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 4b1ec9e0c..54359159e 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -193,6 +193,10 @@ typedef struct { u32 cycle; u32 interrupt; struct { u32 sCycle, cycle; } intCycle[32]; + u32 gteBusyCycle; + // warning: changing anything in psxRegisters requires update of all + // asm in libpcsxcore/new_dynarec/, but this member can be replaced + u32 reserved[3]; } psxRegisters; extern psxRegisters psxRegs; From e3c6bdb5e46f72f063bb7f588da6588ac1893b17 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 19 Nov 2021 23:35:04 +0200 Subject: [PATCH 077/597] drc: try to make gte stall handling less bloaty --- libpcsxcore/new_dynarec/assem_arm.c | 6 ++ libpcsxcore/new_dynarec/assem_arm64.c | 6 ++ libpcsxcore/new_dynarec/new_dynarec.c | 131 ++++++++++++++++++++++++-- 3 files changed, 136 insertions(+), 7 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 278033780..186d0af40 100644 --- 
a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -964,6 +964,12 @@ static void emit_cmovl_reg(int rs,int rt) output_w32(0xb1a00000|rd_rn_rm(rt,0,rs)); } +static void emit_cmovb_reg(int rs,int rt) +{ + assem_debug("movcc %s,%s\n",regname[rt],regname[rs]); + output_w32(0x31a00000|rd_rn_rm(rt,0,rs)); +} + static void emit_cmovs_reg(int rs,int rt) { assem_debug("movmi %s,%s\n",regname[rt],regname[rs]); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index e7df2b025..17517eff7 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -813,6 +813,12 @@ static void emit_cmovl_reg(u_int rs,u_int rt) output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt)); } +static void emit_cmovb_reg(u_int rs,u_int rt) +{ + assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]); + output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt)); +} + static void emit_cmovs_reg(u_int rs,u_int rt) { assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]); diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 1a91c3a1b..e0cff62ca 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -44,6 +44,9 @@ static int sceBlock; #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #endif +#ifndef min +#define min(a, b) ((b) < (a) ? 
(b) : (a)) +#endif //#define DISASM //#define assem_debug printf @@ -2675,6 +2678,15 @@ static u_int reglist_exclude(u_int reglist, int r1, int r2) return reglist; } +// find a temp caller-saved register not in reglist (so assumed to be free) +static int reglist_find_free(u_int reglist) +{ + u_int free_regs = ~reglist & CALLER_SAVE_REGS; + if (free_regs == 0) + return -1; + return __builtin_ctz(free_regs); +} + static void load_assemble(int i, const struct regstat *i_regs) { int s,tl,addr; @@ -3279,9 +3291,50 @@ static void do_cop1stub(int n) emit_far_jump(ds?fp_exception_ds:fp_exception); } -// assumes callee-save regs are already saved +static int cop2_is_stalling_op(int i, int *cycles) +{ + if (opcode[i] == 0x3a) { // SWC2 + *cycles = 0; + return 1; + } + if (itype[i] == COP2 && (opcode2[i] == 0 || opcode2[i] == 2)) { // MFC2/CFC2 + *cycles = 0; + return 1; + } + if (itype[i] == C2OP) { + *cycles = gte_cycletab[source[i] & 0x3f]; + return 1; + } + // ... what about MTC2/CTC2/LWC2? + return 0; +} + +#if 0 +static void log_gte_stall(int stall, u_int cycle) +{ + if ((u_int)stall <= 44) + printf("x stall %2d %u\n", stall, cycle + last_count); + if (cycle + last_count > 1215348544) exit(1); +} + +static void emit_log_gte_stall(int i, int stall, u_int reglist) +{ + save_regs(reglist); + if (stall > 0) + emit_movimm(stall, 0); + else + emit_mov(HOST_TEMPREG, 0); + emit_addimm(HOST_CCREG, CLOCK_ADJUST(ccadj[i]), 1); + emit_far_call(log_gte_stall); + restore_regs(reglist); +} +#endif + static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist) { + int j = i, other_gte_op_cycles = -1, stall = -MAXBLOCK, cycles_passed; + int rtmp = reglist_find_free(reglist); + if (HACK_ENABLED(NDHACK_GTE_NO_STALL)) return; //assert(get_reg(i_regs->regmap, CCREG) == HOST_CCREG); @@ -3290,12 +3343,76 @@ static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, //printf("no cc %08x\n", start + i*4); return; } - 
assem_debug("cop2_call_stall_check\n"); - save_regs(reglist); - emit_movimm(gte_cycletab[op], 0); - emit_addimm(HOST_CCREG, CLOCK_ADJUST(ccadj[i]), 1); - emit_far_call(call_gteStall); - restore_regs(reglist); + if (!bt[i]) { + for (j = i - 1; j >= 0; j--) { + //if (is_ds[j]) break; + if (cop2_is_stalling_op(j, &other_gte_op_cycles) || bt[j]) + break; + } + } + cycles_passed = CLOCK_ADJUST(ccadj[i] - ccadj[j]); + if (other_gte_op_cycles >= 0) + stall = other_gte_op_cycles - cycles_passed; + else if (cycles_passed >= 44) + stall = 0; // can't stall + if (stall == -MAXBLOCK && rtmp >= 0) { + // unknown stall, do the expensive runtime check + assem_debug("; cop2_call_stall_check\n"); +#if 0 // too slow + save_regs(reglist); + emit_movimm(gte_cycletab[op], 0); + emit_addimm(HOST_CCREG, CLOCK_ADJUST(ccadj[i]), 1); + emit_far_call(call_gteStall); + restore_regs(reglist); +#else + host_tempreg_acquire(); + emit_readword(&psxRegs.gteBusyCycle, rtmp); + emit_addimm(rtmp, -CLOCK_ADJUST(ccadj[i]), rtmp); + emit_sub(rtmp, HOST_CCREG, HOST_TEMPREG); + emit_cmpimm(HOST_TEMPREG, 44); + emit_cmovb_reg(rtmp, HOST_CCREG); + //emit_log_gte_stall(i, 0, reglist); + host_tempreg_release(); +#endif + } + else if (stall > 0) { + //emit_log_gte_stall(i, stall, reglist); + emit_addimm(HOST_CCREG, stall, HOST_CCREG); + } + + // save gteBusyCycle, if needed + if (gte_cycletab[op] == 0) + return; + other_gte_op_cycles = -1; + for (j = i + 1; j < slen; j++) { + if (cop2_is_stalling_op(j, &other_gte_op_cycles)) + break; + if (is_jump(j)) { + // check ds + if (j + 1 < slen && cop2_is_stalling_op(j + 1, &other_gte_op_cycles)) + j++; + break; + } + } + if (other_gte_op_cycles >= 0) + // will handle stall when assembling that op + return; + cycles_passed = CLOCK_ADJUST(ccadj[min(j, slen -1)] - ccadj[i]); + if (cycles_passed >= 44) + return; + assem_debug("; save gteBusyCycle\n"); + host_tempreg_acquire(); +#if 0 + emit_readword(&last_count, HOST_TEMPREG); + emit_add(HOST_TEMPREG, HOST_CCREG, 
HOST_TEMPREG); + emit_addimm(HOST_TEMPREG, CLOCK_ADJUST(ccadj[i]), HOST_TEMPREG); + emit_addimm(HOST_TEMPREG, gte_cycletab[op]), HOST_TEMPREG); + emit_writeword(HOST_TEMPREG, &psxRegs.gteBusyCycle); +#else + emit_addimm(HOST_CCREG, CLOCK_ADJUST(ccadj[i]) + gte_cycletab[op], HOST_TEMPREG); + emit_writeword(HOST_TEMPREG, &psxRegs.gteBusyCycle); +#endif + host_tempreg_release(); } static void cop2_get_dreg(u_int copr,signed char tl,signed char temp) From 32631e6a5d44d1e6aa5d53d5777a039b2d3d4300 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 21 Nov 2021 00:06:52 +0200 Subject: [PATCH 078/597] drc,interpreter: add mul/div stalls for games like Zero Divide, which will run at twice speed if there is not enough combined slowdown (probably from muldiv/gte/cache misses). --- frontend/libretro.c | 1 + frontend/main.c | 2 +- frontend/menu.c | 18 +-- frontend/menu.h | 2 +- libpcsxcore/gte.c | 13 +- libpcsxcore/gte.h | 4 +- libpcsxcore/new_dynarec/assem_arm.c | 2 +- libpcsxcore/new_dynarec/assem_arm64.c | 2 +- libpcsxcore/new_dynarec/emu_if.c | 17 +++ libpcsxcore/new_dynarec/linkage_offsets.h | 5 +- libpcsxcore/new_dynarec/new_dynarec.c | 165 +++++++++++++++++++--- libpcsxcore/new_dynarec/new_dynarec.h | 4 +- libpcsxcore/psxcommon.h | 1 + libpcsxcore/psxinterpreter.c | 82 ++++++++++- libpcsxcore/r3000a.h | 4 +- 15 files changed, 278 insertions(+), 44 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 940ff05aa..4c285cfbc 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -1063,6 +1063,7 @@ static void update_variables(bool in_flight) } } #endif + psxCpu->ApplyConfig(); var.value = "NULL"; var.key = "pcsx_rearmed_spu_reverb"; diff --git a/frontend/main.c b/frontend/main.c index 3ec252f0c..4631618e5 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -634,7 +634,7 @@ int main(int argc, char *argv[]) } if (ready_to_go) { - menu_prepare_emu(0); + menu_prepare_emu(); // If a state has been specified, then load that if (loadst) { diff --git 
a/frontend/menu.c b/frontend/menu.c index 1d21dacf2..76d0e8688 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -398,6 +398,7 @@ static const struct { CE_CONFIG_VAL(RCntFix), CE_CONFIG_VAL(VSyncWA), CE_CONFIG_VAL(icache_emulation), + CE_CONFIG_VAL(DisableStalls), CE_CONFIG_VAL(Cpu), CE_INTVAL(region), CE_INTVAL_V(g_scaler, 3), @@ -1556,7 +1557,7 @@ static const char h_cfg_psxclk[] = "Over/under-clock the PSX, default is " DEFA static const char h_cfg_nosmc[] = "Will cause crashes when loading, break memcards"; static const char h_cfg_gteunn[] = "May cause graphical glitches"; static const char h_cfg_gteflgs[] = "Will cause graphical glitches"; -static const char h_cfg_gtestll[] = "Some games will run too fast"; +static const char h_cfg_stalls[] = "Will cause some games to run too fast"; static menu_entry e_menu_speed_hacks[] = { @@ -1564,7 +1565,7 @@ static menu_entry e_menu_speed_hacks[] = mee_onoff_h ("Disable SMC checks", 0, new_dynarec_hacks, NDHACK_NO_SMC_CHECK, h_cfg_nosmc), mee_onoff_h ("Assume GTE regs unneeded", 0, new_dynarec_hacks, NDHACK_GTE_UNNEEDED, h_cfg_gteunn), mee_onoff_h ("Disable GTE flags", 0, new_dynarec_hacks, NDHACK_GTE_NO_FLAGS, h_cfg_gteflgs), - mee_onoff_h ("Disable GTE stalls", 0, new_dynarec_hacks, NDHACK_GTE_NO_STALL, h_cfg_gtestll), + mee_onoff_h ("Disable CPU/GTE stalls", 0, Config.DisableStalls, 1, h_cfg_stalls), mee_end, }; @@ -2331,11 +2332,8 @@ static void menu_leave_emu(void); void menu_loop(void) { - int cycle_multiplier_old = cycle_multiplier; - int ndrc_hacks_old = new_dynarec_hacks; static int warned_about_bios = 0; static int sel = 0; - int ndrc_changed; menu_leave_emu(); @@ -2370,9 +2368,7 @@ void menu_loop(void) in_set_config_int(0, IN_CFG_BLOCKING, 0); - ndrc_changed = cycle_multiplier_old != cycle_multiplier - || ndrc_hacks_old != new_dynarec_hacks; - menu_prepare_emu(ndrc_changed); + menu_prepare_emu(); } static int qsort_strcmp(const void *p1, const void *p2) @@ -2624,7 +2620,7 @@ static void menu_leave_emu(void) 
cpu_clock = plat_target_cpu_clock_get(); } -void menu_prepare_emu(int ndrc_config_changed) +void menu_prepare_emu(void) { R3000Acpu *prev_cpu = psxCpu; @@ -2641,8 +2637,8 @@ void menu_prepare_emu(int ndrc_config_changed) // note that this does not really reset, just clears drc caches psxCpu->Reset(); } - else if (ndrc_config_changed) - new_dynarec_clear_full(); + + psxCpu->ApplyConfig(); // core doesn't care about Config.Cdda changes, // so handle them manually here diff --git a/frontend/menu.h b/frontend/menu.h index 9defc1ea8..81cd1baf5 100644 --- a/frontend/menu.h +++ b/frontend/menu.h @@ -1,5 +1,5 @@ void menu_init(void); -void menu_prepare_emu(int ndrc_config_changed); +void menu_prepare_emu(void); void menu_loop(void); void menu_finish(void); diff --git a/libpcsxcore/gte.c b/libpcsxcore/gte.c index d34282253..6b3b299fd 100644 --- a/libpcsxcore/gte.c +++ b/libpcsxcore/gte.c @@ -275,7 +275,7 @@ INLINE u32 DIVIDE(u16 n, u16 d) { #ifndef FLAGLESS -const char gte_cycletab[64] = { +const unsigned char gte_cycletab[64] = { /* 1 2 3 4 5 6 7 8 9 a b c d e f */ 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0, 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0, @@ -429,10 +429,19 @@ void gteLWC2() { } void gteSWC2() { - gteCheckStall(0); psxMemWrite32(_oB_, MFC2(_Rt_)); } +void gteLWC2_stall() { + gteCheckStall(0); + gteLWC2(); +} + +void gteSWC2_stall() { + gteCheckStall(0); + gteSWC2(); +} + #endif // FLAGLESS #if 0 diff --git a/libpcsxcore/gte.h b/libpcsxcore/gte.h index 9ad73d57a..75e9e5b37 100644 --- a/libpcsxcore/gte.h +++ b/libpcsxcore/gte.h @@ -67,7 +67,7 @@ extern "C" { struct psxCP2Regs; -extern const char gte_cycletab[64]; +extern const unsigned char gte_cycletab[64]; int gteCheckStallRaw(u32 op_cycles, psxRegisters *regs); void gteCheckStall(u32 op); @@ -78,6 +78,8 @@ void gteMTC2(); void gteCTC2(); void gteLWC2(); void gteSWC2(); +void gteLWC2_stall(); +void gteSWC2_stall(); void gteRTPS(struct psxCP2Regs *regs); void gteOP(struct psxCP2Regs *regs); diff 
--git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 186d0af40..d68aea6cb 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -1986,7 +1986,7 @@ static void do_dirty_stub_ds() static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist) { save_regs_all(reglist); - cop2_call_stall_check(op, i, i_regs, 0); + cop2_do_stall_check(op, i, i_regs, 0); #ifdef PCNT emit_movimm(op, 0); emit_far_call(pcnt_gte_start); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 17517eff7..070c80fc7 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1788,7 +1788,7 @@ static void get_bounds(void *addr, u_char **start, u_char **end) static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist) { save_load_regs_all(1, reglist); - cop2_call_stall_check(op, i, i_regs, 0); + cop2_do_stall_check(op, i, i_regs, 0); #ifdef PCNT emit_movimm(op, 0); emit_far_call(pcnt_gte_start); diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 0d6e58d3d..f170be7e1 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -405,6 +405,20 @@ static void ari64_notify(int note, void *data) { } #endif +static void ari64_apply_config() +{ + if (Config.DisableStalls) + new_dynarec_hacks |= NDHACK_NO_STALLS; + else + new_dynarec_hacks &= ~NDHACK_NO_STALLS; + + if (cycle_multiplier != cycle_multiplier_old + || new_dynarec_hacks != new_dynarec_hacks_old) + { + new_dynarec_clear_full(); + } +} + static void ari64_shutdown() { new_dynarec_cleanup(); @@ -420,6 +434,7 @@ R3000Acpu psxRec = { #ifdef ICACHE_EMULATION ari64_notify, #endif + ari64_apply_config, ari64_shutdown }; @@ -431,7 +446,9 @@ unsigned int next_interupt; int new_dynarec_did_compile; int cycle_multiplier; int cycle_multiplier_override; +int cycle_multiplier_old; int 
new_dynarec_hacks_pergame; +int new_dynarec_hacks_old; int new_dynarec_hacks; void *psxH_ptr; void *zeromem_ptr; diff --git a/libpcsxcore/new_dynarec/linkage_offsets.h b/libpcsxcore/new_dynarec/linkage_offsets.h index 4c75e6c01..916bb1a84 100644 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ b/libpcsxcore/new_dynarec/linkage_offsets.h @@ -23,8 +23,9 @@ #define LO_interrupt (LO_cycle + 4) #define LO_intCycle (LO_interrupt + 4) #define LO_gteBusyCycle (LO_intCycle + 256) -#define LO_psxRegs_reserved (LO_gteBusyCycle + 4) -#define LO_psxRegs_end (LO_psxRegs_reserved + 4*3) +#define LO_muldivBusyCycle (LO_gteBusyCycle + 4) +#define LO_psxRegs_reserved (LO_muldivBusyCycle + 4) +#define LO_psxRegs_end (LO_psxRegs_reserved + 4*2) #define LO_rcnts (LO_psxRegs_end) #define LO_rcnts_end (LO_rcnts + 7*4*4) #define LO_inv_code_start (LO_rcnts_end) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index e0cff62ca..f45322a8f 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -47,11 +47,19 @@ static int sceBlock; #ifndef min #define min(a, b) ((b) < (a) ? (b) : (a)) #endif +#ifndef max +#define max(a, b) ((b) > (a) ? (b) : (a)) +#endif //#define DISASM -//#define assem_debug printf -//#define inv_debug printf +//#define ASSEM_PRINT + +#ifdef ASSEM_PRINT +#define assem_debug printf +#else #define assem_debug(...) +#endif +//#define inv_debug printf #define inv_debug(...) 
#ifdef __i386__ @@ -222,6 +230,7 @@ struct link_entry int new_dynarec_hacks; int new_dynarec_hacks_pergame; + int new_dynarec_hacks_old; int new_dynarec_did_compile; #define HACK_ENABLED(x) ((new_dynarec_hacks | new_dynarec_hacks_pergame) & (x)) @@ -336,7 +345,7 @@ static void add_to_linker(void *addr, u_int target, int ext); static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override); static void *get_direct_memhandler(void *table, u_int addr, enum stub_type type, uintptr_t *addr_host); -static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist); +static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist); static void pass_args(int a0, int a1); static void emit_far_jump(const void *f); static void emit_far_call(const void *f); @@ -454,6 +463,7 @@ static void do_clear_cache(void) int cycle_multiplier; // 100 for 1.0 int cycle_multiplier_override; +int cycle_multiplier_old; static int CLOCK_ADJUST(int x) { @@ -905,7 +915,7 @@ static void host_tempreg_acquire(void) {} static void host_tempreg_release(void) {} #endif -#ifdef DRC_DBG +#ifdef ASSEM_PRINT extern void gen_interupt(); extern void do_insn_cmp(); #define FUNCNAME(f) { f, " " #f } @@ -929,7 +939,9 @@ static const struct { FUNCNAME(new_dyna_leave), FUNCNAME(pcsx_mtc0), FUNCNAME(pcsx_mtc0_ds), +#ifdef DRC_DBG FUNCNAME(do_insn_cmp), +#endif #ifdef __arm__ FUNCNAME(verify_code), #endif @@ -1600,6 +1612,12 @@ static void alloc_reg_temp(struct regstat *cur,int i,signed char reg) static void mov_alloc(struct regstat *current,int i) { + if (rs1[i] == HIREG || rs1[i] == LOREG) { + // logically this is needed but just won't work, no idea why + //alloc_cc(current,i); // for stalls + //dirty_reg(current,CCREG); + } + // Note: Don't need to actually alloc the source registers //alloc_reg(current,i,rs1[i]); alloc_reg(current,i,rt1[i]); @@ -1863,6 +1881,7 @@ void multdiv_alloc(struct regstat *current,int i) // case 0x1F: DDIVU 
clear_const(current,rs1[i]); clear_const(current,rs2[i]); + alloc_cc(current,i); // for stalls if(rs1[i]&&rs2[i]) { if((opcode2[i]&4)==0) // 32-bit @@ -3314,7 +3333,6 @@ static void log_gte_stall(int stall, u_int cycle) { if ((u_int)stall <= 44) printf("x stall %2d %u\n", stall, cycle + last_count); - if (cycle + last_count > 1215348544) exit(1); } static void emit_log_gte_stall(int i, int stall, u_int reglist) @@ -3330,14 +3348,13 @@ static void emit_log_gte_stall(int i, int stall, u_int reglist) } #endif -static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist) +static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist) { int j = i, other_gte_op_cycles = -1, stall = -MAXBLOCK, cycles_passed; int rtmp = reglist_find_free(reglist); - if (HACK_ENABLED(NDHACK_GTE_NO_STALL)) + if (HACK_ENABLED(NDHACK_NO_STALLS)) return; - //assert(get_reg(i_regs->regmap, CCREG) == HOST_CCREG); if (get_reg(i_regs->regmap, CCREG) != HOST_CCREG) { // happens occasionally... cc evicted? 
Don't bother then //printf("no cc %08x\n", start + i*4); @@ -3349,6 +3366,7 @@ static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, if (cop2_is_stalling_op(j, &other_gte_op_cycles) || bt[j]) break; } + j = max(j, 0); } cycles_passed = CLOCK_ADJUST(ccadj[i] - ccadj[j]); if (other_gte_op_cycles >= 0) @@ -3357,7 +3375,7 @@ static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, stall = 0; // can't stall if (stall == -MAXBLOCK && rtmp >= 0) { // unknown stall, do the expensive runtime check - assem_debug("; cop2_call_stall_check\n"); + assem_debug("; cop2_do_stall_check\n"); #if 0 // too slow save_regs(reglist); emit_movimm(gte_cycletab[op], 0); @@ -3415,6 +3433,98 @@ static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, host_tempreg_release(); } +static int is_mflohi(int i) +{ + return (itype[i] == MOV && (rs1[i] == HIREG || rs1[i] == LOREG)); +} + +static int check_multdiv(int i, int *cycles) +{ + if (itype[i] != MULTDIV) + return 0; + if (opcode2[i] == 0x18 || opcode2[i] == 0x19) // MULT(U) + *cycles = 11; // approx from 7 11 14 + else + *cycles = 37; + return 1; +} + +static void multdiv_prepare_stall(int i, const struct regstat *i_regs) +{ + int j, found = 0, c = 0; + if (HACK_ENABLED(NDHACK_NO_STALLS)) + return; + if (get_reg(i_regs->regmap, CCREG) != HOST_CCREG) { + // happens occasionally... cc evicted? 
Don't bother then + return; + } + for (j = i + 1; j < slen; j++) { + if (bt[j]) + break; + if ((found = is_mflohi(j))) + break; + if (is_jump(j)) { + // check ds + if (j + 1 < slen && (found = is_mflohi(j + 1))) + j++; + break; + } + } + if (found) + // handle all in multdiv_do_stall() + return; + check_multdiv(i, &c); + assert(c > 0); + assem_debug("; muldiv prepare stall %d\n", c); + host_tempreg_acquire(); + emit_addimm(HOST_CCREG, CLOCK_ADJUST(ccadj[i]) + c, HOST_TEMPREG); + emit_writeword(HOST_TEMPREG, &psxRegs.muldivBusyCycle); + host_tempreg_release(); +} + +static void multdiv_do_stall(int i, const struct regstat *i_regs) +{ + int j, known_cycles = 0; + u_int reglist = get_host_reglist(i_regs->regmap); + int rtmp = get_reg(i_regs->regmap, -1); + if (rtmp < 0) + rtmp = reglist_find_free(reglist); + if (HACK_ENABLED(NDHACK_NO_STALLS)) + return; + if (get_reg(i_regs->regmap, CCREG) != HOST_CCREG || rtmp < 0) { + // happens occasionally... cc evicted? Don't bother then + //printf("no cc/rtmp %08x\n", start + i*4); + return; + } + if (!bt[i]) { + for (j = i - 1; j >= 0; j--) { + if (is_ds[j]) break; + if (check_multdiv(j, &known_cycles) || bt[j]) + break; + if (is_mflohi(j)) + // already handled by this op + return; + } + j = max(j, 0); + } + if (known_cycles > 0) { + known_cycles -= CLOCK_ADJUST(ccadj[i] - ccadj[j]); + assem_debug("; muldiv stall resolved %d\n", known_cycles); + if (known_cycles > 0) + emit_addimm(HOST_CCREG, known_cycles, HOST_CCREG); + return; + } + assem_debug("; muldiv stall unresolved\n"); + host_tempreg_acquire(); + emit_readword(&psxRegs.muldivBusyCycle, rtmp); + emit_addimm(rtmp, -CLOCK_ADJUST(ccadj[i]), rtmp); + emit_sub(rtmp, HOST_CCREG, HOST_TEMPREG); + emit_cmpimm(HOST_TEMPREG, 37); + emit_cmovb_reg(rtmp, HOST_CCREG); + //emit_log_gte_stall(i, 0, reglist); + host_tempreg_release(); +} + static void cop2_get_dreg(u_int copr,signed char tl,signed char temp) { switch (copr) { @@ -3532,8 +3642,9 @@ static void c2ls_assemble(int i, const 
struct regstat *i_regs) if (!offset&&!c&&s>=0) ar=s; assert(ar>=0); + cop2_do_stall_check(0, i, i_regs, reglist); + if (opcode[i]==0x3a) { // SWC2 - cop2_call_stall_check(0, i, i_regs, reglist_exclude(reglist, tl, -1)); cop2_get_dreg(copr,tl,-1); type=STOREW_STUB; } @@ -3600,12 +3711,13 @@ static void cop2_assemble(int i, const struct regstat *i_regs) u_int copr = (source[i]>>11) & 0x1f; signed char temp = get_reg(i_regs->regmap, -1); - if (opcode2[i] == 0 || opcode2[i] == 2) { // MFC2/CFC2 - if (!HACK_ENABLED(NDHACK_GTE_NO_STALL)) { + if (!HACK_ENABLED(NDHACK_NO_STALLS)) { + u_int reglist = reglist_exclude(get_host_reglist(i_regs->regmap), temp, -1); + if (opcode2[i] == 0 || opcode2[i] == 2) { // MFC2/CFC2 signed char tl = get_reg(i_regs->regmap, rt1[i]); - u_int reglist = reglist_exclude(get_host_reglist(i_regs->regmap), tl, temp); - cop2_call_stall_check(0, i, i_regs, reglist); + reglist = reglist_exclude(reglist, tl, -1); } + cop2_do_stall_check(0, i, i_regs, reglist); } if (opcode2[i]==0) { // MFC2 signed char tl=get_reg(i_regs->regmap,rt1[i]); @@ -3753,6 +3865,8 @@ static void mov_assemble(int i,struct regstat *i_regs) else emit_loadreg(rs1[i],tl); } } + if (rs1[i] == HIREG || rs1[i] == LOREG) // MFHI/MFLO + multdiv_do_stall(i, i_regs); } // call interpreter, exception handler, things that change pc/regs/cycles ... 
@@ -3921,7 +4035,9 @@ static void ds_assemble(int i,struct regstat *i_regs) case C2OP: c2op_assemble(i,i_regs);break; case MULTDIV: - multdiv_assemble(i,i_regs);break; + multdiv_assemble(i,i_regs); + multdiv_prepare_stall(i,i_regs); + break; case MOV: mov_assemble(i,i_regs);break; case SYSCALL: @@ -4577,7 +4693,9 @@ static void ds_assemble_entry(int i) case C2OP: c2op_assemble(t,&regs[t]);break; case MULTDIV: - multdiv_assemble(t,&regs[t]);break; + multdiv_assemble(t,&regs[t]); + multdiv_prepare_stall(i,&regs[t]); + break; case MOV: mov_assemble(t,&regs[t]);break; case SYSCALL: @@ -5921,7 +6039,9 @@ static void pagespan_ds() case C2OP: c2op_assemble(0,&regs[0]);break; case MULTDIV: - multdiv_assemble(0,&regs[0]);break; + multdiv_assemble(0,&regs[0]); + multdiv_prepare_stall(0,&regs[0]); + break; case MOV: mov_assemble(0,&regs[0]);break; case SYSCALL: @@ -6731,6 +6851,9 @@ void new_dynarec_clear_full(void) for(n=0;n<4096;n++) ll_clear(jump_in+n); for(n=0;n<4096;n++) ll_clear(jump_out+n); for(n=0;n<4096;n++) ll_clear(jump_dirty+n); + + cycle_multiplier_old = cycle_multiplier; + new_dynarec_hacks_old = new_dynarec_hacks; } void new_dynarec_init(void) @@ -8105,7 +8228,7 @@ int new_recompile_block(u_int addr) // this should really be removed since the real stalls have been implemented, // but doing so causes sizeable perf regression against the older version u_int gtec = gte_cycletab[source[i] & 0x3f]; - cc += HACK_ENABLED(NDHACK_GTE_NO_STALL) ? gtec/2 : 2; + cc += HACK_ENABLED(NDHACK_NO_STALLS) ? gtec/2 : 2; } else if(i>1&&itype[i]==STORE&&itype[i-1]==STORE&&itype[i-2]==STORE&&!bt[i]) { @@ -8114,7 +8237,7 @@ int new_recompile_block(u_int addr) else if(itype[i]==C2LS) { // same as with C2OP - cc += HACK_ENABLED(NDHACK_GTE_NO_STALL) ? 4 : 2; + cc += HACK_ENABLED(NDHACK_NO_STALLS) ? 
4 : 2; } #endif else @@ -9094,7 +9217,9 @@ int new_recompile_block(u_int addr) case C2OP: c2op_assemble(i,&regs[i]);break; case MULTDIV: - multdiv_assemble(i,&regs[i]);break; + multdiv_assemble(i,&regs[i]); + multdiv_prepare_stall(i,&regs[i]); + break; case MOV: mov_assemble(i,&regs[i]);break; case SYSCALL: diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index bff1c1641..b9a3c67cc 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -6,14 +6,16 @@ extern int stop; extern int new_dynarec_did_compile; extern int cycle_multiplier; // 100 for 1.0 extern int cycle_multiplier_override; +extern int cycle_multiplier_old; #define NDHACK_NO_SMC_CHECK (1<<0) #define NDHACK_GTE_UNNEEDED (1<<1) #define NDHACK_GTE_NO_FLAGS (1<<2) #define NDHACK_OVERRIDE_CYCLE_M (1<<3) -#define NDHACK_GTE_NO_STALL (1<<4) +#define NDHACK_NO_STALLS (1<<4) extern int new_dynarec_hacks; extern int new_dynarec_hacks_pergame; +extern int new_dynarec_hacks_old; void new_dynarec_init(void); void new_dynarec_cleanup(void); diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index c9d300aaa..2dd91cf1c 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -133,6 +133,7 @@ typedef struct { boolean UseNet; boolean VSyncWA; boolean icache_emulation; + boolean DisableStalls; u8 Cpu; // CPU_DYNAREC or CPU_INTERPRETER u8 PsxType; // PSX_TYPE_NTSC or PSX_TYPE_PAL #ifdef _WIN32 diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index b171b0a6f..2dd90b0f0 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -27,6 +27,7 @@ #include "psxhle.h" #include "debug.h" #include "psxinterpreter.h" +#include <assert.h> static int branch = 0; static int branch2 = 0; @@ -610,6 +611,11 @@ void psxDIV() { } } +void psxDIV_stall() { + psxRegs.muldivBusyCycle = psxRegs.cycle + 37; + psxDIV(); +} + void psxDIVU() { if (_rRt_ != 0) { _rLo_ = _rRs_ / _rRt_; @@ 
-621,6 +627,11 @@ void psxDIVU() { } } 
+void psxDIVU_stall() { + psxRegs.muldivBusyCycle = psxRegs.cycle + 37; + psxDIVU(); +} + void psxMULT() { u64 res = (s64)((s64)_i32(_rRs_) * (s64)_i32(_rRt_)); @@ -628,6 +639,15 @@ void psxMULT() { psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff); } +void psxMULT_stall() { + // approximate, but maybe good enough + u32 rs = _rRs_; + u32 lz = __builtin_clz(((rs ^ ((s32)rs >> 21)) | 1)); + u32 c = 7 + (2 - (lz / 11)) * 4; + psxRegs.muldivBusyCycle = psxRegs.cycle + c; + psxMULT(); +} + void psxMULTU() { u64 res = (u64)((u64)_u32(_rRs_) * (u64)_u32(_rRt_)); @@ -635,6 +655,14 @@ void psxMULTU() { psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff); } +void psxMULTU_stall() { + // approximate, but maybe good enough + u32 lz = __builtin_clz(_rRs_ | 1); + u32 c = 7 + (2 - (lz / 11)) * 4; + psxRegs.muldivBusyCycle = psxRegs.cycle + c; + psxMULTU(); +} + /********************************************************* * Register branch logic * * Format: OP rs, offset * @@ -678,6 +706,18 @@ void psxLUI() { if (!_Rt_) return; _u32(_rRt_) = psxRegs.code << 16; } // Upper void psxMFHI() { if (!_Rd_) return; _rRd_ = _rHi_; } // Rd = Hi void psxMFLO() { if (!_Rd_) return; _rRd_ = _rLo_; } // Rd = Lo +static void mflohiCheckStall(void) +{ + u32 left = psxRegs.muldivBusyCycle - psxRegs.cycle; + if (left <= 37) { + //printf("muldiv stall %u\n", left); + psxRegs.cycle = psxRegs.muldivBusyCycle; + } +} + +void psxMFHI_stall() { mflohiCheckStall(); psxMFHI(); } +void psxMFLO_stall() { mflohiCheckStall(); psxMFLO(); } + /********************************************************* * Move to GPR to HI/LO & Register jump * * Format: OP rs * @@ -934,9 +974,12 @@ void psxCOP0() { } void psxCOP2() { + psxCP2[_Funct_]((struct psxCP2Regs *)&psxRegs.CP2D); +} + +void psxCOP2_stall() { u32 f = _Funct_; - if (f != 0 || _Rs_ < 4) // not MTC2/CTC2 - gteCheckStall(f); + gteCheckStall(f); psxCP2[f]((struct psxCP2Regs *)&psxRegs.CP2D); } @@ -1073,6 +1116,40 @@ void intNotify (int note, void *data) { #endif } 
+void applyConfig() { + assert(psxBSC[18] == psxCOP2 || psxBSC[18] == psxCOP2_stall); + assert(psxBSC[50] == gteLWC2 || psxBSC[50] == gteLWC2_stall); + assert(psxBSC[58] == gteSWC2 || psxBSC[58] == gteSWC2_stall); + assert(psxSPC[16] == psxMFHI || psxSPC[16] == psxMFHI_stall); + assert(psxSPC[18] == psxMFLO || psxSPC[18] == psxMFLO_stall); + assert(psxSPC[24] == psxMULT || psxSPC[24] == psxMULT_stall); + assert(psxSPC[25] == psxMULTU || psxSPC[25] == psxMULTU_stall); + assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall); + assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall); + + if (Config.DisableStalls) { + psxBSC[18] = psxCOP2; + psxBSC[50] = gteLWC2; + psxBSC[58] = gteSWC2; + psxSPC[16] = psxMFHI; + psxSPC[18] = psxMFLO; + psxSPC[24] = psxMULT; + psxSPC[25] = psxMULTU; + psxSPC[26] = psxDIV; + psxSPC[27] = psxDIVU; + } else { + psxBSC[18] = psxCOP2_stall; + psxBSC[50] = gteLWC2_stall; + psxBSC[58] = gteSWC2_stall; + psxSPC[16] = psxMFHI_stall; + psxSPC[18] = psxMFLO_stall; + psxSPC[24] = psxMULT_stall; + psxSPC[25] = psxMULTU_stall; + psxSPC[26] = psxDIV_stall; + psxSPC[27] = psxDIVU_stall; + } +} + static void intShutdown() { #ifdef ICACHE_EMULATION if (ICache_Addr) @@ -1123,5 +1200,6 @@ R3000Acpu psxInt = { #ifdef ICACHE_EMULATION intNotify, #endif + applyConfig, intShutdown }; diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 54359159e..94d7d9555 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -47,6 +47,7 @@ typedef struct { #ifdef ICACHE_EMULATION void (*Notify)(int note, void *data); #endif + void (*ApplyConfig)(); void (*Shutdown)(); } R3000Acpu; @@ -194,9 +195,10 @@ typedef struct { u32 interrupt; struct { u32 sCycle, cycle; } intCycle[32]; u32 gteBusyCycle; + u32 muldivBusyCycle; // warning: changing anything in psxRegisters requires update of all // asm in libpcsxcore/new_dynarec/, but this member can be replaced - u32 reserved[3]; + u32 reserved[2]; } psxRegisters; extern psxRegisters psxRegs; From 
2391c1b4a91068853fb2d783c8f7613658be4250 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 21 Nov 2021 16:44:35 +0200 Subject: [PATCH 079/597] frontend: remove src alignment requirements in asm no measurable perf improvement seen just from :64 alignment both on cortex-a8 and cortex-a72, and Psybadek uses unaligned vram location. --- frontend/cspace_neon.S | 8 ++++---- frontend/plat_pollux.c | 1 + frontend/plugin_lib.c | 2 ++ plugins/gpulib/vout_pl.c | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/frontend/cspace_neon.S b/frontend/cspace_neon.S index 56ab3044a..4cb3d4c8f 100644 --- a/frontend/cspace_neon.S +++ b/frontend/cspace_neon.S @@ -183,8 +183,8 @@ FUNCTION(bgr888_to_rgb888): @ dst, src, bytes umull r12,r2, r3, r2 0: pld [r1, #48*3] - vld3.8 {d0-d2}, [r1, :64]! - vld3.8 {d3-d5}, [r1, :64]! + vld3.8 {d0-d2}, [r1]! + vld3.8 {d3-d5}, [r1]! vswp d0, d2 vswp d3, d5 vst3.8 {d0-d2}, [r0, :64]! @@ -207,8 +207,8 @@ FUNCTION(bgr888_to_rgb565): @ dst, src, bytes vdup.16 q15, r3 0: pld [r1, #48*3] - vld3.8 {d1-d3}, [r1, :64]! - vld3.8 {d5-d7}, [r1, :64]! + vld3.8 {d1-d3}, [r1]! + vld3.8 {d5-d7}, [r1]! 
vshll.u8 q8, d2, #3 @ g vshll.u8 q9, d6, #3 diff --git a/frontend/plat_pollux.c b/frontend/plat_pollux.c index 18b805319..326a40f11 100644 --- a/frontend/plat_pollux.c +++ b/frontend/plat_pollux.c @@ -309,6 +309,7 @@ static void name(int doffs, const void *vram_, int w, int h, int sstride, int bg int i; \ \ vram += psx_offset_y * 1024 + psx_offset_x; \ + vram = (void *)((long)vram & ~3); \ for (i = psx_src_height; i > 0; i--, vram += psx_step * 1024, dst += dst_stride)\ blitfunc(dst, vram, len); \ } diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index ab4d4152e..c6a2bf0e1 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -396,6 +396,8 @@ static void pl_vout_flip(const void *vram, int stride, int bgr24, int w, int h) #endif else { + src = (void *)((uintptr_t)src & ~3); // align for the blitter + for (; h1-- > 0; dest += dstride * 2, src += stride) { bgr555_to_rgb565(dest, src, w * 2); diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index a9437cb14..d1fdefbc5 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -55,7 +55,7 @@ static void check_mode_change(int force) void vout_update(void) { - int x = gpu.screen.x & ~1; // alignment needed by blitter + int x = gpu.screen.x; int y = gpu.screen.y; int w = gpu.screen.w; int h = gpu.screen.h; From 943f42f30d63217a6ce07b43345b93126e9e4ed0 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 22 Nov 2021 20:20:36 +0200 Subject: [PATCH 080/597] drc: fix block expire was shifting pointers that are not necessarily aligned --- libpcsxcore/new_dynarec/new_dynarec.c | 49 +++++++++++++++------------ 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index f45322a8f..bd553b88d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -1095,12 +1095,14 @@ void remove_hash(int vaddr) } } -void ll_remove_matching_addrs(struct ll_entry 
**head,uintptr_t addr,int shift) +static void ll_remove_matching_addrs(struct ll_entry **head, + uintptr_t base_offs_s, int shift) { struct ll_entry *next; while(*head) { - if(((uintptr_t)((*head)->addr)>>shift)==(addr>>shift) || - ((uintptr_t)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)) + uintptr_t o1 = (u_char *)(*head)->addr - ndrc->translation_cache; + uintptr_t o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; + if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s) { inv_debug("EXP: Remove pointer to %p (%x)\n",(*head)->addr,(*head)->vaddr); remove_hash((*head)->vaddr); @@ -1131,13 +1133,15 @@ void ll_clear(struct ll_entry **head) } // Dereference the pointers and remove if it matches -static void ll_kill_pointers(struct ll_entry *head,uintptr_t addr,int shift) +static void ll_kill_pointers(struct ll_entry *head, + uintptr_t base_offs_s, int shift) { while(head) { - uintptr_t ptr = (uintptr_t)get_pointer(head->addr); - inv_debug("EXP: Lookup pointer to %lx at %p (%x)\n",(long)ptr,head->addr,head->vaddr); - if(((ptr>>shift)==(addr>>shift)) || - (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))) + u_char *ptr = get_pointer(head->addr); + uintptr_t o1 = ptr - ndrc->translation_cache; + uintptr_t o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; + inv_debug("EXP: Lookup pointer to %p at %p (%x)\n",ptr,head->addr,head->vaddr); + if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s) { inv_debug("EXP: Kill pointer at %p (%x)\n",head->addr,head->vaddr); void *host_addr=find_extjump_insn(head->addr); @@ -9414,34 +9418,37 @@ int new_recompile_block(u_int addr) while(expirep!=end) { int shift=TARGET_SIZE_2-3; // Divide into 8 blocks - uintptr_t base=(uintptr_t)ndrc->translation_cache+((expirep>>13)<<shift); // Base address of this block + uintptr_t base_offs = ((uintptr_t)(expirep >> 13) << shift); // Base offset of this block + uintptr_t base_offs_s = base_offs >> shift; inv_debug("EXP: Phase %d\n",expirep); switch((expirep>>11)&3) { case 0: 
// Clear jump_in and jump_dirty - ll_remove_matching_addrs(jump_in+(expirep&2047),base,shift); - 
ll_remove_matching_addrs(jump_dirty+(expirep&2047),base,shift); - ll_remove_matching_addrs(jump_in+2048+(expirep&2047),base,shift); - ll_remove_matching_addrs(jump_dirty+2048+(expirep&2047),base,shift); + ll_remove_matching_addrs(jump_in+(expirep&2047),base_offs_s,shift); + ll_remove_matching_addrs(jump_dirty+(expirep&2047),base_offs_s,shift); + ll_remove_matching_addrs(jump_in+2048+(expirep&2047),base_offs_s,shift); + ll_remove_matching_addrs(jump_dirty+2048+(expirep&2047),base_offs_s,shift); break; case 1: // Clear pointers - ll_kill_pointers(jump_out[expirep&2047],base,shift); - ll_kill_pointers(jump_out[(expirep&2047)+2048],base,shift); + ll_kill_pointers(jump_out[expirep&2047],base_offs_s,shift); + ll_kill_pointers(jump_out[(expirep&2047)+2048],base_offs_s,shift); break; case 2: // Clear hash table for(i=0;i<32;i++) { struct ht_entry *ht_bin = &hash_table[((expirep&2047)<<5)+i]; - if (((uintptr_t)ht_bin->tcaddr[1]>>shift) == (base>>shift) || - (((uintptr_t)ht_bin->tcaddr[1]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) { + uintptr_t o1 = (u_char *)ht_bin->tcaddr[1] - ndrc->translation_cache; + uintptr_t o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; + if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s) { inv_debug("EXP: Remove hash %x -> %p\n",ht_bin->vaddr[1],ht_bin->tcaddr[1]); ht_bin->vaddr[1] = -1; ht_bin->tcaddr[1] = NULL; } - if (((uintptr_t)ht_bin->tcaddr[0]>>shift) == (base>>shift) || - (((uintptr_t)ht_bin->tcaddr[0]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) { + o1 = (u_char *)ht_bin->tcaddr[0] - ndrc->translation_cache; + o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; + if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s) { inv_debug("EXP: Remove hash %x -> %p\n",ht_bin->vaddr[0],ht_bin->tcaddr[0]); ht_bin->vaddr[0] = ht_bin->vaddr[1]; ht_bin->tcaddr[0] = ht_bin->tcaddr[1]; @@ -9454,8 +9461,8 @@ int new_recompile_block(u_int addr) // Clear jump_out if((expirep&2047)==0) do_clear_cache(); - 
ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift); - ll_remove_matching_addrs(jump_out+2048+(expirep&2047),base,shift); + ll_remove_matching_addrs(jump_out+(expirep&2047),base_offs_s,shift); + ll_remove_matching_addrs(jump_out+2048+(expirep&2047),base_offs_s,shift); break; } expirep=(expirep+1)&65535; From 3d680478922d5f28e3dbe471308cc27a70e31fdf Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 22 Nov 2021 20:55:37 +0200 Subject: [PATCH 081/597] drc: minor adjustments like not marking INTCALL as compiled code --- libpcsxcore/new_dynarec/assem_arm.c | 16 +++++----- libpcsxcore/new_dynarec/assem_arm64.c | 22 +++++++------- libpcsxcore/new_dynarec/linkage_arm.S | 6 ++-- libpcsxcore/new_dynarec/new_dynarec.c | 42 +++++++++++++++++---------- 4 files changed, 49 insertions(+), 37 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index d68aea6cb..4ccd19fe7 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -206,7 +206,7 @@ static void *get_pointer(void *stub) { //printf("get_pointer(%x)\n",(int)stub); int *i_ptr=find_extjump_insn(stub); - assert((*i_ptr&0x0f000000)==0x0a000000); + assert((*i_ptr&0x0f000000)==0x0a000000); // b return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8; } @@ -1946,26 +1946,26 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, } // this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr -static void do_dirty_stub_emit_args(u_int arg0) +static void do_dirty_stub_emit_args(u_int arg0, u_int source_len) { #ifndef HAVE_ARMV7 emit_loadlp((int)source, 1); emit_loadlp((int)copy, 2); - emit_loadlp(slen*4, 3); + emit_loadlp(source_len, 3); #else emit_movw(((u_int)source)&0x0000FFFF, 1); emit_movw(((u_int)copy)&0x0000FFFF, 2); emit_movt(((u_int)source)&0xFFFF0000, 1); emit_movt(((u_int)copy)&0xFFFF0000, 2); - emit_movw(slen*4, 3); + emit_movw(source_len, 3); #endif emit_movimm(arg0, 0); } -static void *do_dirty_stub(int i) 
+static void *do_dirty_stub(int i, u_int source_len) { assem_debug("do_dirty_stub %x\n",start+i*4); - do_dirty_stub_emit_args(start + i*4); + do_dirty_stub_emit_args(start + i*4, source_len); emit_far_call(verify_code); void *entry = out; load_regs_entry(i); @@ -1975,9 +1975,9 @@ static void *do_dirty_stub(int i) return entry; } -static void do_dirty_stub_ds() +static void do_dirty_stub_ds(u_int source_len) { - do_dirty_stub_emit_args(start + 1); + do_dirty_stub_emit_args(start + 1, source_len); emit_far_call(verify_code_ds); } diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 070c80fc7..14d715634 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -44,7 +44,7 @@ static void set_jump_target(void *addr, void *target) || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz // Conditional branch are limited to +/- 1MB // block max size is 256k so branching beyond the +/- 1MB limit - // should only happen when jumping to an already compiled block (see add_link) + // should only happen when jumping to an already compiled block (see add_jump_out) // a workaround would be to do a trampoline jump via a stub at the end of the block assert(-1048576 <= offset && offset < 1048576); *ptr=(*ptr&0xFF00000F)|(((offset>>2)&0x7ffff)<<5); @@ -1665,12 +1665,12 @@ static int verify_code_arm64(const void *source, const void *copy, u_int size) } // this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr -static void do_dirty_stub_base(u_int vaddr) +static void do_dirty_stub_base(u_int vaddr, u_int source_len) { - assert(slen <= MAXBLOCK); + assert(source_len <= MAXBLOCK*4); emit_loadlp_ofs(0, 0); // ldr x1, source emit_loadlp_ofs(0, 1); // ldr x2, copy - emit_movz(slen*4, 2); + emit_movz(source_len, 2); emit_far_call(verify_code_arm64); void *jmp = out; emit_cbz(0, 0); @@ -1685,7 +1685,7 @@ static void assert_dirty_stub(const u_int *ptr) { assert((ptr[0] & 0xff00001f) == 0x58000000); 
// ldr x0, source assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy - assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #slen*4 + assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #source_len assert( ptr[8] == 0xd61f0000); // br x0 } @@ -1706,11 +1706,11 @@ static void do_dirty_stub_emit_literals(u_int *loadlps) output_w64((uintptr_t)copy); } -static void *do_dirty_stub(int i) +static void *do_dirty_stub(int i, u_int source_len) { assem_debug("do_dirty_stub %x\n",start+i*4); u_int *loadlps = (void *)out; - do_dirty_stub_base(start + i*4); + do_dirty_stub_base(start + i*4, source_len); void *entry = out; load_regs_entry(i); if (entry == out) @@ -1720,10 +1720,10 @@ static void *do_dirty_stub(int i) return entry; } -static void do_dirty_stub_ds(void) +static void do_dirty_stub_ds(u_int source_len) { u_int *loadlps = (void *)out; - do_dirty_stub_base(start + 1); + do_dirty_stub_base(start + 1, source_len); void *lit_jumpover = out; emit_jmp(out + 8*2); do_dirty_stub_emit_literals(loadlps); @@ -1760,7 +1760,7 @@ static int verify_dirty(const u_int *ptr) assert_dirty_stub(ptr); source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x1, source copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy - len = get_from_movz(&ptr[2]); // movz w3, #slen*4 + len = get_from_movz(&ptr[2]); // movz w3, #source_len return !memcmp(source, copy, len); } @@ -1780,7 +1780,7 @@ static void get_bounds(void *addr, u_char **start, u_char **end) const u_int *ptr = addr; assert_dirty_stub(ptr); *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x1, source - *end = *start + get_from_movz(&ptr[2]); // movz w3, #slen*4 + *end = *start + get_from_movz(&ptr[2]); // movz w3, #source_len } /* Special assem */ diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index f18488ce6..970d91c70 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -26,7 +26,7 @@ #ifdef __MACH__ #define 
dynarec_local ESYM(dynarec_local) -#define add_link ESYM(add_link) +#define add_jump_out ESYM(add_jump_out) #define new_recompile_block ESYM(new_recompile_block) #define get_addr ESYM(get_addr) #define get_addr_ht ESYM(get_addr_ht) @@ -177,7 +177,7 @@ ptr_hash_table: orrcs r2, r6, #2048 ldr r5, [r3, r2, lsl #2] lsl r12, r12, #8 - add r6, r1, r12, asr #6 + add r6, r1, r12, asr #6 /* old target */ mov r8, #0 /* jump_in lookup */ 1: @@ -197,7 +197,7 @@ ptr_hash_table: mov r5, r1 mov r1, r6 - bl add_link + bl add_jump_out sub r2, r8, r5 and r1, r7, #0xff000000 lsl r2, r2, #6 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index bd553b88d..f81c98536 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -1172,7 +1172,7 @@ static void invalidate_page(u_int page) inv_debug("INVALIDATE: kill pointer to %x (%p)\n",head->vaddr,head->addr); void *host_addr=find_extjump_insn(head->addr); mark_clear_cache(host_addr); - set_jump_target(host_addr, head->addr); + set_jump_target(host_addr, head->addr); // point back to dyna_linker next=head->next; free(head); head=next; @@ -1321,14 +1321,13 @@ static void do_invstub(int n) // Add an entry to jump_out after making a link // src should point to code by emit_extjump2() -void add_link(u_int vaddr,void *src) +void add_jump_out(u_int vaddr,void *src) { u_int page=get_page(vaddr); - inv_debug("add_link: %p -> %x (%d)\n",src,vaddr,page); + inv_debug("add_jump_out: %p -> %x (%d)\n",src,vaddr,page); check_extjump2(src); ll_add(jump_out+page,vaddr,src); - //void *ptr=get_pointer(src); - //inv_debug("add_link: Pointer is to %p\n",ptr); + //inv_debug("add_jump_out: to %p\n",get_pointer(src)); } // If a code block was found to be unmodified (bit was set in @@ -5972,7 +5971,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) emit_extjump_ds(branch_addr, target_addr); if(compiled_target_addr) { set_jump_target(branch_addr, compiled_target_addr); - 
add_link(target_addr,stub); + add_jump_out(target_addr,stub); } else set_jump_target(branch_addr, stub); if(likely[i]) { @@ -5987,7 +5986,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) emit_extjump_ds(branch_addr, target_addr); if(compiled_target_addr) { set_jump_target(branch_addr, compiled_target_addr); - add_link(target_addr,stub); + add_jump_out(target_addr,stub); } else set_jump_target(branch_addr, stub); } @@ -6001,7 +6000,7 @@ static void pagespan_ds() u_int page=get_page(vaddr); u_int vpage=get_vpage(vaddr); ll_add(jump_dirty+vpage,vaddr,(void *)out); - do_dirty_stub_ds(); + do_dirty_stub_ds(slen*4); ll_add(jump_in+page,vaddr,(void *)out); assert(regs[0].regmap_entry[HOST_CCREG]==CCREG); if(regs[0].regmap[HOST_CCREG]!=CCREG) @@ -9249,10 +9248,14 @@ int new_recompile_block(u_int addr) literal_pool_jumpover(256); } } - //assert(is_ujump(i-2)); + + assert(slen > 0); + if (itype[slen-1] == INTCALL) { + // no ending needed for this block since INTCALL never returns + } // If the block did not end with an unconditional branch, // add a jump to the next instruction. 
- if(i>1) { + else if (i > 1) { if(!is_ujump(i-2)&&itype[i-1]!=SPAN) { assert(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP); assert(i==slen); @@ -9332,7 +9335,7 @@ int new_recompile_block(u_int addr) emit_extjump(link_addr[i].addr, link_addr[i].target); if (addr) { set_jump_target(link_addr[i].addr, addr); - add_link(link_addr[i].target,stub); + add_jump_out(link_addr[i].target,stub); } else set_jump_target(link_addr[i].addr, stub); @@ -9350,8 +9353,17 @@ int new_recompile_block(u_int addr) //#endif } } + + u_int source_len = slen*4; + if (itype[slen-1] == INTCALL && source_len > 4) + // no need to treat the last instruction as compiled + // as interpreter fully handles it + source_len -= 4; + + if ((u_char *)copy + source_len > (u_char *)shadow + sizeof(shadow)) + copy = shadow; + // External Branch Targets (jump_in) - if(copy+slen*4>(void *)shadow+sizeof(shadow)) copy=shadow; for(i=0;i Date: Mon, 22 Nov 2021 21:08:17 +0200 Subject: [PATCH 082/597] drc: detect unconditional branches early Mainly helps to end the block and not compile data. Unsure if this doesn't break something. 
--- libpcsxcore/new_dynarec/new_dynarec.c | 47 +++++++++++++++++++-------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index f81c98536..d6a760347 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -485,6 +485,11 @@ static int is_jump(int i) return itype[i] == RJUMP || itype[i] == UJUMP || itype[i] == CJUMP || itype[i] == SJUMP; } +static int ds_writes_rjump_rs(int i) +{ + return rs1[i] != 0 && (rs1[i] == rt1[i+1] || rs1[i] == rt2[i+1]); +} + static u_int get_page(u_int vaddr) { u_int page=vaddr&~0xe0000000; @@ -4964,7 +4969,7 @@ static void do_ccstub(int n) if(itype[i]==RJUMP) { int r=get_reg(branch_regs[i].regmap,rs1[i]); - if(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]) { + if (ds_writes_rjump_rs(i)) { r=get_reg(branch_regs[i].regmap,RTEMP); } emit_writeword(r,&pcaddr); @@ -5122,7 +5127,7 @@ static void rjump_assemble(int i,struct regstat *i_regs) int ra_done=0; rs=get_reg(branch_regs[i].regmap,rs1[i]); assert(rs>=0); - if(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]) { + if (ds_writes_rjump_rs(i)) { // Delay slot abuse, make a copy of the branch address register temp=get_reg(branch_regs[i].regmap,RTEMP); assert(temp>=0); @@ -7193,18 +7198,18 @@ int new_recompile_block(u_int addr) { case 0x00: strcpy(insn[i],"BLTZ"); type=SJUMP; break; case 0x01: strcpy(insn[i],"BGEZ"); type=SJUMP; break; - case 0x02: strcpy(insn[i],"BLTZL"); type=SJUMP; break; - case 0x03: strcpy(insn[i],"BGEZL"); type=SJUMP; break; - case 0x08: strcpy(insn[i],"TGEI"); type=NI; break; - case 0x09: strcpy(insn[i],"TGEIU"); type=NI; break; - case 0x0A: strcpy(insn[i],"TLTI"); type=NI; break; - case 0x0B: strcpy(insn[i],"TLTIU"); type=NI; break; - case 0x0C: strcpy(insn[i],"TEQI"); type=NI; break; - case 0x0E: strcpy(insn[i],"TNEI"); type=NI; break; + //case 0x02: strcpy(insn[i],"BLTZL"); type=SJUMP; break; + //case 0x03: strcpy(insn[i],"BGEZL"); type=SJUMP; break; + //case 
0x08: strcpy(insn[i],"TGEI"); type=NI; break; + //case 0x09: strcpy(insn[i],"TGEIU"); type=NI; break; + //case 0x0A: strcpy(insn[i],"TLTI"); type=NI; break; + //case 0x0B: strcpy(insn[i],"TLTIU"); type=NI; break; + //case 0x0C: strcpy(insn[i],"TEQI"); type=NI; break; + //case 0x0E: strcpy(insn[i],"TNEI"); type=NI; break; case 0x10: strcpy(insn[i],"BLTZAL"); type=SJUMP; break; case 0x11: strcpy(insn[i],"BGEZAL"); type=SJUMP; break; - case 0x12: strcpy(insn[i],"BLTZALL"); type=SJUMP; break; - case 0x13: strcpy(insn[i],"BGEZALL"); type=SJUMP; break; + //case 0x12: strcpy(insn[i],"BLTZALL"); type=SJUMP; break; + //case 0x13: strcpy(insn[i],"BGEZALL"); type=SJUMP; break; } break; case 0x02: strcpy(insn[i],"J"); type=UJUMP; break; @@ -7532,10 +7537,23 @@ int new_recompile_block(u_int addr) else if(type==CJUMP||type==SJUMP) ba[i]=start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14); else ba[i]=-1; + + /* simplify always (not)taken branches */ + if (type == CJUMP && rs1[i] == rs2[i]) { + rs1[i] = rs2[i] = 0; + if (!(op & 1)) { + itype[i] = type = UJUMP; + rs2[i] = CCREG; + } + } + else if (type == SJUMP && rs1[i] == 0 && (op2 & 1)) + itype[i] = type = UJUMP; + + /* messy cases to just pass over to the interpreter */ if (i > 0 && is_jump(i-1)) { int do_in_intrp=0; // branch in delay slot? - if(type==RJUMP||type==UJUMP||type==CJUMP||type==SJUMP) { + if (is_jump(i)) { // don't handle first branch and call interpreter if it's hit SysPrintf("branch in delay slot @%08x (%08x)\n", addr + i*4, addr); do_in_intrp=1; @@ -7565,6 +7583,7 @@ int new_recompile_block(u_int addr) i--; // don't compile the DS } } + /* Is this the end of the block? 
*/ if (i > 0 && is_ujump(i-1)) { if(rt1[i-1]==0) { // Continue past subroutine call (JAL) @@ -7751,7 +7770,7 @@ int new_recompile_block(u_int addr) clear_const(¤t,rt1[i]); alloc_cc(¤t,i); dirty_reg(¤t,CCREG); - if(rs1[i]!=rt1[i+1]&&rs1[i]!=rt2[i+1]) { + if (!ds_writes_rjump_rs(i)) { alloc_reg(¤t,i,rs1[i]); if (rt1[i]!=0) { alloc_reg(¤t,i,rt1[i]); From 61ad2a6193b343bed12af5400746254583339304 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 22 Nov 2021 21:29:08 +0200 Subject: [PATCH 083/597] make icache implementation play nice with the dynarec No need to to have a build-time option now. --- configure | 12 --- frontend/menu.c | 17 ++-- libpcsxcore/new_dynarec/emu_if.c | 7 +- libpcsxcore/new_dynarec/patches/trace_intr | 12 --- libpcsxcore/psxbios.c | 2 - libpcsxcore/psxinterpreter.c | 92 ++++++---------------- libpcsxcore/psxinterpreter.h | 3 + libpcsxcore/psxmem.c | 4 - libpcsxcore/r3000a.c | 16 +--- libpcsxcore/r3000a.h | 5 -- 10 files changed, 43 insertions(+), 127 deletions(-) diff --git a/configure b/configure index 16f510c7e..7b3002591 100755 --- a/configure +++ b/configure @@ -59,7 +59,6 @@ need_sdl="no" need_xlib="no" need_libpicofe="yes" need_warm="no" -enable_icache_emu="yes" CFLAGS_GLES="" LDLIBS_GLES="" # these are for known platforms @@ -95,14 +94,12 @@ set_platform() optimize_cortexa8="yes" have_arm_neon="yes" need_xlib="yes" - enable_icache_emu="no" ;; maemo) ram_fixed="yes" drc_cache_base="yes" optimize_cortexa8="yes" have_arm_neon="yes" - enable_icache_emu="no" ;; caanoo) sound_drivers="oss" @@ -110,7 +107,6 @@ set_platform() drc_cache_base="yes" optimize_arm926ej="yes" need_warm="yes" - enable_icache_emu="no" ;; libretro) sound_drivers="libretro" @@ -139,10 +135,6 @@ for opt do ;; --disable-dynarec) enable_dynarec="no" ;; - --disable-icache-emu) enable_icache_emu="no" - ;; - --enable-icache-emu) enable_icache_emu="yes" - ;; *) echo "ERROR: unknown option $opt"; show_help="yes" ;; esac @@ -504,10 +496,6 @@ if [ "x$sizeof_long" = "x4" ]; then 
CFLAGS="$CFLAGS -D_FILE_OFFSET_BITS=64" fi -if [ "$enable_icache_emu" = "yes" ]; then - CFLAGS="$CFLAGS -DICACHE_EMULATION" -fi - cat > $TMPC <Notify(R3000ACPU_NOTIFY_CACHE_ISOLATED, NULL); psxCpu->Notify(R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL); -#endif pc0 = ra; } diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index 2dd90b0f0..d2225c417 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -49,14 +49,19 @@ void (*psxCP0[32])(); void (*psxCP2[64])(struct psxCP2Regs *regs); void (*psxCP2BSC[32])(); -#ifdef ICACHE_EMULATION +static u32 fetchNoCache(u32 pc) +{ + u32 *code = (u32 *)PSXM(pc); + return ((code == NULL) ? 0 : SWAP32(*code)); +} + /* Formula One 2001 : Use old CPU cache code when the RAM location is updated with new code (affects in-game racing) */ static u8* ICache_Addr; static u8* ICache_Code; -uint32_t *Read_ICache(uint32_t pc) +static u32 fetchICache(u32 pc) { uint32_t pc_bank, pc_offset, pc_cache; uint8_t *IAddr, *ICode; @@ -74,7 +79,7 @@ uint32_t *Read_ICache(uint32_t pc) if (SWAP32(*(uint32_t *)(IAddr + pc_cache)) == pc_offset) { // Cache hit - return last opcode used - return (uint32_t *)(ICode + pc_cache); + return *(uint32_t *)(ICode + pc_cache); } else { @@ -104,9 +109,10 @@ uint32_t *Read_ICache(uint32_t pc) TODO: Probably should add cached BIOS */ // default - return (uint32_t *)PSXM(pc); + return fetchNoCache(pc); } -#endif + +u32 (*fetch)(u32 pc) = fetchNoCache; static void delayRead(int reg, u32 bpc) { u32 rold, rnew; @@ -322,21 +328,7 @@ int psxTestLoadDelay(int reg, u32 tmp) { } void psxDelayTest(int reg, u32 bpc) { - u32 *code; - u32 tmp; - - #ifdef ICACHE_EMULATION - if (Config.icache_emulation) - { - code = Read_ICache(psxRegs.pc); - } - else - #endif - { - code = (u32 *)PSXM(psxRegs.pc); - } - - tmp = ((code == NULL) ? 
0 : SWAP32(*code)); + u32 tmp = fetch(psxRegs.pc); branch = 1; switch (psxTestLoadDelay(reg, tmp)) { @@ -356,20 +348,9 @@ void psxDelayTest(int reg, u32 bpc) { } static u32 psxBranchNoDelay(void) { - u32 *code; u32 temp; - #ifdef ICACHE_EMULATION - if (Config.icache_emulation) - { - code = Read_ICache(psxRegs.pc); - } - else - #endif - { - code = (u32 *)PSXM(psxRegs.pc); - } - psxRegs.code = ((code == NULL) ? 0 : SWAP32(*code)); + psxRegs.code = fetch(psxRegs.pc); switch (_Op_) { case 0x00: // SPECIAL switch (_Funct_) { @@ -487,7 +468,6 @@ static int psxDelayBranchTest(u32 tar1) { } static void doBranch(u32 tar) { - u32 *code; u32 tmp; branch2 = branch = 1; @@ -497,17 +477,7 @@ static void doBranch(u32 tar) { if (psxDelayBranchTest(tar)) return; - #ifdef ICACHE_EMULATION - if (Config.icache_emulation) - { - code = Read_ICache(psxRegs.pc); - } - else - #endif - { - code = (u32 *)PSXM(psxRegs.pc); - } - psxRegs.code = ((code == NULL) ? 0 : SWAP32(*code)); + psxRegs.code = fetch(psxRegs.pc); debugI(); @@ -1057,7 +1027,6 @@ void (*psxCP2BSC[32])() = { /////////////////////////////////////////// static int intInit() { - #ifdef ICACHE_EMULATION /* We have to allocate the icache memory even if * the user has not enabled it as otherwise it can cause issues. 
*/ @@ -1080,15 +1049,12 @@ static int intInit() { } memset(ICache_Addr, 0xff, 0x1000); memset(ICache_Code, 0xff, 0x1000); - #endif return 0; } static void intReset() { - #ifdef ICACHE_EMULATION memset(ICache_Addr, 0xff, 0x1000); memset(ICache_Code, 0xff, 0x1000); - #endif } void intExecute() { @@ -1106,17 +1072,15 @@ static void intClear(u32 Addr, u32 Size) { } void intNotify (int note, void *data) { - #ifdef ICACHE_EMULATION /* Gameblabla - Only clear the icache if it's isolated */ if (note == R3000ACPU_NOTIFY_CACHE_ISOLATED) { memset(ICache_Addr, 0xff, 0x1000); memset(ICache_Code, 0xff, 0x1000); } - #endif } -void applyConfig() { +void intApplyConfig() { assert(psxBSC[18] == psxCOP2 || psxBSC[18] == psxCOP2_stall); assert(psxBSC[50] == gteLWC2 || psxBSC[50] == gteLWC2_stall); assert(psxBSC[58] == gteSWC2 || psxBSC[58] == gteSWC2_stall); @@ -1148,10 +1112,16 @@ void applyConfig() { psxSPC[26] = psxDIV_stall; psxSPC[27] = psxDIVU_stall; } + + // dynarec may occasionally call the interpreter, in such a case the + // cache won't work (cache only works right if all fetches go through it) + if (!Config.icache_emulation || psxCpu != &psxInt) + fetch = fetchNoCache; + else + fetch = fetchICache; } static void intShutdown() { - #ifdef ICACHE_EMULATION if (ICache_Addr) { free(ICache_Addr); @@ -1163,23 +1133,11 @@ static void intShutdown() { free(ICache_Code); ICache_Code = NULL; } - #endif } // interpreter execution void execI() { - u32 *code; - #ifdef ICACHE_EMULATION - if (Config.icache_emulation) - { - code = Read_ICache(psxRegs.pc); - } - else - #endif - { - code = (u32 *)PSXM(psxRegs.pc); - } - psxRegs.code = ((code == NULL) ? 
0 : SWAP32(*code)); + psxRegs.code = fetch(psxRegs.pc); debugI(); @@ -1197,9 +1155,7 @@ R3000Acpu psxInt = { intExecute, intExecuteBlock, intClear, -#ifdef ICACHE_EMULATION intNotify, -#endif - applyConfig, + intApplyConfig, intShutdown }; diff --git a/libpcsxcore/psxinterpreter.h b/libpcsxcore/psxinterpreter.h index 1c97689f1..89dd7ea16 100644 --- a/libpcsxcore/psxinterpreter.h +++ b/libpcsxcore/psxinterpreter.h @@ -1,4 +1,7 @@ +extern u32 (*fetch)(u32 pc); + // called by "new_dynarec" void execI(); void psxNULL(); +void intApplyConfig(); diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index c09965dc1..52d275937 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -389,10 +389,8 @@ void psxMemWrite32(u32 mem, u32 value) { memset(psxMemWLUT + 0x0000, 0, 0x80 * sizeof(void *)); memset(psxMemWLUT + 0x8000, 0, 0x80 * sizeof(void *)); memset(psxMemWLUT + 0xa000, 0, 0x80 * sizeof(void *)); -#ifdef ICACHE_EMULATION /* Required for icache interpreter otherwise Armored Core won't boot on icache interpreter */ psxCpu->Notify(R3000ACPU_NOTIFY_CACHE_ISOLATED, NULL); -#endif break; case 0x00: case 0x1e988: if (writeok == 1) break; @@ -400,10 +398,8 @@ void psxMemWrite32(u32 mem, u32 value) { for (i = 0; i < 0x80; i++) psxMemWLUT[i + 0x0000] = (void *)&psxM[(i & 0x1f) << 16]; memcpy(psxMemWLUT + 0x8000, psxMemWLUT, 0x80 * sizeof(void *)); memcpy(psxMemWLUT + 0xa000, psxMemWLUT, 0x80 * sizeof(void *)); -#ifdef ICACHE_EMULATION /* Dynarecs might take this opportunity to flush their code cache */ psxCpu->Notify(R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL); -#endif break; default: #ifdef PSXMEM_LOG diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index e21d48832..7e6f16b48 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -25,6 +25,7 @@ #include "cdrom.h" #include "mdec.h" #include "gte.h" +#include "psxinterpreter.h" R3000Acpu *psxCpu = NULL; #ifdef DRC_DISABLE @@ -39,6 +40,7 @@ int psxInit() { psxCpu = &psxInt; } else psxCpu = &psxRec; #else 
+ Config.Cpu = CPU_INTERPRETER; psxCpu = &psxInt; #endif @@ -81,19 +83,7 @@ void psxShutdown() { } void psxException(u32 code, u32 bd) { - #ifdef ICACHE_EMULATION - /* Dynarecs may use this codepath and crash as a result. - * This should only be used for the interpreter. - Gameblabla - * */ - if (Config.icache_emulation && Config.Cpu == CPU_INTERPRETER) - { - psxRegs.code = SWAPu32(*Read_ICache(psxRegs.pc)); - } - else - #endif - { - psxRegs.code = PSXMu32(psxRegs.pc); - } + psxRegs.code = fetch(psxRegs.pc); if (!Config.HLE && ((((psxRegs.code) >> 24) & 0xfe) == 0x4a)) { // "hokuto no ken" / "Crash Bandicot 2" ... diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 94d7d9555..cb72bf362 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -29,14 +29,11 @@ extern "C" { #include "psxcounters.h" #include "psxbios.h" -#ifdef ICACHE_EMULATION enum { R3000ACPU_NOTIFY_CACHE_ISOLATED = 0, R3000ACPU_NOTIFY_CACHE_UNISOLATED = 1, R3000ACPU_NOTIFY_DMA3_EXE_LOAD = 2 }; -extern uint32_t *Read_ICache(uint32_t pc); -#endif typedef struct { int (*Init)(); @@ -44,9 +41,7 @@ typedef struct { void (*Execute)(); /* executes up to a break */ void (*ExecuteBlock)(); /* executes up to a jump */ void (*Clear)(u32 Addr, u32 Size); -#ifdef ICACHE_EMULATION void (*Notify)(int note, void *data); -#endif void (*ApplyConfig)(); void (*Shutdown)(); } R3000Acpu; From cf95b4f0e912b19298e38ae60dd66a9f4a773e9b Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 22 Nov 2021 22:33:52 +0200 Subject: [PATCH 084/597] drc: reduce memory usage mostly automated conversion text data bss dec hex filename 137712 0 2753296 2891008 2c1d00 libpcsxcore/new_dynarec/new_dynarec.o 132720 0 2732816 2865536 2bb980 libpcsxcore/new_dynarec/new_dynarec.o --- libpcsxcore/new_dynarec/assem_arm.c | 48 +- libpcsxcore/new_dynarec/assem_arm64.c | 46 +- libpcsxcore/new_dynarec/new_dynarec.c | 2406 ++++++++++++------------- 3 files changed, 1204 insertions(+), 1296 deletions(-) diff --git 
a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 4ccd19fe7..4ff1afd67 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -1683,10 +1683,10 @@ static void do_readstub(int n) u_int reglist=stubs[n].e; const signed char *i_regmap=i_regs->regmap; int rt; - if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) { + if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) { rt=get_reg(i_regmap,FTEMP); }else{ - rt=get_reg(i_regmap,rt1[i]); + rt=get_reg(i_regmap,dops[i].rt1); } assert(rs>=0); int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0; @@ -1697,7 +1697,7 @@ static void do_readstub(int n) temp=r; break; } } - if(rt>=0&&rt1[i]!=0) + if(rt>=0&&dops[i].rt1!=0) reglist&=~(1<=0&&rt1[i]!=0)) { + if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { switch(type) { case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break; case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break; @@ -1743,7 +1743,7 @@ static void do_readstub(int n) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2); emit_far_call(handler); - if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { + if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { mov_loadtype_adj(type,0,rt); } if(restore_jump) @@ -1767,7 +1767,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, return; handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr); if (handler == NULL) { - if(rt<0||rt1[i]==0) + if(rt<0||dops[i].rt1==0) return; if(addr!=host_addr) emit_movimm_from(addr,rs,host_addr,rs); @@ -1792,7 +1792,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, } // call a memhandler - if(rt>=0&&rt1[i]!=0) + if(rt>=0&&dops[i].rt1!=0) reglist&=~(1<=0&&rt1[i]!=0) { + if(rt>=0&&dops[i].rt1!=0) { switch(type) { case LOADB_STUB: emit_signextend8(0,rt); break; case LOADBU_STUB: emit_andimm(0,0xff,rt); break; @@ -1839,10 +1839,10 @@ static 
void do_writestub(int n) u_int reglist=stubs[n].e; const signed char *i_regmap=i_regs->regmap; int rt,r; - if(itype[i]==C1LS||itype[i]==C2LS) { + if(dops[i].itype==C1LS||dops[i].itype==C2LS) { rt=get_reg(i_regmap,r=FTEMP); }else{ - rt=get_reg(i_regmap,r=rs2[i]); + rt=get_reg(i_regmap,r=dops[i].rs2); } assert(rs>=0); assert(rt>=0); @@ -2187,14 +2187,14 @@ static void multdiv_assemble_arm(int i,struct regstat *i_regs) // case 0x1D: DMULTU // case 0x1E: DDIV // case 0x1F: DDIVU - if(rs1[i]&&rs2[i]) + if(dops[i].rs1&&dops[i].rs2) { - if((opcode2[i]&4)==0) // 32-bit + if((dops[i].opcode2&4)==0) // 32-bit { - if(opcode2[i]==0x18) // MULT + if(dops[i].opcode2==0x18) // MULT { - signed char m1=get_reg(i_regs->regmap,rs1[i]); - signed char m2=get_reg(i_regs->regmap,rs2[i]); + signed char m1=get_reg(i_regs->regmap,dops[i].rs1); + signed char m2=get_reg(i_regs->regmap,dops[i].rs2); signed char hi=get_reg(i_regs->regmap,HIREG); signed char lo=get_reg(i_regs->regmap,LOREG); assert(m1>=0); @@ -2203,10 +2203,10 @@ static void multdiv_assemble_arm(int i,struct regstat *i_regs) assert(lo>=0); emit_smull(m1,m2,hi,lo); } - if(opcode2[i]==0x19) // MULTU + if(dops[i].opcode2==0x19) // MULTU { - signed char m1=get_reg(i_regs->regmap,rs1[i]); - signed char m2=get_reg(i_regs->regmap,rs2[i]); + signed char m1=get_reg(i_regs->regmap,dops[i].rs1); + signed char m2=get_reg(i_regs->regmap,dops[i].rs2); signed char hi=get_reg(i_regs->regmap,HIREG); signed char lo=get_reg(i_regs->regmap,LOREG); assert(m1>=0); @@ -2215,10 +2215,10 @@ static void multdiv_assemble_arm(int i,struct regstat *i_regs) assert(lo>=0); emit_umull(m1,m2,hi,lo); } - if(opcode2[i]==0x1A) // DIV + if(dops[i].opcode2==0x1A) // DIV { - signed char d1=get_reg(i_regs->regmap,rs1[i]); - signed char d2=get_reg(i_regs->regmap,rs2[i]); + signed char d1=get_reg(i_regs->regmap,dops[i].rs1); + signed char d2=get_reg(i_regs->regmap,dops[i].rs2); assert(d1>=0); assert(d2>=0); signed char quotient=get_reg(i_regs->regmap,LOREG); @@ -2253,10 
+2253,10 @@ static void multdiv_assemble_arm(int i,struct regstat *i_regs) emit_test(d1,d1); emit_negmi(remainder,remainder); } - if(opcode2[i]==0x1B) // DIVU + if(dops[i].opcode2==0x1B) // DIVU { - signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend - signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor + signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend + signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor assert(d1>=0); assert(d2>=0); signed char quotient=get_reg(i_regs->regmap,LOREG); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 14d715634..ea1b8a318 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1380,10 +1380,10 @@ static void do_readstub(int n) u_int reglist = stubs[n].e; const signed char *i_regmap = i_regs->regmap; int rt; - if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) { + if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) { rt=get_reg(i_regmap,FTEMP); }else{ - rt=get_reg(i_regmap,rt1[i]); + rt=get_reg(i_regmap,dops[i].rt1); } assert(rs>=0); int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0; @@ -1395,7 +1395,7 @@ static void do_readstub(int n) break; } } - if(rt>=0&&rt1[i]!=0) + if(rt>=0&&dops[i].rt1!=0) reglist&=~(1<=0&&rt1[i]!=0)) { + if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { switch(type) { case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break; case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break; @@ -1445,7 +1445,7 @@ static void do_readstub(int n) emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2); emit_far_call(handler); // (no cycle reload after read) - if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { + if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { loadstore_extend(type,0,rt); } if(restore_jump) @@ -1469,7 +1469,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, // return; handler = 
get_direct_memhandler(mem_rtab, addr, type, &host_addr); if (handler == NULL) { - if(rt<0||rt1[i]==0) + if(rt<0||dops[i].rt1==0) return; if (addr != host_addr) { if (host_addr >= 0x100000000ull) @@ -1497,7 +1497,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, } // call a memhandler - if(rt>=0&&rt1[i]!=0) + if(rt>=0&&dops[i].rt1!=0) reglist&=~(1<=0&&rt1[i]!=0) + if(rt>=0&&dops[i].rt1!=0) loadstore_extend(type, 0, rt); restore_regs(reglist); } @@ -1534,10 +1534,10 @@ static void do_writestub(int n) u_int reglist=stubs[n].e; signed char *i_regmap=i_regs->regmap; int rt,r; - if(itype[i]==C1LS||itype[i]==C2LS) { + if(dops[i].itype==C1LS||dops[i].itype==C2LS) { rt=get_reg(i_regmap,r=FTEMP); }else{ - rt=get_reg(i_regmap,r=rs2[i]); + rt=get_reg(i_regmap,r=dops[i].rs2); } assert(rs>=0); assert(rt>=0); @@ -1885,15 +1885,15 @@ static void multdiv_assemble_arm64(int i,struct regstat *i_regs) // case 0x19: MULTU // case 0x1A: DIV // case 0x1B: DIVU - if(rs1[i]&&rs2[i]) + if(dops[i].rs1&&dops[i].rs2) { - switch(opcode2[i]) + switch(dops[i].opcode2) { case 0x18: // MULT case 0x19: // MULTU { - signed char m1=get_reg(i_regs->regmap,rs1[i]); - signed char m2=get_reg(i_regs->regmap,rs2[i]); + signed char m1=get_reg(i_regs->regmap,dops[i].rs1); + signed char m2=get_reg(i_regs->regmap,dops[i].rs2); signed char hi=get_reg(i_regs->regmap,HIREG); signed char lo=get_reg(i_regs->regmap,LOREG); assert(m1>=0); @@ -1901,7 +1901,7 @@ static void multdiv_assemble_arm64(int i,struct regstat *i_regs) assert(hi>=0); assert(lo>=0); - if(opcode2[i]==0x18) // MULT + if(dops[i].opcode2==0x18) // MULT emit_smull(m1,m2,hi); else // MULTU emit_umull(m1,m2,hi); @@ -1913,8 +1913,8 @@ static void multdiv_assemble_arm64(int i,struct regstat *i_regs) case 0x1A: // DIV case 0x1B: // DIVU { - signed char numerator=get_reg(i_regs->regmap,rs1[i]); - signed char denominator=get_reg(i_regs->regmap,rs2[i]); + signed char numerator=get_reg(i_regs->regmap,dops[i].rs1); + signed char 
denominator=get_reg(i_regs->regmap,dops[i].rs2); signed char quotient=get_reg(i_regs->regmap,LOREG); signed char remainder=get_reg(i_regs->regmap,HIREG); assert(numerator>=0); @@ -1922,7 +1922,7 @@ static void multdiv_assemble_arm64(int i,struct regstat *i_regs) assert(quotient>=0); assert(remainder>=0); - if (opcode2[i] == 0x1A) // DIV + if (dops[i].opcode2 == 0x1A) // DIV emit_sdiv(numerator,denominator,quotient); else // DIVU emit_udiv(numerator,denominator,quotient); @@ -1930,7 +1930,7 @@ static void multdiv_assemble_arm64(int i,struct regstat *i_regs) // div 0 quotient (remainder is already correct) host_tempreg_acquire(); - if (opcode2[i] == 0x1A) // DIV + if (dops[i].opcode2 == 0x1A) // DIV emit_sub_asrimm(0,numerator,31,HOST_TEMPREG); else emit_movimm(~0,HOST_TEMPREG); @@ -1947,15 +1947,15 @@ static void multdiv_assemble_arm64(int i,struct regstat *i_regs) { signed char hr=get_reg(i_regs->regmap,HIREG); signed char lr=get_reg(i_regs->regmap,LOREG); - if ((opcode2[i]==0x1A || opcode2[i]==0x1B) && rs2[i]==0) // div 0 + if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0 { - if (rs1[i]) { - signed char numerator = get_reg(i_regs->regmap, rs1[i]); + if (dops[i].rs1) { + signed char numerator = get_reg(i_regs->regmap, dops[i].rs1); assert(numerator >= 0); if (hr >= 0) emit_mov(numerator,hr); if (lr >= 0) { - if (opcode2[i] == 0x1A) // DIV + if (dops[i].opcode2 == 0x1A) // DIV emit_sub_asrimm(0,numerator,31,lr); else emit_movimm(~0,lr); diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index d6a760347..72f18bff1 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -161,6 +161,22 @@ struct link_entry u_int ext; }; +static struct decoded_insn +{ + u_char itype; + u_char opcode; + u_char opcode2; + u_char rs1; + u_char rs2; + u_char rt1; + u_char rt2; + u_char lt1; + u_char bt:1; + u_char likely:1; + u_char ooo:1; + u_char is_ds:1; +} dops[MAXBLOCK]; + 
// used by asm: u_char *out; struct ht_entry hash_table[65536] __attribute__((aligned(16))); @@ -171,17 +187,6 @@ struct link_entry static u_int start; static u_int *source; static char insn[MAXBLOCK][10]; - static u_char itype[MAXBLOCK]; - static u_char opcode[MAXBLOCK]; - static u_char opcode2[MAXBLOCK]; - static u_char bt[MAXBLOCK]; - static u_char rs1[MAXBLOCK]; - static u_char rs2[MAXBLOCK]; - static u_char rt1[MAXBLOCK]; - static u_char rt2[MAXBLOCK]; - static u_char dep1[MAXBLOCK]; - static u_char dep2[MAXBLOCK]; - static u_char lt1[MAXBLOCK]; static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs static uint64_t gte_rt[MAXBLOCK]; static uint64_t gte_unneeded[MAXBLOCK]; @@ -192,9 +197,6 @@ struct link_entry static u_int smrv_weak_next; static int imm[MAXBLOCK]; static u_int ba[MAXBLOCK]; - static char likely[MAXBLOCK]; - static char is_ds[MAXBLOCK]; - static char ooo[MAXBLOCK]; static uint64_t unneeded_reg[MAXBLOCK]; static uint64_t branch_unneeded_reg[MAXBLOCK]; static signed char regmap_pre[MAXBLOCK][HOST_REGS]; // pre-instruction i? @@ -476,18 +478,18 @@ static int CLOCK_ADJUST(int x) // is the op an unconditional jump? 
static int is_ujump(int i) { - return itype[i] == UJUMP || itype[i] == RJUMP + return dops[i].itype == UJUMP || dops[i].itype == RJUMP || (source[i] >> 16) == 0x1000; // beq r0, r0, offset // b offset } static int is_jump(int i) { - return itype[i] == RJUMP || itype[i] == UJUMP || itype[i] == CJUMP || itype[i] == SJUMP; + return dops[i].itype == RJUMP || dops[i].itype == UJUMP || dops[i].itype == CJUMP || dops[i].itype == SJUMP; } static int ds_writes_rjump_rs(int i) { - return rs1[i] != 0 && (rs1[i] == rt1[i+1] || rs1[i] == rt2[i+1]); + return dops[i].rs1 != 0 && (dops[i].rs1 == dops[i+1].rt1 || dops[i].rs1 == dops[i+1].rt2); } static u_int get_page(u_int vaddr) @@ -706,22 +708,22 @@ void lsn(u_char hsn[], int i, int *preferred_reg) } for(;j>=0;j--) { - if(rs1[i+j]) hsn[rs1[i+j]]=j; - if(rs2[i+j]) hsn[rs2[i+j]]=j; - if(rt1[i+j]) hsn[rt1[i+j]]=j; - if(rt2[i+j]) hsn[rt2[i+j]]=j; - if(itype[i+j]==STORE || itype[i+j]==STORELR) { + if(dops[i+j].rs1) hsn[dops[i+j].rs1]=j; + if(dops[i+j].rs2) hsn[dops[i+j].rs2]=j; + if(dops[i+j].rt1) hsn[dops[i+j].rt1]=j; + if(dops[i+j].rt2) hsn[dops[i+j].rt2]=j; + if(dops[i+j].itype==STORE || dops[i+j].itype==STORELR) { // Stores can allocate zero - hsn[rs1[i+j]]=j; - hsn[rs2[i+j]]=j; + hsn[dops[i+j].rs1]=j; + hsn[dops[i+j].rs2]=j; } // On some architectures stores need invc_ptr #if defined(HOST_IMM8) - if(itype[i+j]==STORE || itype[i+j]==STORELR || (opcode[i+j]&0x3b)==0x39 || (opcode[i+j]&0x3b)==0x3a) { + if(dops[i+j].itype==STORE || dops[i+j].itype==STORELR || (dops[i+j].opcode&0x3b)==0x39 || (dops[i+j].opcode&0x3b)==0x3a) { hsn[INVCP]=j; } #endif - if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP)) + if(i+j>=0&&(dops[i+j].itype==UJUMP||dops[i+j].itype==CJUMP||dops[i+j].itype==SJUMP)) { hsn[CCREG]=j; b=j; @@ -736,37 +738,37 @@ void lsn(u_char hsn[], int i, int *preferred_reg) j=7-b;if(t+j>=slen) j=slen-t-1; for(;j>=0;j--) { - if(rs1[t+j]) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2; - if(rs2[t+j]) 
if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2; - //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2; - //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2; + if(dops[t+j].rs1) if(hsn[dops[t+j].rs1]>j+b+2) hsn[dops[t+j].rs1]=j+b+2; + if(dops[t+j].rs2) if(hsn[dops[t+j].rs2]>j+b+2) hsn[dops[t+j].rs2]=j+b+2; + //if(dops[t+j].rt1) if(hsn[dops[t+j].rt1]>j+b+2) hsn[dops[t+j].rt1]=j+b+2; + //if(dops[t+j].rt2) if(hsn[dops[t+j].rt2]>j+b+2) hsn[dops[t+j].rt2]=j+b+2; } } // TODO: preferred register based on backward branch } // Delay slot should preferably not overwrite branch conditions or cycle count if (i > 0 && is_jump(i-1)) { - if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1; - if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1; + if(dops[i-1].rs1) if(hsn[dops[i-1].rs1]>1) hsn[dops[i-1].rs1]=1; + if(dops[i-1].rs2) if(hsn[dops[i-1].rs2]>1) hsn[dops[i-1].rs2]=1; hsn[CCREG]=1; // ...or hash tables hsn[RHASH]=1; hsn[RHTBL]=1; } // Coprocessor load/store needs FTEMP, even if not declared - if(itype[i]==C1LS||itype[i]==C2LS) { + if(dops[i].itype==C1LS||dops[i].itype==C2LS) { hsn[FTEMP]=0; } // Load L/R also uses FTEMP as a temporary register - if(itype[i]==LOADLR) { + if(dops[i].itype==LOADLR) { hsn[FTEMP]=0; } // Also SWL/SWR/SDL/SDR - if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) { + if(dops[i].opcode==0x2a||dops[i].opcode==0x2e||dops[i].opcode==0x2c||dops[i].opcode==0x2d) { hsn[FTEMP]=0; } // Don't remove the miniht registers - if(itype[i]==UJUMP||itype[i]==RJUMP) + if(dops[i].itype==UJUMP||dops[i].itype==RJUMP) { hsn[RHASH]=0; hsn[RHTBL]=0; @@ -797,40 +799,21 @@ int needed_again(int r, int i) j++; break; } - if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||itype[i+j]==INTCALL||((source[i+j]&0xfc00003f)==0x0d)) + if(dops[i+j].itype==SYSCALL||dops[i+j].itype==HLECALL||dops[i+j].itype==INTCALL||((source[i+j]&0xfc00003f)==0x0d)) { break; } } for(;j>=1;j--) { - if(rs1[i+j]==r) rn=j; - if(rs2[i+j]==r) rn=j; + if(dops[i+j].rs1==r) rn=j; + 
if(dops[i+j].rs2==r) rn=j; if((unneeded_reg[i+j]>>r)&1) rn=10; - if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP)) + if(i+j>=0&&(dops[i+j].itype==UJUMP||dops[i+j].itype==CJUMP||dops[i+j].itype==SJUMP)) { b=j; } } - /* - if(b>=0) - { - if(ba[i+b]>=start && ba[i+b]<(start+slen*4)) - { - // Follow first branch - int o=rn; - int t=(ba[i+b]-start)>>2; - j=7-b;if(t+j>=slen) j=slen-t-1; - for(;j>=0;j--) - { - if(!((unneeded_reg[t+j]>>r)&1)) { - if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2; - if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2; - } - else rn=o; - } - } - }*/ if(rn<10) return 1; (void)b; return 0; @@ -856,14 +839,14 @@ int loop_reg(int i, int r, int hr) } k=0; if(i>0){ - if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP) + if(dops[i-1].itype==UJUMP||dops[i-1].itype==CJUMP||dops[i-1].itype==SJUMP) k--; } for(;k>r)&1) return hr; - if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP)) + if(i+k>=0&&(dops[i+k].itype==UJUMP||dops[i+k].itype==CJUMP||dops[i+k].itype==SJUMP)) { if(ba[i+k]>=start && ba[i+k]<(start+i*4)) { @@ -886,8 +869,8 @@ void alloc_all(struct regstat *cur,int i) for(hr=0;hrregmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&& - ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i])) + if(((cur->regmap[hr]&63)!=dops[i].rs1)&&((cur->regmap[hr]&63)!=dops[i].rs2)&& + ((cur->regmap[hr]&63)!=dops[i].rt1)&&((cur->regmap[hr]&63)!=dops[i].rt2)) { cur->regmap[hr]=-1; cur->dirty&=~(1<0) { for(hr=0;hrregmap[hr]==-1) { - if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) { + if(regs[i-1].regmap[hr]!=dops[i-1].rs1&®s[i-1].regmap[hr]!=dops[i-1].rs2&®s[i-1].regmap[hr]!=dops[i-1].rt1&®s[i-1].regmap[hr]!=dops[i-1].rt2) { cur->regmap[hr]=reg; cur->dirty&=~(1<isconst&=~(1<0) { // Don't evict the cycle count at entry points, otherwise the entry // stub will have to write it. 
- if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2; + if(dops[i].bt&&hsn[CCREG]>2) hsn[CCREG]=2; + if(i>1&&hsn[CCREG]>2&&(dops[i-2].itype==RJUMP||dops[i-2].itype==UJUMP||dops[i-2].itype==CJUMP||dops[i-2].itype==SJUMP)) hsn[CCREG]=2; for(j=10;j>=3;j--) { // Alloc preferred register if available @@ -1491,7 +1474,7 @@ static void alloc_reg(struct regstat *cur,int i,signed char reg) } for(r=1;r<=MAXREG;r++) { - if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { + if(hsn[r]==j&&r!=dops[i-1].rs1&&r!=dops[i-1].rs2&&r!=dops[i-1].rt1&&r!=dops[i-1].rt2) { for(hr=0;hrregmap[hr]==r) { @@ -1578,13 +1561,13 @@ static void alloc_reg_temp(struct regstat *cur,int i,signed char reg) if(i>0) { // Don't evict the cycle count at entry points, otherwise the entry // stub will have to write it. - if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2; + if(dops[i].bt&&hsn[CCREG]>2) hsn[CCREG]=2; + if(i>1&&hsn[CCREG]>2&&(dops[i-2].itype==RJUMP||dops[i-2].itype==UJUMP||dops[i-2].itype==CJUMP||dops[i-2].itype==SJUMP)) hsn[CCREG]=2; for(j=10;j>=3;j--) { for(r=1;r<=MAXREG;r++) { - if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { + if(hsn[r]==j&&r!=dops[i-1].rs1&&r!=dops[i-1].rs2&&r!=dops[i-1].rt1&&r!=dops[i-1].rt2) { for(hr=0;hr2) { if(cur->regmap[hr]==r) { @@ -1620,58 +1603,58 @@ static void alloc_reg_temp(struct regstat *cur,int i,signed char reg) static void mov_alloc(struct regstat *current,int i) { - if (rs1[i] == HIREG || rs1[i] == LOREG) { + if (dops[i].rs1 == HIREG || dops[i].rs1 == LOREG) { // logically this is needed but just won't work, no idea why //alloc_cc(current,i); // for stalls //dirty_reg(current,CCREG); } // Note: Don't need to actually alloc the source registers - //alloc_reg(current,i,rs1[i]); - alloc_reg(current,i,rt1[i]); + 
//alloc_reg(current,i,dops[i].rs1); + alloc_reg(current,i,dops[i].rt1); - clear_const(current,rs1[i]); - clear_const(current,rt1[i]); - dirty_reg(current,rt1[i]); + clear_const(current,dops[i].rs1); + clear_const(current,dops[i].rt1); + dirty_reg(current,dops[i].rt1); } static void shiftimm_alloc(struct regstat *current,int i) { - if(opcode2[i]<=0x3) // SLL/SRL/SRA + if(dops[i].opcode2<=0x3) // SLL/SRL/SRA { - if(rt1[i]) { - if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]); - else lt1[i]=rs1[i]; - alloc_reg(current,i,rt1[i]); - dirty_reg(current,rt1[i]); - if(is_const(current,rs1[i])) { - int v=get_const(current,rs1[i]); - if(opcode2[i]==0x00) set_const(current,rt1[i],v<>imm[i]); - if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]); + if(dops[i].rt1) { + if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); + else dops[i].lt1=dops[i].rs1; + alloc_reg(current,i,dops[i].rt1); + dirty_reg(current,dops[i].rt1); + if(is_const(current,dops[i].rs1)) { + int v=get_const(current,dops[i].rs1); + if(dops[i].opcode2==0x00) set_const(current,dops[i].rt1,v<>imm[i]); + if(dops[i].opcode2==0x03) set_const(current,dops[i].rt1,v>>imm[i]); } - else clear_const(current,rt1[i]); + else clear_const(current,dops[i].rt1); } } else { - clear_const(current,rs1[i]); - clear_const(current,rt1[i]); + clear_const(current,dops[i].rs1); + clear_const(current,dops[i].rt1); } - if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA + if(dops[i].opcode2>=0x38&&dops[i].opcode2<=0x3b) // DSLL/DSRL/DSRA { assert(0); } - if(opcode2[i]==0x3c) // DSLL32 + if(dops[i].opcode2==0x3c) // DSLL32 { assert(0); } - if(opcode2[i]==0x3e) // DSRL32 + if(dops[i].opcode2==0x3e) // DSRL32 { assert(0); } - if(opcode2[i]==0x3f) // DSRA32 + if(dops[i].opcode2==0x3f) // DSRA32 { assert(0); } @@ -1679,125 +1662,125 @@ static void shiftimm_alloc(struct regstat *current,int i) static void shift_alloc(struct regstat *current,int i) { - if(rt1[i]) { - if(opcode2[i]<=0x07) // 
SLLV/SRLV/SRAV + if(dops[i].rt1) { + if(dops[i].opcode2<=0x07) // SLLV/SRLV/SRAV { - if(rs1[i]) alloc_reg(current,i,rs1[i]); - if(rs2[i]) alloc_reg(current,i,rs2[i]); - alloc_reg(current,i,rt1[i]); - if(rt1[i]==rs2[i]) { + if(dops[i].rs1) alloc_reg(current,i,dops[i].rs1); + if(dops[i].rs2) alloc_reg(current,i,dops[i].rs2); + alloc_reg(current,i,dops[i].rt1); + if(dops[i].rt1==dops[i].rs2) { alloc_reg_temp(current,i,-1); minimum_free_regs[i]=1; } } else { // DSLLV/DSRLV/DSRAV assert(0); } - clear_const(current,rs1[i]); - clear_const(current,rs2[i]); - clear_const(current,rt1[i]); - dirty_reg(current,rt1[i]); + clear_const(current,dops[i].rs1); + clear_const(current,dops[i].rs2); + clear_const(current,dops[i].rt1); + dirty_reg(current,dops[i].rt1); } } static void alu_alloc(struct regstat *current,int i) { - if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU - if(rt1[i]) { - if(rs1[i]&&rs2[i]) { - alloc_reg(current,i,rs1[i]); - alloc_reg(current,i,rs2[i]); + if(dops[i].opcode2>=0x20&&dops[i].opcode2<=0x23) { // ADD/ADDU/SUB/SUBU + if(dops[i].rt1) { + if(dops[i].rs1&&dops[i].rs2) { + alloc_reg(current,i,dops[i].rs1); + alloc_reg(current,i,dops[i].rs2); } else { - if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]); - if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]); + if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); + if(dops[i].rs2&&needed_again(dops[i].rs2,i)) alloc_reg(current,i,dops[i].rs2); } - alloc_reg(current,i,rt1[i]); + alloc_reg(current,i,dops[i].rt1); } } - if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU - if(rt1[i]) { - alloc_reg(current,i,rs1[i]); - alloc_reg(current,i,rs2[i]); - alloc_reg(current,i,rt1[i]); + if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU + if(dops[i].rt1) { + alloc_reg(current,i,dops[i].rs1); + alloc_reg(current,i,dops[i].rs2); + alloc_reg(current,i,dops[i].rt1); } } - if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR - if(rt1[i]) { - 
if(rs1[i]&&rs2[i]) { - alloc_reg(current,i,rs1[i]); - alloc_reg(current,i,rs2[i]); + if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR + if(dops[i].rt1) { + if(dops[i].rs1&&dops[i].rs2) { + alloc_reg(current,i,dops[i].rs1); + alloc_reg(current,i,dops[i].rs2); } else { - if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]); - if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]); + if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); + if(dops[i].rs2&&needed_again(dops[i].rs2,i)) alloc_reg(current,i,dops[i].rs2); } - alloc_reg(current,i,rt1[i]); + alloc_reg(current,i,dops[i].rt1); } } - if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU + if(dops[i].opcode2>=0x2c&&dops[i].opcode2<=0x2f) { // DADD/DADDU/DSUB/DSUBU assert(0); } - clear_const(current,rs1[i]); - clear_const(current,rs2[i]); - clear_const(current,rt1[i]); - dirty_reg(current,rt1[i]); + clear_const(current,dops[i].rs1); + clear_const(current,dops[i].rs2); + clear_const(current,dops[i].rt1); + dirty_reg(current,dops[i].rt1); } static void imm16_alloc(struct regstat *current,int i) { - if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]); - else lt1[i]=rs1[i]; - if(rt1[i]) alloc_reg(current,i,rt1[i]); - if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU + if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); + else dops[i].lt1=dops[i].rs1; + if(dops[i].rt1) alloc_reg(current,i,dops[i].rt1); + if(dops[i].opcode==0x18||dops[i].opcode==0x19) { // DADDI/DADDIU assert(0); } - else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU - clear_const(current,rs1[i]); - clear_const(current,rt1[i]); + else if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU + clear_const(current,dops[i].rs1); + clear_const(current,dops[i].rt1); } - else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI - if(is_const(current,rs1[i])) { - int v=get_const(current,rs1[i]); - if(opcode[i]==0x0c) 
set_const(current,rt1[i],v&imm[i]); - if(opcode[i]==0x0d) set_const(current,rt1[i],v|imm[i]); - if(opcode[i]==0x0e) set_const(current,rt1[i],v^imm[i]); + else if(dops[i].opcode>=0x0c&&dops[i].opcode<=0x0e) { // ANDI/ORI/XORI + if(is_const(current,dops[i].rs1)) { + int v=get_const(current,dops[i].rs1); + if(dops[i].opcode==0x0c) set_const(current,dops[i].rt1,v&imm[i]); + if(dops[i].opcode==0x0d) set_const(current,dops[i].rt1,v|imm[i]); + if(dops[i].opcode==0x0e) set_const(current,dops[i].rt1,v^imm[i]); } - else clear_const(current,rt1[i]); + else clear_const(current,dops[i].rt1); } - else if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU - if(is_const(current,rs1[i])) { - int v=get_const(current,rs1[i]); - set_const(current,rt1[i],v+imm[i]); + else if(dops[i].opcode==0x08||dops[i].opcode==0x09) { // ADDI/ADDIU + if(is_const(current,dops[i].rs1)) { + int v=get_const(current,dops[i].rs1); + set_const(current,dops[i].rt1,v+imm[i]); } - else clear_const(current,rt1[i]); + else clear_const(current,dops[i].rt1); } else { - set_const(current,rt1[i],imm[i]<<16); // LUI + set_const(current,dops[i].rt1,imm[i]<<16); // LUI } - dirty_reg(current,rt1[i]); + dirty_reg(current,dops[i].rt1); } static void load_alloc(struct regstat *current,int i) { - clear_const(current,rt1[i]); - //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt? - if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register - if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]); - if(rt1[i]&&!((current->u>>rt1[i])&1)) { - alloc_reg(current,i,rt1[i]); - assert(get_reg(current->regmap,rt1[i])>=0); - if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD + clear_const(current,dops[i].rt1); + //if(dops[i].rs1!=dops[i].rt1&&needed_again(dops[i].rs1,i)) clear_const(current,dops[i].rs1); // Does this help or hurt? 
+ if(!dops[i].rs1) current->u&=~1LL; // Allow allocating r0 if it's the source register + if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); + if(dops[i].rt1&&!((current->u>>dops[i].rt1)&1)) { + alloc_reg(current,i,dops[i].rt1); + assert(get_reg(current->regmap,dops[i].rt1)>=0); + if(dops[i].opcode==0x27||dops[i].opcode==0x37) // LWU/LD { assert(0); } - else if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR + else if(dops[i].opcode==0x1A||dops[i].opcode==0x1B) // LDL/LDR { assert(0); } - dirty_reg(current,rt1[i]); + dirty_reg(current,dops[i].rt1); // LWL/LWR need a temporary register for the old value - if(opcode[i]==0x22||opcode[i]==0x26) + if(dops[i].opcode==0x22||dops[i].opcode==0x26) { alloc_reg(current,i,FTEMP); alloc_reg_temp(current,i,-1); @@ -1808,13 +1791,13 @@ static void load_alloc(struct regstat *current,int i) { // Load to r0 or unneeded register (dummy load) // but we still need a register to calculate the address - if(opcode[i]==0x22||opcode[i]==0x26) + if(dops[i].opcode==0x22||dops[i].opcode==0x26) { alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary } alloc_reg_temp(current,i,-1); minimum_free_regs[i]=1; - if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR + if(dops[i].opcode==0x1A||dops[i].opcode==0x1B) // LDL/LDR { assert(0); } @@ -1823,18 +1806,18 @@ static void load_alloc(struct regstat *current,int i) void store_alloc(struct regstat *current,int i) { - clear_const(current,rs2[i]); - if(!(rs2[i])) current->u&=~1LL; // Allow allocating r0 if necessary - if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]); - alloc_reg(current,i,rs2[i]); - if(opcode[i]==0x2c||opcode[i]==0x2d||opcode[i]==0x3f) { // 64-bit SDL/SDR/SD + clear_const(current,dops[i].rs2); + if(!(dops[i].rs2)) current->u&=~1LL; // Allow allocating r0 if necessary + if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); + alloc_reg(current,i,dops[i].rs2); + if(dops[i].opcode==0x2c||dops[i].opcode==0x2d||dops[i].opcode==0x3f) { // 64-bit SDL/SDR/SD 
assert(0); } #if defined(HOST_IMM8) // On CPUs without 32-bit immediates we need a pointer to invalid_code else alloc_reg(current,i,INVCP); #endif - if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) { // SWL/SWL/SDL/SDR + if(dops[i].opcode==0x2a||dops[i].opcode==0x2e||dops[i].opcode==0x2c||dops[i].opcode==0x2d) { // SWL/SWL/SDL/SDR alloc_reg(current,i,FTEMP); } // We need a temporary register for address generation @@ -1844,17 +1827,17 @@ void store_alloc(struct regstat *current,int i) void c1ls_alloc(struct regstat *current,int i) { - //clear_const(current,rs1[i]); // FIXME - clear_const(current,rt1[i]); - if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]); + //clear_const(current,dops[i].rs1); // FIXME + clear_const(current,dops[i].rt1); + if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); alloc_reg(current,i,CSREG); // Status alloc_reg(current,i,FTEMP); - if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1 + if(dops[i].opcode==0x35||dops[i].opcode==0x3d) { // 64-bit LDC1/SDC1 assert(0); } #if defined(HOST_IMM8) // On CPUs without 32-bit immediates we need a pointer to invalid_code - else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1 + else if((dops[i].opcode&0x3b)==0x39) // SWC1/SDC1 alloc_reg(current,i,INVCP); #endif // We need a temporary register for address generation @@ -1863,12 +1846,12 @@ void c1ls_alloc(struct regstat *current,int i) void c2ls_alloc(struct regstat *current,int i) { - clear_const(current,rt1[i]); - if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]); + clear_const(current,dops[i].rt1); + if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); alloc_reg(current,i,FTEMP); #if defined(HOST_IMM8) // On CPUs without 32-bit immediates we need a pointer to invalid_code - if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2 + if((dops[i].opcode&0x3b)==0x3a) // SWC2/SDC2 alloc_reg(current,i,INVCP); #endif // We need a temporary register for address generation @@ -1887,19 +1870,19 @@ void 
multdiv_alloc(struct regstat *current,int i) // case 0x1D: DMULTU // case 0x1E: DDIV // case 0x1F: DDIVU - clear_const(current,rs1[i]); - clear_const(current,rs2[i]); + clear_const(current,dops[i].rs1); + clear_const(current,dops[i].rs2); alloc_cc(current,i); // for stalls - if(rs1[i]&&rs2[i]) + if(dops[i].rs1&&dops[i].rs2) { - if((opcode2[i]&4)==0) // 32-bit + if((dops[i].opcode2&4)==0) // 32-bit { current->u&=~(1LL<u&=~(1LL< 3) // MTC2/CTC2 + else if (dops[i].opcode2 > 3) // MTC2/CTC2 { - if(rs1[i]){ - clear_const(current,rs1[i]); - alloc_reg(current,i,rs1[i]); + if(dops[i].rs1){ + clear_const(current,dops[i].rs1); + alloc_reg(current,i,dops[i].rs1); } else { current->u&=~1LL; @@ -1999,7 +1982,7 @@ void syscall_alloc(struct regstat *current,int i) void delayslot_alloc(struct regstat *current,int i) { - switch(itype[i]) { + switch(dops[i].itype) { case UJUMP: case CJUMP: case SJUMP: @@ -2067,28 +2050,28 @@ static void pagespan_alloc(struct regstat *current,int i) alloc_all(current,i); alloc_cc(current,i); dirty_reg(current,CCREG); - if(opcode[i]==3) // JAL + if(dops[i].opcode==3) // JAL { alloc_reg(current,i,31); dirty_reg(current,31); } - if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR + if(dops[i].opcode==0&&(dops[i].opcode2&0x3E)==8) // JR/JALR { - alloc_reg(current,i,rs1[i]); - if (rt1[i]!=0) { - alloc_reg(current,i,rt1[i]); - dirty_reg(current,rt1[i]); + alloc_reg(current,i,dops[i].rs1); + if (dops[i].rt1!=0) { + alloc_reg(current,i,dops[i].rt1); + dirty_reg(current,dops[i].rt1); } } - if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL + if((dops[i].opcode&0x2E)==4) // BEQ/BNE/BEQL/BNEL { - if(rs1[i]) alloc_reg(current,i,rs1[i]); - if(rs2[i]) alloc_reg(current,i,rs2[i]); + if(dops[i].rs1) alloc_reg(current,i,dops[i].rs1); + if(dops[i].rs2) alloc_reg(current,i,dops[i].rs2); } else - if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL + if((dops[i].opcode&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL { - if(rs1[i]) alloc_reg(current,i,rs1[i]); + if(dops[i].rs1) 
alloc_reg(current,i,dops[i].rs1); } //else ... } @@ -2172,69 +2155,69 @@ static void pass_args(int a0, int a1) static void alu_assemble(int i,struct regstat *i_regs) { - if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU - if(rt1[i]) { + if(dops[i].opcode2>=0x20&&dops[i].opcode2<=0x23) { // ADD/ADDU/SUB/SUBU + if(dops[i].rt1) { signed char s1,s2,t; - t=get_reg(i_regs->regmap,rt1[i]); + t=get_reg(i_regs->regmap,dops[i].rt1); if(t>=0) { - s1=get_reg(i_regs->regmap,rs1[i]); - s2=get_reg(i_regs->regmap,rs2[i]); - if(rs1[i]&&rs2[i]) { + s1=get_reg(i_regs->regmap,dops[i].rs1); + s2=get_reg(i_regs->regmap,dops[i].rs2); + if(dops[i].rs1&&dops[i].rs2) { assert(s1>=0); assert(s2>=0); - if(opcode2[i]&2) emit_sub(s1,s2,t); + if(dops[i].opcode2&2) emit_sub(s1,s2,t); else emit_add(s1,s2,t); } - else if(rs1[i]) { + else if(dops[i].rs1) { if(s1>=0) emit_mov(s1,t); - else emit_loadreg(rs1[i],t); + else emit_loadreg(dops[i].rs1,t); } - else if(rs2[i]) { + else if(dops[i].rs2) { if(s2>=0) { - if(opcode2[i]&2) emit_neg(s2,t); + if(dops[i].opcode2&2) emit_neg(s2,t); else emit_mov(s2,t); } else { - emit_loadreg(rs2[i],t); - if(opcode2[i]&2) emit_neg(t,t); + emit_loadreg(dops[i].rs2,t); + if(dops[i].opcode2&2) emit_neg(t,t); } } else emit_zeroreg(t); } } } - if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU + if(dops[i].opcode2>=0x2c&&dops[i].opcode2<=0x2f) { // DADD/DADDU/DSUB/DSUBU assert(0); } - if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU - if(rt1[i]) { + if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU + if(dops[i].rt1) { signed char s1l,s2l,t; { - t=get_reg(i_regs->regmap,rt1[i]); + t=get_reg(i_regs->regmap,dops[i].rt1); //assert(t>=0); if(t>=0) { - s1l=get_reg(i_regs->regmap,rs1[i]); - s2l=get_reg(i_regs->regmap,rs2[i]); - if(rs2[i]==0) // rxregmap,dops[i].rs1); + s2l=get_reg(i_regs->regmap,dops[i].rs2); + if(dops[i].rs2==0) // rx=0); emit_shrimm(s1l,31,t); } else // SLTU (unsigned can not be less than zero, 0<0) emit_zeroreg(t); } 
- else if(rs1[i]==0) // r0=0); - if(opcode2[i]==0x2a) // SLT + if(dops[i].opcode2==0x2a) // SLT emit_set_gz32(s2l,t); else // SLTU (set if not zero) emit_set_nz32(s2l,t); } else{ assert(s1l>=0);assert(s2l>=0); - if(opcode2[i]==0x2a) // SLT + if(dops[i].opcode2==0x2a) // SLT emit_set_if_less32(s1l,s2l,t); else // SLTU emit_set_if_carry32(s1l,s2l,t); @@ -2243,61 +2226,61 @@ static void alu_assemble(int i,struct regstat *i_regs) } } } - if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR - if(rt1[i]) { + if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR + if(dops[i].rt1) { signed char s1l,s2l,tl; - tl=get_reg(i_regs->regmap,rt1[i]); + tl=get_reg(i_regs->regmap,dops[i].rt1); { if(tl>=0) { - s1l=get_reg(i_regs->regmap,rs1[i]); - s2l=get_reg(i_regs->regmap,rs2[i]); - if(rs1[i]&&rs2[i]) { + s1l=get_reg(i_regs->regmap,dops[i].rs1); + s2l=get_reg(i_regs->regmap,dops[i].rs2); + if(dops[i].rs1&&dops[i].rs2) { assert(s1l>=0); assert(s2l>=0); - if(opcode2[i]==0x24) { // AND + if(dops[i].opcode2==0x24) { // AND emit_and(s1l,s2l,tl); } else - if(opcode2[i]==0x25) { // OR + if(dops[i].opcode2==0x25) { // OR emit_or(s1l,s2l,tl); } else - if(opcode2[i]==0x26) { // XOR + if(dops[i].opcode2==0x26) { // XOR emit_xor(s1l,s2l,tl); } else - if(opcode2[i]==0x27) { // NOR + if(dops[i].opcode2==0x27) { // NOR emit_or(s1l,s2l,tl); emit_not(tl,tl); } } else { - if(opcode2[i]==0x24) { // AND + if(dops[i].opcode2==0x24) { // AND emit_zeroreg(tl); } else - if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR - if(rs1[i]){ + if(dops[i].opcode2==0x25||dops[i].opcode2==0x26) { // OR/XOR + if(dops[i].rs1){ if(s1l>=0) emit_mov(s1l,tl); - else emit_loadreg(rs1[i],tl); // CHECK: regmap_entry? + else emit_loadreg(dops[i].rs1,tl); // CHECK: regmap_entry? } else - if(rs2[i]){ + if(dops[i].rs2){ if(s2l>=0) emit_mov(s2l,tl); - else emit_loadreg(rs2[i],tl); // CHECK: regmap_entry? + else emit_loadreg(dops[i].rs2,tl); // CHECK: regmap_entry? 
} else emit_zeroreg(tl); } else - if(opcode2[i]==0x27) { // NOR - if(rs1[i]){ + if(dops[i].opcode2==0x27) { // NOR + if(dops[i].rs1){ if(s1l>=0) emit_not(s1l,tl); else { - emit_loadreg(rs1[i],tl); + emit_loadreg(dops[i].rs1,tl); emit_not(tl,tl); } } else - if(rs2[i]){ + if(dops[i].rs2){ if(s2l>=0) emit_not(s2l,tl); else { - emit_loadreg(rs2[i],tl); + emit_loadreg(dops[i].rs2,tl); emit_not(tl,tl); } } @@ -2312,10 +2295,10 @@ static void alu_assemble(int i,struct regstat *i_regs) void imm16_assemble(int i,struct regstat *i_regs) { - if (opcode[i]==0x0f) { // LUI - if(rt1[i]) { + if (dops[i].opcode==0x0f) { // LUI + if(dops[i].rt1) { signed char t; - t=get_reg(i_regs->regmap,rt1[i]); + t=get_reg(i_regs->regmap,dops[i].rt1); //assert(t>=0); if(t>=0) { if(!((i_regs->isconst>>t)&1)) @@ -2323,18 +2306,18 @@ void imm16_assemble(int i,struct regstat *i_regs) } } } - if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU - if(rt1[i]) { + if(dops[i].opcode==0x08||dops[i].opcode==0x09) { // ADDI/ADDIU + if(dops[i].rt1) { signed char s,t; - t=get_reg(i_regs->regmap,rt1[i]); - s=get_reg(i_regs->regmap,rs1[i]); - if(rs1[i]) { + t=get_reg(i_regs->regmap,dops[i].rt1); + s=get_reg(i_regs->regmap,dops[i].rs1); + if(dops[i].rs1) { //assert(t>=0); //assert(s>=0); if(t>=0) { if(!((i_regs->isconst>>t)&1)) { if(s<0) { - if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t); + if(i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t); emit_addimm(t,imm[i],t); }else{ if(!((i_regs->wasconst>>s)&1)) @@ -2352,13 +2335,13 @@ void imm16_assemble(int i,struct regstat *i_regs) } } } - if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU - if(rt1[i]) { + if(dops[i].opcode==0x18||dops[i].opcode==0x19) { // DADDI/DADDIU + if(dops[i].rt1) { signed char sl,tl; - tl=get_reg(i_regs->regmap,rt1[i]); - sl=get_reg(i_regs->regmap,rs1[i]); + tl=get_reg(i_regs->regmap,dops[i].rt1); + sl=get_reg(i_regs->regmap,dops[i].rs1); if(tl>=0) { - if(rs1[i]) { + if(dops[i].rs1) { assert(sl>=0); 
emit_addimm(sl,imm[i],tl); } else { @@ -2367,18 +2350,18 @@ void imm16_assemble(int i,struct regstat *i_regs) } } } - else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU - if(rt1[i]) { - //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug + else if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU + if(dops[i].rt1) { + //assert(dops[i].rs1!=0); // r0 might be valid, but it's probably a bug signed char sl,t; - t=get_reg(i_regs->regmap,rt1[i]); - sl=get_reg(i_regs->regmap,rs1[i]); + t=get_reg(i_regs->regmap,dops[i].rt1); + sl=get_reg(i_regs->regmap,dops[i].rs1); //assert(t>=0); if(t>=0) { - if(rs1[i]>0) { - if(opcode[i]==0x0a) { // SLTI + if(dops[i].rs1>0) { + if(dops[i].opcode==0x0a) { // SLTI if(sl<0) { - if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t); + if(i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t); emit_slti32(t,imm[i],t); }else{ emit_slti32(sl,imm[i],t); @@ -2386,7 +2369,7 @@ void imm16_assemble(int i,struct regstat *i_regs) } else { // SLTIU if(sl<0) { - if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t); + if(i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t); emit_sltiu32(t,imm[i],t); }else{ emit_sltiu32(sl,imm[i],t); @@ -2395,7 +2378,7 @@ void imm16_assemble(int i,struct regstat *i_regs) }else{ // SLTI(U) with r0 is just stupid, // nonetheless examples can be found - if(opcode[i]==0x0a) // SLTI + if(dops[i].opcode==0x0a) // SLTI if(0=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI - if(rt1[i]) { + else if(dops[i].opcode>=0x0c&&dops[i].opcode<=0x0e) { // ANDI/ORI/XORI + if(dops[i].rt1) { signed char sl,tl; - tl=get_reg(i_regs->regmap,rt1[i]); - sl=get_reg(i_regs->regmap,rs1[i]); + tl=get_reg(i_regs->regmap,dops[i].rt1); + sl=get_reg(i_regs->regmap,dops[i].rs1); if(tl>=0 && !((i_regs->isconst>>tl)&1)) { - if(opcode[i]==0x0c) //ANDI + if(dops[i].opcode==0x0c) //ANDI { - if(rs1[i]) { + if(dops[i].rs1) { if(sl<0) { - if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl); + 
if(i_regs->regmap_entry[tl]!=dops[i].rs1) emit_loadreg(dops[i].rs1,tl); emit_andimm(tl,imm[i],tl); }else{ if(!((i_regs->wasconst>>sl)&1)) @@ -2431,11 +2414,11 @@ void imm16_assemble(int i,struct regstat *i_regs) } else { - if(rs1[i]) { + if(dops[i].rs1) { if(sl<0) { - if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl); + if(i_regs->regmap_entry[tl]!=dops[i].rs1) emit_loadreg(dops[i].rs1,tl); } - if(opcode[i]==0x0d) { // ORI + if(dops[i].opcode==0x0d) { // ORI if(sl<0) { emit_orimm(tl,imm[i],tl); }else{ @@ -2445,7 +2428,7 @@ void imm16_assemble(int i,struct regstat *i_regs) emit_movimm(constmap[i][sl]|imm[i],tl); } } - if(opcode[i]==0x0e) { // XORI + if(dops[i].opcode==0x0e) { // XORI if(sl<0) { emit_xorimm(tl,imm[i],tl); }else{ @@ -2467,31 +2450,31 @@ void imm16_assemble(int i,struct regstat *i_regs) void shiftimm_assemble(int i,struct regstat *i_regs) { - if(opcode2[i]<=0x3) // SLL/SRL/SRA + if(dops[i].opcode2<=0x3) // SLL/SRL/SRA { - if(rt1[i]) { + if(dops[i].rt1) { signed char s,t; - t=get_reg(i_regs->regmap,rt1[i]); - s=get_reg(i_regs->regmap,rs1[i]); + t=get_reg(i_regs->regmap,dops[i].rt1); + s=get_reg(i_regs->regmap,dops[i].rs1); //assert(t>=0); if(t>=0&&!((i_regs->isconst>>t)&1)){ - if(rs1[i]==0) + if(dops[i].rs1==0) { emit_zeroreg(t); } else { - if(s<0&&i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t); + if(s<0&&i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t); if(imm[i]) { - if(opcode2[i]==0) // SLL + if(dops[i].opcode2==0) // SLL { emit_shlimm(s<0?t:s,imm[i],t); } - if(opcode2[i]==2) // SRL + if(dops[i].opcode2==2) // SRL { emit_shrimm(s<0?t:s,imm[i],t); } - if(opcode2[i]==3) // SRA + if(dops[i].opcode2==3) // SRA { emit_sarimm(s<0?t:s,imm[i],t); } @@ -2501,22 +2484,22 @@ void shiftimm_assemble(int i,struct regstat *i_regs) } } } - //emit_storereg(rt1[i],t); //DEBUG + //emit_storereg(dops[i].rt1,t); //DEBUG } } - if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA + if(dops[i].opcode2>=0x38&&dops[i].opcode2<=0x3b) // 
DSLL/DSRL/DSRA { assert(0); } - if(opcode2[i]==0x3c) // DSLL32 + if(dops[i].opcode2==0x3c) // DSLL32 { assert(0); } - if(opcode2[i]==0x3e) // DSRL32 + if(dops[i].opcode2==0x3e) // DSRL32 { assert(0); } - if(opcode2[i]==0x3f) // DSRA32 + if(dops[i].opcode2==0x3f) // DSRA32 { assert(0); } @@ -2526,25 +2509,25 @@ void shiftimm_assemble(int i,struct regstat *i_regs) static void shift_assemble(int i,struct regstat *i_regs) { signed char s,t,shift; - if (rt1[i] == 0) + if (dops[i].rt1 == 0) return; - assert(opcode2[i]<=0x07); // SLLV/SRLV/SRAV - t = get_reg(i_regs->regmap, rt1[i]); - s = get_reg(i_regs->regmap, rs1[i]); - shift = get_reg(i_regs->regmap, rs2[i]); + assert(dops[i].opcode2<=0x07); // SLLV/SRLV/SRAV + t = get_reg(i_regs->regmap, dops[i].rt1); + s = get_reg(i_regs->regmap, dops[i].rs1); + shift = get_reg(i_regs->regmap, dops[i].rs2); if (t < 0) return; - if(rs1[i]==0) + if(dops[i].rs1==0) emit_zeroreg(t); - else if(rs2[i]==0) { + else if(dops[i].rs2==0) { assert(s>=0); if(s!=t) emit_mov(s,t); } else { host_tempreg_acquire(); emit_andimm(shift,31,HOST_TEMPREG); - switch(opcode2[i]) { + switch(dops[i].opcode2) { case 4: // SLLV emit_shl(s,HOST_TEMPREG,t); break; @@ -2593,7 +2576,7 @@ static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) { void *jaddr = NULL; int type=0; - int mr=rs1[i]; + int mr=dops[i].rs1; if(((smrv_strong|smrv_weak)>>mr)&1) { type=get_ptr_mem_type(smrv[mr]); //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type); @@ -2643,7 +2626,7 @@ static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) jaddr=out; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK // Hint to branch predictor that the branch is unlikely to be taken - if(rs1[i]>=28) + if(dops[i].rs1>=28) emit_jno_unlikely(0); else #endif @@ -2722,8 +2705,8 @@ static void load_assemble(int i, const struct regstat *i_regs) int memtarget=0,c=0; int fastio_reg_override=-1; u_int reglist=get_host_reglist(i_regs->regmap); - 
tl=get_reg(i_regs->regmap,rt1[i]); - s=get_reg(i_regs->regmap,rs1[i]); + tl=get_reg(i_regs->regmap,dops[i].rt1); + s=get_reg(i_regs->regmap,dops[i].rs1); offset=imm[i]; if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<=0) { @@ -2736,7 +2719,7 @@ static void load_assemble(int i, const struct regstat *i_regs) //if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset); // FIXME: Even if the load is a NOP, we should check for pagefaults... if((tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80)) - ||rt1[i]==0) { + ||dops[i].rt1==0) { // could be FIFO, must perform the read // ||dummy read assem_debug("(forced read)\n"); @@ -2754,7 +2737,7 @@ static void load_assemble(int i, const struct regstat *i_regs) if(!c) { #ifdef R29_HACK // Strmnnrmn's speed hack - if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) + if(dops[i].rs1!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) #endif { jaddr=emit_fastpath_cmp_jump(i,addr,&fastio_reg_override); @@ -2765,8 +2748,8 @@ static void load_assemble(int i, const struct regstat *i_regs) emit_addimm(addr,ram_offset,HOST_TEMPREG); fastio_reg_override=HOST_TEMPREG; } - int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg - if (opcode[i]==0x20) { // LB + int dummy=(dops[i].rt1==0)||(tl!=get_reg(i_regs->regmap,dops[i].rt1)); // ignore loads to r0 and unneeded reg + if (dops[i].opcode==0x20) { // LB if(!c||memtarget) { if(!dummy) { { @@ -2781,9 +2764,9 @@ static void load_assemble(int i, const struct regstat *i_regs) add_stub_r(LOADB_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else - inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); + inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); } - if (opcode[i]==0x21) { // LH + if (dops[i].opcode==0x21) { // LH if(!c||memtarget) { if(!dummy) { int x=0,a=tl; @@ -2795,9 +2778,9 @@ static void load_assemble(int i, const struct 
regstat *i_regs) add_stub_r(LOADH_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else - inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); + inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); } - if (opcode[i]==0x23) { // LW + if (dops[i].opcode==0x23) { // LW if(!c||memtarget) { if(!dummy) { int a=addr; @@ -2808,9 +2791,9 @@ static void load_assemble(int i, const struct regstat *i_regs) add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else - inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); + inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); } - if (opcode[i]==0x24) { // LBU + if (dops[i].opcode==0x24) { // LBU if(!c||memtarget) { if(!dummy) { int x=0,a=tl; @@ -2823,9 +2806,9 @@ static void load_assemble(int i, const struct regstat *i_regs) add_stub_r(LOADBU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else - inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); + inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); } - if (opcode[i]==0x25) { // LHU + if (dops[i].opcode==0x25) { // LHU if(!c||memtarget) { if(!dummy) { int x=0,a=tl; @@ -2837,12 +2820,12 @@ static void load_assemble(int i, const struct regstat *i_regs) add_stub_r(LOADHU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else - inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist); + inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); } - if (opcode[i]==0x27) { // LWU + if (dops[i].opcode==0x27) { // LWU assert(0); } - if (opcode[i]==0x37) { // LD + if (dops[i].opcode==0x37) { // LD assert(0); } } @@ -2859,8 +2842,8 @@ static void loadlr_assemble(int i, const struct regstat *i_regs) int memtarget=0,c=0; int fastio_reg_override=-1; u_int 
reglist=get_host_reglist(i_regs->regmap); - tl=get_reg(i_regs->regmap,rt1[i]); - s=get_reg(i_regs->regmap,rs1[i]); + tl=get_reg(i_regs->regmap,dops[i].rt1); + s=get_reg(i_regs->regmap,dops[i].rs1); temp=get_reg(i_regs->regmap,-1); temp2=get_reg(i_regs->regmap,FTEMP); addr=get_reg(i_regs->regmap,AGEN1+(i&1)); @@ -2877,7 +2860,7 @@ static void loadlr_assemble(int i, const struct regstat *i_regs) } if(!c) { emit_shlimm(addr,3,temp); - if (opcode[i]==0x22||opcode[i]==0x26) { + if (dops[i].opcode==0x22||dops[i].opcode==0x26) { emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR }else{ emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR @@ -2890,13 +2873,13 @@ static void loadlr_assemble(int i, const struct regstat *i_regs) emit_addimm(temp2,ram_offset,HOST_TEMPREG); fastio_reg_override=HOST_TEMPREG; } - if (opcode[i]==0x22||opcode[i]==0x26) { + if (dops[i].opcode==0x22||dops[i].opcode==0x26) { emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR }else{ emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR } } - if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR + if (dops[i].opcode==0x22||dops[i].opcode==0x26) { // LWL/LWR if(!c||memtarget) { int a=temp2; if(fastio_reg_override>=0) a=fastio_reg_override; @@ -2906,14 +2889,14 @@ static void loadlr_assemble(int i, const struct regstat *i_regs) } else inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist); - if(rt1[i]) { + if(dops[i].rt1) { assert(tl>=0); emit_andimm(temp,24,temp); - if (opcode[i]==0x22) // LWL + if (dops[i].opcode==0x22) // LWL emit_xorimm(temp,24,temp); host_tempreg_acquire(); emit_movimm(-1,HOST_TEMPREG); - if (opcode[i]==0x26) { + if (dops[i].opcode==0x26) { emit_shr(temp2,temp,temp2); emit_bic_lsr(tl,HOST_TEMPREG,temp,tl); }else{ @@ -2923,9 +2906,9 @@ static void loadlr_assemble(int i, const struct regstat *i_regs) host_tempreg_release(); emit_or(temp2,tl,tl); } - //emit_storereg(rt1[i],tl); // DEBUG + //emit_storereg(dops[i].rt1,tl); // DEBUG } - 
if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR + if (dops[i].opcode==0x1A||dops[i].opcode==0x1B) { // LDL/LDR assert(0); } } @@ -2942,8 +2925,8 @@ void store_assemble(int i, const struct regstat *i_regs) int agr=AGEN1+(i&1); int fastio_reg_override=-1; u_int reglist=get_host_reglist(i_regs->regmap); - tl=get_reg(i_regs->regmap,rs2[i]); - s=get_reg(i_regs->regmap,rs1[i]); + tl=get_reg(i_regs->regmap,dops[i].rs2); + s=get_reg(i_regs->regmap,dops[i].rs1); temp=get_reg(i_regs->regmap,agr); if(temp<0) temp=get_reg(i_regs->regmap,-1); offset=imm[i]; @@ -2967,7 +2950,7 @@ void store_assemble(int i, const struct regstat *i_regs) fastio_reg_override=HOST_TEMPREG; } - if (opcode[i]==0x28) { // SB + if (dops[i].opcode==0x28) { // SB if(!c||memtarget) { int x=0,a=temp; if(!c) a=addr; @@ -2976,7 +2959,7 @@ void store_assemble(int i, const struct regstat *i_regs) } type=STOREB_STUB; } - if (opcode[i]==0x29) { // SH + if (dops[i].opcode==0x29) { // SH if(!c||memtarget) { int x=0,a=temp; if(!c) a=addr; @@ -2985,7 +2968,7 @@ void store_assemble(int i, const struct regstat *i_regs) } type=STOREH_STUB; } - if (opcode[i]==0x2B) { // SW + if (dops[i].opcode==0x2B) { // SW if(!c||memtarget) { int a=addr; if(fastio_reg_override>=0) a=fastio_reg_override; @@ -2993,7 +2976,7 @@ void store_assemble(int i, const struct regstat *i_regs) } type=STOREW_STUB; } - if (opcode[i]==0x3F) { // SD + if (dops[i].opcode==0x3F) { // SD assert(0); type=STORED_STUB; } @@ -3005,7 +2988,7 @@ void store_assemble(int i, const struct regstat *i_regs) add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj[i],reglist); jaddr=0; } - if(!(i_regs->waswritten&(1<waswritten&(1<regmap,rs2[i],ccadj[i],reglist); + inline_writestub(type,i,addr_val,i_regs->regmap,dops[i].rs2,ccadj[i],reglist); } // basic current block modification detection.. 
// not looking back as that should be in mips cache already @@ -3063,8 +3046,8 @@ static void storelr_assemble(int i, const struct regstat *i_regs) int memtarget=0,c=0; int agr=AGEN1+(i&1); u_int reglist=get_host_reglist(i_regs->regmap); - tl=get_reg(i_regs->regmap,rs2[i]); - s=get_reg(i_regs->regmap,rs1[i]); + tl=get_reg(i_regs->regmap,dops[i].rs2); + s=get_reg(i_regs->regmap,dops[i].rs1); temp=get_reg(i_regs->regmap,agr); if(temp<0) temp=get_reg(i_regs->regmap,-1); offset=imm[i]; @@ -3084,7 +3067,7 @@ static void storelr_assemble(int i, const struct regstat *i_regs) } else { - if(!memtarget||!rs1[i]) { + if(!memtarget||!dops[i].rs1) { jaddr=out; emit_jmp(0); } @@ -3092,7 +3075,7 @@ static void storelr_assemble(int i, const struct regstat *i_regs) if(ram_offset) emit_addimm_no_flags(ram_offset,temp); - if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR + if (dops[i].opcode==0x2C||dops[i].opcode==0x2D) { // SDL/SDR assert(0); } @@ -3104,10 +3087,10 @@ static void storelr_assemble(int i, const struct regstat *i_regs) case1=out; emit_jne(0); // 0 - if (opcode[i]==0x2A) { // SWL + if (dops[i].opcode==0x2A) { // SWL emit_writeword_indexed(tl,0,temp); } - else if (opcode[i]==0x2E) { // SWR + else if (dops[i].opcode==0x2E) { // SWR emit_writebyte_indexed(tl,3,temp); } else @@ -3116,15 +3099,15 @@ static void storelr_assemble(int i, const struct regstat *i_regs) emit_jmp(0); // 1 set_jump_target(case1, out); - if (opcode[i]==0x2A) { // SWL + if (dops[i].opcode==0x2A) { // SWL // Write 3 msb into three least significant bytes - if(rs2[i]) emit_rorimm(tl,8,tl); + if(dops[i].rs2) emit_rorimm(tl,8,tl); emit_writehword_indexed(tl,-1,temp); - if(rs2[i]) emit_rorimm(tl,16,tl); + if(dops[i].rs2) emit_rorimm(tl,16,tl); emit_writebyte_indexed(tl,1,temp); - if(rs2[i]) emit_rorimm(tl,8,tl); + if(dops[i].rs2) emit_rorimm(tl,8,tl); } - else if (opcode[i]==0x2E) { // SWR + else if (dops[i].opcode==0x2E) { // SWR // Write two lsb into two most significant bytes 
emit_writehword_indexed(tl,1,temp); } @@ -3135,30 +3118,30 @@ static void storelr_assemble(int i, const struct regstat *i_regs) emit_testimm(temp,1); case3=out; emit_jne(0); - if (opcode[i]==0x2A) { // SWL + if (dops[i].opcode==0x2A) { // SWL // Write two msb into two least significant bytes - if(rs2[i]) emit_rorimm(tl,16,tl); + if(dops[i].rs2) emit_rorimm(tl,16,tl); emit_writehword_indexed(tl,-2,temp); - if(rs2[i]) emit_rorimm(tl,16,tl); + if(dops[i].rs2) emit_rorimm(tl,16,tl); } - else if (opcode[i]==0x2E) { // SWR + else if (dops[i].opcode==0x2E) { // SWR // Write 3 lsb into three most significant bytes emit_writebyte_indexed(tl,-1,temp); - if(rs2[i]) emit_rorimm(tl,8,tl); + if(dops[i].rs2) emit_rorimm(tl,8,tl); emit_writehword_indexed(tl,0,temp); - if(rs2[i]) emit_rorimm(tl,24,tl); + if(dops[i].rs2) emit_rorimm(tl,24,tl); } done2=out; emit_jmp(0); // 3 set_jump_target(case3, out); - if (opcode[i]==0x2A) { // SWL + if (dops[i].opcode==0x2A) { // SWL // Write msb into least significant byte - if(rs2[i]) emit_rorimm(tl,24,tl); + if(dops[i].rs2) emit_rorimm(tl,24,tl); emit_writebyte_indexed(tl,-3,temp); - if(rs2[i]) emit_rorimm(tl,8,tl); + if(dops[i].rs2) emit_rorimm(tl,8,tl); } - else if (opcode[i]==0x2E) { // SWR + else if (dops[i].opcode==0x2E) { // SWR // Write entire word emit_writeword_indexed(tl,-3,temp); } @@ -3167,7 +3150,7 @@ static void storelr_assemble(int i, const struct regstat *i_regs) set_jump_target(done2, out); if(!c||!memtarget) add_stub_r(STORELR_STUB,jaddr,out,i,temp,i_regs,ccadj[i],reglist); - if(!(i_regs->waswritten&(1<waswritten&(1<regmap,INVCP); @@ -3188,21 +3171,21 @@ static void storelr_assemble(int i, const struct regstat *i_regs) static void cop0_assemble(int i,struct regstat *i_regs) { - if(opcode2[i]==0) // MFC0 + if(dops[i].opcode2==0) // MFC0 { - signed char t=get_reg(i_regs->regmap,rt1[i]); + signed char t=get_reg(i_regs->regmap,dops[i].rt1); u_int copr=(source[i]>>11)&0x1f; //assert(t>=0); // Why does this happen? 
OOT is weird - if(t>=0&&rt1[i]!=0) { + if(t>=0&&dops[i].rt1!=0) { emit_readword(®_cop0[copr],t); } } - else if(opcode2[i]==4) // MTC0 + else if(dops[i].opcode2==4) // MTC0 { - signed char s=get_reg(i_regs->regmap,rs1[i]); + signed char s=get_reg(i_regs->regmap,dops[i].rs1); char copr=(source[i]>>11)&0x1f; assert(s>=0); - wb_register(rs1[i],i_regs->regmap,i_regs->dirty); + wb_register(dops[i].rs1,i_regs->regmap,i_regs->dirty); if(copr==9||copr==11||copr==12||copr==13) { emit_readword(&last_count,HOST_TEMPREG); emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc @@ -3222,10 +3205,10 @@ static void cop0_assemble(int i,struct regstat *i_regs) emit_writeword(HOST_CCREG,&last_count); emit_movimm(0,HOST_CCREG); emit_storereg(CCREG,HOST_CCREG); - emit_loadreg(rs1[i],1); + emit_loadreg(dops[i].rs1,1); emit_movimm(copr,0); emit_far_call(pcsx_mtc0_ds); - emit_loadreg(rs1[i],s); + emit_loadreg(dops[i].rs1,s); return; } emit_movimm(start+i*4+4,HOST_TEMPREG); @@ -3234,7 +3217,7 @@ static void cop0_assemble(int i,struct regstat *i_regs) emit_writeword(HOST_TEMPREG,&pending_exception); } if(s==HOST_CCREG) - emit_loadreg(rs1[i],1); + emit_loadreg(dops[i].rs1,1); else if(s!=1) emit_mov(s,1); emit_movimm(copr,0); @@ -3259,11 +3242,11 @@ static void cop0_assemble(int i,struct regstat *i_regs) emit_jmpreg(0); set_jump_target(jaddr, out); } - emit_loadreg(rs1[i],s); + emit_loadreg(dops[i].rs1,s); } else { - assert(opcode2[i]==0x10); + assert(dops[i].opcode2==0x10); //if((source[i]&0x3f)==0x10) // RFE { emit_readword(&Status,0); @@ -3320,15 +3303,15 @@ static void do_cop1stub(int n) static int cop2_is_stalling_op(int i, int *cycles) { - if (opcode[i] == 0x3a) { // SWC2 + if (dops[i].opcode == 0x3a) { // SWC2 *cycles = 0; return 1; } - if (itype[i] == COP2 && (opcode2[i] == 0 || opcode2[i] == 2)) { // MFC2/CFC2 + if (dops[i].itype == COP2 && (dops[i].opcode2 == 0 || dops[i].opcode2 == 2)) { // MFC2/CFC2 *cycles = 0; return 1; } - if (itype[i] == C2OP) { + if (dops[i].itype == 
C2OP) { *cycles = gte_cycletab[source[i] & 0x3f]; return 1; } @@ -3368,10 +3351,10 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u //printf("no cc %08x\n", start + i*4); return; } - if (!bt[i]) { + if (!dops[i].bt) { for (j = i - 1; j >= 0; j--) { - //if (is_ds[j]) break; - if (cop2_is_stalling_op(j, &other_gte_op_cycles) || bt[j]) + //if (dops[j].is_ds) break; + if (cop2_is_stalling_op(j, &other_gte_op_cycles) || dops[j].bt) break; } j = max(j, 0); @@ -3443,14 +3426,14 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u static int is_mflohi(int i) { - return (itype[i] == MOV && (rs1[i] == HIREG || rs1[i] == LOREG)); + return (dops[i].itype == MOV && (dops[i].rs1 == HIREG || dops[i].rs1 == LOREG)); } static int check_multdiv(int i, int *cycles) { - if (itype[i] != MULTDIV) + if (dops[i].itype != MULTDIV) return 0; - if (opcode2[i] == 0x18 || opcode2[i] == 0x19) // MULT(U) + if (dops[i].opcode2 == 0x18 || dops[i].opcode2 == 0x19) // MULT(U) *cycles = 11; // approx from 7 11 14 else *cycles = 37; @@ -3467,7 +3450,7 @@ static void multdiv_prepare_stall(int i, const struct regstat *i_regs) return; } for (j = i + 1; j < slen; j++) { - if (bt[j]) + if (dops[j].bt) break; if ((found = is_mflohi(j))) break; @@ -3504,10 +3487,10 @@ static void multdiv_do_stall(int i, const struct regstat *i_regs) //printf("no cc/rtmp %08x\n", start + i*4); return; } - if (!bt[i]) { + if (!dops[i].bt) { for (j = i - 1; j >= 0; j--) { - if (is_ds[j]) break; - if (check_multdiv(j, &known_cycles) || bt[j]) + if (dops[j].is_ds) break; + if (check_multdiv(j, &known_cycles) || dops[j].bt) break; if (is_mflohi(j)) // already handled by this op @@ -3628,17 +3611,17 @@ static void c2ls_assemble(int i, const struct regstat *i_regs) int fastio_reg_override=-1; u_int reglist=get_host_reglist(i_regs->regmap); u_int copr=(source[i]>>16)&0x1f; - s=get_reg(i_regs->regmap,rs1[i]); + s=get_reg(i_regs->regmap,dops[i].rs1); 
tl=get_reg(i_regs->regmap,FTEMP); offset=imm[i]; - assert(rs1[i]>0); + assert(dops[i].rs1>0); assert(tl>=0); if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<regmap,agr); if(ar<0) ar=get_reg(i_regs->regmap,-1); reglist|=1<=0) a=fastio_reg_override; emit_readword_indexed(0,a,tl); } - if (opcode[i]==0x3a) { // SWC2 + if (dops[i].opcode==0x3a) { // SWC2 #ifdef DESTRUCTIVE_SHIFT if(!offset&&!c&&s>=0) emit_mov(s,ar); #endif @@ -3690,8 +3673,8 @@ static void c2ls_assemble(int i, const struct regstat *i_regs) host_tempreg_release(); if(jaddr2) add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj[i],reglist); - if(opcode[i]==0x3a) // SWC2 - if(!(i_regs->waswritten&(1<waswritten&(1<regmap,INVCP); assert(ir>=0); @@ -3707,7 +3690,7 @@ static void c2ls_assemble(int i, const struct regstat *i_regs) add_stub(INVCODE_STUB,jaddr3,out,reglist|(1<regmap), temp, -1); - if (opcode2[i] == 0 || opcode2[i] == 2) { // MFC2/CFC2 - signed char tl = get_reg(i_regs->regmap, rt1[i]); + if (dops[i].opcode2 == 0 || dops[i].opcode2 == 2) { // MFC2/CFC2 + signed char tl = get_reg(i_regs->regmap, dops[i].rt1); reglist = reglist_exclude(reglist, tl, -1); } cop2_do_stall_check(0, i, i_regs, reglist); } - if (opcode2[i]==0) { // MFC2 - signed char tl=get_reg(i_regs->regmap,rt1[i]); - if(tl>=0&&rt1[i]!=0) + if (dops[i].opcode2==0) { // MFC2 + signed char tl=get_reg(i_regs->regmap,dops[i].rt1); + if(tl>=0&&dops[i].rt1!=0) cop2_get_dreg(copr,tl,temp); } - else if (opcode2[i]==4) { // MTC2 - signed char sl=get_reg(i_regs->regmap,rs1[i]); + else if (dops[i].opcode2==4) { // MTC2 + signed char sl=get_reg(i_regs->regmap,dops[i].rs1); cop2_put_dreg(copr,sl,temp); } - else if (opcode2[i]==2) // CFC2 + else if (dops[i].opcode2==2) // CFC2 { - signed char tl=get_reg(i_regs->regmap,rt1[i]); - if(tl>=0&&rt1[i]!=0) + signed char tl=get_reg(i_regs->regmap,dops[i].rt1); + if(tl>=0&&dops[i].rt1!=0) emit_readword(®_cop2c[copr],tl); } - else if (opcode2[i]==6) // CTC2 + else if (dops[i].opcode2==6) // CTC2 { - signed char 
sl=get_reg(i_regs->regmap,rs1[i]); + signed char sl=get_reg(i_regs->regmap,dops[i].rs1); switch(copr) { case 4: case 12: @@ -3780,14 +3763,13 @@ static void do_unalignedwritestub(int n) signed char *i_regmap=i_regs->regmap; int temp2=get_reg(i_regmap,FTEMP); int rt; - rt=get_reg(i_regmap,rs2[i]); + rt=get_reg(i_regmap,dops[i].rs2); assert(rt>=0); assert(addr>=0); - assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented + assert(dops[i].opcode==0x2a||dops[i].opcode==0x2e); // SWL/SWR only implemented reglist|=(1<regmap,rt1[i]); + tl=get_reg(i_regs->regmap,dops[i].rt1); //assert(tl>=0); if(tl>=0) { - sl=get_reg(i_regs->regmap,rs1[i]); + sl=get_reg(i_regs->regmap,dops[i].rs1); if(sl>=0) emit_mov(sl,tl); - else emit_loadreg(rs1[i],tl); + else emit_loadreg(dops[i].rs1,tl); } } - if (rs1[i] == HIREG || rs1[i] == LOREG) // MFHI/MFLO + if (dops[i].rs1 == HIREG || dops[i].rs1 == LOREG) // MFHI/MFLO multdiv_do_stall(i, i_regs); } @@ -3944,61 +3878,61 @@ static void speculate_register_values(int i) } smrv_strong=smrv_strong_next; smrv_weak=smrv_weak_next; - switch(itype[i]) { + switch(dops[i].itype) { case ALU: - if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); - else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]); - else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); - else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]); + if ((smrv_strong>>dops[i].rs1)&1) speculate_mov(dops[i].rs1,dops[i].rt1); + else if((smrv_strong>>dops[i].rs2)&1) speculate_mov(dops[i].rs2,dops[i].rt1); + else if((smrv_weak>>dops[i].rs1)&1) speculate_mov_weak(dops[i].rs1,dops[i].rt1); + else if((smrv_weak>>dops[i].rs2)&1) speculate_mov_weak(dops[i].rs2,dops[i].rt1); else { - smrv_strong_next&=~(1<=0) { if(get_final_value(hr,i,&value)) - smrv[rt1[i]]=value; - else smrv[rt1[i]]=constmap[i][hr]; - smrv_strong_next|=1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); - else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); + if 
((smrv_strong>>dops[i].rs1)&1) speculate_mov(dops[i].rs1,dops[i].rt1); + else if((smrv_weak>>dops[i].rs1)&1) speculate_mov_weak(dops[i].rs1,dops[i].rt1); } break; case LOAD: - if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) { + if(start<0x2000&&(dops[i].rt1==26||(smrv[dops[i].rt1]>>24)==0xa0)) { // special case for BIOS - smrv[rt1[i]]=0xa0000000; - smrv_strong_next|=1<regmap,rt1[i]); + if(dops[i].itype==LOAD) { + ra=get_reg(i_regs->regmap,dops[i].rt1); if(ra<0) ra=get_reg(i_regs->regmap,-1); assert(ra>=0); } - if(itype[i]==LOADLR) { + if(dops[i].itype==LOADLR) { ra=get_reg(i_regs->regmap,FTEMP); } - if(itype[i]==STORE||itype[i]==STORELR) { + if(dops[i].itype==STORE||dops[i].itype==STORELR) { ra=get_reg(i_regs->regmap,agr); if(ra<0) ra=get_reg(i_regs->regmap,-1); } - if(itype[i]==C1LS||itype[i]==C2LS) { - if ((opcode[i]&0x3b)==0x31||(opcode[i]&0x3b)==0x32) // LWC1/LDC1/LWC2/LDC2 + if(dops[i].itype==C1LS||dops[i].itype==C2LS) { + if ((dops[i].opcode&0x3b)==0x31||(dops[i].opcode&0x3b)==0x32) // LWC1/LDC1/LWC2/LDC2 ra=get_reg(i_regs->regmap,FTEMP); else { // SWC1/SDC1/SWC2/SDC2 ra=get_reg(i_regs->regmap,agr); if(ra<0) ra=get_reg(i_regs->regmap,-1); } } - int rs=get_reg(i_regs->regmap,rs1[i]); + int rs=get_reg(i_regs->regmap,dops[i].rs1); if(ra>=0) { int offset=imm[i]; int c=(i_regs->wasconst>>rs)&1; - if(rs1[i]==0) { + if(dops[i].rs1==0) { // Using r0 as a base address if(!entry||entry[ra]!=agr) { - if (opcode[i]==0x22||opcode[i]==0x26) { + if (dops[i].opcode==0x22||dops[i].opcode==0x26) { emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR - }else if (opcode[i]==0x1a||opcode[i]==0x1b) { + }else if (dops[i].opcode==0x1a||dops[i].opcode==0x1b) { emit_movimm(offset&0xFFFFFFF8,ra); // LDL/LDR }else{ emit_movimm(offset,ra); @@ -4204,17 +4138,17 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) } // else did it in the previous cycle } else if(rs<0) { - if(!entry||entry[ra]!=rs1[i]) - emit_loadreg(rs1[i],ra); - //if(!entry||entry[ra]!=rs1[i]) + 
if(!entry||entry[ra]!=dops[i].rs1) + emit_loadreg(dops[i].rs1,ra); + //if(!entry||entry[ra]!=dops[i].rs1) // printf("poor load scheduling!\n"); } else if(c) { - if(rs1[i]!=rt1[i]||itype[i]!=LOAD) { + if(dops[i].rs1!=dops[i].rt1||dops[i].itype!=LOAD) { if(!entry||entry[ra]!=agr) { - if (opcode[i]==0x22||opcode[i]==0x26) { + if (dops[i].opcode==0x22||dops[i].opcode==0x26) { emit_movimm((constmap[i][rs]+offset)&0xFFFFFFFC,ra); // LWL/LWR - }else if (opcode[i]==0x1a||opcode[i]==0x1b) { + }else if (dops[i].opcode==0x1a||dops[i].opcode==0x1b) { emit_movimm((constmap[i][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR }else{ emit_movimm(constmap[i][rs]+offset,ra); @@ -4223,7 +4157,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) } // else did it in the previous cycle } // else load_consts already did it } - if(offset&&!c&&rs1[i]) { + if(offset&&!c&&dops[i].rs1) { if(rs>=0) { emit_addimm(rs,offset,ra); }else{ @@ -4233,30 +4167,30 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) } } // Preload constants for next instruction - if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS||itype[i+1]==C2LS) { + if(dops[i+1].itype==LOAD||dops[i+1].itype==LOADLR||dops[i+1].itype==STORE||dops[i+1].itype==STORELR||dops[i+1].itype==C1LS||dops[i+1].itype==C2LS) { int agr,ra; // Actual address agr=AGEN1+((i+1)&1); ra=get_reg(i_regs->regmap,agr); if(ra>=0) { - int rs=get_reg(regs[i+1].regmap,rs1[i+1]); + int rs=get_reg(regs[i+1].regmap,dops[i+1].rs1); int offset=imm[i+1]; int c=(regs[i+1].wasconst>>rs)&1; - if(c&&(rs1[i+1]!=rt1[i+1]||itype[i+1]!=LOAD)) { - if (opcode[i+1]==0x22||opcode[i+1]==0x26) { + if(c&&(dops[i+1].rs1!=dops[i+1].rt1||dops[i+1].itype!=LOAD)) { + if (dops[i+1].opcode==0x22||dops[i+1].opcode==0x26) { emit_movimm((constmap[i+1][rs]+offset)&0xFFFFFFFC,ra); // LWL/LWR - }else if (opcode[i+1]==0x1a||opcode[i+1]==0x1b) { + }else if (dops[i+1].opcode==0x1a||dops[i+1].opcode==0x1b) { 
emit_movimm((constmap[i+1][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR }else{ emit_movimm(constmap[i+1][rs]+offset,ra); regs[i+1].loadedconst|=1<>hr)&1)) break; - if(bt[i+1]) break; + if(dops[i+1].bt) break; i++; } if(i>hr)&1)) + if(dops[i+2].itype==LOAD&&dops[i+2].rs1==reg&&dops[i+2].rt1==reg&&((regs[i+1].wasconst>>hr)&1)) { // Precompute load address *value=constmap[i][hr]+imm[i+2]; return 1; } } - if(itype[i+1]==LOAD&&rs1[i+1]==reg&&rt1[i+1]==reg) + if(dops[i+1].itype==LOAD&&dops[i+1].rs1==reg&&dops[i+1].rt1==reg) { // Precompute load address *value=constmap[i][hr]+imm[i+1]; @@ -4311,7 +4245,7 @@ static void load_consts(signed char pre[],signed char regmap[],int i) { int hr,hr2; // propagate loaded constant flags - if(i==0||bt[i]) + if(i==0||dops[i].bt) regs[i].loadedconst=0; else { for(hr=0;hr0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP)) return 0; + //if(t>0&&(dops[t-1].itype==RJUMP||dops[t-1].itype==UJUMP||dops[t-1].itype==CJUMP||dops[t-1].itype==SJUMP)) return 0; // Delay slots require additional processing, so do not match - if(is_ds[t]) return 0; + if(dops[t].is_ds) return 0; } else { @@ -4628,7 +4562,7 @@ static void drc_dbg_emit_do_cmp(int i) assem_debug("//do_insn_cmp %08x\n", start+i*4); save_regs(reglist); // write out changed consts to match the interpreter - if (i > 0 && !bt[i]) { + if (i > 0 && !dops[i].bt) { for (hr = 0; hr < HOST_REGS; hr++) { int reg = regs[i-1].regmap[hr]; if (hr == EXCLUDE_REG || reg < 0) @@ -4666,12 +4600,12 @@ static void ds_assemble_entry(int i) drc_dbg_emit_do_cmp(t); if(regs[t].regmap_entry[HOST_CCREG]==CCREG&®s[t].regmap[HOST_CCREG]!=CCREG) wb_register(CCREG,regs[t].regmap_entry,regs[t].wasdirty); - load_regs(regs[t].regmap_entry,regs[t].regmap,rs1[t],rs2[t]); + load_regs(regs[t].regmap_entry,regs[t].regmap,dops[t].rs1,dops[t].rs2); address_generation(t,®s[t],regs[t].regmap_entry); - if(itype[t]==STORE||itype[t]==STORELR||(opcode[t]&0x3b)==0x39||(opcode[t]&0x3b)==0x3a) + 
if(dops[t].itype==STORE||dops[t].itype==STORELR||(dops[t].opcode&0x3b)==0x39||(dops[t].opcode&0x3b)==0x3a) load_regs(regs[t].regmap_entry,regs[t].regmap,INVCP,INVCP); is_delayslot=0; - switch(itype[t]) { + switch(dops[t].itype) { case ALU: alu_assemble(t,®s[t]);break; case IMM16: @@ -4750,7 +4684,7 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) void *jaddr; void *idle=NULL; int t=0; - if(itype[i]==RJUMP) + if(dops[i].itype==RJUMP) { *adj=0; } @@ -4758,7 +4692,7 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) if(internal_branch(ba[i])) { t=(ba[i]-start)>>2; - if(is_ds[t]) *adj=-1; // Branch into delay slot adds an extra cycle + if(dops[t].is_ds) *adj=-1; // Branch into delay slot adds an extra cycle else *adj=ccadj[t]; } else @@ -4824,40 +4758,40 @@ static void do_ccstub(int n) else { // Return address depends on which way the branch goes - if(itype[i]==CJUMP||itype[i]==SJUMP) + if(dops[i].itype==CJUMP||dops[i].itype==SJUMP) { - int s1l=get_reg(branch_regs[i].regmap,rs1[i]); - int s2l=get_reg(branch_regs[i].regmap,rs2[i]); - if(rs1[i]==0) + int s1l=get_reg(branch_regs[i].regmap,dops[i].rs1); + int s2l=get_reg(branch_regs[i].regmap,dops[i].rs2); + if(dops[i].rs1==0) { s1l=s2l; s2l=-1; } - else if(rs2[i]==0) + else if(dops[i].rs2==0) { s2l=-1; } assert(s1l>=0); #ifdef DESTRUCTIVE_WRITEBACK - if(rs1[i]) { + if(dops[i].rs1) { if((branch_regs[i].dirty>>s1l)&&1) - emit_loadreg(rs1[i],s1l); + emit_loadreg(dops[i].rs1,s1l); } else { if((branch_regs[i].dirty>>s1l)&1) - emit_loadreg(rs2[i],s1l); + emit_loadreg(dops[i].rs2,s1l); } if(s2l>=0) if((branch_regs[i].dirty>>s2l)&1) - emit_loadreg(rs2[i],s2l); + emit_loadreg(dops[i].rs2,s2l); #endif int hr=0; int addr=-1,alt=-1,ntaddr=-1; while(hr=0) emit_cmp(s1l,s2l); @@ -4900,7 +4834,7 @@ static void do_ccstub(int n) emit_cmovne_reg(alt,addr); #endif } - if((opcode[i]&0x2f)==5) // BNE + if((dops[i].opcode&0x2f)==5) // BNE { #ifdef HAVE_CMOV_IMM if(s2l>=0) 
emit_cmp(s1l,s2l); @@ -4913,7 +4847,7 @@ static void do_ccstub(int n) emit_cmovne_reg(alt,addr); #endif } - if((opcode[i]&0x2f)==6) // BLEZ + if((dops[i].opcode&0x2f)==6) // BLEZ { //emit_movimm(ba[i],alt); //emit_movimm(start+i*4+8,addr); @@ -4921,7 +4855,7 @@ static void do_ccstub(int n) emit_cmpimm(s1l,1); emit_cmovl_reg(alt,addr); } - if((opcode[i]&0x2f)==7) // BGTZ + if((dops[i].opcode&0x2f)==7) // BGTZ { //emit_movimm(ba[i],addr); //emit_movimm(start+i*4+8,ntaddr); @@ -4929,7 +4863,7 @@ static void do_ccstub(int n) emit_cmpimm(s1l,1); emit_cmovl_reg(ntaddr,addr); } - if((opcode[i]==1)&&(opcode2[i]&0x2D)==0) // BLTZ + if((dops[i].opcode==1)&&(dops[i].opcode2&0x2D)==0) // BLTZ { //emit_movimm(ba[i],alt); //emit_movimm(start+i*4+8,addr); @@ -4937,7 +4871,7 @@ static void do_ccstub(int n) emit_test(s1l,s1l); emit_cmovs_reg(alt,addr); } - if((opcode[i]==1)&&(opcode2[i]&0x2D)==1) // BGEZ + if((dops[i].opcode==1)&&(dops[i].opcode2&0x2D)==1) // BGEZ { //emit_movimm(ba[i],addr); //emit_movimm(start+i*4+8,alt); @@ -4945,7 +4879,7 @@ static void do_ccstub(int n) emit_test(s1l,s1l); emit_cmovs_reg(alt,addr); } - if(opcode[i]==0x11 && opcode2[i]==0x08 ) { + if(dops[i].opcode==0x11 && dops[i].opcode2==0x08 ) { if(source[i]&0x10000) // BC1T { //emit_movimm(ba[i],alt); @@ -4966,9 +4900,9 @@ static void do_ccstub(int n) emit_writeword(addr,&pcaddr); } else - if(itype[i]==RJUMP) + if(dops[i].itype==RJUMP) { - int r=get_reg(branch_regs[i].regmap,rs1[i]); + int r=get_reg(branch_regs[i].regmap,dops[i].rs1); if (ds_writes_rjump_rs(i)) { r=get_reg(branch_regs[i].regmap,RTEMP); } @@ -4984,11 +4918,11 @@ static void do_ccstub(int n) if(stubs[n].d==TAKEN) { if(internal_branch(ba[i])) load_needed_regs(branch_regs[i].regmap,regs[(ba[i]-start)>>2].regmap_entry); - else if(itype[i]==RJUMP) { + else if(dops[i].itype==RJUMP) { if(get_reg(branch_regs[i].regmap,RTEMP)>=0) emit_readword(&pcaddr,get_reg(branch_regs[i].regmap,RTEMP)); else - 
emit_loadreg(rs1[i],get_reg(branch_regs[i].regmap,rs1[i])); + emit_loadreg(dops[i].rs1,get_reg(branch_regs[i].regmap,dops[i].rs1)); } }else if(stubs[n].d==NOTTAKEN) { if(i=0) { #ifdef USE_MINI_HT - if(internal_branch(return_address)&&rt1[i+1]!=31) { + if(internal_branch(return_address)&&dops[i+1].rt1!=31) { int temp=-1; // note: must be ds-safe #ifdef HOST_TEMPREG temp=HOST_TEMPREG; @@ -5057,7 +4991,7 @@ static void ujump_assemble(int i,struct regstat *i_regs) address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH int temp=get_reg(branch_regs[i].regmap,PTEMP); - if(rt1[i]==31&&temp>=0) + if(dops[i].rt1==31&&temp>=0) { signed char *i_regmap=i_regs->regmap; int return_address=start+i*4+8; @@ -5065,23 +4999,23 @@ static void ujump_assemble(int i,struct regstat *i_regs) if(i_regmap[temp]==PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp); } #endif - if(rt1[i]==31&&(rt1[i]==rs1[i+1]||rt1[i]==rs2[i+1])) { + if(dops[i].rt1==31&&(dops[i].rt1==dops[i+1].rs1||dops[i].rt1==dops[i+1].rs2)) { ujump_assemble_write_ra(i); // writeback ra for DS ra_done=1; } ds_assemble(i+1,i_regs); uint64_t bc_unneeded=branch_regs[i].u; - bc_unneeded|=1|(1LL<=0) emit_prefetchreg(temp); + if(dops[i].rt1==31&&temp>=0) emit_prefetchreg(temp); #endif do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); @@ -5090,7 +5024,7 @@ static void ujump_assemble(int i,struct regstat *i_regs) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if(internal_branch(ba[i])&&is_ds[(ba[i]-start)>>2]) { + if (internal_branch(ba[i]) && dops[(ba[i]-start)>>2].is_ds) { ds_assemble_entry(i); } else { @@ -5102,9 +5036,9 @@ static void ujump_assemble(int i,struct regstat *i_regs) static void rjump_assemble_write_ra(int i) { int rt,return_address; - assert(rt1[i+1]!=rt1[i]); - assert(rt2[i+1]!=rt1[i]); - rt=get_reg(branch_regs[i].regmap,rt1[i]); + assert(dops[i+1].rt1!=dops[i].rt1); + 
assert(dops[i+1].rt2!=dops[i].rt1); + rt=get_reg(branch_regs[i].regmap,dops[i].rt1); assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(rt>=0); return_address=start+i*4+8; @@ -5125,7 +5059,7 @@ static void rjump_assemble(int i,struct regstat *i_regs) int temp; int rs,cc; int ra_done=0; - rs=get_reg(branch_regs[i].regmap,rs1[i]); + rs=get_reg(branch_regs[i].regmap,dops[i].rs1); assert(rs>=0); if (ds_writes_rjump_rs(i)) { // Delay slot abuse, make a copy of the branch address register @@ -5137,7 +5071,7 @@ static void rjump_assemble(int i,struct regstat *i_regs) } address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH - if(rt1[i]==31) + if(dops[i].rt1==31) { if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) { signed char *i_regmap=i_regs->regmap; @@ -5147,22 +5081,22 @@ static void rjump_assemble(int i,struct regstat *i_regs) } #endif #ifdef USE_MINI_HT - if(rs1[i]==31) { + if(dops[i].rs1==31) { int rh=get_reg(regs[i].regmap,RHASH); if(rh>=0) do_preload_rhash(rh); } #endif - if(rt1[i]!=0&&(rt1[i]==rs1[i+1]||rt1[i]==rs2[i+1])) { + if(dops[i].rt1!=0&&(dops[i].rt1==dops[i+1].rs1||dops[i].rt1==dops[i+1].rs2)) { rjump_assemble_write_ra(i); ra_done=1; } ds_assemble(i+1,i_regs); uint64_t bc_unneeded=branch_regs[i].u; - bc_unneeded|=1|(1LL<>rs)&1) { - if(rs1[i]!=rt1[i+1]&&rs1[i]!=rt2[i+1]) { - emit_loadreg(rs1[i],rs); + if(dops[i].rs1!=dops[i+1].rt1&&dops[i].rs1!=dops[i+1].rt2) { + emit_loadreg(dops[i].rs1,rs); } } #endif #ifdef REG_PREFETCH - if(rt1[i]==31&&temp>=0) emit_prefetchreg(temp); + if(dops[i].rt1==31&&temp>=0) emit_prefetchreg(temp); #endif #ifdef USE_MINI_HT - if(rs1[i]==31) { + if(dops[i].rs1==31) { do_miniht_load(ht,rh); } #endif @@ -5197,14 +5131,14 @@ static void rjump_assemble(int i,struct regstat *i_regs) //assert(adj==0); 
emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); add_stub(CC_STUB,out,NULL,0,i,-1,TAKEN,rs); - if(itype[i+1]==COP0&&(source[i+1]&0x3f)==0x10) + if(dops[i+1].itype==COP0&&(source[i+1]&0x3f)==0x10) // special case for RFE emit_jmp(0); else emit_jns(0); //load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,-1); #ifdef USE_MINI_HT - if(rs1[i]==31) { + if(dops[i].rs1==31) { do_miniht_jump(rs,rh,ht); } else @@ -5213,7 +5147,7 @@ static void rjump_assemble(int i,struct regstat *i_regs) do_jump_vaddr(rs); } #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - if(rt1[i]!=31&&i>2]) { + if (internal && dops[(ba[i]-start)>>2].is_ds) { ds_assemble_entry(i); } else { @@ -5316,7 +5250,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); - if(opcode[i]==4) // BEQ + if(dops[i].opcode==4) // BEQ { if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); @@ -5328,7 +5262,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) emit_jeq(0); } } - if(opcode[i]==5) // BNE + if(dops[i].opcode==5) // BNE { if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); @@ -5340,7 +5274,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) emit_jne(0); } } - if(opcode[i]==6) // BLEZ + if(dops[i].opcode==6) // BLEZ { emit_cmpimm(s1l,1); if(invert){ @@ -5351,7 +5285,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) emit_jl(0); } } - if(opcode[i]==7) // BGTZ + if(dops[i].opcode==7) // BGTZ { emit_cmpimm(s1l,1); if(invert){ @@ -5365,7 +5299,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) if(invert) { if(taken) set_jump_target(taken, out); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { + if (match && (!internal || 
!dops[(ba[i]-start)>>2].is_ds)) { if(adj) { emit_addimm(cc,-CLOCK_ADJUST(adj),cc); add_to_linker(out,ba[i],internal); @@ -5384,7 +5318,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if(internal&&is_ds[(ba[i]-start)>>2]) { + if (internal && dops[(ba[i] - start) >> 2].is_ds) { ds_assemble_entry(i); } else { @@ -5404,34 +5338,34 @@ static void cjump_assemble(int i,struct regstat *i_regs) else { // In-order execution (branch first) - //if(likely[i]) printf("IOL\n"); + //if(dops[i].likely) printf("IOL\n"); //else //printf("IOE\n"); void *taken = NULL, *nottaken = NULL, *nottaken1 = NULL; if(!unconditional&&!nop) { //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); - if((opcode[i]&0x2f)==4) // BEQ + if((dops[i].opcode&0x2f)==4) // BEQ { if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); nottaken=out; emit_jne(DJT_2); } - if((opcode[i]&0x2f)==5) // BNE + if((dops[i].opcode&0x2f)==5) // BNE { if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); nottaken=out; emit_jeq(DJT_2); } - if((opcode[i]&0x2f)==6) // BLEZ + if((dops[i].opcode&0x2f)==6) // BLEZ { emit_cmpimm(s1l,1); nottaken=out; emit_jge(DJT_2); } - if((opcode[i]&0x2f)==7) // BGTZ + if((dops[i].opcode&0x2f)==7) // BGTZ { emit_cmpimm(s1l,1); nottaken=out; @@ -5440,7 +5374,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) } // if(!unconditional) int adj; uint64_t ds_unneeded=branch_regs[i].u; - ds_unneeded&=~((1LL<>2]) { + if (internal && dops[(ba[i] - start) >> 2].is_ds) { ds_assemble_entry(i); } else { @@ -5480,15 +5414,15 @@ static void cjump_assemble(int i,struct regstat *i_regs) if(nottaken1) set_jump_target(nottaken1, out); set_jump_target(nottaken, out); assem_debug("2:\n"); - if(!likely[i]) 
{ + if(!dops[i].likely) { wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,ds_unneeded); - load_regs(regs[i].regmap,branch_regs[i].regmap,rs1[i+1],rs2[i+1]); + load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2); address_generation(i+1,&branch_regs[i],0); load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG); ds_assemble(i+1,&branch_regs[i]); } cc=get_reg(branch_regs[i].regmap,CCREG); - if(cc==-1&&!likely[i]) { + if(cc==-1&&!dops[i].likely) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); @@ -5503,7 +5437,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); void *jaddr=out; emit_jns(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); + add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,dops[i].likely?NULLDS:NOTTAKEN,0); } } } @@ -5529,39 +5463,39 @@ static void sjump_assemble(int i,struct regstat *i_regs) invert=1; // because of near cond. 
branches #endif - //if(opcode2[i]>=0x10) return; // FIXME (BxxZAL) - //assert(opcode2[i]<0x10||rs1[i]==0); // FIXME (BxxZAL) + //if(dops[i].opcode2>=0x10) return; // FIXME (BxxZAL) + //assert(dops[i].opcode2<0x10||dops[i].rs1==0); // FIXME (BxxZAL) - if(ooo[i]) { - s1l=get_reg(branch_regs[i].regmap,rs1[i]); + if(dops[i].ooo) { + s1l=get_reg(branch_regs[i].regmap,dops[i].rs1); } else { - s1l=get_reg(i_regmap,rs1[i]); + s1l=get_reg(i_regmap,dops[i].rs1); } - if(rs1[i]==0) + if(dops[i].rs1==0) { - if(opcode2[i]&1) unconditional=1; + if(dops[i].opcode2&1) unconditional=1; else nevertaken=1; // These are never taken (r0 is never less than zero) - //assert(opcode2[i]!=0); - //assert(opcode2[i]!=2); - //assert(opcode2[i]!=0x10); - //assert(opcode2[i]!=0x12); + //assert(dops[i].opcode2!=0); + //assert(dops[i].opcode2!=2); + //assert(dops[i].opcode2!=0x10); + //assert(dops[i].opcode2!=0x12); } - if(ooo[i]) { + if(dops[i].ooo) { // Out of order execution (delay slot first) //printf("OOOE\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); ds_assemble(i+1,i_regs); int adj; uint64_t bc_unneeded=branch_regs[i].u; - bc_unneeded&=~((1LL<>2]) { + if (internal && dops[(ba[i] - start) >> 2].is_ds) { ds_assemble_entry(i); } else { @@ -5613,7 +5547,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); { assert(s1l>=0); - if((opcode2[i]&0xf)==0) // BLTZ/BLTZAL + if((dops[i].opcode2&0xf)==0) // BLTZ/BLTZAL { emit_test(s1l,s1l); if(invert){ @@ -5624,7 +5558,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) emit_js(0); } } - if((opcode2[i]&0xf)==1) // BGEZ/BLTZAL + if((dops[i].opcode2&0xf)==1) // BGEZ/BLTZAL { emit_test(s1l,s1l); if(invert){ @@ -5639,7 +5573,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) if(invert) { #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { + if (match && (!internal || !dops[(ba[i] - start) >> 2].is_ds)) { if(adj) { 
emit_addimm(cc,-CLOCK_ADJUST(adj),cc); add_to_linker(out,ba[i],internal); @@ -5658,7 +5592,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if(internal&&is_ds[(ba[i]-start)>>2]) { + if (internal && dops[(ba[i] - start) >> 2].is_ds) { ds_assemble_entry(i); } else { @@ -5679,7 +5613,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) // In-order execution (branch first) //printf("IOE\n"); void *nottaken = NULL; - if(rt1[i]==31) { + if(dops[i].rt1==31) { int rt,return_address; rt=get_reg(branch_regs[i].regmap,31); if(rt>=0) { @@ -5694,13 +5628,13 @@ static void sjump_assemble(int i,struct regstat *i_regs) if(!unconditional) { //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); - if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL + if((dops[i].opcode2&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL { emit_test(s1l,s1l); nottaken=out; emit_jns(DJT_1); } - if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL + if((dops[i].opcode2&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL { emit_test(s1l,s1l); nottaken=out; @@ -5709,14 +5643,14 @@ static void sjump_assemble(int i,struct regstat *i_regs) } // if(!unconditional) int adj; uint64_t ds_unneeded=branch_regs[i].u; - ds_unneeded&=~((1LL<>2]) { + if (internal && dops[(ba[i] - start) >> 2].is_ds) { ds_assemble_entry(i); } else { @@ -5747,15 +5681,15 @@ static void sjump_assemble(int i,struct regstat *i_regs) if(!unconditional) { set_jump_target(nottaken, out); assem_debug("1:\n"); - if(!likely[i]) { + if(!dops[i].likely) { wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,ds_unneeded); - load_regs(regs[i].regmap,branch_regs[i].regmap,rs1[i+1],rs2[i+1]); + 
load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2); address_generation(i+1,&branch_regs[i],0); load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG); ds_assemble(i+1,&branch_regs[i]); } cc=get_reg(branch_regs[i].regmap,CCREG); - if(cc==-1&&!likely[i]) { + if(cc==-1&&!dops[i].likely) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); @@ -5770,7 +5704,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); void *jaddr=out; emit_jns(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); + add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,dops[i].likely?NULLDS:NOTTAKEN,0); } } } @@ -5778,17 +5712,17 @@ static void sjump_assemble(int i,struct regstat *i_regs) static void pagespan_assemble(int i,struct regstat *i_regs) { - int s1l=get_reg(i_regs->regmap,rs1[i]); - int s2l=get_reg(i_regs->regmap,rs2[i]); + int s1l=get_reg(i_regs->regmap,dops[i].rs1); + int s2l=get_reg(i_regs->regmap,dops[i].rs2); void *taken = NULL; void *nottaken = NULL; int unconditional=0; - if(rs1[i]==0) + if(dops[i].rs1==0) { s1l=s2l; s2l=-1; } - else if(rs2[i]==0) + else if(dops[i].rs2==0) { s2l=-1; } @@ -5799,8 +5733,8 @@ static void pagespan_assemble(int i,struct regstat *i_regs) while(hrregmap[hr]&63)!=rs1[i] && - (i_regs->regmap[hr]&63)!=rs2[i] ) + (i_regs->regmap[hr]&63)!=dops[i].rs1 && + (i_regs->regmap[hr]&63)!=dops[i].rs2 ) { addr=hr++;break; } @@ -5810,20 +5744,20 @@ static void pagespan_assemble(int i,struct regstat *i_regs) while(hrregmap[hr]&63)!=rs1[i] && - (i_regs->regmap[hr]&63)!=rs2[i] ) + (i_regs->regmap[hr]&63)!=dops[i].rs1 && + (i_regs->regmap[hr]&63)!=dops[i].rs2 ) { alt=hr++;break; } hr++; } - if((opcode[i]&0x2E)==6) // BLEZ/BGTZ needs another register + if((dops[i].opcode&0x2E)==6) // BLEZ/BGTZ needs another register { while(hrregmap[hr]&63)!=rs1[i] 
&& - (i_regs->regmap[hr]&63)!=rs2[i] ) + (i_regs->regmap[hr]&63)!=dops[i].rs1 && + (i_regs->regmap[hr]&63)!=dops[i].rs2 ) { ntaddr=hr;break; } @@ -5831,33 +5765,33 @@ static void pagespan_assemble(int i,struct regstat *i_regs) } } assert(hrregmap,31); emit_movimm(start+i*4+8,rt); unconditional=1; } - if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR + if(dops[i].opcode==0&&(dops[i].opcode2&0x3E)==8) // JR/JALR { emit_mov(s1l,addr); - if(opcode2[i]==9) // JALR + if(dops[i].opcode2==9) // JALR { - int rt=get_reg(i_regs->regmap,rt1[i]); + int rt=get_reg(i_regs->regmap,dops[i].rt1); emit_movimm(start+i*4+8,rt); } } - if((opcode[i]&0x3f)==4) // BEQ + if((dops[i].opcode&0x3f)==4) // BEQ { - if(rs1[i]==rs2[i]) + if(dops[i].rs1==dops[i].rs2) { unconditional=1; } @@ -5878,7 +5812,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) emit_cmovne_reg(alt,addr); } } - if((opcode[i]&0x3f)==5) // BNE + if((dops[i].opcode&0x3f)==5) // BNE { #ifdef HAVE_CMOV_IMM if(s2l>=0) emit_cmp(s1l,s2l); @@ -5892,7 +5826,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) emit_cmovne_reg(alt,addr); #endif } - if((opcode[i]&0x3f)==0x14) // BEQL + if((dops[i].opcode&0x3f)==0x14) // BEQL { if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); @@ -5900,7 +5834,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) nottaken=out; emit_jne(0); } - if((opcode[i]&0x3f)==0x15) // BNEL + if((dops[i].opcode&0x3f)==0x15) // BNEL { if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); @@ -5908,30 +5842,30 @@ static void pagespan_assemble(int i,struct regstat *i_regs) emit_jeq(0); if(taken) set_jump_target(taken, out); } - if((opcode[i]&0x3f)==6) // BLEZ + if((dops[i].opcode&0x3f)==6) // BLEZ { emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr); emit_cmpimm(s1l,1); emit_cmovl_reg(alt,addr); } - if((opcode[i]&0x3f)==7) // BGTZ + if((dops[i].opcode&0x3f)==7) // BGTZ { emit_mov2imm_compact(ba[i],addr,start+i*4+8,ntaddr); emit_cmpimm(s1l,1); emit_cmovl_reg(ntaddr,addr); } - 
if((opcode[i]&0x3f)==0x16) // BLEZL + if((dops[i].opcode&0x3f)==0x16) // BLEZL { - assert((opcode[i]&0x3f)!=0x16); + assert((dops[i].opcode&0x3f)!=0x16); } - if((opcode[i]&0x3f)==0x17) // BGTZL + if((dops[i].opcode&0x3f)==0x17) // BGTZL { - assert((opcode[i]&0x3f)!=0x17); + assert((dops[i].opcode&0x3f)!=0x17); } - assert(opcode[i]!=1); // BLTZ/BGEZ + assert(dops[i].opcode!=1); // BLTZ/BGEZ //FIXME: Check CSREG - if(opcode[i]==0x11 && opcode2[i]==0x08 ) { + if(dops[i].opcode==0x11 && dops[i].opcode2==0x08 ) { if((source[i]&0x30000)==0) // BC1F { emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt); @@ -5960,7 +5894,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) assert(i_regs->regmap[HOST_CCREG]==CCREG); wb_dirtys(regs[i].regmap,regs[i].dirty); - if(likely[i]||unconditional) + if(dops[i].likely||unconditional) { emit_movimm(ba[i],HOST_BTREG); } @@ -5979,7 +5913,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) add_jump_out(target_addr,stub); } else set_jump_target(branch_addr, stub); - if(likely[i]) { + if(dops[i].likely) { // Not-taken path set_jump_target(nottaken, out); wb_dirtys(regs[i].regmap,regs[i].dirty); @@ -6012,12 +5946,12 @@ static void pagespan_ds() wb_register(CCREG,regs[0].regmap_entry,regs[0].wasdirty); if(regs[0].regmap[HOST_BTREG]!=BTREG) emit_writeword(HOST_BTREG,&branch_target); - load_regs(regs[0].regmap_entry,regs[0].regmap,rs1[0],rs2[0]); + load_regs(regs[0].regmap_entry,regs[0].regmap,dops[0].rs1,dops[0].rs2); address_generation(0,®s[0],regs[0].regmap_entry); - if(itype[0]==STORE||itype[0]==STORELR||(opcode[0]&0x3b)==0x39||(opcode[0]&0x3b)==0x3a) + if(dops[0].itype==STORE||dops[0].itype==STORELR||(dops[0].opcode&0x3b)==0x39||(dops[0].opcode&0x3b)==0x3a) load_regs(regs[0].regmap_entry,regs[0].regmap,INVCP,INVCP); is_delayslot=0; - switch(itype[0]) { + switch(dops[0].itype) { case ALU: alu_assemble(0,®s[0]);break; case IMM16: @@ -6107,10 +6041,10 @@ void unneeded_registers(int istart,int iend,int r) for 
(i=iend;i>=istart;i--) { //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r); - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP) + if(dops[i].itype==RJUMP||dops[i].itype==UJUMP||dops[i].itype==CJUMP||dops[i].itype==SJUMP) { // If subroutine call, flag return address as a possible branch target - if(rt1[i]==31 && i=(start+slen*4)) { @@ -6119,14 +6053,14 @@ void unneeded_registers(int istart,int iend,int r) gte_u=gte_u_unknown; branch_unneeded_reg[i]=u; // Merge in delay slot - u|=(1LL<>2]=1; + dops[(ba[i]-start)>>2].bt=1; if(ba[i]<=start+i*4) { // Backward branch if(is_ujump(i)) @@ -6155,14 +6089,14 @@ void unneeded_registers(int istart,int iend,int r) temp_gte_u&=gte_unneeded[i+2]; } // Merge in delay slot - temp_u|=(1LL<>2]; branch_unneeded_reg[i]=u; // Merge in delay slot - u|=(1LL<>2]; branch_unneeded_reg[i]=b; // Branch delay slot - b|=(1LL<=istart;i--) { - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP) + if(dops[i].itype==RJUMP||dops[i].itype==UJUMP||dops[i].itype==CJUMP||dops[i].itype==SJUMP) { if(ba[i]=(start+slen*4)) { @@ -6307,17 +6241,17 @@ void clean_registers(int istart,int iend,int wr) // Merge in delay slot (will dirty) for(r=0;r33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<>2]>>(target_reg&63))&1)<>2]>>(target_reg&63))&1)<>2].regmap_entry[r]) { - will_dirty[i+1]&=will_dirty[(ba[i]-start)>>2]&(1<>2]&(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<istart) { - if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP) + if(dops[i].itype!=RJUMP&&dops[i].itype!=UJUMP&&dops[i].itype!=CJUMP&&dops[i].itype!=SJUMP) { // Don't store a register immediately after writing it, // may prevent dual-issue. 
- if((regs[i].regmap[r]&63)==rt1[i-1]) wont_dirty_i|=1<>r)&1) { - printf(" r%d",r); - } - } - printf("\n");*/ - - //if(i==istart||(itype[i-1]!=RJUMP&&itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP)) { regs[i].dirty|=will_dirty_i; #ifndef DESTRUCTIVE_WRITEBACK regs[i].dirty&=wont_dirty_i; - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP) + if(dops[i].itype==RJUMP||dops[i].itype==UJUMP||dops[i].itype==CJUMP||dops[i].itype==SJUMP) { if (i < iend-1 && !is_ujump(i)) { for(r=0;r>14):*ba);break; + printf (" %x: %s r%d,r%d,%8x\n",start+i*4,insn[i],dops[i].rs1,dops[i].rs2,i?start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14):*ba);break; case SJUMP: - printf (" %x: %s r%d,%8x\n",start+i*4,insn[i],rs1[i],start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14));break; + printf (" %x: %s r%d,%8x\n",start+i*4,insn[i],dops[i].rs1,start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14));break; case RJUMP: - if (opcode[i]==0x9&&rt1[i]!=31) - printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],rt1[i],rs1[i]); + if (dops[i].opcode==0x9&&dops[i].rt1!=31) + printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1); else - printf (" %x: %s r%d\n",start+i*4,insn[i],rs1[i]); + printf (" %x: %s r%d\n",start+i*4,insn[i],dops[i].rs1); break; case SPAN: - printf (" %x: %s (pagespan) r%d,r%d,%8x\n",start+i*4,insn[i],rs1[i],rs2[i],ba[i]);break; + printf (" %x: %s (pagespan) r%d,r%d,%8x\n",start+i*4,insn[i],dops[i].rs1,dops[i].rs2,ba[i]);break; case IMM16: - if(opcode[i]==0xf) //LUI - printf (" %x: %s r%d,%4x0000\n",start+i*4,insn[i],rt1[i],imm[i]&0xffff); + if(dops[i].opcode==0xf) //LUI + printf (" %x: %s r%d,%4x0000\n",start+i*4,insn[i],dops[i].rt1,imm[i]&0xffff); else - printf (" %x: %s r%d,r%d,%d\n",start+i*4,insn[i],rt1[i],rs1[i],imm[i]); + printf (" %x: %s r%d,r%d,%d\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1,imm[i]); break; case LOAD: case LOADLR: - printf (" %x: %s r%d,r%d+%x\n",start+i*4,insn[i],rt1[i],rs1[i],imm[i]); + printf (" 
%x: %s r%d,r%d+%x\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1,imm[i]); break; case STORE: case STORELR: - printf (" %x: %s r%d,r%d+%x\n",start+i*4,insn[i],rs2[i],rs1[i],imm[i]); + printf (" %x: %s r%d,r%d+%x\n",start+i*4,insn[i],dops[i].rs2,dops[i].rs1,imm[i]); break; case ALU: case SHIFT: - printf (" %x: %s r%d,r%d,r%d\n",start+i*4,insn[i],rt1[i],rs1[i],rs2[i]); + printf (" %x: %s r%d,r%d,r%d\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1,dops[i].rs2); break; case MULTDIV: - printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],rs1[i],rs2[i]); + printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],dops[i].rs1,dops[i].rs2); break; case SHIFTIMM: - printf (" %x: %s r%d,r%d,%d\n",start+i*4,insn[i],rt1[i],rs1[i],imm[i]); + printf (" %x: %s r%d,r%d,%d\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1,imm[i]); break; case MOV: - if((opcode2[i]&0x1d)==0x10) - printf (" %x: %s r%d\n",start+i*4,insn[i],rt1[i]); - else if((opcode2[i]&0x1d)==0x11) - printf (" %x: %s r%d\n",start+i*4,insn[i],rs1[i]); + if((dops[i].opcode2&0x1d)==0x10) + printf (" %x: %s r%d\n",start+i*4,insn[i],dops[i].rt1); + else if((dops[i].opcode2&0x1d)==0x11) + printf (" %x: %s r%d\n",start+i*4,insn[i],dops[i].rs1); else printf (" %x: %s\n",start+i*4,insn[i]); break; case COP0: - if(opcode2[i]==0) - printf (" %x: %s r%d,cpr0[%d]\n",start+i*4,insn[i],rt1[i],(source[i]>>11)&0x1f); // MFC0 - else if(opcode2[i]==4) - printf (" %x: %s r%d,cpr0[%d]\n",start+i*4,insn[i],rs1[i],(source[i]>>11)&0x1f); // MTC0 + if(dops[i].opcode2==0) + printf (" %x: %s r%d,cpr0[%d]\n",start+i*4,insn[i],dops[i].rt1,(source[i]>>11)&0x1f); // MFC0 + else if(dops[i].opcode2==4) + printf (" %x: %s r%d,cpr0[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC0 else printf (" %x: %s\n",start+i*4,insn[i]); break; case COP1: - if(opcode2[i]<3) - printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],rt1[i],(source[i]>>11)&0x1f); // MFC1 - else if(opcode2[i]>3) - printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],rs1[i],(source[i]>>11)&0x1f); // MTC1 
+ if(dops[i].opcode2<3) + printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],dops[i].rt1,(source[i]>>11)&0x1f); // MFC1 + else if(dops[i].opcode2>3) + printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC1 else printf (" %x: %s\n",start+i*4,insn[i]); break; case COP2: - if(opcode2[i]<3) - printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],rt1[i],(source[i]>>11)&0x1f); // MFC2 - else if(opcode2[i]>3) - printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],rs1[i],(source[i]>>11)&0x1f); // MTC2 + if(dops[i].opcode2<3) + printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],dops[i].rt1,(source[i]>>11)&0x1f); // MFC2 + else if(dops[i].opcode2>3) + printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC2 else printf (" %x: %s\n",start+i*4,insn[i]); break; case C1LS: - printf (" %x: %s cpr1[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,rs1[i],imm[i]); + printf (" %x: %s cpr1[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,dops[i].rs1,imm[i]); break; case C2LS: - printf (" %x: %s cpr2[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,rs1[i],imm[i]); + printf (" %x: %s cpr2[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,dops[i].rs1,imm[i]); break; case INTCALL: printf (" %x: %s (INTCALL)\n",start+i*4,insn[i]); @@ -7127,9 +7043,12 @@ int new_recompile_block(u_int addr) /* Pass 1 disassembly */ for(i=0;!done;i++) { - bt[i]=0;likely[i]=0;ooo[i]=0;op2=0; + dops[i].bt=0; + dops[i].likely=0; + dops[i].ooo=0; + op2=0; minimum_free_regs[i]=0; - opcode[i]=op=source[i]>>26; + dops[i].opcode=op=source[i]>>26; switch(op) { case 0x00: strcpy(insn[i],"special"); type=NI; @@ -7311,168 +7230,157 @@ int new_recompile_block(u_int addr) SysPrintf("NI %08x @%08x (%08x)\n", source[i], addr + i*4, addr); break; } - itype[i]=type; - opcode2[i]=op2; + dops[i].itype=type; + dops[i].opcode2=op2; /* Get registers/immediates */ - lt1[i]=0; - dep1[i]=0; - dep2[i]=0; + dops[i].lt1=0; gte_rs[i]=gte_rt[i]=0; 
switch(type) { case LOAD: - rs1[i]=(source[i]>>21)&0x1f; - rs2[i]=0; - rt1[i]=(source[i]>>16)&0x1f; - rt2[i]=0; + dops[i].rs1=(source[i]>>21)&0x1f; + dops[i].rs2=0; + dops[i].rt1=(source[i]>>16)&0x1f; + dops[i].rt2=0; imm[i]=(short)source[i]; break; case STORE: case STORELR: - rs1[i]=(source[i]>>21)&0x1f; - rs2[i]=(source[i]>>16)&0x1f; - rt1[i]=0; - rt2[i]=0; + dops[i].rs1=(source[i]>>21)&0x1f; + dops[i].rs2=(source[i]>>16)&0x1f; + dops[i].rt1=0; + dops[i].rt2=0; imm[i]=(short)source[i]; break; case LOADLR: // LWL/LWR only load part of the register, // therefore the target register must be treated as a source too - rs1[i]=(source[i]>>21)&0x1f; - rs2[i]=(source[i]>>16)&0x1f; - rt1[i]=(source[i]>>16)&0x1f; - rt2[i]=0; + dops[i].rs1=(source[i]>>21)&0x1f; + dops[i].rs2=(source[i]>>16)&0x1f; + dops[i].rt1=(source[i]>>16)&0x1f; + dops[i].rt2=0; imm[i]=(short)source[i]; - if(op==0x26) dep1[i]=rt1[i]; // LWR break; case IMM16: - if (op==0x0f) rs1[i]=0; // LUI instruction has no source register - else rs1[i]=(source[i]>>21)&0x1f; - rs2[i]=0; - rt1[i]=(source[i]>>16)&0x1f; - rt2[i]=0; + if (op==0x0f) dops[i].rs1=0; // LUI instruction has no source register + else dops[i].rs1=(source[i]>>21)&0x1f; + dops[i].rs2=0; + dops[i].rt1=(source[i]>>16)&0x1f; + dops[i].rt2=0; if(op>=0x0c&&op<=0x0e) { // ANDI/ORI/XORI imm[i]=(unsigned short)source[i]; }else{ imm[i]=(short)source[i]; } - if(op==0x0d||op==0x0e) dep1[i]=rs1[i]; // ORI/XORI break; case UJUMP: - rs1[i]=0; - rs2[i]=0; - rt1[i]=0; - rt2[i]=0; + dops[i].rs1=0; + dops[i].rs2=0; + dops[i].rt1=0; + dops[i].rt2=0; // The JAL instruction writes to r31. if (op&1) { - rt1[i]=31; + dops[i].rt1=31; } - rs2[i]=CCREG; + dops[i].rs2=CCREG; break; case RJUMP: - rs1[i]=(source[i]>>21)&0x1f; - rs2[i]=0; - rt1[i]=0; - rt2[i]=0; + dops[i].rs1=(source[i]>>21)&0x1f; + dops[i].rs2=0; + dops[i].rt1=0; + dops[i].rt2=0; // The JALR instruction writes to rd. 
if (op2&1) { - rt1[i]=(source[i]>>11)&0x1f; + dops[i].rt1=(source[i]>>11)&0x1f; } - rs2[i]=CCREG; + dops[i].rs2=CCREG; break; case CJUMP: - rs1[i]=(source[i]>>21)&0x1f; - rs2[i]=(source[i]>>16)&0x1f; - rt1[i]=0; - rt2[i]=0; + dops[i].rs1=(source[i]>>21)&0x1f; + dops[i].rs2=(source[i]>>16)&0x1f; + dops[i].rt1=0; + dops[i].rt2=0; if(op&2) { // BGTZ/BLEZ - rs2[i]=0; + dops[i].rs2=0; } - likely[i]=op>>4; + dops[i].likely=(op>>4)?1:0; break; case SJUMP: - rs1[i]=(source[i]>>21)&0x1f; - rs2[i]=CCREG; - rt1[i]=0; - rt2[i]=0; + dops[i].rs1=(source[i]>>21)&0x1f; + dops[i].rs2=CCREG; + dops[i].rt1=0; + dops[i].rt2=0; if(op2&0x10) { // BxxAL - rt1[i]=31; + dops[i].rt1=31; // NOTE: If the branch is not taken, r31 is still overwritten } - likely[i]=(op2&2)>>1; + dops[i].likely=(op2&2)?1:0; break; case ALU: - rs1[i]=(source[i]>>21)&0x1f; // source - rs2[i]=(source[i]>>16)&0x1f; // subtract amount - rt1[i]=(source[i]>>11)&0x1f; // destination - rt2[i]=0; - if(op2>=0x24&&op2<=0x27) { // AND/OR/XOR/NOR - dep1[i]=rs1[i];dep2[i]=rs2[i]; - } - else if(op2>=0x2c&&op2<=0x2f) { // DADD/DSUB - dep1[i]=rs1[i];dep2[i]=rs2[i]; - } + dops[i].rs1=(source[i]>>21)&0x1f; // source + dops[i].rs2=(source[i]>>16)&0x1f; // subtract amount + dops[i].rt1=(source[i]>>11)&0x1f; // destination + dops[i].rt2=0; break; case MULTDIV: - rs1[i]=(source[i]>>21)&0x1f; // source - rs2[i]=(source[i]>>16)&0x1f; // divisor - rt1[i]=HIREG; - rt2[i]=LOREG; + dops[i].rs1=(source[i]>>21)&0x1f; // source + dops[i].rs2=(source[i]>>16)&0x1f; // divisor + dops[i].rt1=HIREG; + dops[i].rt2=LOREG; break; case MOV: - rs1[i]=0; - rs2[i]=0; - rt1[i]=0; - rt2[i]=0; - if(op2==0x10) rs1[i]=HIREG; // MFHI - if(op2==0x11) rt1[i]=HIREG; // MTHI - if(op2==0x12) rs1[i]=LOREG; // MFLO - if(op2==0x13) rt1[i]=LOREG; // MTLO - if((op2&0x1d)==0x10) rt1[i]=(source[i]>>11)&0x1f; // MFxx - if((op2&0x1d)==0x11) rs1[i]=(source[i]>>21)&0x1f; // MTxx - dep1[i]=rs1[i]; + dops[i].rs1=0; + dops[i].rs2=0; + dops[i].rt1=0; + dops[i].rt2=0; + 
if(op2==0x10) dops[i].rs1=HIREG; // MFHI + if(op2==0x11) dops[i].rt1=HIREG; // MTHI + if(op2==0x12) dops[i].rs1=LOREG; // MFLO + if(op2==0x13) dops[i].rt1=LOREG; // MTLO + if((op2&0x1d)==0x10) dops[i].rt1=(source[i]>>11)&0x1f; // MFxx + if((op2&0x1d)==0x11) dops[i].rs1=(source[i]>>21)&0x1f; // MTxx break; case SHIFT: - rs1[i]=(source[i]>>16)&0x1f; // target of shift - rs2[i]=(source[i]>>21)&0x1f; // shift amount - rt1[i]=(source[i]>>11)&0x1f; // destination - rt2[i]=0; + dops[i].rs1=(source[i]>>16)&0x1f; // target of shift + dops[i].rs2=(source[i]>>21)&0x1f; // shift amount + dops[i].rt1=(source[i]>>11)&0x1f; // destination + dops[i].rt2=0; break; case SHIFTIMM: - rs1[i]=(source[i]>>16)&0x1f; - rs2[i]=0; - rt1[i]=(source[i]>>11)&0x1f; - rt2[i]=0; + dops[i].rs1=(source[i]>>16)&0x1f; + dops[i].rs2=0; + dops[i].rt1=(source[i]>>11)&0x1f; + dops[i].rt2=0; imm[i]=(source[i]>>6)&0x1f; // DSxx32 instructions if(op2>=0x3c) imm[i]|=0x20; break; case COP0: - rs1[i]=0; - rs2[i]=0; - rt1[i]=0; - rt2[i]=0; - if(op2==0||op2==2) rt1[i]=(source[i]>>16)&0x1F; // MFC0/CFC0 - if(op2==4||op2==6) rs1[i]=(source[i]>>16)&0x1F; // MTC0/CTC0 - if(op2==4&&((source[i]>>11)&0x1f)==12) rt2[i]=CSREG; // Status - if(op2==16) if((source[i]&0x3f)==0x18) rs2[i]=CCREG; // ERET + dops[i].rs1=0; + dops[i].rs2=0; + dops[i].rt1=0; + dops[i].rt2=0; + if(op2==0||op2==2) dops[i].rt1=(source[i]>>16)&0x1F; // MFC0/CFC0 + if(op2==4||op2==6) dops[i].rs1=(source[i]>>16)&0x1F; // MTC0/CTC0 + if(op2==4&&((source[i]>>11)&0x1f)==12) dops[i].rt2=CSREG; // Status + if(op2==16) if((source[i]&0x3f)==0x18) dops[i].rs2=CCREG; // ERET break; case COP1: - rs1[i]=0; - rs2[i]=0; - rt1[i]=0; - rt2[i]=0; - if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC1/DMFC1/CFC1 - if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC1/DMTC1/CTC1 - rs2[i]=CSREG; + dops[i].rs1=0; + dops[i].rs2=0; + dops[i].rt1=0; + dops[i].rt2=0; + if(op2<3) dops[i].rt1=(source[i]>>16)&0x1F; // MFC1/DMFC1/CFC1 + if(op2>3) dops[i].rs1=(source[i]>>16)&0x1F; // 
MTC1/DMTC1/CTC1 + dops[i].rs2=CSREG; break; case COP2: - rs1[i]=0; - rs2[i]=0; - rt1[i]=0; - rt2[i]=0; - if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC2/CFC2 - if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC2/CTC2 - rs2[i]=CSREG; + dops[i].rs1=0; + dops[i].rs2=0; + dops[i].rt1=0; + dops[i].rt2=0; + if(op2<3) dops[i].rt1=(source[i]>>16)&0x1F; // MFC2/CFC2 + if(op2>3) dops[i].rs1=(source[i]>>16)&0x1F; // MTC2/CTC2 + dops[i].rs2=CSREG; int gr=(source[i]>>11)&0x1F; switch(op2) { @@ -7483,26 +7391,26 @@ int new_recompile_block(u_int addr) } break; case C1LS: - rs1[i]=(source[i]>>21)&0x1F; - rs2[i]=CSREG; - rt1[i]=0; - rt2[i]=0; + dops[i].rs1=(source[i]>>21)&0x1F; + dops[i].rs2=CSREG; + dops[i].rt1=0; + dops[i].rt2=0; imm[i]=(short)source[i]; break; case C2LS: - rs1[i]=(source[i]>>21)&0x1F; - rs2[i]=0; - rt1[i]=0; - rt2[i]=0; + dops[i].rs1=(source[i]>>21)&0x1F; + dops[i].rs2=0; + dops[i].rt1=0; + dops[i].rt2=0; imm[i]=(short)source[i]; if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2 else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2 break; case C2OP: - rs1[i]=0; - rs2[i]=0; - rt1[i]=0; - rt2[i]=0; + dops[i].rs1=0; + dops[i].rs2=0; + dops[i].rt1=0; + dops[i].rt2=0; gte_rs[i]=gte_reg_reads[source[i]&0x3f]; gte_rt[i]=gte_reg_writes[source[i]&0x3f]; gte_rt[i]|=1ll<<63; // every op changes flags @@ -7516,38 +7424,38 @@ int new_recompile_block(u_int addr) case SYSCALL: case HLECALL: case INTCALL: - rs1[i]=CCREG; - rs2[i]=0; - rt1[i]=0; - rt2[i]=0; + dops[i].rs1=CCREG; + dops[i].rs2=0; + dops[i].rt1=0; + dops[i].rt2=0; break; default: - rs1[i]=0; - rs2[i]=0; - rt1[i]=0; - rt2[i]=0; + dops[i].rs1=0; + dops[i].rs2=0; + dops[i].rt1=0; + dops[i].rt2=0; } /* Calculate branch target addresses */ if(type==UJUMP) ba[i]=((start+i*4+4)&0xF0000000)|(((unsigned int)source[i]<<6)>>4); - else if(type==CJUMP&&rs1[i]==rs2[i]&&(op&1)) + else if(type==CJUMP&&dops[i].rs1==dops[i].rs2&&(op&1)) ba[i]=start+i*4+8; // Ignore never taken branch - else if(type==SJUMP&&rs1[i]==0&&!(op2&1)) + 
else if(type==SJUMP&&dops[i].rs1==0&&!(op2&1)) ba[i]=start+i*4+8; // Ignore never taken branch else if(type==CJUMP||type==SJUMP) ba[i]=start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14); else ba[i]=-1; /* simplify always (not)taken branches */ - if (type == CJUMP && rs1[i] == rs2[i]) { - rs1[i] = rs2[i] = 0; + if (type == CJUMP && dops[i].rs1 == dops[i].rs2) { + dops[i].rs1 = dops[i].rs2 = 0; if (!(op & 1)) { - itype[i] = type = UJUMP; - rs2[i] = CCREG; + dops[i].itype = type = UJUMP; + dops[i].rs2 = CCREG; } } - else if (type == SJUMP && rs1[i] == 0 && (op2 & 1)) - itype[i] = type = UJUMP; + else if (type == SJUMP && dops[i].rs1 == 0 && (op2 & 1)) + dops[i].itype = type = UJUMP; /* messy cases to just pass over to the interpreter */ if (i > 0 && is_jump(i-1)) { @@ -7559,15 +7467,15 @@ int new_recompile_block(u_int addr) do_in_intrp=1; } // basic load delay detection - else if((type==LOAD||type==LOADLR||type==COP0||type==COP2||type==C2LS)&&rt1[i]!=0) { + else if((type==LOAD||type==LOADLR||type==COP0||type==COP2||type==C2LS)&&dops[i].rt1!=0) { int t=(ba[i-1]-start)/4; - if(0 <= t && t < i &&(rt1[i]==rs1[t]||rt1[i]==rs2[t])&&itype[t]!=CJUMP&&itype[t]!=SJUMP) { + if(0 <= t && t < i &&(dops[i].rt1==dops[t].rs1||dops[i].rt1==dops[t].rs2)&&dops[t].itype!=CJUMP&&dops[t].itype!=SJUMP) { // jump target wants DS result - potential load delay effect SysPrintf("load delay @%08x (%08x)\n", addr + i*4, addr); do_in_intrp=1; - bt[t+1]=1; // expected return from interpreter + dops[t+1].bt=1; // expected return from interpreter } - else if(i>=2&&rt1[i-2]==2&&rt1[i]==2&&rs1[i]!=2&&rs2[i]!=2&&rs1[i-1]!=2&&rs2[i-1]!=2&& + else if(i>=2&&dops[i-2].rt1==2&&dops[i].rt1==2&&dops[i].rs1!=2&&dops[i].rs2!=2&&dops[i-1].rs1!=2&&dops[i-1].rs2!=2&& !(i>=3&&is_jump(i-3))) { // v0 overwrite like this is a sign of trouble, bail out SysPrintf("v0 overwrite @%08x (%08x)\n", addr + i*4, addr); @@ -7575,10 +7483,10 @@ int new_recompile_block(u_int addr) } } if(do_in_intrp) { - rs1[i-1]=CCREG; - 
rs2[i-1]=rt1[i-1]=rt2[i-1]=0; + dops[i-1].rs1=CCREG; + dops[i-1].rs2=dops[i-1].rt1=dops[i-1].rt2=0; ba[i-1]=-1; - itype[i-1]=INTCALL; + dops[i-1].itype=INTCALL; done=2; i--; // don't compile the DS } @@ -7586,7 +7494,7 @@ int new_recompile_block(u_int addr) /* Is this the end of the block? */ if (i > 0 && is_ujump(i-1)) { - if(rt1[i-1]==0) { // Continue past subroutine call (JAL) + if(dops[i-1].rt1==0) { // Continue past subroutine call (JAL) done=2; } else { @@ -7599,8 +7507,8 @@ int new_recompile_block(u_int addr) // Don't get too close to the limit if(i>MAXBLOCK/2) done=1; } - if(itype[i]==SYSCALL&&stop_after_jal) done=1; - if(itype[i]==HLECALL||itype[i]==INTCALL) done=2; + if(dops[i].itype==SYSCALL&&stop_after_jal) done=1; + if(dops[i].itype==HLECALL||dops[i].itype==INTCALL) done=2; if(done==2) { // Does the block continue due to a branch? for(j=i-1;j>=0;j--) @@ -7615,15 +7523,15 @@ int new_recompile_block(u_int addr) assert(start+i*40); @@ -7650,7 +7558,7 @@ int new_recompile_block(u_int addr) if((u_int)addr&1) { // First instruction is delay slot cc=-1; - bt[1]=1; + dops[1].bt=1; ds=1; unneeded_reg[0]=1; current.regmap[HOST_BTREG]=BTREG; @@ -7658,7 +7566,7 @@ int new_recompile_block(u_int addr) for(i=0;i0&&(itype[i-1]==STORE||itype[i-1]==STORELR||(itype[i-1]==C2LS&&opcode[i-1]==0x3a))&&(u_int)imm[i-1]<0x800) - current.waswritten|=1<=0x800) - current.waswritten&=~(1<0&&(dops[i-1].itype==STORE||dops[i-1].itype==STORELR||(dops[i-1].itype==C2LS&&dops[i-1].opcode==0x3a))&&(u_int)imm[i-1]<0x800) + current.waswritten|=1<=0x800) + current.waswritten&=~(1<0) { current.wasdirty=current.dirty; - switch(itype[i-1]) { + switch(dops[i-1].itype) { case UJUMP: memcpy(&branch_regs[i-1],¤t,sizeof(current)); branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; - branch_regs[i-1].u=branch_unneeded_reg[i-1]&~((1LL<0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i]==SYSCALL||itype[i]==HLECALL)) + 
if(i>0&&(dops[i-1].itype==RJUMP||dops[i-1].itype==UJUMP||dops[i-1].itype==CJUMP||dops[i-1].itype==SJUMP||dops[i].itype==SYSCALL||dops[i].itype==HLECALL)) { cc=0; } #if !defined(DRC_DBG) - else if(itype[i]==C2OP&>e_cycletab[source[i]&0x3f]>2) + else if(dops[i].itype==C2OP&>e_cycletab[source[i]&0x3f]>2) { // this should really be removed since the real stalls have been implemented, // but doing so causes sizeable perf regression against the older version u_int gtec = gte_cycletab[source[i] & 0x3f]; cc += HACK_ENABLED(NDHACK_NO_STALLS) ? gtec/2 : 2; } - else if(i>1&&itype[i]==STORE&&itype[i-1]==STORE&&itype[i-2]==STORE&&!bt[i]) + else if(i>1&&dops[i].itype==STORE&&dops[i-1].itype==STORE&&dops[i-2].itype==STORE&&!dops[i].bt) { cc+=4; } - else if(itype[i]==C2LS) + else if(dops[i].itype==C2LS) { // same as with C2OP cc += HACK_ENABLED(NDHACK_NO_STALLS) ? 4 : 2; @@ -8267,7 +8175,7 @@ int new_recompile_block(u_int addr) cc++; } - if(!is_ds[i]) { + if(!dops[i].is_ds) { regs[i].dirty=current.dirty; regs[i].isconst=current.isconst; memcpy(constmap[i],current_constmap,sizeof(constmap[i])); @@ -8290,7 +8198,7 @@ int new_recompile_block(u_int addr) for (i=slen-1;i>=0;i--) { int hr; - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP) + if(dops[i].itype==RJUMP||dops[i].itype==UJUMP||dops[i].itype==CJUMP||dops[i].itype==SJUMP) { if(ba[i]=(start+slen*4)) { @@ -8328,28 +8236,28 @@ int new_recompile_block(u_int addr) // Merge in delay slot for(hr=0;hr0&&!bt[i]&&((regs[i].wasdirty>>hr)&1)) { + if(i>0&&!dops[i].bt&&((regs[i].wasdirty>>hr)&1)) { if((regmap_pre[i][hr]>0&&!((unneeded_reg[i]>>regmap_pre[i][hr])&1))) { - if(rt1[i-1]==(regmap_pre[i][hr]&63)) nr|=1<0&&!((unneeded_reg[i]>>regs[i].regmap_entry[hr])&1))) { - if(rt1[i-1]==(regs[i].regmap_entry[hr]&63)) nr|=1<>hr)&1)) { if(regs[i].regmap_entry[hr]!=CCREG) regs[i].regmap_entry[hr]=-1; - if((regs[i].regmap[hr]&63)!=rs1[i] && (regs[i].regmap[hr]&63)!=rs2[i] && - (regs[i].regmap[hr]&63)!=rt1[i] && 
(regs[i].regmap[hr]&63)!=rt2[i] && + if((regs[i].regmap[hr]&63)!=dops[i].rs1 && (regs[i].regmap[hr]&63)!=dops[i].rs2 && + (regs[i].regmap[hr]&63)!=dops[i].rt1 && (regs[i].regmap[hr]&63)!=dops[i].rt2 && (regs[i].regmap[hr]&63)!=PTEMP && (regs[i].regmap[hr]&63)!=CCREG) { if (!is_ujump(i)) { - if(likely[i]) { + if(dops[i].likely) { regs[i].regmap[hr]=-1; regs[i].isconst&=~(1<0) { int map=-1,temp=-1; - if(itype[i]==STORE || itype[i]==STORELR || - (opcode[i]&0x3b)==0x39 || (opcode[i]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2 + if(dops[i].itype==STORE || dops[i].itype==STORELR || + (dops[i].opcode&0x3b)==0x39 || (dops[i].opcode&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2 map=INVCP; } - if(itype[i]==LOADLR || itype[i]==STORELR || - itype[i]==C1LS || itype[i]==C2LS) + if(dops[i].itype==LOADLR || dops[i].itype==STORELR || + dops[i].itype==C1LS || dops[i].itype==C2LS) temp=FTEMP; - if((regs[i].regmap[hr]&63)!=rt1[i] && (regs[i].regmap[hr]&63)!=rt2[i] && - regs[i].regmap[hr]!=rs1[i] && regs[i].regmap[hr]!=rs2[i] && + if((regs[i].regmap[hr]&63)!=dops[i].rt1 && (regs[i].regmap[hr]&63)!=dops[i].rt2 && + regs[i].regmap[hr]!=dops[i].rs1 && regs[i].regmap[hr]!=dops[i].rs2 && (regs[i].regmap[hr]&63)!=temp && regs[i].regmap[hr]!=map && - (itype[i]!=SPAN||regs[i].regmap[hr]!=CCREG)) + (dops[i].itype!=SPAN||regs[i].regmap[hr]!=CCREG)) { - if(i0) if(regmap_pre[i+1][hr]!=regs[i].regmap[hr]) @@ -8515,18 +8423,18 @@ int new_recompile_block(u_int addr) clear_all_regs(f_regmap); for(i=0;i=start && ba[i]<(start+i*4)) - if(itype[i+1]==NOP||itype[i+1]==MOV||itype[i+1]==ALU - ||itype[i+1]==SHIFTIMM||itype[i+1]==IMM16||itype[i+1]==LOAD - ||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS - ||itype[i+1]==SHIFT||itype[i+1]==COP1 - ||itype[i+1]==COP2||itype[i+1]==C2LS||itype[i+1]==C2OP) + if(dops[i+1].itype==NOP||dops[i+1].itype==MOV||dops[i+1].itype==ALU + ||dops[i+1].itype==SHIFTIMM||dops[i+1].itype==IMM16||dops[i+1].itype==LOAD + 
||dops[i+1].itype==STORE||dops[i+1].itype==STORELR||dops[i+1].itype==C1LS + ||dops[i+1].itype==SHIFT||dops[i+1].itype==COP1 + ||dops[i+1].itype==COP2||dops[i+1].itype==C2LS||dops[i+1].itype==C2OP) { int t=(ba[i]-start)>>2; - if(t>0&&(itype[t-1]!=UJUMP&&itype[t-1]!=RJUMP&&itype[t-1]!=CJUMP&&itype[t-1]!=SJUMP)) // loop_preload can't handle jumps into delay slots - if(t<2||(itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||rt1[t-2]!=31) // call/ret assumes no registers allocated + if(t>0&&(dops[t-1].itype!=UJUMP&&dops[t-1].itype!=RJUMP&&dops[t-1].itype!=CJUMP&&dops[t-1].itype!=SJUMP)) // loop_preload can't handle jumps into delay slots + if(t<2||(dops[t-2].itype!=UJUMP&&dops[t-2].itype!=RJUMP)||dops[t-2].rt1!=31) // call/ret assumes no registers allocated for(hr=0;hr=0) { @@ -8553,7 +8461,7 @@ int new_recompile_block(u_int addr) f_regmap[hr]=branch_regs[i].regmap[hr]; } } - if(ooo[i]) { + if(dops[i].ooo) { if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; }else{ @@ -8598,12 +8506,12 @@ int new_recompile_block(u_int addr) //printf("no-match due to different register\n"); break; } - if(itype[k-2]==UJUMP||itype[k-2]==RJUMP||itype[k-2]==CJUMP||itype[k-2]==SJUMP) { + if(dops[k-2].itype==UJUMP||dops[k-2].itype==RJUMP||dops[k-2].itype==CJUMP||dops[k-2].itype==SJUMP) { //printf("no-match due to branch\n"); break; } // call/ret fast path assumes no registers allocated - if(k>2&&(itype[k-3]==UJUMP||itype[k-3]==RJUMP)&&rt1[k-3]==31) { + if(k>2&&(dops[k-3].itype==UJUMP||dops[k-3].itype==RJUMP)&&dops[k-3].rt1==31) { break; } assert(r < 64); @@ -8662,7 +8570,7 @@ int new_recompile_block(u_int addr) regs[k].dirty&=~(1<=0) + if(dops[i+1].rs1) { + if((hr=get_reg(regs[i+1].regmap,dops[i+1].rs1))>=0) { if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { @@ -8829,8 +8737,8 @@ int new_recompile_block(u_int addr) } } } - if(rs2[i+1]) { - if((hr=get_reg(regs[i+1].regmap,rs2[i+1]))>=0) + if(dops[i+1].rs2) { + 
if((hr=get_reg(regs[i+1].regmap,dops[i+1].rs2))>=0) { if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { @@ -8846,14 +8754,14 @@ int new_recompile_block(u_int addr) } } // Preload target address for load instruction (non-constant) - if(itype[i+1]==LOAD&&rs1[i+1]&&get_reg(regs[i+1].regmap,rs1[i+1])<0) { - if((hr=get_reg(regs[i+1].regmap,rt1[i+1]))>=0) + if(dops[i+1].itype==LOAD&&dops[i+1].rs1&&get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { + if((hr=get_reg(regs[i+1].regmap,dops[i+1].rt1))>=0) { if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { - regs[i].regmap[hr]=rs1[i+1]; - regmap_pre[i+1][hr]=rs1[i+1]; - regs[i+1].regmap_entry[hr]=rs1[i+1]; + regs[i].regmap[hr]=dops[i+1].rs1; + regmap_pre[i+1][hr]=dops[i+1].rs1; + regs[i+1].regmap_entry[hr]=dops[i+1].rs1; regs[i].isconst&=~(1<=0) + if(dops[i+1].lt1&&get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { + if((hr=get_reg(regs[i+1].regmap,dops[i+1].rt1))>=0) { if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { - regs[i].regmap[hr]=rs1[i+1]; - regmap_pre[i+1][hr]=rs1[i+1]; - regs[i+1].regmap_entry[hr]=rs1[i+1]; + regs[i].regmap[hr]=dops[i+1].rs1; + regmap_pre[i+1][hr]=dops[i+1].rs1; + regs[i+1].regmap_entry[hr]=dops[i+1].rs1; regs[i].isconst&=~(1<=0); if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { - regs[i].regmap[hr]=rs1[i+1]; - regmap_pre[i+1][hr]=rs1[i+1]; - regs[i+1].regmap_entry[hr]=rs1[i+1]; + regs[i].regmap[hr]=dops[i+1].rs1; + regmap_pre[i+1][hr]=dops[i+1].rs1; + regs[i+1].regmap_entry[hr]=dops[i+1].rs1; regs[i].isconst&=~(1<=0); if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { - regs[i].regmap[hr]=rs1[i+1]; - regmap_pre[i+1][hr]=rs1[i+1]; - regs[i+1].regmap_entry[hr]=rs1[i+1]; + regs[i].regmap[hr]=dops[i+1].rs1; + regmap_pre[i+1][hr]=dops[i+1].rs1; + regs[i+1].regmap_entry[hr]=dops[i+1].rs1; regs[i].isconst&=~(1<=0&®s[i].regmap[hr]<0) { - int rs=get_reg(regs[i+1].regmap,rs1[i+1]); + int rs=get_reg(regs[i+1].regmap,dops[i+1].rs1); if(rs>=0&&((regs[i+1].wasconst>>rs)&1)) { 
regs[i].regmap[hr]=AGEN1+((i+1)&1); regmap_pre[i+1][hr]=AGEN1+((i+1)&1); @@ -8967,19 +8875,19 @@ int new_recompile_block(u_int addr) /* Pass 7 - Identify 32-bit registers */ for (i=slen-1;i>=0;i--) { - if(itype[i]==CJUMP||itype[i]==SJUMP) + if(dops[i].itype==CJUMP||dops[i].itype==SJUMP) { // Conditional branch if((source[i]>>16)!=0x1000&&i 0); - if (itype[slen-1] == INTCALL) { + if (slen > 0 && dops[slen-1].itype == INTCALL) { // no ending needed for this block since INTCALL never returns } // If the block did not end with an unconditional branch, // add a jump to the next instruction. else if (i > 1) { - if(!is_ujump(i-2)&&itype[i-1]!=SPAN) { - assert(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP); + if(!is_ujump(i-2)&&dops[i-1].itype!=SPAN) { + assert(dops[i-1].itype!=UJUMP&&dops[i-1].itype!=CJUMP&&dops[i-1].itype!=SJUMP&&dops[i-1].itype!=RJUMP); assert(i==slen); - if(itype[i-2]!=CJUMP&&itype[i-2]!=SJUMP) { + if(dops[i-2].itype!=CJUMP&&dops[i-2].itype!=SJUMP) { store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG); } - else if(!likely[i-2]) + else if(!dops[i-2].likely) { store_regs_bt(branch_regs[i-2].regmap,branch_regs[i-2].dirty,start+i*4); assert(branch_regs[i-2].regmap[HOST_CCREG]==CCREG); @@ -9301,7 +9209,7 @@ int new_recompile_block(u_int addr) else { assert(i>0); - assert(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP); + assert(dops[i-1].itype!=UJUMP&&dops[i-1].itype!=CJUMP&&dops[i-1].itype!=SJUMP&&dops[i-1].itype!=RJUMP); store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); @@ -9374,7 +9282,7 @@ int new_recompile_block(u_int addr) } u_int source_len = slen*4; - if (itype[slen-1] == INTCALL && source_len > 4) + if (dops[slen-1].itype == INTCALL && source_len > 4) // no need to treat the 
last instruction as compiled // as interpreter fully handles it source_len -= 4; @@ -9385,7 +9293,7 @@ int new_recompile_block(u_int addr) // External Branch Targets (jump_in) for(i=0;i Date: Mon, 22 Nov 2021 23:18:56 +0200 Subject: [PATCH 085/597] drc: drop 'likely' branch support, simplify not in MIPS1 --- libpcsxcore/new_dynarec/new_dynarec.c | 254 ++++++++------------------ 1 file changed, 79 insertions(+), 175 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 72f18bff1..7e9fa1e1c 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -172,9 +172,10 @@ static struct decoded_insn u_char rt2; u_char lt1; u_char bt:1; - u_char likely:1; u_char ooo:1; u_char is_ds:1; + u_char is_jump:1; + u_char is_ujump:1; } dops[MAXBLOCK]; // used by asm: @@ -475,18 +476,6 @@ static int CLOCK_ADJUST(int x) return (x * m + s * 50) / 100; } -// is the op an unconditional jump? -static int is_ujump(int i) -{ - return dops[i].itype == UJUMP || dops[i].itype == RJUMP - || (source[i] >> 16) == 0x1000; // beq r0, r0, offset // b offset -} - -static int is_jump(int i) -{ - return dops[i].itype == RJUMP || dops[i].itype == UJUMP || dops[i].itype == CJUMP || dops[i].itype == SJUMP; -} - static int ds_writes_rjump_rs(int i) { return dops[i].rs1 != 0 && (dops[i].rs1 == dops[i+1].rt1 || dops[i].rs1 == dops[i+1].rt2); @@ -699,7 +688,7 @@ void lsn(u_char hsn[], int i, int *preferred_reg) j=slen-i-1; break; } - if (is_ujump(i+j)) + if (dops[i+j].is_ujump) { // Don't go past an unconditonal jump j++; @@ -747,7 +736,7 @@ void lsn(u_char hsn[], int i, int *preferred_reg) // TODO: preferred register based on backward branch } // Delay slot should preferably not overwrite branch conditions or cycle count - if (i > 0 && is_jump(i-1)) { + if (i > 0 && dops[i-1].is_jump) { if(dops[i-1].rs1) if(hsn[dops[i-1].rs1]>1) hsn[dops[i-1].rs1]=1; if(dops[i-1].rs2) if(hsn[dops[i-1].rs2]>1) hsn[dops[i-1].rs2]=1; 
hsn[CCREG]=1; @@ -782,7 +771,7 @@ int needed_again(int r, int i) int b=-1; int rn=10; - if (i > 0 && is_ujump(i-1)) + if (i > 0 && dops[i-1].is_ujump) { if(ba[i-1]start+slen*4-4) return 0; // Don't need any registers if exiting the block @@ -793,7 +782,7 @@ int needed_again(int r, int i) j=slen-i-1; break; } - if (is_ujump(i+j)) + if (dops[i+j].is_ujump) { // Don't go past an unconditonal jump j++; @@ -830,7 +819,7 @@ int loop_reg(int i, int r, int hr) j=slen-i-1; break; } - if (is_ujump(i+j)) + if (dops[i+j].is_ujump) { // Don't go past an unconditonal jump j++; @@ -1456,7 +1445,7 @@ static void alloc_reg(struct regstat *cur,int i,signed char reg) // Don't evict the cycle count at entry points, otherwise the entry // stub will have to write it. if(dops[i].bt&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(dops[i-2].itype==RJUMP||dops[i-2].itype==UJUMP||dops[i-2].itype==CJUMP||dops[i-2].itype==SJUMP)) hsn[CCREG]=2; + if (i>1 && hsn[CCREG] > 2 && dops[i-2].is_jump) hsn[CCREG]=2; for(j=10;j>=3;j--) { // Alloc preferred register if available @@ -1562,7 +1551,7 @@ static void alloc_reg_temp(struct regstat *cur,int i,signed char reg) // Don't evict the cycle count at entry points, otherwise the entry // stub will have to write it. 
if(dops[i].bt&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(dops[i-2].itype==RJUMP||dops[i-2].itype==UJUMP||dops[i-2].itype==CJUMP||dops[i-2].itype==SJUMP)) hsn[CCREG]=2; + if (i>1 && hsn[CCREG] > 2 && dops[i-2].is_jump) hsn[CCREG]=2; for(j=10;j>=3;j--) { for(r=1;r<=MAXREG;r++) @@ -3396,7 +3385,7 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u for (j = i + 1; j < slen; j++) { if (cop2_is_stalling_op(j, &other_gte_op_cycles)) break; - if (is_jump(j)) { + if (dops[j].is_jump) { // check ds if (j + 1 < slen && cop2_is_stalling_op(j + 1, &other_gte_op_cycles)) j++; @@ -3454,7 +3443,7 @@ static void multdiv_prepare_stall(int i, const struct regstat *i_regs) break; if ((found = is_mflohi(j))) break; - if (is_jump(j)) { + if (dops[j].is_jump) { // check ds if (j + 1 < slen && (found = is_mflohi(j + 1))) j++; @@ -4210,12 +4199,12 @@ static int get_final_value(int hr, int i, int *value) i++; } if(i>hr)&1)) { @@ -4525,7 +4514,7 @@ static int match_bt(signed char i_regmap[],uint64_t i_dirty,int addr) } } // Delay slots are not valid branch targets - //if(t>0&&(dops[t-1].itype==RJUMP||dops[t-1].itype==UJUMP||dops[t-1].itype==CJUMP||dops[t-1].itype==SJUMP)) return 0; + //if(t>0&&(dops[t-1].is_jump) return 0; // Delay slots require additional processing, so do not match if(dops[t].is_ds) return 0; } @@ -5338,9 +5327,6 @@ static void cjump_assemble(int i,struct regstat *i_regs) else { // In-order execution (branch first) - //if(dops[i].likely) printf("IOL\n"); - //else - //printf("IOE\n"); void *taken = NULL, *nottaken = NULL, *nottaken1 = NULL; if(!unconditional&&!nop) { //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); @@ -5414,15 +5400,13 @@ static void cjump_assemble(int i,struct regstat *i_regs) if(nottaken1) 
set_jump_target(nottaken1, out); set_jump_target(nottaken, out); assem_debug("2:\n"); - if(!dops[i].likely) { - wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,ds_unneeded); - load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2); - address_generation(i+1,&branch_regs[i],0); - load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG); - ds_assemble(i+1,&branch_regs[i]); - } + wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,ds_unneeded); + load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2); + address_generation(i+1,&branch_regs[i],0); + load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG); + ds_assemble(i+1,&branch_regs[i]); cc=get_reg(branch_regs[i].regmap,CCREG); - if(cc==-1&&!dops[i].likely) { + if (cc == -1) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); @@ -5437,7 +5421,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); void *jaddr=out; emit_jns(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,dops[i].likely?NULLDS:NOTTAKEN,0); + add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); } } } @@ -5681,15 +5665,13 @@ static void sjump_assemble(int i,struct regstat *i_regs) if(!unconditional) { set_jump_target(nottaken, out); assem_debug("1:\n"); - if(!dops[i].likely) { - wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,ds_unneeded); - load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2); - address_generation(i+1,&branch_regs[i],0); - load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG); - ds_assemble(i+1,&branch_regs[i]); - } + wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,ds_unneeded); + load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2); + address_generation(i+1,&branch_regs[i],0); + 
load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG); + ds_assemble(i+1,&branch_regs[i]); cc=get_reg(branch_regs[i].regmap,CCREG); - if(cc==-1&&!dops[i].likely) { + if (cc == -1) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); @@ -5704,7 +5686,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); void *jaddr=out; emit_jns(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,dops[i].likely?NULLDS:NOTTAKEN,0); + add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); } } } @@ -5894,7 +5876,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) assert(i_regs->regmap[HOST_CCREG]==CCREG); wb_dirtys(regs[i].regmap,regs[i].dirty); - if(dops[i].likely||unconditional) + if(unconditional) { emit_movimm(ba[i],HOST_BTREG); } @@ -5913,22 +5895,6 @@ static void pagespan_assemble(int i,struct regstat *i_regs) add_jump_out(target_addr,stub); } else set_jump_target(branch_addr, stub); - if(dops[i].likely) { - // Not-taken path - set_jump_target(nottaken, out); - wb_dirtys(regs[i].regmap,regs[i].dirty); - void *branch_addr=out; - emit_jmp(0); - int target_addr=start+i*4+8; - void *stub=out; - void *compiled_target_addr=check_addr(target_addr); - emit_extjump_ds(branch_addr, target_addr); - if(compiled_target_addr) { - set_jump_target(branch_addr, compiled_target_addr); - add_jump_out(target_addr,stub); - } - else set_jump_target(branch_addr, stub); - } } // Assemble the delay slot for the above @@ -6041,7 +6007,7 @@ void unneeded_registers(int istart,int iend,int r) for (i=iend;i>=istart;i--) { //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r); - if(dops[i].itype==RJUMP||dops[i].itype==UJUMP||dops[i].itype==CJUMP||dops[i].itype==SJUMP) + if(dops[i].is_jump) { // If subroutine call, flag return address as a possible branch target if(dops[i].rt1==31 && i>2].bt=1; 
if(ba[i]<=start+i*4) { // Backward branch - if(is_ujump(i)) + if(dops[i].is_ujump) { // Unconditional branch temp_u=1; @@ -6094,19 +6047,6 @@ void unneeded_registers(int istart,int iend,int r) temp_u|=1; temp_gte_u|=gte_rt[i+1]; temp_gte_u&=~gte_rs[i+1]; - // If branch is "likely" (and conditional) - // then we skip the delay slot on the fall-thru path - if(dops[i].likely) { - if(i>2]=gte_u_unknown; } } /*else*/ if(1) { - if (is_ujump(i)) + if (dops[i].is_ujump) { // Unconditional branch u=unneeded_reg[(ba[i]-start)>>2]; @@ -6146,19 +6086,8 @@ void unneeded_registers(int istart,int iend,int r) b|=1; gte_b|=gte_rt[i+1]; gte_b&=~gte_rs[i+1]; - // If branch is "likely" then we skip the - // delay slot on the fall-thru path - if(dops[i].likely) { - u=b; - gte_u=gte_b; - if(i=istart;i--) { - if(dops[i].itype==RJUMP||dops[i].itype==UJUMP||dops[i].itype==CJUMP||dops[i].itype==SJUMP) + if(dops[i].is_jump) { if(ba[i]=(start+slen*4)) { // Branch out of this block, flush all regs - if (is_ujump(i)) + if (dops[i].is_ujump) { // Unconditional branch will_dirty_i=0; @@ -6266,7 +6195,7 @@ void clean_registers(int istart,int iend,int wr) // Merge in delay slot (will dirty) for(r=0;ristart) { - if(dops[i].itype!=RJUMP&&dops[i].itype!=UJUMP&&dops[i].itype!=CJUMP&&dops[i].itype!=SJUMP) + if (!dops[i].is_jump) { // Don't store a register immediately after writing it, // may prevent dual-issue. 
@@ -6557,9 +6486,9 @@ void clean_registers(int istart,int iend,int wr) regs[i].dirty|=will_dirty_i; #ifndef DESTRUCTIVE_WRITEBACK regs[i].dirty&=wont_dirty_i; - if(dops[i].itype==RJUMP||dops[i].itype==UJUMP||dops[i].itype==CJUMP||dops[i].itype==SJUMP) + if(dops[i].is_jump) { - if (i < iend-1 && !is_ujump(i)) { + if (i < iend-1 && !dops[i].is_ujump) { for(r=0;r>4)?1:0; break; case SJUMP: dops[i].rs1=(source[i]>>21)&0x1f; @@ -7313,7 +7240,6 @@ int new_recompile_block(u_int addr) dops[i].rt1=31; // NOTE: If the branch is not taken, r31 is still overwritten } - dops[i].likely=(op2&2)?1:0; break; case ALU: dops[i].rs1=(source[i]>>21)&0x1f; // source @@ -7457,11 +7383,14 @@ int new_recompile_block(u_int addr) else if (type == SJUMP && dops[i].rs1 == 0 && (op2 & 1)) dops[i].itype = type = UJUMP; + dops[i].is_jump = (dops[i].itype == RJUMP || dops[i].itype == UJUMP || dops[i].itype == CJUMP || dops[i].itype == SJUMP); + dops[i].is_ujump = (dops[i].itype == RJUMP || dops[i].itype == UJUMP); // || (source[i] >> 16) == 0x1000 // beq r0,r0 + /* messy cases to just pass over to the interpreter */ - if (i > 0 && is_jump(i-1)) { + if (i > 0 && dops[i-1].is_jump) { int do_in_intrp=0; // branch in delay slot? - if (is_jump(i)) { + if (dops[i].is_jump) { // don't handle first branch and call interpreter if it's hit SysPrintf("branch in delay slot @%08x (%08x)\n", addr + i*4, addr); do_in_intrp=1; @@ -7476,7 +7405,7 @@ int new_recompile_block(u_int addr) dops[t+1].bt=1; // expected return from interpreter } else if(i>=2&&dops[i-2].rt1==2&&dops[i].rt1==2&&dops[i].rs1!=2&&dops[i].rs2!=2&&dops[i-1].rs1!=2&&dops[i-1].rs2!=2&& - !(i>=3&&is_jump(i-3))) { + !(i>=3&&dops[i-3].is_jump)) { // v0 overwrite like this is a sign of trouble, bail out SysPrintf("v0 overwrite @%08x (%08x)\n", addr + i*4, addr); do_in_intrp=1; @@ -7493,7 +7422,7 @@ int new_recompile_block(u_int addr) } /* Is this the end of the block? 
*/ - if (i > 0 && is_ujump(i-1)) { + if (i > 0 && dops[i-1].is_ujump) { if(dops[i-1].rt1==0) { // Continue past subroutine call (JAL) done=2; } @@ -7529,7 +7458,7 @@ int new_recompile_block(u_int addr) } } slen=i; - if(dops[i-1].itype==UJUMP||dops[i-1].itype==CJUMP||dops[i-1].itype==SJUMP||dops[i-1].itype==RJUMP) { + if (dops[i-1].is_jump) { if(start+i*4==pagelimit) { dops[i-1].itype=SPAN; } @@ -7582,7 +7511,7 @@ int new_recompile_block(u_int addr) regs[i].wasconst=current.isconst; regs[i].wasdirty=current.dirty; regs[i].loadedconst=0; - if(dops[i].itype!=UJUMP&&dops[i].itype!=CJUMP&&dops[i].itype!=SJUMP&&dops[i].itype!=RJUMP) { + if (!dops[i].is_jump) { if(i+10&&(dops[i-1].itype==RJUMP||dops[i-1].itype==UJUMP||dops[i-1].itype==CJUMP||dops[i-1].itype==SJUMP||dops[i].itype==SYSCALL||dops[i].itype==HLECALL)) + if (i > 0 && (dops[i-1].is_jump || dops[i].itype == SYSCALL || dops[i].itype == HLECALL)) { cc=0; } @@ -8198,7 +8127,7 @@ int new_recompile_block(u_int addr) for (i=slen-1;i>=0;i--) { int hr; - if(dops[i].itype==RJUMP||dops[i].itype==UJUMP||dops[i].itype==CJUMP||dops[i].itype==SJUMP) + if(dops[i].is_jump) { if(ba[i]=(start+slen*4)) { @@ -8219,7 +8148,7 @@ int new_recompile_block(u_int addr) } } // Conditional branch may need registers for following instructions - if (!is_ujump(i)) + if (!dops[i].is_ujump) { if(i>hr)&1)) { if(regs[i].regmap_entry[hr]!=CCREG) regs[i].regmap_entry[hr]=-1; - if((regs[i].regmap[hr]&63)!=dops[i].rs1 && (regs[i].regmap[hr]&63)!=dops[i].rs2 && - (regs[i].regmap[hr]&63)!=dops[i].rt1 && (regs[i].regmap[hr]&63)!=dops[i].rt2 && - (regs[i].regmap[hr]&63)!=PTEMP && (regs[i].regmap[hr]&63)!=CCREG) - { - if (!is_ujump(i)) - { - if(dops[i].likely) { - regs[i].regmap[hr]=-1; - regs[i].isconst&=~(1<>2; - if(t>0&&(dops[t-1].itype!=UJUMP&&dops[t-1].itype!=RJUMP&&dops[t-1].itype!=CJUMP&&dops[t-1].itype!=SJUMP)) // loop_preload can't handle jumps into delay slots + if(t > 0 && !dops[t-1].is_jump) // loop_preload can't handle jumps into delay slots 
if(t<2||(dops[t-2].itype!=UJUMP&&dops[t-2].itype!=RJUMP)||dops[t-2].rt1!=31) // call/ret assumes no registers allocated for(hr=0;hr 1) { - if(!is_ujump(i-2)&&dops[i-1].itype!=SPAN) { - assert(dops[i-1].itype!=UJUMP&&dops[i-1].itype!=CJUMP&&dops[i-1].itype!=SJUMP&&dops[i-1].itype!=RJUMP); + if (!dops[i-2].is_ujump && dops[i-1].itype != SPAN) { + assert(!dops[i-1].is_jump); assert(i==slen); if(dops[i-2].itype!=CJUMP&&dops[i-2].itype!=SJUMP) { store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*4); @@ -9192,16 +9101,11 @@ int new_recompile_block(u_int addr) emit_loadreg(CCREG,HOST_CCREG); emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG); } - else if(!dops[i-2].likely) + else { store_regs_bt(branch_regs[i-2].regmap,branch_regs[i-2].dirty,start+i*4); assert(branch_regs[i-2].regmap[HOST_CCREG]==CCREG); } - else - { - store_regs_bt(regs[i-2].regmap,regs[i-2].dirty,start+i*4); - assert(regs[i-2].regmap[HOST_CCREG]==CCREG); - } add_to_linker(out,start+i*4,0); emit_jmp(0); } @@ -9209,7 +9113,7 @@ int new_recompile_block(u_int addr) else { assert(i>0); - assert(dops[i-1].itype!=UJUMP&&dops[i-1].itype!=CJUMP&&dops[i-1].itype!=SJUMP&&dops[i-1].itype!=RJUMP); + assert(!dops[i-1].is_jump); store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); From 9f84fc93d514007355944bd8694c92391a7ae609 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 23 Nov 2021 01:48:53 +0200 Subject: [PATCH 086/597] interpreter: simplify icache implementation Should be more accurate too. 
--- frontend/menu.c | 5 +- libpcsxcore/psxinterpreter.c | 107 +++++++++-------------------------- 2 files changed, 29 insertions(+), 83 deletions(-) diff --git a/frontend/menu.c b/frontend/menu.c index 289c30c5b..fbcda2d86 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1598,9 +1598,8 @@ static const char h_cfg_nodrc[] = "Disable dynamic recompiler and use interpret "Might be useful to overcome some dynarec bugs"; #endif static const char h_cfg_shacks[] = "Breaks games but may give better performance"; -static const char h_cfg_icache[] = "Support F1 games (only when dynarec is off).\n" - "Note: This breaks the PAL version of Spyro 2."; - +static const char h_cfg_icache[] = "Support F1 games (only when dynarec is off)"; + static menu_entry e_menu_adv_options[] = { mee_onoff_h ("Show CPU load", 0, g_opts, OPT_SHOWCPU, h_cfg_cpul), diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index d2225c417..f7898e9a9 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -59,56 +59,38 @@ static u32 fetchNoCache(u32 pc) Formula One 2001 : Use old CPU cache code when the RAM location is updated with new code (affects in-game racing) */ -static u8* ICache_Addr; -static u8* ICache_Code; +static struct cache_entry { + u32 tag; + u32 data[4]; +} ICache[256]; + static u32 fetchICache(u32 pc) { - uint32_t pc_bank, pc_offset, pc_cache; - uint8_t *IAddr, *ICode; - - pc_bank = pc >> 24; - pc_offset = pc & 0xffffff; - pc_cache = pc & 0xfff; - - IAddr = ICache_Addr; - ICode = ICache_Code; - - // cached - RAM - if (pc_bank == 0x80 || pc_bank == 0x00) + // cached? 
+ if (pc < 0xa0000000) { - if (SWAP32(*(uint32_t *)(IAddr + pc_cache)) == pc_offset) - { - // Cache hit - return last opcode used - return *(uint32_t *)(ICode + pc_cache); - } - else + // this is not how the hardware works but whatever + struct cache_entry *entry = &ICache[(pc & 0xff0) >> 4]; + + if (((entry->tag ^ pc) & 0xfffffff0) != 0 || pc < entry->tag) { - // Cache miss - addresses don't match - // - default: 0xffffffff (not init) - - // cache line is 4 bytes wide - pc_offset &= ~0xf; - pc_cache &= ~0xf; - - // address line - *(uint32_t *)(IAddr + pc_cache + 0x0) = SWAP32(pc_offset + 0x0); - *(uint32_t *)(IAddr + pc_cache + 0x4) = SWAP32(pc_offset + 0x4); - *(uint32_t *)(IAddr + pc_cache + 0x8) = SWAP32(pc_offset + 0x8); - *(uint32_t *)(IAddr + pc_cache + 0xc) = SWAP32(pc_offset + 0xc); - - // opcode line - pc_offset = pc & ~0xf; - *(uint32_t *)(ICode + pc_cache + 0x0) = psxMu32ref(pc_offset + 0x0); - *(uint32_t *)(ICode + pc_cache + 0x4) = psxMu32ref(pc_offset + 0x4); - *(uint32_t *)(ICode + pc_cache + 0x8) = psxMu32ref(pc_offset + 0x8); - *(uint32_t *)(ICode + pc_cache + 0xc) = psxMu32ref(pc_offset + 0xc); + u32 *code = (u32 *)PSXM(pc & ~0x0f); + if (!code) + return 0; + + entry->tag = pc; + // treat as 4 words, although other configurations are said to be possible + switch (pc & 0x0c) + { + case 0x00: entry->data[0] = SWAP32(code[0]); + case 0x04: entry->data[1] = SWAP32(code[1]); + case 0x08: entry->data[2] = SWAP32(code[2]); + case 0x0c: entry->data[3] = SWAP32(code[3]); + } } + return entry->data[(pc & 0x0f) >> 2]; } - /* - TODO: Probably should add cached BIOS - */ - // default return fetchNoCache(pc); } @@ -1027,34 +1009,11 @@ void (*psxCP2BSC[32])() = { /////////////////////////////////////////// static int intInit() { - /* We have to allocate the icache memory even if - * the user has not enabled it as otherwise it can cause issues. 
- */ - if (!ICache_Addr) - { - ICache_Addr = malloc(0x1000); - if (!ICache_Addr) - { - return -1; - } - } - - if (!ICache_Code) - { - ICache_Code = malloc(0x1000); - if (!ICache_Code) - { - return -1; - } - } - memset(ICache_Addr, 0xff, 0x1000); - memset(ICache_Code, 0xff, 0x1000); return 0; } static void intReset() { - memset(ICache_Addr, 0xff, 0x1000); - memset(ICache_Code, 0xff, 0x1000); + memset(&ICache, 0xff, sizeof(ICache)); } void intExecute() { @@ -1075,8 +1034,7 @@ void intNotify (int note, void *data) { /* Gameblabla - Only clear the icache if it's isolated */ if (note == R3000ACPU_NOTIFY_CACHE_ISOLATED) { - memset(ICache_Addr, 0xff, 0x1000); - memset(ICache_Code, 0xff, 0x1000); + memset(&ICache, 0xff, sizeof(ICache)); } } @@ -1122,17 +1080,6 @@ void intApplyConfig() { } static void intShutdown() { - if (ICache_Addr) - { - free(ICache_Addr); - ICache_Addr = NULL; - } - - if (ICache_Code) - { - free(ICache_Code); - ICache_Code = NULL; - } } // interpreter execution From 39b71d9abccb93457f17df887e50507a91390a78 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 24 Nov 2021 21:17:47 +0200 Subject: [PATCH 087/597] drc: add a hack for f1 games Quite fragile but maybe good enough. What the games do seems to be deliberate to break emulators. It takes the address of some internal function (let's call it f1) and calculates an address addr2 in such a way that f1 can't evict code at addr2. It then writes a 4 instruction code piece f2 that just loads an address from stack (which happens to be stacked ra) and jumps to it. f1 then gets called, loads data (?) and overwrites f2 doing it and returns. Right after that f2 gets called again.
--- libpcsxcore/new_dynarec/assem_arm.c | 15 ++++++++++ libpcsxcore/new_dynarec/assem_arm64.c | 3 ++ libpcsxcore/new_dynarec/linkage_arm64.S | 4 +++ libpcsxcore/new_dynarec/new_dynarec.c | 40 +++++++++++++++++++++++++ 4 files changed, 62 insertions(+) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 4ff1afd67..c7bd5cc27 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -441,6 +441,13 @@ static void emit_add(int rs1,int rs2,int rt) output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2)); } +static void emit_adds(int rs1,int rs2,int rt) +{ + assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2)); +} +#define emit_adds_ptr emit_adds + static void emit_adcs(int rs1,int rs2,int rt) { assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); @@ -1160,6 +1167,13 @@ static void emit_readword_dualindexedx4(int rs1, int rs2, int rt) assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100); } +#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4 + +static void emit_ldr_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)); +} static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt) { @@ -1253,6 +1267,7 @@ static void emit_readword(void *addr, int rt) assem_debug("ldr %s,fp+%d\n",regname[rt],offset); output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset); } +#define emit_readptr emit_readword static void emit_writeword_indexed(int rt, int offset, int rs) { diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index ea1b8a318..63c6866b3 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -330,6 +330,7 @@ static void emit_adds64(u_int rs1, u_int rs2, u_int rt) 
assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]); output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt)); } +#define emit_adds_ptr emit_adds64 static void emit_neg(u_int rs, u_int rt) { @@ -434,6 +435,7 @@ static void emit_readdword(void *addr, u_int rt) else abort(); } +#define emit_readptr emit_readdword static void emit_readshword(void *addr, u_int rt) { @@ -1047,6 +1049,7 @@ static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt) assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]); output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt)); } +#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt) { diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 249fecbcb..6f32fa3b6 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -24,6 +24,10 @@ #include "assem_arm64.h" #include "linkage_offsets.h" +#if (LO_mem_wtab & 7) +#error misligned pointers +#endif + .bss .align 4 .global dynarec_local diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 7e9fa1e1c..a8fdf477c 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -225,6 +225,7 @@ static struct decoded_insn static void *copy; static int expirep; static u_int stop_after_jal; + static u_int f1_hack; // 0 - off, ~0 - capture address, else addr #ifndef RAM_FIXED static uintptr_t ram_offset; #else @@ -6700,6 +6701,7 @@ void new_dynarec_clear_full(void) literalcount=0; stop_after_jal=0; inv_code_start=inv_code_end=~0; + f1_hack=0; // TLB for(n=0;n<4096;n++) ll_clear(jump_in+n); for(n=0;n<4096;n++) ll_clear(jump_out+n); @@ -6945,6 +6947,27 @@ int new_recompile_block(u_int addr) ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning); return 0; } + else if (f1_hack == ~0u || (f1_hack != 0 && 
start == f1_hack)) { + void *beginning = start_block(); + u_int page = get_page(start); + emit_readword(&psxRegs.GPR.n.sp, 0); + emit_readptr(&mem_rtab, 1); + emit_shrimm(0, 12, 2); + emit_readptr_dualindexedx_ptrlen(1, 2, 1); + emit_addimm(0, 0x18, 0); + emit_adds_ptr(1, 1, 1); + emit_ldr_dualindexed(1, 0, 0); + emit_writeword(0, &psxRegs.GPR.r[26]); // lw k0, 0x18(sp) + emit_far_call(get_addr_ht); + emit_jmpreg(0); // jr k0 + literal_pool(0); + end_block(beginning); + + ll_add_flags(jump_in + page, start, state_rflags, beginning); + SysPrintf("F1 hack to %08x\n", start); + f1_hack = start; + return 0; + } source = get_source_start(start, &pagelimit); if (source == NULL) { @@ -7465,6 +7488,23 @@ int new_recompile_block(u_int addr) } assert(slen>0); + /* spacial hack(s) */ + if (i > 10 && source[i-1] == 0 && source[i-2] == 0x03e00008 + && source[i-4] == 0x8fbf0018 && source[i-6] == 0x00c0f809 + && dops[i-7].itype == STORE) + { + i = i-8; + if (dops[i].itype == IMM16) + i--; + // swl r2, 15(r6); swr r2, 12(r6); sw r6, *; jalr r6 + if (dops[i].itype == STORELR && dops[i].rs1 == 6 + && dops[i-1].itype == STORELR && dops[i-1].rs1 == 6) + { + SysPrintf("F1 hack from %08x\n", start); + f1_hack = ~0u; + } + } + /* Pass 2 - Register dependencies and branch targets */ unneeded_registers(0,slen-1,0); From 26bd3dadfd724f73242ab317fce3a50af8135db6 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 24 Nov 2021 22:52:53 +0200 Subject: [PATCH 088/597] drc: only override default cycle_multiplier To let the user tune if they want. Also iS multiplier in db was still too small for HLE mode. 
--- frontend/menu.c | 2 +- libpcsxcore/database.c | 2 +- libpcsxcore/new_dynarec/new_dynarec.c | 4 ++-- libpcsxcore/new_dynarec/new_dynarec.h | 2 ++ 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/frontend/menu.c b/frontend/menu.c index fbcda2d86..f5d035a04 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -102,7 +102,7 @@ int soft_scaling, analog_deadzone; // for Caanoo int soft_filter; #ifndef HAVE_PRE_ARMV7 -#define DEFAULT_PSX_CLOCK 57 +#define DEFAULT_PSX_CLOCK (10000 / CYCLE_MULT_DEFAULT) #define DEFAULT_PSX_CLOCK_S "57" #else #define DEFAULT_PSX_CLOCK 50 diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 108ccc695..4d4439ad4 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -40,7 +40,7 @@ void Apply_Hacks_Cdrom() /* Internal Section is fussy about timings */ if (strcmp(CdromId, "SLPS01868") == 0) { - cycle_multiplier_override = 200; + cycle_multiplier_override = 202; new_dynarec_hacks_pergame |= NDHACK_OVERRIDE_CYCLE_M; } } diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index a8fdf477c..f6ea2996c 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -465,13 +465,13 @@ static void do_clear_cache(void) #define NO_CYCLE_PENALTY_THR 12 -int cycle_multiplier; // 100 for 1.0 +int cycle_multiplier = CYCLE_MULT_DEFAULT; // 100 for 1.0 int cycle_multiplier_override; int cycle_multiplier_old; static int CLOCK_ADJUST(int x) { - int m = cycle_multiplier_override + int m = cycle_multiplier_override && cycle_multiplier == CYCLE_MULT_DEFAULT ? 
cycle_multiplier_override : cycle_multiplier; int s=(x>>31)|1; return (x * m + s * 50) / 100; diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index b9a3c67cc..f1464696f 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -4,6 +4,8 @@ extern int pcaddr; extern int pending_exception; extern int stop; extern int new_dynarec_did_compile; + +#define CYCLE_MULT_DEFAULT 175 extern int cycle_multiplier; // 100 for 1.0 extern int cycle_multiplier_override; extern int cycle_multiplier_old; From c979e8c288de90834ceecfd7a37543a44cdd9402 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 27 Nov 2021 00:53:16 +0200 Subject: [PATCH 089/597] drc: botched msb bit check adjust the f1 hack too --- libpcsxcore/new_dynarec/new_dynarec.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index f6ea2996c..910046f9e 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -2635,9 +2635,10 @@ static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) static void *get_direct_memhandler(void *table, u_int addr, enum stub_type type, uintptr_t *addr_host) { + uintptr_t msb = 1ull << (sizeof(uintptr_t)*8 - 1); uintptr_t l1, l2 = 0; l1 = ((uintptr_t *)table)[addr>>12]; - if ((l1 & (1ul << (sizeof(l1)*8-1))) == 0) { + if (!(l1 & msb)) { uintptr_t v = l1 << 1; *addr_host = v + addr; return NULL; @@ -2647,10 +2648,10 @@ static void *get_direct_memhandler(void *table, u_int addr, if (type == LOADB_STUB || type == LOADBU_STUB || type == STOREB_STUB) l2 = ((uintptr_t *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)]; else if (type == LOADH_STUB || type == LOADHU_STUB || type == STOREH_STUB) - l2=((uintptr_t *)l1)[0x1000/4 + (addr&0xfff)/2]; + l2 = ((uintptr_t *)l1)[0x1000/4 + (addr&0xfff)/2]; else - l2=((uintptr_t *)l1)[(addr&0xfff)/4]; - if ((l2 & (1<<31)) == 0) { 
+ l2 = ((uintptr_t *)l1)[(addr&0xfff)/4]; + if (!(l2 & msb)) { uintptr_t v = l2 << 1; *addr_host = v + (addr&0xfff); return NULL; @@ -7501,7 +7502,8 @@ int new_recompile_block(u_int addr) && dops[i-1].itype == STORELR && dops[i-1].rs1 == 6) { SysPrintf("F1 hack from %08x\n", start); - f1_hack = ~0u; + if (f1_hack == 0) + f1_hack = ~0u; } } From 37387d8b2b8b9705ca42bd7582ed48d88aeafb9b Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 27 Nov 2021 01:28:08 +0200 Subject: [PATCH 090/597] drc: get rid of RAM_FIXED, revive ROREG should work better on devices that can't map memory at desired locations --- configure | 8 - libpcsxcore/new_dynarec/assem_arm.c | 48 +- libpcsxcore/new_dynarec/assem_arm64.c | 46 +- libpcsxcore/new_dynarec/emu_if.h | 4 - libpcsxcore/new_dynarec/linkage_arm.S | 2 +- libpcsxcore/new_dynarec/linkage_arm64.S | 2 +- libpcsxcore/new_dynarec/linkage_offsets.h | 4 +- libpcsxcore/new_dynarec/new_dynarec.c | 538 +++++++++++------- libpcsxcore/new_dynarec/patches/trace_drc_chk | 100 ++-- libpcsxcore/new_dynarec/patches/trace_intr | 38 +- libpcsxcore/psxmem.c | 2 - 11 files changed, 451 insertions(+), 341 deletions(-) diff --git a/configure b/configure index 7b3002591..1df9aac08 100755 --- a/configure +++ b/configure @@ -45,7 +45,6 @@ sound_driver_list="oss alsa pulseaudio sdl libretro" sound_drivers="" plugins="plugins/spunull/spunull.so \ plugins/dfxvideo/gpu_peops.so plugins/gpu_unai/gpu_unai.so plugins/gpu_senquack/gpu_senquack.so" -ram_fixed="no" drc_cache_base="no" have_armv5="" have_armv6="" @@ -89,21 +88,18 @@ set_platform() ;; pandora) sound_drivers="oss alsa" - ram_fixed="yes" drc_cache_base="yes" optimize_cortexa8="yes" have_arm_neon="yes" need_xlib="yes" ;; maemo) - ram_fixed="yes" drc_cache_base="yes" optimize_cortexa8="yes" have_arm_neon="yes" ;; caanoo) sound_drivers="oss" - ram_fixed="yes" drc_cache_base="yes" optimize_arm926ej="yes" need_warm="yes" @@ -297,10 +293,6 @@ if [ "$ARCH" != "arm" -o "$have_armv6" = "yes" ]; then 
PLUGIN_CFLAGS="$PLUGIN_CFLAGS -fPIC" fi -if [ "$ram_fixed" = "yes" ]; then - CFLAGS="$CFLAGS -DRAM_FIXED" -fi - case "$platform" in generic) need_sdl="yes" diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index c7bd5cc27..87490c617 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -543,6 +543,7 @@ static void emit_loadreg(int r, int hr) case CCREG: addr = (int)&cycle_count; break; case CSREG: addr = (int)&Status; break; case INVCP: addr = (int)&invc_ptr; break; + case ROREG: addr = (int)&ram_offset; break; default: assert(r < 34); break; } u_int offset = addr-(u_int)&dynarec_local; @@ -706,11 +707,6 @@ static void emit_addimm_and_set_flags(int imm,int rt) } } -static void emit_addimm_no_flags(u_int imm,u_int rt) -{ - emit_addimm(rt,imm,rt); -} - static void emit_addnop(u_int r) { assert(r<16); @@ -1181,30 +1177,72 @@ static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt) output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2)); } +static void emit_ldrb_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)); +} + static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt) { assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2)); } +static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2)); +} + static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt) { assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2)); } +static void emit_ldrh_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2)); +} + static void emit_ldrcch_dualindexed(int rs1, 
int rs2, int rt) { assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2)); } +static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2)); +} + static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt) { assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2)); } +static void emit_str_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_strb_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_strh_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2)); +} + static void emit_movsbl_indexed(int offset, int rs, int rt) { assert(offset>-256&&offset<256); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 63c6866b3..431805d4d 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -462,6 +462,7 @@ static void emit_loadreg(u_int r, u_int hr) case CCREG: addr = &cycle_count; break; case CSREG: addr = &Status; break; case INVCP: addr = &invc_ptr; is64 = 1; break; + case ROREG: addr = &ram_offset; is64 = 1; break; default: assert(r < 34); break; } if (is64) @@ -623,11 +624,6 @@ static void emit_addimm_and_set_flags(int imm, u_int rt) emit_addimm_s(1, 0, rt, imm, rt); } -static void emit_addimm_no_flags(u_int imm,u_int rt) -{ - emit_addimm(rt,imm,rt); -} - static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt) { const char *names[] = { "and", "orr", "eor", "ands" }; @@ 
-1337,7 +1333,27 @@ static int is_similar_value(u_int v1, u_int v2) || is_rotated_mask(v1 ^ v2); } -// trashes r2 +static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt) +{ + if (rt_val < 0x100000000ull) { + emit_movimm_from(rs_val, rs, rt_val, rt); + return; + } + // just move the whole thing. At least on Linux all addresses + // seem to be 48bit, so 3 insns - not great not terrible + assem_debug("movz %s,#%#lx\n", regname64[rt], rt_val & 0xffff); + output_w32(0xd2800000 | imm16_rd(rt_val & 0xffff, rt)); + assem_debug("movk %s,#%#lx,lsl #16\n", regname64[rt], (rt_val >> 16) & 0xffff); + output_w32(0xf2a00000 | imm16_rd((rt_val >> 16) & 0xffff, rt)); + assem_debug("movk %s,#%#lx,lsl #32\n", regname64[rt], (rt_val >> 32) & 0xffff); + output_w32(0xf2c00000 | imm16_rd((rt_val >> 32) & 0xffff, rt)); + if (rt_val >> 48) { + assem_debug("movk %s,#%#lx,lsl #48\n", regname64[rt], (rt_val >> 48) & 0xffff); + output_w32(0xf2e00000 | imm16_rd((rt_val >> 48) & 0xffff, rt)); + } +} + +// trashes x2 static void pass_args64(u_int a0, u_int a1) { if(a0==1&&a1==0) { @@ -1474,11 +1490,8 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, if (handler == NULL) { if(rt<0||dops[i].rt1==0) return; - if (addr != host_addr) { - if (host_addr >= 0x100000000ull) - abort(); // ROREG not implemented - emit_movimm_from(addr, rs, host_addr, rs); - } + if (addr != host_addr) + emit_movimm_from64(addr, rs, host_addr, rs); switch(type) { case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break; case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break; @@ -1489,8 +1502,8 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, } return; } - is_dynamic=pcsxmem_is_handler_dynamic(addr); - if(is_dynamic) { + is_dynamic = pcsxmem_is_handler_dynamic(addr); + if (is_dynamic) { if(type==LOADB_STUB||type==LOADBU_STUB) handler=jump_handler_read8; if(type==LOADH_STUB||type==LOADHU_STUB) @@ -1627,11 +1640,8 @@ static void inline_writestub(enum stub_type 
type, int i, u_int addr, uintptr_t host_addr = 0; void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr); if (handler == NULL) { - if (addr != host_addr) { - if (host_addr >= 0x100000000ull) - abort(); // ROREG not implemented - emit_movimm_from(addr, rs, host_addr, rs); - } + if (addr != host_addr) + emit_movimm_from64(addr, rs, host_addr, rs); switch (type) { case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break; case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break; diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index 36cc275fd..30cb9ef63 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -95,8 +95,4 @@ void pcsx_mtc0_ds(u32 reg, u32 val); /* misc */ extern void SysPrintf(const char *fmt, ...); -#ifdef RAM_FIXED -#define rdram ((u_char *)0x80000000) -#else #define rdram ((u_char *)psxM) -#endif diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 970d91c70..1d8cefaa4 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -85,7 +85,7 @@ DRC_VAR(psxH_ptr, 4) DRC_VAR(zeromem_ptr, 4) DRC_VAR(invc_ptr, 4) DRC_VAR(scratch_buf_ptr, 4) -@DRC_VAR(align1, 8) /* unused/alignment */ +DRC_VAR(ram_offset, 4) DRC_VAR(mini_ht, 256) DRC_VAR(restore_candidate, 512) diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 6f32fa3b6..7df82b4e9 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -77,7 +77,7 @@ DRC_VAR(psxH_ptr, 8) DRC_VAR(invc_ptr, 8) DRC_VAR(zeromem_ptr, 8) DRC_VAR(scratch_buf_ptr, 8) -#DRC_VAR(align1, 16) /* unused/alignment */ +DRC_VAR(ram_offset, 8) DRC_VAR(mini_ht, 256) DRC_VAR(restore_candidate, 512) diff --git a/libpcsxcore/new_dynarec/linkage_offsets.h b/libpcsxcore/new_dynarec/linkage_offsets.h index 916bb1a84..e9bb3abdc 100644 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ 
b/libpcsxcore/new_dynarec/linkage_offsets.h @@ -37,8 +37,8 @@ #define LO_invc_ptr (LO_zeromem_ptr + PTRSZ) #define LO_scratch_buf_ptr (LO_invc_ptr + PTRSZ) #define LO_saved_lr (LO_scratch_buf_ptr + PTRSZ) -#define LO_align1 (LO_saved_lr + PTRSZ) -#define LO_mini_ht (LO_align1 + PTRSZ) +#define LO_ram_offset (LO_saved_lr + PTRSZ) +#define LO_mini_ht (LO_ram_offset + PTRSZ) #define LO_restore_candidate (LO_mini_ht + PTRSZ*32*2) #define LO_dynarec_local_size (LO_restore_candidate + 512) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 910046f9e..921a2ed18 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -176,6 +176,8 @@ static struct decoded_insn u_char is_ds:1; u_char is_jump:1; u_char is_ujump:1; + u_char is_load:1; + u_char is_store:1; } dops[MAXBLOCK]; // used by asm: @@ -226,11 +228,6 @@ static struct decoded_insn static int expirep; static u_int stop_after_jal; static u_int f1_hack; // 0 - off, ~0 - capture address, else addr -#ifndef RAM_FIXED - static uintptr_t ram_offset; -#else - static const uintptr_t ram_offset=0; -#endif int new_dynarec_hacks; int new_dynarec_hacks_pergame; @@ -244,6 +241,7 @@ static struct decoded_insn extern int pcaddr; extern int pending_exception; extern int branch_target; + extern uintptr_t ram_offset; extern uintptr_t mini_ht[32][2]; extern u_char restore_candidate[512]; @@ -256,7 +254,7 @@ static struct decoded_insn #define CCREG 36 // Cycle count #define INVCP 37 // Pointer to invalid_code //#define MMREG 38 // Pointer to memory_map -//#define ROREG 39 // ram offset (if rdram!=0x80000000) +#define ROREG 39 // ram offset (if rdram!=0x80000000) #define TEMPREG 40 #define FTEMP 40 // FPU temporary register #define PTEMP 41 // Prefetch temporary register @@ -346,7 +344,8 @@ static void add_stub(enum stub_type type, void *addr, void *retaddr, static void add_stub_r(enum stub_type type, void *addr, void *retaddr, int i, int addr_reg, const 
struct regstat *i_regs, int ccadj, u_int reglist); static void add_to_linker(void *addr, u_int target, int ext); -static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override); +static void *emit_fastpath_cmp_jump(int i, const struct regstat *i_regs, + int addr, int *offset_reg, int *addr_reg_override); static void *get_direct_memhandler(void *table, u_int addr, enum stub_type type, uintptr_t *addr_host); static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist); @@ -707,11 +706,12 @@ void lsn(u_char hsn[], int i, int *preferred_reg) hsn[dops[i+j].rs1]=j; hsn[dops[i+j].rs2]=j; } + if (ram_offset && (dops[i+j].is_load || dops[i+j].is_store)) + hsn[ROREG] = j; // On some architectures stores need invc_ptr #if defined(HOST_IMM8) - if(dops[i+j].itype==STORE || dops[i+j].itype==STORELR || (dops[i+j].opcode&0x3b)==0x39 || (dops[i+j].opcode&0x3b)==0x3a) { - hsn[INVCP]=j; - } + if (dops[i+j].is_store) + hsn[INVCP] = j; #endif if(i+j>=0&&(dops[i+j].itype==UJUMP||dops[i+j].itype==CJUMP||dops[i+j].itype==SJUMP)) { @@ -746,7 +746,7 @@ void lsn(u_char hsn[], int i, int *preferred_reg) hsn[RHTBL]=1; } // Coprocessor load/store needs FTEMP, even if not declared - if(dops[i].itype==C1LS||dops[i].itype==C2LS) { + if(dops[i].itype==C2LS) { hsn[FTEMP]=0; } // Load L/R also uses FTEMP as a temporary register @@ -1756,7 +1756,10 @@ static void load_alloc(struct regstat *current,int i) clear_const(current,dops[i].rt1); //if(dops[i].rs1!=dops[i].rt1&&needed_again(dops[i].rs1,i)) clear_const(current,dops[i].rs1); // Does this help or hurt? 
if(!dops[i].rs1) current->u&=~1LL; // Allow allocating r0 if it's the source register - if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); + if (needed_again(dops[i].rs1, i)) + alloc_reg(current, i, dops[i].rs1); + if (ram_offset) + alloc_reg(current, i, ROREG); if(dops[i].rt1&&!((current->u>>dops[i].rt1)&1)) { alloc_reg(current,i,dops[i].rt1); assert(get_reg(current->regmap,dops[i].rt1)>=0); @@ -1803,9 +1806,11 @@ void store_alloc(struct regstat *current,int i) if(dops[i].opcode==0x2c||dops[i].opcode==0x2d||dops[i].opcode==0x3f) { // 64-bit SDL/SDR/SD assert(0); } + if (ram_offset) + alloc_reg(current, i, ROREG); #if defined(HOST_IMM8) // On CPUs without 32-bit immediates we need a pointer to invalid_code - else alloc_reg(current,i,INVCP); + alloc_reg(current, i, INVCP); #endif if(dops[i].opcode==0x2a||dops[i].opcode==0x2e||dops[i].opcode==0x2c||dops[i].opcode==0x2d) { // SWL/SWL/SDL/SDR alloc_reg(current,i,FTEMP); @@ -1817,21 +1822,8 @@ void store_alloc(struct regstat *current,int i) void c1ls_alloc(struct regstat *current,int i) { - //clear_const(current,dops[i].rs1); // FIXME clear_const(current,dops[i].rt1); - if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); alloc_reg(current,i,CSREG); // Status - alloc_reg(current,i,FTEMP); - if(dops[i].opcode==0x35||dops[i].opcode==0x3d) { // 64-bit LDC1/SDC1 - assert(0); - } - #if defined(HOST_IMM8) - // On CPUs without 32-bit immediates we need a pointer to invalid_code - else if((dops[i].opcode&0x3b)==0x39) // SWC1/SDC1 - alloc_reg(current,i,INVCP); - #endif - // We need a temporary register for address generation - alloc_reg_temp(current,i,-1); } void c2ls_alloc(struct regstat *current,int i) @@ -1839,9 +1831,11 @@ void c2ls_alloc(struct regstat *current,int i) clear_const(current,dops[i].rt1); if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); alloc_reg(current,i,FTEMP); + if (ram_offset) + alloc_reg(current, i, ROREG); #if defined(HOST_IMM8) // On CPUs without 32-bit 
immediates we need a pointer to invalid_code - if((dops[i].opcode&0x3b)==0x3a) // SWC2/SDC2 + if (dops[i].opcode == 0x3a) // SWC2 alloc_reg(current,i,INVCP); #endif // We need a temporary register for address generation @@ -2562,11 +2556,25 @@ static int get_ptr_mem_type(u_int a) return MTYPE_8000; } -static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) +static int get_ro_reg(const struct regstat *i_regs, int host_tempreg_free) +{ + int r = get_reg(i_regs->regmap, ROREG); + if (r < 0 && host_tempreg_free) { + host_tempreg_acquire(); + emit_loadreg(ROREG, r = HOST_TEMPREG); + } + if (r < 0) + abort(); + return r; +} + +static void *emit_fastpath_cmp_jump(int i, const struct regstat *i_regs, + int addr, int *offset_reg, int *addr_reg_override) { void *jaddr = NULL; - int type=0; - int mr=dops[i].rs1; + int type = 0; + int mr = dops[i].rs1; + *offset_reg = -1; if(((smrv_strong|smrv_weak)>>mr)&1) { type=get_ptr_mem_type(smrv[mr]); //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type); @@ -2610,22 +2618,19 @@ static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) } } - if(type==0) + if (type == 0) // need ram check { emit_cmpimm(addr,RAM_SIZE); - jaddr=out; + jaddr = out; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK // Hint to branch predictor that the branch is unlikely to be taken - if(dops[i].rs1>=28) + if (dops[i].rs1 >= 28) emit_jno_unlikely(0); else #endif emit_jno(0); - if(ram_offset!=0) { - host_tempreg_acquire(); - emit_addimm(addr,ram_offset,HOST_TEMPREG); - addr=*addr_reg_override=HOST_TEMPREG; - } + if (ram_offset != 0) + *offset_reg = get_ro_reg(i_regs, 0); } return jaddr; @@ -2688,13 +2693,56 @@ static int reglist_find_free(u_int reglist) return __builtin_ctz(free_regs); } +static void do_load_word(int a, int rt, int offset_reg) +{ + if (offset_reg >= 0) + emit_ldr_dualindexed(offset_reg, a, rt); + else + emit_readword_indexed(0, a, rt); +} + +static void do_store_word(int a, int ofs, int rt, int 
offset_reg, int preseve_a) +{ + if (offset_reg < 0) { + emit_writeword_indexed(rt, ofs, a); + return; + } + if (ofs != 0) + emit_addimm(a, ofs, a); + emit_str_dualindexed(offset_reg, a, rt); + if (ofs != 0 && preseve_a) + emit_addimm(a, -ofs, a); +} + +static void do_store_hword(int a, int ofs, int rt, int offset_reg, int preseve_a) +{ + if (offset_reg < 0) { + emit_writehword_indexed(rt, ofs, a); + return; + } + if (ofs != 0) + emit_addimm(a, ofs, a); + emit_strh_dualindexed(offset_reg, a, rt); + if (ofs != 0 && preseve_a) + emit_addimm(a, -ofs, a); +} + +static void do_store_byte(int a, int rt, int offset_reg) +{ + if (offset_reg >= 0) + emit_strb_dualindexed(offset_reg, a, rt); + else + emit_writebyte_indexed(rt, 0, a); +} + static void load_assemble(int i, const struct regstat *i_regs) { int s,tl,addr; int offset; void *jaddr=0; int memtarget=0,c=0; - int fastio_reg_override=-1; + int offset_reg = -1; + int fastio_reg_override = -1; u_int reglist=get_host_reglist(i_regs->regmap); tl=get_reg(i_regs->regmap,dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); @@ -2731,96 +2779,110 @@ static void load_assemble(int i, const struct regstat *i_regs) if(dops[i].rs1!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) #endif { - jaddr=emit_fastpath_cmp_jump(i,addr,&fastio_reg_override); + jaddr = emit_fastpath_cmp_jump(i, i_regs, addr, + &offset_reg, &fastio_reg_override); } } - else if(ram_offset&&memtarget) { - host_tempreg_acquire(); - emit_addimm(addr,ram_offset,HOST_TEMPREG); - fastio_reg_override=HOST_TEMPREG; + else if (ram_offset && memtarget) { + offset_reg = get_ro_reg(i_regs, 0); } int dummy=(dops[i].rt1==0)||(tl!=get_reg(i_regs->regmap,dops[i].rt1)); // ignore loads to r0 and unneeded reg - if (dops[i].opcode==0x20) { // LB + switch (dops[i].opcode) { + case 0x20: // LB if(!c||memtarget) { if(!dummy) { - { - int x=0,a=tl; - if(!c) a=addr; - if(fastio_reg_override>=0) a=fastio_reg_override; + int a = tl; + if (!c) a = addr; + if (fastio_reg_override >= 0) + a 
= fastio_reg_override; - emit_movsbl_indexed(x,a,tl); - } + if (offset_reg >= 0) + emit_ldrsb_dualindexed(offset_reg, a, tl); + else + emit_movsbl_indexed(0, a, tl); } if(jaddr) add_stub_r(LOADB_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); - } - if (dops[i].opcode==0x21) { // LH + break; + case 0x21: // LH if(!c||memtarget) { if(!dummy) { - int x=0,a=tl; - if(!c) a=addr; - if(fastio_reg_override>=0) a=fastio_reg_override; - emit_movswl_indexed(x,a,tl); + int a = tl; + if (!c) a = addr; + if (fastio_reg_override >= 0) + a = fastio_reg_override; + if (offset_reg >= 0) + emit_ldrsh_dualindexed(offset_reg, a, tl); + else + emit_movswl_indexed(0, a, tl); } if(jaddr) add_stub_r(LOADH_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); - } - if (dops[i].opcode==0x23) { // LW + break; + case 0x23: // LW if(!c||memtarget) { if(!dummy) { - int a=addr; - if(fastio_reg_override>=0) a=fastio_reg_override; - emit_readword_indexed(0,a,tl); + int a = addr; + if (fastio_reg_override >= 0) + a = fastio_reg_override; + do_load_word(a, tl, offset_reg); } if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); - } - if (dops[i].opcode==0x24) { // LBU + break; + case 0x24: // LBU if(!c||memtarget) { if(!dummy) { - int x=0,a=tl; - if(!c) a=addr; - if(fastio_reg_override>=0) a=fastio_reg_override; + int a = tl; + if (!c) a = addr; + if (fastio_reg_override >= 0) + a = fastio_reg_override; - emit_movzbl_indexed(x,a,tl); + if (offset_reg >= 0) + emit_ldrb_dualindexed(offset_reg, a, tl); + else + emit_movzbl_indexed(0, a, tl); } if(jaddr) add_stub_r(LOADBU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else 
inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); - } - if (dops[i].opcode==0x25) { // LHU + break; + case 0x25: // LHU if(!c||memtarget) { if(!dummy) { - int x=0,a=tl; - if(!c) a=addr; - if(fastio_reg_override>=0) a=fastio_reg_override; - emit_movzwl_indexed(x,a,tl); + int a = tl; + if(!c) a = addr; + if (fastio_reg_override >= 0) + a = fastio_reg_override; + if (offset_reg >= 0) + emit_ldrh_dualindexed(offset_reg, a, tl); + else + emit_movzwl_indexed(0, a, tl); } if(jaddr) add_stub_r(LOADHU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); } else inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); - } - if (dops[i].opcode==0x27) { // LWU - assert(0); - } - if (dops[i].opcode==0x37) { // LD + break; + case 0x27: // LWU + case 0x37: // LD + default: assert(0); } } - if (fastio_reg_override == HOST_TEMPREG) + if (fastio_reg_override == HOST_TEMPREG || offset_reg == HOST_TEMPREG) host_tempreg_release(); } @@ -2831,7 +2893,8 @@ static void loadlr_assemble(int i, const struct regstat *i_regs) int offset; void *jaddr=0; int memtarget=0,c=0; - int fastio_reg_override=-1; + int offset_reg = -1; + int fastio_reg_override = -1; u_int reglist=get_host_reglist(i_regs->regmap); tl=get_reg(i_regs->regmap,dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); @@ -2856,13 +2919,12 @@ static void loadlr_assemble(int i, const struct regstat *i_regs) }else{ emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR } - jaddr=emit_fastpath_cmp_jump(i,temp2,&fastio_reg_override); + jaddr = emit_fastpath_cmp_jump(i, i_regs, temp2, + &offset_reg, &fastio_reg_override); } else { - if(ram_offset&&memtarget) { - host_tempreg_acquire(); - emit_addimm(temp2,ram_offset,HOST_TEMPREG); - fastio_reg_override=HOST_TEMPREG; + if (ram_offset && memtarget) { + offset_reg = get_ro_reg(i_regs, 0); } if (dops[i].opcode==0x22||dops[i].opcode==0x26) { emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR @@ -2872,10 
+2934,12 @@ static void loadlr_assemble(int i, const struct regstat *i_regs) } if (dops[i].opcode==0x22||dops[i].opcode==0x26) { // LWL/LWR if(!c||memtarget) { - int a=temp2; - if(fastio_reg_override>=0) a=fastio_reg_override; - emit_readword_indexed(0,a,temp2); - if(fastio_reg_override==HOST_TEMPREG) host_tempreg_release(); + int a = temp2; + if (fastio_reg_override >= 0) + a = fastio_reg_override; + do_load_word(a, temp2, offset_reg); + if (fastio_reg_override == HOST_TEMPREG || offset_reg == HOST_TEMPREG) + host_tempreg_release(); if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist); } else @@ -2905,16 +2969,17 @@ static void loadlr_assemble(int i, const struct regstat *i_regs) } #endif -void store_assemble(int i, const struct regstat *i_regs) +static void store_assemble(int i, const struct regstat *i_regs) { int s,tl; int addr,temp; int offset; void *jaddr=0; - enum stub_type type; + enum stub_type type=0; int memtarget=0,c=0; int agr=AGEN1+(i&1); - int fastio_reg_override=-1; + int offset_reg = -1; + int fastio_reg_override = -1; u_int reglist=get_host_reglist(i_regs->regmap); tl=get_reg(i_regs->regmap,dops[i].rs2); s=get_reg(i_regs->regmap,dops[i].rs1); @@ -2932,46 +2997,49 @@ void store_assemble(int i, const struct regstat *i_regs) if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<=0) a=fastio_reg_override; - emit_writebyte_indexed(tl,x,a); - } - type=STOREB_STUB; - } - if (dops[i].opcode==0x29) { // SH + int a = temp; + if (!c) a = addr; + if (fastio_reg_override >= 0) + a = fastio_reg_override; + do_store_byte(a, tl, offset_reg); + } + type = STOREB_STUB; + break; + case 0x29: // SH if(!c||memtarget) { - int x=0,a=temp; - if(!c) a=addr; - if(fastio_reg_override>=0) a=fastio_reg_override; - emit_writehword_indexed(tl,x,a); - } - type=STOREH_STUB; - } - if (dops[i].opcode==0x2B) { // SW + int a = temp; + if (!c) a = addr; + if (fastio_reg_override >= 0) + a = fastio_reg_override; + do_store_hword(a, 0, tl, offset_reg, 1); + } + type = 
STOREH_STUB; + break; + case 0x2B: // SW if(!c||memtarget) { - int a=addr; - if(fastio_reg_override>=0) a=fastio_reg_override; - emit_writeword_indexed(tl,0,a); - } - type=STOREW_STUB; - } - if (dops[i].opcode==0x3F) { // SD + int a = addr; + if (fastio_reg_override >= 0) + a = fastio_reg_override; + do_store_word(a, 0, tl, offset_reg, 1); + } + type = STOREW_STUB; + break; + case 0x3F: // SD + default: assert(0); - type=STORED_STUB; } - if(fastio_reg_override==HOST_TEMPREG) + if (fastio_reg_override == HOST_TEMPREG || offset_reg == HOST_TEMPREG) host_tempreg_release(); if(jaddr) { // PCSX store handlers don't check invcode again @@ -3032,10 +3100,11 @@ static void storelr_assemble(int i, const struct regstat *i_regs) int temp; int offset; void *jaddr=0; - void *case1, *case2, *case3; + void *case1, *case23, *case3; void *done0, *done1, *done2; int memtarget=0,c=0; int agr=AGEN1+(i&1); + int offset_reg = -1; u_int reglist=get_host_reglist(i_regs->regmap); tl=get_reg(i_regs->regmap,dops[i].rs2); s=get_reg(i_regs->regmap,dops[i].rs1); @@ -3063,86 +3132,85 @@ static void storelr_assemble(int i, const struct regstat *i_regs) emit_jmp(0); } } - if(ram_offset) - emit_addimm_no_flags(ram_offset,temp); + if (ram_offset) + offset_reg = get_ro_reg(i_regs, 0); if (dops[i].opcode==0x2C||dops[i].opcode==0x2D) { // SDL/SDR assert(0); } - emit_xorimm(temp,3,temp); emit_testimm(temp,2); - case2=out; + case23=out; emit_jne(0); emit_testimm(temp,1); case1=out; emit_jne(0); // 0 - if (dops[i].opcode==0x2A) { // SWL - emit_writeword_indexed(tl,0,temp); + if (dops[i].opcode == 0x2A) { // SWL + // Write msb into least significant byte + if (dops[i].rs2) emit_rorimm(tl, 24, tl); + do_store_byte(temp, tl, offset_reg); + if (dops[i].rs2) emit_rorimm(tl, 8, tl); } - else if (dops[i].opcode==0x2E) { // SWR - emit_writebyte_indexed(tl,3,temp); + else if (dops[i].opcode == 0x2E) { // SWR + // Write entire word + do_store_word(temp, 0, tl, offset_reg, 1); } - else - assert(0); - done0=out; + 
done0 = out; emit_jmp(0); // 1 set_jump_target(case1, out); - if (dops[i].opcode==0x2A) { // SWL - // Write 3 msb into three least significant bytes - if(dops[i].rs2) emit_rorimm(tl,8,tl); - emit_writehword_indexed(tl,-1,temp); - if(dops[i].rs2) emit_rorimm(tl,16,tl); - emit_writebyte_indexed(tl,1,temp); - if(dops[i].rs2) emit_rorimm(tl,8,tl); + if (dops[i].opcode == 0x2A) { // SWL + // Write two msb into two least significant bytes + if (dops[i].rs2) emit_rorimm(tl, 16, tl); + do_store_hword(temp, -1, tl, offset_reg, 0); + if (dops[i].rs2) emit_rorimm(tl, 16, tl); } - else if (dops[i].opcode==0x2E) { // SWR - // Write two lsb into two most significant bytes - emit_writehword_indexed(tl,1,temp); + else if (dops[i].opcode == 0x2E) { // SWR + // Write 3 lsb into three most significant bytes + do_store_byte(temp, tl, offset_reg); + if (dops[i].rs2) emit_rorimm(tl, 8, tl); + do_store_hword(temp, 1, tl, offset_reg, 0); + if (dops[i].rs2) emit_rorimm(tl, 24, tl); } done1=out; emit_jmp(0); - // 2 - set_jump_target(case2, out); + // 2,3 + set_jump_target(case23, out); emit_testimm(temp,1); - case3=out; + case3 = out; emit_jne(0); + // 2 if (dops[i].opcode==0x2A) { // SWL - // Write two msb into two least significant bytes - if(dops[i].rs2) emit_rorimm(tl,16,tl); - emit_writehword_indexed(tl,-2,temp); - if(dops[i].rs2) emit_rorimm(tl,16,tl); + // Write 3 msb into three least significant bytes + if (dops[i].rs2) emit_rorimm(tl, 8, tl); + do_store_hword(temp, -2, tl, offset_reg, 1); + if (dops[i].rs2) emit_rorimm(tl, 16, tl); + do_store_byte(temp, tl, offset_reg); + if (dops[i].rs2) emit_rorimm(tl, 8, tl); } - else if (dops[i].opcode==0x2E) { // SWR - // Write 3 lsb into three most significant bytes - emit_writebyte_indexed(tl,-1,temp); - if(dops[i].rs2) emit_rorimm(tl,8,tl); - emit_writehword_indexed(tl,0,temp); - if(dops[i].rs2) emit_rorimm(tl,24,tl); + else if (dops[i].opcode == 0x2E) { // SWR + // Write two lsb into two most significant bytes + do_store_hword(temp, 0, tl, 
offset_reg, 1); } - done2=out; + done2 = out; emit_jmp(0); // 3 set_jump_target(case3, out); - if (dops[i].opcode==0x2A) { // SWL - // Write msb into least significant byte - if(dops[i].rs2) emit_rorimm(tl,24,tl); - emit_writebyte_indexed(tl,-3,temp); - if(dops[i].rs2) emit_rorimm(tl,8,tl); + if (dops[i].opcode == 0x2A) { // SWL + do_store_word(temp, -3, tl, offset_reg, 0); } - else if (dops[i].opcode==0x2E) { // SWR - // Write entire word - emit_writeword_indexed(tl,-3,temp); + else if (dops[i].opcode == 0x2E) { // SWR + do_store_byte(temp, tl, offset_reg); } set_jump_target(done0, out); set_jump_target(done1, out); set_jump_target(done2, out); + if (offset_reg == HOST_TEMPREG) + host_tempreg_release(); if(!c||!memtarget) add_stub_r(STORELR_STUB,jaddr,out,i,temp,i_regs,ccadj[i],reglist); if(!(i_regs->waswritten&(1<regmap,INVCP); assert(ir>=0); @@ -3599,7 +3667,8 @@ static void c2ls_assemble(int i, const struct regstat *i_regs) void *jaddr2=NULL; enum stub_type type; int agr=AGEN1+(i&1); - int fastio_reg_override=-1; + int offset_reg = -1; + int fastio_reg_override = -1; u_int reglist=get_host_reglist(i_regs->regmap); u_int copr=(source[i]>>16)&0x1f; s=get_reg(i_regs->regmap,dops[i].rs1); @@ -3639,28 +3708,35 @@ static void c2ls_assemble(int i, const struct regstat *i_regs) } else { if(!c) { - jaddr2=emit_fastpath_cmp_jump(i,ar,&fastio_reg_override); - } - else if(ram_offset&&memtarget) { - host_tempreg_acquire(); - emit_addimm(ar,ram_offset,HOST_TEMPREG); - fastio_reg_override=HOST_TEMPREG; - } - if (dops[i].opcode==0x32) { // LWC2 - int a=ar; - if(fastio_reg_override>=0) a=fastio_reg_override; - emit_readword_indexed(0,a,tl); + jaddr2 = emit_fastpath_cmp_jump(i, i_regs, ar, + &offset_reg, &fastio_reg_override); + } + else if (ram_offset && memtarget) { + offset_reg = get_ro_reg(i_regs, 0); + } + switch (dops[i].opcode) { + case 0x32: { // LWC2 + int a = ar; + if (fastio_reg_override >= 0) + a = fastio_reg_override; + do_load_word(a, tl, offset_reg); + break; } - 
if (dops[i].opcode==0x3a) { // SWC2 + case 0x3a: { // SWC2 #ifdef DESTRUCTIVE_SHIFT if(!offset&&!c&&s>=0) emit_mov(s,ar); #endif - int a=ar; - if(fastio_reg_override>=0) a=fastio_reg_override; - emit_writeword_indexed(tl,0,a); + int a = ar; + if (fastio_reg_override >= 0) + a = fastio_reg_override; + do_store_word(a, 0, tl, offset_reg, 1); + break; + } + default: + assert(0); } } - if(fastio_reg_override==HOST_TEMPREG) + if (fastio_reg_override == HOST_TEMPREG || offset_reg == HOST_TEMPREG) host_tempreg_release(); if(jaddr2) add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj[i],reglist); @@ -4089,7 +4165,7 @@ static void loop_preload(signed char pre[],signed char entry[]) // goes to AGEN for writes, FTEMP for LOADLR and cop1/2 loads void address_generation(int i,struct regstat *i_regs,signed char entry[]) { - if(dops[i].itype==LOAD||dops[i].itype==LOADLR||dops[i].itype==STORE||dops[i].itype==STORELR||dops[i].itype==C1LS||dops[i].itype==C2LS) { + if (dops[i].is_load || dops[i].is_store) { int ra=-1; int agr=AGEN1+(i&1); if(dops[i].itype==LOAD) { @@ -4104,7 +4180,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) ra=get_reg(i_regs->regmap,agr); if(ra<0) ra=get_reg(i_regs->regmap,-1); } - if(dops[i].itype==C1LS||dops[i].itype==C2LS) { + if(dops[i].itype==C2LS) { if ((dops[i].opcode&0x3b)==0x31||(dops[i].opcode&0x3b)==0x32) // LWC1/LDC1/LWC2/LDC2 ra=get_reg(i_regs->regmap,FTEMP); else { // SWC1/SDC1/SWC2/SDC2 @@ -4158,7 +4234,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) } } // Preload constants for next instruction - if(dops[i+1].itype==LOAD||dops[i+1].itype==LOADLR||dops[i+1].itype==STORE||dops[i+1].itype==STORELR||dops[i+1].itype==C1LS||dops[i+1].itype==C2LS) { + if (dops[i+1].is_load || dops[i+1].is_store) { int agr,ra; // Actual address agr=AGEN1+((i+1)&1); @@ -4593,7 +4669,9 @@ static void ds_assemble_entry(int i) wb_register(CCREG,regs[t].regmap_entry,regs[t].wasdirty); 
load_regs(regs[t].regmap_entry,regs[t].regmap,dops[t].rs1,dops[t].rs2); address_generation(t,®s[t],regs[t].regmap_entry); - if(dops[t].itype==STORE||dops[t].itype==STORELR||(dops[t].opcode&0x3b)==0x39||(dops[t].opcode&0x3b)==0x3a) + if (ram_offset && (dops[t].is_load || dops[t].is_store)) + load_regs(regs[t].regmap_entry,regs[t].regmap,ROREG,ROREG); + if (dops[t].is_store) load_regs(regs[t].regmap_entry,regs[t].regmap,INVCP,INVCP); is_delayslot=0; switch(dops[t].itype) { @@ -5372,6 +5450,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) // load regs load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2); address_generation(i+1,&branch_regs[i],0); + if (ram_offset) + load_regs(regs[i].regmap,branch_regs[i].regmap,ROREG,ROREG); load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP); ds_assemble(i+1,&branch_regs[i]); cc=get_reg(branch_regs[i].regmap,CCREG); @@ -5403,9 +5483,12 @@ static void cjump_assemble(int i,struct regstat *i_regs) set_jump_target(nottaken, out); assem_debug("2:\n"); wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,ds_unneeded); + // load regs load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2); address_generation(i+1,&branch_regs[i],0); - load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG); + if (ram_offset) + load_regs(regs[i].regmap,branch_regs[i].regmap,ROREG,ROREG); + load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP); ds_assemble(i+1,&branch_regs[i]); cc=get_reg(branch_regs[i].regmap,CCREG); if (cc == -1) { @@ -5638,6 +5721,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) // load regs load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2); address_generation(i+1,&branch_regs[i],0); + if (ram_offset) + load_regs(regs[i].regmap,branch_regs[i].regmap,ROREG,ROREG); load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP); ds_assemble(i+1,&branch_regs[i]); cc=get_reg(branch_regs[i].regmap,CCREG); @@ -5916,7 +6001,9 
@@ static void pagespan_ds() emit_writeword(HOST_BTREG,&branch_target); load_regs(regs[0].regmap_entry,regs[0].regmap,dops[0].rs1,dops[0].rs2); address_generation(0,®s[0],regs[0].regmap_entry); - if(dops[0].itype==STORE||dops[0].itype==STORELR||(dops[0].opcode&0x3b)==0x39||(dops[0].opcode&0x3b)==0x3a) + if (ram_offset && (dops[0].is_load || dops[0].is_store)) + load_regs(regs[0].regmap_entry,regs[0].regmap,ROREG,ROREG); + if (dops[0].is_store) load_regs(regs[0].regmap_entry,regs[0].regmap,INVCP,INVCP); is_delayslot=0; switch(dops[0].itype) { @@ -6755,9 +6842,7 @@ void new_dynarec_init(void) #endif arch_init(); new_dynarec_test(); -#ifndef RAM_FIXED ram_offset=(uintptr_t)rdram-0x80000000; -#endif if (ram_offset!=0) SysPrintf("warning: RAM is not directly mapped, performance will suffer\n"); } @@ -7409,6 +7494,8 @@ int new_recompile_block(u_int addr) dops[i].is_jump = (dops[i].itype == RJUMP || dops[i].itype == UJUMP || dops[i].itype == CJUMP || dops[i].itype == SJUMP); dops[i].is_ujump = (dops[i].itype == RJUMP || dops[i].itype == UJUMP); // || (source[i] >> 16) == 0x1000 // beq r0,r0 + dops[i].is_load = (dops[i].itype == LOAD || dops[i].itype == LOADLR || op == 0x32); // LWC2 + dops[i].is_store = (dops[i].itype == STORE || dops[i].itype == STORELR || op == 0x3a); // SWC2 /* messy cases to just pass over to the interpreter */ if (i > 0 && dops[i-1].is_jump) { @@ -8213,7 +8300,11 @@ int new_recompile_block(u_int addr) if(dops[i+1].rs2==regmap_pre[i][hr]) nr|=1<0) { - int map=-1,temp=-1; - if(dops[i].itype==STORE || dops[i].itype==STORELR || - (dops[i].opcode&0x3b)==0x39 || (dops[i].opcode&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2 - map=INVCP; - } - if(dops[i].itype==LOADLR || dops[i].itype==STORELR || - dops[i].itype==C1LS || dops[i].itype==C2LS) - temp=FTEMP; + int map1 = -1, map2 = -1, temp=-1; + if (dops[i].is_load || dops[i].is_store) + map1 = ROREG; + if (dops[i].is_store) + map2 = INVCP; + if (dops[i].itype==LOADLR || dops[i].itype==STORELR || 
dops[i].itype==C2LS) + temp = FTEMP; if((regs[i].regmap[hr]&63)!=dops[i].rt1 && (regs[i].regmap[hr]&63)!=dops[i].rt2 && regs[i].regmap[hr]!=dops[i].rs1 && regs[i].regmap[hr]!=dops[i].rs2 && - (regs[i].regmap[hr]&63)!=temp && regs[i].regmap[hr]!=map && + (regs[i].regmap[hr]&63)!=temp && regs[i].regmap[hr]!=map1 && regs[i].regmap[hr]!=map2 && (dops[i].itype!=SPAN||regs[i].regmap[hr]!=CCREG)) { if(i>16)==0x1000) + if (dops[i].is_ujump) literal_pool(1024); else -@@ -9037,7 +9062,7 @@ int new_recompile_block(u_int addr) - } - } - // External Branch Targets (jump_in) -- if(copy+slen*4>(void *)shadow+sizeof(shadow)) copy=shadow; -+ if(copy+slen*4>(void *)shadow+sizeof(shadow)) {copy=shadow;printf("shadow overflow\n");} - for(i=0;i> 26; switch (tmp) { -@@ -546,13 +547,15 @@ static void doBranch(u32 tar) { +@@ -499,13 +500,15 @@ static void doBranch(u32 tar) { } break; } @@ -111,8 +111,8 @@ index 61c60ed..0fa5283 100644 } /********************************************************* -@@ -635,12 +638,13 @@ void psxMULTU() { - psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff); +@@ -615,12 +618,13 @@ void psxMULTU_stall() { + psxMULTU(); } +#define doBranchNotTaken() do { psxRegs.cycle -= BIAS; execI(); psxBranchTest(); psxRegs.cycle += BIAS; } while(0) @@ -127,7 +127,7 @@ index 61c60ed..0fa5283 100644 void psxBGEZ() { RepZBranchi32(>=) } // Branch if Rs >= 0 void psxBGEZAL() { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link -@@ -710,7 +714,7 @@ void psxRFE() { +@@ -702,7 +706,7 @@ void psxRFE() { * Register branch logic * * Format: OP rs, rt, offset * *********************************************************/ @@ -136,7 +136,7 @@ index 61c60ed..0fa5283 100644 void psxBEQ() { RepBranchi32(==) } // Branch if Rs == Rt void psxBNE() { RepBranchi32(!=) } // Branch if Rs != Rt -@@ -894,6 +898,9 @@ void MTC0(int reg, u32 val) { +@@ -886,6 +890,9 @@ void MTC0(int reg, u32 val) { case 12: // Status psxRegs.CP0.r[12] = val; psxTestSWInts(); @@ -146,7 +146,7 @@ index 61c60ed..0fa5283 
100644 break; case 13: // Cause -@@ -1056,6 +1063,23 @@ void intExecuteBlock() { +@@ -1027,6 +1034,23 @@ void intExecuteBlock() { while (!branch2) execI(); } @@ -171,10 +171,10 @@ index 61c60ed..0fa5283 100644 } diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c -index c09965d..135a5d0 100644 +index 04aeec2..1242653 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c -@@ -219,11 +219,13 @@ void psxMemShutdown() { +@@ -217,11 +217,13 @@ void psxMemShutdown() { } static int writeok = 1; @@ -188,7 +188,7 @@ index c09965d..135a5d0 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -249,6 +251,7 @@ u16 psxMemRead16(u32 mem) { +@@ -247,6 +249,7 @@ u16 psxMemRead16(u32 mem) { char *p; u32 t; @@ -196,7 +196,7 @@ index c09965d..135a5d0 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -274,6 +277,7 @@ u32 psxMemRead32(u32 mem) { +@@ -272,6 +275,7 @@ u32 psxMemRead32(u32 mem) { char *p; u32 t; @@ -204,7 +204,7 @@ index c09965d..135a5d0 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -299,6 +303,7 @@ void psxMemWrite8(u32 mem, u8 value) { +@@ -297,6 +301,7 @@ void psxMemWrite8(u32 mem, u8 value) { char *p; u32 t; @@ -212,7 +212,7 @@ index c09965d..135a5d0 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -326,6 +331,7 @@ void psxMemWrite16(u32 mem, u16 value) { +@@ -324,6 +329,7 @@ void psxMemWrite16(u32 mem, u16 value) { char *p; u32 t; @@ -220,7 +220,7 @@ index c09965d..135a5d0 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -353,6 +359,7 @@ void psxMemWrite32(u32 mem, u32 value) { +@@ -351,6 +357,7 @@ void psxMemWrite32(u32 mem, u32 value) { char *p; u32 t; diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 52d275937..04aeec27b 100644 --- a/libpcsxcore/psxmem.c +++ 
b/libpcsxcore/psxmem.c @@ -136,10 +136,8 @@ int psxMemInit() { memset(psxMemWLUT, 0, 0x10000 * sizeof(void *)); psxM = psxMap(0x80000000, 0x00210000, 1, MAP_TAG_RAM); -#ifndef RAM_FIXED if (psxM == NULL) psxM = psxMap(0x77000000, 0x00210000, 0, MAP_TAG_RAM); -#endif if (psxM == NULL) { SysMessage(_("mapping main RAM failed")); return -1; From b7ec323c2e42a9ff8df844e5a95665733abb4bc1 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 29 Nov 2021 00:03:20 +0200 Subject: [PATCH 091/597] drc: prefer callee-saved regs on alloc reduces amount of saved/restored regs (random game): arm32: ~45% arm64: ~80% --- libpcsxcore/new_dynarec/assem_arm.c | 6 ----- libpcsxcore/new_dynarec/assem_arm.h | 21 ++++++++++----- libpcsxcore/new_dynarec/assem_arm64.c | 3 --- libpcsxcore/new_dynarec/assem_arm64.h | 12 ++++++--- libpcsxcore/new_dynarec/new_dynarec.c | 39 ++++++++++++++++++++------- 5 files changed, 52 insertions(+), 29 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 87490c617..23d3b7f7b 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -27,12 +27,6 @@ #include "pcnt.h" #include "arm_features.h" -#ifndef __MACH__ -#define CALLER_SAVE_REGS 0x100f -#else -#define CALLER_SAVE_REGS 0x120f -#endif - #define unused __attribute__((unused)) #ifdef DRC_DBG diff --git a/libpcsxcore/new_dynarec/assem_arm.h b/libpcsxcore/new_dynarec/assem_arm.h index 9b3a1e10b..75273aa88 100644 --- a/libpcsxcore/new_dynarec/assem_arm.h +++ b/libpcsxcore/new_dynarec/assem_arm.h @@ -1,8 +1,3 @@ -#define HOST_REGS 13 -#define HOST_CCREG 10 -#define HOST_BTREG 8 -#define EXCLUDE_REG 11 - #define HOST_IMM8 1 #define HAVE_CMOV_IMM 1 #define HAVE_CONDITIONAL_CALL 1 @@ -19,12 +14,24 @@ r14 = lr (link register) r15 = pc (program counter) */ +#define HOST_REGS 13 +#define HOST_CCREG 10 +#define HOST_BTREG 8 +#define EXCLUDE_REG 11 + +// Note: FP is set to &dynarec_local when executing generated code. 
+// Thus the local variables are actually global and not on the stack. #define FP 11 #define LR 14 #define HOST_TEMPREG 14 -// Note: FP is set to &dynarec_local when executing generated code. -// Thus the local variables are actually global and not on the stack. +#ifndef __MACH__ +#define CALLER_SAVE_REGS 0x100f +#else +#define CALLER_SAVE_REGS 0x120f +#endif +#define PREFERRED_REG_FIRST 4 +#define PREFERRED_REG_LAST 9 extern char *invc_ptr; diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 431805d4d..1c52c3e6f 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -23,8 +23,6 @@ #include "pcnt.h" #include "arm_features.h" -#define CALLER_SAVE_REGS 0x0007ffff - #define unused __attribute__((unused)) void do_memhandler_pre(); @@ -1599,7 +1597,6 @@ static void do_writestub(int n) emit_jmp(stubs[n].retaddr); // return address (invcode check) set_jump_target(handler_jump, out); - // TODO FIXME: regalloc should prefer callee-saved regs if(!regs_saved) save_regs(reglist); void *handler=NULL; diff --git a/libpcsxcore/new_dynarec/assem_arm64.h b/libpcsxcore/new_dynarec/assem_arm64.h index 1aeee0b93..c5fcadf39 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.h +++ b/libpcsxcore/new_dynarec/assem_arm64.h @@ -1,13 +1,13 @@ -#define HOST_REGS 29 -#define HOST_BTREG 27 -#define EXCLUDE_REG -1 - #define HOST_IMM8 1 /* calling convention: r0 -r17: caller-save r19-r29: callee-save */ +#define HOST_REGS 29 +#define HOST_BTREG 27 +#define EXCLUDE_REG -1 + #define SP 31 #define WZR SP #define XZR SP @@ -23,6 +23,10 @@ #define HOST_CCREG 28 #define rCC w28 +#define CALLER_SAVE_REGS 0x0007ffff +#define PREFERRED_REG_FIRST 19 +#define PREFERRED_REG_LAST 27 + // stack space #define SSP_CALLEE_REGS (8*12) #define SSP_CALLER_REGS (8*20) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 921a2ed18..50410665e 100644 --- 
a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -1364,9 +1364,11 @@ void clean_blocks(u_int page) static void alloc_reg(struct regstat *cur,int i,signed char reg) { int r,hr; - int preferred_reg = (reg&7); - if(reg==CCREG) preferred_reg=HOST_CCREG; - if(reg==PTEMP||reg==FTEMP) preferred_reg=12; + int preferred_reg = PREFERRED_REG_FIRST + + reg % (PREFERRED_REG_LAST - PREFERRED_REG_FIRST + 1); + if (reg == CCREG) preferred_reg = HOST_CCREG; + if (reg == PTEMP || reg == FTEMP) preferred_reg = 12; + assert(PREFERRED_REG_FIRST != EXCLUDE_REG && EXCLUDE_REG != HOST_REGS); // Don't allocate unused registers if((cur->u>>reg)&1) return; @@ -1410,28 +1412,47 @@ static void alloc_reg(struct regstat *cur,int i,signed char reg) if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;} } } + // Try to allocate any available register, but prefer // registers that have not been used recently. - if(i>0) { - for(hr=0;hrregmap[hr]==-1) { - if(regs[i-1].regmap[hr]!=dops[i-1].rs1&®s[i-1].regmap[hr]!=dops[i-1].rs2&®s[i-1].regmap[hr]!=dops[i-1].rt1&®s[i-1].regmap[hr]!=dops[i-1].rt2) { + if (i > 0) { + for (hr = PREFERRED_REG_FIRST; ; ) { + if (cur->regmap[hr] < 0) { + int oldreg = regs[i-1].regmap[hr]; + if (oldreg < 0 || (oldreg != dops[i-1].rs1 && oldreg != dops[i-1].rs2 + && oldreg != dops[i-1].rt1 && oldreg != dops[i-1].rt2)) + { cur->regmap[hr]=reg; cur->dirty&=~(1<isconst&=~(1<regmap[hr]==-1) { + for (hr = PREFERRED_REG_FIRST; ; ) { + if (cur->regmap[hr] < 0) { cur->regmap[hr]=reg; cur->dirty&=~(1<isconst&=~(1< Date: Sun, 28 Nov 2021 17:12:04 +0200 Subject: [PATCH 092/597] drc: rework cycle counting The way it was done before caused different behaviour on different platforms because the dynarec can invert branches depending on register pressure and maybe other things. Because of that cycle counts would change slightly but sufficiently to break/fix timing sensitive games. Now it should be more consistent, maybe. 
--- libpcsxcore/new_dynarec/assem_arm.c | 18 +- libpcsxcore/new_dynarec/assem_arm64.c | 16 +- libpcsxcore/new_dynarec/emu_if.c | 92 ++-- libpcsxcore/new_dynarec/new_dynarec.c | 503 ++++++++---------- libpcsxcore/new_dynarec/patches/trace_drc_chk | 261 +++------ libpcsxcore/new_dynarec/patches/trace_intr | 133 ++++- 6 files changed, 502 insertions(+), 521 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 23d3b7f7b..c5c2c66e3 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -1788,7 +1788,7 @@ static void do_readstub(int n) int cc=get_reg(i_regmap,CCREG); if(cc<0) emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2); + emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); emit_far_call(handler); if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { mov_loadtype_adj(type,0,rt); @@ -1810,7 +1810,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, uintptr_t host_addr = 0; void *handler; int cc=get_reg(regmap,CCREG); - if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt)) + if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt)) return; handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr); if (handler == NULL) { @@ -1850,11 +1850,11 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, emit_loadreg(CCREG,2); if(is_dynamic) { emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2); + emit_addimm(cc<0?2:cc,adj,2); } else { emit_readword(&last_count,3); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2); + emit_addimm(cc<0?2:cc,adj,2); emit_add(2,3,2); emit_writeword(2,&Count); } @@ -1943,10 +1943,10 @@ static void do_writestub(int n) int cc=get_reg(i_regmap,CCREG); if(cc<0) emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2); + emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); // returns new cycle_count emit_far_call(handler); 
- emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc); + emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); if(restore_jump) @@ -1982,11 +1982,11 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, int cc=get_reg(regmap,CCREG); if(cc<0) emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2); + emit_addimm(cc<0?2:cc,adj,2); emit_movimm((u_int)handler,3); // returns new cycle_count emit_far_call(jump_handler_write_h); - emit_addimm(0,-CLOCK_ADJUST(adj),cc<0?2:cc); + emit_addimm(0,-adj,cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); restore_regs(reglist); @@ -2224,7 +2224,7 @@ static void c2op_mfc2_29_assemble(signed char tl, signed char temp) host_tempreg_release(); } -static void multdiv_assemble_arm(int i,struct regstat *i_regs) +static void multdiv_assemble_arm(int i, const struct regstat *i_regs) { // case 0x18: MULT // case 0x19: MULTU diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 1c52c3e6f..0b4922113 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1459,7 +1459,7 @@ static void do_readstub(int n) int cc=get_reg(i_regmap,CCREG); if(cc<0) emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2); + emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); emit_far_call(handler); // (no cycle reload after read) if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { @@ -1482,7 +1482,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, uintptr_t host_addr = 0; void *handler; int cc=get_reg(regmap,CCREG); - //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt)) + //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt)) // return; handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr); if (handler == NULL) { @@ -1520,7 +1520,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, emit_mov(rs,0); if(cc<0) 
emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2); + emit_addimm(cc<0?2:cc,adj,2); if(is_dynamic) { uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1; emit_adrp((void *)l1, 1); @@ -1615,10 +1615,10 @@ static void do_writestub(int n) int cc=get_reg(i_regmap,CCREG); if(cc<0) emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2); + emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); // returns new cycle_count emit_far_call(handler); - emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc); + emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); if(restore_jump) @@ -1656,12 +1656,12 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, cc = cc_use = get_reg(regmap, CCREG); if (cc < 0) emit_loadreg(CCREG, (cc_use = 2)); - emit_addimm(cc_use, CLOCK_ADJUST(adj), 2); + emit_addimm(cc_use, adj, 2); emit_far_call(do_memhandler_pre); emit_far_call(handler); emit_far_call(do_memhandler_post); - emit_addimm(0, -CLOCK_ADJUST(adj), cc_use); + emit_addimm(0, -adj, cc_use); if (cc < 0) emit_storereg(CCREG, cc_use); restore_regs(reglist); @@ -1889,7 +1889,7 @@ static void c2op_mfc2_29_assemble(signed char tl, signed char temp) host_tempreg_release(); } -static void multdiv_assemble_arm64(int i,struct regstat *i_regs) +static void multdiv_assemble_arm64(int i, const struct regstat *i_regs) { // case 0x18: MULT // case 0x19: MULTU diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 8c9650460..90c466059 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -473,7 +473,9 @@ void new_dynarec_load_blocks(const void *save, int size) {} #include static FILE *f; -extern u32 last_io_addr; +u32 irq_test_cycle; +u32 handler_cycle; +u32 last_io_addr; static void dump_mem(const char *fname, void *mem, size_t size) { @@ -503,8 +505,6 @@ static u32 memcheck_read(u32 a) void do_insn_trace(void) { static psxRegisters oldregs; - static u32 old_io_addr = 
(u32)-1; - static u32 old_io_data = 0xbad0c0de; static u32 event_cycles_o[PSXINT_COUNT]; u32 *allregs_p = (void *)&psxRegs; u32 *allregs_o = (void *)&oldregs; @@ -528,27 +528,27 @@ void do_insn_trace(void) // log event changes for (i = 0; i < PSXINT_COUNT; i++) { if (event_cycles[i] != event_cycles_o[i]) { - byte = 0xfc; + byte = 0xf8; fwrite(&byte, 1, 1, f); fwrite(&i, 1, 1, f); fwrite(&event_cycles[i], 1, 4, f); event_cycles_o[i] = event_cycles[i]; } } - // log last io - if (old_io_addr != last_io_addr) { - byte = 0xfd; - fwrite(&byte, 1, 1, f); - fwrite(&last_io_addr, 1, 4, f); - old_io_addr = last_io_addr; + #define SAVE_IF_CHANGED(code_, name_) { \ + static u32 old_##name_ = 0xbad0c0de; \ + if (old_##name_ != name_) { \ + byte = code_; \ + fwrite(&byte, 1, 1, f); \ + fwrite(&name_, 1, 4, f); \ + old_##name_ = name_; \ + } \ } + SAVE_IF_CHANGED(0xfb, irq_test_cycle); + SAVE_IF_CHANGED(0xfc, handler_cycle); + SAVE_IF_CHANGED(0xfd, last_io_addr); io_data = memcheck_read(last_io_addr); - if (old_io_data != io_data) { - byte = 0xfe; - fwrite(&byte, 1, 1, f); - fwrite(&io_data, 1, 4, f); - old_io_data = io_data; - } + SAVE_IF_CHANGED(0xfe, io_data); byte = 0xff; fwrite(&byte, 1, 1, f); @@ -610,12 +610,15 @@ void breakme() {} void do_insn_cmp(void) { + extern int last_count; static psxRegisters rregs; static u32 mem_addr, mem_val; + static u32 irq_test_cycle_intr; + static u32 handler_cycle_intr; u32 *allregs_p = (void *)&psxRegs; u32 *allregs_e = (void *)&rregs; static u32 ppc, failcount; - int i, ret, bad = 0, which_event = -1; + int i, ret, bad = 0, fatal = 0, which_event = -1; u32 ev_cycles = 0; u8 code; @@ -630,11 +633,17 @@ void do_insn_cmp(void) if (code == 0xff) break; switch (code) { - case 0xfc: + case 0xf8: which_event = 0; fread(&which_event, 1, 1, f); fread(&ev_cycles, 1, 4, f); continue; + case 0xfb: + fread(&irq_test_cycle_intr, 1, 4, f); + continue; + case 0xfc: + fread(&handler_cycle_intr, 1, 4, f); + continue; case 0xfd: fread(&mem_addr, 1, 4, f); 
continue; @@ -642,23 +651,43 @@ void do_insn_cmp(void) fread(&mem_val, 1, 4, f); continue; } + assert(code < offsetof(psxRegisters, intCycle) / 4); fread(&allregs_e[code], 1, 4, f); } if (ret <= 0) { printf("EOF?\n"); - goto end; + exit(1); } psxRegs.code = rregs.code; // don't care - psxRegs.cycle = rregs.cycle; + psxRegs.cycle += last_count; + //psxRegs.cycle = rregs.cycle; psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count //if (psxRegs.cycle == 166172) breakme(); - if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 && - mem_val == memcheck_read(mem_addr) - ) { + if (which_event >= 0 && event_cycles[which_event] != ev_cycles) { + printf("bad ev_cycles #%d: %08x %08x\n", which_event, event_cycles[which_event], ev_cycles); + fatal = 1; + } + + if (irq_test_cycle > irq_test_cycle_intr) { + printf("bad irq_test_cycle: %u %u\n", irq_test_cycle, irq_test_cycle_intr); + fatal = 1; + } + + if (handler_cycle != handler_cycle_intr) { + printf("bad handler_cycle: %u %u\n", handler_cycle, handler_cycle_intr); + fatal = 1; + } + + if (mem_val != memcheck_read(mem_addr)) { + printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val); + fatal = 1; + } + + if (!fatal && !memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle))) { failcount = 0; goto ok; } @@ -668,21 +697,11 @@ void do_insn_cmp(void) miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle); bad++; if (i > 32+2) - goto end; + fatal = 1; } } - if (mem_val != memcheck_read(mem_addr)) { - printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val); - goto end; - } - - if (which_event >= 0 && event_cycles[which_event] != ev_cycles) { - printf("bad ev_cycles #%d: %08x %08x\n", which_event, event_cycles[which_event], ev_cycles); - goto end; - } - - if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) { + if (!fatal && psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) { static int last_mcycle; if (last_mcycle != psxRegs.cycle >> 20) { 
printf("%u\n", psxRegs.cycle); @@ -692,7 +711,6 @@ void do_insn_cmp(void) goto ok; } -end: for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask) printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n", regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val, @@ -706,7 +724,7 @@ void do_insn_cmp(void) dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000); exit(1); ok: - psxRegs.cycle = rregs.cycle + 2; // sync timing + //psxRegs.cycle = rregs.cycle + 2; // sync timing ppc = psxRegs.pc; } diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 50410665e..71fabb8bd 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -116,7 +116,7 @@ enum stub_type { struct regstat { - signed char regmap_entry[HOST_REGS]; + signed char regmap_entry[HOST_REGS]; // pre-insn + loop preloaded regs? signed char regmap[HOST_REGS]; uint64_t wasdirty; uint64_t dirty; @@ -202,7 +202,8 @@ static struct decoded_insn static u_int ba[MAXBLOCK]; static uint64_t unneeded_reg[MAXBLOCK]; static uint64_t branch_unneeded_reg[MAXBLOCK]; - static signed char regmap_pre[MAXBLOCK][HOST_REGS]; // pre-instruction i? + // pre-instruction [i], excluding loop-preload regs? 
+ static signed char regmap_pre[MAXBLOCK][HOST_REGS]; // contains 'real' consts at [i] insn, but may differ from what's actually // loaded in host reg as 'final' value is always loaded, see get_final_value() static uint32_t current_constmap[HOST_REGS]; @@ -328,13 +329,13 @@ void call_gteStall(); void new_dyna_leave(); // Needed by assembler -static void wb_register(signed char r,signed char regmap[],uint64_t dirty); -static void wb_dirtys(signed char i_regmap[],uint64_t i_dirty); -static void wb_needed_dirtys(signed char i_regmap[],uint64_t i_dirty,int addr); -static void load_all_regs(signed char i_regmap[]); -static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]); +static void wb_register(signed char r, const signed char regmap[], uint64_t dirty); +static void wb_dirtys(const signed char i_regmap[], uint64_t i_dirty); +static void wb_needed_dirtys(const signed char i_regmap[], uint64_t i_dirty, int addr); +static void load_all_regs(const signed char i_regmap[]); +static void load_needed_regs(const signed char i_regmap[], const signed char next_regmap[]); static void load_regs_entry(int t); -static void load_all_consts(signed char regmap[],u_int dirty,int i); +static void load_all_consts(const signed char regmap[], u_int dirty, int i); static u_int get_host_reglist(const signed char *regmap); static int verify_dirty(const u_int *ptr); @@ -2103,7 +2104,7 @@ static void add_stub_r(enum stub_type type, void *addr, void *retaddr, } // Write out a single register -static void wb_register(signed char r,signed char regmap[],uint64_t dirty) +static void wb_register(signed char r, const signed char regmap[], uint64_t dirty) { int hr; for(hr=0;hr=0x20&&dops[i].opcode2<=0x23) { // ADD/ADDU/SUB/SUBU if(dops[i].rt1) { @@ -2298,7 +2299,7 @@ static void alu_assemble(int i,struct regstat *i_regs) } } -void imm16_assemble(int i,struct regstat *i_regs) +static void imm16_assemble(int i, const struct regstat *i_regs) { if (dops[i].opcode==0x0f) { // LUI 
if(dops[i].rt1) { @@ -2453,7 +2454,7 @@ void imm16_assemble(int i,struct regstat *i_regs) } } -void shiftimm_assemble(int i,struct regstat *i_regs) +static void shiftimm_assemble(int i, const struct regstat *i_regs) { if(dops[i].opcode2<=0x3) // SLL/SRL/SRA { @@ -2511,7 +2512,7 @@ void shiftimm_assemble(int i,struct regstat *i_regs) } #ifndef shift_assemble -static void shift_assemble(int i,struct regstat *i_regs) +static void shift_assemble(int i, const struct regstat *i_regs) { signed char s,t,shift; if (dops[i].rt1 == 0) @@ -2756,7 +2757,7 @@ static void do_store_byte(int a, int rt, int offset_reg) emit_writebyte_indexed(rt, 0, a); } -static void load_assemble(int i, const struct regstat *i_regs) +static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) { int s,tl,addr; int offset; @@ -2823,10 +2824,10 @@ static void load_assemble(int i, const struct regstat *i_regs) emit_movsbl_indexed(0, a, tl); } if(jaddr) - add_stub_r(LOADB_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); + add_stub_r(LOADB_STUB,jaddr,out,i,addr,i_regs,ccadj_,reglist); } else - inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); + inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj_,reglist); break; case 0x21: // LH if(!c||memtarget) { @@ -2841,10 +2842,10 @@ static void load_assemble(int i, const struct regstat *i_regs) emit_movswl_indexed(0, a, tl); } if(jaddr) - add_stub_r(LOADH_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); + add_stub_r(LOADH_STUB,jaddr,out,i,addr,i_regs,ccadj_,reglist); } else - inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); + inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj_,reglist); break; case 0x23: // LW if(!c||memtarget) { @@ -2855,10 +2856,10 @@ static void load_assemble(int i, const struct regstat *i_regs) do_load_word(a, tl, offset_reg); } if(jaddr) - 
add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); + add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj_,reglist); } else - inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); + inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj_,reglist); break; case 0x24: // LBU if(!c||memtarget) { @@ -2874,10 +2875,10 @@ static void load_assemble(int i, const struct regstat *i_regs) emit_movzbl_indexed(0, a, tl); } if(jaddr) - add_stub_r(LOADBU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); + add_stub_r(LOADBU_STUB,jaddr,out,i,addr,i_regs,ccadj_,reglist); } else - inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); + inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj_,reglist); break; case 0x25: // LHU if(!c||memtarget) { @@ -2892,10 +2893,10 @@ static void load_assemble(int i, const struct regstat *i_regs) emit_movzwl_indexed(0, a, tl); } if(jaddr) - add_stub_r(LOADHU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist); + add_stub_r(LOADHU_STUB,jaddr,out,i,addr,i_regs,ccadj_,reglist); } else - inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj[i],reglist); + inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj_,reglist); break; case 0x27: // LWU case 0x37: // LD @@ -2908,7 +2909,7 @@ static void load_assemble(int i, const struct regstat *i_regs) } #ifndef loadlr_assemble -static void loadlr_assemble(int i, const struct regstat *i_regs) +static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) { int s,tl,temp,temp2,addr; int offset; @@ -2961,10 +2962,10 @@ static void loadlr_assemble(int i, const struct regstat *i_regs) do_load_word(a, temp2, offset_reg); if (fastio_reg_override == HOST_TEMPREG || offset_reg == HOST_TEMPREG) host_tempreg_release(); - if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist); + 
if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj_,reglist); } else - inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist); + inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj_,reglist); if(dops[i].rt1) { assert(tl>=0); emit_andimm(temp,24,temp); @@ -2990,7 +2991,7 @@ static void loadlr_assemble(int i, const struct regstat *i_regs) } #endif -static void store_assemble(int i, const struct regstat *i_regs) +static void store_assemble(int i, const struct regstat *i_regs, int ccadj_) { int s,tl; int addr,temp; @@ -3065,7 +3066,7 @@ static void store_assemble(int i, const struct regstat *i_regs) if(jaddr) { // PCSX store handlers don't check invcode again reglist|=1<waswritten&(1<regmap,dops[i].rs2,ccadj[i],reglist); + inline_writestub(type,i,addr_val,i_regs->regmap,dops[i].rs2,ccadj_,reglist); } // basic current block modification detection.. // not looking back as that should be in mips cache already @@ -3115,7 +3116,7 @@ static void store_assemble(int i, const struct regstat *i_regs) } } -static void storelr_assemble(int i, const struct regstat *i_regs) +static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) { int s,tl; int temp; @@ -3230,7 +3231,7 @@ static void storelr_assemble(int i, const struct regstat *i_regs) if (offset_reg == HOST_TEMPREG) host_tempreg_release(); if(!c||!memtarget) - add_stub_r(STORELR_STUB,jaddr,out,i,temp,i_regs,ccadj[i],reglist); + add_stub_r(STORELR_STUB,jaddr,out,i,temp,i_regs,ccadj_,reglist); if(!(i_regs->waswritten&(1<regmap,INVCP); @@ -3249,7 +3250,7 @@ static void storelr_assemble(int i, const struct regstat *i_regs) } } -static void cop0_assemble(int i,struct regstat *i_regs) +static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) { if(dops[i].opcode2==0) // MFC0 { @@ -3270,7 +3271,7 @@ static void cop0_assemble(int i,struct regstat *i_regs) emit_readword(&last_count,HOST_TEMPREG); 
emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); + emit_addimm(HOST_CCREG,ccadj_,HOST_CCREG); emit_writeword(HOST_CCREG,&Count); } // What a mess. The status register (12) can enable interrupts, @@ -3305,7 +3306,7 @@ static void cop0_assemble(int i,struct regstat *i_regs) if(copr==9||copr==11||copr==12||copr==13) { emit_readword(&Count,HOST_CCREG); emit_readword(&next_interupt,HOST_TEMPREG); - emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG); + emit_addimm(HOST_CCREG,-ccadj_,HOST_CCREG); emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); emit_writeword(HOST_TEMPREG,&last_count); emit_storereg(CCREG,HOST_CCREG); @@ -3338,7 +3339,7 @@ static void cop0_assemble(int i,struct regstat *i_regs) } } -static void cop1_unusable(int i,struct regstat *i_regs) +static void cop1_unusable(int i, const struct regstat *i_regs) { // XXX: should just just do the exception instead //if(!cop1_usable) @@ -3349,12 +3350,12 @@ static void cop1_unusable(int i,struct regstat *i_regs) } } -static void cop1_assemble(int i,struct regstat *i_regs) +static void cop1_assemble(int i, const struct regstat *i_regs) { cop1_unusable(i, i_regs); } -static void c1ls_assemble(int i,struct regstat *i_regs) +static void c1ls_assemble(int i, const struct regstat *i_regs) { cop1_unusable(i, i_regs); } @@ -3377,7 +3378,7 @@ static void do_cop1stub(int n) wb_dirtys(i_regs->regmap_entry,i_regs->wasdirty); if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); emit_movimm(start+(i-ds)*4,EAX); // Get PC - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... + emit_addimm(HOST_CCREG,ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... 
emit_far_jump(ds?fp_exception_ds:fp_exception); } @@ -3413,7 +3414,7 @@ static void emit_log_gte_stall(int i, int stall, u_int reglist) emit_movimm(stall, 0); else emit_mov(HOST_TEMPREG, 0); - emit_addimm(HOST_CCREG, CLOCK_ADJUST(ccadj[i]), 1); + emit_addimm(HOST_CCREG, ccadj[i], 1); emit_far_call(log_gte_stall); restore_regs(reglist); } @@ -3436,10 +3437,12 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u //if (dops[j].is_ds) break; if (cop2_is_stalling_op(j, &other_gte_op_cycles) || dops[j].bt) break; + if (j > 0 && ccadj[j - 1] > ccadj[j]) + break; } j = max(j, 0); } - cycles_passed = CLOCK_ADJUST(ccadj[i] - ccadj[j]); + cycles_passed = ccadj[i] - ccadj[j]; if (other_gte_op_cycles >= 0) stall = other_gte_op_cycles - cycles_passed; else if (cycles_passed >= 44) @@ -3450,13 +3453,13 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u #if 0 // too slow save_regs(reglist); emit_movimm(gte_cycletab[op], 0); - emit_addimm(HOST_CCREG, CLOCK_ADJUST(ccadj[i]), 1); + emit_addimm(HOST_CCREG, ccadj[i], 1); emit_far_call(call_gteStall); restore_regs(reglist); #else host_tempreg_acquire(); emit_readword(&psxRegs.gteBusyCycle, rtmp); - emit_addimm(rtmp, -CLOCK_ADJUST(ccadj[i]), rtmp); + emit_addimm(rtmp, -ccadj[i], rtmp); emit_sub(rtmp, HOST_CCREG, HOST_TEMPREG); emit_cmpimm(HOST_TEMPREG, 44); emit_cmovb_reg(rtmp, HOST_CCREG); @@ -3486,7 +3489,7 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u if (other_gte_op_cycles >= 0) // will handle stall when assembling that op return; - cycles_passed = CLOCK_ADJUST(ccadj[min(j, slen -1)] - ccadj[i]); + cycles_passed = ccadj[min(j, slen -1)] - ccadj[i]; if (cycles_passed >= 44) return; assem_debug("; save gteBusyCycle\n"); @@ -3494,11 +3497,11 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u #if 0 emit_readword(&last_count, HOST_TEMPREG); emit_add(HOST_TEMPREG, HOST_CCREG, HOST_TEMPREG); - 
emit_addimm(HOST_TEMPREG, CLOCK_ADJUST(ccadj[i]), HOST_TEMPREG); + emit_addimm(HOST_TEMPREG, ccadj[i], HOST_TEMPREG); emit_addimm(HOST_TEMPREG, gte_cycletab[op]), HOST_TEMPREG); emit_writeword(HOST_TEMPREG, &psxRegs.gteBusyCycle); #else - emit_addimm(HOST_CCREG, CLOCK_ADJUST(ccadj[i]) + gte_cycletab[op], HOST_TEMPREG); + emit_addimm(HOST_CCREG, ccadj[i] + gte_cycletab[op], HOST_TEMPREG); emit_writeword(HOST_TEMPREG, &psxRegs.gteBusyCycle); #endif host_tempreg_release(); @@ -3520,7 +3523,7 @@ static int check_multdiv(int i, int *cycles) return 1; } -static void multdiv_prepare_stall(int i, const struct regstat *i_regs) +static void multdiv_prepare_stall(int i, const struct regstat *i_regs, int ccadj_) { int j, found = 0, c = 0; if (HACK_ENABLED(NDHACK_NO_STALLS)) @@ -3548,7 +3551,7 @@ static void multdiv_prepare_stall(int i, const struct regstat *i_regs) assert(c > 0); assem_debug("; muldiv prepare stall %d\n", c); host_tempreg_acquire(); - emit_addimm(HOST_CCREG, CLOCK_ADJUST(ccadj[i]) + c, HOST_TEMPREG); + emit_addimm(HOST_CCREG, ccadj_ + c, HOST_TEMPREG); emit_writeword(HOST_TEMPREG, &psxRegs.muldivBusyCycle); host_tempreg_release(); } @@ -3570,16 +3573,18 @@ static void multdiv_do_stall(int i, const struct regstat *i_regs) if (!dops[i].bt) { for (j = i - 1; j >= 0; j--) { if (dops[j].is_ds) break; - if (check_multdiv(j, &known_cycles) || dops[j].bt) + if (check_multdiv(j, &known_cycles)) break; if (is_mflohi(j)) // already handled by this op return; + if (dops[j].bt || (j > 0 && ccadj[j - 1] > ccadj[j])) + break; } j = max(j, 0); } if (known_cycles > 0) { - known_cycles -= CLOCK_ADJUST(ccadj[i] - ccadj[j]); + known_cycles -= ccadj[i] - ccadj[j]; assem_debug("; muldiv stall resolved %d\n", known_cycles); if (known_cycles > 0) emit_addimm(HOST_CCREG, known_cycles, HOST_CCREG); @@ -3588,7 +3593,7 @@ static void multdiv_do_stall(int i, const struct regstat *i_regs) assem_debug("; muldiv stall unresolved\n"); host_tempreg_acquire(); 
emit_readword(&psxRegs.muldivBusyCycle, rtmp); - emit_addimm(rtmp, -CLOCK_ADJUST(ccadj[i]), rtmp); + emit_addimm(rtmp, -ccadj[i], rtmp); emit_sub(rtmp, HOST_CCREG, HOST_TEMPREG); emit_cmpimm(HOST_TEMPREG, 37); emit_cmovb_reg(rtmp, HOST_CCREG); @@ -3679,7 +3684,7 @@ static void cop2_put_dreg(u_int copr,signed char sl,signed char temp) } } -static void c2ls_assemble(int i, const struct regstat *i_regs) +static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) { int s,tl; int ar; @@ -3760,7 +3765,7 @@ static void c2ls_assemble(int i, const struct regstat *i_regs) if (fastio_reg_override == HOST_TEMPREG || offset_reg == HOST_TEMPREG) host_tempreg_release(); if(jaddr2) - add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj[i],reglist); + add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj_,reglist); if(dops[i].opcode==0x3a) // SWC2 if(!(i_regs->waswritten&(1<regmap,CCREG); assert(ccreg==HOST_CCREG); @@ -3910,33 +3915,33 @@ static void call_c_cpu_handler(int i, const struct regstat *i_regs, u_int pc, vo emit_movimm(pc,3); // Get PC emit_readword(&last_count,2); emit_writeword(3,&psxRegs.pc); - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX + emit_addimm(HOST_CCREG,ccadj_,HOST_CCREG); emit_add(2,HOST_CCREG,2); emit_writeword(2,&psxRegs.cycle); emit_far_call(func); emit_far_jump(jump_to_new_pc); } -static void syscall_assemble(int i,struct regstat *i_regs) +static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_) { emit_movimm(0x20,0); // cause code emit_movimm(0,1); // not in delay slot - call_c_cpu_handler(i,i_regs,start+i*4,psxException); + call_c_cpu_handler(i, i_regs, ccadj_, start+i*4, psxException); } -static void hlecall_assemble(int i,struct regstat *i_regs) +static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_) { void *hlefunc = psxNULL; uint32_t hleCode = source[i] & 0x03ffffff; if (hleCode < ARRAY_SIZE(psxHLEt)) hlefunc = psxHLEt[hleCode]; - call_c_cpu_handler(i,i_regs,start+i*4+4,hlefunc); + 
call_c_cpu_handler(i, i_regs, ccadj_, start + i*4+4, hlefunc); } -static void intcall_assemble(int i,struct regstat *i_regs) +static void intcall_assemble(int i, const struct regstat *i_regs, int ccadj_) { - call_c_cpu_handler(i,i_regs,start+i*4,execI); + call_c_cpu_handler(i, i_regs, ccadj_, start + i*4, execI); } static void speculate_mov(int rs,int rt) @@ -4031,45 +4036,108 @@ static void speculate_register_values(int i) #endif } -static void ds_assemble(int i,struct regstat *i_regs) +static void ujump_assemble(int i, const struct regstat *i_regs); +static void rjump_assemble(int i, const struct regstat *i_regs); +static void cjump_assemble(int i, const struct regstat *i_regs); +static void sjump_assemble(int i, const struct regstat *i_regs); +static void pagespan_assemble(int i, const struct regstat *i_regs); + +static int assemble(int i, const struct regstat *i_regs, int ccadj_) { - speculate_register_values(i); - is_delayslot=1; - switch(dops[i].itype) { + int ds = 0; + switch (dops[i].itype) { case ALU: - alu_assemble(i,i_regs);break; + alu_assemble(i, i_regs); + break; case IMM16: - imm16_assemble(i,i_regs);break; + imm16_assemble(i, i_regs); + break; case SHIFT: - shift_assemble(i,i_regs);break; + shift_assemble(i, i_regs); + break; case SHIFTIMM: - shiftimm_assemble(i,i_regs);break; + shiftimm_assemble(i, i_regs); + break; case LOAD: - load_assemble(i,i_regs);break; + load_assemble(i, i_regs, ccadj_); + break; case LOADLR: - loadlr_assemble(i,i_regs);break; + loadlr_assemble(i, i_regs, ccadj_); + break; case STORE: - store_assemble(i,i_regs);break; + store_assemble(i, i_regs, ccadj_); + break; case STORELR: - storelr_assemble(i,i_regs);break; + storelr_assemble(i, i_regs, ccadj_); + break; case COP0: - cop0_assemble(i,i_regs);break; + cop0_assemble(i, i_regs, ccadj_); + break; case COP1: - cop1_assemble(i,i_regs);break; + cop1_assemble(i, i_regs); + break; case C1LS: - c1ls_assemble(i,i_regs);break; + c1ls_assemble(i, i_regs); + break; case COP2: - 
cop2_assemble(i,i_regs);break; + cop2_assemble(i, i_regs); + break; case C2LS: - c2ls_assemble(i,i_regs);break; + c2ls_assemble(i, i_regs, ccadj_); + break; case C2OP: - c2op_assemble(i,i_regs);break; + c2op_assemble(i, i_regs); + break; case MULTDIV: - multdiv_assemble(i,i_regs); - multdiv_prepare_stall(i,i_regs); + multdiv_assemble(i, i_regs); + multdiv_prepare_stall(i, i_regs, ccadj_); break; case MOV: - mov_assemble(i,i_regs);break; + mov_assemble(i, i_regs); + break; + case SYSCALL: + syscall_assemble(i, i_regs, ccadj_); + break; + case HLECALL: + hlecall_assemble(i, i_regs, ccadj_); + break; + case INTCALL: + intcall_assemble(i, i_regs, ccadj_); + break; + case UJUMP: + ujump_assemble(i, i_regs); + ds = 1; + break; + case RJUMP: + rjump_assemble(i, i_regs); + ds = 1; + break; + case CJUMP: + cjump_assemble(i, i_regs); + ds = 1; + break; + case SJUMP: + sjump_assemble(i, i_regs); + ds = 1; + break; + case SPAN: + pagespan_assemble(i, i_regs); + break; + case OTHER: + case NI: + // not handled, just skip + break; + default: + assert(0); + } + return ds; +} + +static void ds_assemble(int i, const struct regstat *i_regs) +{ + speculate_register_values(i); + is_delayslot = 1; + switch (dops[i].itype) { case SYSCALL: case HLECALL: case INTCALL: @@ -4079,8 +4147,11 @@ static void ds_assemble(int i,struct regstat *i_regs) case CJUMP: case SJUMP: SysPrintf("Jump in the delay slot. This is probably a bug.\n"); + break; + default: + assemble(i, i_regs, ccadj[i]); } - is_delayslot=0; + is_delayslot = 0; } // Is the branch target a valid internal jump? 
@@ -4184,7 +4255,7 @@ static void loop_preload(signed char pre[],signed char entry[]) // Generate address for load/store instruction // goes to AGEN for writes, FTEMP for LOADLR and cop1/2 loads -void address_generation(int i,struct regstat *i_regs,signed char entry[]) +void address_generation(int i, const struct regstat *i_regs, signed char entry[]) { if (dops[i].is_load || dops[i].is_store) { int ra=-1; @@ -4383,7 +4454,7 @@ static void load_consts(signed char pre[],signed char regmap[],int i) } } -void load_all_consts(signed char regmap[], u_int dirty, int i) +static void load_all_consts(const signed char regmap[], u_int dirty, int i) { int hr; // Load 32-bit regs @@ -4404,7 +4475,7 @@ void load_all_consts(signed char regmap[], u_int dirty, int i) } // Write out all dirty registers (except cycle count) -static void wb_dirtys(signed char i_regmap[],uint64_t i_dirty) +static void wb_dirtys(const signed char i_regmap[], uint64_t i_dirty) { int hr; for(hr=0;hr>2; @@ -4444,7 +4515,7 @@ void wb_needed_dirtys(signed char i_regmap[],uint64_t i_dirty,int addr) } // Load all registers (except cycle count) -void load_all_regs(signed char i_regmap[]) +static void load_all_regs(const signed char i_regmap[]) { int hr; for(hr=0;hr 0 && !dops[i].bt) { for (hr = 0; hr < HOST_REGS; hr++) { - int reg = regs[i-1].regmap[hr]; + int reg = regs[i].regmap_entry[hr]; // regs[i-1].regmap[hr]; if (hr == EXCLUDE_REG || reg < 0) continue; if (!((regs[i-1].isconst >> hr) & 1)) @@ -4665,6 +4736,11 @@ static void drc_dbg_emit_do_cmp(int i) } emit_movimm(start+i*4,0); emit_writeword(0,&pcaddr); + int cc = get_reg(regs[i].regmap_entry, CCREG); + if (cc < 0) + emit_loadreg(CCREG, cc = 0); + emit_addimm(cc, ccadj_, 0); + emit_writeword(0, &psxRegs.cycle); emit_far_call(do_insn_cmp); //emit_readword(&cycle,0); //emit_addimm(0,2,0); @@ -4674,18 +4750,19 @@ static void drc_dbg_emit_do_cmp(int i) assem_debug("\\\\do_insn_cmp\n"); } #else -#define drc_dbg_emit_do_cmp(x) +#define 
drc_dbg_emit_do_cmp(x,y) #endif // Used when a branch jumps into the delay slot of another branch static void ds_assemble_entry(int i) { - int t=(ba[i]-start)>>2; + int t = (ba[i] - start) >> 2; + int ccadj_ = -CLOCK_ADJUST(1); if (!instr_addr[t]) instr_addr[t] = out; assem_debug("Assemble delay slot at %x\n",ba[i]); assem_debug("<->\n"); - drc_dbg_emit_do_cmp(t); + drc_dbg_emit_do_cmp(t, ccadj_); if(regs[t].regmap_entry[HOST_CCREG]==CCREG&®s[t].regmap[HOST_CCREG]!=CCREG) wb_register(CCREG,regs[t].regmap_entry,regs[t].wasdirty); load_regs(regs[t].regmap_entry,regs[t].regmap,dops[t].rs1,dops[t].rs2); @@ -4695,41 +4772,7 @@ static void ds_assemble_entry(int i) if (dops[t].is_store) load_regs(regs[t].regmap_entry,regs[t].regmap,INVCP,INVCP); is_delayslot=0; - switch(dops[t].itype) { - case ALU: - alu_assemble(t,®s[t]);break; - case IMM16: - imm16_assemble(t,®s[t]);break; - case SHIFT: - shift_assemble(t,®s[t]);break; - case SHIFTIMM: - shiftimm_assemble(t,®s[t]);break; - case LOAD: - load_assemble(t,®s[t]);break; - case LOADLR: - loadlr_assemble(t,®s[t]);break; - case STORE: - store_assemble(t,®s[t]);break; - case STORELR: - storelr_assemble(t,®s[t]);break; - case COP0: - cop0_assemble(t,®s[t]);break; - case COP1: - cop1_assemble(t,®s[t]);break; - case C1LS: - c1ls_assemble(t,®s[t]);break; - case COP2: - cop2_assemble(t,®s[t]);break; - case C2LS: - c2ls_assemble(t,®s[t]);break; - case C2OP: - c2op_assemble(t,®s[t]);break; - case MULTDIV: - multdiv_assemble(t,®s[t]); - multdiv_prepare_stall(i,®s[t]); - break; - case MOV: - mov_assemble(t,®s[t]);break; + switch (dops[t].itype) { case SYSCALL: case HLECALL: case INTCALL: @@ -4739,6 +4782,9 @@ static void ds_assemble_entry(int i) case CJUMP: case SJUMP: SysPrintf("Jump in the delay slot. 
This is probably a bug.\n"); + break; + default: + assemble(t, ®s[t], ccadj_); } store_regs_bt(regs[t].regmap,regs[t].dirty,ba[i]+4); load_regs_bt(regs[t].regmap,regs[t].dirty,ba[i]+4); @@ -4768,9 +4814,10 @@ static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) emit_movimm_from(imm1,rt1,imm2,rt2); } -void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) +static void do_cc(int i, const signed char i_regmap[], int *adj, + int addr, int taken, int invert) { - int count; + int count, count_plus2; void *jaddr; void *idle=NULL; int t=0; @@ -4782,14 +4829,15 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) if(internal_branch(ba[i])) { t=(ba[i]-start)>>2; - if(dops[t].is_ds) *adj=-1; // Branch into delay slot adds an extra cycle + if(dops[t].is_ds) *adj=-CLOCK_ADJUST(1); // Branch into delay slot adds an extra cycle else *adj=ccadj[t]; } else { *adj=0; } - count=ccadj[i]; + count = ccadj[i]; + count_plus2 = count + CLOCK_ADJUST(2); if(taken==TAKEN && i==(ba[i]-start)>>2 && source[i+1]==0) { // Idle loop if(count&1) emit_addimm_and_set_flags(2*(count+2),HOST_CCREG); @@ -4800,26 +4848,26 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) emit_jmp(0); } else if(*adj==0||invert) { - int cycles=CLOCK_ADJUST(count+2); + int cycles = count_plus2; // faster loop HACK #if 0 if (t&&*adj) { int rel=t-i; if(-NO_CYCLE_PENALTY_THR>2].regmap_entry); @@ -5074,7 +5122,7 @@ static void ujump_assemble_write_ra(int i) } } -static void ujump_assemble(int i,struct regstat *i_regs) +static void ujump_assemble(int i, const struct regstat *i_regs) { int ra_done=0; if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); @@ -5108,7 +5156,7 @@ static void ujump_assemble(int i,struct regstat *i_regs) if(dops[i].rt1==31&&temp>=0) emit_prefetchreg(temp); #endif do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); - if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc, 
ccadj[i] + CLOCK_ADJUST(2) - adj, cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); if(internal_branch(ba[i])) assem_debug("branch: internal\n"); @@ -5144,7 +5192,7 @@ static void rjump_assemble_write_ra(int i) #endif } -static void rjump_assemble(int i,struct regstat *i_regs) +static void rjump_assemble(int i, const struct regstat *i_regs) { int temp; int rs,cc; @@ -5219,7 +5267,7 @@ static void rjump_assemble(int i,struct regstat *i_regs) //do_cc(i,branch_regs[i].regmap,&adj,-1,TAKEN); //if(adj) emit_addimm(cc,2*(ccadj[i]+2-adj),cc); // ??? - Shouldn't happen //assert(adj==0); - emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), HOST_CCREG); add_stub(CC_STUB,out,NULL,0,i,-1,TAKEN,rs); if(dops[i+1].itype==COP0&&(source[i+1]&0x3f)==0x10) // special case for RFE @@ -5241,9 +5289,9 @@ static void rjump_assemble(int i,struct regstat *i_regs) #endif } -static void cjump_assemble(int i,struct regstat *i_regs) +static void cjump_assemble(int i, const struct regstat *i_regs) { - signed char *i_regmap=i_regs->regmap; + const signed char *i_regmap = i_regs->regmap; int cc; int match; match=match_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); @@ -5309,7 +5357,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) if(unconditional) { do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); if(i!=(ba[i]-start)>>2 || source[i+1]!=0) { - if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc, ccadj[i] + CLOCK_ADJUST(2) - adj, cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5328,7 +5376,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) } } else if(nop) { - emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); + emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), cc); void *jaddr=out; emit_jns(0); add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5336,7 +5384,7 
@@ static void cjump_assemble(int i,struct regstat *i_regs) else { void *taken = NULL, *nottaken = NULL, *nottaken1 = NULL; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); - if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); + if(adj&&!invert) emit_addimm(cc, ccadj[i] + CLOCK_ADJUST(2) - adj, cc); //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); @@ -5391,7 +5439,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if (match && (!internal || !dops[(ba[i]-start)>>2].is_ds)) { if(adj) { - emit_addimm(cc,-CLOCK_ADJUST(adj),cc); + emit_addimm(cc,-adj,cc); add_to_linker(out,ba[i],internal); }else{ emit_addnop(13); @@ -5401,7 +5449,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) }else #endif { - if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); + if(adj) emit_addimm(cc,-adj,cc); store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); if(internal) @@ -5421,7 +5469,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) if(nottaken1) set_jump_target(nottaken1, out); if(adj) { - if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); + if(!invert) emit_addimm(cc,adj,cc); } } // (!unconditional) } // if(ooo) @@ -5484,7 +5532,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc, ccadj[i] + CLOCK_ADJUST(2) - adj, cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5515,7 +5563,7 @@ static void cjump_assemble(int i,struct 
regstat *i_regs) if (cc == -1) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), HOST_CCREG); void *jaddr=out; emit_jns(0); add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5524,7 +5572,7 @@ static void cjump_assemble(int i,struct regstat *i_regs) else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); + emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), cc); void *jaddr=out; emit_jns(0); add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5533,9 +5581,9 @@ static void cjump_assemble(int i,struct regstat *i_regs) } } -static void sjump_assemble(int i,struct regstat *i_regs) +static void sjump_assemble(int i, const struct regstat *i_regs) { - signed char *i_regmap=i_regs->regmap; + const signed char *i_regmap = i_regs->regmap; int cc; int match; match=match_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); @@ -5607,7 +5655,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) if(unconditional) { do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); if(i!=(ba[i]-start)>>2 || source[i+1]!=0) { - if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc, ccadj[i] + CLOCK_ADJUST(2) - adj, cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5626,7 +5674,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) } } else if(nevertaken) { - emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); + emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), cc); void *jaddr=out; emit_jns(0); add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5634,7 +5682,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) else { void *nottaken = NULL; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); - if(adj&&!invert) 
emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); + if(adj&&!invert) emit_addimm(cc, ccadj[i] + CLOCK_ADJUST(2) - adj, cc); { assert(s1l>=0); if((dops[i].opcode2&0xf)==0) // BLTZ/BLTZAL @@ -5665,7 +5713,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if (match && (!internal || !dops[(ba[i] - start) >> 2].is_ds)) { if(adj) { - emit_addimm(cc,-CLOCK_ADJUST(adj),cc); + emit_addimm(cc,-adj,cc); add_to_linker(out,ba[i],internal); }else{ emit_addnop(13); @@ -5675,7 +5723,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) }else #endif { - if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); + if(adj) emit_addimm(cc,-adj,cc); store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); if(internal) @@ -5694,7 +5742,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) } if(adj) { - if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); + if(!invert) emit_addimm(cc,adj,cc); } } // (!unconditional) } // if(ooo) @@ -5755,7 +5803,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc, ccadj[i] + CLOCK_ADJUST(2) - adj, cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5782,7 +5830,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) if (cc == -1) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), HOST_CCREG); void *jaddr=out; emit_jns(0); add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5791,7 +5839,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) 
else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); + emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), cc); void *jaddr=out; emit_jns(0); add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5800,7 +5848,7 @@ static void sjump_assemble(int i,struct regstat *i_regs) } } -static void pagespan_assemble(int i,struct regstat *i_regs) +static void pagespan_assemble(int i, const struct regstat *i_regs) { int s1l=get_reg(i_regs->regmap,dops[i].rs1); int s2l=get_reg(i_regs->regmap,dops[i].rs2); @@ -5858,7 +5906,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) if((dops[i].opcode&0x2e)==4||dops[i].opcode==0x11) { // BEQ/BNE/BEQL/BNEL/BC1 load_regs(regs[i].regmap_entry,regs[i].regmap,CCREG,CCREG); } - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); + emit_addimm(HOST_CCREG, ccadj[i] + CLOCK_ADJUST(2), HOST_CCREG); if(dops[i].opcode==2) // J { unconditional=1; @@ -6027,41 +6075,7 @@ static void pagespan_ds() if (dops[0].is_store) load_regs(regs[0].regmap_entry,regs[0].regmap,INVCP,INVCP); is_delayslot=0; - switch(dops[0].itype) { - case ALU: - alu_assemble(0,&regs[0]);break; - case IMM16: - imm16_assemble(0,&regs[0]);break; - case SHIFT: - shift_assemble(0,&regs[0]);break; - case SHIFTIMM: - shiftimm_assemble(0,&regs[0]);break; - case LOAD: - load_assemble(0,&regs[0]);break; - case LOADLR: - loadlr_assemble(0,&regs[0]);break; - case STORE: - store_assemble(0,&regs[0]);break; - case STORELR: - storelr_assemble(0,&regs[0]);break; - case COP0: - cop0_assemble(0,&regs[0]);break; - case COP1: - cop1_assemble(0,&regs[0]);break; - case C1LS: - c1ls_assemble(0,&regs[0]);break; - case COP2: - cop2_assemble(0,&regs[0]);break; - case C2LS: - c2ls_assemble(0,&regs[0]);break; - case C2OP: - c2op_assemble(0,&regs[0]);break; - case MULTDIV: - multdiv_assemble(0,&regs[0]); - multdiv_prepare_stall(0,&regs[0]); - break; - case MOV: - mov_assemble(0,&regs[0]);break; + switch (dops[0].itype) { case SYSCALL: case HLECALL: case INTCALL: @@ 
-6071,6 +6085,9 @@ static void pagespan_ds() case CJUMP: case SJUMP: SysPrintf("Jump in the delay slot. This is probably a bug.\n"); + break; + default: + assemble(0, &regs[0], 0); } int btaddr=get_reg(regs[0].regmap,BTREG); if(btaddr<0) { @@ -8226,7 +8243,7 @@ int new_recompile_block(u_int addr) } // Count cycles in between branches - ccadj[i]=cc; + ccadj[i] = CLOCK_ADJUST(cc); if (i > 0 && (dops[i-1].is_jump || dops[i].itype == SYSCALL || dops[i].itype == HLECALL)) { cc=0; @@ -9148,7 +9165,7 @@ int new_recompile_block(u_int addr) // branch target entry point instr_addr[i] = out; assem_debug("<->\n"); - drc_dbg_emit_do_cmp(i); + drc_dbg_emit_do_cmp(i, ccadj[i]); // load regs if(regs[i].regmap_entry[HOST_CCREG]==CCREG&&regs[i].regmap[HOST_CCREG]!=CCREG) @@ -9185,59 +9202,9 @@ int new_recompile_block(u_int addr) load_regs(regs[i].regmap_entry,regs[i].regmap,ROREG,ROREG); if (dops[i].is_store) load_regs(regs[i].regmap_entry,regs[i].regmap,INVCP,INVCP); - // assemble - switch(dops[i].itype) { - case ALU: - alu_assemble(i,&regs[i]);break; - case IMM16: - imm16_assemble(i,&regs[i]);break; - case SHIFT: - shift_assemble(i,&regs[i]);break; - case SHIFTIMM: - shiftimm_assemble(i,&regs[i]);break; - case LOAD: - load_assemble(i,&regs[i]);break; - case LOADLR: - loadlr_assemble(i,&regs[i]);break; - case STORE: - store_assemble(i,&regs[i]);break; - case STORELR: - storelr_assemble(i,&regs[i]);break; - case COP0: - cop0_assemble(i,&regs[i]);break; - case COP1: - cop1_assemble(i,&regs[i]);break; - case C1LS: - c1ls_assemble(i,&regs[i]);break; - case COP2: - cop2_assemble(i,&regs[i]);break; - case C2LS: - c2ls_assemble(i,&regs[i]);break; - case C2OP: - c2op_assemble(i,&regs[i]);break; - case MULTDIV: - multdiv_assemble(i,&regs[i]); - multdiv_prepare_stall(i,&regs[i]); - break; - case MOV: - mov_assemble(i,&regs[i]);break; - case SYSCALL: - syscall_assemble(i,&regs[i]);break; - case HLECALL: - hlecall_assemble(i,&regs[i]);break; - case INTCALL: - intcall_assemble(i,&regs[i]);break; - case UJUMP: - ujump_assemble(i,&regs[i]);ds=1;break; - case 
RJUMP: - rjump_assemble(i,&regs[i]);ds=1;break; - case CJUMP: - cjump_assemble(i,&regs[i]);ds=1;break; - case SJUMP: - sjump_assemble(i,&regs[i]);ds=1;break; - case SPAN: - pagespan_assemble(i,&regs[i]);break; - } + + ds = assemble(i, &regs[i], ccadj[i]); + if (dops[i].is_ujump) literal_pool(1024); else @@ -9259,7 +9226,7 @@ int new_recompile_block(u_int addr) store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG); + emit_addimm(HOST_CCREG, ccadj[i-1] + CLOCK_ADJUST(1), HOST_CCREG); } else { @@ -9277,7 +9244,7 @@ int new_recompile_block(u_int addr) store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG); + emit_addimm(HOST_CCREG, ccadj[i-1] + CLOCK_ADJUST(1), HOST_CCREG); add_to_linker(out,start+i*4,0); emit_jmp(0); } diff --git a/libpcsxcore/new_dynarec/patches/trace_drc_chk b/libpcsxcore/new_dynarec/patches/trace_drc_chk index 93ca59802..e98a48e7f 100644 --- a/libpcsxcore/new_dynarec/patches/trace_drc_chk +++ b/libpcsxcore/new_dynarec/patches/trace_drc_chk @@ -1,88 +1,25 @@ -diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S -index 1d8cefa..528929f 100644 ---- a/libpcsxcore/new_dynarec/linkage_arm.S -+++ b/libpcsxcore/new_dynarec/linkage_arm.S -@@ -438,7 +438,7 @@ FUNCTION(cc_interrupt): - str r1, [fp, #LO_pending_exception] - and r2, r2, r10, lsr #17 - add r3, fp, #LO_restore_candidate -- str r10, [fp, #LO_cycle] /* PCSX cycles */ -+@@@ str r10, [fp, #LO_cycle] /* PCSX cycles */ - @@ str r10, [fp, #LO_reg_cop0+36] /* Count */ - ldr r4, [r2, r3] - mov r10, lr -@@ -528,7 +528,7 @@ FUNCTION(new_dyna_leave): - ldr r0, [fp, #LO_last_count] - add r12, fp, #28 - add r10, r0, r10 -- str r10, [fp, #LO_cycle] -+@@@ str r10, [fp, #LO_cycle] - ldmfd sp!, {r4, r5, r6, r7, r8, 
r9, sl, fp, ip, pc} - .size new_dyna_leave, .-new_dyna_leave - -@@ -645,7 +645,7 @@ FUNCTION(new_dyna_start): - \readop r0, [r1, r3, lsl #\tab_shift] - .endif - movcc pc, lr -- str r2, [fp, #LO_cycle] -+@@@ str r2, [fp, #LO_cycle] - bx r1 - .endm - -@@ -680,7 +680,7 @@ FUNCTION(jump_handler_read32): - mov r0, r1 - add r2, r2, r12 - push {r2, lr} -- str r2, [fp, #LO_cycle] -+@@@ str r2, [fp, #LO_cycle] - blx r3 - - ldr r0, [fp, #LO_next_interupt] -@@ -708,7 +708,7 @@ FUNCTION(jump_handler_write_h): - add r2, r2, r12 - mov r0, r1 - push {r2, lr} -- str r2, [fp, #LO_cycle] -+@@@ str r2, [fp, #LO_cycle] - blx r3 - - ldr r0, [fp, #LO_next_interupt] -diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S -index 7df82b4..79298e4 100644 ---- a/libpcsxcore/new_dynarec/linkage_arm64.S -+++ b/libpcsxcore/new_dynarec/linkage_arm64.S -@@ -123,7 +123,7 @@ FUNCTION(cc_interrupt): - str wzr, [rFP, #LO_pending_exception] - and w2, w2, rCC, lsr #17 - add x3, rFP, #LO_restore_candidate -- str rCC, [rFP, #LO_cycle] /* PCSX cycles */ -+## str rCC, [rFP, #LO_cycle] /* PCSX cycles */ - # str rCC, [rFP, #LO_reg_cop0+36] /* Count */ - ldr w19, [x3, w2, uxtw] - mov x21, lr -@@ -231,7 +231,7 @@ FUNCTION(new_dyna_start): - FUNCTION(new_dyna_leave): - ldr w0, [rFP, #LO_last_count] - add rCC, rCC, w0 -- str rCC, [rFP, #LO_cycle] -+## str rCC, [rFP, #LO_cycle] - ldp x19, x20, [sp, #16*1] - ldp x21, x22, [sp, #16*2] - ldp x23, x24, [sp, #16*3] -@@ -249,7 +249,7 @@ FUNCTION(new_dyna_leave): - /* w0 = adddr/data, x1 = rhandler, w2 = cycles, x3 = whandler */ - ldr w4, [rFP, #LO_last_count] - add w4, w4, w2 -- str w4, [rFP, #LO_cycle] -+## str w4, [rFP, #LO_cycle] - .endm - - .macro memhandler_post diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c -index 2f77516..21481bc 100644 +index f1005db..ebd1d4f 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c -@@ -521,6 +521,9 @@ 
static int doesnt_expire_soon(void *tcaddr) +@@ -235,7 +235,7 @@ static struct decoded_insn + int new_dynarec_hacks_old; + int new_dynarec_did_compile; + +- #define HACK_ENABLED(x) ((new_dynarec_hacks | new_dynarec_hacks_pergame) & (x)) ++ #define HACK_ENABLED(x) ((NDHACK_NO_STALLS) & (x)) + + extern int cycle_count; // ... until end of the timeslice, counts -N -> 0 + extern int last_count; // last absolute target, often = next_interupt +@@ -471,6 +471,7 @@ int cycle_multiplier_old; + + static int CLOCK_ADJUST(int x) + { ++ return x * 2; + int m = cycle_multiplier_override && cycle_multiplier == CYCLE_MULT_DEFAULT + ? cycle_multiplier_override : cycle_multiplier; + int s=(x>>31)|1; +@@ -522,6 +523,9 @@ static int doesnt_expire_soon(void *tcaddr) // This is called from the recompiled JR/JALR instructions void noinline *get_addr(u_int vaddr) { @@ -92,95 +29,7 @@ index 2f77516..21481bc 100644 u_int page=get_page(vaddr); u_int vpage=get_vpage(vaddr); struct ll_entry *head; -@@ -4790,13 +4793,15 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) - #endif - emit_addimm_and_set_flags(cycles,HOST_CCREG); - jaddr=out; -- emit_jns(0); -+ //emit_jns(0); -+ emit_jmp(0); - } - else - { - emit_cmpimm(HOST_CCREG,-CLOCK_ADJUST(count+2)); - jaddr=out; -- emit_jns(0); -+ //emit_jns(0); -+ emit_jmp(0); - } - add_stub(CC_STUB,jaddr,idle?idle:out,(*adj==0||invert||idle)?0:(count+2),i,addr,taken,0); - } -@@ -5204,7 +5209,8 @@ static void rjump_assemble(int i,struct regstat *i_regs) - // special case for RFE - emit_jmp(0); - else -- emit_jns(0); -+ //emit_jns(0); -+ emit_jmp(0); - //load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,-1); - #ifdef USE_MINI_HT - if(dops[i].rs1==31) { -@@ -5309,7 +5315,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) - else if(nop) { - emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); - void *jaddr=out; -- emit_jns(0); -+ //emit_jns(0); -+ emit_jmp(0); - 
add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); - } - else { -@@ -5496,7 +5503,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) - emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); - void *jaddr=out; -- emit_jns(0); -+ //emit_jns(0); -+ emit_jmp(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); - emit_storereg(CCREG,HOST_CCREG); - } -@@ -5505,7 +5513,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) - assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); - void *jaddr=out; -- emit_jns(0); -+ //emit_jns(0); -+ emit_jmp(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); - } - } -@@ -5607,7 +5616,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) - else if(nevertaken) { - emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); - void *jaddr=out; -- emit_jns(0); -+ //emit_jns(0); -+ emit_jmp(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); - } - else { -@@ -5763,7 +5773,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) - emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); - void *jaddr=out; -- emit_jns(0); -+ //emit_jns(0); -+ emit_jmp(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); - emit_storereg(CCREG,HOST_CCREG); - } -@@ -5772,7 +5783,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) - assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); - void *jaddr=out; -- emit_jns(0); -+ //emit_jns(0); -+ emit_jmp(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); - } - } -@@ -6211,7 +6223,7 @@ void unneeded_registers(int istart,int iend,int r) +@@ -6248,7 +6252,7 @@ void unneeded_registers(int istart,int iend,int r) // R0 is always unneeded u|=1; // Save it @@ -189,7 +38,7 @@ index 2f77516..21481bc 100644 gte_unneeded[i]=gte_u; /* printf("ur (%d,%d) %x: ",istart,iend,start+i*4); -@@ -8756,6 +8768,7 @@ 
int new_recompile_block(u_int addr) +@@ -8794,6 +8798,7 @@ int new_recompile_block(u_int addr) // This allocates registers (if possible) one instruction prior // to use, which can avoid a load-use penalty on certain CPUs. @@ -197,7 +46,7 @@ index 2f77516..21481bc 100644 for(i=0;i> 26; switch (tmp) { -@@ -499,13 +500,15 @@ static void doBranch(u32 tar) { +@@ -499,13 +501,15 @@ static void doBranch(u32 tar) { } break; } @@ -105,17 +157,17 @@ index f7898e9..176a0f7 100644 branch = 0; psxRegs.pc = branchPC; - psxBranchTest(); -+ + psxRegs.cycle += BIAS; + psxBranchTest(); ++ psxRegs.cycle -= BIAS; } /********************************************************* -@@ -615,12 +618,13 @@ void psxMULTU_stall() { +@@ -615,12 +619,13 @@ void psxMULTU_stall() { psxMULTU(); } -+#define doBranchNotTaken() do { psxRegs.cycle -= BIAS; execI(); psxBranchTest(); psxRegs.cycle += BIAS; } while(0) ++#define doBranchNotTaken() do { psxRegs.cycle += BIAS; execI(); psxBranchTest(); psxRegs.cycle -= BIAS; } while(0) /********************************************************* * Register branch logic * * Format: OP rs, offset * @@ -127,7 +179,7 @@ index f7898e9..176a0f7 100644 void psxBGEZ() { RepZBranchi32(>=) } // Branch if Rs >= 0 void psxBGEZAL() { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link -@@ -702,7 +706,7 @@ void psxRFE() { +@@ -702,7 +707,7 @@ void psxRFE() { * Register branch logic * * Format: OP rs, rt, offset * *********************************************************/ @@ -136,17 +188,15 @@ index f7898e9..176a0f7 100644 void psxBEQ() { RepBranchi32(==) } // Branch if Rs == Rt void psxBNE() { RepBranchi32(!=) } // Branch if Rs != Rt -@@ -886,6 +890,9 @@ void MTC0(int reg, u32 val) { +@@ -886,6 +891,7 @@ void MTC0(int reg, u32 val) { case 12: // Status psxRegs.CP0.r[12] = val; psxTestSWInts(); -+#ifndef __arm__ -+ psxBranchTest(); -+#endif ++ //psxBranchTest(); break; case 13: // Cause -@@ -1027,6 +1034,23 @@ void intExecuteBlock() { +@@ -1027,6 +1033,23 @@ void 
intExecuteBlock() { while (!branch2) execI(); } @@ -170,15 +220,36 @@ index f7898e9..176a0f7 100644 static void intClear(u32 Addr, u32 Size) { } +@@ -1049,7 +1072,7 @@ void intApplyConfig() { + assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall); + assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall); + +- if (Config.DisableStalls) { ++ if (1) { + psxBSC[18] = psxCOP2; + psxBSC[50] = gteLWC2; + psxBSC[58] = gteSWC2; +@@ -1091,9 +1114,10 @@ void execI() { + if (Config.Debug) ProcessDebug(); + + psxRegs.pc += 4; +- psxRegs.cycle += BIAS; + + psxBSC[psxRegs.code >> 26](); ++ ++ psxRegs.cycle += BIAS; + } + + R3000Acpu psxInt = { diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c -index 04aeec2..1242653 100644 +index 04aeec2..710a379 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -217,11 +217,13 @@ void psxMemShutdown() { } static int writeok = 1; -+u32 last_io_addr; ++extern u32 last_io_addr; u8 psxMemRead8(u32 mem) { char *p; @@ -228,3 +299,25 @@ index 04aeec2..1242653 100644 // if ((mem&0x1fffff) == 0x71E18 || value == 0x48088800) SysPrintf("t2fix!!\n"); t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { +@@ -380,6 +387,8 @@ void psxMemWrite32(u32 mem, u32 value) { + } else { + int i; + ++extern u32 handler_cycle; ++handler_cycle = psxRegs.cycle; + switch (value) { + case 0x800: case 0x804: + if (writeok == 0) break; +diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c +index 7e6f16b..0114947 100644 +--- a/libpcsxcore/r3000a.c ++++ b/libpcsxcore/r3000a.c +@@ -120,6 +120,8 @@ void psxException(u32 code, u32 bd) { + } + + void psxBranchTest() { ++ extern u32 irq_test_cycle; ++ irq_test_cycle = psxRegs.cycle; + if ((psxRegs.cycle - psxNextsCounter) >= psxNextCounter) + psxRcntUpdate(); + From 4b1c7cd1dd8da0be729bae21df9243a9fa33157b Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 28 Nov 2021 17:23:34 +0200 Subject: [PATCH 093/597] drc: don't cull ccreg It didn't even seem to be written back. 
What was the point doing so anyway, other regs were (maybe?) culled to prevent saving/restoring them in memhandlers, but ccreg is in callee-saved reg anyway? We also need ccreg more often now for stalls. --- libpcsxcore/new_dynarec/new_dynarec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 71fabb8bd..77fc35794 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -8472,7 +8472,8 @@ int new_recompile_block(u_int addr) if((regs[i].regmap[hr]&63)!=dops[i].rt1 && (regs[i].regmap[hr]&63)!=dops[i].rt2 && regs[i].regmap[hr]!=dops[i].rs1 && regs[i].regmap[hr]!=dops[i].rs2 && (regs[i].regmap[hr]&63)!=temp && regs[i].regmap[hr]!=map1 && regs[i].regmap[hr]!=map2 && - (dops[i].itype!=SPAN||regs[i].regmap[hr]!=CCREG)) + //(dops[i].itype!=SPAN||regs[i].regmap[hr]!=CCREG) + regs[i].regmap[hr] != CCREG) { if(i Date: Sun, 28 Nov 2021 23:08:57 +0200 Subject: [PATCH 094/597] drc: fix CCREG loading CCREG was always manually loaded by *jump_assemble and few other places, however when I added more CCREG allocs I expected loading to be handled automatically, but turns out it wasn't. 
--- libpcsxcore/new_dynarec/new_dynarec.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 77fc35794..f1005db85 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -1616,9 +1616,8 @@ static void alloc_reg_temp(struct regstat *cur,int i,signed char reg) static void mov_alloc(struct regstat *current,int i) { if (dops[i].rs1 == HIREG || dops[i].rs1 == LOREG) { - // logically this is needed but just won't work, no idea why - //alloc_cc(current,i); // for stalls - //dirty_reg(current,CCREG); + alloc_cc(current,i); // for stalls + dirty_reg(current,CCREG); } // Note: Don't need to actually alloc the source registers @@ -9197,7 +9196,7 @@ int new_recompile_block(u_int addr) load_regs(regs[i].regmap_entry,regs[i].regmap,dops[i+1].rs2,dops[i+1].rs2); } // TODO: if(is_ooo(i)) address_generation(i+1); - if (dops[i].itype == CJUMP) + if (!dops[i].is_jump || dops[i].itype == CJUMP) load_regs(regs[i].regmap_entry,regs[i].regmap,CCREG,CCREG); if (ram_offset && (dops[i].is_load || dops[i].is_store)) load_regs(regs[i].regmap_entry,regs[i].regmap,ROREG,ROREG); From 9b9af0d1b0739a1954ce6f6a4bdd019344d17a85 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 28 Nov 2021 18:34:07 +0200 Subject: [PATCH 095/597] drc: fix an old memhandler oversight accidentally got it right when doing arm64 port though --- libpcsxcore/new_dynarec/linkage_arm.S | 27 +++++++++++++++---------- libpcsxcore/new_dynarec/linkage_arm64.S | 8 ++++---- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 1d8cefaa4..637173152 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -661,6 +661,13 @@ FUNCTION(jump_handler_read32): pcsx_read_mem ldrcc, 2 +.macro memhandler_post + ldr r0, [fp, #LO_next_interupt] + ldr r2, [fp, #LO_cycle] 
@ memhandlers can modify cc, like dma + str r0, [fp, #LO_last_count] + sub r0, r2, r0 +.endm + .macro pcsx_write_mem wrtop tab_shift /* r0 = address, r1 = data, r2 = cycles, r3 = handler_tab */ lsl r12,r0, #20 @@ -668,7 +675,7 @@ FUNCTION(jump_handler_read32): ldr r3, [r3, r12, lsl #2] str r0, [fp, #LO_address] @ some handlers still need it.. lsls r3, #1 - mov r0, r2 @ cycle return in case of direct store + mov r0, r2 @ cycle return in case of direct store .if \tab_shift == 1 lsl r12, #1 \wrtop r1, [r3, r12] @@ -679,14 +686,13 @@ FUNCTION(jump_handler_read32): ldr r12, [fp, #LO_last_count] mov r0, r1 add r2, r2, r12 - push {r2, lr} str r2, [fp, #LO_cycle] + + str lr, [fp, #LO_saved_lr] blx r3 + ldr lr, [fp, #LO_saved_lr] - ldr r0, [fp, #LO_next_interupt] - pop {r2, lr} - str r0, [fp, #LO_last_count] - sub r0, r2, r0 + memhandler_post bx lr .endm @@ -707,14 +713,13 @@ FUNCTION(jump_handler_write_h): str r0, [fp, #LO_address] @ some handlers still need it.. add r2, r2, r12 mov r0, r1 - push {r2, lr} str r2, [fp, #LO_cycle] + + str lr, [fp, #LO_saved_lr] blx r3 + ldr lr, [fp, #LO_saved_lr] - ldr r0, [fp, #LO_next_interupt] - pop {r2, lr} - str r0, [fp, #LO_last_count] - sub r0, r2, r0 + memhandler_post bx lr FUNCTION(jump_handle_swl): diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 7df82b4e9..7075ebd9a 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -253,10 +253,10 @@ FUNCTION(new_dyna_leave): .endm .macro memhandler_post - ldr w2, [rFP, #LO_next_interupt] - ldr w1, [rFP, #LO_cycle] - sub w0, w1, w2 - str w2, [rFP, #LO_last_count] + ldr w0, [rFP, #LO_next_interupt] + ldr w2, [rFP, #LO_cycle] // memhandlers can modify cc, like dma + str w0, [rFP, #LO_last_count] + sub w0, w2, w0 .endm FUNCTION(do_memhandler_pre): From 9c997d193518cfb9f14d001ad18f721a809acfc8 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 29 Nov 2021 02:10:27 +0200 Subject: [PATCH 096/597] drc: 
fix some mistake done during arm64 porting --- libpcsxcore/new_dynarec/assem_arm.c | 32 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index c5c2c66e3..3267cb6b8 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -949,6 +949,14 @@ static void emit_cmovae_imm(int imm,int rt) output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval); } +static void emit_cmovs_imm(int imm,int rt) +{ + assem_debug("movmi %s,#%d\n",regname[rt],imm); + u_int armval; + genimm_checked(imm,&armval); + output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval); +} + static void emit_cmovne_reg(int rs,int rt) { assem_debug("movne %s,%s\n",regname[rt],regname[rs]); @@ -1521,14 +1529,6 @@ static void emit_orrne_imm(int rs,int imm,int rt) output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval); } -static void emit_andne_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval); -} - static unused void emit_addpl_imm(int rs,int imm,int rt) { u_int armval; @@ -2200,11 +2200,11 @@ static void c2op_ctc2_31_assemble(signed char sl, signed char temp) static void do_mfc2_31_one(u_int copr,signed char temp) { emit_readword(&reg_cop2d[copr],temp); - emit_testimm(temp,0x8000); // do we need this? 
- emit_andne_imm(temp,0,temp); - emit_cmpimm(temp,0xf80); - emit_andimm(temp,0xf80,temp); - emit_cmovae_imm(0xf80,temp); + emit_lsls_imm(temp,16,temp); + emit_cmovs_imm(0,temp); + emit_cmpimm(temp,0xf80<<16); + emit_andimm(temp,0xf80<<16,temp); + emit_cmovae_imm(0xf80<<16,temp); } static void c2op_mfc2_29_assemble(signed char tl, signed char temp) @@ -2214,11 +2214,11 @@ static void c2op_mfc2_29_assemble(signed char tl, signed char temp) temp = HOST_TEMPREG; } do_mfc2_31_one(9,temp); - emit_shrimm(temp,7,tl); + emit_shrimm(temp,7+16,tl); do_mfc2_31_one(10,temp); - emit_orrshr_imm(temp,2,tl); + emit_orrshr_imm(temp,2+16,tl); do_mfc2_31_one(11,temp); - emit_orrshl_imm(temp,3,tl); + emit_orrshr_imm(temp,-3+16,tl); emit_writeword(tl,&reg_cop2d[29]); if (temp == HOST_TEMPREG) host_tempreg_release(); From 2405813165a8259d501406d91c3d3161972db9ec Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 29 Nov 2021 23:35:08 +0200 Subject: [PATCH 097/597] drc: rearrange hacks Also adds PE2 hack. PE2 probably checks if a frame didn't take too long to complete by comparing counter 2 to a hardcoded value. If it did exceed the limit it restarts all the processing, probably assuming that the GPU was busy that time and retrying on next frame would work out? Still it will hang if not enough cycles are available, regardless of how root counters are handled, and currently won't work on the interpreter even with root counter hack because of recently added stalls. PCSX's timing is just too wrong all over the place... 
--- frontend/menu.c | 13 +++-- libpcsxcore/database.c | 1 + libpcsxcore/new_dynarec/new_dynarec.c | 73 +++++++++++++++++---------- libpcsxcore/new_dynarec/new_dynarec.h | 1 + 4 files changed, 58 insertions(+), 30 deletions(-) diff --git a/frontend/menu.c b/frontend/menu.c index f5d035a04..85f7b7f93 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1555,6 +1555,7 @@ static int menu_loop_plugin_options(int id, int keys) #ifndef DRC_DISABLE static const char h_cfg_psxclk[] = "Over/under-clock the PSX, default is " DEFAULT_PSX_CLOCK_S "\n" "(lower value - less work for the emu, may be faster)"; +static const char h_cfg_noch[] = "Disables game-specific compatibility hacks"; static const char h_cfg_nosmc[] = "Will cause crashes when loading, break memcards"; static const char h_cfg_gteunn[] = "May cause graphical glitches"; static const char h_cfg_gteflgs[] = "Will cause graphical glitches"; @@ -1565,6 +1566,7 @@ static menu_entry e_menu_speed_hacks[] = { #ifndef DRC_DISABLE mee_range_h ("PSX CPU clock, %%", 0, psx_clock, 1, 500, h_cfg_psxclk), + mee_onoff_h ("Disable compat hacks", 0, new_dynarec_hacks, NDHACK_NO_COMPAT_HACKS, h_cfg_noch), mee_onoff_h ("Disable SMC checks", 0, new_dynarec_hacks, NDHACK_NO_SMC_CHECK, h_cfg_nosmc), mee_onoff_h ("Assume GTE regs unneeded", 0, new_dynarec_hacks, NDHACK_GTE_UNNEEDED, h_cfg_gteunn), mee_onoff_h ("Disable GTE flags", 0, new_dynarec_hacks, NDHACK_GTE_NO_FLAGS, h_cfg_gteflgs), @@ -1589,11 +1591,12 @@ static const char h_cfg_cdda[] = "Disable CD Audio for a performance boost\n" "(proper .cue/.bin dump is needed otherwise)"; //static const char h_cfg_sio[] = "You should not need this, breaks games"; static const char h_cfg_spuirq[] = "Compatibility tweak; should be left off"; -//static const char h_cfg_rcnt1[] = "Parasite Eve 2, Vandal Hearts 1/2 Fix\n" -// "(timing hack, breaks other games)"; static const char h_cfg_rcnt2[] = "InuYasha Sengoku Battle Fix\n" "(timing hack, breaks other games)"; -#ifndef DRC_DISABLE +#ifdef 
DRC_DISABLE +static const char h_cfg_rcnt1[] = "Parasite Eve 2, Vandal Hearts 1/2 Fix\n" + "(timing hack, breaks other games)"; +#else static const char h_cfg_nodrc[] = "Disable dynamic recompiler and use interpreter\n" "Might be useful to overcome some dynarec bugs"; #endif @@ -1610,7 +1613,9 @@ static menu_entry e_menu_adv_options[] = //mee_onoff_h ("SIO IRQ Always Enabled", 0, Config.Sio, 1, h_cfg_sio), mee_onoff_h ("SPU IRQ Always Enabled", 0, Config.SpuIrq, 1, h_cfg_spuirq), mee_onoff_h ("ICache emulation", 0, Config.icache_emulation, 1, h_cfg_icache), - //mee_onoff_h ("Rootcounter hack", 0, Config.RCntFix, 1, h_cfg_rcnt1), +#ifdef DRC_DISABLE + mee_onoff_h ("Rootcounter hack", 0, Config.RCntFix, 1, h_cfg_rcnt1), +#endif mee_onoff_h ("Rootcounter hack 2", 0, Config.VSyncWA, 1, h_cfg_rcnt2), #ifndef DRC_DISABLE mee_onoff_h ("Disable dynarec (slow!)",0, Config.Cpu, 1, h_cfg_nodrc), diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 4d4439ad4..61312e092 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -36,6 +36,7 @@ void Apply_Hacks_Cdrom() /* Dynarec game-specific hacks */ new_dynarec_hacks_pergame = 0; + cycle_multiplier_override = 0; /* Internal Section is fussy about timings */ if (strcmp(CdromId, "SLPS01868") == 0) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index f1005db85..27d9d4696 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -121,8 +121,8 @@ struct regstat uint64_t wasdirty; uint64_t dirty; uint64_t u; - u_int wasconst; - u_int isconst; + u_int wasconst; // before; for example 'lw r2, (r2)' wasconst is true + u_int isconst; // ... 
but isconst is false when r2 is known u_int loadedconst; // host regs that have constants loaded u_int waswritten; // MIPS regs that were used as store base before }; @@ -468,12 +468,12 @@ static void do_clear_cache(void) int cycle_multiplier = CYCLE_MULT_DEFAULT; // 100 for 1.0 int cycle_multiplier_override; int cycle_multiplier_old; +static int cycle_multiplier_active; static int CLOCK_ADJUST(int x) { - int m = cycle_multiplier_override && cycle_multiplier == CYCLE_MULT_DEFAULT - ? cycle_multiplier_override : cycle_multiplier; - int s=(x>>31)|1; + int m = cycle_multiplier_active; + int s = (x >> 31) | 1; return (x * m + s * 50) / 100; } @@ -4122,6 +4122,7 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) case SPAN: pagespan_assemble(i, i_regs); break; + case NOP: case OTHER: case NI: // not handled, just skip @@ -6906,9 +6907,6 @@ void new_dynarec_cleanup(void) static u_int *get_source_start(u_int addr, u_int *limit) { - if (!HACK_ENABLED(NDHACK_OVERRIDE_CYCLE_M)) - cycle_multiplier_override = 0; - if (addr < 0x00200000 || (0xa0000000 <= addr && addr < 0xa0200000)) { @@ -6923,7 +6921,7 @@ static u_int *get_source_start(u_int addr, u_int *limit) // BIOS. 
The multiplier should be much higher as it's uncached 8bit mem, // but timings in PCSX are too tied to the interpreter's BIAS if (!HACK_ENABLED(NDHACK_OVERRIDE_CYCLE_M)) - cycle_multiplier_override = 200; + cycle_multiplier_active = 200; *limit = (addr & 0xfff00000) | 0x80000; return (u_int *)((u_char *)psxR + (addr&0x7ffff)); @@ -7035,6 +7033,42 @@ void new_dynarec_load_blocks(const void *save, int size) memcpy(&psxRegs.GPR, regs_save, sizeof(regs_save)); } +static void apply_hacks(void) +{ + int i; + if (HACK_ENABLED(NDHACK_NO_COMPAT_HACKS)) + return; + /* special hack(s) */ + for (i = 0; i < slen - 4; i++) + { + // lui a4, 0xf200; jal ; addu a0, 2; slti v0, 28224 + if (source[i] == 0x3c04f200 && dops[i+1].itype == UJUMP + && source[i+2] == 0x34840002 && dops[i+3].opcode == 0x0a + && imm[i+3] == 0x6e40 && dops[i+3].rs1 == 2) + { + SysPrintf("PE2 hack @%08x\n", start + (i+3)*4); + dops[i + 3].itype = NOP; + } + } + i = slen; + if (i > 10 && source[i-1] == 0 && source[i-2] == 0x03e00008 + && source[i-4] == 0x8fbf0018 && source[i-6] == 0x00c0f809 + && dops[i-7].itype == STORE) + { + i = i-8; + if (dops[i].itype == IMM16) + i--; + // swl r2, 15(r6); swr r2, 12(r6); sw r6, *; jalr r6 + if (dops[i].itype == STORELR && dops[i].rs1 == 6 + && dops[i-1].itype == STORELR && dops[i-1].rs1 == 6) + { + SysPrintf("F1 hack from %08x\n", start); + if (f1_hack == 0) + f1_hack = ~0u; + } + } +} + int new_recompile_block(u_int addr) { u_int pagelimit = 0; @@ -7092,6 +7126,9 @@ int new_recompile_block(u_int addr) return 0; } + cycle_multiplier_active = cycle_multiplier_override && cycle_multiplier == CYCLE_MULT_DEFAULT + ? 
cycle_multiplier_override : cycle_multiplier; + source = get_source_start(start, &pagelimit); if (source == NULL) { SysPrintf("Compile at bogus memory address: %08x\n", addr); @@ -7613,23 +7650,7 @@ int new_recompile_block(u_int addr) } assert(slen>0); - /* spacial hack(s) */ - if (i > 10 && source[i-1] == 0 && source[i-2] == 0x03e00008 - && source[i-4] == 0x8fbf0018 && source[i-6] == 0x00c0f809 - && dops[i-7].itype == STORE) - { - i = i-8; - if (dops[i].itype == IMM16) - i--; - // swl r2, 15(r6); swr r2, 12(r6); sw r6, *; jalr r6 - if (dops[i].itype == STORELR && dops[i].rs1 == 6 - && dops[i-1].itype == STORELR && dops[i-1].rs1 == 6) - { - SysPrintf("F1 hack from %08x\n", start); - if (f1_hack == 0) - f1_hack = ~0u; - } - } + apply_hacks(); /* Pass 2 - Register dependencies and branch targets */ diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index f1464696f..8991faca2 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -15,6 +15,7 @@ extern int cycle_multiplier_old; #define NDHACK_GTE_NO_FLAGS (1<<2) #define NDHACK_OVERRIDE_CYCLE_M (1<<3) #define NDHACK_NO_STALLS (1<<4) +#define NDHACK_NO_COMPAT_HACKS (1<<5) extern int new_dynarec_hacks; extern int new_dynarec_hacks_pergame; extern int new_dynarec_hacks_old; From 48ce252818e5a9751b0ef5067b2b6136b41698a1 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 14 Dec 2021 02:37:51 +0200 Subject: [PATCH 098/597] drc: adjust asm to make clang happy --- libpcsxcore/new_dynarec/linkage_arm64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 7075ebd9a..5e9626f5e 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -336,7 +336,7 @@ handler_write_end: FUNCTION(jump_handle_swl): /* w0 = address, w1 = data, w2 = cycles */ ldr x3, [rFP, #LO_mem_wtab] - mov w4, w0, lsr #12 + orr w4, wzr, 
w0, lsr #12 ldr x3, [x3, w4, uxtw #3] adds x3, x3, x3 bcs 4f @@ -371,7 +371,7 @@ FUNCTION(jump_handle_swl): FUNCTION(jump_handle_swr): /* w0 = address, w1 = data, w2 = cycles */ ldr x3, [rFP, #LO_mem_wtab] - mov w4, w0, lsr #12 + orr w4, wzr, w0, lsr #12 ldr x3, [x3, w4, uxtw #3] adds x3, x3, x3 bcs 4f From 0aeb0cb9d5aee3004954bd18ef5b7ea1f69a0378 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 14 Dec 2021 21:03:25 +0200 Subject: [PATCH 099/597] drc: reapply some libretro changes --- libpcsxcore/new_dynarec/new_dynarec.c | 10 +++++++++- libpcsxcore/new_dynarec/new_dynarec_config.h | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 27d9d4696..15b938363 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -6841,14 +6841,22 @@ void new_dynarec_init(void) { SysPrintf("Init new dynarec\n"); +#ifdef _3DS + check_rosalina(); +#endif #ifdef BASE_ADDR_DYNAMIC #ifdef VITA - sceBlock = sceKernelAllocMemBlockForVM("code", 1 << TARGET_SIZE_2); + sceBlock = getVMBlock(); //sceKernelAllocMemBlockForVM("code", sizeof(*ndrc)); if (sceBlock < 0) SysPrintf("sceKernelAllocMemBlockForVM failed\n"); int ret = sceKernelGetMemBlockBase(sceBlock, (void **)&ndrc); if (ret < 0) SysPrintf("sceKernelGetMemBlockBase failed\n"); + sceKernelOpenVMDomain(); + sceClibPrintf("translation_cache = 0x%08lx\n ", (long)ndrc->translation_cache); + #elif defined(_MSC_VER) + ndrc = VirtualAlloc(NULL, sizeof(*ndrc), MEM_COMMIT | MEM_RESERVE, + PAGE_EXECUTE_READWRITE); #else uintptr_t desired_addr = 0; #ifdef __ELF__ diff --git a/libpcsxcore/new_dynarec/new_dynarec_config.h b/libpcsxcore/new_dynarec/new_dynarec_config.h index 321bfbf38..f93613ffa 100644 --- a/libpcsxcore/new_dynarec/new_dynarec_config.h +++ b/libpcsxcore/new_dynarec/new_dynarec_config.h @@ -6,7 +6,7 @@ #define USE_MINI_HT 1 //#define REG_PREFETCH 1 -#if defined(__MACH__) || defined(VITA) +#if 
defined(__MACH__) #define NO_WRITE_EXEC 1 #endif #ifdef VITA From 337887986422262fb88611d0b6cfcd79936e11c8 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 14 Dec 2021 21:09:45 +0200 Subject: [PATCH 100/597] fix some issues and warnings seen on ctr build --- libpcsxcore/new_dynarec/assem_arm.c | 23 ++++++++++++++--------- libpcsxcore/new_dynarec/assem_arm64.c | 7 +++++-- libpcsxcore/new_dynarec/emu_if.h | 2 +- libpcsxcore/psxbios.c | 2 +- libpcsxcore/psxcommon.h | 4 ++++ plugins/dfsound/spu.c | 6 ++---- 6 files changed, 27 insertions(+), 17 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 3267cb6b8..381a54191 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -473,6 +473,7 @@ static void emit_loadlp(u_int imm,u_int rt) output_w32(0xe5900000|rd_rn_rm(rt,15,0)); } +#ifdef HAVE_ARMV7 static void emit_movw(u_int imm,u_int rt) { assert(imm<65536); @@ -485,6 +486,7 @@ static void emit_movt(u_int imm,u_int rt) assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000); output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000)); } +#endif static void emit_movimm(u_int imm,u_int rt) { @@ -530,17 +532,20 @@ static void emit_loadreg(int r, int hr) if((r&63)==0) emit_zeroreg(hr); else { - int addr = (int)&psxRegs.GPR.r[r]; + void *addr; switch (r) { //case HIREG: addr = &hi; break; //case LOREG: addr = &lo; break; - case CCREG: addr = (int)&cycle_count; break; - case CSREG: addr = (int)&Status; break; - case INVCP: addr = (int)&invc_ptr; break; - case ROREG: addr = (int)&ram_offset; break; - default: assert(r < 34); break; + case CCREG: addr = &cycle_count; break; + case CSREG: addr = &Status; break; + case INVCP: addr = &invc_ptr; break; + case ROREG: addr = &ram_offset; break; + default: + assert(r < 34); + addr = &psxRegs.GPR.r[r]; + break; } - u_int offset = addr-(u_int)&dynarec_local; + u_int offset = (u_char *)addr - (u_char 
*)&dynarec_local; assert(offset<4096); assem_debug("ldr %s,fp+%d\n",regname[hr],offset); output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset); @@ -2119,10 +2124,10 @@ static void c2op_assemble(int i, const struct regstat *i_regs) } #else if(cv==3&&shift) - emit_far_call((int)gteMVMVA_part_cv3sh12_arm); + emit_far_call(gteMVMVA_part_cv3sh12_arm); else { emit_movimm(shift,1); - emit_far_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm)); + emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm); } if(need_flags||need_ir) c2op_call_MACtoIR(lm,need_flags); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 0b4922113..84e4fc6e8 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -453,7 +453,7 @@ static void emit_loadreg(u_int r, u_int hr) if (r == 0) emit_zeroreg(hr); else { - void *addr = &psxRegs.GPR.r[r]; + void *addr; switch (r) { //case HIREG: addr = &hi; break; //case LOREG: addr = &lo; break; @@ -461,7 +461,10 @@ static void emit_loadreg(u_int r, u_int hr) case CSREG: addr = &Status; break; case INVCP: addr = &invc_ptr; is64 = 1; break; case ROREG: addr = &ram_offset; is64 = 1; break; - default: assert(r < 34); break; + default: + assert(r < 34); + addr = &psxRegs.GPR.r[r]; + break; } if (is64) emit_readdword(addr, hr); diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index 30cb9ef63..0ce9584ce 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -85,7 +85,7 @@ extern void *scratch_buf_ptr; extern u32 inv_code_start, inv_code_end; /* cycles/irqs */ -extern unsigned int next_interupt; +extern u32 next_interupt; extern int pending_exception; /* called by drc */ diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 16b48d9c2..8e993c3f4 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -1065,7 +1065,7 @@ void psxBios_qsort() { // 0x31 } void psxBios_malloc() { // 0x33 
- unsigned int *chunk, *newchunk = NULL; + u32 *chunk, *newchunk = NULL; unsigned int dsize = 0, csize, cstat; int colflag; #ifdef PSXBIOS_LOG diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index 2dd91cf1c..224caa541 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -37,6 +37,10 @@ extern "C" { #pragma GCC diagnostic ignored "-Wformat-overflow" #pragma GCC diagnostic ignored "-Wstringop-truncation" #endif +// devkitpro has uint32_t as long, unfortunately +#ifdef _3DS +#pragma GCC diagnostic ignored "-Wformat" +#endif // System includes #include diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 637e85216..b0b083dd2 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1318,14 +1318,12 @@ static void SetupStreams(void) spu.pSpuBuffer = (unsigned char *)malloc(32768); // alloc mixing buffer spu.SSumLR = calloc(NSSIZE * 2, sizeof(spu.SSumLR[0])); - spu.XAStart = // alloc xa buffer - (uint32_t *)malloc(44100 * sizeof(uint32_t)); + spu.XAStart = malloc(44100 * sizeof(uint32_t)); // alloc xa buffer spu.XAEnd = spu.XAStart + 44100; spu.XAPlay = spu.XAStart; spu.XAFeed = spu.XAStart; - spu.CDDAStart = // alloc cdda buffer - (uint32_t *)malloc(CDDA_BUFFER_SIZE); + spu.CDDAStart = malloc(CDDA_BUFFER_SIZE); // alloc cdda buffer spu.CDDAEnd = spu.CDDAStart + 16384; spu.CDDAPlay = spu.CDDAStart; spu.CDDAFeed = spu.CDDAStart; From 7ebfcedf465063e7692daac83081a556abeb41c1 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 15 Dec 2021 00:04:11 +0200 Subject: [PATCH 101/597] drc: always clear full decoded_insn Some stuff was left after INTCALL conversion and broke branch in delay slot handling, like in Rayman 2. 
--- libpcsxcore/new_dynarec/new_dynarec.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 15b938363..8fe3f93df 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -7162,9 +7162,9 @@ int new_recompile_block(u_int addr) /* Pass 1 disassembly */ - for(i=0;!done;i++) { - dops[i].bt=0; - dops[i].ooo=0; + for (i = 0; !done; i++) + { + memset(&dops[i], 0, sizeof(dops[i])); op2=0; minimum_free_regs[i]=0; dops[i].opcode=op=source[i]>>26; @@ -7604,12 +7604,12 @@ int new_recompile_block(u_int addr) do_in_intrp=1; } } - if(do_in_intrp) { - dops[i-1].rs1=CCREG; - dops[i-1].rs2=dops[i-1].rt1=dops[i-1].rt2=0; - ba[i-1]=-1; - dops[i-1].itype=INTCALL; - done=2; + if (do_in_intrp) { + memset(&dops[i-1], 0, sizeof(dops[i-1])); + dops[i-1].itype = INTCALL; + dops[i-1].rs1 = CCREG; + ba[i-1] = -1; + done = 2; i--; // don't compile the DS } } @@ -7718,7 +7718,10 @@ int new_recompile_block(u_int addr) current.u=branch_unneeded_reg[i]&~((1LL< Date: Sat, 22 Jan 2022 22:15:54 +0200 Subject: [PATCH 102/597] cdrom: play cdda regardless of report mode Vib-Ribbon doesn't use it while playing. 
--- libpcsxcore/cdrom.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 753c51262..4eea46bfc 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -451,7 +451,6 @@ static void cdrPlayInterrupt_Autopause() StopCdda(); } else if (((cdr.Mode & MODE_REPORT) || cdr.FastForward || cdr.FastBackward)) { - CDR_readCDDA(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2], (u8 *)read_buf); cdr.Result[0] = cdr.StatP; cdr.Result[1] = cdr.subq.Track; cdr.Result[2] = cdr.subq.Index; @@ -527,13 +526,14 @@ void cdrPlayInterrupt() StopCdda(); cdr.TrackChanged = TRUE; } + else { + CDR_readCDDA(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2], (u8 *)read_buf); + } if (!cdr.Irq && !cdr.Stat && (cdr.Mode & (MODE_AUTOPAUSE|MODE_REPORT))) cdrPlayInterrupt_Autopause(); - if (!cdr.Play) return; - - if (CDR_readCDDA && !cdr.Muted && cdr.Mode & MODE_REPORT) { + if (CDR_readCDDA && !cdr.Muted && !Config.Cdda) { cdrAttenuate(read_buf, CD_FRAMESIZE_RAW / 4, 1); if (SPU_playCDDAchannel) SPU_playCDDAchannel(read_buf, CD_FRAMESIZE_RAW); From d9dcc8cb7803c89a528c8d808a429932673f8074 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 22 Jan 2022 22:20:42 +0200 Subject: [PATCH 103/597] spu: save/restore decode_pos SPUOSSFreeze_t conveniently had padding space for it --- plugins/dfsound/freeze.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/plugins/dfsound/freeze.c b/plugins/dfsound/freeze.c index 43e6535ce..3bdbab1bd 100644 --- a/plugins/dfsound/freeze.c +++ b/plugins/dfsound/freeze.c @@ -119,6 +119,7 @@ typedef struct typedef struct { unsigned short spuIrq; + unsigned short decode_pos; uint32_t pSpuIrq; uint32_t spuAddr; uint32_t dummy1; @@ -261,6 +262,7 @@ long CALLBACK SPUfreeze(uint32_t ulFreezeMode, SPUFreeze_t * pF, pFO->spuAddr=spu.spuAddr; if(pFO->spuAddr==0) pFO->spuAddr=0xbaadf00d; + pFO->decode_pos = spu.decode_pos; for(i=0;ispuAddr == 0xbaadf00d) spu.spuAddr = 0; else 
spu.spuAddr = pFO->spuAddr & 0x7fffe; } + spu.decode_pos = pFO->decode_pos & 0x1ff; spu.dwNewChannel=0; spu.dwChannelOn=0; From 4197fb21f37842640807bfed0f05eb49df7f350b Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 22 Jan 2022 22:25:05 +0200 Subject: [PATCH 104/597] spu: try to eliminate some cdda clicks/pops cdda feeds less data than consumes due to poor timings, especially for PAL --- plugins/dfsound/xa.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/plugins/dfsound/xa.c b/plugins/dfsound/xa.c index ad7e8247d..c3658af41 100644 --- a/plugins/dfsound/xa.c +++ b/plugins/dfsound/xa.c @@ -43,14 +43,13 @@ INLINE void MixXA(int *SSumLR, int ns_to, int decode_pos) int cursor = decode_pos; int ns; short l, r; - uint32_t v; + uint32_t v = spu.XALastVal; if(spu.XAPlay != spu.XAFeed || spu.XARepeat > 0) { if(spu.XAPlay == spu.XAFeed) spu.XARepeat--; - v = spu.XALastVal; for(ns = 0; ns < ns_to*2; ) { if(spu.XAPlay != spu.XAFeed) v=*spu.XAPlay++; @@ -67,21 +66,28 @@ INLINE void MixXA(int *SSumLR, int ns_to, int decode_pos) } spu.XALastVal = v; } + // occasionally CDDAFeed underflows by a few samples due to poor timing, + // hence this 'ns_to < 8' + else if(spu.CDDAPlay != spu.CDDAFeed || ns_to < 8) + { + for(ns = 0; ns < ns_to*2; ) + { + if(spu.CDDAPlay != spu.CDDAFeed) v=*spu.CDDAPlay++; + if(spu.CDDAPlay == spu.CDDAEnd) spu.CDDAPlay=spu.CDDAStart; - for(ns = 0; ns < ns_to * 2 && spu.CDDAPlay!=spu.CDDAFeed && (spu.CDDAPlay!=spu.CDDAEnd-1||spu.CDDAFeed!=spu.CDDAStart);) - { - v=*spu.CDDAPlay++; - if(spu.CDDAPlay==spu.CDDAEnd) spu.CDDAPlay=spu.CDDAStart; - - l = ((int)(short)v * spu.iLeftXAVol) >> 15; - r = ((int)(short)(v >> 16) * spu.iLeftXAVol) >> 15; - SSumLR[ns++] += l; - SSumLR[ns++] += r; + l = ((int)(short)v * spu.iLeftXAVol) >> 15; + r = ((int)(short)(v >> 16) * spu.iLeftXAVol) >> 15; + SSumLR[ns++] += l; + SSumLR[ns++] += r; - spu.spuMem[cursor] = v; - spu.spuMem[cursor + 0x400/2] = v >> 16; - cursor = 
(cursor + 1) & 0x1ff; - } + spu.spuMem[cursor] = v; + spu.spuMem[cursor + 0x400/2] = v >> 16; + cursor = (cursor + 1) & 0x1ff; + } + spu.XALastVal = v; + } + else + spu.XALastVal = 0; } //////////////////////////////////////////////////////////////////////// From 761fdd0a4ef77be843ff7a8fb10d33a4401965d5 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 23 Jan 2022 01:24:36 +0200 Subject: [PATCH 105/597] drc: align size passed to mprotect Maybe this makes svcControlProcessMemory() not do it's work there? It doesn't seem to be failing though. --- libpcsxcore/new_dynarec/new_dynarec.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 8fe3f93df..bcbc00952 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -6789,7 +6789,7 @@ static void new_dynarec_test(void) SysPrintf("linkage_arm* miscompilation/breakage detected.\n"); } - SysPrintf("testing if we can run recompiled code...\n"); + SysPrintf("testing if we can run recompiled code @%p...\n", out); ((volatile u_int *)out)[0]++; // make cache dirty for (i = 0; i < ARRAY_SIZE(ret); i++) { @@ -6874,7 +6874,8 @@ void new_dynarec_init(void) #else #ifndef NO_WRITE_EXEC // not all systems allow execute in data segment by default - if (mprotect(ndrc, sizeof(ndrc->translation_cache) + sizeof(ndrc->tramp.ops), + // size must be 4K aligned for 3DS? + if (mprotect(ndrc, sizeof(*ndrc), PROT_READ | PROT_WRITE | PROT_EXEC) != 0) SysPrintf("mprotect() failed: %s\n", strerror(errno)); #endif From 9c67c98f2350736b964e616743daf7aa10d022ec Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 23 Jan 2022 16:14:39 +0200 Subject: [PATCH 106/597] drc: possibly the missing piece for Vita Lost during merge of dynarec fixes. 
_newlib_vm_size_user might be the critical bit that overrides a weak symbol in bootstrap/vita/sbrk.c --- libpcsxcore/new_dynarec/new_dynarec.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index bcbc00952..e937b762e 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -29,10 +29,6 @@ #ifdef _3DS #include <3ds_utils.h> #endif -#ifdef VITA -#include <psp2/kernel/sysmem.h> -static int sceBlock; -#endif #include "new_dynarec_config.h" #include "../psxhle.h" @@ -354,6 +350,14 @@ static void pass_args(int a0, int a1); static void emit_far_jump(const void *f); static void emit_far_call(const void *f); +#ifdef VITA +#include <psp2/kernel/sysmem.h> +static int sceBlock; +// note: this interacts with RetroArch's Vita bootstrap code: bootstrap/vita/sbrk.c +extern int getVMBlock(); +int _newlib_vm_size_user = sizeof(*ndrc); +#endif + static void mprotect_w_x(void *start, void *end, int is_x) { #ifdef NO_WRITE_EXEC @@ -6899,8 +6903,8 @@ void new_dynarec_cleanup(void) int n; #ifdef BASE_ADDR_DYNAMIC #ifdef VITA - sceKernelFreeMemBlock(sceBlock); - sceBlock = -1; + //sceKernelFreeMemBlock(sceBlock); + //sceBlock = -1; #else if (munmap(ndrc, sizeof(*ndrc)) < 0) SysPrintf("munmap() failed\n"); From 4527b1fdb51d9d7aa23112257a7446c35092b4fd Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 23 Jan 2022 22:57:06 +0200 Subject: [PATCH 107/597] always describe to get git hash in log messages --- Makefile | 4 ++-- jni/Android.mk | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 0db94f777..3dac458b7 100644 --- a/Makefile +++ b/Makefile @@ -266,7 +266,7 @@ libpcsxcore/gte_nf.o: libpcsxcore/gte.c $(CC) -c -o $@ $^ $(CFLAGS) -DFLAGLESS frontend/revision.h: FORCE - @(git describe || echo) | sed -e 's/.*/#define REV "\0"/' > $@_ + @(git describe --always || echo) | sed -e 's/.*/#define REV "\0"/' > $@_ @diff -q $@_ $@ > /dev/null 
2>&1 || cp $@_ $@ @rm $@_ @@ -307,7 +307,7 @@ endif # ----------- release ----------- -VER ?= $(shell git describe HEAD) +VER ?= $(shell git describe --always HEAD) ifeq "$(PLATFORM)" "generic" OUT = pcsx_rearmed_$(VER) diff --git a/jni/Android.mk b/jni/Android.mk index 72c67385a..da000f3ea 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -1,5 +1,9 @@ LOCAL_PATH := $(call my-dir) +$(shell cd "$(LOCAL_PATH)" && ((git describe --always || echo) | sed -e 's/.*/#define REV "\0"/' > ../frontend/revision.h_)) +$(shell cd "$(LOCAL_PATH)" && (diff -q ../frontend/revision.h_ ../frontend/revision.h > /dev/null 2>&1 || cp ../frontend/revision.h_ ../frontend/revision.h)) +$(shell cd "$(LOCAL_PATH)" && (rm ../frontend/revision.h_)) + include $(CLEAR_VARS) APP_DIR := ../../src @@ -60,10 +64,6 @@ ifneq ($(TARGET_ARCH),arm) LOCAL_SRC_FILES += ../plugins/gpu_unai/gpulib_if.cpp endif -$(shell cd "$(LOCAL_PATH)" && ((git describe || echo) | sed -e 's/.*/#define REV "\0"/' > ../frontend/revision.h_)) -$(shell cd "$(LOCAL_PATH)" && (diff -q ../frontend/revision.h_ ../frontend/revision.h > /dev/null 2>&1 || cp ../frontend/revision.h_ ../frontend/revision.h)) -$(shell cd "$(LOCAL_PATH)" && (rm ../frontend/revision.h_)) - LOCAL_SRC_FILES += ../libpcsxcore/cdriso.c ../libpcsxcore/cdrom.c ../libpcsxcore/cheat.c ../libpcsxcore/debug.c \ ../libpcsxcore/decode_xa.c ../libpcsxcore/disr3000a.c ../libpcsxcore/mdec.c \ ../libpcsxcore/misc.c ../libpcsxcore/plugins.c ../libpcsxcore/ppf.c ../libpcsxcore/psxbios.c \ From 7f94b097581d5833921f6db86f18c7ef1203aa62 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 24 Jan 2022 01:49:45 +0200 Subject: [PATCH 108/597] drc: a bit more sophisticated f1 hack yes it sucks, I know... 
--- libpcsxcore/new_dynarec/emu_if.h | 1 + libpcsxcore/new_dynarec/linkage_arm.S | 2 +- libpcsxcore/new_dynarec/linkage_arm64.S | 2 +- libpcsxcore/new_dynarec/linkage_offsets.h | 4 ++-- libpcsxcore/new_dynarec/new_dynarec.c | 26 +++++++++++++++-------- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index 0ce9584ce..db11f7b25 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -76,6 +76,7 @@ void rcnt2_read_count_m0(u32 addr, u32, u32 cycles); void rcnt2_read_count_m1(u32 addr, u32, u32 cycles); extern unsigned int address; +extern unsigned int hack_addr; extern void *psxH_ptr; extern void *zeromem_ptr; extern void *scratch_buf_ptr; diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 637173152..d409aff5e 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -60,7 +60,7 @@ DRC_VAR(pending_exception, 4) DRC_VAR(stop, 4) DRC_VAR(branch_target, 4) DRC_VAR(address, 4) -@DRC_VAR(align0, 4) /* unused/alignment */ +DRC_VAR(hack_addr, 4) DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) /* psxRegs */ diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 5e9626f5e..b9ab726b2 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -52,7 +52,7 @@ DRC_VAR(pending_exception, 4) DRC_VAR(stop, 4) DRC_VAR(branch_target, 4) DRC_VAR(address, 4) -#DRC_VAR(align0, 16) /* unused/alignment */ +DRC_VAR(hack_addr, 4) DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) /* psxRegs */ diff --git a/libpcsxcore/new_dynarec/linkage_offsets.h b/libpcsxcore/new_dynarec/linkage_offsets.h index e9bb3abdc..7ac2e6119 100644 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ b/libpcsxcore/new_dynarec/linkage_offsets.h @@ -8,8 +8,8 @@ #define LO_stop (LO_pending_exception + 4) #define LO_branch_target (LO_stop + 4) 
#define LO_address (LO_branch_target + 4) -#define LO_align0 (LO_address + 4) -#define LO_psxRegs (LO_align0 + 4) +#define LO_hack_addr (LO_address + 4) +#define LO_psxRegs (LO_hack_addr + 4) #define LO_reg (LO_psxRegs) #define LO_lo (LO_reg + 128) #define LO_hi (LO_lo + 4) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index e937b762e..0900736d3 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -224,7 +224,7 @@ static struct decoded_insn static void *copy; static int expirep; static u_int stop_after_jal; - static u_int f1_hack; // 0 - off, ~0 - capture address, else addr + static u_int f1_hack; int new_dynarec_hacks; int new_dynarec_hacks_pergame; @@ -6831,6 +6831,7 @@ void new_dynarec_clear_full(void) literalcount=0; stop_after_jal=0; inv_code_start=inv_code_end=~0; + hack_addr=0; f1_hack=0; // TLB for(n=0;n<4096;n++) ll_clear(jump_in+n); @@ -7046,11 +7047,11 @@ void new_dynarec_load_blocks(const void *save, int size) memcpy(&psxRegs.GPR, regs_save, sizeof(regs_save)); } -static void apply_hacks(void) +static int apply_hacks(void) { int i; if (HACK_ENABLED(NDHACK_NO_COMPAT_HACKS)) - return; + return 0; /* special hack(s) */ for (i = 0; i < slen - 4; i++) { @@ -7075,11 +7076,12 @@ static void apply_hacks(void) if (dops[i].itype == STORELR && dops[i].rs1 == 6 && dops[i-1].itype == STORELR && dops[i-1].rs1 == 6) { - SysPrintf("F1 hack from %08x\n", start); - if (f1_hack == 0) - f1_hack = ~0u; + SysPrintf("F1 hack from %08x, old dst %08x\n", start, hack_addr); + f1_hack = 1; + return 1; } } + return 0; } int new_recompile_block(u_int addr) @@ -7117,9 +7119,11 @@ int new_recompile_block(u_int addr) ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning); return 0; } - else if (f1_hack == ~0u || (f1_hack != 0 && start == f1_hack)) { + else if (f1_hack && hack_addr == 0) { void *beginning = start_block(); u_int page = get_page(start); + emit_movimm(start, 0); + 
emit_writeword(0, &hack_addr); emit_readword(&psxRegs.GPR.n.sp, 0); emit_readptr(&mem_rtab, 1); emit_shrimm(0, 12, 2); @@ -7135,7 +7139,6 @@ int new_recompile_block(u_int addr) ll_add_flags(jump_in + page, start, state_rflags, beginning); SysPrintf("F1 hack to %08x\n", start); - f1_hack = start; return 0; } @@ -7663,7 +7666,7 @@ int new_recompile_block(u_int addr) } assert(slen>0); - apply_hacks(); + int clear_hack_addr = apply_hacks(); /* Pass 2 - Register dependencies and branch targets */ @@ -9203,6 +9206,11 @@ int new_recompile_block(u_int addr) instr_addr[i] = out; assem_debug("<->\n"); drc_dbg_emit_do_cmp(i, ccadj[i]); + if (clear_hack_addr) { + emit_movimm(0, 0); + emit_writeword(0, &hack_addr); + clear_hack_addr = 0; + } // load regs if(regs[i].regmap_entry[HOST_CCREG]==CCREG&®s[i].regmap[HOST_CCREG]!=CCREG) From 66ea165fc20153e117c903c874637251e2122374 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 24 Jan 2022 20:49:04 +0200 Subject: [PATCH 109/597] drc: another hack to try to get Vita to work --- libpcsxcore/new_dynarec/assem_arm.c | 5 ++--- libpcsxcore/new_dynarec/new_dynarec.c | 19 ++++++++++++++----- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 381a54191..ef3219f40 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -101,8 +101,6 @@ const u_int invalidate_addr_reg[16] = { 0, 0}; -static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; - /* Linker */ static void set_jump_target(void *addr, void *target_) @@ -1642,7 +1640,8 @@ static void emit_extjump2(u_char *addr, u_int target, void *linker) emit_loadlp(target,0); emit_loadlp((u_int)addr,1); - assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<translation_cache <= addr && + addr < ndrc->translation_cache + sizeof(ndrc->translation_cache)); //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000)); //DEBUG > #ifdef 
DEBUG_CYCLE_COUNT diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 0900736d3..107a6304d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -75,9 +75,17 @@ #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 +#ifdef VITA +// apparently Vita has a 16MB limit, so either we cut tc in half, +// or use this hack (it's a hack because tc size was designed to be power-of-2) +#define TC_REDUCE_BYTES 4096 +#else +#define TC_REDUCE_BYTES 0 +#endif + struct ndrc_mem { - u_char translation_cache[1 << TARGET_SIZE_2]; + u_char translation_cache[(1 << TARGET_SIZE_2) - TC_REDUCE_BYTES]; struct { struct tramp_insns ops[2048 / sizeof(struct tramp_insns)]; @@ -6844,7 +6852,7 @@ void new_dynarec_clear_full(void) void new_dynarec_init(void) { - SysPrintf("Init new dynarec\n"); + SysPrintf("Init new dynarec, ndrc size %x\n", (int)sizeof(*ndrc)); #ifdef _3DS check_rosalina(); @@ -6852,11 +6860,11 @@ void new_dynarec_init(void) #ifdef BASE_ADDR_DYNAMIC #ifdef VITA sceBlock = getVMBlock(); //sceKernelAllocMemBlockForVM("code", sizeof(*ndrc)); - if (sceBlock < 0) - SysPrintf("sceKernelAllocMemBlockForVM failed\n"); + if (sceBlock <= 0) + SysPrintf("sceKernelAllocMemBlockForVM failed: %x\n", sceBlock); int ret = sceKernelGetMemBlockBase(sceBlock, (void **)&ndrc); if (ret < 0) - SysPrintf("sceKernelGetMemBlockBase failed\n"); + SysPrintf("sceKernelGetMemBlockBase failed: %x\n", ret); sceKernelOpenVMDomain(); sceClibPrintf("translation_cache = 0x%08lx\n ", (long)ndrc->translation_cache); #elif defined(_MSC_VER) @@ -6904,6 +6912,7 @@ void new_dynarec_cleanup(void) int n; #ifdef BASE_ADDR_DYNAMIC #ifdef VITA + // sceBlock is managed by retroarch's bootstrap code //sceKernelFreeMemBlock(sceBlock); //sceBlock = -1; #else From 079ab0c6836e83bae50c55cad2baf733049f5136 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 26 Jan 2022 22:02:18 +0200 Subject: [PATCH 110/597] drc: don't cache psxRegs.interrupt 
cdrom code can now cancel interrupts, so must always use the global --- libpcsxcore/new_dynarec/emu_if.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 90c466059..f9ee64169 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -67,22 +67,18 @@ static irq_func * const irq_funcs[] = { /* local dupe of psxBranchTest, using event_cycles */ static void irq_test(void) { - u32 irqs = psxRegs.interrupt; u32 cycle = psxRegs.cycle; u32 irq, irq_bits; - // irq_funcs() may queue more irqs - psxRegs.interrupt = 0; - - for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) { + for (irq = 0, irq_bits = psxRegs.interrupt; irq_bits != 0; irq++, irq_bits >>= 1) { if (!(irq_bits & 1)) continue; if ((s32)(cycle - event_cycles[irq]) >= 0) { - irqs &= ~(1 << irq); + // note: irq_funcs() also modify psxRegs.interrupt + psxRegs.interrupt &= ~(1u << irq); irq_funcs[irq](); } } - psxRegs.interrupt |= irqs; if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) { psxException(0x400, 0); @@ -477,7 +473,7 @@ u32 irq_test_cycle; u32 handler_cycle; u32 last_io_addr; -static void dump_mem(const char *fname, void *mem, size_t size) +void dump_mem(const char *fname, void *mem, size_t size) { FILE *f1 = fopen(fname, "wb"); if (f1 == NULL) @@ -662,7 +658,7 @@ void do_insn_cmp(void) psxRegs.code = rregs.code; // don't care psxRegs.cycle += last_count; - //psxRegs.cycle = rregs.cycle; + //psxRegs.cycle = rregs.cycle; // needs reload in _cmp psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count //if (psxRegs.cycle == 166172) breakme(); @@ -719,9 +715,9 @@ void do_insn_cmp(void) for (i = 0; i < 8; i++) printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i], i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]); - printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle); - 
dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000); - dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000); + printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc, psxRegs.cycle, next_interupt); + //dump_mem("/tmp/psxram.dump", psxM, 0x200000); + //dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000); exit(1); ok: //psxRegs.cycle = rregs.cycle + 2; // sync timing From d1150cd66676ce43b8451c65818c2dc3e2f8a1d6 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 26 Jan 2022 22:09:22 +0200 Subject: [PATCH 111/597] drc: emulate break opcode sort-of-but-not-really used by "F1 2000" (SLUS01120, not "Formula One 2000"). --- libpcsxcore/new_dynarec/linkage_arm.S | 34 ++++++++++++++--------- libpcsxcore/new_dynarec/linkage_arm64.S | 33 ++++++++++++++--------- libpcsxcore/new_dynarec/new_dynarec.c | 36 ++++++++++++++++++------- 3 files changed, 69 insertions(+), 34 deletions(-) diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index d409aff5e..5538462f8 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -34,6 +34,7 @@ #define gen_interupt ESYM(gen_interupt) #define invalidate_addr ESYM(invalidate_addr) #define gteCheckStallRaw ESYM(gteCheckStallRaw) +#define psxException ESYM(psxException) #endif .bss @@ -497,19 +498,28 @@ FUNCTION(fp_exception_ds): .size fp_exception_ds, .-fp_exception_ds .align 2 +FUNCTION(jump_break_ds): + mov r0, #0x24 + mov r1, #1 + b call_psxException +FUNCTION(jump_break): + mov r0, #0x24 + mov r1, #0 + b call_psxException +FUNCTION(jump_syscall_ds): + mov r0, #0x20 + mov r1, #1 + b call_psxException FUNCTION(jump_syscall): - ldr r1, [fp, #LO_reg_cop0+48] /* Status */ - mov r3, #0x80000000 - str r0, [fp, #LO_reg_cop0+56] /* EPC */ - orr r1, #2 - mov r2, #0x20 - str r1, [fp, #LO_reg_cop0+48] /* Status */ - str r2, [fp, #LO_reg_cop0+52] /* Cause */ - add r0, r3, #0x80 - bl get_addr_ht - mov pc, r0 - .size jump_syscall, .-jump_syscall - 
.align 2 + mov r0, #0x20 + mov r1, #0 + +call_psxException: + ldr r3, [fp, #LO_last_count] + str r2, [fp, #LO_pcaddr] + add r10, r3, r10 + str r10, [fp, #LO_cycle] /* PCSX cycles */ + bl psxException /* note: psxException might do recursive recompiler call from it's HLE code, * so be ready for this */ diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index b9ab726b2..39e95a834 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -182,19 +182,28 @@ FUNCTION(fp_exception_ds): .size fp_exception_ds, .-fp_exception_ds .align 2 +FUNCTION(jump_break_ds): + mov w0, #0x24 + mov w1, #1 + b call_psxException +FUNCTION(jump_break): + mov w0, #0x24 + mov w1, #0 + b call_psxException +FUNCTION(jump_syscall_ds): + mov w0, #0x20 + mov w1, #1 + b call_psxException FUNCTION(jump_syscall): - ldr w1, [rFP, #LO_reg_cop0+48] /* Status */ - mov w3, #0x80000000 - str w0, [rFP, #LO_reg_cop0+56] /* EPC */ - orr w1, w1, #2 - mov w2, #0x20 - str w1, [rFP, #LO_reg_cop0+48] /* Status */ - str w2, [rFP, #LO_reg_cop0+52] /* Cause */ - add w0, w3, #0x80 - bl get_addr_ht - br x0 - .size jump_syscall, .-jump_syscall - .align 2 + mov w0, #0x20 + mov w1, #0 + +call_psxException: + ldr w3, [rFP, #LO_last_count] + str w2, [rFP, #LO_pcaddr] + add rCC, w3, rCC + str rCC, [rFP, #LO_cycle] /* PCSX cycles */ + bl psxException /* note: psxException might do recursive recompiler call from it's HLE code, * so be ready for this */ diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 107a6304d..2ca0f6044 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -49,6 +49,7 @@ //#define DISASM //#define ASSEM_PRINT +//#define REG_ALLOC_PRINT #ifdef ASSEM_PRINT #define assem_debug printf @@ -297,7 +298,7 @@ static struct decoded_insn //#define FLOAT 19 // Floating point unit //#define FCONV 20 // Convert integer to float //#define FCOMP 21 
// Floating point compare (sets FSREG) -#define SYSCALL 22// SYSCALL +#define SYSCALL 22// SYSCALL,BREAK #define OTHER 23 // Other #define SPAN 24 // Branch/delay slot spans 2 pages #define NI 25 // Not implemented @@ -328,6 +329,10 @@ void verify_code_ds(); void cc_interrupt(); void fp_exception(); void fp_exception_ds(); +void jump_syscall (u_int u0, u_int u1, u_int pc); +void jump_syscall_ds(u_int u0, u_int u1, u_int pc); +void jump_break (u_int u0, u_int u1, u_int pc); +void jump_break_ds(u_int u0, u_int u1, u_int pc); void jump_to_new_pc(); void call_gteStall(); void new_dyna_leave(); @@ -926,6 +931,10 @@ static const struct { FUNCNAME(jump_handler_write32), FUNCNAME(invalidate_addr), FUNCNAME(jump_to_new_pc), + FUNCNAME(jump_break), + FUNCNAME(jump_break_ds), + FUNCNAME(jump_syscall), + FUNCNAME(jump_syscall_ds), FUNCNAME(call_gteStall), FUNCNAME(new_dyna_leave), FUNCNAME(pcsx_mtc0), @@ -3935,9 +3944,16 @@ static void call_c_cpu_handler(int i, const struct regstat *i_regs, int ccadj_, static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_) { - emit_movimm(0x20,0); // cause code - emit_movimm(0,1); // not in delay slot - call_c_cpu_handler(i, i_regs, ccadj_, start+i*4, psxException); + // 'break' tends to be littered around to catch things like + // division by 0 and is almost never executed, so don't emit much code here + void *func = (dops[i].opcode2 == 0x0C) + ? (is_delayslot ? jump_syscall_ds : jump_syscall) + : (is_delayslot ? 
jump_break_ds : jump_break); + signed char ccreg = get_reg(i_regs->regmap, CCREG); + assert(ccreg == HOST_CCREG); + emit_movimm(start + i*4, 2); // pc + emit_addimm(HOST_CCREG, ccadj_ + CLOCK_ADJUST(1), HOST_CCREG); + emit_far_jump(func); } static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_) @@ -7200,7 +7216,7 @@ int new_recompile_block(u_int addr) case 0x08: strcpy(insn[i],"JR"); type=RJUMP; break; case 0x09: strcpy(insn[i],"JALR"); type=RJUMP; break; case 0x0C: strcpy(insn[i],"SYSCALL"); type=SYSCALL; break; - case 0x0D: strcpy(insn[i],"BREAK"); type=OTHER; break; + case 0x0D: strcpy(insn[i],"BREAK"); type=SYSCALL; break; case 0x0F: strcpy(insn[i],"SYNC"); type=OTHER; break; case 0x10: strcpy(insn[i],"MFHI"); type=MOV; break; case 0x11: strcpy(insn[i],"MTHI"); type=MOV; break; @@ -7646,9 +7662,9 @@ int new_recompile_block(u_int addr) // Don't get too close to the limit if(i>MAXBLOCK/2) done=1; } - if(dops[i].itype==SYSCALL&&stop_after_jal) done=1; - if(dops[i].itype==HLECALL||dops[i].itype==INTCALL) done=2; - if(done==2) { + if (dops[i].itype == SYSCALL || dops[i].itype == HLECALL || dops[i].itype == INTCALL) + done = stop_after_jal ? 1 : 2; + if (done == 2) { // Does the block continue due to a branch? 
for(j=i-1;j>=0;j--) { @@ -9021,7 +9037,7 @@ int new_recompile_block(u_int addr) dops[slen-1].bt=1; // Mark as a branch target so instruction can restart after exception } -#ifdef DISASM +#ifdef REG_ALLOC_PRINT /* Debug/disassembly */ for(i=0;i Date: Thu, 27 Jan 2022 23:54:42 +0200 Subject: [PATCH 112/597] drc: init some variables, adjust comments no proof that is actually needed, but it felt like some path could pick up something uninitialized --- libpcsxcore/new_dynarec/assem_arm.c | 4 +-- libpcsxcore/new_dynarec/emu_if.h | 12 ++++----- libpcsxcore/new_dynarec/new_dynarec.c | 37 +++++++++++++++++---------- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index ef3219f40..32ef9794a 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -545,7 +545,7 @@ static void emit_loadreg(int r, int hr) } u_int offset = (u_char *)addr - (u_char *)&dynarec_local; assert(offset<4096); - assem_debug("ldr %s,fp+%d\n",regname[hr],offset); + assem_debug("ldr %s,fp+%d # r%d\n",regname[hr],offset,r); output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset); } } @@ -566,7 +566,7 @@ static void emit_storereg(int r, int hr) } u_int offset = addr-(u_int)&dynarec_local; assert(offset<4096); - assem_debug("str %s,fp+%d\n",regname[hr],offset); + assem_debug("str %s,fp+%d # r%d\n",regname[hr],offset,r); output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset); } diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index db11f7b25..5a3a5e8d3 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -68,12 +68,12 @@ void jump_handler_write32(u32 addr, u32 data, u32 cycles, u32 *table); void jump_handler_write_h(u32 addr, u32 data, u32 cycles, void *handler); void jump_handle_swl(u32 addr, u32 data, u32 cycles); void jump_handle_swr(u32 addr, u32 data, u32 cycles); -void rcnt0_read_count_m0(u32 addr, u32, u32 cycles); -void 
rcnt0_read_count_m1(u32 addr, u32, u32 cycles); -void rcnt1_read_count_m0(u32 addr, u32, u32 cycles); -void rcnt1_read_count_m1(u32 addr, u32, u32 cycles); -void rcnt2_read_count_m0(u32 addr, u32, u32 cycles); -void rcnt2_read_count_m1(u32 addr, u32, u32 cycles); +u32 rcnt0_read_count_m0(u32 addr, u32, u32 cycles); +u32 rcnt0_read_count_m1(u32 addr, u32, u32 cycles); +u32 rcnt1_read_count_m0(u32 addr, u32, u32 cycles); +u32 rcnt1_read_count_m1(u32 addr, u32, u32 cycles); +u32 rcnt2_read_count_m0(u32 addr, u32, u32 cycles); +u32 rcnt2_read_count_m1(u32 addr, u32, u32 cycles); extern unsigned int address; extern unsigned int hack_addr; diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 2ca0f6044..de444483f 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -119,9 +119,13 @@ enum stub_type { INVCODE_STUB = 14, }; +// regmap_pre[i] - regs before [i] insn starts; dirty things here that +// don't match .regmap will be written back +// [i].regmap_entry - regs that must be set up if someone jumps here +// [i].regmap - regs [i] insn will read/(over)write struct regstat { - signed char regmap_entry[HOST_REGS]; // pre-insn + loop preloaded regs? + signed char regmap_entry[HOST_REGS]; signed char regmap[HOST_REGS]; uint64_t wasdirty; uint64_t dirty; @@ -207,7 +211,7 @@ static struct decoded_insn static u_int ba[MAXBLOCK]; static uint64_t unneeded_reg[MAXBLOCK]; static uint64_t branch_unneeded_reg[MAXBLOCK]; - // pre-instruction [i], excluding loop-preload regs? 
+ // see 'struct regstat' for a description static signed char regmap_pre[MAXBLOCK][HOST_REGS]; // contains 'real' consts at [i] insn, but may differ from what's actually // loaded in host reg as 'final' value is always loaded, see get_final_value() @@ -599,10 +603,9 @@ void *get_addr_ht(u_int vaddr) return get_addr(vaddr); } -void clear_all_regs(signed char regmap[]) +static void clear_all_regs(signed char regmap[]) { - int hr; - for (hr=0;hr=0); if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { From a550c61c2bad6706ecd46003cdb7b43760d02b03 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 28 Jan 2022 00:02:09 +0200 Subject: [PATCH 113/597] drc: clear dirty flag when deallocating a reg In later stages it may place special regs like AGEN there, and things like AGEN being marked dirty seemed wrong, although probably harmless as there is no code to writeback AGEN. --- libpcsxcore/new_dynarec/new_dynarec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index de444483f..1b2479b8d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -8508,6 +8508,8 @@ int new_recompile_block(u_int addr) { regs[i].regmap[hr]=-1; regs[i].isconst&=~(1< Date: Fri, 28 Jan 2022 01:09:44 +0200 Subject: [PATCH 114/597] drc: add seemingly missing double-alloc check seems like it was possible to alloc the same reg on 2 host regs edit: found after all the debugging: mupen64plus/mupen64plus-core@8f4cc2bdc93c41801077abda3197a3cb3b89cbb7 --- libpcsxcore/new_dynarec/new_dynarec.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 1b2479b8d..8ae158965 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -6144,8 +6144,21 @@ static void pagespan_ds() load_regs_bt(regs[0].regmap,regs[0].dirty,start+4); } 
+static void check_regmap(signed char *regmap) +{ +#ifndef NDEBUG + int i,j; + for (i = 0; i < HOST_REGS; i++) { + if (regmap[i] < 0) + continue; + for (j = i + 1; j < HOST_REGS; j++) + assert(regmap[i] != regmap[j]); + } +#endif +} + // Basic liveness analysis for MIPS registers -void unneeded_registers(int istart,int iend,int r) +static void unneeded_registers(int istart,int iend,int r) { int i; uint64_t u,gte_u,b,gte_b; @@ -8650,11 +8663,8 @@ int new_recompile_block(u_int addr) //printf("Hit %x -> %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r); int k; if(regs[i].regmap[hr]==-1&&branch_regs[i].regmap[hr]==-1) { + if(get_reg(regs[i].regmap,f_regmap[hr])>=0) break; if(get_reg(regs[i+2].regmap,f_regmap[hr])>=0) break; - if(r>63) { - if(get_reg(regs[i].regmap,r&63)<0) break; - if(get_reg(branch_regs[i].regmap,r&63)<0) break; - } k=i; while(k>1&®s[k-1].regmap[hr]==-1) { if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) { @@ -8673,7 +8683,6 @@ int new_recompile_block(u_int addr) if(k>2&&(dops[k-3].itype==UJUMP||dops[k-3].itype==RJUMP)&&dops[k-3].rt1==31) { break; } - assert(r < 64); k--; } if(regs[k-1].regmap[hr]==f_regmap[hr]&®map_pre[k][hr]==f_regmap[hr]) { @@ -9217,6 +9226,9 @@ int new_recompile_block(u_int addr) } for(i=0;i Date: Sun, 30 Jan 2022 01:37:45 +0200 Subject: [PATCH 115/597] drc: some more debug logging --- libpcsxcore/new_dynarec/new_dynarec.c | 34 ++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 8ae158965..432f9da0a 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -123,6 +123,7 @@ enum stub_type { // don't match .regmap will be written back // [i].regmap_entry - regs that must be set up if someone jumps here // [i].regmap - regs [i] insn will read/(over)write +// branch_regs[i].* - same as above but for branches, takes delay slot into account struct regstat { signed 
char regmap_entry[HOST_REGS]; @@ -3952,8 +3953,7 @@ static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_) void *func = (dops[i].opcode2 == 0x0C) ? (is_delayslot ? jump_syscall_ds : jump_syscall) : (is_delayslot ? jump_break_ds : jump_break); - signed char ccreg = get_reg(i_regs->regmap, CCREG); - assert(ccreg == HOST_CCREG); + assert(get_reg(i_regs->regmap, CCREG) == HOST_CCREG); emit_movimm(start + i*4, 2); // pc emit_addimm(HOST_CCREG, ccadj_ + CLOCK_ADJUST(1), HOST_CCREG); emit_far_jump(func); @@ -5618,7 +5618,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) int cc; int match; match=match_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); - assem_debug("smatch=%d\n",match); + assem_debug("smatch=%d ooo=%d\n", match, dops[i].ooo); int s1l; int unconditional=0,nevertaken=0; int invert=0; @@ -6728,6 +6728,24 @@ void clean_registers(int istart,int iend,int wr) } #ifdef DISASM +#include +void print_regmap(const char *name, const signed char *regmap) +{ + char buf[5]; + int i, l; + fputs(name, stdout); + for (i = 0; i < HOST_REGS; i++) { + l = 0; + if (regmap[i] >= 0) + l = snprintf(buf, sizeof(buf), "$%d", regmap[i]); + for (; l < 3; l++) + buf[l] = ' '; + buf[l] = 0; + printf(" r%d=%s", i, buf); + } + fputs("\n", stdout); +} + /* disassembly */ void disassemble_inst(int i) { @@ -6813,6 +6831,16 @@ void disassemble_inst(int i) //printf (" %s %8x\n",insn[i],source[i]); printf (" %x: %s\n",start+i*4,insn[i]); } + return; + printf("D: %"PRIu64" WD: %"PRIu64" U: %"PRIu64"\n", + regs[i].dirty, regs[i].wasdirty, unneeded_reg[i]); + print_regmap("pre: ", regmap_pre[i]); + print_regmap("entry: ", regs[i].regmap_entry); + print_regmap("map: ", regs[i].regmap); + if (dops[i].is_jump) { + print_regmap("bentry:", branch_regs[i].regmap_entry); + print_regmap("bmap: ", branch_regs[i].regmap); + } } #else static void disassemble_inst(int i) {} From 5a18ce2e9a7dac901f6cfb5d0b42c58f23c53b29 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 
30 Jan 2022 01:39:40 +0200 Subject: [PATCH 116/597] drc: add apparently missing ROREG loading sjump_assemble had different loading for taken/not taken cases for whatever reason, and I added ROREG loading according to INVCP, which was (maybe?) erroneously missing for 'not taken' case. Fixes CTR crash shortly after starting driving. --- libpcsxcore/new_dynarec/new_dynarec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 432f9da0a..f9046bda9 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -5855,7 +5855,9 @@ static void sjump_assemble(int i, const struct regstat *i_regs) wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,ds_unneeded); load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2); address_generation(i+1,&branch_regs[i],0); - load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG); + if (ram_offset) + load_regs(regs[i].regmap,branch_regs[i].regmap,ROREG,ROREG); + load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP); ds_assemble(i+1,&branch_regs[i]); cc=get_reg(branch_regs[i].regmap,CCREG); if (cc == -1) { From 7d7672a58c82341268c6f5d856c558e768aa3974 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 30 Jan 2022 17:34:42 +0200 Subject: [PATCH 117/597] rm the old debugger stuff, like in libretro fork --- Makefile | 7 ++++--- frontend/main.c | 4 ++-- libpcsxcore/psxcounters.c | 3 ++- libpcsxcore/psxinterpreter.c | 3 ++- libpcsxcore/psxmem.c | 3 ++- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 3dac458b7..7616cac55 100644 --- a/Makefile +++ b/Makefile @@ -39,13 +39,14 @@ CFLAGS += -DPCNT endif # core -OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cheat.o libpcsxcore/database.o libpcsxcore/debug.o \ - libpcsxcore/decode_xa.o libpcsxcore/disr3000a.o libpcsxcore/mdec.o \ +OBJS += libpcsxcore/cdriso.o 
libpcsxcore/cdrom.o libpcsxcore/cheat.o libpcsxcore/database.o \ + libpcsxcore/decode_xa.o libpcsxcore/mdec.o \ libpcsxcore/misc.o libpcsxcore/plugins.o libpcsxcore/ppf.o libpcsxcore/psxbios.o \ libpcsxcore/psxcommon.o libpcsxcore/psxcounters.o libpcsxcore/psxdma.o libpcsxcore/psxhle.o \ libpcsxcore/psxhw.o libpcsxcore/psxinterpreter.o libpcsxcore/psxmem.o libpcsxcore/r3000a.o \ - libpcsxcore/sio.o libpcsxcore/socket.o libpcsxcore/spu.o + libpcsxcore/sio.o libpcsxcore/spu.o OBJS += libpcsxcore/gte.o libpcsxcore/gte_nf.o libpcsxcore/gte_divider.o +#OBJS += libpcsxcore/debug.o libpcsxcore/socket.o libpcsxcore/disr3000a.o ifeq "$(ARCH)" "arm" OBJS += libpcsxcore/gte_arm.o endif diff --git a/frontend/main.c b/frontend/main.c index 4631618e5..234364504 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -43,8 +43,8 @@ static void check_memcards(void); #endif // don't include debug.h - it breaks ARM build (R1 redefined) -void StartDebugger(); -void StopDebugger(); +static void StartDebugger() {} +static void StopDebugger() {} int ready_to_go, g_emu_want_quit, g_emu_resetting; unsigned long gpuDisp; diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index b2cc07b2f..ff0efbced 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -23,7 +23,8 @@ #include "psxcounters.h" #include "gpu.h" -#include "debug.h" +//#include "debug.h" +#define DebugVSync() /******************************************************************************/ diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index f7898e9a9..19a5fc4ec 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -25,9 +25,10 @@ #include "r3000a.h" #include "gte.h" #include "psxhle.h" -#include "debug.h" #include "psxinterpreter.h" #include +//#include "debug.h" +#define ProcessDebug() static int branch = 0; static int branch2 = 0; diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 04aeec27b..46cee0cab 100644 --- 
a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -27,7 +27,8 @@ #include "psxmem_map.h" #include "r3000a.h" #include "psxhw.h" -#include "debug.h" +//#include "debug.h" +#define DebugCheckBP(...) #include "memmap.h" From b4ab351d15a94065d14877b0976b4d9a7056f7ac Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 30 Jan 2022 18:40:03 +0200 Subject: [PATCH 118/597] drc: don't abort on game crash Was not the best user experience, with this the user can load another game at least. --- libpcsxcore/new_dynarec/emu_if.h | 2 -- libpcsxcore/new_dynarec/linkage_arm.S | 14 +++----------- libpcsxcore/new_dynarec/new_dynarec.c | 20 +++++++++++--------- 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index 5a3a5e8d3..7fa0a171a 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -18,8 +18,6 @@ extern int reg_cop0[]; #define Cause psxRegs.CP0.n.Cause #define EPC psxRegs.CP0.n.EPC #define BadVAddr psxRegs.CP0.n.BadVAddr -#define Context psxRegs.CP0.n.Context -#define EntryHi psxRegs.CP0.n.EntryHi #define Count psxRegs.cycle // psxRegs.CP0.n.Count /* COP2/GTE */ diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 5538462f8..8d9074f4d 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -264,7 +264,7 @@ FUNCTION(dyna_linker): beq dyna_linker /* pagefault */ mov r1, r0 - mov r2, #8 + mov r2, #(4<<2) /* Address error (fetch) */ .size dyna_linker, .-dyna_linker FUNCTION(exec_pagefault): @@ -272,21 +272,13 @@ FUNCTION(exec_pagefault): /* r1 = fault address */ /* r2 = cause */ ldr r3, [fp, #LO_reg_cop0+48] /* Status */ - mvn r6, #0xF000000F - ldr r4, [fp, #LO_reg_cop0+16] /* Context */ - bic r6, r6, #0x0F800000 str r0, [fp, #LO_reg_cop0+56] /* EPC */ orr r3, r3, #2 str r1, [fp, #LO_reg_cop0+32] /* BadVAddr */ - bic r4, r4, r6 str r3, [fp, #LO_reg_cop0+48] /* Status */ - and r5, r6, 
r1, lsr #9 str r2, [fp, #LO_reg_cop0+52] /* Cause */ - and r1, r1, r6, lsl #9 - str r1, [fp, #LO_reg_cop0+40] /* EntryHi */ - orr r4, r4, r5 - str r4, [fp, #LO_reg_cop0+16] /* Context */ mov r0, #0x80000000 + orr r0, r0, #0x80 bl get_addr_ht mov pc, r0 .size exec_pagefault, .-exec_pagefault @@ -440,7 +432,7 @@ FUNCTION(cc_interrupt): and r2, r2, r10, lsr #17 add r3, fp, #LO_restore_candidate str r10, [fp, #LO_cycle] /* PCSX cycles */ -@@ str r10, [fp, #LO_reg_cop0+36] /* Count */ +@@ str r10, [fp, #LO_reg_cop0+36] /* Count - not on PSX */ ldr r4, [r2, r3] mov r10, lr tst r4, r4 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index f9046bda9..cc6c094de 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -585,14 +585,12 @@ void noinline *get_addr(u_int vaddr) //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr); int r=new_recompile_block(vaddr); if(r==0) return get_addr(vaddr); - // Execute in unmapped page, generate pagefault execption + // generate an address error Status|=2; - Cause=(vaddr<<31)|0x8; + Cause=(vaddr<<31)|(4<<2); EPC=(vaddr&1)?vaddr-5:vaddr; BadVAddr=(vaddr&~1); - Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0); - EntryHi=BadVAddr&0xFFFFE000; - return get_addr_ht(0x80000000); + return get_addr_ht(0x80000080); } // Look up address in hash table first void *get_addr_ht(u_int vaddr) @@ -7218,8 +7216,12 @@ int new_recompile_block(u_int addr) source = get_source_start(start, &pagelimit); if (source == NULL) { - SysPrintf("Compile at bogus memory address: %08x\n", addr); - abort(); + if (addr != hack_addr) { + SysPrintf("Compile at bogus memory address: %08x\n", addr); + hack_addr = addr; + } + //abort(); + return -1; } /* Pass 1: disassemble */ @@ -7234,7 +7236,7 @@ int new_recompile_block(u_int addr) /* Pass 10: garbage collection / free memory */ int j; - int done=0; + int done = 0, ni_count = 0; unsigned int type,op,op2; 
//printf("addr = %x source = %x %x\n", addr,source,source[0]); @@ -7724,7 +7726,7 @@ int new_recompile_block(u_int addr) assert(start+i*4 8 || dops[i].opcode == 0x11)) { done=stop_after_jal=1; SysPrintf("Disabled speculative precompilation\n"); } From b7f5c059b7f60b521424d47a5117c3bdc20f8551 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 31 Jan 2022 02:07:25 +0200 Subject: [PATCH 119/597] gpu_neon: reserved bpp is 16bpp, not 4bpp --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 4 ++-- plugins/gpu_neon/psx_gpu/psx_gpu_4x.c | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index e113f064f..a5e7aa181 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -2885,7 +2885,7 @@ blend_blocks_builder(textured, unblended, on); render_blocks_switch_block_texture_mode(4bpp), \ render_blocks_switch_block_texture_mode(8bpp), \ render_blocks_switch_block_texture_mode(16bpp), \ - render_blocks_switch_block_texture_mode(4bpp) \ + render_blocks_switch_block_texture_mode(16bpp) \ render_block_handler_struct render_triangle_block_handlers[] = @@ -4251,7 +4251,7 @@ void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, render_sprite_blocks_switch_block_texture_mode(4bpp), \ render_sprite_blocks_switch_block_texture_mode(8bpp), \ render_sprite_blocks_switch_block_texture_mode(16bpp), \ - render_sprite_blocks_switch_block_texture_mode(4bpp) \ + render_sprite_blocks_switch_block_texture_mode(16bpp) \ render_block_handler_struct render_sprite_block_handlers[] = diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c index 83c6680f4..85e972c5c 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c @@ -291,7 +291,7 @@ static void setup_sprite_untextured_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, 
render_sprite_blocks_switch_block_texture_mode_4x(4bpp), \ render_sprite_blocks_switch_block_texture_mode_4x(8bpp), \ render_sprite_blocks_switch_block_texture_mode_4x(16bpp), \ - render_sprite_blocks_switch_block_texture_mode_4x(4bpp) \ + render_sprite_blocks_switch_block_texture_mode_4x(16bpp) \ render_block_handler_struct render_sprite_block_handlers_4x[] = diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index ffa9b9a0f..3818c95d2 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -45,7 +45,6 @@ void update_texture_ptr(psx_gpu_struct *psx_gpu) switch((psx_gpu->render_state_base >> 8) & 0x3) { - default: case TEXTURE_MODE_4BPP: texture_base = psx_gpu->texture_4bpp_cache[psx_gpu->current_texture_page]; @@ -75,6 +74,7 @@ void update_texture_ptr(psx_gpu_struct *psx_gpu) texture_ptr += (psx_gpu->texture_window_y >> 4) << 12; break; + default: case TEXTURE_MODE_16BPP: texture_base = (u8 *)(psx_gpu->vram_ptr); texture_base += (psx_gpu->current_texture_page & 0xF) * 128; From 718a9e586b1e50d6af813a85fa0b493d5eca1f77 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 1 Feb 2022 01:50:12 +0200 Subject: [PATCH 120/597] gpu_neon: fix apparent missing msb setting in blend_blocks_textured_add_fourth The source bit is taken, which also enables semi transparency, so if we did blending the bit must be set. 
--- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 7c820d273..c0199a08a 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -3935,7 +3935,7 @@ blend_blocks_add_untextured_builder(on) #define blend_blocks_subtract_combine_textured() \ vbif.u16 blend_pixels, pixels, blend_mask \ -#define blend_blocks_subtract_set_stb_textured() \ +#define blend_blocks_subtract_set_stp_textured() \ vorr.u16 blend_pixels, #0x8000 \ #define blend_blocks_subtract_msb_mask_textured() \ @@ -3945,7 +3945,7 @@ blend_blocks_add_untextured_builder(on) #define blend_blocks_subtract_combine_untextured() \ -#define blend_blocks_subtract_set_stb_untextured() \ +#define blend_blocks_subtract_set_stp_untextured() \ vorr.u16 blend_pixels, blend_pixels, msb_mask \ #define blend_blocks_subtract_msb_mask_untextured() \ @@ -4010,7 +4010,7 @@ function(blend_blocks_##texturing##_subtract_##mask_evaluate) \ vld1.u32 { pixels_next }, [pixel_ptr, :128], c_64; \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ vand.u16 pixels_rb, pixels_next, d128_0x7C1F; \ - blend_blocks_subtract_set_stb_##texturing(); \ + blend_blocks_subtract_set_stp_##texturing(); \ vand.u16 pixels_g, pixels_next, d128_0x03E0; \ blend_blocks_subtract_combine_##texturing(); \ blend_blocks_subtract_set_blend_mask_##texturing(); \ @@ -4038,7 +4038,7 @@ function(blend_blocks_##texturing##_subtract_##mask_evaluate) \ \ blend_blocks_subtract_msb_mask_##texturing(); \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ - blend_blocks_subtract_set_stb_##texturing(); \ + blend_blocks_subtract_set_stp_##texturing(); \ blend_blocks_subtract_combine_##texturing(); \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ @@ -4110,6 +4110,7 @@ 
function(blend_blocks_textured_add_fourth_##mask_evaluate) \ ldr fb_ptr_next, [pixel_ptr, #28]; \ \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ + vorr.u16 blend_pixels, #0x8000; /* stp */ \ vbif.u16 blend_pixels, pixels, blend_mask; \ \ vld1.u32 { pixels }, [pixel_ptr, :128], c_64; \ @@ -4145,8 +4146,9 @@ function(blend_blocks_textured_add_fourth_##mask_evaluate) \ \ 1: \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ - vorr.u16 blend_pixels, blend_pixels, msb_mask; \ + vorr.u16 blend_pixels, #0x8000; /* stp */ \ vbif.u16 blend_pixels, pixels, blend_mask; \ + vorr.u16 blend_pixels, blend_pixels, msb_mask; \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ From 6c192edd2b1d024d1270aa78cb1d3e6b2207d46d Mon Sep 17 00:00:00 2001 From: gameblabla Date: Sat, 5 Feb 2022 21:43:34 +0000 Subject: [PATCH 121/597] Fix silly typo mistake for CDROM pause/resume fix for DOA. (#242) My bad guys. --- libpcsxcore/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 4eea46bfc..191a7373d 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -775,7 +775,7 @@ void cdrInterrupt() { * Mednafen's timing don't work for Gundam Battle Assault 2 in PAL/50hz mode, * seems to be timing sensitive as it can depend on the CPU's clock speed. * */ - if (cdr.DriveState != DRIVESTATE_STANDBY) + if (cdr.DriveState == DRIVESTATE_STANDBY) { delay = 7000; } From 50ae51487697da0d2f9c93295f89d2f10694b6d8 Mon Sep 17 00:00:00 2001 From: gameblabla Date: Mon, 7 Feb 2022 19:17:34 +0000 Subject: [PATCH 122/597] clear Index0 data FIFO flag (#241) Merge PCSX Redux fix. This is what he had to say about it : "So, this commit works around/fixes two issues with loading unirom. There's a fix for logging invalid commands which should be pretty straight forward. The other change is around the FIFO flag. 
Not really experienced with debugging/verifying this sort of thing so not feeling really confident the change is "right", though every game I've tried so far still seem to work as expected. There's still something going on with not having an iso mounted that I haven't quite nailed down. Even having the iso mounted with "lid open" gives a bootable result. Feel like I'm going in circles a bit for something that might be an easy fix. Unirom is technically usable via ISO, but the unirom exe freezes if no iso is loaded" Co-authored-by: johnbaumann <76951440+johnbaumann@users.noreply.github.com> --- libpcsxcore/cdrom.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 191a7373d..ea973081d 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1281,8 +1281,8 @@ unsigned char cdrRead0(void) { if (cdr.OCUP) cdr.Ctrl |= 0x40; -// else -// cdr.Ctrl &= ~0x40; + else + cdr.Ctrl &= ~0x40; // What means the 0x10 and the 0x08 bits? 
I only saw it used by the bios cdr.Ctrl |= 0x18; @@ -1378,6 +1378,7 @@ unsigned char cdrRead2(void) { unsigned char ret; if (cdr.Readed == 0) { + cdr.OCUP = 0; ret = 0; } else { ret = *pTransfer++; From 9de8a0c3587effdbd19584cbca3baf566d1d21bd Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 2 Feb 2022 23:40:48 +0200 Subject: [PATCH 123/597] drc: rm unneeded &63 masking 64bit leftover --- libpcsxcore/new_dynarec/assem_arm.c | 4 +- libpcsxcore/new_dynarec/assem_arm64.c | 4 +- libpcsxcore/new_dynarec/new_dynarec.c | 413 +++++++++++++------------- 3 files changed, 215 insertions(+), 206 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 32ef9794a..da32f5b78 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -1808,7 +1808,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, { int rs=get_reg(regmap,target); int rt=get_reg(regmap,target); - if(rs<0) rs=get_reg(regmap,-1); + if(rs<0) rs=get_reg_temp(regmap); assert(rs>=0); u_int is_dynamic; uintptr_t host_addr = 0; @@ -1962,7 +1962,7 @@ static void do_writestub(int n) static void inline_writestub(enum stub_type type, int i, u_int addr, const signed char regmap[], int target, int adj, u_int reglist) { - int rs=get_reg(regmap,-1); + int rs=get_reg_temp(regmap); int rt=get_reg(regmap,target); assert(rs>=0); assert(rt>=0); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 84e4fc6e8..1157aafe4 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1479,7 +1479,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, { int rs=get_reg(regmap,target); int rt=get_reg(regmap,target); - if(rs<0) rs=get_reg(regmap,-1); + if(rs<0) rs=get_reg_temp(regmap); assert(rs>=0); u_int is_dynamic=0; uintptr_t host_addr = 0; @@ -1633,7 +1633,7 @@ static void do_writestub(int n) static void inline_writestub(enum stub_type 
type, int i, u_int addr, const signed char regmap[], int target, int adj, u_int reglist) { - int rs = get_reg(regmap,-1); + int rs = get_reg_temp(regmap); int rt = get_reg(regmap,target); assert(rs >= 0); assert(rt >= 0); diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index cc6c094de..abb0d0761 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -607,10 +607,27 @@ static void clear_all_regs(signed char regmap[]) memset(regmap, -1, sizeof(regmap[0]) * HOST_REGS); } -static signed char get_reg(const signed char regmap[],int r) +static signed char get_reg(const signed char regmap[], signed char r) { int hr; - for (hr=0;hrregmap[hr]&63)==reg) { - cur->dirty|=1<regmap, reg); + if (hr >= 0) + cur->dirty |= 1<regmap[hr]==reg) { - cur->isconst|=1<regmap, reg); + if (hr >= 0) { + cur->isconst |= 1<regmap[hr]&63)==reg) { - cur->isconst&=~(1<regmap, reg); + if (hr >= 0) + cur->isconst &= ~(1<regmap[hr]&63)==reg) { - return (cur->isconst>>hr)&1; - } - } + if (reg < 0) return 0; + if (!reg) return 1; + hr = get_reg(cur->regmap, reg); + if (hr >= 0) + return (cur->isconst>>hr)&1; return 0; } -static uint32_t get_const(struct regstat *cur, signed char reg) +static uint32_t get_const(const struct regstat *cur, signed char reg) { int hr; - if(!reg) return 0; - for (hr=0;hrregmap[hr]==reg) { - return current_constmap[hr]; - } - } - SysPrintf("Unknown constant in r%d\n",reg); + if (!reg) return 0; + hr = get_reg(cur->regmap, reg); + if (hr >= 0) + return current_constmap[hr]; + + SysPrintf("Unknown constant in r%d\n", reg); abort(); } @@ -879,14 +888,14 @@ void alloc_all(struct regstat *cur,int i) for(hr=0;hrregmap[hr]&63)!=dops[i].rs1)&&((cur->regmap[hr]&63)!=dops[i].rs2)&& - ((cur->regmap[hr]&63)!=dops[i].rt1)&&((cur->regmap[hr]&63)!=dops[i].rt2)) + if((cur->regmap[hr]!=dops[i].rs1)&&(cur->regmap[hr]!=dops[i].rs2)&& + (cur->regmap[hr]!=dops[i].rt1)&&(cur->regmap[hr]!=dops[i].rt2)) { 
cur->regmap[hr]=-1; cur->dirty&=~(1<regmap[hr]&63)==0) + if(cur->regmap[hr]==0) { cur->regmap[hr]=-1; cur->dirty&=~(1<regmap[preferred_reg]&63]==j) { for(hr=0;hrregmap[hr]&63)==r) { + if(cur->regmap[hr]==r) { cur->regmap[hr]=-1; cur->dirty&=~(1<isconst&=~(1<>hr)&1) { assert(regmap[hr]<64); emit_storereg(r,hr); @@ -2148,7 +2157,7 @@ static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int for(hr=0;hr>(reg&63))&1) { + if(((~u)>>reg)&1) { if(reg>0) { if(((dirty_pre&~dirty)>>hr)&1) { if(reg>0&®<34) { @@ -2806,12 +2815,12 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) // could be FIFO, must perform the read // ||dummy read assem_debug("(forced read)\n"); - tl=get_reg(i_regs->regmap,-1); + tl=get_reg_temp(i_regs->regmap); assert(tl>=0); } if(offset||s<0||c) addr=tl; else addr=s; - //if(tl<0) tl=get_reg(i_regs->regmap,-1); + //if(tl<0) tl=get_reg_temp(i_regs->regmap); if(tl>=0) { //printf("load_assemble: c=%d\n",c); //if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset); @@ -2942,7 +2951,7 @@ static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) u_int reglist=get_host_reglist(i_regs->regmap); tl=get_reg(i_regs->regmap,dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); - temp=get_reg(i_regs->regmap,-1); + temp=get_reg_temp(i_regs->regmap); temp2=get_reg(i_regs->regmap,FTEMP); addr=get_reg(i_regs->regmap,AGEN1+(i&1)); assert(addr<0); @@ -3028,7 +3037,7 @@ static void store_assemble(int i, const struct regstat *i_regs, int ccadj_) tl=get_reg(i_regs->regmap,dops[i].rs2); s=get_reg(i_regs->regmap,dops[i].rs1); temp=get_reg(i_regs->regmap,agr); - if(temp<0) temp=get_reg(i_regs->regmap,-1); + if(temp<0) temp=get_reg_temp(i_regs->regmap); offset=imm[i]; if(s>=0) { c=(i_regs->wasconst>>s)&1; @@ -3153,7 +3162,7 @@ static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) tl=get_reg(i_regs->regmap,dops[i].rs2); s=get_reg(i_regs->regmap,dops[i].rs1); 
temp=get_reg(i_regs->regmap,agr); - if(temp<0) temp=get_reg(i_regs->regmap,-1); + if(temp<0) temp=get_reg_temp(i_regs->regmap); offset=imm[i]; if(s>=0) { c=(i_regs->isconst>>s)&1; @@ -3582,7 +3591,7 @@ static void multdiv_do_stall(int i, const struct regstat *i_regs) { int j, known_cycles = 0; u_int reglist = get_host_reglist(i_regs->regmap); - int rtmp = get_reg(i_regs->regmap, -1); + int rtmp = get_reg_temp(i_regs->regmap); if (rtmp < 0) rtmp = reglist_find_free(reglist); if (HACK_ENABLED(NDHACK_NO_STALLS)) @@ -3731,7 +3740,7 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) // get the address if (dops[i].opcode==0x3a) { // SWC2 ar=get_reg(i_regs->regmap,agr); - if(ar<0) ar=get_reg(i_regs->regmap,-1); + if(ar<0) ar=get_reg_temp(i_regs->regmap); reglist|=1<>11) & 0x1f; - signed char temp = get_reg(i_regs->regmap, -1); + signed char temp = get_reg_temp(i_regs->regmap); if (!HACK_ENABLED(NDHACK_NO_STALLS)) { u_int reglist = reglist_exclude(get_host_reglist(i_regs->regmap), temp, -1); @@ -4216,7 +4225,7 @@ static void wb_invalidate(signed char pre[],signed char entry[],uint64_t dirty,u for(hr=0;hr=0&&(pre[hr]&63)=0&&pre[hr]=0) { emit_mov(hr,nr); @@ -4291,7 +4300,7 @@ void address_generation(int i, const struct regstat *i_regs, signed char entry[] int agr=AGEN1+(i&1); if(dops[i].itype==LOAD) { ra=get_reg(i_regs->regmap,dops[i].rt1); - if(ra<0) ra=get_reg(i_regs->regmap,-1); + if(ra<0) ra=get_reg_temp(i_regs->regmap); assert(ra>=0); } if(dops[i].itype==LOADLR) { @@ -4299,14 +4308,14 @@ void address_generation(int i, const struct regstat *i_regs, signed char entry[] } if(dops[i].itype==STORE||dops[i].itype==STORELR) { ra=get_reg(i_regs->regmap,agr); - if(ra<0) ra=get_reg(i_regs->regmap,-1); + if(ra<0) ra=get_reg_temp(i_regs->regmap); } if(dops[i].itype==C2LS) { if ((dops[i].opcode&0x3b)==0x31||(dops[i].opcode&0x3b)==0x32) // LWC1/LDC1/LWC2/LDC2 ra=get_reg(i_regs->regmap,FTEMP); else { // SWC1/SDC1/SWC2/SDC2 ra=get_reg(i_regs->regmap,agr); - 
if(ra<0) ra=get_reg(i_regs->regmap,-1); + if(ra<0) ra=get_reg_temp(i_regs->regmap); } } int rs=get_reg(i_regs->regmap,dops[i].rs1); @@ -4553,7 +4562,7 @@ static void load_all_regs(const signed char i_regmap[]) emit_zeroreg(hr); } else - if(i_regmap[hr]>0 && (i_regmap[hr]&63)0 && i_regmap[hr]0 && (i_regmap[hr]&63)0 && i_regmap[hr]regmap[hr]&63)!=dops[i].rs1 && - (i_regs->regmap[hr]&63)!=dops[i].rs2 ) + i_regs->regmap[hr]!=dops[i].rs1 && + i_regs->regmap[hr]!=dops[i].rs2 ) { addr=hr++;break; } @@ -5913,8 +5922,8 @@ static void pagespan_assemble(int i, const struct regstat *i_regs) while(hrregmap[hr]&63)!=dops[i].rs1 && - (i_regs->regmap[hr]&63)!=dops[i].rs2 ) + i_regs->regmap[hr]!=dops[i].rs1 && + i_regs->regmap[hr]!=dops[i].rs2 ) { alt=hr++;break; } @@ -5925,8 +5934,8 @@ static void pagespan_assemble(int i, const struct regstat *i_regs) while(hrregmap[hr]&63)!=dops[i].rs1 && - (i_regs->regmap[hr]&63)!=dops[i].rs2 ) + i_regs->regmap[hr]!=dops[i].rs1 && + i_regs->regmap[hr]!=dops[i].rs2 ) { ntaddr=hr;break; } @@ -6122,7 +6131,7 @@ static void pagespan_ds() } int btaddr=get_reg(regs[0].regmap,BTREG); if(btaddr<0) { - btaddr=get_reg(regs[0].regmap,-1); + btaddr=get_reg_temp(regs[0].regmap); emit_readword(&branch_target,btaddr); } assert(btaddr!=HOST_CCREG); @@ -6328,6 +6337,7 @@ void clean_registers(int istart,int iend,int wr) } for (i=iend;i>=istart;i--) { + __builtin_prefetch(regs[i-1].regmap); if(dops[i].is_jump) { if(ba[i]=(start+slen*4)) @@ -6341,18 +6351,18 @@ void clean_registers(int istart,int iend,int wr) // Merge in delay slot (will dirty) for(r=0;r33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<0 && (regmap_pre[i][r]&63)<34) { - 
temp_will_dirty|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<>(regmap_pre[i][r]&63))&1)<0 && regmap_pre[i][r]<34) { + temp_will_dirty|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<>regmap_pre[i][r])&1)<>2]&(1<=0) { - will_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(branch_regs[i].regmap[r]&63))&1)<>2]>>(branch_regs[i].regmap[r]&63))&1)<>2]>>branch_regs[i].regmap[r])&1)<>2]>>branch_regs[i].regmap[r])&1)<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<start+i*4) { // Disable recursion (for debugging) + //if(ba[i]>start+i*4) // Disable recursion (for debugging) for(r=0;r>2]&(1<=0) { - will_dirty_i&=((unneeded_reg[(ba[i]-start)>>2]>>(target_reg&63))&1)<>2]>>(target_reg&63))&1)<>2]>>target_reg)&1)<>2]>>target_reg)&1)<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<istart) { if (!dops[i].is_jump) { // Don't store a register immediately after writing it, // may prevent dual-issue. - if((regs[i].regmap[r]&63)==dops[i-1].rt1) wont_dirty_i|=1<0 && (regmap_pre[i][r]&63)<34) { - will_dirty_i|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<>(regmap_pre[i][r]&63))&1)<0 && regmap_pre[i][r]<34) { + will_dirty_i|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<>regmap_pre[i][r])&1)<>r)&1));*/ @@ -8105,7 +8113,7 @@ int new_recompile_block(u_int addr) if(r!=regmap_pre[i][hr]) { // TODO: delay slot (?) 
or=get_reg(regmap_pre[i],r); // Get old mapping for this register - if(or<0||(r&63)>=TEMPREG){ + if(or<0||r>=TEMPREG){ regs[i].regmap_entry[hr]=-1; } else @@ -8113,7 +8121,7 @@ int new_recompile_block(u_int addr) // Just move it to a different register regs[i].regmap_entry[hr]=r; // If it was dirty before, it's still dirty - if((regs[i].wasdirty>>or)&1) dirty_reg(¤t,r&63); + if((regs[i].wasdirty>>or)&1) dirty_reg(¤t,r); } } else @@ -8449,8 +8457,8 @@ int new_recompile_block(u_int addr) // Merge in delay slot for(hr=0;hr0&&!dops[i].bt&&((regs[i].wasdirty>>hr)&1)) { if((regmap_pre[i][hr]>0&&!((unneeded_reg[i]>>regmap_pre[i][hr])&1))) { - if(dops[i-1].rt1==(regmap_pre[i][hr]&63)) nr|=1<0&&!((unneeded_reg[i]>>regs[i].regmap_entry[hr])&1))) { - if(dops[i-1].rt1==(regs[i].regmap_entry[hr]&63)) nr|=1< %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r); if(r<34&&((unneeded_reg[j]>>r)&1)) break; assert(r < 64); - if(regs[j].regmap[hr]==f_regmap[hr]&&(f_regmap[hr]&63) %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r); int k; if(regs[i].regmap[hr]==-1&&branch_regs[i].regmap[hr]==-1) { @@ -8992,7 +9000,7 @@ int new_recompile_block(u_int addr) ||(dops[i+1].opcode&0x3b)==0x39||(dops[i+1].opcode&0x3b)==0x3a) { // SB/SH/SW/SD/SWC1/SDC1/SWC2/SDC2 if(get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { hr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1); - if(hr<0) hr=get_reg(regs[i+1].regmap,-1); + if(hr<0) hr=get_reg_temp(regs[i+1].regmap); else { regs[i+1].regmap[hr]=AGEN1+((i+1)&1); regs[i+1].isconst&=~(1<=0&®s[i].regmap[hr]<0) { int rs=get_reg(regs[i+1].regmap,dops[i+1].rs1); @@ -9258,6 +9266,7 @@ int new_recompile_block(u_int addr) } for(i=0;i Date: Thu, 3 Feb 2022 02:04:52 +0200 Subject: [PATCH 124/597] drc: use optimized get_reg arm32-only for now --- libpcsxcore/new_dynarec/linkage_arm.S | 40 +++++++++++++++++++++++++++ libpcsxcore/new_dynarec/new_dynarec.c | 9 ++++++ 2 files changed, 49 insertions(+) diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S 
index 8d9074f4d..1a16aa04d 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -840,4 +840,44 @@ FUNCTION(call_gteStall): add r10, r10, r0 bx lr +#ifdef HAVE_ARMV6 + +FUNCTION(get_reg): + ldr r12, [r0] + and r1, r1, #0xff + ldr r2, [r0, #4] + orr r1, r1, r1, lsl #8 + ldr r3, [r0, #8] + orr r1, r1, r1, lsl #16 @ searched char in every byte + ldrb r0, [r0, #12] @ last byte + eor r12, r12, r1 + eor r2, r2, r1 + eor r3, r3, r1 + cmp r0, r1, lsr #24 + mov r0, #12 + mvn r1, #0 @ r1=~0 + bxeq lr + orr r3, r3, #0xff000000 @ EXCLUDE_REG + uadd8 r0, r12, r1 @ add and set GE bits when not 0 (match) + mov r12, #0 + sel r0, r12, r1 @ 0 if no match, else ff in some byte + uadd8 r2, r2, r1 + sel r2, r12, r1 + uadd8 r3, r3, r1 + sel r3, r12, r1 + mov r12, #3 + clz r0, r0 @ 0, 8, 16, 24 or 32 + clz r2, r2 + clz r3, r3 + sub r0, r12, r0, lsr #3 @ 3, 2, 1, 0 or -1 + sub r2, r12, r2, lsr #3 + sub r3, r12, r3, lsr #3 + orr r2, r2, #4 + orr r3, r3, #8 + and r0, r0, r2 + and r0, r0, r3 + bx lr + +#endif /* HAVE_ARMV6 */ + @ vim:filetype=armasm diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index abb0d0761..0dea9a352 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -35,6 +35,7 @@ #include "../psxinterpreter.h" #include "../gte.h" #include "emu_if.h" // emulator interface +#include "arm_features.h" #define noinline __attribute__((noinline,noclone)) #ifndef ARRAY_SIZE @@ -607,6 +608,12 @@ static void clear_all_regs(signed char regmap[]) memset(regmap, -1, sizeof(regmap[0]) * HOST_REGS); } +#if defined(__arm__) && defined(HAVE_ARMV6) && HOST_REGS == 13 && EXCLUDE_REG == 11 + +extern signed char get_reg(const signed char regmap[], signed char r); + +#else + static signed char get_reg(const signed char regmap[], signed char r) { int hr; @@ -619,6 +626,8 @@ static signed char get_reg(const signed char regmap[], signed char r) return -1; } +#endif + static 
signed char get_reg_temp(const signed char regmap[]) { int hr; From 53dc27f6de389570312e2bae8a533230dc42ed1b Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Feb 2022 00:10:18 +0200 Subject: [PATCH 125/597] drc: try to make some passes not as slow --- libpcsxcore/new_dynarec/new_dynarec.c | 623 +++++++++++++------------- 1 file changed, 300 insertions(+), 323 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 0dea9a352..a8a750e4b 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -181,7 +181,7 @@ static struct decoded_insn u_char rs2; u_char rt1; u_char rt2; - u_char lt1; + u_char use_lt1:1; u_char bt:1; u_char ooo:1; u_char is_ds:1; @@ -200,7 +200,6 @@ static struct decoded_insn static struct ll_entry *jump_out[4096]; static u_int start; static u_int *source; - static char insn[MAXBLOCK][10]; static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs static uint64_t gte_rt[MAXBLOCK]; static uint64_t gte_unneeded[MAXBLOCK]; @@ -648,6 +647,36 @@ static signed char get_reg2(signed char regmap1[], const signed char regmap2[], return -1; } +// reverse reg map: mips -> host +#define RRMAP_SIZE 64 +static void make_rregs(const signed char regmap[], signed char rrmap[RRMAP_SIZE], + u_int *regs_can_change) +{ + u_int r, hr, hr_can_change = 0; + memset(rrmap, -1, RRMAP_SIZE); + for (hr = 0; hr < HOST_REGS; ) + { + r = regmap[hr]; + rrmap[r & (RRMAP_SIZE - 1)] = hr; + // only add mips $1-$31+$lo, others shifted out + hr_can_change |= (uint64_t)1 << (hr + ((r - 1) & 32)); + hr++; + if (hr == EXCLUDE_REG) + hr++; + } + hr_can_change |= 1u << (rrmap[33] & 31); + hr_can_change |= 1u << (rrmap[CCREG] & 31); + hr_can_change &= ~(1u << 31); + *regs_can_change = hr_can_change; +} + +// same as get_reg, but takes rrmap +static signed char get_rreg(signed char rrmap[RRMAP_SIZE], signed char r) +{ + assert(0 <= r && r < RRMAP_SIZE); + return rrmap[r]; +} + static int 
count_free_regs(const signed char regmap[]) { int count=0; @@ -1676,7 +1705,7 @@ static void shiftimm_alloc(struct regstat *current,int i) { if(dops[i].rt1) { if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); - else dops[i].lt1=dops[i].rs1; + else dops[i].use_lt1=!!dops[i].rs1; alloc_reg(current,i,dops[i].rt1); dirty_reg(current,dops[i].rt1); if(is_const(current,dops[i].rs1)) { @@ -1782,7 +1811,7 @@ static void alu_alloc(struct regstat *current,int i) static void imm16_alloc(struct regstat *current,int i) { if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); - else dops[i].lt1=dops[i].rs1; + else dops[i].use_lt1=!!dops[i].rs1; if(dops[i].rt1) alloc_reg(current,i,dops[i].rt1); if(dops[i].opcode==0x18||dops[i].opcode==0x19) { // DADDI/DADDIU assert(0); @@ -6331,7 +6360,7 @@ static void unneeded_registers(int istart,int iend,int r) // Write back dirty registers as soon as we will no longer modify them, // so that we don't end up with lots of writes at the branches. 
-void clean_registers(int istart,int iend,int wr) +static void clean_registers(int istart, int iend, int wr) { int i; int r; @@ -6346,80 +6375,63 @@ void clean_registers(int istart,int iend,int wr) } for (i=iend;i>=istart;i--) { + signed char rregmap_i[RRMAP_SIZE]; + u_int hr_candirty = 0; + assert(HOST_REGS < 32); + make_rregs(regs[i].regmap, rregmap_i, &hr_candirty); __builtin_prefetch(regs[i-1].regmap); if(dops[i].is_jump) { + signed char branch_rregmap_i[RRMAP_SIZE]; + u_int branch_hr_candirty = 0; + make_rregs(branch_regs[i].regmap, branch_rregmap_i, &branch_hr_candirty); if(ba[i]=(start+slen*4)) { // Branch out of this block, flush all regs + will_dirty_i = 0; + will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31); + will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31); + will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31); + will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31); + will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31); + will_dirty_i &= branch_hr_candirty; if (dops[i].is_ujump) { // Unconditional branch - will_dirty_i=0; - wont_dirty_i=0; + wont_dirty_i = 0; // Merge in delay slot (will dirty) - for(r=0;r33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<istart) { - if (!dops[i].is_jump) - { - // Don't store a register immediately after writing it, - // may prevent dual-issue. - if(regs[i].regmap[r]==dops[i-1].rt1) wont_dirty_i|=1< istart && !dops[i].is_jump) { + // Don't store a register immediately after writing it, + // may prevent dual-issue. 
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i-1].rt1) & 31); + wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i-1].rt2) & 31); } // Save it will_dirty[i]=will_dirty_i; @@ -6715,7 +6684,7 @@ void clean_registers(int istart,int iend,int wr) regs[i].wasdirty|=will_dirty_i&(1<=0&&(nr=get_reg(regs[i].regmap,regmap_pre[i][r]))>=0) { + else if(regmap_pre[i][r]>=0&&(nr=get_rreg(rregmap_i,regmap_pre[i][r]))>=0) { // Register moved to a different register will_dirty_i&=~(1< +static char insn[MAXBLOCK][10]; + +#define set_mnemonic(i_, n_) \ + strcpy(insn[i_], n_) + void print_regmap(const char *name, const signed char *regmap) { char buf[5]; @@ -6860,6 +6834,7 @@ void disassemble_inst(int i) } } #else +#define set_mnemonic(i_, n_) static void disassemble_inst(int i) {} #endif // DISASM @@ -7268,189 +7243,191 @@ int new_recompile_block(u_int addr) dops[i].opcode=op=source[i]>>26; switch(op) { - case 0x00: strcpy(insn[i],"special"); type=NI; + case 0x00: set_mnemonic(i, "special"); type=NI; op2=source[i]&0x3f; switch(op2) { - case 0x00: strcpy(insn[i],"SLL"); type=SHIFTIMM; break; - case 0x02: strcpy(insn[i],"SRL"); type=SHIFTIMM; break; - case 0x03: strcpy(insn[i],"SRA"); type=SHIFTIMM; break; - case 0x04: strcpy(insn[i],"SLLV"); type=SHIFT; break; - case 0x06: strcpy(insn[i],"SRLV"); type=SHIFT; break; - case 0x07: strcpy(insn[i],"SRAV"); type=SHIFT; break; - case 0x08: strcpy(insn[i],"JR"); type=RJUMP; break; - case 0x09: strcpy(insn[i],"JALR"); type=RJUMP; break; - case 0x0C: strcpy(insn[i],"SYSCALL"); type=SYSCALL; break; - case 0x0D: strcpy(insn[i],"BREAK"); type=SYSCALL; break; - case 0x0F: strcpy(insn[i],"SYNC"); type=OTHER; break; - case 0x10: strcpy(insn[i],"MFHI"); type=MOV; break; - case 0x11: strcpy(insn[i],"MTHI"); type=MOV; break; - case 0x12: strcpy(insn[i],"MFLO"); type=MOV; break; - case 0x13: strcpy(insn[i],"MTLO"); type=MOV; break; - case 0x18: strcpy(insn[i],"MULT"); type=MULTDIV; break; - case 0x19: strcpy(insn[i],"MULTU"); type=MULTDIV; break; - 
case 0x1A: strcpy(insn[i],"DIV"); type=MULTDIV; break; - case 0x1B: strcpy(insn[i],"DIVU"); type=MULTDIV; break; - case 0x20: strcpy(insn[i],"ADD"); type=ALU; break; - case 0x21: strcpy(insn[i],"ADDU"); type=ALU; break; - case 0x22: strcpy(insn[i],"SUB"); type=ALU; break; - case 0x23: strcpy(insn[i],"SUBU"); type=ALU; break; - case 0x24: strcpy(insn[i],"AND"); type=ALU; break; - case 0x25: strcpy(insn[i],"OR"); type=ALU; break; - case 0x26: strcpy(insn[i],"XOR"); type=ALU; break; - case 0x27: strcpy(insn[i],"NOR"); type=ALU; break; - case 0x2A: strcpy(insn[i],"SLT"); type=ALU; break; - case 0x2B: strcpy(insn[i],"SLTU"); type=ALU; break; - case 0x30: strcpy(insn[i],"TGE"); type=NI; break; - case 0x31: strcpy(insn[i],"TGEU"); type=NI; break; - case 0x32: strcpy(insn[i],"TLT"); type=NI; break; - case 0x33: strcpy(insn[i],"TLTU"); type=NI; break; - case 0x34: strcpy(insn[i],"TEQ"); type=NI; break; - case 0x36: strcpy(insn[i],"TNE"); type=NI; break; + case 0x00: set_mnemonic(i, "SLL"); type=SHIFTIMM; break; + case 0x02: set_mnemonic(i, "SRL"); type=SHIFTIMM; break; + case 0x03: set_mnemonic(i, "SRA"); type=SHIFTIMM; break; + case 0x04: set_mnemonic(i, "SLLV"); type=SHIFT; break; + case 0x06: set_mnemonic(i, "SRLV"); type=SHIFT; break; + case 0x07: set_mnemonic(i, "SRAV"); type=SHIFT; break; + case 0x08: set_mnemonic(i, "JR"); type=RJUMP; break; + case 0x09: set_mnemonic(i, "JALR"); type=RJUMP; break; + case 0x0C: set_mnemonic(i, "SYSCALL"); type=SYSCALL; break; + case 0x0D: set_mnemonic(i, "BREAK"); type=SYSCALL; break; + case 0x0F: set_mnemonic(i, "SYNC"); type=OTHER; break; + case 0x10: set_mnemonic(i, "MFHI"); type=MOV; break; + case 0x11: set_mnemonic(i, "MTHI"); type=MOV; break; + case 0x12: set_mnemonic(i, "MFLO"); type=MOV; break; + case 0x13: set_mnemonic(i, "MTLO"); type=MOV; break; + case 0x18: set_mnemonic(i, "MULT"); type=MULTDIV; break; + case 0x19: set_mnemonic(i, "MULTU"); type=MULTDIV; break; + case 0x1A: set_mnemonic(i, "DIV"); type=MULTDIV; break; + 
case 0x1B: set_mnemonic(i, "DIVU"); type=MULTDIV; break; + case 0x20: set_mnemonic(i, "ADD"); type=ALU; break; + case 0x21: set_mnemonic(i, "ADDU"); type=ALU; break; + case 0x22: set_mnemonic(i, "SUB"); type=ALU; break; + case 0x23: set_mnemonic(i, "SUBU"); type=ALU; break; + case 0x24: set_mnemonic(i, "AND"); type=ALU; break; + case 0x25: set_mnemonic(i, "OR"); type=ALU; break; + case 0x26: set_mnemonic(i, "XOR"); type=ALU; break; + case 0x27: set_mnemonic(i, "NOR"); type=ALU; break; + case 0x2A: set_mnemonic(i, "SLT"); type=ALU; break; + case 0x2B: set_mnemonic(i, "SLTU"); type=ALU; break; + case 0x30: set_mnemonic(i, "TGE"); type=NI; break; + case 0x31: set_mnemonic(i, "TGEU"); type=NI; break; + case 0x32: set_mnemonic(i, "TLT"); type=NI; break; + case 0x33: set_mnemonic(i, "TLTU"); type=NI; break; + case 0x34: set_mnemonic(i, "TEQ"); type=NI; break; + case 0x36: set_mnemonic(i, "TNE"); type=NI; break; #if 0 - case 0x14: strcpy(insn[i],"DSLLV"); type=SHIFT; break; - case 0x16: strcpy(insn[i],"DSRLV"); type=SHIFT; break; - case 0x17: strcpy(insn[i],"DSRAV"); type=SHIFT; break; - case 0x1C: strcpy(insn[i],"DMULT"); type=MULTDIV; break; - case 0x1D: strcpy(insn[i],"DMULTU"); type=MULTDIV; break; - case 0x1E: strcpy(insn[i],"DDIV"); type=MULTDIV; break; - case 0x1F: strcpy(insn[i],"DDIVU"); type=MULTDIV; break; - case 0x2C: strcpy(insn[i],"DADD"); type=ALU; break; - case 0x2D: strcpy(insn[i],"DADDU"); type=ALU; break; - case 0x2E: strcpy(insn[i],"DSUB"); type=ALU; break; - case 0x2F: strcpy(insn[i],"DSUBU"); type=ALU; break; - case 0x38: strcpy(insn[i],"DSLL"); type=SHIFTIMM; break; - case 0x3A: strcpy(insn[i],"DSRL"); type=SHIFTIMM; break; - case 0x3B: strcpy(insn[i],"DSRA"); type=SHIFTIMM; break; - case 0x3C: strcpy(insn[i],"DSLL32"); type=SHIFTIMM; break; - case 0x3E: strcpy(insn[i],"DSRL32"); type=SHIFTIMM; break; - case 0x3F: strcpy(insn[i],"DSRA32"); type=SHIFTIMM; break; + case 0x14: set_mnemonic(i, "DSLLV"); type=SHIFT; break; + case 0x16: set_mnemonic(i, 
"DSRLV"); type=SHIFT; break; + case 0x17: set_mnemonic(i, "DSRAV"); type=SHIFT; break; + case 0x1C: set_mnemonic(i, "DMULT"); type=MULTDIV; break; + case 0x1D: set_mnemonic(i, "DMULTU"); type=MULTDIV; break; + case 0x1E: set_mnemonic(i, "DDIV"); type=MULTDIV; break; + case 0x1F: set_mnemonic(i, "DDIVU"); type=MULTDIV; break; + case 0x2C: set_mnemonic(i, "DADD"); type=ALU; break; + case 0x2D: set_mnemonic(i, "DADDU"); type=ALU; break; + case 0x2E: set_mnemonic(i, "DSUB"); type=ALU; break; + case 0x2F: set_mnemonic(i, "DSUBU"); type=ALU; break; + case 0x38: set_mnemonic(i, "DSLL"); type=SHIFTIMM; break; + case 0x3A: set_mnemonic(i, "DSRL"); type=SHIFTIMM; break; + case 0x3B: set_mnemonic(i, "DSRA"); type=SHIFTIMM; break; + case 0x3C: set_mnemonic(i, "DSLL32"); type=SHIFTIMM; break; + case 0x3E: set_mnemonic(i, "DSRL32"); type=SHIFTIMM; break; + case 0x3F: set_mnemonic(i, "DSRA32"); type=SHIFTIMM; break; #endif } break; - case 0x01: strcpy(insn[i],"regimm"); type=NI; + case 0x01: set_mnemonic(i, "regimm"); type=NI; op2=(source[i]>>16)&0x1f; switch(op2) { - case 0x00: strcpy(insn[i],"BLTZ"); type=SJUMP; break; - case 0x01: strcpy(insn[i],"BGEZ"); type=SJUMP; break; - //case 0x02: strcpy(insn[i],"BLTZL"); type=SJUMP; break; - //case 0x03: strcpy(insn[i],"BGEZL"); type=SJUMP; break; - //case 0x08: strcpy(insn[i],"TGEI"); type=NI; break; - //case 0x09: strcpy(insn[i],"TGEIU"); type=NI; break; - //case 0x0A: strcpy(insn[i],"TLTI"); type=NI; break; - //case 0x0B: strcpy(insn[i],"TLTIU"); type=NI; break; - //case 0x0C: strcpy(insn[i],"TEQI"); type=NI; break; - //case 0x0E: strcpy(insn[i],"TNEI"); type=NI; break; - case 0x10: strcpy(insn[i],"BLTZAL"); type=SJUMP; break; - case 0x11: strcpy(insn[i],"BGEZAL"); type=SJUMP; break; - //case 0x12: strcpy(insn[i],"BLTZALL"); type=SJUMP; break; - //case 0x13: strcpy(insn[i],"BGEZALL"); type=SJUMP; break; + case 0x00: set_mnemonic(i, "BLTZ"); type=SJUMP; break; + case 0x01: set_mnemonic(i, "BGEZ"); type=SJUMP; break; + //case 0x02: 
set_mnemonic(i, "BLTZL"); type=SJUMP; break; + //case 0x03: set_mnemonic(i, "BGEZL"); type=SJUMP; break; + //case 0x08: set_mnemonic(i, "TGEI"); type=NI; break; + //case 0x09: set_mnemonic(i, "TGEIU"); type=NI; break; + //case 0x0A: set_mnemonic(i, "TLTI"); type=NI; break; + //case 0x0B: set_mnemonic(i, "TLTIU"); type=NI; break; + //case 0x0C: set_mnemonic(i, "TEQI"); type=NI; break; + //case 0x0E: set_mnemonic(i, "TNEI"); type=NI; break; + case 0x10: set_mnemonic(i, "BLTZAL"); type=SJUMP; break; + case 0x11: set_mnemonic(i, "BGEZAL"); type=SJUMP; break; + //case 0x12: set_mnemonic(i, "BLTZALL"); type=SJUMP; break; + //case 0x13: set_mnemonic(i, "BGEZALL"); type=SJUMP; break; } break; - case 0x02: strcpy(insn[i],"J"); type=UJUMP; break; - case 0x03: strcpy(insn[i],"JAL"); type=UJUMP; break; - case 0x04: strcpy(insn[i],"BEQ"); type=CJUMP; break; - case 0x05: strcpy(insn[i],"BNE"); type=CJUMP; break; - case 0x06: strcpy(insn[i],"BLEZ"); type=CJUMP; break; - case 0x07: strcpy(insn[i],"BGTZ"); type=CJUMP; break; - case 0x08: strcpy(insn[i],"ADDI"); type=IMM16; break; - case 0x09: strcpy(insn[i],"ADDIU"); type=IMM16; break; - case 0x0A: strcpy(insn[i],"SLTI"); type=IMM16; break; - case 0x0B: strcpy(insn[i],"SLTIU"); type=IMM16; break; - case 0x0C: strcpy(insn[i],"ANDI"); type=IMM16; break; - case 0x0D: strcpy(insn[i],"ORI"); type=IMM16; break; - case 0x0E: strcpy(insn[i],"XORI"); type=IMM16; break; - case 0x0F: strcpy(insn[i],"LUI"); type=IMM16; break; - case 0x10: strcpy(insn[i],"cop0"); type=NI; + case 0x02: set_mnemonic(i, "J"); type=UJUMP; break; + case 0x03: set_mnemonic(i, "JAL"); type=UJUMP; break; + case 0x04: set_mnemonic(i, "BEQ"); type=CJUMP; break; + case 0x05: set_mnemonic(i, "BNE"); type=CJUMP; break; + case 0x06: set_mnemonic(i, "BLEZ"); type=CJUMP; break; + case 0x07: set_mnemonic(i, "BGTZ"); type=CJUMP; break; + case 0x08: set_mnemonic(i, "ADDI"); type=IMM16; break; + case 0x09: set_mnemonic(i, "ADDIU"); type=IMM16; break; + case 0x0A: set_mnemonic(i, 
"SLTI"); type=IMM16; break; + case 0x0B: set_mnemonic(i, "SLTIU"); type=IMM16; break; + case 0x0C: set_mnemonic(i, "ANDI"); type=IMM16; break; + case 0x0D: set_mnemonic(i, "ORI"); type=IMM16; break; + case 0x0E: set_mnemonic(i, "XORI"); type=IMM16; break; + case 0x0F: set_mnemonic(i, "LUI"); type=IMM16; break; + case 0x10: set_mnemonic(i, "cop0"); type=NI; op2=(source[i]>>21)&0x1f; switch(op2) { - case 0x00: strcpy(insn[i],"MFC0"); type=COP0; break; - case 0x02: strcpy(insn[i],"CFC0"); type=COP0; break; - case 0x04: strcpy(insn[i],"MTC0"); type=COP0; break; - case 0x06: strcpy(insn[i],"CTC0"); type=COP0; break; - case 0x10: strcpy(insn[i],"RFE"); type=COP0; break; + case 0x00: set_mnemonic(i, "MFC0"); type=COP0; break; + case 0x02: set_mnemonic(i, "CFC0"); type=COP0; break; + case 0x04: set_mnemonic(i, "MTC0"); type=COP0; break; + case 0x06: set_mnemonic(i, "CTC0"); type=COP0; break; + case 0x10: set_mnemonic(i, "RFE"); type=COP0; break; } break; - case 0x11: strcpy(insn[i],"cop1"); type=COP1; + case 0x11: set_mnemonic(i, "cop1"); type=COP1; op2=(source[i]>>21)&0x1f; break; #if 0 - case 0x14: strcpy(insn[i],"BEQL"); type=CJUMP; break; - case 0x15: strcpy(insn[i],"BNEL"); type=CJUMP; break; - case 0x16: strcpy(insn[i],"BLEZL"); type=CJUMP; break; - case 0x17: strcpy(insn[i],"BGTZL"); type=CJUMP; break; - case 0x18: strcpy(insn[i],"DADDI"); type=IMM16; break; - case 0x19: strcpy(insn[i],"DADDIU"); type=IMM16; break; - case 0x1A: strcpy(insn[i],"LDL"); type=LOADLR; break; - case 0x1B: strcpy(insn[i],"LDR"); type=LOADLR; break; + case 0x14: set_mnemonic(i, "BEQL"); type=CJUMP; break; + case 0x15: set_mnemonic(i, "BNEL"); type=CJUMP; break; + case 0x16: set_mnemonic(i, "BLEZL"); type=CJUMP; break; + case 0x17: set_mnemonic(i, "BGTZL"); type=CJUMP; break; + case 0x18: set_mnemonic(i, "DADDI"); type=IMM16; break; + case 0x19: set_mnemonic(i, "DADDIU"); type=IMM16; break; + case 0x1A: set_mnemonic(i, "LDL"); type=LOADLR; break; + case 0x1B: set_mnemonic(i, "LDR"); 
type=LOADLR; break; #endif - case 0x20: strcpy(insn[i],"LB"); type=LOAD; break; - case 0x21: strcpy(insn[i],"LH"); type=LOAD; break; - case 0x22: strcpy(insn[i],"LWL"); type=LOADLR; break; - case 0x23: strcpy(insn[i],"LW"); type=LOAD; break; - case 0x24: strcpy(insn[i],"LBU"); type=LOAD; break; - case 0x25: strcpy(insn[i],"LHU"); type=LOAD; break; - case 0x26: strcpy(insn[i],"LWR"); type=LOADLR; break; + case 0x20: set_mnemonic(i, "LB"); type=LOAD; break; + case 0x21: set_mnemonic(i, "LH"); type=LOAD; break; + case 0x22: set_mnemonic(i, "LWL"); type=LOADLR; break; + case 0x23: set_mnemonic(i, "LW"); type=LOAD; break; + case 0x24: set_mnemonic(i, "LBU"); type=LOAD; break; + case 0x25: set_mnemonic(i, "LHU"); type=LOAD; break; + case 0x26: set_mnemonic(i, "LWR"); type=LOADLR; break; #if 0 - case 0x27: strcpy(insn[i],"LWU"); type=LOAD; break; + case 0x27: set_mnemonic(i, "LWU"); type=LOAD; break; #endif - case 0x28: strcpy(insn[i],"SB"); type=STORE; break; - case 0x29: strcpy(insn[i],"SH"); type=STORE; break; - case 0x2A: strcpy(insn[i],"SWL"); type=STORELR; break; - case 0x2B: strcpy(insn[i],"SW"); type=STORE; break; + case 0x28: set_mnemonic(i, "SB"); type=STORE; break; + case 0x29: set_mnemonic(i, "SH"); type=STORE; break; + case 0x2A: set_mnemonic(i, "SWL"); type=STORELR; break; + case 0x2B: set_mnemonic(i, "SW"); type=STORE; break; #if 0 - case 0x2C: strcpy(insn[i],"SDL"); type=STORELR; break; - case 0x2D: strcpy(insn[i],"SDR"); type=STORELR; break; + case 0x2C: set_mnemonic(i, "SDL"); type=STORELR; break; + case 0x2D: set_mnemonic(i, "SDR"); type=STORELR; break; #endif - case 0x2E: strcpy(insn[i],"SWR"); type=STORELR; break; - case 0x2F: strcpy(insn[i],"CACHE"); type=NOP; break; - case 0x30: strcpy(insn[i],"LL"); type=NI; break; - case 0x31: strcpy(insn[i],"LWC1"); type=C1LS; break; + case 0x2E: set_mnemonic(i, "SWR"); type=STORELR; break; + case 0x2F: set_mnemonic(i, "CACHE"); type=NOP; break; + case 0x30: set_mnemonic(i, "LL"); type=NI; break; + case 0x31: 
set_mnemonic(i, "LWC1"); type=C1LS; break; #if 0 - case 0x34: strcpy(insn[i],"LLD"); type=NI; break; - case 0x35: strcpy(insn[i],"LDC1"); type=C1LS; break; - case 0x37: strcpy(insn[i],"LD"); type=LOAD; break; + case 0x34: set_mnemonic(i, "LLD"); type=NI; break; + case 0x35: set_mnemonic(i, "LDC1"); type=C1LS; break; + case 0x37: set_mnemonic(i, "LD"); type=LOAD; break; #endif - case 0x38: strcpy(insn[i],"SC"); type=NI; break; - case 0x39: strcpy(insn[i],"SWC1"); type=C1LS; break; + case 0x38: set_mnemonic(i, "SC"); type=NI; break; + case 0x39: set_mnemonic(i, "SWC1"); type=C1LS; break; #if 0 - case 0x3C: strcpy(insn[i],"SCD"); type=NI; break; - case 0x3D: strcpy(insn[i],"SDC1"); type=C1LS; break; - case 0x3F: strcpy(insn[i],"SD"); type=STORE; break; + case 0x3C: set_mnemonic(i, "SCD"); type=NI; break; + case 0x3D: set_mnemonic(i, "SDC1"); type=C1LS; break; + case 0x3F: set_mnemonic(i, "SD"); type=STORE; break; #endif - case 0x12: strcpy(insn[i],"COP2"); type=NI; + case 0x12: set_mnemonic(i, "COP2"); type=NI; op2=(source[i]>>21)&0x1f; //if (op2 & 0x10) if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns if (gte_handlers[source[i]&0x3f]!=NULL) { +#ifdef DISASM if (gte_regnames[source[i]&0x3f]!=NULL) strcpy(insn[i],gte_regnames[source[i]&0x3f]); else snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f); +#endif type=C2OP; } } else switch(op2) { - case 0x00: strcpy(insn[i],"MFC2"); type=COP2; break; - case 0x02: strcpy(insn[i],"CFC2"); type=COP2; break; - case 0x04: strcpy(insn[i],"MTC2"); type=COP2; break; - case 0x06: strcpy(insn[i],"CTC2"); type=COP2; break; + case 0x00: set_mnemonic(i, "MFC2"); type=COP2; break; + case 0x02: set_mnemonic(i, "CFC2"); type=COP2; break; + case 0x04: set_mnemonic(i, "MTC2"); type=COP2; break; + case 0x06: set_mnemonic(i, "CTC2"); type=COP2; break; } break; - case 0x32: strcpy(insn[i],"LWC2"); type=C2LS; break; - case 0x3A: strcpy(insn[i],"SWC2"); type=C2LS; break; - case 0x3B: 
strcpy(insn[i],"HLECALL"); type=HLECALL; break; - default: strcpy(insn[i],"???"); type=NI; + case 0x32: set_mnemonic(i, "LWC2"); type=C2LS; break; + case 0x3A: set_mnemonic(i, "SWC2"); type=C2LS; break; + case 0x3B: set_mnemonic(i, "HLECALL"); type=HLECALL; break; + default: set_mnemonic(i, "???"); type=NI; SysPrintf("NI %08x @%08x (%08x)\n", source[i], addr + i*4, addr); break; } dops[i].itype=type; dops[i].opcode2=op2; /* Get registers/immediates */ - dops[i].lt1=0; + dops[i].use_lt1=0; gte_rs[i]=gte_rt[i]=0; switch(type) { case LOAD: @@ -8988,7 +8965,7 @@ int new_recompile_block(u_int addr) } } // Load source into target register - if(dops[i+1].lt1&&get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { + if(dops[i+1].use_lt1&&get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { if((hr=get_reg(regs[i+1].regmap,dops[i+1].rt1))>=0) { if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) From 4149788d40b6d55eb0d612f7068d2904ad3e5aa3 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Feb 2022 22:38:54 +0200 Subject: [PATCH 126/597] drc: put passes in their own functions mostly for profiling --- libpcsxcore/new_dynarec/new_dynarec.c | 1959 ++++++++++++------------- 1 file changed, 928 insertions(+), 1031 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index a8a750e4b..9913b1160 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -50,7 +50,6 @@ //#define DISASM //#define ASSEM_PRINT -//#define REG_ALLOC_PRINT #ifdef ASSEM_PRINT #define assem_debug printf @@ -340,6 +339,8 @@ void jump_break (u_int u0, u_int u1, u_int pc); void jump_break_ds(u_int u0, u_int u1, u_int pc); void jump_to_new_pc(); void call_gteStall(); +void clean_blocks(u_int page); +void add_jump_out(u_int vaddr, void *src); void new_dyna_leave(); // Needed by assembler @@ -745,7 +746,7 @@ static uint32_t get_const(const struct regstat *cur, signed char reg) // Least soon needed registers // Look at the next ten instructions 
and see which registers // will be used. Try not to reallocate these. -void lsn(u_char hsn[], int i, int *preferred_reg) +static void lsn(u_char hsn[], int i, int *preferred_reg) { int j; int b=-1; @@ -833,7 +834,7 @@ void lsn(u_char hsn[], int i, int *preferred_reg) } // We only want to allocate registers if we're going to use them again soon -int needed_again(int r, int i) +static int needed_again(int r, int i) { int j; int b=-1; @@ -878,7 +879,7 @@ int needed_again(int r, int i) // Try to match register allocations at the end of a loop with those // at the beginning -int loop_reg(int i, int r, int hr) +static int loop_reg(int i, int r, int hr) { int j,k; for(j=0;j<9;j++) @@ -920,7 +921,7 @@ int loop_reg(int i, int r, int hr) // Allocate every register, preserving source/target regs -void alloc_all(struct regstat *cur,int i) +static void alloc_all(struct regstat *cur,int i) { int hr; @@ -985,6 +986,7 @@ static const struct { FUNCNAME(jump_syscall), FUNCNAME(jump_syscall_ds), FUNCNAME(call_gteStall), + FUNCNAME(clean_blocks), FUNCNAME(new_dyna_leave), FUNCNAME(pcsx_mtc0), FUNCNAME(pcsx_mtc0_ds), @@ -1064,7 +1066,7 @@ static void emit_far_call(const void *f) } // Add virtual address mapping to linked list -void ll_add(struct ll_entry **head,int vaddr,void *addr) +static void ll_add(struct ll_entry **head,int vaddr,void *addr) { struct ll_entry *new_entry; new_entry=malloc(sizeof(struct ll_entry)); @@ -1076,7 +1078,7 @@ void ll_add(struct ll_entry **head,int vaddr,void *addr) *head=new_entry; } -void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr) +static void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr) { ll_add(head,vaddr,addr); (*head)->reg_sv_flags=reg_sv_flags; @@ -1084,7 +1086,7 @@ void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr // Check if an address is already compiled // but don't return addresses which are about to expire from the cache -void *check_addr(u_int 
vaddr) +static void *check_addr(u_int vaddr) { struct ht_entry *ht_bin = hash_table_get(vaddr); size_t i; @@ -1167,7 +1169,7 @@ static void ll_remove_matching_addrs(struct ll_entry **head, } // Remove all entries from linked list -void ll_clear(struct ll_entry **head) +static void ll_clear(struct ll_entry **head) { struct ll_entry *cur; struct ll_entry *next; @@ -1888,7 +1890,7 @@ static void load_alloc(struct regstat *current,int i) } } -void store_alloc(struct regstat *current,int i) +static void store_alloc(struct regstat *current,int i) { clear_const(current,dops[i].rs2); if(!(dops[i].rs2)) current->u&=~1LL; // Allow allocating r0 if necessary @@ -1911,13 +1913,13 @@ void store_alloc(struct regstat *current,int i) minimum_free_regs[i]=1; } -void c1ls_alloc(struct regstat *current,int i) +static void c1ls_alloc(struct regstat *current,int i) { clear_const(current,dops[i].rt1); alloc_reg(current,i,CSREG); // Status } -void c2ls_alloc(struct regstat *current,int i) +static void c2ls_alloc(struct regstat *current,int i) { clear_const(current,dops[i].rt1); if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); @@ -1935,7 +1937,7 @@ void c2ls_alloc(struct regstat *current,int i) } #ifndef multdiv_alloc -void multdiv_alloc(struct regstat *current,int i) +static void multdiv_alloc(struct regstat *current,int i) { // case 0x18: MULT // case 0x19: MULTU @@ -1979,7 +1981,7 @@ void multdiv_alloc(struct regstat *current,int i) } #endif -void cop0_alloc(struct regstat *current,int i) +static void cop0_alloc(struct regstat *current,int i) { if(dops[i].opcode2==0) // MFC0 { @@ -2039,14 +2041,14 @@ static void cop2_alloc(struct regstat *current,int i) minimum_free_regs[i]=1; } -void c2op_alloc(struct regstat *current,int i) +static void c2op_alloc(struct regstat *current,int i) { alloc_cc(current,i); // for stalls dirty_reg(current,CCREG); alloc_reg_temp(current,i,-1); } -void syscall_alloc(struct regstat *current,int i) +static void syscall_alloc(struct regstat 
*current,int i) { alloc_cc(current,i); dirty_reg(current,CCREG); @@ -2055,7 +2057,7 @@ void syscall_alloc(struct regstat *current,int i) current->isconst=0; } -void delayslot_alloc(struct regstat *current,int i) +static void delayslot_alloc(struct regstat *current,int i) { switch(dops[i].itype) { case UJUMP: @@ -4331,7 +4333,7 @@ static void loop_preload(signed char pre[],signed char entry[]) // Generate address for load/store instruction // goes to AGEN for writes, FTEMP for LOADLR and cop1/2 loads -void address_generation(int i, const struct regstat *i_regs, signed char entry[]) +static void address_generation(int i, const struct regstat *i_regs, signed char entry[]) { if (dops[i].is_load || dops[i].is_store) { int ra=-1; @@ -4652,7 +4654,7 @@ static void load_regs_entry(int t) } // Store dirty registers prior to branch -void store_regs_bt(signed char i_regmap[],uint64_t i_dirty,int addr) +static void store_regs_bt(signed char i_regmap[],uint64_t i_dirty,int addr) { if(internal_branch(addr)) { @@ -6204,515 +6206,6 @@ static void check_regmap(signed char *regmap) #endif } -// Basic liveness analysis for MIPS registers -static void unneeded_registers(int istart,int iend,int r) -{ - int i; - uint64_t u,gte_u,b,gte_b; - uint64_t temp_u,temp_gte_u=0; - uint64_t gte_u_unknown=0; - if (HACK_ENABLED(NDHACK_GTE_UNNEEDED)) - gte_u_unknown=~0ll; - if(iend==slen-1) { - u=1; - gte_u=gte_u_unknown; - }else{ - //u=unneeded_reg[iend+1]; - u=1; - gte_u=gte_unneeded[iend+1]; - } - - for (i=iend;i>=istart;i--) - { - //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r); - if(dops[i].is_jump) - { - // If subroutine call, flag return address as a possible branch target - if(dops[i].rt1==31 && i=(start+slen*4)) - { - // Branch out of this block, flush all regs - u=1; - gte_u=gte_u_unknown; - branch_unneeded_reg[i]=u; - // Merge in delay slot - u|=(1LL<>2].bt=1; - if(ba[i]<=start+i*4) { - // Backward branch - if(dops[i].is_ujump) - { - // Unconditional branch - temp_u=1; - 
temp_gte_u=0; - } else { - // Conditional branch (not taken case) - temp_u=unneeded_reg[i+2]; - temp_gte_u&=gte_unneeded[i+2]; - } - // Merge in delay slot - temp_u|=(1LL<>2,i-1,r+1); - }else{ - unneeded_reg[(ba[i]-start)>>2]=1; - gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown; - } - } /*else*/ if(1) { - if (dops[i].is_ujump) - { - // Unconditional branch - u=unneeded_reg[(ba[i]-start)>>2]; - gte_u=gte_unneeded[(ba[i]-start)>>2]; - branch_unneeded_reg[i]=u; - // Merge in delay slot - u|=(1LL<>2]; - gte_b=gte_unneeded[(ba[i]-start)>>2]; - branch_unneeded_reg[i]=b; - // Branch delay slot - b|=(1LL<>r)&1) { - if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } - printf("\n"); - */ - } -} - -// Write back dirty registers as soon as we will no longer modify them, -// so that we don't end up with lots of writes at the branches. -static void clean_registers(int istart, int iend, int wr) -{ - int i; - int r; - u_int will_dirty_i,will_dirty_next,temp_will_dirty; - u_int wont_dirty_i,wont_dirty_next,temp_wont_dirty; - if(iend==slen-1) { - will_dirty_i=will_dirty_next=0; - wont_dirty_i=wont_dirty_next=0; - }else{ - will_dirty_i=will_dirty_next=will_dirty[iend+1]; - wont_dirty_i=wont_dirty_next=wont_dirty[iend+1]; - } - for (i=iend;i>=istart;i--) - { - signed char rregmap_i[RRMAP_SIZE]; - u_int hr_candirty = 0; - assert(HOST_REGS < 32); - make_rregs(regs[i].regmap, rregmap_i, &hr_candirty); - __builtin_prefetch(regs[i-1].regmap); - if(dops[i].is_jump) - { - signed char branch_rregmap_i[RRMAP_SIZE]; - u_int branch_hr_candirty = 0; - make_rregs(branch_regs[i].regmap, branch_rregmap_i, &branch_hr_candirty); - if(ba[i]=(start+slen*4)) - { - // Branch out of this block, flush all regs - will_dirty_i = 0; - will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31); - will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31); - will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31); - will_dirty_i |= 1u 
<< (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31); - will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31); - will_dirty_i &= branch_hr_candirty; - if (dops[i].is_ujump) - { - // Unconditional branch - wont_dirty_i = 0; - // Merge in delay slot (will dirty) - will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31); - will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31); - will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31); - will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31); - will_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31); - will_dirty_i &= hr_candirty; - } - else - { - // Conditional branch - wont_dirty_i = wont_dirty_next; - // Merge in delay slot (will dirty) - // (the original code had no explanation why these 2 are commented out) - //will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31); - //will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31); - will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31); - will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31); - will_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31); - will_dirty_i &= hr_candirty; - } - // Merge in delay slot (wont dirty) - wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31); - wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31); - wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31); - wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31); - wont_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31); - wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31); - wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31); - wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31); - wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31); - wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31); - wont_dirty_i &= ~(1u << 31); - if(wr) { - #ifndef DESTRUCTIVE_WRITEBACK - 
branch_regs[i].dirty&=wont_dirty_i; - #endif - branch_regs[i].dirty|=will_dirty_i; - } - } - else - { - // Internal branch - if(ba[i]<=start+i*4) { - // Backward branch - if (dops[i].is_ujump) - { - // Unconditional branch - temp_will_dirty=0; - temp_wont_dirty=0; - // Merge in delay slot (will dirty) - temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31); - temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31); - temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31); - temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31); - temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31); - temp_will_dirty &= branch_hr_candirty; - temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31); - temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31); - temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31); - temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31); - temp_will_dirty |= 1u << (get_rreg(rregmap_i, CCREG) & 31); - temp_will_dirty &= hr_candirty; - } else { - // Conditional branch (not taken case) - temp_will_dirty=will_dirty_next; - temp_wont_dirty=wont_dirty_next; - // Merge in delay slot (will dirty) - temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31); - temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31); - temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31); - temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31); - temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31); - temp_will_dirty &= branch_hr_candirty; - //temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31); - //temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31); - temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31); - temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31); - temp_will_dirty |= 1u << (get_rreg(rregmap_i, CCREG) & 
31); - temp_will_dirty &= hr_candirty; - } - // Merge in delay slot (wont dirty) - temp_wont_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31); - temp_wont_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31); - temp_wont_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31); - temp_wont_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31); - temp_wont_dirty |= 1u << (get_rreg(rregmap_i, CCREG) & 31); - temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31); - temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31); - temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31); - temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31); - temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31); - temp_wont_dirty &= ~(1u << 31); - // Deal with changed mappings - if(i0 && regmap_pre[i][r]<34) { - temp_will_dirty|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<>regmap_pre[i][r])&1)<>2,i-1,0); - }else{ - // Limit recursion. It can take an excessive amount - // of time if there are a lot of nested loops. - will_dirty[(ba[i]-start)>>2]=0; - wont_dirty[(ba[i]-start)>>2]=-1; - } - } - /*else*/ if(1) - { - if (dops[i].is_ujump) - { - // Unconditional branch - will_dirty_i=0; - wont_dirty_i=0; - //if(ba[i]>start+i*4) { // Disable recursion (for debugging) - for(r=0;r>2].regmap_entry[r]) { - will_dirty_i|=will_dirty[(ba[i]-start)>>2]&(1<>2]&(1<=0) { - will_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>branch_regs[i].regmap[r])&1)<>2]>>branch_regs[i].regmap[r])&1)<start+i*4) // Disable recursion (for debugging) - for(r=0;r>2].regmap_entry[r]) { - will_dirty_i&=will_dirty[(ba[i]-start)>>2]&(1<>2]&(1<=0) { - will_dirty_i&=((unneeded_reg[(ba[i]-start)>>2]>>target_reg)&1)<>2]>>target_reg)&1)< istart && !dops[i].is_jump) { - // Don't store a register immediately after writing it, - // may prevent dual-issue. 
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i-1].rt1) & 31); - wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i-1].rt2) & 31); - } - // Save it - will_dirty[i]=will_dirty_i; - wont_dirty[i]=wont_dirty_i; - // Mark registers that won't be dirtied as not dirty - if(wr) { - regs[i].dirty|=will_dirty_i; - #ifndef DESTRUCTIVE_WRITEBACK - regs[i].dirty&=wont_dirty_i; - if(dops[i].is_jump) - { - if (i < iend-1 && !dops[i].is_ujump) { - for(r=0;r>r)&1));*/} - } - } - } - } - else - { - if(i>r)&1));*/} - } - } - } - } - #endif - } - // Deal with changed mappings - temp_will_dirty=will_dirty_i; - temp_wont_dirty=wont_dirty_i; - for(r=0;r=0&&(nr=get_rreg(rregmap_i,regmap_pre[i][r]))>=0) { - // Register moved to a different register - will_dirty_i&=~(1<>nr)&1)<>nr)&1)<0 && regmap_pre[i][r]<34) { - will_dirty_i|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<>regmap_pre[i][r])&1)<>r)&1));*/ - } - } - } - } - } -} - #ifdef DISASM #include static char insn[MAXBLOCK][10]; @@ -7145,96 +6638,11 @@ static int apply_hacks(void) return 0; } -int new_recompile_block(u_int addr) +static noinline void pass1_disassemble(u_int pagelimit) { - u_int pagelimit = 0; - u_int state_rflags = 0; - int i; - - assem_debug("NOTCOMPILED: addr = %x -> %p\n", addr, out); - //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr); - //if(debug) - //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29); - - // this is just for speculation - for (i = 1; i < 32; i++) { - if ((psxRegs.GPR.r[i] & 0xffff0000) == 0x1f800000) - state_rflags |= 1 << i; - } - - start = (u_int)addr&~3; - //assert(((u_int)addr&1)==0); // start-in-delay-slot flag - new_dynarec_did_compile=1; - if (Config.HLE && start == 0x80001000) // hlecall - { - // XXX: is this enough? Maybe check hleSoftCall? 
- void *beginning=start_block(); - u_int page=get_page(start); - - invalid_code[start>>12]=0; - emit_movimm(start,0); - emit_writeword(0,&pcaddr); - emit_far_jump(new_dyna_leave); - literal_pool(0); - end_block(beginning); - ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning); - return 0; - } - else if (f1_hack && hack_addr == 0) { - void *beginning = start_block(); - u_int page = get_page(start); - emit_movimm(start, 0); - emit_writeword(0, &hack_addr); - emit_readword(&psxRegs.GPR.n.sp, 0); - emit_readptr(&mem_rtab, 1); - emit_shrimm(0, 12, 2); - emit_readptr_dualindexedx_ptrlen(1, 2, 1); - emit_addimm(0, 0x18, 0); - emit_adds_ptr(1, 1, 1); - emit_ldr_dualindexed(1, 0, 0); - emit_writeword(0, &psxRegs.GPR.r[26]); // lw k0, 0x18(sp) - emit_far_call(get_addr_ht); - emit_jmpreg(0); // jr k0 - literal_pool(0); - end_block(beginning); - - ll_add_flags(jump_in + page, start, state_rflags, beginning); - SysPrintf("F1 hack to %08x\n", start); - return 0; - } - - cycle_multiplier_active = cycle_multiplier_override && cycle_multiplier == CYCLE_MULT_DEFAULT - ? 
cycle_multiplier_override : cycle_multiplier; - - source = get_source_start(start, &pagelimit); - if (source == NULL) { - if (addr != hack_addr) { - SysPrintf("Compile at bogus memory address: %08x\n", addr); - hack_addr = addr; - } - //abort(); - return -1; - } - - /* Pass 1: disassemble */ - /* Pass 2: register dependencies, branch targets */ - /* Pass 3: register allocation */ - /* Pass 4: branch dependencies */ - /* Pass 5: pre-alloc */ - /* Pass 6: optimize clean/dirty state */ - /* Pass 7: flag 32-bit registers */ - /* Pass 8: assembly */ - /* Pass 9: linker */ - /* Pass 10: garbage collection / free memory */ - - int j; - int done = 0, ni_count = 0; + int i, j, done = 0, ni_count = 0; unsigned int type,op,op2; - //printf("addr = %x source = %x %x\n", addr,source,source[0]); - - /* Pass 1 disassembly */ - for (i = 0; !done; i++) { memset(&dops[i], 0, sizeof(dops[i])); @@ -7421,7 +6829,7 @@ int new_recompile_block(u_int addr) case 0x3A: set_mnemonic(i, "SWC2"); type=C2LS; break; case 0x3B: set_mnemonic(i, "HLECALL"); type=HLECALL; break; default: set_mnemonic(i, "???"); type=NI; - SysPrintf("NI %08x @%08x (%08x)\n", source[i], addr + i*4, addr); + SysPrintf("NI %08x @%08x (%08x)\n", source[i], start + i*4, start); break; } dops[i].itype=type; @@ -7660,7 +7068,7 @@ int new_recompile_block(u_int addr) // branch in delay slot? 
if (dops[i].is_jump) { // don't handle first branch and call interpreter if it's hit - SysPrintf("branch in delay slot @%08x (%08x)\n", addr + i*4, addr); + SysPrintf("branch in delay slot @%08x (%08x)\n", start + i*4, start); do_in_intrp=1; } // basic load delay detection @@ -7668,14 +7076,14 @@ int new_recompile_block(u_int addr) int t=(ba[i-1]-start)/4; if(0 <= t && t < i &&(dops[i].rt1==dops[t].rs1||dops[i].rt1==dops[t].rs2)&&dops[t].itype!=CJUMP&&dops[t].itype!=SJUMP) { // jump target wants DS result - potential load delay effect - SysPrintf("load delay @%08x (%08x)\n", addr + i*4, addr); + SysPrintf("load delay @%08x (%08x)\n", start + i*4, start); do_in_intrp=1; dops[t+1].bt=1; // expected return from interpreter } else if(i>=2&&dops[i-2].rt1==2&&dops[i].rt1==2&&dops[i].rs1!=2&&dops[i].rs2!=2&&dops[i-1].rs1!=2&&dops[i-1].rs2!=2&& !(i>=3&&dops[i-3].is_jump)) { // v0 overwrite like this is a sign of trouble, bail out - SysPrintf("v0 overwrite @%08x (%08x)\n", addr + i*4, addr); + SysPrintf("v0 overwrite @%08x (%08x)\n", start + i*4, start); do_in_intrp=1; } } @@ -7731,16 +7139,165 @@ int new_recompile_block(u_int addr) dops[i-1].itype=SPAN; } } - assert(slen>0); - - int clear_hack_addr = apply_hacks(); - - /* Pass 2 - Register dependencies and branch targets */ - - unneeded_registers(0,slen-1,0); - - /* Pass 3 - Register allocation */ + assert(slen>0); +} + +// Basic liveness analysis for MIPS registers +static noinline void pass2_unneeded_regs(int istart,int iend,int r) +{ + int i; + uint64_t u,gte_u,b,gte_b; + uint64_t temp_u,temp_gte_u=0; + uint64_t gte_u_unknown=0; + if (HACK_ENABLED(NDHACK_GTE_UNNEEDED)) + gte_u_unknown=~0ll; + if(iend==slen-1) { + u=1; + gte_u=gte_u_unknown; + }else{ + //u=unneeded_reg[iend+1]; + u=1; + gte_u=gte_unneeded[iend+1]; + } + + for (i=iend;i>=istart;i--) + { + //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r); + if(dops[i].is_jump) + { + // If subroutine call, flag return address as a possible branch target + 
if(dops[i].rt1==31 && i=(start+slen*4)) + { + // Branch out of this block, flush all regs + u=1; + gte_u=gte_u_unknown; + branch_unneeded_reg[i]=u; + // Merge in delay slot + u|=(1LL<>2].bt=1; + if(ba[i]<=start+i*4) { + // Backward branch + if(dops[i].is_ujump) + { + // Unconditional branch + temp_u=1; + temp_gte_u=0; + } else { + // Conditional branch (not taken case) + temp_u=unneeded_reg[i+2]; + temp_gte_u&=gte_unneeded[i+2]; + } + // Merge in delay slot + temp_u|=(1LL<>2,i-1,r+1); + }else{ + unneeded_reg[(ba[i]-start)>>2]=1; + gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown; + } + } /*else*/ if(1) { + if (dops[i].is_ujump) + { + // Unconditional branch + u=unneeded_reg[(ba[i]-start)>>2]; + gte_u=gte_unneeded[(ba[i]-start)>>2]; + branch_unneeded_reg[i]=u; + // Merge in delay slot + u|=(1LL<>2]; + gte_b=gte_unneeded[(ba[i]-start)>>2]; + branch_unneeded_reg[i]=b; + // Branch delay slot + b|=(1LL<>r)&1) { + if(r==HIREG) printf(" HI"); + else if(r==LOREG) printf(" LO"); + else printf(" r%d",r); + } + } + printf("\n"); + */ + } +} +static noinline void pass3_register_alloc(u_int addr) +{ struct regstat current; // Current register allocations/status clear_all_regs(current.regmap_entry); clear_all_regs(current.regmap); @@ -7754,9 +7311,10 @@ int new_recompile_block(u_int addr) current.waswritten = 0; int ds=0; int cc=0; - int hr=-1; + int hr; + int i, j; - if((u_int)addr&1) { + if (addr & 1) { // First instruction is delay slot cc=-1; dops[1].bt=1; @@ -7769,7 +7327,6 @@ int new_recompile_block(u_int addr) { if(dops[i].bt) { - int hr; for(hr=0;hr=0;i--) { @@ -8610,13 +8169,14 @@ int new_recompile_block(u_int addr) } // if needed } // for hr } +} - /* Pass 5 - Pre-allocate registers */ - - // If a register is allocated during a loop, try to allocate it for the - // entire loop, if possible. This avoids loading/storing registers - // inside of the loop. - +// If a register is allocated during a loop, try to allocate it for the +// entire loop, if possible. 
This avoids loading/storing registers +// inside of the loop. +static noinline void pass5a_preallocate1(void) +{ + int i, j, hr; signed char f_regmap[HOST_REGS]; clear_all_regs(f_regmap); for(i=0;i %x\n",start+k*4,start+j*4); + while(ki&&f_regmap[HOST_CCREG]==CCREG) + { + //printf("Extend backwards\n"); + int k; + k=i; + while(regs[k-1].regmap[HOST_CCREG]==-1) { + if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) { + //printf("no free regs for store %x\n",start+(k-1)*4); + break; + } + k--; + } + if(regs[k-1].regmap[HOST_CCREG]==CCREG) { + //printf("Extend CC, %x ->\n",start+k*4); + while(k<=i) { + regs[k].regmap_entry[HOST_CCREG]=CCREG; + regs[k].regmap[HOST_CCREG]=CCREG; + regmap_pre[k+1][HOST_CCREG]=CCREG; + regs[k+1].wasdirty|=1<\n",start+k*4); + } + } + } + if(dops[i].itype!=STORE&&dops[i].itype!=STORELR&&dops[i].itype!=C1LS&&dops[i].itype!=SHIFT&& + dops[i].itype!=NOP&&dops[i].itype!=MOV&&dops[i].itype!=ALU&&dops[i].itype!=SHIFTIMM&& + dops[i].itype!=IMM16&&dops[i].itype!=LOAD&&dops[i].itype!=COP1) + { + memcpy(f_regmap,regs[i].regmap,sizeof(f_regmap)); + } + } + } +} + +// This allocates registers (if possible) one instruction prior +// to use, which can avoid a load-use penalty on certain CPUs. 
+static noinline void pass5b_preallocate2(void) +{ + int i, hr; + for(i=0;i=0) + { + if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) + { + regs[i].regmap[hr]=regs[i+1].regmap[hr]; + regmap_pre[i+1][hr]=regs[i+1].regmap[hr]; + regs[i+1].regmap_entry[hr]=regs[i+1].regmap[hr]; + regs[i].isconst&=~(1<=0) + { + if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) + { + regs[i].regmap[hr]=regs[i+1].regmap[hr]; + regmap_pre[i+1][hr]=regs[i+1].regmap[hr]; + regs[i+1].regmap_entry[hr]=regs[i+1].regmap[hr]; + regs[i].isconst&=~(1<=0) + { + if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) + { + regs[i].regmap[hr]=dops[i+1].rs1; + regmap_pre[i+1][hr]=dops[i+1].rs1; + regs[i+1].regmap_entry[hr]=dops[i+1].rs1; + regs[i].isconst&=~(1<=0) + { + if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) + { + regs[i].regmap[hr]=dops[i+1].rs1; + regmap_pre[i+1][hr]=dops[i+1].rs1; + regs[i+1].regmap_entry[hr]=dops[i+1].rs1; + regs[i].isconst&=~(1<=0); + if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) + { + regs[i].regmap[hr]=dops[i+1].rs1; + regmap_pre[i+1][hr]=dops[i+1].rs1; + regs[i+1].regmap_entry[hr]=dops[i+1].rs1; + regs[i].isconst&=~(1<=0); + if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) + { + regs[i].regmap[hr]=dops[i+1].rs1; + regmap_pre[i+1][hr]=dops[i+1].rs1; + regs[i+1].regmap_entry[hr]=dops[i+1].rs1; + regs[i].isconst&=~(1<=0) + { + // move it to another register + regs[i+1].regmap[hr]=-1; + regmap_pre[i+2][hr]=-1; + regs[i+1].regmap[nr]=FTEMP; + regmap_pre[i+2][nr]=FTEMP; + regs[i].regmap[nr]=dops[i+1].rs1; + regmap_pre[i+1][nr]=dops[i+1].rs1; + regs[i+1].regmap_entry[nr]=dops[i+1].rs1; + regs[i].isconst&=~(1<=0&®s[i].regmap[hr]<0) { + int rs=get_reg(regs[i+1].regmap,dops[i+1].rs1); + if(rs>=0&&((regs[i+1].wasconst>>rs)&1)) { + regs[i].regmap[hr]=AGEN1+((i+1)&1); + regmap_pre[i+1][hr]=AGEN1+((i+1)&1); + regs[i+1].regmap_entry[hr]=AGEN1+((i+1)&1); + regs[i].isconst&=~(1<=istart;i--) + { + signed char rregmap_i[RRMAP_SIZE]; + u_int hr_candirty = 0; + 
assert(HOST_REGS < 32); + make_rregs(regs[i].regmap, rregmap_i, &hr_candirty); + __builtin_prefetch(regs[i-1].regmap); + if(dops[i].is_jump) + { + signed char branch_rregmap_i[RRMAP_SIZE]; + u_int branch_hr_candirty = 0; + make_rregs(branch_regs[i].regmap, branch_rregmap_i, &branch_hr_candirty); + if(ba[i]=(start+slen*4)) + { + // Branch out of this block, flush all regs + will_dirty_i = 0; + will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31); + will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31); + will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31); + will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31); + will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31); + will_dirty_i &= branch_hr_candirty; + if (dops[i].is_ujump) + { + // Unconditional branch + wont_dirty_i = 0; + // Merge in delay slot (will dirty) + will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31); + will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31); + will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31); + will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31); + will_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31); + will_dirty_i &= hr_candirty; } - if(regs[j].regmap[HOST_CCREG]==CCREG) { - int k=i; - //printf("Extend CC, %x -> %x\n",start+k*4,start+j*4); - while(k0 && regmap_pre[i][r]<34) { + temp_will_dirty|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<>regmap_pre[i][r])&1)<>2,i-1,0); + }else{ + // Limit recursion. It can take an excessive amount + // of time if there are a lot of nested loops. 
+ will_dirty[(ba[i]-start)>>2]=0; + wont_dirty[(ba[i]-start)>>2]=-1; } - regs[j].regmap_entry[HOST_CCREG]=CCREG; } - // Work backwards from the branch target - if(j>i&&f_regmap[HOST_CCREG]==CCREG) + /*else*/ if(1) { - //printf("Extend backwards\n"); - int k; - k=i; - while(regs[k-1].regmap[HOST_CCREG]==-1) { - if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) { - //printf("no free regs for store %x\n",start+(k-1)*4); - break; + if (dops[i].is_ujump) + { + // Unconditional branch + will_dirty_i=0; + wont_dirty_i=0; + //if(ba[i]>start+i*4) { // Disable recursion (for debugging) + for(r=0;r>2].regmap_entry[r]) { + will_dirty_i|=will_dirty[(ba[i]-start)>>2]&(1<>2]&(1<=0) { + will_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>branch_regs[i].regmap[r])&1)<>2]>>branch_regs[i].regmap[r])&1)<\n",start+k*4); - while(k<=i) { - regs[k].regmap_entry[HOST_CCREG]=CCREG; - regs[k].regmap[HOST_CCREG]=CCREG; - regmap_pre[k+1][HOST_CCREG]=CCREG; - regs[k+1].wasdirty|=1<start+i*4) // Disable recursion (for debugging) + for(r=0;r>2].regmap_entry[r]) { + will_dirty_i&=will_dirty[(ba[i]-start)>>2]&(1<>2]&(1<=0) { + will_dirty_i&=((unneeded_reg[(ba[i]-start)>>2]>>target_reg)&1)<>2]>>target_reg)&1)<\n",start+k*4); + // Merge in delay slot (won't dirty) + wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31); + wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31); + wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31); + wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31); + wont_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31); + wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31); + wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31); + wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31); + wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31); + wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31); + wont_dirty_i &= ~(1u << 31); + if(wr) { + #ifndef 
DESTRUCTIVE_WRITEBACK + branch_regs[i].dirty&=wont_dirty_i; + #endif + branch_regs[i].dirty|=will_dirty_i; } } } - if(dops[i].itype!=STORE&&dops[i].itype!=STORELR&&dops[i].itype!=C1LS&&dops[i].itype!=SHIFT&& - dops[i].itype!=NOP&&dops[i].itype!=MOV&&dops[i].itype!=ALU&&dops[i].itype!=SHIFTIMM&& - dops[i].itype!=IMM16&&dops[i].itype!=LOAD&&dops[i].itype!=COP1) - { - memcpy(f_regmap,regs[i].regmap,sizeof(f_regmap)); - } } - } - - // This allocates registers (if possible) one instruction prior - // to use, which can avoid a load-use penalty on certain CPUs. - for(i=0;i istart && !dops[i].is_jump) { + // Don't store a register immediately after writing it, + // may prevent dual-issue. + wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i-1].rt1) & 31); + wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i-1].rt2) & 31); + } + // Save it + will_dirty[i]=will_dirty_i; + wont_dirty[i]=wont_dirty_i; + // Mark registers that won't be dirtied as not dirty + if(wr) { + regs[i].dirty|=will_dirty_i; + #ifndef DESTRUCTIVE_WRITEBACK + regs[i].dirty&=wont_dirty_i; + if(dops[i].is_jump) { - if(dops[i+1].rs1) { - if((hr=get_reg(regs[i+1].regmap,dops[i+1].rs1))>=0) - { - if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) - { - regs[i].regmap[hr]=regs[i+1].regmap[hr]; - regmap_pre[i+1][hr]=regs[i+1].regmap[hr]; - regs[i+1].regmap_entry[hr]=regs[i+1].regmap[hr]; - regs[i].isconst&=~(1<=0) - { - if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) - { - regs[i].regmap[hr]=regs[i+1].regmap[hr]; - regmap_pre[i+1][hr]=regs[i+1].regmap[hr]; - regs[i+1].regmap_entry[hr]=regs[i+1].regmap[hr]; - regs[i].isconst&=~(1<=0) - { - if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) - { - regs[i].regmap[hr]=dops[i+1].rs1; - regmap_pre[i+1][hr]=dops[i+1].rs1; - regs[i+1].regmap_entry[hr]=dops[i+1].rs1; - regs[i].isconst&=~(1<=0) - { - if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) - { - regs[i].regmap[hr]=dops[i+1].rs1; - regmap_pre[i+1][hr]=dops[i+1].rs1; - regs[i+1].regmap_entry[hr]=dops[i+1].rs1; 
- regs[i].isconst&=~(1<>r)&1));*/} } } } - // Address for store instruction (non-constant) - if(dops[i+1].itype==STORE||dops[i+1].itype==STORELR - ||(dops[i+1].opcode&0x3b)==0x39||(dops[i+1].opcode&0x3b)==0x3a) { // SB/SH/SW/SD/SWC1/SDC1/SWC2/SDC2 - if(get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { - hr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1); - if(hr<0) hr=get_reg_temp(regs[i+1].regmap); - else { - regs[i+1].regmap[hr]=AGEN1+((i+1)&1); - regs[i+1].isconst&=~(1<=0); - if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) - { - regs[i].regmap[hr]=dops[i+1].rs1; - regmap_pre[i+1][hr]=dops[i+1].rs1; - regs[i+1].regmap_entry[hr]=dops[i+1].rs1; - regs[i].isconst&=~(1<>r)&1));*/} } } } - if(dops[i+1].itype==LOADLR||(dops[i+1].opcode&0x3b)==0x31||(dops[i+1].opcode&0x3b)==0x32) { // LWC1/LDC1, LWC2/LDC2 - if(get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { - int nr; - hr=get_reg(regs[i+1].regmap,FTEMP); - assert(hr>=0); - if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) - { - regs[i].regmap[hr]=dops[i+1].rs1; - regmap_pre[i+1][hr]=dops[i+1].rs1; - regs[i+1].regmap_entry[hr]=dops[i+1].rs1; - regs[i].isconst&=~(1<=0) - { - // move it to another register - regs[i+1].regmap[hr]=-1; - regmap_pre[i+2][hr]=-1; - regs[i+1].regmap[nr]=FTEMP; - regmap_pre[i+2][nr]=FTEMP; - regs[i].regmap[nr]=dops[i+1].rs1; - regmap_pre[i+1][nr]=dops[i+1].rs1; - regs[i+1].regmap_entry[nr]=dops[i+1].rs1; - regs[i].isconst&=~(1<=0&®s[i].regmap[hr]<0) { - int rs=get_reg(regs[i+1].regmap,dops[i+1].rs1); - if(rs>=0&&((regs[i+1].wasconst>>rs)&1)) { - regs[i].regmap[hr]=AGEN1+((i+1)&1); - regmap_pre[i+1][hr]=AGEN1+((i+1)&1); - regs[i+1].regmap_entry[hr]=AGEN1+((i+1)&1); - regs[i].isconst&=~(1<=0&&(nr=get_rreg(rregmap_i,regmap_pre[i][r]))>=0) { + // Register moved to a different register + will_dirty_i&=~(1<>nr)&1)<>nr)&1)<0 && regmap_pre[i][r]<34) { + will_dirty_i|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<>regmap_pre[i][r])&1)<>r)&1));*/ } } } } } +} + +static noinline void pass10_expire_blocks(void) +{ + 
int i, end; + end = (((out-ndrc->translation_cache)>>(TARGET_SIZE_2-16)) + 16384) & 65535; + while (expirep != end) + { + int shift=TARGET_SIZE_2-3; // Divide into 8 blocks + uintptr_t base_offs = ((uintptr_t)(expirep >> 13) << shift); // Base offset of this block + uintptr_t base_offs_s = base_offs >> shift; + inv_debug("EXP: Phase %d\n",expirep); + switch((expirep>>11)&3) + { + case 0: + // Clear jump_in and jump_dirty + ll_remove_matching_addrs(jump_in+(expirep&2047),base_offs_s,shift); + ll_remove_matching_addrs(jump_dirty+(expirep&2047),base_offs_s,shift); + ll_remove_matching_addrs(jump_in+2048+(expirep&2047),base_offs_s,shift); + ll_remove_matching_addrs(jump_dirty+2048+(expirep&2047),base_offs_s,shift); + break; + case 1: + // Clear pointers + ll_kill_pointers(jump_out[expirep&2047],base_offs_s,shift); + ll_kill_pointers(jump_out[(expirep&2047)+2048],base_offs_s,shift); + break; + case 2: + // Clear hash table + for(i=0;i<32;i++) { + struct ht_entry *ht_bin = &hash_table[((expirep&2047)<<5)+i]; + uintptr_t o1 = (u_char *)ht_bin->tcaddr[1] - ndrc->translation_cache; + uintptr_t o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; + if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s) { + inv_debug("EXP: Remove hash %x -> %p\n",ht_bin->vaddr[1],ht_bin->tcaddr[1]); + ht_bin->vaddr[1] = -1; + ht_bin->tcaddr[1] = NULL; + } + o1 = (u_char *)ht_bin->tcaddr[0] - ndrc->translation_cache; + o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; + if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s) { + inv_debug("EXP: Remove hash %x -> %p\n",ht_bin->vaddr[0],ht_bin->tcaddr[0]); + ht_bin->vaddr[0] = ht_bin->vaddr[1]; + ht_bin->tcaddr[0] = ht_bin->tcaddr[1]; + ht_bin->vaddr[1] = -1; + ht_bin->tcaddr[1] = NULL; + } + } + break; + case 3: + // Clear jump_out + if((expirep&2047)==0) + do_clear_cache(); + ll_remove_matching_addrs(jump_out+(expirep&2047),base_offs_s,shift); + ll_remove_matching_addrs(jump_out+2048+(expirep&2047),base_offs_s,shift); + break; + } + expirep=(expirep+1)&65535; 
+ } +} + +int new_recompile_block(u_int addr) +{ + u_int pagelimit = 0; + u_int state_rflags = 0; + int i; + + assem_debug("NOTCOMPILED: addr = %x -> %p\n", addr, out); + + // this is just for speculation + for (i = 1; i < 32; i++) { + if ((psxRegs.GPR.r[i] & 0xffff0000) == 0x1f800000) + state_rflags |= 1 << i; + } + + start = (u_int)addr&~3; + //assert(((u_int)addr&1)==0); // start-in-delay-slot flag + new_dynarec_did_compile=1; + if (Config.HLE && start == 0x80001000) // hlecall + { + // XXX: is this enough? Maybe check hleSoftCall? + void *beginning=start_block(); + u_int page=get_page(start); + + invalid_code[start>>12]=0; + emit_movimm(start,0); + emit_writeword(0,&pcaddr); + emit_far_jump(new_dyna_leave); + literal_pool(0); + end_block(beginning); + ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning); + return 0; + } + else if (f1_hack && hack_addr == 0) { + void *beginning = start_block(); + u_int page = get_page(start); + emit_movimm(start, 0); + emit_writeword(0, &hack_addr); + emit_readword(&psxRegs.GPR.n.sp, 0); + emit_readptr(&mem_rtab, 1); + emit_shrimm(0, 12, 2); + emit_readptr_dualindexedx_ptrlen(1, 2, 1); + emit_addimm(0, 0x18, 0); + emit_adds_ptr(1, 1, 1); + emit_ldr_dualindexed(1, 0, 0); + emit_writeword(0, &psxRegs.GPR.r[26]); // lw k0, 0x18(sp) + emit_far_call(get_addr_ht); + emit_jmpreg(0); // jr k0 + literal_pool(0); + end_block(beginning); + + ll_add_flags(jump_in + page, start, state_rflags, beginning); + SysPrintf("F1 hack to %08x\n", start); + return 0; + } + + cycle_multiplier_active = cycle_multiplier_override && cycle_multiplier == CYCLE_MULT_DEFAULT + ? 
cycle_multiplier_override : cycle_multiplier; + + source = get_source_start(start, &pagelimit); + if (source == NULL) { + if (addr != hack_addr) { + SysPrintf("Compile at bogus memory address: %08x\n", addr); + hack_addr = addr; + } + //abort(); + return -1; + } + + /* Pass 1: disassemble */ + /* Pass 2: register dependencies, branch targets */ + /* Pass 3: register allocation */ + /* Pass 4: branch dependencies */ + /* Pass 5: pre-alloc */ + /* Pass 6: optimize clean/dirty state */ + /* Pass 7: flag 32-bit registers */ + /* Pass 8: assembly */ + /* Pass 9: linker */ + /* Pass 10: garbage collection / free memory */ + + /* Pass 1 disassembly */ + + pass1_disassemble(pagelimit); + + int clear_hack_addr = apply_hacks(); + + /* Pass 2 - Register dependencies and branch targets */ + + pass2_unneeded_regs(0,slen-1,0); + + /* Pass 3 - Register allocation */ + + pass3_register_alloc(addr); + + /* Pass 4 - Cull unused host registers */ + + pass4_cull_unused_regs(); + + /* Pass 5 - Pre-allocate registers */ + + pass5a_preallocate1(); + pass5b_preallocate2(); /* Pass 6 - Optimize clean/dirty state */ - clean_registers(0,slen-1,1); + pass6_clean_registers(0, slen-1, 1); /* Pass 7 - Identify 32-bit registers */ for (i=slen-1;i>=0;i--) @@ -9087,145 +9168,12 @@ int new_recompile_block(u_int addr) dops[slen-1].bt=1; // Mark as a branch target so instruction can restart after exception } -#ifdef REG_ALLOC_PRINT - /* Debug/disassembly */ - for(i=0;i>r)&1) { - if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } - printf("\n"); - #if defined(__i386__) || defined(__x86_64__) - printf("pre: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regmap_pre[i][0],regmap_pre[i][1],regmap_pre[i][2],regmap_pre[i][3],regmap_pre[i][5],regmap_pre[i][6],regmap_pre[i][7]); - #endif - #ifdef __arm__ - printf("pre: r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d 
r12=%d\n",regmap_pre[i][0],regmap_pre[i][1],regmap_pre[i][2],regmap_pre[i][3],regmap_pre[i][4],regmap_pre[i][5],regmap_pre[i][6],regmap_pre[i][7],regmap_pre[i][8],regmap_pre[i][9],regmap_pre[i][10],regmap_pre[i][12]); - #endif - #if defined(__i386__) || defined(__x86_64__) - printf("needs: "); - if(needed_reg[i]&1) printf("eax "); - if((needed_reg[i]>>1)&1) printf("ecx "); - if((needed_reg[i]>>2)&1) printf("edx "); - if((needed_reg[i]>>3)&1) printf("ebx "); - if((needed_reg[i]>>5)&1) printf("ebp "); - if((needed_reg[i]>>6)&1) printf("esi "); - if((needed_reg[i]>>7)&1) printf("edi "); - printf("\n"); - printf("entry: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7]); - printf("dirty: "); - if(regs[i].wasdirty&1) printf("eax "); - if((regs[i].wasdirty>>1)&1) printf("ecx "); - if((regs[i].wasdirty>>2)&1) printf("edx "); - if((regs[i].wasdirty>>3)&1) printf("ebx "); - if((regs[i].wasdirty>>5)&1) printf("ebp "); - if((regs[i].wasdirty>>6)&1) printf("esi "); - if((regs[i].wasdirty>>7)&1) printf("edi "); - #endif - #ifdef __arm__ - printf("entry: r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[4],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7],regs[i].regmap_entry[8],regs[i].regmap_entry[9],regs[i].regmap_entry[10],regs[i].regmap_entry[12]); - printf("dirty: "); - if(regs[i].wasdirty&1) printf("r0 "); - if((regs[i].wasdirty>>1)&1) printf("r1 "); - if((regs[i].wasdirty>>2)&1) printf("r2 "); - if((regs[i].wasdirty>>3)&1) printf("r3 "); - if((regs[i].wasdirty>>4)&1) printf("r4 "); - if((regs[i].wasdirty>>5)&1) printf("r5 "); - if((regs[i].wasdirty>>6)&1) printf("r6 "); - if((regs[i].wasdirty>>7)&1) printf("r7 "); - 
if((regs[i].wasdirty>>8)&1) printf("r8 "); - if((regs[i].wasdirty>>9)&1) printf("r9 "); - if((regs[i].wasdirty>>10)&1) printf("r10 "); - if((regs[i].wasdirty>>12)&1) printf("r12 "); - #endif - printf("\n"); - disassemble_inst(i); - //printf ("ccadj[%d] = %d\n",i,ccadj[i]); - #if defined(__i386__) || defined(__x86_64__) - printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d dirty: ",regs[i].regmap[0],regs[i].regmap[1],regs[i].regmap[2],regs[i].regmap[3],regs[i].regmap[5],regs[i].regmap[6],regs[i].regmap[7]); - if(regs[i].dirty&1) printf("eax "); - if((regs[i].dirty>>1)&1) printf("ecx "); - if((regs[i].dirty>>2)&1) printf("edx "); - if((regs[i].dirty>>3)&1) printf("ebx "); - if((regs[i].dirty>>5)&1) printf("ebp "); - if((regs[i].dirty>>6)&1) printf("esi "); - if((regs[i].dirty>>7)&1) printf("edi "); - #endif - #ifdef __arm__ - printf("r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d dirty: ",regs[i].regmap[0],regs[i].regmap[1],regs[i].regmap[2],regs[i].regmap[3],regs[i].regmap[4],regs[i].regmap[5],regs[i].regmap[6],regs[i].regmap[7],regs[i].regmap[8],regs[i].regmap[9],regs[i].regmap[10],regs[i].regmap[12]); - if(regs[i].dirty&1) printf("r0 "); - if((regs[i].dirty>>1)&1) printf("r1 "); - if((regs[i].dirty>>2)&1) printf("r2 "); - if((regs[i].dirty>>3)&1) printf("r3 "); - if((regs[i].dirty>>4)&1) printf("r4 "); - if((regs[i].dirty>>5)&1) printf("r5 "); - if((regs[i].dirty>>6)&1) printf("r6 "); - if((regs[i].dirty>>7)&1) printf("r7 "); - if((regs[i].dirty>>8)&1) printf("r8 "); - if((regs[i].dirty>>9)&1) printf("r9 "); - if((regs[i].dirty>>10)&1) printf("r10 "); - if((regs[i].dirty>>12)&1) printf("r12 "); - #endif - printf("\n"); - if(regs[i].isconst) { - printf("constants: "); - #if defined(__i386__) || defined(__x86_64__) - if(regs[i].isconst&1) printf("eax=%x ",(u_int)constmap[i][0]); - if((regs[i].isconst>>1)&1) printf("ecx=%x ",(u_int)constmap[i][1]); - if((regs[i].isconst>>2)&1) printf("edx=%x ",(u_int)constmap[i][2]); - 
if((regs[i].isconst>>3)&1) printf("ebx=%x ",(u_int)constmap[i][3]); - if((regs[i].isconst>>5)&1) printf("ebp=%x ",(u_int)constmap[i][5]); - if((regs[i].isconst>>6)&1) printf("esi=%x ",(u_int)constmap[i][6]); - if((regs[i].isconst>>7)&1) printf("edi=%x ",(u_int)constmap[i][7]); - #endif - #if defined(__arm__) || defined(__aarch64__) - int r; - for (r = 0; r < ARRAY_SIZE(constmap[i]); r++) - if ((regs[i].isconst >> r) & 1) - printf(" r%d=%x", r, (u_int)constmap[i][r]); - #endif - printf("\n"); - } - if(dops[i].is_jump) { - #if defined(__i386__) || defined(__x86_64__) - printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d dirty: ",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); - if(branch_regs[i].dirty&1) printf("eax "); - if((branch_regs[i].dirty>>1)&1) printf("ecx "); - if((branch_regs[i].dirty>>2)&1) printf("edx "); - if((branch_regs[i].dirty>>3)&1) printf("ebx "); - if((branch_regs[i].dirty>>5)&1) printf("ebp "); - if((branch_regs[i].dirty>>6)&1) printf("esi "); - if((branch_regs[i].dirty>>7)&1) printf("edi "); - #endif - #ifdef __arm__ - printf("branch(%d): r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d dirty: ",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[4],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7],branch_regs[i].regmap[8],branch_regs[i].regmap[9],branch_regs[i].regmap[10],branch_regs[i].regmap[12]); - if(branch_regs[i].dirty&1) printf("r0 "); - if((branch_regs[i].dirty>>1)&1) printf("r1 "); - if((branch_regs[i].dirty>>2)&1) printf("r2 "); - if((branch_regs[i].dirty>>3)&1) printf("r3 "); - if((branch_regs[i].dirty>>4)&1) printf("r4 "); - if((branch_regs[i].dirty>>5)&1) printf("r5 "); - if((branch_regs[i].dirty>>6)&1) printf("r6 "); - if((branch_regs[i].dirty>>7)&1) printf("r7 "); 
- if((branch_regs[i].dirty>>8)&1) printf("r8 "); - if((branch_regs[i].dirty>>9)&1) printf("r9 "); - if((branch_regs[i].dirty>>10)&1) printf("r10 "); - if((branch_regs[i].dirty>>12)&1) printf("r12 "); - #endif - } - } -#endif // REG_ALLOC_PRINT - /* Pass 8 - Assembly */ linkcount=0;stubcount=0; - ds=0;is_delayslot=0; + is_delayslot=0; u_int dirty_pre=0; void *beginning=start_block(); + int ds = 0; if((u_int)addr&1) { ds=1; pagespan_ds(); @@ -9508,59 +9456,8 @@ int new_recompile_block(u_int addr) /* Pass 10 - Free memory by expiring oldest blocks */ - int end=(((out-ndrc->translation_cache)>>(TARGET_SIZE_2-16))+16384)&65535; - while(expirep!=end) - { - int shift=TARGET_SIZE_2-3; // Divide into 8 blocks - uintptr_t base_offs = ((uintptr_t)(expirep >> 13) << shift); // Base offset of this block - uintptr_t base_offs_s = base_offs >> shift; - inv_debug("EXP: Phase %d\n",expirep); - switch((expirep>>11)&3) - { - case 0: - // Clear jump_in and jump_dirty - ll_remove_matching_addrs(jump_in+(expirep&2047),base_offs_s,shift); - ll_remove_matching_addrs(jump_dirty+(expirep&2047),base_offs_s,shift); - ll_remove_matching_addrs(jump_in+2048+(expirep&2047),base_offs_s,shift); - ll_remove_matching_addrs(jump_dirty+2048+(expirep&2047),base_offs_s,shift); - break; - case 1: - // Clear pointers - ll_kill_pointers(jump_out[expirep&2047],base_offs_s,shift); - ll_kill_pointers(jump_out[(expirep&2047)+2048],base_offs_s,shift); - break; - case 2: - // Clear hash table - for(i=0;i<32;i++) { - struct ht_entry *ht_bin = &hash_table[((expirep&2047)<<5)+i]; - uintptr_t o1 = (u_char *)ht_bin->tcaddr[1] - ndrc->translation_cache; - uintptr_t o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; - if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s) { - inv_debug("EXP: Remove hash %x -> %p\n",ht_bin->vaddr[1],ht_bin->tcaddr[1]); - ht_bin->vaddr[1] = -1; - ht_bin->tcaddr[1] = NULL; - } - o1 = (u_char *)ht_bin->tcaddr[0] - ndrc->translation_cache; - o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; - if ((o1 >> shift) == 
base_offs_s || (o2 >> shift) == base_offs_s) { - inv_debug("EXP: Remove hash %x -> %p\n",ht_bin->vaddr[0],ht_bin->tcaddr[0]); - ht_bin->vaddr[0] = ht_bin->vaddr[1]; - ht_bin->tcaddr[0] = ht_bin->tcaddr[1]; - ht_bin->vaddr[1] = -1; - ht_bin->tcaddr[1] = NULL; - } - } - break; - case 3: - // Clear jump_out - if((expirep&2047)==0) - do_clear_cache(); - ll_remove_matching_addrs(jump_out+(expirep&2047),base_offs_s,shift); - ll_remove_matching_addrs(jump_out+2048+(expirep&2047),base_offs_s,shift); - break; - } - expirep=(expirep+1)&65535; - } + pass10_expire_blocks(); + #ifdef ASSEM_PRINT fflush(stdout); #endif From 53358c1d5b032cc7186b71e3cc14f0ad6c2d5468 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 7 Feb 2022 01:41:12 +0200 Subject: [PATCH 127/597] drc: try to make some passes not as slow, part 2 --- libpcsxcore/new_dynarec/assem_arm.c | 14 +- libpcsxcore/new_dynarec/assem_arm64.c | 1 - libpcsxcore/new_dynarec/new_dynarec.c | 208 +++++++++++--------------- 3 files changed, 94 insertions(+), 129 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index da32f5b78..b9dd3cf96 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -522,12 +522,8 @@ static void emit_pcreladdr(u_int rt) static void emit_loadreg(int r, int hr) { - if(r&64) { - SysPrintf("64bit load in 32bit mode!\n"); - assert(0); - return; - } - if((r&63)==0) + assert(hr != EXCLUDE_REG); + if (r == 0) emit_zeroreg(hr); else { void *addr; @@ -552,11 +548,7 @@ static void emit_loadreg(int r, int hr) static void emit_storereg(int r, int hr) { - if(r&64) { - SysPrintf("64bit store in 32bit mode!\n"); - assert(0); - return; - } + assert(hr != EXCLUDE_REG); int addr = (int)&psxRegs.GPR.r[r]; switch (r) { //case HIREG: addr = &hi; break; diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 1157aafe4..ee7b4f7c9 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ 
b/libpcsxcore/new_dynarec/assem_arm64.c @@ -449,7 +449,6 @@ static void emit_readshword(void *addr, u_int rt) static void emit_loadreg(u_int r, u_int hr) { int is64 = 0; - assert(r < 64); if (r == 0) emit_zeroreg(hr); else { diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 9913b1160..955642308 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -220,9 +220,6 @@ static struct decoded_insn static struct regstat regs[MAXBLOCK]; static struct regstat branch_regs[MAXBLOCK]; static signed char minimum_free_regs[MAXBLOCK]; - static u_int needed_reg[MAXBLOCK]; - static u_int wont_dirty[MAXBLOCK]; - static u_int will_dirty[MAXBLOCK]; static int ccadj[MAXBLOCK]; static int slen; static void *instr_addr[MAXBLOCK]; @@ -608,6 +605,8 @@ static void clear_all_regs(signed char regmap[]) memset(regmap, -1, sizeof(regmap[0]) * HOST_REGS); } +// get_reg: get allocated host reg from mips reg +// returns -1 if no such mips reg was allocated #if defined(__arm__) && defined(HAVE_ARMV6) && HOST_REGS == 13 && EXCLUDE_REG == 11 extern signed char get_reg(const signed char regmap[], signed char r); @@ -628,6 +627,12 @@ static signed char get_reg(const signed char regmap[], signed char r) #endif +// get reg as mask bit (1 << hr) +static u_int get_regm(const signed char regmap[], signed char r) +{ + return (1u << (get_reg(regmap, r) & 31)) & ~(1u << 31); +} + static signed char get_reg_temp(const signed char regmap[]) { int hr; @@ -1442,15 +1447,14 @@ static void alloc_reg(struct regstat *cur,int i,signed char reg) if (reg == CCREG) preferred_reg = HOST_CCREG; if (reg == PTEMP || reg == FTEMP) preferred_reg = 12; assert(PREFERRED_REG_FIRST != EXCLUDE_REG && EXCLUDE_REG != HOST_REGS); + assert(reg >= 0); // Don't allocate unused registers if((cur->u>>reg)&1) return; // see if it's already allocated - for(hr=0;hrregmap[hr]==reg) return; - } + if (get_reg(cur->regmap, reg) >= 0) + return; // Keep the 
same mapping if the register was already allocated in a loop preferred_reg = loop_reg(i,reg,preferred_reg); @@ -2193,23 +2197,13 @@ static void wb_register(signed char r, const signed char regmap[], uint64_t dirt static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t u) { //if(dirty_pre==dirty) return; - int hr,reg; - for(hr=0;hr>reg)&1) { - if(reg>0) { - if(((dirty_pre&~dirty)>>hr)&1) { - if(reg>0&®<34) { - emit_storereg(reg,hr); - } - else if(reg>=64) { - assert(0); - } - } - } - } - } + int hr, r; + for (hr = 0; hr < HOST_REGS; hr++) { + r = pre[hr]; + if (r < 1 || r > 33 || ((u >> r) & 1)) + continue; + if (((dirty_pre & ~dirty) >> hr) & 1) + emit_storereg(r, hr); } } @@ -4279,26 +4273,18 @@ static void wb_invalidate(signed char pre[],signed char entry[],uint64_t dirty,u // Load the specified registers // This only loads the registers given as arguments because // we don't want to load things that will be overwritten -static void load_regs(signed char entry[],signed char regmap[],int rs1,int rs2) +static inline void load_reg(signed char entry[], signed char regmap[], int rs) { - int hr; - // Load 32-bit regs - for(hr=0;hr=0) { - if(entry[hr]!=regmap[hr]) { - if(regmap[hr]==rs1||regmap[hr]==rs2) - { - if(regmap[hr]==0) { - emit_zeroreg(hr); - } - else - { - emit_loadreg(regmap[hr],hr); - } - } - } - } - } + int hr = get_reg(regmap, rs); + if (hr >= 0 && entry[hr] != regmap[hr]) + emit_loadreg(regmap[hr], hr); +} + +static void load_regs(signed char entry[], signed char regmap[], int rs1, int rs2) +{ + load_reg(entry, regmap, rs1); + if (rs1 != rs2) + load_reg(entry, regmap, rs2); } // Load registers prior to the start of a loop @@ -4306,27 +4292,12 @@ static void load_regs(signed char entry[],signed char regmap[],int rs1,int rs2) static void loop_preload(signed char pre[],signed char entry[]) { int hr; - for(hr=0;hr=0) { - if(get_reg(pre,entry[hr])<0) { - assem_debug("loop preload:\n"); - //printf("loop preload: 
%d\n",hr); - if(entry[hr]==0) { - emit_zeroreg(hr); - } - else if(entry[hr]= 0 && pre[hr] != r && get_reg(pre, r) < 0) { + assem_debug("loop preload:\n"); + if (r < TEMPREG) + emit_loadreg(r, hr); } } } @@ -4846,9 +4817,9 @@ static void ds_assemble_entry(int i) load_regs(regs[t].regmap_entry,regs[t].regmap,dops[t].rs1,dops[t].rs2); address_generation(t,®s[t],regs[t].regmap_entry); if (ram_offset && (dops[t].is_load || dops[t].is_store)) - load_regs(regs[t].regmap_entry,regs[t].regmap,ROREG,ROREG); + load_reg(regs[t].regmap_entry,regs[t].regmap,ROREG); if (dops[t].is_store) - load_regs(regs[t].regmap_entry,regs[t].regmap,INVCP,INVCP); + load_reg(regs[t].regmap_entry,regs[t].regmap,INVCP); is_delayslot=0; switch (dops[t].itype) { case SYSCALL: @@ -5223,7 +5194,7 @@ static void ujump_assemble(int i, const struct regstat *i_regs) uint64_t bc_unneeded=branch_regs[i].u; bc_unneeded|=1|(1LL<=0;i--) { int hr; + __builtin_prefetch(regs[i-2].regmap); if(dops[i].is_jump) { if(ba[i]=(start+slen*4)) @@ -7988,7 +7961,7 @@ static noinline void pass4_cull_unused_regs(void) if (!dops[i].is_ujump) { if(i=0&&get_reg(regs[i+2].regmap_entry,regmap_pre[i+2][hr])<0) nr&=~(1< 0 && !dops[i].bt && regs[i].wasdirty) for(hr=0;hr0&&!dops[i].bt&&((regs[i].wasdirty>>hr)&1)) { + if((regs[i].wasdirty>>hr)&1) { if((regmap_pre[i][hr]>0&&!((unneeded_reg[i]>>regmap_pre[i][hr])&1))) { if(dops[i-1].rt1==regmap_pre[i][hr]) nr|=1< Date: Sat, 12 Feb 2022 22:58:23 +0200 Subject: [PATCH 128/597] drc: add a clock override for Super Robot Taisen Alpha libretro/pcsx_rearmed#610 --- libpcsxcore/database.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 61312e092..52d17a7e6 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -17,6 +17,21 @@ static const char MemorycardHack_db[8][10] = {"SCUS94409"} }; +static const struct +{ + const char * const id; + int mult; +} +new_dynarec_clock_overrides[] = 
+{ + /* Internal Section - fussy about timings */ + { "SLPS01868", 202 }, + /* Super Robot Taisen Alpha - on the edge with 175, + * changing memcard settings is enough to break/unbreak it */ + { "SLPS02528", 190 }, + { "SLPS02636", 190 }, +}; + /* Function for automatic patching according to GameID. */ void Apply_Hacks_Cdrom() { @@ -38,10 +53,15 @@ void Apply_Hacks_Cdrom() new_dynarec_hacks_pergame = 0; cycle_multiplier_override = 0; - /* Internal Section is fussy about timings */ - if (strcmp(CdromId, "SLPS01868") == 0) + for (i = 0; i < ARRAY_SIZE(new_dynarec_clock_overrides); i++) { - cycle_multiplier_override = 202; - new_dynarec_hacks_pergame |= NDHACK_OVERRIDE_CYCLE_M; + if (strcmp(CdromId, new_dynarec_clock_overrides[i].id) == 0) + { + cycle_multiplier_override = new_dynarec_clock_overrides[i].mult; + new_dynarec_hacks_pergame |= NDHACK_OVERRIDE_CYCLE_M; + SysPrintf("using new_dynarec clock override: %d\n", + cycle_multiplier_override); + break; + } } } From 705d9544dd4bd26b720c7ff9ff9dae935a2af796 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 15 Feb 2022 23:17:42 +0200 Subject: [PATCH 129/597] psxinterpreter: unbreak load delay handling Fixes: 943a507a4156b8f5b00e4431152e41eeb4dc6f3d --- libpcsxcore/psxinterpreter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index 19a5fc4ec..e7e32690b 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -311,7 +311,7 @@ int psxTestLoadDelay(int reg, u32 tmp) { } void psxDelayTest(int reg, u32 bpc) { - u32 tmp = fetch(psxRegs.pc); + u32 tmp = fetch(bpc); branch = 1; switch (psxTestLoadDelay(reg, tmp)) { From 0787af868fcaf537eb53483c50973983894de7c3 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 16 Feb 2022 01:20:31 +0200 Subject: [PATCH 130/597] drc: try not to end the block prematurely Fixes: 4919de1e88095f00466f6674323d518fc520b0db (was not really broken, only load delay detection can't work with blocks that are 
too small) --- libpcsxcore/new_dynarec/new_dynarec.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 955642308..6ade3bb0d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -7070,8 +7070,24 @@ static noinline void pass1_disassemble(u_int pagelimit) /* Is this the end of the block? */ if (i > 0 && dops[i-1].is_ujump) { - if(dops[i-1].rt1==0) { // Continue past subroutine call (JAL) - done=2; + if (dops[i-1].rt1 == 0) { // not jal + int found_bbranch = 0, t = (ba[i-1] - start) / 4; + if ((u_int)(t - i) < 64 && start + (t+64)*4 < pagelimit) { + // scan for a branch back to i+1 + for (j = t; j < t + 64; j++) { + int tmpop = source[j] >> 26; + if (tmpop == 1 || ((tmpop & ~3) == 4)) { + int t2 = j + 1 + (int)(signed short)source[j]; + if (t2 == i + 1) { + //printf("blk expand %08x<-%08x\n", start + (i+1)*4, start + j*4); + found_bbranch = 1; + break; + } + } + } + } + if (!found_bbranch) + done = 2; } else { if(stop_after_jal) done=1; From dda25fa49e3a12a31470834461964b4486c3b302 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 17 Feb 2022 02:14:03 +0200 Subject: [PATCH 131/597] Revert "clear Index0 data FIFO flag (#241)" This reverts commit 50ae51487697da0d2f9c93295f89d2f10694b6d8. It broke Driver 2. --- libpcsxcore/cdrom.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index ea973081d..191a7373d 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1281,8 +1281,8 @@ unsigned char cdrRead0(void) { if (cdr.OCUP) cdr.Ctrl |= 0x40; - else - cdr.Ctrl &= ~0x40; +// else +// cdr.Ctrl &= ~0x40; // What means the 0x10 and the 0x08 bits? 
I only saw it used by the bios cdr.Ctrl |= 0x18; @@ -1378,7 +1378,6 @@ unsigned char cdrRead2(void) { unsigned char ret; if (cdr.Readed == 0) { - cdr.OCUP = 0; ret = 0; } else { ret = *pTransfer++; From 398d69247112982aa5ddb91bf0fa2a435c6e008c Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 19 Feb 2022 21:37:23 +0200 Subject: [PATCH 132/597] drc: don't delay block restoration Not sure why it was done the way it was (maybe something N64 related?), but it occasionally caused dyna_linker to repeatedly walk the jump_in list. What's worse, if the dirty block was deemed to expire soon, it would never be restored and dyna_linker repeated jump_in walking would never end, causing severe slowdown. --- libpcsxcore/new_dynarec/emu_if.c | 6 - libpcsxcore/new_dynarec/linkage_arm.S | 121 +++++--------- libpcsxcore/new_dynarec/linkage_arm64.S | 20 --- libpcsxcore/new_dynarec/linkage_offsets.h | 3 +- libpcsxcore/new_dynarec/new_dynarec.c | 183 +++++++++------------- 5 files changed, 114 insertions(+), 219 deletions(-) diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index f9ee64169..e9008ae8f 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -296,7 +296,6 @@ static int ari64_init() static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); extern void (*psxCP2[64])(); extern void psxNULL(); - extern unsigned char *out; size_t i; new_dynarec_init(); @@ -326,10 +325,6 @@ static int ari64_init() zeromem_ptr = zero_mem; scratch_buf_ptr = scratch_buf; - SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n"); - SysPrintf("%p/%p/%p/%p/%p\n", - psxM, psxH, psxR, mem_rtab, out); - return 0; } @@ -448,7 +443,6 @@ int new_dynarec_hacks; void *psxH_ptr; void *zeromem_ptr; u8 zero_mem[0x1000]; -unsigned char *out; void *mem_rtab; void *scratch_buf_ptr; void new_dynarec_init() {} diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 1a16aa04d..978280a84 100644 --- 
a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -28,9 +28,9 @@ #define dynarec_local ESYM(dynarec_local) #define add_jump_out ESYM(add_jump_out) #define new_recompile_block ESYM(new_recompile_block) +#define ndrc_try_restore_block ESYM(ndrc_try_restore_block) #define get_addr ESYM(get_addr) #define get_addr_ht ESYM(get_addr_ht) -#define clean_blocks ESYM(clean_blocks) #define gen_interupt ESYM(gen_interupt) #define invalidate_addr ESYM(invalidate_addr) #define gteCheckStallRaw ESYM(gteCheckStallRaw) @@ -88,7 +88,6 @@ DRC_VAR(invc_ptr, 4) DRC_VAR(scratch_buf_ptr, 4) DRC_VAR(ram_offset, 4) DRC_VAR(mini_ht, 256) -DRC_VAR(restore_candidate, 512) #ifdef TEXRELS_FORBIDDEN @@ -96,8 +95,6 @@ DRC_VAR(restore_candidate, 512) .align 2 ptr_jump_in: .word ESYM(jump_in) -ptr_jump_dirty: - .word ESYM(jump_dirty) ptr_hash_table: .word ESYM(hash_table) #endif @@ -159,44 +156,44 @@ ptr_hash_table: #endif .endm -/* r0 = virtual target address */ -/* r1 = instruction to patch */ +/* r4 = virtual target address */ +/* r5 = instruction to patch */ .macro dyna_linker_main #ifndef NO_WRITE_EXEC load_varadr_ext r3, jump_in /* get_page */ - lsr r2, r0, #12 + lsr r2, r4, #12 mov r6, #4096 bic r2, r2, #0xe0000 sub r6, r6, #1 cmp r2, #0x1000 - ldr r7, [r1] + ldr r7, [r5] biclt r2, #0x0e00 and r6, r6, r2 cmp r2, #2048 add r12, r7, #2 orrcs r2, r6, #2048 - ldr r5, [r3, r2, lsl #2] + ldr r1, [r3, r2, lsl #2] lsl r12, r12, #8 - add r6, r1, r12, asr #6 /* old target */ + add r6, r5, r12, asr #6 /* old target */ mov r8, #0 /* jump_in lookup */ 1: - movs r4, r5 + movs r0, r1 beq 2f - ldr r3, [r5] /* ll_entry .vaddr */ - ldrd r4, r5, [r4, #8] /* ll_entry .next, .addr */ - teq r3, r0 + ldr r3, [r1] /* ll_entry .vaddr */ + ldrd r0, r1, [r0, #8] /* ll_entry .addr, .next */ + teq r3, r4 bne 1b - teq r4, r6 - moveq pc, r4 /* Stale i-cache */ - mov r8, r4 + teq r0, r6 + moveq pc, r0 /* Stale i-cache */ + mov r8, r0 b 1b /* jump_in may have dupes, continue search */ 2: 
tst r8, r8 - beq 3f /* r0 not in jump_in */ + beq 3f /* r4 not in jump_in */ - mov r5, r1 + mov r0, r4 mov r1, r6 bl add_jump_out sub r2, r8, r5 @@ -207,43 +204,13 @@ ptr_hash_table: str r1, [r5] mov pc, r8 3: - /* hash_table lookup */ - cmp r2, #2048 - load_varadr_ext r3, jump_dirty - eor r4, r0, r0, lsl #16 - lslcc r2, r0, #9 - load_varadr_ext r6, hash_table - lsr r4, r4, #12 - lsrcc r2, r2, #21 - bic r4, r4, #15 - ldr r5, [r3, r2, lsl #2] - ldr r7, [r6, r4]! - teq r7, r0 - ldreq pc, [r6, #8] - ldr r7, [r6, #4] - teq r7, r0 - ldreq pc, [r6, #12] - /* jump_dirty lookup */ -6: - movs r4, r5 - beq 8f - ldr r3, [r5] - ldr r5, [r4, #12] - teq r3, r0 - bne 6b -7: - ldr r1, [r4, #8] - /* hash_table insert */ - ldr r2, [r6] - ldr r3, [r6, #8] - str r0, [r6] - str r1, [r6, #8] - str r2, [r6, #4] - str r3, [r6, #12] - mov pc, r1 -8: + mov r0, r4 + bl ndrc_try_restore_block + tst r0, r0 + movne pc, r0 #else /* XXX: should be able to do better than this... */ + mov r0, r4 bl get_addr_ht mov pc, r0 #endif @@ -253,16 +220,18 @@ ptr_hash_table: FUNCTION(dyna_linker): /* r0 = virtual target address */ /* r1 = instruction to patch */ - dyna_linker_main - mov r4, r0 mov r5, r1 +10: + dyna_linker_main + + mov r0, r4 bl new_recompile_block tst r0, r0 - mov r0, r4 - mov r1, r5 - beq dyna_linker + beq 10b + /* pagefault */ + mov r0, r4 mov r1, r0 mov r2, #(4<<2) /* Address error (fetch) */ .size dyna_linker, .-dyna_linker @@ -288,18 +257,19 @@ FUNCTION(exec_pagefault): FUNCTION(dyna_linker_ds): /* r0 = virtual target address */ /* r1 = instruction to patch */ - dyna_linker_main - mov r4, r0 - bic r0, r0, #7 mov r5, r1 +10: + dyna_linker_main + + bic r0, r4, #7 orr r0, r0, #1 bl new_recompile_block tst r0, r0 - mov r0, r4 - mov r1, r5 - beq dyna_linker_ds + beq 10b + /* pagefault */ + mov r0, r4 bic r1, r0, #7 mov r2, #0x80000008 /* High bit set indicates pagefault in delay slot */ sub r0, r1, #4 @@ -426,18 +396,12 @@ FUNCTION(verify_code): FUNCTION(cc_interrupt): ldr r0, [fp, 
#LO_last_count] mov r1, #0 - mov r2, #0x1fc add r10, r0, r10 str r1, [fp, #LO_pending_exception] - and r2, r2, r10, lsr #17 - add r3, fp, #LO_restore_candidate str r10, [fp, #LO_cycle] /* PCSX cycles */ @@ str r10, [fp, #LO_reg_cop0+36] /* Count - not on PSX */ - ldr r4, [r2, r3] mov r10, lr - tst r4, r4 - bne .E4 -.E1: + bl gen_interupt mov lr, r10 ldr r10, [fp, #LO_cycle] @@ -450,22 +414,9 @@ FUNCTION(cc_interrupt): ldmfdne sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} tst r1, r1 moveq pc, lr -.E2: ldr r0, [fp, #LO_pcaddr] bl get_addr_ht mov pc, r0 -.E4: - /* Move 'dirty' blocks to the 'clean' list */ - lsl r5, r2, #3 - str r1, [r2, r3] -.E5: - lsrs r4, r4, #1 - mov r0, r5 - add r5, r5, #1 - blcs clean_blocks - tst r5, #31 - bne .E5 - b .E1 .size cc_interrupt, .-cc_interrupt .align 2 diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 39e95a834..5c4d12746 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -79,7 +79,6 @@ DRC_VAR(zeromem_ptr, 8) DRC_VAR(scratch_buf_ptr, 8) DRC_VAR(ram_offset, 8) DRC_VAR(mini_ht, 256) -DRC_VAR(restore_candidate, 512) .text @@ -118,16 +117,11 @@ FUNCTION(dyna_linker_ds): .align 2 FUNCTION(cc_interrupt): ldr w0, [rFP, #LO_last_count] - mov w2, #0x1fc add rCC, w0, rCC str wzr, [rFP, #LO_pending_exception] - and w2, w2, rCC, lsr #17 - add x3, rFP, #LO_restore_candidate str rCC, [rFP, #LO_cycle] /* PCSX cycles */ # str rCC, [rFP, #LO_reg_cop0+36] /* Count */ - ldr w19, [x3, w2, uxtw] mov x21, lr - cbnz w19, 4f 1: bl gen_interupt mov lr, x21 @@ -144,20 +138,6 @@ FUNCTION(cc_interrupt): ldr w0, [rFP, #LO_pcaddr] bl get_addr_ht br x0 -4: - /* Move 'dirty' blocks to the 'clean' list */ - lsl w20, w2, #3 - str wzr, [x3, w2, uxtw] -5: - mov w0, w20 - add w20, w20, #1 - tbz w19, #0, 6f - bl clean_blocks -6: - lsr w19, w19, #1 - tst w20, #31 - bne 5b - b 1b .size cc_interrupt, .-cc_interrupt .align 2 diff --git 
a/libpcsxcore/new_dynarec/linkage_offsets.h b/libpcsxcore/new_dynarec/linkage_offsets.h index 7ac2e6119..0c189d78f 100644 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ b/libpcsxcore/new_dynarec/linkage_offsets.h @@ -39,7 +39,6 @@ #define LO_saved_lr (LO_scratch_buf_ptr + PTRSZ) #define LO_ram_offset (LO_saved_lr + PTRSZ) #define LO_mini_ht (LO_ram_offset + PTRSZ) -#define LO_restore_candidate (LO_mini_ht + PTRSZ*32*2) -#define LO_dynarec_local_size (LO_restore_candidate + 512) +#define LO_dynarec_local_size (LO_mini_ht + PTRSZ*32*2) #define LO_cop2_to_scratch_buf (LO_scratch_buf_ptr - LO_reg_cop2d) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 6ade3bb0d..93319ec02 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -191,11 +191,11 @@ static struct decoded_insn } dops[MAXBLOCK]; // used by asm: - u_char *out; struct ht_entry hash_table[65536] __attribute__((aligned(16))); struct ll_entry *jump_in[4096] __attribute__((aligned(16))); - struct ll_entry *jump_dirty[4096]; + static u_char *out; + static struct ll_entry *jump_dirty[4096]; static struct ll_entry *jump_out[4096]; static u_int start; static u_int *source; @@ -250,7 +250,6 @@ static struct decoded_insn extern int branch_target; extern uintptr_t ram_offset; extern uintptr_t mini_ht[32][2]; - extern u_char restore_candidate[512]; /* registers that may be allocated */ /* 1-31 gpr */ @@ -336,10 +335,13 @@ void jump_break (u_int u0, u_int u1, u_int pc); void jump_break_ds(u_int u0, u_int u1, u_int pc); void jump_to_new_pc(); void call_gteStall(); -void clean_blocks(u_int page); void add_jump_out(u_int vaddr, void *src); void new_dyna_leave(); +static void *get_clean_addr(void *addr); +static void get_bounds(void *addr, u_char **start, u_char **end); +static void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr); + // Needed by assembler static void wb_register(signed char r, const 
signed char regmap[], uint64_t dirty); static void wb_dirtys(const signed char i_regmap[], uint64_t i_dirty); @@ -531,6 +533,21 @@ static void hash_table_add(struct ht_entry *ht_bin, u_int vaddr, void *tcaddr) ht_bin->tcaddr[0] = tcaddr; } +static void mark_valid_code(u_int vaddr, u_int len) +{ + u_int i, j; + vaddr &= 0x1fffffff; + for (i = vaddr & ~0xfff; i < vaddr + len; i += 0x1000) { + // ram mirrors, but should not hurt bios + for (j = 0; j < 0x800000; j += 0x200000) { + invalid_code[(i|j) >> 12] = + invalid_code[(i|j|0x80000000u) >> 12] = + invalid_code[(i|j|0xa0000000u) >> 12] = 0; + } + } + inv_code_start = inv_code_end = ~0; +} + // some messy ari64's code, seems to rely on unsigned 32bit overflow static int doesnt_expire_soon(void *tcaddr) { @@ -538,51 +555,69 @@ static int doesnt_expire_soon(void *tcaddr) return diff > (u_int)(0x60000000 + (MAX_OUTPUT_BLOCK_SIZE << (32-TARGET_SIZE_2))); } +void *ndrc_try_restore_block(u_int vaddr) +{ + u_int page = get_page(vaddr); + struct ll_entry *head; + + for (head = jump_dirty[page]; head != NULL; head = head->next) + { + if (head->vaddr != vaddr) + continue; + // don't restore blocks which are about to expire from the cache + if (!doesnt_expire_soon(head->addr)) + continue; + if (!verify_dirty(head->addr)) + continue; + + // restore + u_char *start, *end; + get_bounds(head->addr, &start, &end); + mark_valid_code(vaddr, end - start); + + void *clean_addr = get_clean_addr(head->addr); + ll_add_flags(jump_in + page, vaddr, head->reg_sv_flags, clean_addr); + + struct ht_entry *ht_bin = hash_table_get(vaddr); + int in_ht = 0; + if (ht_bin->vaddr[0] == vaddr) { + ht_bin->tcaddr[0] = clean_addr; // Replace existing entry + in_ht = 1; + } + if (ht_bin->vaddr[1] == vaddr) { + ht_bin->tcaddr[1] = clean_addr; // Replace existing entry + in_ht = 1; + } + if (!in_ht) + hash_table_add(ht_bin, vaddr, clean_addr); + inv_debug("INV: Restored %08x (%p/%p)\n", head->vaddr, head->addr, clean_addr); + return clean_addr; + } + return 
NULL; +} + // Get address from virtual address // This is called from the recompiled JR/JALR instructions void noinline *get_addr(u_int vaddr) { - u_int page=get_page(vaddr); - u_int vpage=get_vpage(vaddr); + u_int page = get_page(vaddr); struct ll_entry *head; - //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page); - head=jump_in[page]; - while(head!=NULL) { - if(head->vaddr==vaddr) { - //printf("TRACE: count=%d next=%d (get_addr match %x: %p)\n",Count,next_interupt,vaddr,head->addr); + void *code; + + for (head = jump_in[page]; head != NULL; head = head->next) { + if (head->vaddr == vaddr) { hash_table_add(hash_table_get(vaddr), vaddr, head->addr); return head->addr; } - head=head->next; } - head=jump_dirty[vpage]; - while(head!=NULL) { - if(head->vaddr==vaddr) { - //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %p)\n",Count,next_interupt,vaddr,head->addr); - // Don't restore blocks which are about to expire from the cache - if (doesnt_expire_soon(head->addr)) - if (verify_dirty(head->addr)) { - //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); - invalid_code[vaddr>>12]=0; - inv_code_start=inv_code_end=~0; - if(vpage<2048) { - restore_candidate[vpage>>3]|=1<<(vpage&7); - } - else restore_candidate[page>>3]|=1<<(page&7); - struct ht_entry *ht_bin = hash_table_get(vaddr); - if (ht_bin->vaddr[0] == vaddr) - ht_bin->tcaddr[0] = head->addr; // Replace existing entry - else - hash_table_add(ht_bin, vaddr, head->addr); + code = ndrc_try_restore_block(vaddr); + if (code) + return code; + + int r = new_recompile_block(vaddr); + if (r == 0) + return get_addr(vaddr); - return head->addr; - } - } - head=head->next; - } - //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr); - int r=new_recompile_block(vaddr); - if(r==0) return get_addr(vaddr); // generate an address error Status|=2; Cause=(vaddr<<31)|(4<<2); @@ -991,7 +1026,6 @@ static const struct { 
FUNCNAME(jump_syscall), FUNCNAME(jump_syscall_ds), FUNCNAME(call_gteStall), - FUNCNAME(clean_blocks), FUNCNAME(new_dyna_leave), FUNCNAME(pcsx_mtc0), FUNCNAME(pcsx_mtc0_ds), @@ -1352,11 +1386,6 @@ void invalidate_all_pages(void) u_int page; for(page=0;page<4096;page++) invalidate_page(page); - for(page=0;page<1048576;page++) - if(!invalid_code[page]) { - restore_candidate[(page&2047)>>3]|=1<<(page&7); - restore_candidate[((page&2047)>>3)+256]|=1<<(page&7); - } #ifdef USE_MINI_HT memset(mini_ht,-1,sizeof(mini_ht)); #endif @@ -1386,55 +1415,6 @@ void add_jump_out(u_int vaddr,void *src) //inv_debug("add_jump_out: to %p\n",get_pointer(src)); } -// If a code block was found to be unmodified (bit was set in -// restore_candidate) and it remains unmodified (bit is clear -// in invalid_code) then move the entries for that 4K page from -// the dirty list to the clean list. -void clean_blocks(u_int page) -{ - struct ll_entry *head; - inv_debug("INV: clean_blocks page=%d\n",page); - head=jump_dirty[page]; - while(head!=NULL) { - if(!invalid_code[head->vaddr>>12]) { - // Don't restore blocks which are about to expire from the cache - if (doesnt_expire_soon(head->addr)) { - if(verify_dirty(head->addr)) { - u_char *start, *end; - //printf("Possibly Restore %x (%p)\n",head->vaddr, head->addr); - u_int i; - u_int inv=0; - get_bounds(head->addr, &start, &end); - if (start - rdram < RAM_SIZE) { - for (i = (start-rdram+0x80000000)>>12; i <= (end-1-rdram+0x80000000)>>12; i++) { - inv|=invalid_code[i]; - } - } - else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) { - inv=1; - } - if(!inv) { - void *clean_addr = get_clean_addr(head->addr); - if (doesnt_expire_soon(clean_addr)) { - u_int ppage=page; - inv_debug("INV: Restored %x (%p/%p)\n",head->vaddr, head->addr, clean_addr); - //printf("page=%x, addr=%x\n",page,head->vaddr); - //assert(head->vaddr>>12==(page|0x80000)); - ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr); - struct ht_entry *ht_bin = 
hash_table_get(head->vaddr); - if (ht_bin->vaddr[0] == head->vaddr) - ht_bin->tcaddr[0] = clean_addr; // Replace existing entry - if (ht_bin->vaddr[1] == head->vaddr) - ht_bin->tcaddr[1] = clean_addr; // Replace existing entry - } - } - } - } - } - head=head->next; - } -} - /* Register allocation */ // Note: registers are allocated clean (unmodified state) @@ -6347,7 +6327,6 @@ void new_dynarec_clear_full(void) memset(invalid_code,1,sizeof(invalid_code)); memset(hash_table,0xff,sizeof(hash_table)); memset(mini_ht,-1,sizeof(mini_ht)); - memset(restore_candidate,0,sizeof(restore_candidate)); memset(shadow,0,sizeof(shadow)); copy=shadow; expirep=16384; // Expiry pointer, +2 blocks @@ -6421,6 +6400,8 @@ void new_dynarec_init(void) ram_offset=(uintptr_t)rdram-0x80000000; if (ram_offset!=0) SysPrintf("warning: RAM is not directly mapped, performance will suffer\n"); + SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n"); + SysPrintf("%p/%p/%p/%p/%p\n", psxM, psxH, psxR, mem_rtab, out); } void new_dynarec_cleanup(void) @@ -9432,17 +9413,7 @@ int new_recompile_block(u_int addr) out = ndrc->translation_cache; // Trap writes to any of the pages we compiled - for(i=start>>12;i<=(start+slen*4)>>12;i++) { - invalid_code[i]=0; - } - inv_code_start=inv_code_end=~0; - - // for PCSX we need to mark all mirrors too - if(get_page(start)<(RAM_SIZE>>12)) - for(i=start>>12;i<=(start+slen*4)>>12;i++) - invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]= - invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]= - invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0; + mark_valid_code(start, slen*4); /* Pass 10 - Free memory by expiring oldest blocks */ From ece032e6deb31bbbbe037c7d1dd630994d46b954 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 20 Feb 2022 00:11:52 +0200 Subject: [PATCH 133/597] drc: restore all block entry points at once and add some stats --- frontend/plugin_lib.c | 1 + libpcsxcore/new_dynarec/new_dynarec.c | 86 ++++++++++++++++++++++----- libpcsxcore/new_dynarec/new_dynarec.h | 1 + 3 
files changed, 73 insertions(+), 15 deletions(-) diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index c6a2bf0e1..d215636f4 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -672,6 +672,7 @@ void pl_frame_limit(void) hud_msg[0] = 0; } tv_old = now; + //new_dynarec_print_stats(); } #ifdef PCNT static int ya_vsync_count; diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 93319ec02..962b338a8 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -50,6 +50,7 @@ //#define DISASM //#define ASSEM_PRINT +//#define STAT_PRINT #ifdef ASSEM_PRINT #define assem_debug printf @@ -235,6 +236,19 @@ static struct decoded_insn static int expirep; static u_int stop_after_jal; static u_int f1_hack; +#ifdef STAT_PRINT + static int stat_bc_direct; + static int stat_bc_pre; + static int stat_bc_restore; + static int stat_jump_in_lookups; + static int stat_restore_tries; + static int stat_restore_compares; + static int stat_inv_addr_calls; + static int stat_inv_hits; + #define stat_inc(s) s++ +#else + #define stat_inc(s) +#endif int new_dynarec_hacks; int new_dynarec_hacks_pergame; @@ -321,7 +335,6 @@ int new_recompile_block(u_int addr); void *get_addr_ht(u_int vaddr); void invalidate_block(u_int block); void invalidate_addr(u_int addr); -void remove_hash(int vaddr); void dyna_linker(); void dyna_linker_ds(); void verify_code(); @@ -557,9 +570,13 @@ static int doesnt_expire_soon(void *tcaddr) void *ndrc_try_restore_block(u_int vaddr) { - u_int page = get_page(vaddr); - struct ll_entry *head; + u_char *source_start = NULL, *source_end = NULL; + void *found_stub = NULL, *found_clean = NULL; + u_int len, page = get_page(vaddr); + const struct ll_entry *head; + int ep_count = 0; + stat_inc(stat_restore_tries); for (head = jump_dirty[page]; head != NULL; head = head->next) { if (head->vaddr != vaddr) @@ -567,33 +584,53 @@ void *ndrc_try_restore_block(u_int vaddr) // don't 
restore blocks which are about to expire from the cache if (!doesnt_expire_soon(head->addr)) continue; + stat_inc(stat_restore_compares); if (!verify_dirty(head->addr)) continue; - // restore - u_char *start, *end; + found_stub = head->addr; + break; + } + if (!found_stub) + return NULL; + + found_clean = get_clean_addr(found_stub); + get_bounds(found_stub, &source_start, &source_end); + assert(source_start < source_end); + len = source_end - source_start; + mark_valid_code(vaddr, len); + + // restore all entry points + for (head = jump_dirty[page]; head != NULL; head = head->next) + { + if (head->vaddr < vaddr || head->vaddr >= vaddr + len) + continue; + + u_char *start = NULL, *end = NULL; get_bounds(head->addr, &start, &end); - mark_valid_code(vaddr, end - start); + if (start != source_start || end != source_end) + continue; void *clean_addr = get_clean_addr(head->addr); - ll_add_flags(jump_in + page, vaddr, head->reg_sv_flags, clean_addr); + ll_add_flags(jump_in + page, head->vaddr, head->reg_sv_flags, clean_addr); - struct ht_entry *ht_bin = hash_table_get(vaddr); int in_ht = 0; - if (ht_bin->vaddr[0] == vaddr) { + struct ht_entry *ht_bin = hash_table_get(head->vaddr); + if (ht_bin->vaddr[0] == head->vaddr) { ht_bin->tcaddr[0] = clean_addr; // Replace existing entry in_ht = 1; } - if (ht_bin->vaddr[1] == vaddr) { + if (ht_bin->vaddr[1] == head->vaddr) { ht_bin->tcaddr[1] = clean_addr; // Replace existing entry in_ht = 1; } if (!in_ht) - hash_table_add(ht_bin, vaddr, clean_addr); - inv_debug("INV: Restored %08x (%p/%p)\n", head->vaddr, head->addr, clean_addr); - return clean_addr; + hash_table_add(ht_bin, head->vaddr, clean_addr); + ep_count++; } - return NULL; + inv_debug("INV: Restored %08x %p (%d)\n", vaddr, found_stub, ep_count); + stat_inc(stat_bc_restore); + return found_clean; } // Get address from virtual address @@ -604,6 +641,7 @@ void noinline *get_addr(u_int vaddr) struct ll_entry *head; void *code; + stat_inc(stat_jump_in_lookups); for (head = 
jump_in[page]; head != NULL; head = head->next) { if (head->vaddr == vaddr) { hash_table_add(hash_table_get(vaddr), vaddr, head->addr); @@ -1169,7 +1207,7 @@ static void *check_addr(u_int vaddr) return 0; } -void remove_hash(int vaddr) +static void remove_hash(int vaddr) { //printf("remove hash: %x\n",vaddr); struct ht_entry *ht_bin = hash_table_get(vaddr); @@ -1248,6 +1286,7 @@ static void invalidate_page(u_int page) struct ll_entry *head; struct ll_entry *next; head=jump_in[page]; + if (head) stat_inc(stat_inv_hits); jump_in[page]=0; while(head!=NULL) { inv_debug("INVALIDATE: %x\n",head->vaddr); @@ -1327,6 +1366,7 @@ void invalidate_addr(u_int addr) //static int rhits; // this check is done by the caller //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; } + stat_inc(stat_inv_addr_calls); u_int page=get_vpage(addr); if(page<2048) { // RAM struct ll_entry *head; @@ -6423,6 +6463,7 @@ void new_dynarec_cleanup(void) #ifdef ROM_COPY if (munmap (ROM_COPY, 67108864) < 0) {SysPrintf("munmap() failed\n");} #endif + new_dynarec_print_stats(); } static u_int *get_source_start(u_int addr, u_int *limit) @@ -6553,6 +6594,20 @@ void new_dynarec_load_blocks(const void *save, int size) memcpy(&psxRegs.GPR, regs_save, sizeof(regs_save)); } +void new_dynarec_print_stats(void) +{ +#ifdef STAT_PRINT + printf("cc %3d,%3d,%3d lu%3d,%3d c%3d inv%3d,%3d tc_offs %zu\n", + stat_bc_pre, stat_bc_direct, stat_bc_restore, + stat_jump_in_lookups, stat_restore_tries, stat_restore_compares, + stat_inv_addr_calls, stat_inv_hits, + out - ndrc->translation_cache); + stat_bc_direct = stat_bc_pre = stat_bc_restore = + stat_jump_in_lookups = stat_restore_tries = stat_restore_compares = + stat_inv_addr_calls = stat_inv_hits = 0; +#endif +} + static int apply_hacks(void) { int i; @@ -9422,6 +9477,7 @@ int new_recompile_block(u_int addr) #ifdef ASSEM_PRINT fflush(stdout); #endif + stat_inc(stat_bc_direct); return 0; } diff --git a/libpcsxcore/new_dynarec/new_dynarec.h 
b/libpcsxcore/new_dynarec/new_dynarec.h index 8991faca2..c152c45d0 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -26,6 +26,7 @@ void new_dynarec_clear_full(void); void new_dyna_start(void *context); int new_dynarec_save_blocks(void *save, int size); void new_dynarec_load_blocks(const void *save, int size); +void new_dynarec_print_stats(void); void invalidate_all_pages(void); void invalidate_block(unsigned int block); From 4bdc30ab36281e9f9934efb026e264def03cef46 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 20 Feb 2022 00:46:51 +0200 Subject: [PATCH 134/597] drc: get rid of SPAN Should have done it 10 years ago, it's a huge maintenance burden for something than almost never happens. --- libpcsxcore/new_dynarec/assem_arm.c | 7 - libpcsxcore/new_dynarec/assem_arm64.c | 10 - libpcsxcore/new_dynarec/linkage_arm.S | 34 +-- libpcsxcore/new_dynarec/linkage_arm64.S | 27 +- libpcsxcore/new_dynarec/new_dynarec.c | 356 +----------------------- 5 files changed, 14 insertions(+), 420 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index b9dd3cf96..eb695c58c 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -265,7 +265,6 @@ static int isclean(void *addr) if((*ptr&0xFF000000)!=0xeb000000) ptr++; if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0; - if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0; return 1; } @@ -2018,12 +2017,6 @@ static void *do_dirty_stub(int i, u_int source_len) return entry; } -static void do_dirty_stub_ds(u_int source_len) -{ - do_dirty_stub_emit_args(start + 1, source_len); - emit_far_call(verify_code_ds); -} - /* Special assem */ static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist) diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 
ee7b4f7c9..13d2f9347 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1732,16 +1732,6 @@ static void *do_dirty_stub(int i, u_int source_len) return entry; } -static void do_dirty_stub_ds(u_int source_len) -{ - u_int *loadlps = (void *)out; - do_dirty_stub_base(start + 1, source_len); - void *lit_jumpover = out; - emit_jmp(out + 8*2); - do_dirty_stub_emit_literals(loadlps); - set_jump_target(lit_jumpover, out); -} - static uint64_t get_from_ldr_literal(const u_int *i) { signed int ofs; diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 978280a84..4fc111d6a 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -230,16 +230,12 @@ FUNCTION(dyna_linker): tst r0, r0 beq 10b - /* pagefault */ mov r0, r4 mov r1, r0 mov r2, #(4<<2) /* Address error (fetch) */ - .size dyna_linker, .-dyna_linker -FUNCTION(exec_pagefault): /* r0 = instruction pointer */ /* r1 = fault address */ - /* r2 = cause */ ldr r3, [fp, #LO_reg_cop0+48] /* Status */ str r0, [fp, #LO_reg_cop0+56] /* EPC */ orr r3, r3, #2 @@ -250,34 +246,9 @@ FUNCTION(exec_pagefault): orr r0, r0, #0x80 bl get_addr_ht mov pc, r0 - .size exec_pagefault, .-exec_pagefault - -/* Special dynamic linker for the case where a page fault - may occur in a branch delay slot */ -FUNCTION(dyna_linker_ds): - /* r0 = virtual target address */ - /* r1 = instruction to patch */ - mov r4, r0 - mov r5, r1 -10: - dyna_linker_main - - bic r0, r4, #7 - orr r0, r0, #1 - bl new_recompile_block - tst r0, r0 - beq 10b - - /* pagefault */ - mov r0, r4 - bic r1, r0, #7 - mov r2, #0x80000008 /* High bit set indicates pagefault in delay slot */ - sub r0, r1, #4 - b exec_pagefault - .size dyna_linker_ds, .-dyna_linker_ds + .size dyna_linker, .-dyna_linker .align 2 - FUNCTION(jump_vaddr_r0): eor r2, r0, r0, lsl #16 b jump_vaddr @@ -354,8 +325,6 @@ FUNCTION(jump_vaddr): .align 2 -FUNCTION(verify_code_ds): - str r8, 
[fp, #LO_branch_target] @ preserve HOST_BTREG? FUNCTION(verify_code): /* r1 = source */ /* r2 = target */ @@ -390,7 +359,6 @@ FUNCTION(verify_code): bl get_addr mov pc, r0 .size verify_code, .-verify_code - .size verify_code_ds, .-verify_code_ds .align 2 FUNCTION(cc_interrupt): diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 5c4d12746..33fc048e3 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -84,36 +84,13 @@ DRC_VAR(mini_ht, 256) .text .align 2 -/* r0 = virtual target address */ -/* r1 = instruction to patch */ -.macro dyna_linker_main - /* XXX TODO: should be able to do better than this... */ - bl get_addr_ht - br x0 -.endm - - FUNCTION(dyna_linker): /* r0 = virtual target address */ /* r1 = instruction to patch */ - dyna_linker_main + bl get_addr_ht + br x0 .size dyna_linker, .-dyna_linker -FUNCTION(exec_pagefault): - /* r0 = instruction pointer */ - /* r1 = fault address */ - /* r2 = cause */ - bl abort - .size exec_pagefault, .-exec_pagefault - -/* Special dynamic linker for the case where a page fault - may occur in a branch delay slot */ -FUNCTION(dyna_linker_ds): - /* r0 = virtual target address */ - /* r1 = instruction to patch */ - dyna_linker_main - .size dyna_linker_ds, .-dyna_linker_ds - .align 2 FUNCTION(cc_interrupt): ldr w0, [rFP, #LO_last_count] diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 962b338a8..518d07165 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -314,7 +314,7 @@ static struct decoded_insn //#define FCOMP 21 // Floating point compare (sets FSREG) #define SYSCALL 22// SYSCALL,BREAK #define OTHER 23 // Other -#define SPAN 24 // Branch/delay slot spans 2 pages +//#define SPAN 24 // Branch/delay slot spans 2 pages #define NI 25 // Not implemented #define HLECALL 26// PCSX fake opcodes for HLE #define COP2 27 // Coprocessor 2 move @@ 
-336,9 +336,7 @@ void *get_addr_ht(u_int vaddr); void invalidate_block(u_int block); void invalidate_addr(u_int addr); void dyna_linker(); -void dyna_linker_ds(); void verify_code(); -void verify_code_ds(); void cc_interrupt(); void fp_exception(); void fp_exception_ds(); @@ -2090,11 +2088,6 @@ static void delayslot_alloc(struct regstat *current,int i) case RJUMP: case SYSCALL: case HLECALL: - case SPAN: - assem_debug("jump in the delay slot. this shouldn't happen.\n");//abort(); - SysPrintf("Disabled speculative precompilation\n"); - stop_after_jal=1; - break; case IMM16: imm16_alloc(current,i); break; @@ -2141,42 +2134,6 @@ static void delayslot_alloc(struct regstat *current,int i) } } -// Special case where a branch and delay slot span two pages in virtual memory -static void pagespan_alloc(struct regstat *current,int i) -{ - current->isconst=0; - current->wasconst=0; - regs[i].wasconst=0; - minimum_free_regs[i]=HOST_REGS; - alloc_all(current,i); - alloc_cc(current,i); - dirty_reg(current,CCREG); - if(dops[i].opcode==3) // JAL - { - alloc_reg(current,i,31); - dirty_reg(current,31); - } - if(dops[i].opcode==0&&(dops[i].opcode2&0x3E)==8) // JR/JALR - { - alloc_reg(current,i,dops[i].rs1); - if (dops[i].rt1!=0) { - alloc_reg(current,i,dops[i].rt1); - dirty_reg(current,dops[i].rt1); - } - } - if((dops[i].opcode&0x2E)==4) // BEQ/BNE/BEQL/BNEL - { - if(dops[i].rs1) alloc_reg(current,i,dops[i].rs1); - if(dops[i].rs2) alloc_reg(current,i,dops[i].rs2); - } - else - if((dops[i].opcode&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL - { - if(dops[i].rs1) alloc_reg(current,i,dops[i].rs1); - } - //else ... 
-} - static void add_stub(enum stub_type type, void *addr, void *retaddr, u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e) { @@ -4131,7 +4088,6 @@ static void ujump_assemble(int i, const struct regstat *i_regs); static void rjump_assemble(int i, const struct regstat *i_regs); static void cjump_assemble(int i, const struct regstat *i_regs); static void sjump_assemble(int i, const struct regstat *i_regs); -static void pagespan_assemble(int i, const struct regstat *i_regs); static int assemble(int i, const struct regstat *i_regs, int ccadj_) { @@ -4211,9 +4167,6 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) sjump_assemble(i, i_regs); ds = 1; break; - case SPAN: - pagespan_assemble(i, i_regs); - break; case NOP: case OTHER: case NI: @@ -4233,7 +4186,6 @@ static void ds_assemble(int i, const struct regstat *i_regs) case SYSCALL: case HLECALL: case INTCALL: - case SPAN: case UJUMP: case RJUMP: case CJUMP: @@ -4845,7 +4797,6 @@ static void ds_assemble_entry(int i) case SYSCALL: case HLECALL: case INTCALL: - case SPAN: case UJUMP: case RJUMP: case CJUMP: @@ -4871,11 +4822,6 @@ static void emit_extjump(void *addr, u_int target) emit_extjump2(addr, target, dyna_linker); } -static void emit_extjump_ds(void *addr, u_int target) -{ - emit_extjump2(addr, target, dyna_linker_ds); -} - // Load 2 immediates optimizing for small code size static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) { @@ -5919,271 +5865,6 @@ static void sjump_assemble(int i, const struct regstat *i_regs) } } -static void pagespan_assemble(int i, const struct regstat *i_regs) -{ - int s1l=get_reg(i_regs->regmap,dops[i].rs1); - int s2l=get_reg(i_regs->regmap,dops[i].rs2); - void *taken = NULL; - void *nottaken = NULL; - int unconditional=0; - if(dops[i].rs1==0) - { - s1l=s2l; - s2l=-1; - } - else if(dops[i].rs2==0) - { - s2l=-1; - } - int hr=0; - int addr=-1,alt=-1,ntaddr=-1; - if(i_regs->regmap[HOST_BTREG]<0) {addr=HOST_BTREG;} - else { - 
while(hrregmap[hr]!=dops[i].rs1 && - i_regs->regmap[hr]!=dops[i].rs2 ) - { - addr=hr++;break; - } - hr++; - } - } - while(hrregmap[hr]!=dops[i].rs1 && - i_regs->regmap[hr]!=dops[i].rs2 ) - { - alt=hr++;break; - } - hr++; - } - if((dops[i].opcode&0x2E)==6) // BLEZ/BGTZ needs another register - { - while(hrregmap[hr]!=dops[i].rs1 && - i_regs->regmap[hr]!=dops[i].rs2 ) - { - ntaddr=hr;break; - } - hr++; - } - } - assert(hrregmap,31); - emit_movimm(start+i*4+8,rt); - unconditional=1; - } - if(dops[i].opcode==0&&(dops[i].opcode2&0x3E)==8) // JR/JALR - { - emit_mov(s1l,addr); - if(dops[i].opcode2==9) // JALR - { - int rt=get_reg(i_regs->regmap,dops[i].rt1); - emit_movimm(start+i*4+8,rt); - } - } - if((dops[i].opcode&0x3f)==4) // BEQ - { - if(dops[i].rs1==dops[i].rs2) - { - unconditional=1; - } - else - #ifdef HAVE_CMOV_IMM - if(1) { - if(s2l>=0) emit_cmp(s1l,s2l); - else emit_test(s1l,s1l); - emit_cmov2imm_e_ne_compact(ba[i],start+i*4+8,addr); - } - else - #endif - { - assert(s1l>=0); - emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt); - if(s2l>=0) emit_cmp(s1l,s2l); - else emit_test(s1l,s1l); - emit_cmovne_reg(alt,addr); - } - } - if((dops[i].opcode&0x3f)==5) // BNE - { - #ifdef HAVE_CMOV_IMM - if(s2l>=0) emit_cmp(s1l,s2l); - else emit_test(s1l,s1l); - emit_cmov2imm_e_ne_compact(start+i*4+8,ba[i],addr); - #else - assert(s1l>=0); - emit_mov2imm_compact(start+i*4+8,addr,ba[i],alt); - if(s2l>=0) emit_cmp(s1l,s2l); - else emit_test(s1l,s1l); - emit_cmovne_reg(alt,addr); - #endif - } - if((dops[i].opcode&0x3f)==0x14) // BEQL - { - if(s2l>=0) emit_cmp(s1l,s2l); - else emit_test(s1l,s1l); - if(nottaken) set_jump_target(nottaken, out); - nottaken=out; - emit_jne(0); - } - if((dops[i].opcode&0x3f)==0x15) // BNEL - { - if(s2l>=0) emit_cmp(s1l,s2l); - else emit_test(s1l,s1l); - nottaken=out; - emit_jeq(0); - if(taken) set_jump_target(taken, out); - } - if((dops[i].opcode&0x3f)==6) // BLEZ - { - emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr); - emit_cmpimm(s1l,1); - 
emit_cmovl_reg(alt,addr); - } - if((dops[i].opcode&0x3f)==7) // BGTZ - { - emit_mov2imm_compact(ba[i],addr,start+i*4+8,ntaddr); - emit_cmpimm(s1l,1); - emit_cmovl_reg(ntaddr,addr); - } - if((dops[i].opcode&0x3f)==0x16) // BLEZL - { - assert((dops[i].opcode&0x3f)!=0x16); - } - if((dops[i].opcode&0x3f)==0x17) // BGTZL - { - assert((dops[i].opcode&0x3f)!=0x17); - } - assert(dops[i].opcode!=1); // BLTZ/BGEZ - - //FIXME: Check CSREG - if(dops[i].opcode==0x11 && dops[i].opcode2==0x08 ) { - if((source[i]&0x30000)==0) // BC1F - { - emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt); - emit_testimm(s1l,0x800000); - emit_cmovne_reg(alt,addr); - } - if((source[i]&0x30000)==0x10000) // BC1T - { - emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr); - emit_testimm(s1l,0x800000); - emit_cmovne_reg(alt,addr); - } - if((source[i]&0x30000)==0x20000) // BC1FL - { - emit_testimm(s1l,0x800000); - nottaken=out; - emit_jne(0); - } - if((source[i]&0x30000)==0x30000) // BC1TL - { - emit_testimm(s1l,0x800000); - nottaken=out; - emit_jeq(0); - } - } - - assert(i_regs->regmap[HOST_CCREG]==CCREG); - wb_dirtys(regs[i].regmap,regs[i].dirty); - if(unconditional) - { - emit_movimm(ba[i],HOST_BTREG); - } - else if(addr!=HOST_BTREG) - { - emit_mov(addr,HOST_BTREG); - } - void *branch_addr=out; - emit_jmp(0); - int target_addr=start+i*4+5; - void *stub=out; - void *compiled_target_addr=check_addr(target_addr); - emit_extjump_ds(branch_addr, target_addr); - if(compiled_target_addr) { - set_jump_target(branch_addr, compiled_target_addr); - add_jump_out(target_addr,stub); - } - else set_jump_target(branch_addr, stub); -} - -// Assemble the delay slot for the above -static void pagespan_ds() -{ - assem_debug("initial delay slot:\n"); - u_int vaddr=start+1; - u_int page=get_page(vaddr); - u_int vpage=get_vpage(vaddr); - ll_add(jump_dirty+vpage,vaddr,(void *)out); - do_dirty_stub_ds(slen*4); - ll_add(jump_in+page,vaddr,(void *)out); - assert(regs[0].regmap_entry[HOST_CCREG]==CCREG); - 
if(regs[0].regmap[HOST_CCREG]!=CCREG) - wb_register(CCREG,regs[0].regmap_entry,regs[0].wasdirty); - if(regs[0].regmap[HOST_BTREG]!=BTREG) - emit_writeword(HOST_BTREG,&branch_target); - load_regs(regs[0].regmap_entry,regs[0].regmap,dops[0].rs1,dops[0].rs2); - address_generation(0,&regs[0],regs[0].regmap_entry); - if (ram_offset && (dops[0].is_load || dops[0].is_store)) - load_reg(regs[0].regmap_entry,regs[0].regmap,ROREG); - if (dops[0].is_store) - load_reg(regs[0].regmap_entry,regs[0].regmap,INVCP); - is_delayslot=0; - switch (dops[0].itype) { - case SYSCALL: - case HLECALL: - case INTCALL: - case SPAN: - case UJUMP: - case RJUMP: - case CJUMP: - case SJUMP: - SysPrintf("Jump in the delay slot. This is probably a bug.\n"); - break; - default: - assemble(0, &regs[0], 0); - } - int btaddr=get_reg(regs[0].regmap,BTREG); - if(btaddr<0) { - btaddr=get_reg_temp(regs[0].regmap); - emit_readword(&branch_target,btaddr); - } - assert(btaddr!=HOST_CCREG); - if(regs[0].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); -#ifdef HOST_IMM8 - host_tempreg_acquire(); - emit_movimm(start+4,HOST_TEMPREG); - emit_cmp(btaddr,HOST_TEMPREG); - host_tempreg_release(); -#else - emit_cmpimm(btaddr,start+4); -#endif - void *branch = out; - emit_jeq(0); - store_regs_bt(regs[0].regmap,regs[0].dirty,-1); - do_jump_vaddr(btaddr); - set_jump_target(branch, out); - store_regs_bt(regs[0].regmap,regs[0].dirty,start+4); - load_regs_bt(regs[0].regmap,regs[0].dirty,start+4); -} - static void check_regmap(signed char *regmap) { #ifndef NDEBUG @@ -6238,8 +5919,6 @@ void disassemble_inst(int i) else printf (" %x: %s r%d\n",start+i*4,insn[i],dops[i].rs1); break; - case SPAN: - printf (" %x: %s (pagespan) r%d,r%d,%8x\n",start+i*4,insn[i],dops[i].rs1,dops[i].rs2,ba[i]);break; case IMM16: if(dops[i].opcode==0xf) //LUI printf (" %x: %s r%d,%4x0000\n",start+i*4,insn[i],dops[i].rt1,imm[i]&0xffff); @@ -7156,13 +6835,11 @@ static noinline void pass1_disassemble(u_int pagelimit) SysPrintf("Disabled speculative
precompilation\n"); } } - slen=i; - if (dops[i-1].is_jump) { - if(start+i*4==pagelimit) { - dops[i-1].itype=SPAN; - } - } - assert(slen>0); + while (i > 0 && dops[i-1].is_jump) + i--; + assert(i > 0); + assert(!dops[i-1].is_jump); + slen = i; } // Basic liveness analysis for MIPS registers @@ -7665,9 +7342,6 @@ static noinline void pass3_register_alloc(u_int addr) case INTCALL: syscall_alloc(&current,i); break; - case SPAN: - pagespan_alloc(&current,i); - break; } // Create entry (branch target) regmap @@ -8098,7 +7772,7 @@ static noinline void pass4_cull_unused_regs(void) } } // Cycle count is needed at branches. Assume it is needed at the target too. - if(i==0||dops[i].bt||dops[i].itype==CJUMP||dops[i].itype==SPAN) { + if(i==0||dops[i].bt||dops[i].itype==CJUMP) { if(regmap_pre[i][HOST_CCREG]==CCREG) nr|=1< 1) { - if (!dops[i-2].is_ujump && dops[i-1].itype != SPAN) { + if (!dops[i-2].is_ujump) { assert(!dops[i-1].is_jump); assert(i==slen); if(dops[i-2].itype!=CJUMP&&dops[i-2].itype!=SJUMP) { From 104df9d3b15f92d5c73d2d6beb6f01f0cc158e03 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 22 Feb 2022 01:23:09 +0200 Subject: [PATCH 135/597] drc: rework block tracking Mostly to remove that horrible generated code parsing. Also seems to generate less code now.
--- libpcsxcore/new_dynarec/assem_arm.c | 147 +---- libpcsxcore/new_dynarec/assem_arm64.c | 146 +---- libpcsxcore/new_dynarec/emu_if.c | 18 +- libpcsxcore/new_dynarec/linkage_arm.S | 210 ++---- libpcsxcore/new_dynarec/linkage_arm64.S | 10 +- libpcsxcore/new_dynarec/new_dynarec.c | 835 +++++++++++++----------- libpcsxcore/new_dynarec/new_dynarec.h | 4 +- 7 files changed, 528 insertions(+), 842 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index eb695c58c..005c280c3 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -194,6 +194,7 @@ static void *find_extjump_insn(void *stub) // get address that insn one after stub loads (dyna_linker arg1), // treat it as a pointer to branch insn, // return addr where that branch jumps to +#if 0 static void *get_pointer(void *stub) { //printf("get_pointer(%x)\n",(int)stub); @@ -201,106 +202,7 @@ static void *get_pointer(void *stub) assert((*i_ptr&0x0f000000)==0x0a000000); // b return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8; } - -// Find the "clean" entry point from a "dirty" entry point -// by skipping past the call to verify_code -static void *get_clean_addr(void *addr) -{ - signed int *ptr = addr; - #ifndef HAVE_ARMV7 - ptr+=4; - #else - ptr+=6; - #endif - if((*ptr&0xFF000000)!=0xeb000000) ptr++; - assert((*ptr&0xFF000000)==0xeb000000); // bl instruction - ptr++; - if((*ptr&0xFF000000)==0xea000000) { - return (char *)ptr+((*ptr<<8)>>6)+8; // follow jump - } - return ptr; -} - -static int verify_dirty(const u_int *ptr) -{ - #ifndef HAVE_ARMV7 - u_int offset; - // get from literal pool - assert((*ptr&0xFFFF0000)==0xe59f0000); - offset=*ptr&0xfff; - u_int source=*(u_int*)((void *)ptr+offset+8); - ptr++; - assert((*ptr&0xFFFF0000)==0xe59f0000); - offset=*ptr&0xfff; - u_int copy=*(u_int*)((void *)ptr+offset+8); - ptr++; - assert((*ptr&0xFFFF0000)==0xe59f0000); - offset=*ptr&0xfff; - u_int len=*(u_int*)((void *)ptr+offset+8); - ptr++; - ptr++; - 
#else - // ARMv7 movw/movt - assert((*ptr&0xFFF00000)==0xe3000000); - u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000); - u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000); - u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000); - ptr+=6; - #endif - if((*ptr&0xFF000000)!=0xeb000000) ptr++; - assert((*ptr&0xFF000000)==0xeb000000); // bl instruction - //printf("verify_dirty: %x %x %x\n",source,copy,len); - return !memcmp((void *)source,(void *)copy,len); -} - -// This doesn't necessarily find all clean entry points, just -// guarantees that it's not dirty -static int isclean(void *addr) -{ - #ifndef HAVE_ARMV7 - u_int *ptr=((u_int *)addr)+4; - #else - u_int *ptr=((u_int *)addr)+6; - #endif - if((*ptr&0xFF000000)!=0xeb000000) ptr++; - if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction - if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0; - return 1; -} - -// get source that block at addr was compiled from (host pointers) -static void get_bounds(void *addr, u_char **start, u_char **end) -{ - u_int *ptr = addr; - #ifndef HAVE_ARMV7 - u_int offset; - // get from literal pool - assert((*ptr&0xFFFF0000)==0xe59f0000); - offset=*ptr&0xfff; - u_int source=*(u_int*)((void *)ptr+offset+8); - ptr++; - //assert((*ptr&0xFFFF0000)==0xe59f0000); - //offset=*ptr&0xfff; - //u_int copy=*(u_int*)((void *)ptr+offset+8); - ptr++; - assert((*ptr&0xFFFF0000)==0xe59f0000); - offset=*ptr&0xfff; - u_int len=*(u_int*)((void *)ptr+offset+8); - ptr++; - ptr++; - #else - // ARMv7 movw/movt - assert((*ptr&0xFFF00000)==0xe3000000); - u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000); - //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000); - u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000); - ptr+=6; - #endif - if((*ptr&0xFF000000)!=0xeb000000) ptr++; - assert((*ptr&0xFF000000)==0xeb000000); // bl 
instruction - *start=(u_char *)source; - *end=(u_char *)source+len; -} +#endif // Allocate a specific ARM register. static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr) @@ -1623,7 +1525,7 @@ static void literal_pool_jumpover(int n) } // parsed by get_pointer, find_extjump_insn -static void emit_extjump2(u_char *addr, u_int target, void *linker) +static void emit_extjump(u_char *addr, u_int target) { u_char *ptr=(u_char *)addr; assert((ptr[3]&0x0e)==0xa); @@ -1633,18 +1535,7 @@ static void emit_extjump2(u_char *addr, u_int target, void *linker) emit_loadlp((u_int)addr,1); assert(ndrc->translation_cache <= addr && addr < ndrc->translation_cache + sizeof(ndrc->translation_cache)); - //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000)); -//DEBUG > -#ifdef DEBUG_CYCLE_COUNT - emit_readword(&last_count,ECX); - emit_add(HOST_CCREG,ECX,HOST_CCREG); - emit_readword(&next_interupt,ECX); - emit_writeword(HOST_CCREG,&Count); - emit_sub(HOST_CCREG,ECX,HOST_CCREG); - emit_writeword(ECX,&last_count); -#endif -//DEBUG < - emit_far_jump(linker); + emit_far_jump(dyna_linker); } static void check_extjump2(void *src) @@ -1987,36 +1878,6 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, restore_regs(reglist); } -// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr -static void do_dirty_stub_emit_args(u_int arg0, u_int source_len) -{ - #ifndef HAVE_ARMV7 - emit_loadlp((int)source, 1); - emit_loadlp((int)copy, 2); - emit_loadlp(source_len, 3); - #else - emit_movw(((u_int)source)&0x0000FFFF, 1); - emit_movw(((u_int)copy)&0x0000FFFF, 2); - emit_movt(((u_int)source)&0xFFFF0000, 1); - emit_movt(((u_int)copy)&0xFFFF0000, 2); - emit_movw(source_len, 3); - #endif - emit_movimm(arg0, 0); -} - -static void *do_dirty_stub(int i, u_int source_len) -{ - assem_debug("do_dirty_stub %x\n",start+i*4); - do_dirty_stub_emit_args(start + i*4, source_len); - emit_far_call(verify_code); - void *entry = 
out; - load_regs_entry(i); - if (entry == out) - entry = instr_addr[i]; - emit_jmp(instr_addr[i]); - return entry; -} - /* Special assem */ static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist) diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 13d2f9347..3a88f9efc 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -66,6 +66,7 @@ static void *find_extjump_insn(void *stub) return ptr + offset / 4; } +#if 0 // find where external branch is liked to using addr of it's stub: // get address that the stub loads (dyna_linker arg1), // treat it as a pointer to branch insn, @@ -81,6 +82,7 @@ static void *get_pointer(void *stub) assert(0); return NULL; } +#endif // Allocate a specific ARM register. static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr) @@ -144,21 +146,6 @@ static void output_w32(u_int word) out += 4; } -static void output_w64(uint64_t dword) -{ - *((uint64_t *)out) = dword; - out+=8; -} - -/* -static u_int rm_rd(u_int rm, u_int rd) -{ - assert(rm < 31); - assert(rd < 31); - return (rm << 16) | rd; -} -*/ - static u_int rn_rd(u_int rn, u_int rd) { assert(rn < 31); @@ -974,7 +961,7 @@ static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r) output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r)); } -static void emit_cbz(const void *a, u_int r) +static unused void emit_cbz(const void *a, u_int r) { emit_cb(0, 0, a, r); } @@ -1207,11 +1194,6 @@ static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) emit_bic(rs1, rs2, rt); } -static void emit_loadlp_ofs(u_int ofs, u_int rt) -{ - output_w32(0x58000000 | imm19_rt(ofs, rt)); -} - static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs) { u_int op = 0xb9000000; @@ -1288,7 +1270,7 @@ static void literal_pool_jumpover(int n) } // parsed by get_pointer, find_extjump_insn -static void emit_extjump2(u_char *addr, u_int target, void 
*linker) +static void emit_extjump(u_char *addr, u_int target) { assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond @@ -1298,7 +1280,7 @@ static void emit_extjump2(u_char *addr, u_int target, void *linker) // addr is in the current recompiled block (max 256k) // offset shouldn't exceed +/-1MB emit_adr(addr, 1); - emit_far_jump(linker); + emit_far_jump(dyna_linker); } static void check_extjump2(void *src) @@ -1669,122 +1651,6 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, restore_regs(reglist); } -static int verify_code_arm64(const void *source, const void *copy, u_int size) -{ - int ret = memcmp(source, copy, size); - //printf("%s %p,%#x = %d\n", __func__, source, size, ret); - return ret; -} - -// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr -static void do_dirty_stub_base(u_int vaddr, u_int source_len) -{ - assert(source_len <= MAXBLOCK*4); - emit_loadlp_ofs(0, 0); // ldr x1, source - emit_loadlp_ofs(0, 1); // ldr x2, copy - emit_movz(source_len, 2); - emit_far_call(verify_code_arm64); - void *jmp = out; - emit_cbz(0, 0); - emit_movz(vaddr & 0xffff, 0); - emit_movk_lsl16(vaddr >> 16, 0); - emit_far_call(get_addr); - emit_jmpreg(0); - set_jump_target(jmp, out); -} - -static void assert_dirty_stub(const u_int *ptr) -{ - assert((ptr[0] & 0xff00001f) == 0x58000000); // ldr x0, source - assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy - assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #source_len - assert( ptr[8] == 0xd61f0000); // br x0 -} - -static void set_loadlp(u_int *loadl, void *lit) -{ - uintptr_t ofs = (u_char *)lit - (u_char *)loadl; - assert((*loadl & ~0x1f) == 0x58000000); - assert((ofs & 3) == 0); - assert(ofs < 0x100000); - *loadl |= (ofs >> 2) << 5; -} - -static void do_dirty_stub_emit_literals(u_int *loadlps) -{ - set_loadlp(&loadlps[0], out); - output_w64((uintptr_t)source); - set_loadlp(&loadlps[1], out); - output_w64((uintptr_t)copy); -} - -static void 
*do_dirty_stub(int i, u_int source_len) -{ - assem_debug("do_dirty_stub %x\n",start+i*4); - u_int *loadlps = (void *)out; - do_dirty_stub_base(start + i*4, source_len); - void *entry = out; - load_regs_entry(i); - if (entry == out) - entry = instr_addr[i]; - emit_jmp(instr_addr[i]); - do_dirty_stub_emit_literals(loadlps); - return entry; -} - -static uint64_t get_from_ldr_literal(const u_int *i) -{ - signed int ofs; - assert((i[0] & 0xff000000) == 0x58000000); - ofs = i[0] << 8; - ofs >>= 5+8; - return *(uint64_t *)(i + ofs); -} - -static uint64_t get_from_movz(const u_int *i) -{ - assert((i[0] & 0x7fe00000) == 0x52800000); - return (i[0] >> 5) & 0xffff; -} - -// Find the "clean" entry point from a "dirty" entry point -// by skipping past the call to verify_code -static void *get_clean_addr(u_int *addr) -{ - assert_dirty_stub(addr); - return addr + 9; -} - -static int verify_dirty(const u_int *ptr) -{ - const void *source, *copy; - u_int len; - assert_dirty_stub(ptr); - source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x1, source - copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy - len = get_from_movz(&ptr[2]); // movz w3, #source_len - return !memcmp(source, copy, len); -} - -static int isclean(void *addr) -{ - const u_int *ptr = addr; - if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used - assert_dirty_stub(ptr); - return 0; - } - return 1; -} - -// get source that block at addr was compiled from (host pointers) -static void get_bounds(void *addr, u_char **start, u_char **end) -{ - const u_int *ptr = addr; - assert_dirty_stub(ptr); - *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x1, source - *end = *start + get_from_movz(&ptr[2]); // movz w3, #source_len -} - /* Special assem */ static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist) @@ -1982,7 +1848,7 @@ static void do_jump_vaddr(u_int rs) { if (rs != 0) emit_mov(rs, 0); - emit_far_call(get_addr_ht); + emit_far_call(ndrc_get_addr_ht); 
emit_jmpreg(0); } diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index e9008ae8f..7591093f5 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -332,7 +332,7 @@ static void ari64_reset() { printf("ari64_reset\n"); new_dyna_pcsx_mem_reset(); - invalidate_all_pages(); + new_dynarec_invalidate_all_pages(); new_dyna_restore(); pending_exception = 1; } @@ -362,21 +362,11 @@ static void ari64_execute() static void ari64_clear(u32 addr, u32 size) { - u32 start, end, main_ram; - size *= 4; /* PCSX uses DMA units (words) */ evprintf("ari64_clear %08x %04x\n", addr, size); - /* check for RAM mirrors */ - main_ram = (addr & 0xffe00000) == 0x80000000; - - start = addr >> 12; - end = (addr + size) >> 12; - - for (; start <= end; start++) - if (!main_ram || !invalid_code[start]) - invalidate_block(start); + new_dynarec_invalidate_range(addr, addr + size); } static void ari64_notify(int note, void *data) { @@ -449,8 +439,8 @@ void new_dynarec_init() {} void new_dyna_start(void *context) {} void new_dynarec_cleanup() {} void new_dynarec_clear_full() {} -void invalidate_all_pages() {} -void invalidate_block(unsigned int block) {} +void new_dynarec_invalidate_all_pages() {} +void new_dynarec_invalidate_range(unsigned int start, unsigned int end) {} void new_dyna_pcsx_mem_init(void) {} void new_dyna_pcsx_mem_reset(void) {} void new_dyna_pcsx_mem_load_state(void) {} diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 4fc111d6a..513911cab 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -26,13 +26,12 @@ #ifdef __MACH__ #define dynarec_local ESYM(dynarec_local) -#define add_jump_out ESYM(add_jump_out) -#define new_recompile_block ESYM(new_recompile_block) +#define ndrc_add_jump_out ESYM(ndrc_add_jump_out) #define ndrc_try_restore_block ESYM(ndrc_try_restore_block) -#define get_addr ESYM(get_addr) -#define
get_addr_ht ESYM(get_addr_ht) +#define ndrc_get_addr_ht ESYM(ndrc_get_addr_ht) +#define ndrc_get_addr_ht_param ESYM(ndrc_get_addr_ht_param) +#define ndrc_invalidate_addr ESYM(ndrc_invalidate_addr) #define gen_interupt ESYM(gen_interupt) -#define invalidate_addr ESYM(invalidate_addr) #define gteCheckStallRaw ESYM(gteCheckStallRaw) #define psxException ESYM(psxException) #endif @@ -90,16 +89,6 @@ DRC_VAR(ram_offset, 4) DRC_VAR(mini_ht, 256) -#ifdef TEXRELS_FORBIDDEN - .data - .align 2 -ptr_jump_in: - .word ESYM(jump_in) -ptr_hash_table: - .word ESYM(hash_table) -#endif - - .syntax unified .text .align 2 @@ -156,46 +145,28 @@ ptr_hash_table: #endif .endm -/* r4 = virtual target address */ -/* r5 = instruction to patch */ -.macro dyna_linker_main +FUNCTION(dyna_linker): + /* r0 = virtual target address */ + /* r1 = pointer to an instruction to patch */ #ifndef NO_WRITE_EXEC - load_varadr_ext r3, jump_in - /* get_page */ - lsr r2, r4, #12 - mov r6, #4096 - bic r2, r2, #0xe0000 - sub r6, r6, #1 - cmp r2, #0x1000 - ldr r7, [r5] - biclt r2, #0x0e00 - and r6, r6, r2 - cmp r2, #2048 - add r12, r7, #2 - orrcs r2, r6, #2048 - ldr r1, [r3, r2, lsl #2] - lsl r12, r12, #8 - add r6, r5, r12, asr #6 /* old target */ - mov r8, #0 - /* jump_in lookup */ -1: - movs r0, r1 - beq 2f - ldr r3, [r1] /* ll_entry .vaddr */ - ldrd r0, r1, [r0, #8] /* ll_entry .addr, .next */ - teq r3, r4 - bne 1b + ldr r7, [r1] + mov r4, r0 + add r6, r7, #2 + mov r5, r1 + lsl r6, r6, #8 + /* must not compile - that might expire the caller block */ + mov r1, #0 + bl ndrc_get_addr_ht_param + + movs r8, r0 + beq 0f + add r6, r5, r6, asr #6 /* old target */ teq r0, r6 moveq pc, r0 /* Stale i-cache */ - mov r8, r0 - b 1b /* jump_in may have dupes, continue search */ -2: - tst r8, r8 - beq 3f /* r4 not in jump_in */ - mov r0, r4 mov r1, r6 - bl add_jump_out + bl ndrc_add_jump_out + sub r2, r8, r5 and r1, r7, #0xff000000 lsl r2, r2, #6 @@ -203,162 +174,63 @@ ptr_hash_table: add r1, r1, r2, lsr #8 str r1, [r5] mov 
pc, r8 -3: +0: mov r0, r4 - bl ndrc_try_restore_block - tst r0, r0 - movne pc, r0 #else /* XXX: should be able to do better than this... */ - mov r0, r4 - bl get_addr_ht - mov pc, r0 #endif -.endm - - -FUNCTION(dyna_linker): - /* r0 = virtual target address */ - /* r1 = instruction to patch */ - mov r4, r0 - mov r5, r1 -10: - dyna_linker_main - - mov r0, r4 - bl new_recompile_block - tst r0, r0 - beq 10b - - mov r0, r4 - mov r1, r0 - mov r2, #(4<<2) /* Address error (fetch) */ - - /* r0 = instruction pointer */ - /* r1 = fault address */ - ldr r3, [fp, #LO_reg_cop0+48] /* Status */ - str r0, [fp, #LO_reg_cop0+56] /* EPC */ - orr r3, r3, #2 - str r1, [fp, #LO_reg_cop0+32] /* BadVAddr */ - str r3, [fp, #LO_reg_cop0+48] /* Status */ - str r2, [fp, #LO_reg_cop0+52] /* Cause */ - mov r0, #0x80000000 - orr r0, r0, #0x80 - bl get_addr_ht + bl ndrc_get_addr_ht mov pc, r0 .size dyna_linker, .-dyna_linker .align 2 -FUNCTION(jump_vaddr_r0): - eor r2, r0, r0, lsl #16 - b jump_vaddr - .size jump_vaddr_r0, .-jump_vaddr_r0 FUNCTION(jump_vaddr_r1): - eor r2, r1, r1, lsl #16 mov r0, r1 - b jump_vaddr + b jump_vaddr_r0 .size jump_vaddr_r1, .-jump_vaddr_r1 FUNCTION(jump_vaddr_r2): mov r0, r2 - eor r2, r2, r2, lsl #16 - b jump_vaddr + b jump_vaddr_r0 .size jump_vaddr_r2, .-jump_vaddr_r2 FUNCTION(jump_vaddr_r3): - eor r2, r3, r3, lsl #16 mov r0, r3 - b jump_vaddr + b jump_vaddr_r0 .size jump_vaddr_r3, .-jump_vaddr_r3 FUNCTION(jump_vaddr_r4): - eor r2, r4, r4, lsl #16 mov r0, r4 - b jump_vaddr + b jump_vaddr_r0 .size jump_vaddr_r4, .-jump_vaddr_r4 FUNCTION(jump_vaddr_r5): - eor r2, r5, r5, lsl #16 mov r0, r5 - b jump_vaddr + b jump_vaddr_r0 .size jump_vaddr_r5, .-jump_vaddr_r5 FUNCTION(jump_vaddr_r6): - eor r2, r6, r6, lsl #16 mov r0, r6 - b jump_vaddr + b jump_vaddr_r0 .size jump_vaddr_r6, .-jump_vaddr_r6 FUNCTION(jump_vaddr_r8): - eor r2, r8, r8, lsl #16 mov r0, r8 - b jump_vaddr + b jump_vaddr_r0 .size jump_vaddr_r8, .-jump_vaddr_r8 FUNCTION(jump_vaddr_r9): - eor r2, r9, r9, lsl #16 
mov r0, r9 - b jump_vaddr + b jump_vaddr_r0 .size jump_vaddr_r9, .-jump_vaddr_r9 FUNCTION(jump_vaddr_r10): - eor r2, r10, r10, lsl #16 mov r0, r10 - b jump_vaddr + b jump_vaddr_r0 .size jump_vaddr_r10, .-jump_vaddr_r10 FUNCTION(jump_vaddr_r12): - eor r2, r12, r12, lsl #16 mov r0, r12 - b jump_vaddr + b jump_vaddr_r0 .size jump_vaddr_r12, .-jump_vaddr_r12 FUNCTION(jump_vaddr_r7): - eor r2, r7, r7, lsl #16 add r0, r7, #0 .size jump_vaddr_r7, .-jump_vaddr_r7 -FUNCTION(jump_vaddr): - load_varadr_ext r1, hash_table - mvn r3, #15 - and r2, r3, r2, lsr #12 - ldr r2, [r1, r2]! - teq r2, r0 - ldreq pc, [r1, #8] - ldr r2, [r1, #4] - teq r2, r0 - ldreq pc, [r1, #12] - str r10, [fp, #LO_cycle_count] - bl get_addr - ldr r10, [fp, #LO_cycle_count] - mov pc, r0 - .size jump_vaddr, .-jump_vaddr - - .align 2 - -FUNCTION(verify_code): - /* r1 = source */ - /* r2 = target */ - /* r3 = length */ - tst r3, #4 - mov r4, #0 - add r3, r1, r3 - mov r5, #0 - ldrne r4, [r1], #4 - mov r12, #0 - ldrne r5, [r2], #4 - teq r1, r3 - beq .D3 -.D2: - ldr r7, [r1], #4 - eor r9, r4, r5 - ldr r8, [r2], #4 - orrs r9, r9, r12 - bne .D4 - ldr r4, [r1], #4 - eor r12, r7, r8 - ldr r5, [r2], #4 - cmp r1, r3 - bcc .D2 - teq r7, r8 -.D3: - teqeq r4, r5 -.D4: - ldr r8, [fp, #LO_branch_target] - moveq pc, lr -.D5: - bl get_addr +FUNCTION(jump_vaddr_r0): + bl ndrc_get_addr_ht mov pc, r0 - .size verify_code, .-verify_code + .size jump_vaddr_r0, .-jump_vaddr_r0 .align 2 FUNCTION(cc_interrupt): @@ -383,7 +255,7 @@ FUNCTION(cc_interrupt): tst r1, r1 moveq pc, lr ldr r0, [fp, #LO_pcaddr] - bl get_addr_ht + bl ndrc_get_addr_ht mov pc, r0 .size cc_interrupt, .-cc_interrupt @@ -399,7 +271,7 @@ FUNCTION(fp_exception): str r1, [fp, #LO_reg_cop0+48] /* Status */ str r2, [fp, #LO_reg_cop0+52] /* Cause */ add r0, r3, #0x80 - bl get_addr_ht + bl ndrc_get_addr_ht mov pc, r0 .size fp_exception, .-fp_exception .align 2 @@ -440,7 +312,7 @@ FUNCTION(jump_to_new_pc): ldr r0, [fp, #LO_pcaddr] sub r10, r10, r1 str r1, [fp, 
#LO_last_count] - bl get_addr_ht + bl ndrc_get_addr_ht mov pc, r0 .size jump_to_new_pc, .-jump_to_new_pc @@ -529,7 +401,7 @@ invalidate_addr_call: ldr lr, [fp, #LO_inv_code_end] cmp r0, r12 cmpcs lr, r0 - blcc invalidate_addr + blcc ndrc_invalidate_addr ldmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, pc} .size invalidate_addr_call, .-invalidate_addr_call @@ -539,7 +411,7 @@ FUNCTION(new_dyna_start): stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr} mov fp, r0 /* dynarec_local */ ldr r0, [fp, #LO_pcaddr] - bl get_addr_ht + bl ndrc_get_addr_ht ldr r1, [fp, #LO_next_interupt] ldr r10, [fp, #LO_cycle] str r1, [fp, #LO_last_count] diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 33fc048e3..d073ded4c 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -87,7 +87,7 @@ DRC_VAR(mini_ht, 256) FUNCTION(dyna_linker): /* r0 = virtual target address */ /* r1 = instruction to patch */ - bl get_addr_ht + bl ndrc_get_addr_ht br x0 .size dyna_linker, .-dyna_linker @@ -113,7 +113,7 @@ FUNCTION(cc_interrupt): ret 2: ldr w0, [rFP, #LO_pcaddr] - bl get_addr_ht + bl ndrc_get_addr_ht br x0 .size cc_interrupt, .-cc_interrupt @@ -129,7 +129,7 @@ FUNCTION(fp_exception): str w1, [rFP, #LO_reg_cop0+48] /* Status */ str w2, [rFP, #LO_reg_cop0+52] /* Cause */ add w0, w3, #0x80 - bl get_addr_ht + bl ndrc_get_addr_ht br x0 .size fp_exception, .-fp_exception .align 2 @@ -170,7 +170,7 @@ FUNCTION(jump_to_new_pc): ldr w0, [rFP, #LO_pcaddr] sub rCC, rCC, w1 str w1, [rFP, #LO_last_count] - bl get_addr_ht + bl ndrc_get_addr_ht br x0 .size jump_to_new_pc, .-jump_to_new_pc @@ -189,7 +189,7 @@ FUNCTION(new_dyna_start): ldr w0, [rFP, #LO_pcaddr] str w1, [rFP, #LO_last_count] sub rCC, w2, w1 - bl get_addr_ht + bl ndrc_get_addr_ht br x0 .size new_dyna_start, .-new_dyna_start diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 518d07165..a84e33e50 100644 --- 
a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -142,7 +142,6 @@ struct regstat struct ll_entry { u_int vaddr; - u_int reg_sv_flags; void *addr; struct ll_entry *next; }; @@ -169,7 +168,25 @@ struct link_entry { void *addr; u_int target; - u_int ext; + u_int internal; +}; + +struct block_info +{ + struct block_info *next; + const void *source; + const void *copy; + u_int start; // vaddr of the block start + u_int len; // of the whole block source + u_int tc_offs; + //u_int tc_len; + u_int reg_sv_flags; + u_short is_dirty; + u_short jump_in_cnt; + struct { + u_int vaddr; + void *addr; + } jump_in[0]; }; static struct decoded_insn @@ -191,12 +208,9 @@ static struct decoded_insn u_char is_store:1; } dops[MAXBLOCK]; - // used by asm: - struct ht_entry hash_table[65536] __attribute__((aligned(16))); - struct ll_entry *jump_in[4096] __attribute__((aligned(16))); - static u_char *out; - static struct ll_entry *jump_dirty[4096]; + static struct ht_entry hash_table[65536]; + static struct block_info *blocks[4096]; static struct ll_entry *jump_out[4096]; static u_int start; static u_int *source; @@ -240,14 +254,21 @@ static struct decoded_insn static int stat_bc_direct; static int stat_bc_pre; static int stat_bc_restore; + static int stat_ht_lookups; static int stat_jump_in_lookups; static int stat_restore_tries; static int stat_restore_compares; static int stat_inv_addr_calls; static int stat_inv_hits; + static int stat_blocks; + static int stat_links; #define stat_inc(s) s++ + #define stat_dec(s) s-- + #define stat_clear(s) s = 0 #else #define stat_inc(s) + #define stat_dec(s) + #define stat_clear(s) #endif int new_dynarec_hacks; @@ -331,12 +352,7 @@ static struct decoded_insn #define DJT_2 (void *)2l // asm linkage -int new_recompile_block(u_int addr); -void *get_addr_ht(u_int vaddr); -void invalidate_block(u_int block); -void invalidate_addr(u_int addr); void dyna_linker(); -void verify_code(); void cc_interrupt(); void 
fp_exception(); void fp_exception_ds(); @@ -346,12 +362,15 @@ void jump_break (u_int u0, u_int u1, u_int pc); void jump_break_ds(u_int u0, u_int u1, u_int pc); void jump_to_new_pc(); void call_gteStall(); -void add_jump_out(u_int vaddr, void *src); void new_dyna_leave(); -static void *get_clean_addr(void *addr); -static void get_bounds(void *addr, u_char **start, u_char **end); -static void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr); +void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile); +void *ndrc_get_addr_ht(u_int vaddr); +void ndrc_invalidate_addr(u_int addr); +void ndrc_add_jump_out(u_int vaddr, void *src); + +static int new_recompile_block(u_int addr); +static void invalidate_block(struct block_info *block); // Needed by assembler static void wb_register(signed char r, const signed char regmap[], uint64_t dirty); @@ -363,7 +382,6 @@ static void load_regs_entry(int t); static void load_all_consts(const signed char regmap[], u_int dirty, int i); static u_int get_host_reglist(const signed char *regmap); -static int verify_dirty(const u_int *ptr); static int get_final_value(int hr, int i, int *value); static void add_stub(enum stub_type type, void *addr, void *retaddr, u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e); @@ -515,20 +533,31 @@ static int ds_writes_rjump_rs(int i) return dops[i].rs1 != 0 && (dops[i].rs1 == dops[i+1].rt1 || dops[i].rs1 == dops[i+1].rt2); } +// psx addr mirror masking (for invalidation) +static u_int pmmask(u_int vaddr) +{ + vaddr &= ~0xe0000000; + if (vaddr < 0x01000000) + vaddr &= ~0x00e00000; // RAM mirrors + return vaddr; +} + static u_int get_page(u_int vaddr) { - u_int page=vaddr&~0xe0000000; - if (page < 0x1000000) - page &= ~0x0e00000; // RAM mirrors - page>>=12; + u_int page = pmmask(vaddr) >> 12; if(page>2048) page=2048+(page&2047); return page; } -// no virtual mem in PCSX -static u_int get_vpage(u_int vaddr) +// get a page for looking for a block that has vaddr +// (needed because 
the block may start in previous page) +static u_int get_page_prev(u_int vaddr) { - return get_page(vaddr); + assert(MAXBLOCK <= (1 << 12)); + u_int page = get_page(vaddr); + if (page & 511) + page--; + return page; } static struct ht_entry *hash_table_get(u_int vaddr) @@ -536,15 +565,33 @@ static struct ht_entry *hash_table_get(u_int vaddr) return &hash_table[((vaddr>>16)^vaddr)&0xFFFF]; } -static void hash_table_add(struct ht_entry *ht_bin, u_int vaddr, void *tcaddr) +static void hash_table_add(u_int vaddr, void *tcaddr) { + struct ht_entry *ht_bin = hash_table_get(vaddr); + assert(tcaddr); ht_bin->vaddr[1] = ht_bin->vaddr[0]; ht_bin->tcaddr[1] = ht_bin->tcaddr[0]; ht_bin->vaddr[0] = vaddr; ht_bin->tcaddr[0] = tcaddr; } -static void mark_valid_code(u_int vaddr, u_int len) +static void hash_table_remove(int vaddr) +{ + //printf("remove hash: %x\n",vaddr); + struct ht_entry *ht_bin = hash_table_get(vaddr); + if (ht_bin->vaddr[1] == vaddr) { + ht_bin->vaddr[1] = -1; + ht_bin->tcaddr[1] = NULL; + } + if (ht_bin->vaddr[0] == vaddr) { + ht_bin->vaddr[0] = ht_bin->vaddr[1]; + ht_bin->tcaddr[0] = ht_bin->tcaddr[1]; + ht_bin->vaddr[1] = -1; + ht_bin->tcaddr[1] = NULL; + } +} + +static void mark_invalid_code(u_int vaddr, u_int len, char invalid) { u_int i, j; vaddr &= 0x1fffffff; @@ -553,10 +600,11 @@ static void mark_valid_code(u_int vaddr, u_int len) for (j = 0; j < 0x800000; j += 0x200000) { invalid_code[(i|j) >> 12] = invalid_code[(i|j|0x80000000u) >> 12] = - invalid_code[(i|j|0xa0000000u) >> 12] = 0; + invalid_code[(i|j|0xa0000000u) >> 12] = invalid; } } - inv_code_start = inv_code_end = ~0; + if (!invalid) + inv_code_start = inv_code_end = ~0; } // some messy ari64's code, seems to rely on unsigned 32bit overflow @@ -566,109 +614,99 @@ static int doesnt_expire_soon(void *tcaddr) return diff > (u_int)(0x60000000 + (MAX_OUTPUT_BLOCK_SIZE << (32-TARGET_SIZE_2))); } -void *ndrc_try_restore_block(u_int vaddr) +static void *try_restore_block(u_int vaddr, u_int start_page, 
u_int end_page) { - u_char *source_start = NULL, *source_end = NULL; - void *found_stub = NULL, *found_clean = NULL; - u_int len, page = get_page(vaddr); - const struct ll_entry *head; - int ep_count = 0; + void *found_clean = NULL; + u_int i, page; stat_inc(stat_restore_tries); - for (head = jump_dirty[page]; head != NULL; head = head->next) - { - if (head->vaddr != vaddr) - continue; - // don't restore blocks which are about to expire from the cache - if (!doesnt_expire_soon(head->addr)) - continue; - stat_inc(stat_restore_compares); - if (!verify_dirty(head->addr)) - continue; - - found_stub = head->addr; - break; - } - if (!found_stub) - return NULL; - - found_clean = get_clean_addr(found_stub); - get_bounds(found_stub, &source_start, &source_end); - assert(source_start < source_end); - len = source_end - source_start; - mark_valid_code(vaddr, len); - - // restore all entry points - for (head = jump_dirty[page]; head != NULL; head = head->next) - { - if (head->vaddr < vaddr || head->vaddr >= vaddr + len) - continue; - - u_char *start = NULL, *end = NULL; - get_bounds(head->addr, &start, &end); - if (start != source_start || end != source_end) - continue; - - void *clean_addr = get_clean_addr(head->addr); - ll_add_flags(jump_in + page, head->vaddr, head->reg_sv_flags, clean_addr); + for (page = start_page; page <= end_page; page++) { + struct block_info *block; + for (block = blocks[page]; block != NULL; block = block->next) { + if (vaddr < block->start) + break; + if (!block->is_dirty || vaddr >= block->start + block->len) + continue; + for (i = 0; i < block->jump_in_cnt; i++) + if (block->jump_in[i].vaddr == vaddr) + break; + if (i == block->jump_in_cnt) + continue; + assert(block->source && block->copy); + stat_inc(stat_restore_compares); + if (memcmp(block->source, block->copy, block->len)) + continue; - int in_ht = 0; - struct ht_entry *ht_bin = hash_table_get(head->vaddr); - if (ht_bin->vaddr[0] == head->vaddr) { - ht_bin->tcaddr[0] = clean_addr; // 
Replace existing entry - in_ht = 1; - } - if (ht_bin->vaddr[1] == head->vaddr) { - ht_bin->tcaddr[1] = clean_addr; // Replace existing entry - in_ht = 1; + block->is_dirty = 0; + found_clean = block->jump_in[i].addr; + hash_table_add(vaddr, found_clean); + mark_invalid_code(block->start, block->len, 0); + stat_inc(stat_bc_restore); + inv_debug("INV: restored %08x %p (%d)\n", vaddr, found_clean, block->jump_in_cnt); + return found_clean; } - if (!in_ht) - hash_table_add(ht_bin, head->vaddr, clean_addr); - ep_count++; } - inv_debug("INV: Restored %08x %p (%d)\n", vaddr, found_stub, ep_count); - stat_inc(stat_bc_restore); - return found_clean; + return NULL; } // Get address from virtual address // This is called from the recompiled JR/JALR instructions -void noinline *get_addr(u_int vaddr) +static void noinline *get_addr(u_int vaddr, int can_compile) { - u_int page = get_page(vaddr); - struct ll_entry *head; - void *code; + u_int start_page = get_page_prev(vaddr); + u_int i, page, end_page = get_page(vaddr); + void *found_clean = NULL; stat_inc(stat_jump_in_lookups); - for (head = jump_in[page]; head != NULL; head = head->next) { - if (head->vaddr == vaddr) { - hash_table_add(hash_table_get(vaddr), vaddr, head->addr); - return head->addr; + for (page = start_page; page <= end_page; page++) { + const struct block_info *block; + for (block = blocks[page]; block != NULL; block = block->next) { + if (vaddr < block->start) + break; + if (block->is_dirty || vaddr >= block->start + block->len) + continue; + for (i = 0; i < block->jump_in_cnt; i++) + if (block->jump_in[i].vaddr == vaddr) + break; + if (i == block->jump_in_cnt) + continue; + found_clean = block->jump_in[i].addr; + hash_table_add(vaddr, found_clean); + return found_clean; } } - code = ndrc_try_restore_block(vaddr); - if (code) - return code; + found_clean = try_restore_block(vaddr, start_page, end_page); + if (found_clean) + return found_clean; + + if (!can_compile) + return NULL; int r = 
new_recompile_block(vaddr); if (r == 0) - return get_addr(vaddr); + return ndrc_get_addr_ht(vaddr); // generate an address error Status|=2; Cause=(vaddr<<31)|(4<<2); EPC=(vaddr&1)?vaddr-5:vaddr; BadVAddr=(vaddr&~1); - return get_addr_ht(0x80000080); + return ndrc_get_addr_ht(0x80000080); } + // Look up address in hash table first -void *get_addr_ht(u_int vaddr) +void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile) { - //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr); const struct ht_entry *ht_bin = hash_table_get(vaddr); + stat_inc(stat_ht_lookups); if (ht_bin->vaddr[0] == vaddr) return ht_bin->tcaddr[0]; if (ht_bin->vaddr[1] == vaddr) return ht_bin->tcaddr[1]; - return get_addr(vaddr); + return get_addr(vaddr, can_compile); +} + +void *ndrc_get_addr_ht(u_int vaddr) +{ + return ndrc_get_addr_ht_param(vaddr, 1); } static void clear_all_regs(signed char regmap[]) @@ -1047,15 +1085,14 @@ static const struct { } function_names[] = { FUNCNAME(cc_interrupt), FUNCNAME(gen_interupt), - FUNCNAME(get_addr_ht), - FUNCNAME(get_addr), + FUNCNAME(ndrc_get_addr_ht), FUNCNAME(jump_handler_read8), FUNCNAME(jump_handler_read16), FUNCNAME(jump_handler_read32), FUNCNAME(jump_handler_write8), FUNCNAME(jump_handler_write16), FUNCNAME(jump_handler_write32), - FUNCNAME(invalidate_addr), + FUNCNAME(ndrc_invalidate_addr), FUNCNAME(jump_to_new_pc), FUNCNAME(jump_break), FUNCNAME(jump_break_ds), @@ -1068,9 +1105,6 @@ static const struct { #ifdef DRC_DBG FUNCNAME(do_insn_cmp), #endif -#ifdef __arm__ - FUNCNAME(verify_code), -#endif }; static const char *func_name(const void *a) @@ -1147,18 +1181,11 @@ static void ll_add(struct ll_entry **head,int vaddr,void *addr) new_entry=malloc(sizeof(struct ll_entry)); assert(new_entry!=NULL); new_entry->vaddr=vaddr; - new_entry->reg_sv_flags=0; new_entry->addr=addr; new_entry->next=*head; *head=new_entry; } -static void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr) -{ - 
ll_add(head,vaddr,addr); - (*head)->reg_sv_flags=reg_sv_flags; -} - // Check if an address is already compiled // but don't return addresses which are about to expire from the cache static void *check_addr(u_int vaddr) @@ -1168,57 +1195,54 @@ static void *check_addr(u_int vaddr) for (i = 0; i < ARRAY_SIZE(ht_bin->vaddr); i++) { if (ht_bin->vaddr[i] == vaddr) if (doesnt_expire_soon((u_char *)ht_bin->tcaddr[i] - MAX_OUTPUT_BLOCK_SIZE)) - if (isclean(ht_bin->tcaddr[i])) - return ht_bin->tcaddr[i]; + return ht_bin->tcaddr[i]; } - u_int page=get_page(vaddr); - struct ll_entry *head; - head=jump_in[page]; - while (head != NULL) { - if (head->vaddr == vaddr) { - if (doesnt_expire_soon(head->addr)) { - // Update existing entry with current address - if (ht_bin->vaddr[0] == vaddr) { - ht_bin->tcaddr[0] = head->addr; - return head->addr; - } - if (ht_bin->vaddr[1] == vaddr) { - ht_bin->tcaddr[1] = head->addr; - return head->addr; - } - // Insert into hash table with low priority. - // Don't evict existing entries, as they are probably - // addresses that are being accessed frequently. - if (ht_bin->vaddr[0] == -1) { - ht_bin->vaddr[0] = vaddr; - ht_bin->tcaddr[0] = head->addr; - } - else if (ht_bin->vaddr[1] == -1) { - ht_bin->vaddr[1] = vaddr; - ht_bin->tcaddr[1] = head->addr; - } - return head->addr; + + // refactor to get_addr_nocompile? 
+ u_int start_page = get_page_prev(vaddr); + u_int page, end_page = get_page(vaddr); + + stat_inc(stat_jump_in_lookups); + for (page = start_page; page <= end_page; page++) { + const struct block_info *block; + for (block = blocks[page]; block != NULL; block = block->next) { + if (vaddr < block->start) + break; + if (block->is_dirty || vaddr >= block->start + block->len) + continue; + if (!doesnt_expire_soon(ndrc->translation_cache + block->tc_offs)) + continue; + for (i = 0; i < block->jump_in_cnt; i++) + if (block->jump_in[i].vaddr == vaddr) + break; + if (i == block->jump_in_cnt) + continue; + + // Update existing entry with current address + void *addr = block->jump_in[i].addr; + if (ht_bin->vaddr[0] == vaddr) { + ht_bin->tcaddr[0] = addr; + return addr; + } + if (ht_bin->vaddr[1] == vaddr) { + ht_bin->tcaddr[1] = addr; + return addr; + } + // Insert into hash table with low priority. + // Don't evict existing entries, as they are probably + // addresses that are being accessed frequently. 
+ if (ht_bin->vaddr[0] == -1) { + ht_bin->vaddr[0] = vaddr; + ht_bin->tcaddr[0] = addr; } + else if (ht_bin->vaddr[1] == -1) { + ht_bin->vaddr[1] = vaddr; + ht_bin->tcaddr[1] = addr; + } + return addr; } - head=head->next; - } - return 0; -} - -static void remove_hash(int vaddr) -{ - //printf("remove hash: %x\n",vaddr); - struct ht_entry *ht_bin = hash_table_get(vaddr); - if (ht_bin->vaddr[1] == vaddr) { - ht_bin->vaddr[1] = -1; - ht_bin->tcaddr[1] = NULL; - } - if (ht_bin->vaddr[0] == vaddr) { - ht_bin->vaddr[0] = ht_bin->vaddr[1]; - ht_bin->tcaddr[0] = ht_bin->tcaddr[1]; - ht_bin->vaddr[1] = -1; - ht_bin->tcaddr[1] = NULL; } + return NULL; } static void ll_remove_matching_addrs(struct ll_entry **head, @@ -1230,11 +1254,12 @@ static void ll_remove_matching_addrs(struct ll_entry **head, uintptr_t o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s) { - inv_debug("EXP: Remove pointer to %p (%x)\n",(*head)->addr,(*head)->vaddr); - remove_hash((*head)->vaddr); + inv_debug("EXP: rm pointer to %08x (%p)\n", (*head)->vaddr, (*head)->addr); + hash_table_remove((*head)->vaddr); next=(*head)->next; free(*head); *head=next; + stat_dec(stat_links); } else { @@ -1258,6 +1283,7 @@ static void ll_clear(struct ll_entry **head) } } +#if 0 // Dereference the pointers and remove if it matches static void ll_kill_pointers(struct ll_entry *head, uintptr_t base_offs_s, int shift) @@ -1277,153 +1303,169 @@ static void ll_kill_pointers(struct ll_entry *head, head=head->next; } } +#endif + +static void blocks_clear(struct block_info **head) +{ + struct block_info *cur, *next; + + if ((cur = *head)) { + *head = NULL; + while (cur) { + next = cur->next; + free(cur); + cur = next; + } + } +} + +static void blocks_remove_matching_addrs(struct block_info **head, + uintptr_t base_offs_s, int shift) +{ + struct block_info *next; + while (*head) { + u_int o1 = (*head)->tc_offs; + u_int o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; + if ((o1 >> shift) == 
base_offs_s || (o2 >> shift) == base_offs_s) + { + inv_debug("EXP: rm block %08x (tc_offs %u)\n", (*head)->start, o1); + invalidate_block(*head); + next = (*head)->next; + free(*head); + *head = next; + stat_dec(stat_blocks); + } + else + { + head = &((*head)->next); + } + } +} // This is called when we write to a compiled block (see do_invstub) -static void invalidate_page(u_int page) +static void unlink_jumps_range(u_int start, u_int end) { - struct ll_entry *head; - struct ll_entry *next; - head=jump_in[page]; - if (head) stat_inc(stat_inv_hits); - jump_in[page]=0; - while(head!=NULL) { - inv_debug("INVALIDATE: %x\n",head->vaddr); - remove_hash(head->vaddr); - next=head->next; - free(head); - head=next; - } - head=jump_out[page]; - jump_out[page]=0; - while(head!=NULL) { - inv_debug("INVALIDATE: kill pointer to %x (%p)\n",head->vaddr,head->addr); - void *host_addr=find_extjump_insn(head->addr); - mark_clear_cache(host_addr); - set_jump_target(host_addr, head->addr); // point back to dyna_linker - next=head->next; - free(head); - head=next; - } -} - -static void invalidate_block_range(u_int block, u_int first, u_int last) -{ - u_int page=get_page(block<<12); - //printf("first=%d last=%d\n",first,last); - invalidate_page(page); - assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages) - assert(lastvaddr < start || (*head)->vaddr >= end) { + head = &((*head)->next); + continue; + } + inv_debug("INV: rm pointer to %08x (%p)\n", (*head)->vaddr, (*head)->addr); + void *host_addr = find_extjump_insn((*head)->addr); + mark_clear_cache(host_addr); + set_jump_target(host_addr, (*head)->addr); // point back to dyna_linker stub + + next = (*head)->next; + free(*head); + *head = next; + stat_dec(stat_links); + } } - do_clear_cache(); +} - // Don't trap writes - invalid_code[block]=1; +static void invalidate_block(struct block_info *block) +{ + u_int i; - #ifdef USE_MINI_HT - memset(mini_ht,-1,sizeof(mini_ht)); - #endif + block->is_dirty = 1; + 
unlink_jumps_range(block->start, block->start + block->len); + for (i = 0; i < block->jump_in_cnt; i++) + hash_table_remove(block->jump_in[i].vaddr); } -void invalidate_block(u_int block) +static int invalidate_range(u_int start, u_int end, + u32 *inv_start_ret, u32 *inv_end_ret) { - u_int page=get_page(block<<12); - u_int vpage=get_vpage(block<<12); - inv_debug("INVALIDATE: %x (%d)\n",block<<12,page); - //inv_debug("invalid_code[block]=%d\n",invalid_code[block]); - u_int first,last; - first=last=page; - struct ll_entry *head; - head=jump_dirty[vpage]; - //printf("page=%d vpage=%d\n",page,vpage); - while(head!=NULL) { - if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision - u_char *start, *end; - get_bounds(head->addr, &start, &end); - //printf("start: %p end: %p\n", start, end); - if (page < 2048 && start >= rdram && end < rdram+RAM_SIZE) { - if (((start-rdram)>>12) <= page && ((end-1-rdram)>>12) >= page) { - if ((((start-rdram)>>12)&2047) < first) first = ((start-rdram)>>12)&2047; - if ((((end-1-rdram)>>12)&2047) > last) last = ((end-1-rdram)>>12)&2047; - } + u_int start_page = get_page_prev(start); + u_int end_page = get_page(end - 1); + u_int start_m = pmmask(start); + u_int end_m = pmmask(end); + u_int inv_start, inv_end; + u_int blk_start_m, blk_end_m; + u_int page; + int hit = 0; + + // additional area without code (to supplement invalid_code[]), [start, end) + // avoids excessive ndrc_invalidate_addr() calls + inv_start = start_m & ~0xfff; + inv_end = end_m | 0xfff; + + for (page = start_page; page <= end_page; page++) { + struct block_info *block; + for (block = blocks[page]; block != NULL; block = block->next) { + if (block->is_dirty) + continue; + blk_end_m = pmmask(block->start + block->len); + if (blk_end_m <= start_m) { + inv_start = max(inv_start, blk_end_m); + continue; + } + blk_start_m = pmmask(block->start); + if (end_m <= blk_start_m) { + inv_end = min(inv_end, blk_start_m - 1); + continue; } + if (!block->source) // "hack" 
block - leave it alone + continue; + + hit++; + invalidate_block(block); + stat_inc(stat_inv_hits); } - head=head->next; } - invalidate_block_range(block,first,last); + + if (hit) { + do_clear_cache(); +#ifdef USE_MINI_HT + memset(mini_ht, -1, sizeof(mini_ht)); +#endif + } + if (inv_start <= (start_m & ~0xfff) && inv_end >= (start_m | 0xfff)) + // the whole page is empty now + mark_invalid_code(start, 1, 1); + + if (inv_start_ret) *inv_start_ret = inv_start | (start & 0xe0000000); + if (inv_end_ret) *inv_end_ret = inv_end | (end & 0xe0000000); + return hit; } -void invalidate_addr(u_int addr) +void new_dynarec_invalidate_range(unsigned int start, unsigned int end) +{ + invalidate_range(start, end, NULL, NULL); +} + +void ndrc_invalidate_addr(u_int addr) { - //static int rhits; // this check is done by the caller //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; } + int ret = invalidate_range(addr, addr + 4, &inv_code_start, &inv_code_end); + if (ret) + inv_debug("INV ADDR: %08x hit %d blocks\n", addr, ret); + else + inv_debug("INV ADDR: %08x miss, inv %08x-%08x\n", addr, inv_code_start, inv_code_end); stat_inc(stat_inv_addr_calls); - u_int page=get_vpage(addr); - if(page<2048) { // RAM - struct ll_entry *head; - u_int addr_min=~0, addr_max=0; - u_int mask=RAM_SIZE-1; - u_int addr_main=0x80000000|(addr&mask); - int pg1; - inv_code_start=addr_main&~0xfff; - inv_code_end=addr_main|0xfff; - pg1=page; - if (pg1>0) { - // must check previous page too because of spans.. 
- pg1--; - inv_code_start-=0x1000; - } - for(;pg1<=page;pg1++) { - for(head=jump_dirty[pg1];head!=NULL;head=head->next) { - u_char *start_h, *end_h; - u_int start, end; - get_bounds(head->addr, &start_h, &end_h); - start = (uintptr_t)start_h - ram_offset; - end = (uintptr_t)end_h - ram_offset; - if(start<=addr_main&&addr_mainaddr_max) addr_max=end; - } - else if(addr_maininv_code_start) - inv_code_start=end; - } - } - } - if (addr_min!=~0) { - inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max); - inv_code_start=inv_code_end=~0; - invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12); - return; - } - else { - inv_code_start=(addr&~mask)|(inv_code_start&mask); - inv_code_end=(addr&~mask)|(inv_code_end&mask); - inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0); - return; - } - } - invalidate_block(addr>>12); } // This is called when loading a save state. // Anything could have changed, so invalidate everything. -void invalidate_all_pages(void) +void new_dynarec_invalidate_all_pages(void) { + struct block_info *block; u_int page; - for(page=0;page<4096;page++) - invalidate_page(page); + for (page = 0; page < ARRAY_SIZE(blocks); page++) { + for (block = blocks[page]; block != NULL; block = block->next) { + if (block->is_dirty) + continue; + if (!block->source) // hack block? 
+ continue; + invalidate_block(block); + } + } + #ifdef USE_MINI_HT memset(mini_ht,-1,sizeof(mini_ht)); #endif @@ -1437,20 +1479,21 @@ static void do_invstub(int n) set_jump_target(stubs[n].addr, out); save_regs(reglist); if(stubs[n].b!=0) emit_mov(stubs[n].b,0); - emit_far_call(invalidate_addr); + emit_far_call(ndrc_invalidate_addr); restore_regs(reglist); emit_jmp(stubs[n].retaddr); // return address } // Add an entry to jump_out after making a link -// src should point to code by emit_extjump2() -void add_jump_out(u_int vaddr,void *src) +// src should point to code by emit_extjump() +void ndrc_add_jump_out(u_int vaddr,void *src) { u_int page=get_page(vaddr); - inv_debug("add_jump_out: %p -> %x (%d)\n",src,vaddr,page); + inv_debug("ndrc_add_jump_out: %p -> %x (%d)\n",src,vaddr,page); check_extjump2(src); ll_add(jump_out+page,vaddr,src); - //inv_debug("add_jump_out: to %p\n",get_pointer(src)); + //inv_debug("ndrc_add_jump_out: to %p\n",get_pointer(src)); + stat_inc(stat_links); } /* Register allocation */ @@ -3152,7 +3195,7 @@ static void store_assemble(int i, const struct regstat *i_regs, int ccadj_) emit_movimm(start+i*4+4,0); emit_writeword(0,&pcaddr); emit_addimm(HOST_CCREG,2,HOST_CCREG); - emit_far_call(get_addr_ht); + emit_far_call(ndrc_get_addr_ht); emit_jmpreg(0); } } @@ -3361,7 +3404,7 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) emit_jeq(0); emit_readword(&pcaddr, 0); emit_addimm(HOST_CCREG,2,HOST_CCREG); - emit_far_call(get_addr_ht); + emit_far_call(ndrc_get_addr_ht); emit_jmpreg(0); set_jump_target(jaddr, out); } @@ -4817,11 +4860,6 @@ static void ds_assemble_entry(int i) emit_jmp(0); } -static void emit_extjump(void *addr, u_int target) -{ - emit_extjump2(addr, target, dyna_linker); -} - // Load 2 immediates optimizing for small code size static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) { @@ -5093,12 +5131,12 @@ static void do_ccstub(int n) do_jump_vaddr(stubs[n].e); } -static void 
add_to_linker(void *addr, u_int target, int ext) +static void add_to_linker(void *addr, u_int target, int is_internal) { assert(linkcount < ARRAY_SIZE(link_addr)); link_addr[linkcount].addr = addr; link_addr[linkcount].target = target; - link_addr[linkcount].ext = ext; + link_addr[linkcount].internal = is_internal; linkcount++; } @@ -6056,9 +6094,10 @@ void new_dynarec_clear_full(void) hack_addr=0; f1_hack=0; // TLB - for(n=0;n<4096;n++) ll_clear(jump_in+n); + for(n=0;n<4096;n++) blocks_clear(&blocks[n]); for(n=0;n<4096;n++) ll_clear(jump_out+n); - for(n=0;n<4096;n++) ll_clear(jump_dirty+n); + stat_clear(stat_blocks); + stat_clear(stat_links); cycle_multiplier_old = cycle_multiplier; new_dynarec_hacks_old = new_dynarec_hacks; @@ -6136,9 +6175,10 @@ void new_dynarec_cleanup(void) SysPrintf("munmap() failed\n"); #endif #endif - for(n=0;n<4096;n++) ll_clear(jump_in+n); + for(n=0;n<4096;n++) blocks_clear(&blocks[n]); for(n=0;n<4096;n++) ll_clear(jump_out+n); - for(n=0;n<4096;n++) ll_clear(jump_dirty+n); + stat_clear(stat_blocks); + stat_clear(stat_links); #ifdef ROM_COPY if (munmap (ROM_COPY, 67108864) < 0) {SysPrintf("munmap() failed\n");} #endif @@ -6204,19 +6244,21 @@ static int addr_cmp(const void *p1_, const void *p2_) int new_dynarec_save_blocks(void *save, int size) { - struct savestate_block *blocks = save; - int maxcount = size / sizeof(blocks[0]); + struct savestate_block *sblocks = save; + int maxcount = size / sizeof(sblocks[0]); struct savestate_block tmp_blocks[1024]; - struct ll_entry *head; + struct block_info *block; int p, s, d, o, bcnt; u_int addr; o = 0; - for (p = 0; p < ARRAY_SIZE(jump_in); p++) { + for (p = 0; p < ARRAY_SIZE(blocks); p++) { bcnt = 0; - for (head = jump_in[p]; head != NULL; head = head->next) { - tmp_blocks[bcnt].addr = head->vaddr; - tmp_blocks[bcnt].regflags = head->reg_sv_flags; + for (block = blocks[p]; block != NULL; block = block->next) { + if (block->is_dirty) + continue; + tmp_blocks[bcnt].addr = block->start; + 
tmp_blocks[bcnt].regflags = block->reg_sv_flags; bcnt++; } if (bcnt < 1) @@ -6234,22 +6276,39 @@ int new_dynarec_save_blocks(void *save, int size) if (o + d > maxcount) d = maxcount - o; - memcpy(&blocks[o], tmp_blocks, d * sizeof(blocks[0])); + memcpy(&sblocks[o], tmp_blocks, d * sizeof(sblocks[0])); o += d; } - return o * sizeof(blocks[0]); + return o * sizeof(sblocks[0]); } void new_dynarec_load_blocks(const void *save, int size) { - const struct savestate_block *blocks = save; - int count = size / sizeof(blocks[0]); + const struct savestate_block *sblocks = save; + int count = size / sizeof(sblocks[0]); + struct block_info *block; u_int regs_save[32]; + u_int page; uint32_t f; int i, b; - get_addr(psxRegs.pc); + // restore clean blocks, if any + for (page = 0, b = i = 0; page < ARRAY_SIZE(blocks); page++) { + for (block = blocks[page]; block != NULL; block = block->next, b++) { + if (!block->is_dirty) + continue; + assert(block->source && block->copy); + if (memcmp(block->source, block->copy, block->len)) + continue; + + // see try_restore_block + block->is_dirty = 0; + mark_invalid_code(block->start, block->len, 0); + i++; + } + } + inv_debug("load_blocks: %d/%d clean blocks\n", i, b); // change GPRs for speculation to at least partially work.. 
memcpy(regs_save, &psxRegs.GPR, sizeof(regs_save)); @@ -6257,14 +6316,14 @@ void new_dynarec_load_blocks(const void *save, int size) psxRegs.GPR.r[i] = 0x80000000; for (b = 0; b < count; b++) { - for (f = blocks[b].regflags, i = 0; f; f >>= 1, i++) { + for (f = sblocks[b].regflags, i = 0; f; f >>= 1, i++) { if (f & 1) psxRegs.GPR.r[i] = 0x1f800000; } - get_addr(blocks[b].addr); + ndrc_get_addr_ht(sblocks[b].addr); - for (f = blocks[b].regflags, i = 0; f; f >>= 1, i++) { + for (f = sblocks[b].regflags, i = 0; f; f >>= 1, i++) { if (f & 1) psxRegs.GPR.r[i] = 0x80000000; } @@ -6276,14 +6335,14 @@ void new_dynarec_load_blocks(const void *save, int size) void new_dynarec_print_stats(void) { #ifdef STAT_PRINT - printf("cc %3d,%3d,%3d lu%3d,%3d c%3d inv%3d,%3d tc_offs %zu\n", + printf("cc %3d,%3d,%3d lu%6d,%3d,%3d c%3d inv%3d,%3d tc_offs %zu b %u,%u\n", stat_bc_pre, stat_bc_direct, stat_bc_restore, - stat_jump_in_lookups, stat_restore_tries, stat_restore_compares, - stat_inv_addr_calls, stat_inv_hits, - out - ndrc->translation_cache); + stat_ht_lookups, stat_jump_in_lookups, stat_restore_tries, + stat_restore_compares, stat_inv_addr_calls, stat_inv_hits, + out - ndrc->translation_cache, stat_blocks, stat_links); stat_bc_direct = stat_bc_pre = stat_bc_restore = - stat_jump_in_lookups = stat_restore_tries = stat_restore_compares = - stat_inv_addr_calls = stat_inv_hits = 0; + stat_ht_lookups = stat_jump_in_lookups = stat_restore_tries = + stat_restore_compares = stat_inv_addr_calls = stat_inv_hits = 0; #endif } @@ -8696,20 +8755,19 @@ static noinline void pass10_expire_blocks(void) int shift=TARGET_SIZE_2-3; // Divide into 8 blocks uintptr_t base_offs = ((uintptr_t)(expirep >> 13) << shift); // Base offset of this block uintptr_t base_offs_s = base_offs >> shift; - inv_debug("EXP: Phase %d\n",expirep); + if (!(expirep & ((1 << 13) - 1))) + inv_debug("EXP: base_offs %x\n", base_offs); switch((expirep>>11)&3) { case 0: - // Clear jump_in and jump_dirty - 
ll_remove_matching_addrs(jump_in+(expirep&2047),base_offs_s,shift); - ll_remove_matching_addrs(jump_dirty+(expirep&2047),base_offs_s,shift); - ll_remove_matching_addrs(jump_in+2048+(expirep&2047),base_offs_s,shift); - ll_remove_matching_addrs(jump_dirty+2048+(expirep&2047),base_offs_s,shift); + // Clear blocks + blocks_remove_matching_addrs(&blocks[expirep & 2047], base_offs_s, shift); + blocks_remove_matching_addrs(&blocks[2048 + (expirep & 2047)], base_offs_s, shift); break; case 1: // Clear pointers - ll_kill_pointers(jump_out[expirep&2047],base_offs_s,shift); - ll_kill_pointers(jump_out[(expirep&2047)+2048],base_offs_s,shift); + //ll_kill_pointers(jump_out[expirep&2047],base_offs_s,shift); + //ll_kill_pointers(jump_out[(expirep&2047)+2048],base_offs_s,shift); break; case 2: // Clear hash table @@ -8745,7 +8803,39 @@ static noinline void pass10_expire_blocks(void) } } -int new_recompile_block(u_int addr) +static struct block_info *new_block_info(u_int start, u_int len, + const void *source, const void *copy, u_char *beginning, u_short jump_in_count) +{ + struct block_info **b_pptr; + struct block_info *block; + u_int page = get_page(start); + + block = malloc(sizeof(*block) + jump_in_count * sizeof(block->jump_in[0])); + assert(block); + assert(jump_in_count > 0); + block->source = source; + block->copy = copy; + block->start = start; + block->len = len; + block->reg_sv_flags = 0; + block->tc_offs = beginning - ndrc->translation_cache; + //block->tc_len = out - beginning; + block->is_dirty = 0; + block->jump_in_cnt = jump_in_count; + + // insert sorted by start vaddr + for (b_pptr = &blocks[page]; ; b_pptr = &((*b_pptr)->next)) { + if (*b_pptr == NULL || (*b_pptr)->start >= start) { + block->next = *b_pptr; + *b_pptr = block; + break; + } + } + stat_inc(stat_blocks); + return block; +} + +static int new_recompile_block(u_int addr) { u_int pagelimit = 0; u_int state_rflags = 0; @@ -8765,21 +8855,20 @@ int new_recompile_block(u_int addr) if (Config.HLE && start == 
0x80001000) // hlecall { // XXX: is this enough? Maybe check hleSoftCall? - void *beginning=start_block(); - u_int page=get_page(start); + void *beginning = start_block(); - invalid_code[start>>12]=0; emit_movimm(start,0); emit_writeword(0,&pcaddr); emit_far_jump(new_dyna_leave); literal_pool(0); end_block(beginning); - ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning); + struct block_info *block = new_block_info(start, 4, NULL, NULL, beginning, 1); + block->jump_in[0].vaddr = start; + block->jump_in[0].addr = beginning; return 0; } else if (f1_hack && hack_addr == 0) { void *beginning = start_block(); - u_int page = get_page(start); emit_movimm(start, 0); emit_writeword(0, &hack_addr); emit_readword(&psxRegs.GPR.n.sp, 0); @@ -8790,12 +8879,14 @@ int new_recompile_block(u_int addr) emit_adds_ptr(1, 1, 1); emit_ldr_dualindexed(1, 0, 0); emit_writeword(0, &psxRegs.GPR.r[26]); // lw k0, 0x18(sp) - emit_far_call(get_addr_ht); + emit_far_call(ndrc_get_addr_ht); emit_jmpreg(0); // jr k0 literal_pool(0); end_block(beginning); - ll_add_flags(jump_in + page, start, state_rflags, beginning); + struct block_info *block = new_block_info(start, 4, NULL, NULL, beginning, 1); + block->jump_in[0].vaddr = start; + block->jump_in[0].addr = beginning; SysPrintf("F1 hack to %08x\n", start); return 0; } @@ -9050,14 +9141,14 @@ int new_recompile_block(u_int addr) { assem_debug("%p -> %8x\n",link_addr[i].addr,link_addr[i].target); literal_pool(64); - if (!link_addr[i].ext) + if (!link_addr[i].internal) { void *stub = out; void *addr = check_addr(link_addr[i].target); emit_extjump(link_addr[i].addr, link_addr[i].target); if (addr) { set_jump_target(link_addr[i].addr, addr); - add_jump_out(link_addr[i].target,stub); + ndrc_add_jump_out(link_addr[i].target,stub); } else set_jump_target(link_addr[i].addr, stub); @@ -9086,35 +9177,41 @@ int new_recompile_block(u_int addr) copy = shadow; // External Branch Targets (jump_in) - for(i=0;ireg_sv_flags = state_rflags; + + int 
jump_in_i = 0; + for (i = 0; i < slen; i++) { - if(dops[i].bt||i==0) + if ((i == 0 || dops[i].bt) && instr_addr[i]) { - if(instr_addr[i]) // TODO - delay slots (=null) - { - u_int vaddr=start+i*4; - u_int page=get_page(vaddr); - u_int vpage=get_vpage(vaddr); - literal_pool(256); - { - assem_debug("%p (%d) <- %8x\n",instr_addr[i],i,start+i*4); - assem_debug("jump_in: %x\n",start+i*4); - ll_add(jump_dirty+vpage,vaddr,out); - void *entry_point = do_dirty_stub(i, source_len); - ll_add_flags(jump_in+page,vaddr,state_rflags,entry_point); - // If there was an existing entry in the hash table, - // replace it with the new address. - // Don't add new entries. We'll insert the - // ones that actually get used in check_addr(). - struct ht_entry *ht_bin = hash_table_get(vaddr); - if (ht_bin->vaddr[0] == vaddr) - ht_bin->tcaddr[0] = entry_point; - if (ht_bin->vaddr[1] == vaddr) - ht_bin->tcaddr[1] = entry_point; - } - } + assem_debug("%p (%d) <- %8x\n", instr_addr[i], i, start + i*4); + u_int vaddr = start + i*4; + + literal_pool(256); + void *entry = out; + load_regs_entry(i); + if (entry == out) + entry = instr_addr[i]; + else + emit_jmp(instr_addr[i]); + + block->jump_in[jump_in_i].vaddr = vaddr; + block->jump_in[jump_in_i].addr = entry; + jump_in_i++; } } + assert(jump_in_i == jump_in_count); + hash_table_add(block->jump_in[0].vaddr, block->jump_in[0].addr); // Write out the literal pool if necessary literal_pool(0); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -9134,7 +9231,7 @@ int new_recompile_block(u_int addr) out = ndrc->translation_cache; // Trap writes to any of the pages we compiled - mark_valid_code(start, slen*4); + mark_invalid_code(start, slen*4, 0); /* Pass 10 - Free memory by expiring oldest blocks */ diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index c152c45d0..e32846543 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -28,5 +28,5 @@ int new_dynarec_save_blocks(void 
*save, int size); void new_dynarec_load_blocks(const void *save, int size); void new_dynarec_print_stats(void); -void invalidate_all_pages(void); -void invalidate_block(unsigned int block); +void new_dynarec_invalidate_range(unsigned int start, unsigned int end); +void new_dynarec_invalidate_all_pages(void); From 882a08fc49541450bc403b2e920e4bccc257dfdf Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 23 Feb 2022 00:26:51 +0200 Subject: [PATCH 136/597] drc: more precise invalidation checking for the invstub case --- libpcsxcore/new_dynarec/assem_arm.c | 6 ++++++ libpcsxcore/new_dynarec/assem_arm64.c | 6 ++++++ libpcsxcore/new_dynarec/new_dynarec.c | 24 ++++++++++++++++++------ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 005c280c3..6570f1e80 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -899,6 +899,12 @@ static void emit_cmp(int rs,int rt) output_w32(0xe1500000|rd_rn_rm(0,rs,rt)); } +static void emit_cmpcs(int rs,int rt) +{ + assem_debug("cmpcs %s,%s\n",regname[rs],regname[rt]); + output_w32(0x21500000|rd_rn_rm(0,rs,rt)); +} + static void emit_set_gz32(int rs, int rt) { //assem_debug("set_gz32\n"); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 3a88f9efc..4eda43a9b 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -838,6 +838,12 @@ static void emit_cmp(u_int rs,u_int rt) output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR)); } +static void emit_cmpcs(u_int rs,u_int rt) +{ + assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]); + output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0)); +} + static void emit_set_gz32(u_int rs, u_int rt) { //assem_debug("set_gz32\n"); diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index a84e33e50..4ff20c490 100644 --- 
a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -77,6 +77,10 @@ #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 +#if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) +#define INVALIDATE_USE_COND_CALL +#endif + #ifdef VITA // apparently Vita has a 16MB limit, so either we cut tc in half, // or use this hack (it's a hack because tc size was designed to be power-of-2) @@ -603,7 +607,7 @@ static void mark_invalid_code(u_int vaddr, u_int len, char invalid) invalid_code[(i|j|0xa0000000u) >> 12] = invalid; } } - if (!invalid) + if (!invalid && vaddr + len > inv_code_start && vaddr <= inv_code_end) inv_code_start = inv_code_end = ~0; } @@ -1475,11 +1479,19 @@ void new_dynarec_invalidate_all_pages(void) static void do_invstub(int n) { literal_pool(20); - u_int reglist=stubs[n].a; + u_int reglist = stubs[n].a; set_jump_target(stubs[n].addr, out); save_regs(reglist); - if(stubs[n].b!=0) emit_mov(stubs[n].b,0); + if (stubs[n].b != 0) + emit_mov(stubs[n].b, 0); + emit_readword(&inv_code_start, 1); + emit_readword(&inv_code_end, 2); + emit_cmp(0, 1); + emit_cmpcs(2, 0); + void *jaddr = out; + emit_jc(0); emit_far_call(ndrc_invalidate_addr); + set_jump_target(jaddr, out); restore_regs(reglist); emit_jmp(stubs[n].retaddr); // return address } @@ -3168,7 +3180,7 @@ static void store_assemble(int i, const struct regstat *i_regs, int ccadj_) #else emit_cmpmem_indexedsr12_imm(invalid_code,addr,1); #endif - #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) + #ifdef INVALIDATE_USE_COND_CALL emit_callne(invalidate_addr_reg[addr]); #else void *jaddr2 = out; @@ -3325,7 +3337,7 @@ static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) #else emit_cmpmem_indexedsr12_imm(invalid_code,temp,1); #endif - #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) + #ifdef INVALIDATE_USE_COND_CALL emit_callne(invalidate_addr_reg[temp]); #else void *jaddr2 = out; @@ -3860,7 +3872,7 @@ static 
void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) #else emit_cmpmem_indexedsr12_imm(invalid_code,ar,1); #endif - #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) + #ifdef INVALIDATE_USE_COND_CALL emit_callne(invalidate_addr_reg[ar]); #else void *jaddr3 = out; From 93c0345be944a8f53a06433c3c59cfa9c23cd16b Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 24 Feb 2022 01:08:35 +0200 Subject: [PATCH 137/597] drc: rework expire code Should be quite a bit simpler now, and no longer wastes ~4MB of translation cache for aggressive block expiring. However more stuff in cache means longer lists which may slow things down if recompilation is happening nonstop, so further tuning might be needed. --- libpcsxcore/new_dynarec/new_dynarec.c | 190 +++++++++++--------------- 1 file changed, 78 insertions(+), 112 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 4ff20c490..66e3dd67c 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -76,6 +76,8 @@ #define RAM_SIZE 0x200000 #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 +#define EXPIRITY_OFFSET (MAX_OUTPUT_BLOCK_SIZE * 2) +#define PAGE_COUNT 1024 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) #define INVALIDATE_USE_COND_CALL @@ -214,8 +216,8 @@ static struct decoded_insn static u_char *out; static struct ht_entry hash_table[65536]; - static struct block_info *blocks[4096]; - static struct ll_entry *jump_out[4096]; + static struct block_info *blocks[PAGE_COUNT]; + static struct ll_entry *jump_out[PAGE_COUNT]; static u_int start; static u_int *source; static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs @@ -251,7 +253,7 @@ static struct decoded_insn static int is_delayslot; static char shadow[1048576] __attribute__((aligned(16))); static void *copy; - static int expirep; + static u_int expirep; static u_int stop_after_jal; static u_int f1_hack; #ifdef 
STAT_PRINT @@ -500,13 +502,13 @@ static void do_clear_cache(void) for (j = 0; j < 32; j++) { u_char *start, *end; - if (!(bitmap & (1<translation_cache + i*131072 + j*4096; end = start + 4095; for (j++; j < 32; j++) { - if (!(bitmap & (1<> 12; - if(page>2048) page=2048+(page&2047); + if (page >= PAGE_COUNT / 2) + page = PAGE_COUNT / 2 + (page & (PAGE_COUNT / 2 - 1)); return page; } @@ -611,11 +614,10 @@ static void mark_invalid_code(u_int vaddr, u_int len, char invalid) inv_code_start = inv_code_end = ~0; } -// some messy ari64's code, seems to rely on unsigned 32bit overflow -static int doesnt_expire_soon(void *tcaddr) +static int doesnt_expire_soon(u_char *tcaddr) { - u_int diff = (u_int)((u_char *)tcaddr - out) << (32-TARGET_SIZE_2); - return diff > (u_int)(0x60000000 + (MAX_OUTPUT_BLOCK_SIZE << (32-TARGET_SIZE_2))); + u_int diff = (u_int)(tcaddr - out) & ((1u << TARGET_SIZE_2) - 1u); + return diff > EXPIRITY_OFFSET + MAX_OUTPUT_BLOCK_SIZE; } static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) @@ -1198,7 +1200,7 @@ static void *check_addr(u_int vaddr) size_t i; for (i = 0; i < ARRAY_SIZE(ht_bin->vaddr); i++) { if (ht_bin->vaddr[i] == vaddr) - if (doesnt_expire_soon((u_char *)ht_bin->tcaddr[i] - MAX_OUTPUT_BLOCK_SIZE)) + if (doesnt_expire_soon(ht_bin->tcaddr[i])) return ht_bin->tcaddr[i]; } @@ -1249,25 +1251,20 @@ static void *check_addr(u_int vaddr) return NULL; } -static void ll_remove_matching_addrs(struct ll_entry **head, - uintptr_t base_offs_s, int shift) +static void ll_remove_matching_addrs(struct ll_entry **head, u_int base_offs, int shift) { struct ll_entry *next; - while(*head) { - uintptr_t o1 = (u_char *)(*head)->addr - ndrc->translation_cache; - uintptr_t o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; - if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s) - { - inv_debug("EXP: rm pointer to %08x (%p)\n", (*head)->vaddr, (*head)->addr); - hash_table_remove((*head)->vaddr); - next=(*head)->next; + while (*head) { + u_int 
tc_offs = (u_char *)((*head)->addr) - ndrc->translation_cache; + if (((tc_offs ^ base_offs) >> shift) == 0) { + inv_debug("EXP: rm link from tc_offs %x)\n", tc_offs); + next = (*head)->next; free(*head); - *head=next; - stat_dec(stat_links); + *head = next; } else { - head=&((*head)->next); + head = &((*head)->next); } } } @@ -1287,28 +1284,6 @@ static void ll_clear(struct ll_entry **head) } } -#if 0 -// Dereference the pointers and remove if it matches -static void ll_kill_pointers(struct ll_entry *head, - uintptr_t base_offs_s, int shift) -{ - while(head) { - u_char *ptr = get_pointer(head->addr); - uintptr_t o1 = ptr - ndrc->translation_cache; - uintptr_t o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; - inv_debug("EXP: Lookup pointer to %p at %p (%x)\n",ptr,head->addr,head->vaddr); - if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s) - { - inv_debug("EXP: Kill pointer at %p (%x)\n",head->addr,head->vaddr); - void *host_addr=find_extjump_insn(head->addr); - mark_clear_cache(host_addr); - set_jump_target(host_addr, head->addr); - } - head=head->next; - } -} -#endif - static void blocks_clear(struct block_info **head) { struct block_info *cur, *next; @@ -1323,27 +1298,27 @@ static void blocks_clear(struct block_info **head) } } -static void blocks_remove_matching_addrs(struct block_info **head, - uintptr_t base_offs_s, int shift) +static int blocks_remove_matching_addrs(struct block_info **head, + u_int base_offs, int shift) { struct block_info *next; + int hit = 0; while (*head) { - u_int o1 = (*head)->tc_offs; - u_int o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; - if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s) - { - inv_debug("EXP: rm block %08x (tc_offs %u)\n", (*head)->start, o1); + if ((((*head)->tc_offs ^ base_offs) >> shift) == 0) { + inv_debug("EXP: rm block %08x (tc_offs %zx)\n", (*head)->start, (*head)->tc_offs); invalidate_block(*head); next = (*head)->next; free(*head); *head = next; stat_dec(stat_blocks); + hit = 1; } else { head = 
&((*head)->next); } } + return hit; } // This is called when we write to a compiled block (see do_invstub) @@ -1358,7 +1333,8 @@ static void unlink_jumps_range(u_int start, u_int end) head = &((*head)->next); continue; } - inv_debug("INV: rm pointer to %08x (%p)\n", (*head)->vaddr, (*head)->addr); + inv_debug("INV: rm link to %08x (tc_offs %zx)\n", + (*head)->vaddr, (u_char *)((*head)->addr) - ndrc->translation_cache); void *host_addr = find_extjump_insn((*head)->addr); mark_clear_cache(host_addr); set_jump_target(host_addr, (*head)->addr); // point back to dyna_linker stub @@ -1471,7 +1447,7 @@ void new_dynarec_invalidate_all_pages(void) } #ifdef USE_MINI_HT - memset(mini_ht,-1,sizeof(mini_ht)); + memset(mini_ht, -1, sizeof(mini_ht)); #endif do_clear_cache(); } @@ -6098,16 +6074,17 @@ void new_dynarec_clear_full(void) memset(mini_ht,-1,sizeof(mini_ht)); memset(shadow,0,sizeof(shadow)); copy=shadow; - expirep=16384; // Expiry pointer, +2 blocks + expirep = EXPIRITY_OFFSET; pending_exception=0; literalcount=0; stop_after_jal=0; inv_code_start=inv_code_end=~0; hack_addr=0; f1_hack=0; - // TLB - for(n=0;n<4096;n++) blocks_clear(&blocks[n]); - for(n=0;n<4096;n++) ll_clear(jump_out+n); + for (n = 0; n < ARRAY_SIZE(blocks); n++) + blocks_clear(&blocks[n]); + for (n = 0; n < ARRAY_SIZE(jump_out); n++) + ll_clear(&jump_out[n]); stat_clear(stat_blocks); stat_clear(stat_links); @@ -6187,8 +6164,10 @@ void new_dynarec_cleanup(void) SysPrintf("munmap() failed\n"); #endif #endif - for(n=0;n<4096;n++) blocks_clear(&blocks[n]); - for(n=0;n<4096;n++) ll_clear(jump_out+n); + for (n = 0; n < ARRAY_SIZE(blocks); n++) + blocks_clear(&blocks[n]); + for (n = 0; n < ARRAY_SIZE(jump_out); n++) + ll_clear(&jump_out[n]); stat_clear(stat_blocks); stat_clear(stat_links); #ifdef ROM_COPY @@ -8760,58 +8739,34 @@ static noinline void pass6_clean_registers(int istart, int iend, int wr) static noinline void pass10_expire_blocks(void) { - int i, end; - end = 
(((out-ndrc->translation_cache)>>(TARGET_SIZE_2-16)) + 16384) & 65535; - while (expirep != end) + u_int step = MAX_OUTPUT_BLOCK_SIZE / PAGE_COUNT / 2; + // not sizeof(ndrc->translation_cache) due to vita hack + u_int step_mask = ((1u << TARGET_SIZE_2) - 1u) & ~(step - 1u); + u_int end = (out - ndrc->translation_cache + EXPIRITY_OFFSET) & step_mask; + u_int base_shift = __builtin_ctz(MAX_OUTPUT_BLOCK_SIZE); + int hit; + + for (; expirep != end; expirep = ((expirep + step) & step_mask)) { - int shift=TARGET_SIZE_2-3; // Divide into 8 blocks - uintptr_t base_offs = ((uintptr_t)(expirep >> 13) << shift); // Base offset of this block - uintptr_t base_offs_s = base_offs >> shift; - if (!(expirep & ((1 << 13) - 1))) - inv_debug("EXP: base_offs %x\n", base_offs); - switch((expirep>>11)&3) - { - case 0: - // Clear blocks - blocks_remove_matching_addrs(&blocks[expirep & 2047], base_offs_s, shift); - blocks_remove_matching_addrs(&blocks[2048 + (expirep & 2047)], base_offs_s, shift); - break; - case 1: - // Clear pointers - //ll_kill_pointers(jump_out[expirep&2047],base_offs_s,shift); - //ll_kill_pointers(jump_out[(expirep&2047)+2048],base_offs_s,shift); - break; - case 2: - // Clear hash table - for(i=0;i<32;i++) { - struct ht_entry *ht_bin = &hash_table[((expirep&2047)<<5)+i]; - uintptr_t o1 = (u_char *)ht_bin->tcaddr[1] - ndrc->translation_cache; - uintptr_t o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; - if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s) { - inv_debug("EXP: Remove hash %x -> %p\n",ht_bin->vaddr[1],ht_bin->tcaddr[1]); - ht_bin->vaddr[1] = -1; - ht_bin->tcaddr[1] = NULL; - } - o1 = (u_char *)ht_bin->tcaddr[0] - ndrc->translation_cache; - o2 = o1 - MAX_OUTPUT_BLOCK_SIZE; - if ((o1 >> shift) == base_offs_s || (o2 >> shift) == base_offs_s) { - inv_debug("EXP: Remove hash %x -> %p\n",ht_bin->vaddr[0],ht_bin->tcaddr[0]); - ht_bin->vaddr[0] = ht_bin->vaddr[1]; - ht_bin->tcaddr[0] = ht_bin->tcaddr[1]; - ht_bin->vaddr[1] = -1; - ht_bin->tcaddr[1] = NULL; - } - } - 
break; - case 3: - // Clear jump_out - if((expirep&2047)==0) - do_clear_cache(); - ll_remove_matching_addrs(jump_out+(expirep&2047),base_offs_s,shift); - ll_remove_matching_addrs(jump_out+2048+(expirep&2047),base_offs_s,shift); - break; + u_int base_offs = expirep & ~(MAX_OUTPUT_BLOCK_SIZE - 1); + u_int block_i = expirep / step & (PAGE_COUNT - 1); + u_int phase = (expirep >> (base_shift - 1)) & 1u; + if (!(expirep & (MAX_OUTPUT_BLOCK_SIZE / 2 - 1))) { + inv_debug("EXP: base_offs %x/%x phase %u\n", base_offs, + out - ndrc->translation_cache phase); + } + + if (!phase) { + hit = blocks_remove_matching_addrs(&blocks[block_i], base_offs, base_shift); + if (hit) { + do_clear_cache(); + #ifdef USE_MINI_HT + memset(mini_ht, -1, sizeof(mini_ht)); + #endif + } } - expirep=(expirep+1)&65535; + else + ll_remove_matching_addrs(&jump_out[block_i], base_offs, base_shift); } } @@ -8835,7 +8790,7 @@ static struct block_info *new_block_info(u_int start, u_int len, block->is_dirty = 0; block->jump_in_cnt = jump_in_count; - // insert sorted by start vaddr + // insert sorted by start mirror-unmasked vaddr for (b_pptr = &blocks[page]; ; b_pptr = &((*b_pptr)->next)) { if (*b_pptr == NULL || (*b_pptr)->start >= start) { block->next = *b_pptr; @@ -9148,6 +9103,17 @@ static int new_recompile_block(u_int addr) if (instr_addr0_override) instr_addr[0] = instr_addr0_override; +#if 0 + /* check for improper expiration */ + for (i = 0; i < ARRAY_SIZE(jumps); i++) { + int j; + if (!jumps[i]) + continue; + for (j = 0; j < jumps[i]->count; j++) + assert(jumps[i]->e[j].stub < beginning || (u_char *)jumps[i]->e[j].stub > out); + } +#endif + /* Pass 9 - Linker */ for(i=0;i Date: Thu, 24 Feb 2022 23:28:11 +0200 Subject: [PATCH 138/597] drc: rework jump_out lists should waste less memory for malloc overheads --- libpcsxcore/new_dynarec/new_dynarec.c | 161 +++++++++++++------------- 1 file changed, 83 insertions(+), 78 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c 
b/libpcsxcore/new_dynarec/new_dynarec.c index 66e3dd67c..d369b815a 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -144,14 +144,6 @@ struct regstat u_int waswritten; // MIPS regs that were used as store base before }; -// note: asm depends on this layout -struct ll_entry -{ - u_int vaddr; - void *addr; - struct ll_entry *next; -}; - struct ht_entry { u_int vaddr[2]; @@ -195,6 +187,16 @@ struct block_info } jump_in[0]; }; +struct jump_info +{ + int alloc; + int count; + struct { + u_int target_vaddr; + void *stub; + } e[0]; +}; + static struct decoded_insn { u_char itype; @@ -217,7 +219,7 @@ static struct decoded_insn static u_char *out; static struct ht_entry hash_table[65536]; static struct block_info *blocks[PAGE_COUNT]; - static struct ll_entry *jump_out[PAGE_COUNT]; + static struct jump_info *jumps[PAGE_COUNT]; static u_int start; static u_int *source; static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs @@ -1180,18 +1182,6 @@ static void emit_far_call(const void *f) emit_call(f); } -// Add virtual address mapping to linked list -static void ll_add(struct ll_entry **head,int vaddr,void *addr) -{ - struct ll_entry *new_entry; - new_entry=malloc(sizeof(struct ll_entry)); - assert(new_entry!=NULL); - new_entry->vaddr=vaddr; - new_entry->addr=addr; - new_entry->next=*head; - *head=new_entry; -} - // Check if an address is already compiled // but don't return addresses which are about to expire from the cache static void *check_addr(u_int vaddr) @@ -1251,39 +1241,6 @@ static void *check_addr(u_int vaddr) return NULL; } -static void ll_remove_matching_addrs(struct ll_entry **head, u_int base_offs, int shift) -{ - struct ll_entry *next; - while (*head) { - u_int tc_offs = (u_char *)((*head)->addr) - ndrc->translation_cache; - if (((tc_offs ^ base_offs) >> shift) == 0) { - inv_debug("EXP: rm link from tc_offs %x)\n", tc_offs); - next = (*head)->next; - free(*head); - *head = next; - } - else - { - head = 
&((*head)->next); - } - } -} - -// Remove all entries from linked list -static void ll_clear(struct ll_entry **head) -{ - struct ll_entry *cur; - struct ll_entry *next; - if((cur=*head)) { - *head=0; - while(cur) { - next=cur->next; - free(cur); - cur=next; - } - } -} - static void blocks_clear(struct block_info **head) { struct block_info *cur, *next; @@ -1322,28 +1279,58 @@ static int blocks_remove_matching_addrs(struct block_info **head, } // This is called when we write to a compiled block (see do_invstub) -static void unlink_jumps_range(u_int start, u_int end) +static void unlink_jumps_vaddr_range(u_int start, u_int end) { u_int page, start_page = get_page(start), end_page = get_page(end - 1); - struct ll_entry **head, *next; + int i; for (page = start_page; page <= end_page; page++) { - for (head = &jump_out[page]; *head; ) { - if ((*head)->vaddr < start || (*head)->vaddr >= end) { - head = &((*head)->next); + struct jump_info *ji = jumps[page]; + if (ji == NULL) + continue; + for (i = 0; i < ji->count; ) { + if (ji->e[i].target_vaddr < start || ji->e[i].target_vaddr >= end) { + i++; continue; } - inv_debug("INV: rm link to %08x (tc_offs %zx)\n", - (*head)->vaddr, (u_char *)((*head)->addr) - ndrc->translation_cache); - void *host_addr = find_extjump_insn((*head)->addr); + + inv_debug("INV: rm link to %08x (tc_offs %zx)\n", ji->e[i].target_vaddr, + (u_char *)ji->e[i].stub - ndrc->translation_cache); + void *host_addr = find_extjump_insn(ji->e[i].stub); mark_clear_cache(host_addr); - set_jump_target(host_addr, (*head)->addr); // point back to dyna_linker stub + set_jump_target(host_addr, ji->e[i].stub); // point back to dyna_linker stub - next = (*head)->next; - free(*head); - *head = next; stat_dec(stat_links); + ji->count--; + if (i < ji->count) { + ji->e[i] = ji->e[ji->count]; + continue; + } + i++; + } + } +} + +static void unlink_jumps_tc_range(struct jump_info *ji, u_int base_offs, int shift) +{ + int i; + if (ji == NULL) + return; + for (i = 0; i < 
ji->count; ) { + u_int tc_offs = (u_char *)ji->e[i].stub - ndrc->translation_cache; + if (((tc_offs ^ base_offs) >> shift) != 0) { + i++; + continue; + } + + inv_debug("EXP: rm link to %08x (tc_offs %zx)\n", ji->e[i].target_vaddr, tc_offs); + stat_dec(stat_links); + ji->count--; + if (i < ji->count) { + ji->e[i] = ji->e[ji->count]; + continue; } + i++; } } @@ -1352,7 +1339,7 @@ static void invalidate_block(struct block_info *block) u_int i; block->is_dirty = 1; - unlink_jumps_range(block->start, block->start + block->len); + unlink_jumps_vaddr_range(block->start, block->start + block->len); for (i = 0; i < block->jump_in_cnt; i++) hash_table_remove(block->jump_in[i].vaddr); } @@ -1474,14 +1461,28 @@ static void do_invstub(int n) // Add an entry to jump_out after making a link // src should point to code by emit_extjump() -void ndrc_add_jump_out(u_int vaddr,void *src) +void ndrc_add_jump_out(u_int vaddr, void *src) { - u_int page=get_page(vaddr); - inv_debug("ndrc_add_jump_out: %p -> %x (%d)\n",src,vaddr,page); - check_extjump2(src); - ll_add(jump_out+page,vaddr,src); - //inv_debug("ndrc_add_jump_out: to %p\n",get_pointer(src)); + inv_debug("ndrc_add_jump_out: %p -> %x\n", src, vaddr); + u_int page = get_page(vaddr); + struct jump_info *ji; + stat_inc(stat_links); + check_extjump2(src); + ji = jumps[page]; + if (ji == NULL) { + ji = malloc(sizeof(*ji) + sizeof(ji->e[0]) * 16); + ji->alloc = 16; + ji->count = 0; + } + else if (ji->count >= ji->alloc) { + ji->alloc += 16; + ji = realloc(ji, sizeof(*ji) + sizeof(ji->e[0]) * ji->alloc); + } + jumps[page] = ji; + ji->e[ji->count].target_vaddr = vaddr; + ji->e[ji->count].stub = src; + ji->count++; } /* Register allocation */ @@ -6083,8 +6084,10 @@ void new_dynarec_clear_full(void) f1_hack=0; for (n = 0; n < ARRAY_SIZE(blocks); n++) blocks_clear(&blocks[n]); - for (n = 0; n < ARRAY_SIZE(jump_out); n++) - ll_clear(&jump_out[n]); + for (n = 0; n < ARRAY_SIZE(jumps); n++) { + free(jumps[n]); + jumps[n] = NULL; + } 
stat_clear(stat_blocks); stat_clear(stat_links); @@ -6166,8 +6169,10 @@ void new_dynarec_cleanup(void) #endif for (n = 0; n < ARRAY_SIZE(blocks); n++) blocks_clear(&blocks[n]); - for (n = 0; n < ARRAY_SIZE(jump_out); n++) - ll_clear(&jump_out[n]); + for (n = 0; n < ARRAY_SIZE(jumps); n++) { + free(jumps[n]); + jumps[n] = NULL; + } stat_clear(stat_blocks); stat_clear(stat_links); #ifdef ROM_COPY @@ -8766,7 +8771,7 @@ static noinline void pass10_expire_blocks(void) } } else - ll_remove_matching_addrs(&jump_out[block_i], base_offs, base_shift); + unlink_jumps_tc_range(jumps[block_i], base_offs, base_shift); } } From 3280e6168d3645c849778c0dad68719c2f69c667 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 27 Feb 2022 00:10:57 +0200 Subject: [PATCH 139/597] drc: try to avoid some block trapping --- libpcsxcore/new_dynarec/new_dynarec.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index d369b815a..760d42739 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -179,7 +179,8 @@ struct block_info u_int tc_offs; //u_int tc_len; u_int reg_sv_flags; - u_short is_dirty; + u_char is_dirty; + u_char inv_near_misses; u_short jump_in_cnt; struct { u_int vaddr; @@ -645,7 +646,7 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) if (memcmp(block->source, block->copy, block->len)) continue; - block->is_dirty = 0; + block->is_dirty = block->inv_near_misses = 0; found_clean = block->jump_in[i].addr; hash_table_add(vaddr, found_clean); mark_invalid_code(block->start, block->len, 0); @@ -1347,6 +1348,7 @@ static void invalidate_block(struct block_info *block) static int invalidate_range(u_int start, u_int end, u32 *inv_start_ret, u32 *inv_end_ret) { + struct block_info *last_block = NULL; u_int start_page = get_page_prev(start); u_int end_page = get_page(end - 1); u_int start_m = pmmask(start); @@ 
-1366,6 +1368,7 @@ static int invalidate_range(u_int start, u_int end, for (block = blocks[page]; block != NULL; block = block->next) { if (block->is_dirty) continue; + last_block = block; blk_end_m = pmmask(block->start + block->len); if (blk_end_m <= start_m) { inv_start = max(inv_start, blk_end_m); @@ -1385,12 +1388,22 @@ static int invalidate_range(u_int start, u_int end, } } + if (!hit && last_block && last_block->source) { + // could be some leftover unused block, uselessly trapping writes + last_block->inv_near_misses++; + if (last_block->inv_near_misses > 128) { + invalidate_block(last_block); + stat_inc(stat_inv_hits); + hit++; + } + } if (hit) { do_clear_cache(); #ifdef USE_MINI_HT memset(mini_ht, -1, sizeof(mini_ht)); #endif } + if (inv_start <= (start_m & ~0xfff) && inv_end >= (start_m | 0xfff)) // the whole page is empty now mark_invalid_code(start, 1, 1); @@ -8793,6 +8806,7 @@ static struct block_info *new_block_info(u_int start, u_int len, block->tc_offs = beginning - ndrc->translation_cache; //block->tc_len = out - beginning; block->is_dirty = 0; + block->inv_near_misses = 0; block->jump_in_cnt = jump_in_count; // insert sorted by start mirror-unmasked vaddr From c8e482ed450223234bb742cc9f30c54696498254 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 27 Feb 2022 01:54:39 +0200 Subject: [PATCH 140/597] unbreak nodynarec build oops --- libpcsxcore/new_dynarec/emu_if.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 7591093f5..a2c31f9a1 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -440,7 +440,7 @@ void new_dyna_start(void *context) {} void new_dynarec_cleanup() {} void new_dynarec_clear_full() {} void new_dynarec_invalidate_all_pages() {} -void new_dynarec_invalidate_range(unsigned int start, unsigned int end) { return 0; } +void new_dynarec_invalidate_range(unsigned int start, unsigned int end) {} void 
new_dyna_pcsx_mem_init(void) {} void new_dyna_pcsx_mem_reset(void) {} void new_dyna_pcsx_mem_load_state(void) {} From ab4377be5e2d461703aaba706f419f1bc466abd7 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 2 Mar 2022 00:49:11 +0200 Subject: [PATCH 141/597] drc: fix some wrong inv address calculations Fixes: 882a08fc49541450bc403b2e920e4bccc257dfdf notaz/pcsx_rearmed#243 --- libpcsxcore/new_dynarec/new_dynarec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 760d42739..45c3bff77 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -603,9 +603,9 @@ static void hash_table_remove(int vaddr) static void mark_invalid_code(u_int vaddr, u_int len, char invalid) { + u_int vaddr_m = vaddr & 0x1fffffff; u_int i, j; - vaddr &= 0x1fffffff; - for (i = vaddr & ~0xfff; i < vaddr + len; i += 0x1000) { + for (i = vaddr_m & ~0xfff; i < vaddr_m + len; i += 0x1000) { // ram mirrors, but should not hurt bios for (j = 0; j < 0x800000; j += 0x200000) { invalid_code[(i|j) >> 12] = @@ -1352,7 +1352,7 @@ static int invalidate_range(u_int start, u_int end, u_int start_page = get_page_prev(start); u_int end_page = get_page(end - 1); u_int start_m = pmmask(start); - u_int end_m = pmmask(end); + u_int end_m = pmmask(end - 1); u_int inv_start, inv_end; u_int blk_start_m, blk_end_m; u_int page; From 68e7d4092d16dd3fd8e47c1f97bf32ef4d08658c Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 13 Mar 2022 22:46:31 +0200 Subject: [PATCH 142/597] always enable chd support --- Makefile | 9 ++++++--- libchdr | 2 +- libpcsxcore/cdriso.c | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 7616cac55..fa5d279bc 100644 --- a/Makefile +++ b/Makefile @@ -149,7 +149,7 @@ endif # cdrcimg OBJS += plugins/cdrcimg/cdrcimg.o -ifeq "$(CHD_SUPPORT)" "1" +#ifeq "$(CHD_SUPPORT)" "1" OBJS += libchdr/src/libchdr_bitstream.o OBJS += 
libchdr/src/libchdr_cdrom.o OBJS += libchdr/src/libchdr_chd.o @@ -157,8 +157,11 @@ OBJS += libchdr/src/libchdr_flac.o OBJS += libchdr/src/libchdr_huffman.o OBJS += libchdr/deps/lzma-19.00/src/Alloc.o libchdr/deps/lzma-19.00/src/Bra86.o libchdr/deps/lzma-19.00/src/BraIA64.o libchdr/deps/lzma-19.00/src/CpuArch.o libchdr/deps/lzma-19.00/src/Delta.o OBJS += libchdr/deps/lzma-19.00/src/LzFind.o libchdr/deps/lzma-19.00/src/Lzma86Dec.o libchdr/deps/lzma-19.00/src/LzmaDec.o libchdr/deps/lzma-19.00/src/LzmaEnc.o libchdr/deps/lzma-19.00/src/Sort.o -CFLAGS += -DHAVE_CHD -D_7ZIP_ST -Ilibchdr/include/libchdr -Ilibchdr/include/dr_libs -Ilibchdr/include -Ilibchdr/deps/lzma-19.00/include -endif +CFLAGS += -DHAVE_CHD -Ilibchdr/include +libpcsxcore/cdriso.o: CFLAGS += -Wno-unused-function +libchdr/src/%.o: CFLAGS += -Wno-unused -Ilibchdr/deps/lzma-19.00/include +libchdr/deps/lzma-19.00/src/%.o: CFLAGS += -Wno-unused -D_7ZIP_ST -Ilibchdr/deps/lzma-19.00/include +#endif # dfinput OBJS += plugins/dfinput/main.o plugins/dfinput/pad.o plugins/dfinput/guncon.o diff --git a/libchdr b/libchdr index 15ff8d675..a03e69319 160000 --- a/libchdr +++ b/libchdr @@ -1 +1 @@ -Subproject commit 15ff8d67554f8651f4c971f4d42176214b96ce7b +Subproject commit a03e69319164f69d781ab8e453f8cf407387bd13 diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index e6247bbbe..fc29099e9 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -40,7 +40,7 @@ #include #ifdef HAVE_CHD -#include "chd.h" +#include "libchdr/chd.h" #endif #define OFF_T_MSB ((off_t)1 << (sizeof(off_t) * 8 - 1)) From 0e17864883bb852c5c91d8c04730d24ef512283f Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 13 Mar 2022 23:10:52 +0200 Subject: [PATCH 143/597] cdriso: merge some libretro changes no async read for now --- libpcsxcore/cdriso.c | 147 ++++++++++++++++++++++++++-------------- libpcsxcore/psxcommon.h | 1 + 2 files changed, 99 insertions(+), 49 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index 
fc29099e9..2dcaf49ba 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -83,16 +83,14 @@ static struct { } *compr_img; #ifdef HAVE_CHD -typedef struct { +static struct { unsigned char (*buffer)[CD_FRAMESIZE_RAW + SUB_FRAMESIZE]; chd_file* chd; const chd_header* header; unsigned int sectors_per_hunk; unsigned int current_hunk; unsigned int sector_in_hunk; -} CHD_IMG; - -static CHD_IMG *chd_img; +} *chd_img; #endif int (*cdimg_read_func)(FILE *f, unsigned int base, void *dest, int sector); @@ -161,11 +159,6 @@ static void tok2msf(char *time, char *msf) { } } -// stop the CDDA playback -static void stopCDDA() { - playing = FALSE; -} - // this function tries to get the .toc file of the given .bin // the necessary data is put into the ti (trackinformation)-array static int parsetoc(const char *isofile) { @@ -207,12 +200,13 @@ static int parsetoc(const char *isofile) { } } // check if it's really a TOC named as a .cue - fgets(linebuf, sizeof(linebuf), fi); + if (fgets(linebuf, sizeof(linebuf), fi) != NULL) { token = strtok(linebuf, " "); if (token && strncmp(token, "CD", 2) != 0 && strcmp(token, "CATALOG") != 0) { fclose(fi); return -1; } + } fseek(fi, 0, SEEK_SET); } @@ -332,6 +326,9 @@ static int parsecue(const char *isofile) { strncpy(cuename, isofile, sizeof(cuename)); cuename[MAXPATHLEN - 1] = '\0'; if (strlen(cuename) >= 4) { + // If 'isofile' is a '.cd' file, use it as a .cue file + // and don't try to search the additional .cue file + if (strncasecmp(cuename + strlen(cuename) - 4, ".cd", 3) != 0 ) strcpy(cuename + strlen(cuename) - 4, ".cue"); } else { @@ -464,9 +461,9 @@ static int parsecue(const char *isofile) { file_len = ftell(ti[numtracks + 1].handle) / 2352; if (numtracks == 0 && strlen(isofile) >= 4 && - strcmp(isofile + strlen(isofile) - 4, ".cue") == 0) - { - // user selected .cue as image file, use it's data track instead + (strcmp(isofile + strlen(isofile) - 4, ".cue") == 0 || + strncasecmp(isofile + strlen(isofile) - 4, ".cd", 3) == 0)) { 
+ // user selected .cue/.cdX as image file, use it's data track instead fclose(cdHandle); cdHandle = fopen(filepath, "rb"); } @@ -475,6 +472,10 @@ static int parsecue(const char *isofile) { fclose(fi); + // if there are no tracks detected, then it's not a cue file + if (!numtracks) + return -1; + return 0; } @@ -564,7 +565,8 @@ static int parsemds(const char *isofile) { memset(&ti, 0, sizeof(ti)); // check if it's a valid mds file - fread(&i, 1, sizeof(unsigned int), fi); + if (fread(&i, 1, sizeof(i), fi) != sizeof(i)) + goto fail_io; i = SWAP32(i); if (i != 0x4944454D) { // not an valid mds file @@ -574,19 +576,22 @@ static int parsemds(const char *isofile) { // get offset to session block fseek(fi, 0x50, SEEK_SET); - fread(&offset, 1, sizeof(unsigned int), fi); + if (fread(&offset, 1, sizeof(offset), fi) != sizeof(offset)) + goto fail_io; offset = SWAP32(offset); // get total number of tracks offset += 14; fseek(fi, offset, SEEK_SET); - fread(&s, 1, sizeof(unsigned short), fi); + if (fread(&s, 1, sizeof(s), fi) != sizeof(s)) + goto fail_io; s = SWAP16(s); numtracks = s; // get offset to track blocks fseek(fi, 4, SEEK_CUR); - fread(&offset, 1, sizeof(unsigned int), fi); + if (fread(&offset, 1, sizeof(offset), fi) != sizeof(offset)) + goto fail_io; offset = SWAP32(offset); // skip lead-in data @@ -615,32 +620,41 @@ static int parsemds(const char *isofile) { ti[i].start[1] = fgetc(fi); ti[i].start[2] = fgetc(fi); - fread(&extra_offset, 1, sizeof(unsigned int), fi); + if (fread(&extra_offset, 1, sizeof(extra_offset), fi) != sizeof(extra_offset)) + goto fail_io; extra_offset = SWAP32(extra_offset); // get track start offset (in .mdf) fseek(fi, offset + 0x28, SEEK_SET); - fread(&l, 1, sizeof(unsigned int), fi); + if (fread(&l, 1, sizeof(l), fi) != sizeof(l)) + goto fail_io; l = SWAP32(l); ti[i].start_offset = l; // get pregap fseek(fi, extra_offset, SEEK_SET); - fread(&l, 1, sizeof(unsigned int), fi); + if (fread(&l, 1, sizeof(l), fi) != sizeof(l)) + goto fail_io; l = 
SWAP32(l); if (l != 0 && i > 1) pregapOffset = msf2sec(ti[i].start); // get the track length - fread(&l, 1, sizeof(unsigned int), fi); + if (fread(&l, 1, sizeof(l), fi) != sizeof(l)) + goto fail_io; l = SWAP32(l); sec2msf(l, ti[i].length); offset += 0x50; } - fclose(fi); return 0; +fail_io: +#ifndef NDEBUG + SysPrintf(_("File IO error in <%s:%s>.\n"), __FILE__, __func__); +#endif + fclose(fi); + return -1; } static int handlepbp(const char *isofile) { @@ -691,7 +705,8 @@ static int handlepbp(const char *isofile) { } psisoimg_offs = pbp_hdr.psar_offs; - fread(psar_sig, 1, sizeof(psar_sig), cdHandle); + if (fread(psar_sig, 1, sizeof(psar_sig), cdHandle) != sizeof(psar_sig)) + goto fail_io; psar_sig[10] = 0; if (strcmp(psar_sig, "PSTITLEIMG") == 0) { // multidisk image? @@ -727,7 +742,8 @@ static int handlepbp(const char *isofile) { goto fail_io; } - fread(psar_sig, 1, sizeof(psar_sig), cdHandle); + if (fread(psar_sig, 1, sizeof(psar_sig), cdHandle) != sizeof(psar_sig)) + goto fail_io; psar_sig[10] = 0; } @@ -745,15 +761,18 @@ static int handlepbp(const char *isofile) { // first 3 entries are special fseek(cdHandle, sizeof(toc_entry), SEEK_CUR); - fread(&toc_entry, 1, sizeof(toc_entry), cdHandle); + if (fread(&toc_entry, 1, sizeof(toc_entry), cdHandle) != sizeof(toc_entry)) + goto fail_io; numtracks = btoi(toc_entry.index1[0]); - fread(&toc_entry, 1, sizeof(toc_entry), cdHandle); + if (fread(&toc_entry, 1, sizeof(toc_entry), cdHandle) != sizeof(toc_entry)) + goto fail_io; cd_length = btoi(toc_entry.index1[0]) * 60 * 75 + btoi(toc_entry.index1[1]) * 75 + btoi(toc_entry.index1[2]); for (i = 1; i <= numtracks; i++) { - fread(&toc_entry, 1, sizeof(toc_entry), cdHandle); + if (fread(&toc_entry, 1, sizeof(toc_entry), cdHandle) != sizeof(toc_entry)) + goto fail_io; ti[i].type = (toc_entry.type == 1) ? 
CDDA : DATA; @@ -811,7 +830,14 @@ static int handlepbp(const char *isofile) { fail_index: free(compr_img->index_table); compr_img->index_table = NULL; + goto done; + fail_io: +#ifndef NDEBUG + SysPrintf(_("File IO error in <%s:%s>.\n"), __FILE__, __func__); +#endif + +done: if (compr_img != NULL) { free(compr_img); compr_img = NULL; @@ -908,16 +934,19 @@ static int handlechd(const char *isofile) { int frame_offset = 0; int file_offset = 0; - chd_img = (CHD_IMG *)calloc(1, sizeof(*chd_img)); + chd_img = calloc(1, sizeof(*chd_img)); if (chd_img == NULL) goto fail_io; if(chd_open(isofile, CHD_OPEN_READ, NULL, &chd_img->chd) != CHDERR_NONE) goto fail_io; + if (Config.CHD_Precache && (chd_precache(chd_img->chd) != CHDERR_NONE)) + goto fail_io; + chd_img->header = chd_get_header(chd_img->chd); - chd_img->buffer = (unsigned char (*)[CD_FRAMESIZE_RAW + SUB_FRAMESIZE])malloc(chd_img->header->hunkbytes); + chd_img->buffer = malloc(chd_img->header->hunkbytes); if (chd_img->buffer == NULL) goto fail_io; @@ -1004,12 +1033,17 @@ static int opensubfile(const char *isoname) { } static int opensbifile(const char *isoname) { - char sbiname[MAXPATHLEN]; + char sbiname[MAXPATHLEN], disknum[MAXPATHLEN] = "0"; int s; strncpy(sbiname, isoname, sizeof(sbiname)); sbiname[MAXPATHLEN - 1] = '\0'; if (strlen(sbiname) >= 4) { + if (cdrIsoMultidiskCount > 1) { + sprintf(disknum, "_%i.sbi", cdrIsoMultidiskSelect + 1); + strcpy(sbiname + strlen(sbiname) - 4, disknum); + } + else strcpy(sbiname + strlen(sbiname) - 4, ".sbi"); } else { @@ -1034,10 +1068,18 @@ static int cdread_sub_mixed(FILE *f, unsigned int base, void *dest, int sector) fseek(f, base + sector * (CD_FRAMESIZE_RAW + SUB_FRAMESIZE), SEEK_SET); ret = fread(dest, 1, CD_FRAMESIZE_RAW, f); - fread(subbuffer, 1, SUB_FRAMESIZE, f); + if (fread(subbuffer, 1, SUB_FRAMESIZE, f) != SUB_FRAMESIZE) + goto fail_io; if (subChanRaw) DecodeRawSubData(); + goto done; +fail_io: +#ifndef NDEBUG + SysPrintf(_("File IO error in <%s:%s>.\n"), __FILE__, 
__func__); +#endif + +done: return ret; } @@ -1186,7 +1228,7 @@ static unsigned char * CALLBACK ISOgetBuffer_compr(void) { } #ifdef HAVE_CHD -static unsigned char *ISOgetBuffer_chd(void) { +static unsigned char * CALLBACK ISOgetBuffer_chd(void) { return chd_img->buffer[chd_img->sector_in_hunk] + 12; } #endif @@ -1212,6 +1254,7 @@ static long CALLBACK ISOopen(void) { boolean isMode1ISO = FALSE; char alt_bin_filename[MAXPATHLEN]; const char *bin_filename; + char image_str[1024] = {0}; if (cdHandle != NULL) { return 0; // it's already open @@ -1224,7 +1267,7 @@ static long CALLBACK ISOopen(void) { return -1; } - SysPrintf(_("Loaded CD Image: %s"), GetIsoFile()); + sprintf(image_str, "Loaded CD Image: %s", GetIsoFile()); cddaBigEndian = FALSE; subChanMixed = FALSE; @@ -1237,41 +1280,40 @@ static long CALLBACK ISOopen(void) { cdimg_read_func = cdread_normal; if (parsetoc(GetIsoFile()) == 0) { - SysPrintf("[+toc]"); + strcat(image_str, "[+toc]"); } else if (parseccd(GetIsoFile()) == 0) { - SysPrintf("[+ccd]"); + strcat(image_str, "[+ccd]"); } else if (parsemds(GetIsoFile()) == 0) { - SysPrintf("[+mds]"); + strcat(image_str, "[+mds]"); } else if (parsecue(GetIsoFile()) == 0) { - SysPrintf("[+cue]"); + strcat(image_str, "[+cue]"); } if (handlepbp(GetIsoFile()) == 0) { - SysPrintf("[pbp]"); + strcat(image_str, "[+pbp]"); CDR_getBuffer = ISOgetBuffer_compr; cdimg_read_func = cdread_compressed; } else if (handlecbin(GetIsoFile()) == 0) { - SysPrintf("[cbin]"); + strcat(image_str, "[+cbin]"); CDR_getBuffer = ISOgetBuffer_compr; cdimg_read_func = cdread_compressed; } - #ifdef HAVE_CHD else if (handlechd(GetIsoFile()) == 0) { - printf("[chd]"); + strcat(image_str, "[+chd]"); CDR_getBuffer = ISOgetBuffer_chd; cdimg_read_func = cdread_chd; } #endif if (!subChanMixed && opensubfile(GetIsoFile()) == 0) { - SysPrintf("[+sub]"); + strcat(image_str, "[+sub]"); } if (opensbifile(GetIsoFile()) == 0) { - SysPrintf("[+sbi]"); + strcat(image_str, "[+sbi]"); } fseeko(cdHandle, 0, SEEK_END); 
@@ -1307,15 +1349,20 @@ static long CALLBACK ISOopen(void) { if (ftello(cdHandle) % 2048 == 0) { unsigned int modeTest = 0; fseek(cdHandle, 0, SEEK_SET); - fread(&modeTest, 4, 1, cdHandle); + if (!fread(&modeTest, sizeof(modeTest), 1, cdHandle)) { +#ifndef NDEBUG + SysPrintf(_("File IO error in <%s:%s>.\n"), __FILE__, __func__); +#endif + return -1; + } if (SWAP32(modeTest) != 0xffffff00) { - SysPrintf("[2048]"); + strcat(image_str, "[2048]"); isMode1ISO = TRUE; } } fseek(cdHandle, 0, SEEK_SET); - SysPrintf(".\n"); + SysPrintf("%s.\n", image_str); PrintTracks(); @@ -1343,7 +1390,7 @@ static long CALLBACK ISOclose(void) { fclose(subHandle); subHandle = NULL; } - stopCDDA(); + playing = FALSE; cddaHandle = NULL; if (compr_img != NULL) { @@ -1477,7 +1524,9 @@ static boolean CALLBACK ISOreadTrack(unsigned char *time) { if (subHandle != NULL) { fseek(subHandle, sector * SUB_FRAMESIZE, SEEK_SET); - fread(subbuffer, 1, SUB_FRAMESIZE, subHandle); + if (fread(subbuffer, 1, SUB_FRAMESIZE, subHandle) != SUB_FRAMESIZE) + /* Faulty subchannel data shouldn't cause a read failure */ + return 0; if (subChanRaw) DecodeRawSubData(); } @@ -1495,7 +1544,7 @@ static long CALLBACK ISOplay(unsigned char *time) { // stops cdda audio static long CALLBACK ISOstop(void) { - stopCDDA(); + playing = FALSE; return 0; } diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index 224caa541..c0c2c9fb2 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -129,6 +129,7 @@ typedef struct { boolean Mdec; boolean PsxAuto; boolean Cdda; + boolean CHD_Precache; /* loads disk image into memory, works with CHD only. 
*/ boolean HLE; boolean Debug; boolean PsxOut; From acc6228440ecf235137e8f8af1e54d792cc83d84 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 13 Mar 2022 23:40:48 +0200 Subject: [PATCH 144/597] cdriso: handle chd subchannels, when available libretro/pcsx_rearmed#535 only raw mode tested --- libpcsxcore/cdriso.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index 2dcaf49ba..599e14011 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -980,8 +980,16 @@ static int handlechd(const char *isofile) { else break; - if(md.track == 1) + SysPrintf("chd: %s\n", meta); + + if (md.track == 1) { md.pregap = 150; + if (!strncmp(md.subtype, "RW", 2)) { + subChanMixed = TRUE; + if (!strcmp(md.subtype, "RW_RAW")) + subChanRaw = TRUE; + } + } else sec2msf(msf2sec(ti[md.track-1].length) + md.pregap, ti[md.track-1].length); @@ -1204,6 +1212,12 @@ static int cdread_chd(FILE *f, unsigned int base, void *dest, int sector) if (dest != cdbuffer) // copy avoid HACK memcpy(dest, chd_img->buffer[chd_img->sector_in_hunk], CD_FRAMESIZE_RAW); + if (subChanMixed) { + memcpy(subbuffer, chd_img->buffer[chd_img->sector_in_hunk] + CD_FRAMESIZE_RAW, + SUB_FRAMESIZE); + if (subChanRaw) + DecodeRawSubData(); + } return CD_FRAMESIZE_RAW; } #endif @@ -1254,7 +1268,8 @@ static long CALLBACK ISOopen(void) { boolean isMode1ISO = FALSE; char alt_bin_filename[MAXPATHLEN]; const char *bin_filename; - char image_str[1024] = {0}; + char image_str[1024]; + int is_chd = 0; if (cdHandle != NULL) { return 0; // it's already open @@ -1267,7 +1282,8 @@ static long CALLBACK ISOopen(void) { return -1; } - sprintf(image_str, "Loaded CD Image: %s", GetIsoFile()); + snprintf(image_str, sizeof(image_str) - 6*4 - 1, + "Loaded CD Image: %s", GetIsoFile()); cddaBigEndian = FALSE; subChanMixed = FALSE; @@ -1306,6 +1322,7 @@ static long CALLBACK ISOopen(void) { strcat(image_str, "[+chd]"); CDR_getBuffer = ISOgetBuffer_chd; 
cdimg_read_func = cdread_chd; + is_chd = 1; } #endif @@ -1366,7 +1383,7 @@ static long CALLBACK ISOopen(void) { PrintTracks(); - if (subChanMixed) + if (subChanMixed && !is_chd) cdimg_read_func = cdread_sub_mixed; else if (isMode1ISO) cdimg_read_func = cdread_2048; From ad3d298885bb97ff905f8fc130604f18089119d2 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 14 Mar 2022 01:24:50 +0200 Subject: [PATCH 145/597] frontend: omap: fix glitches on mode changes Where was I 10 years ago? Why am I so old? --- frontend/libpicofe | 2 +- frontend/plat_omap.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/frontend/libpicofe b/frontend/libpicofe index a8b4c53d7..33787db41 160000 --- a/frontend/libpicofe +++ b/frontend/libpicofe @@ -1 +1 @@ -Subproject commit a8b4c53d7795e4d448d88b0b8222549ede78622a +Subproject commit 33787db41d955f8dcafe833097f2cc87d70186ec diff --git a/frontend/plat_omap.c b/frontend/plat_omap.c index c4bff3131..a4ff846d6 100644 --- a/frontend/plat_omap.c +++ b/frontend/plat_omap.c @@ -112,7 +112,7 @@ void *plat_gvideo_set_mode(int *w_in, int *h_in, int *bpp) } buf = vout_fbdev_resize(layer_fb, w, h, *bpp, - l, r, t, b, 3); + l, r, t, b, 3, 1); vout_fbdev_clear(layer_fb); @@ -134,9 +134,10 @@ void plat_gvideo_close(void) void plat_video_menu_enter(int is_rom_loaded) { g_menuscreen_ptr = vout_fbdev_resize(main_fb, - g_menuscreen_w, g_menuscreen_h, 16, 0, 0, 0, 0, 3); + g_menuscreen_w, g_menuscreen_h, 16, 0, 0, 0, 0, 3, 0); if (g_menuscreen_ptr == NULL) fprintf(stderr, "warning: vout_fbdev_resize failed\n"); + vout_fbdev_clear(main_fb); xenv_update(NULL, NULL, NULL, NULL); } @@ -154,11 +155,11 @@ void plat_video_menu_leave(void) { /* have to get rid of panning so that plugins that * use fb0 and don't ever pan can work. 
*/ - vout_fbdev_clear(main_fb); g_menuscreen_ptr = vout_fbdev_resize(main_fb, - g_menuscreen_w, g_menuscreen_h, 16, 0, 0, 0, 0, 1); + g_menuscreen_w, g_menuscreen_h, 16, 0, 0, 0, 0, 1, 0); if (g_menuscreen_ptr == NULL) fprintf(stderr, "warning: vout_fbdev_resize failed\n"); + vout_fbdev_clear(main_fb); } void plat_minimize(void) From 623cf65eb525959ffe4852e046394b04eb207cb8 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 15 Mar 2022 00:04:27 +0200 Subject: [PATCH 146/597] release r23 --- readme.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/readme.txt b/readme.txt index 55e1feb89..5e3f3e620 100644 --- a/readme.txt +++ b/readme.txt @@ -113,6 +113,12 @@ the main menu where it is possible to enable/disable individual cheats. Changelog --------- +r23 (2022-03-14) +* many fixes from various contributors on github and from the libretro fork +* dynarec related slowdowns have been greatly reduced +* many dynarec bug fixes ++ added chd disk image support + r22 (2015-02-05) * general: fixed a race condition/crash in threaded SPU mode * pandora: C64x: fixed compatibility with newer c64_tools, enabled L2 cache From 55a695d912d793ba4b2f152fe795b302b76db162 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 2 Jun 2022 00:25:32 +0300 Subject: [PATCH 147/597] drc: update some logging and patches --- libpcsxcore/new_dynarec/emu_if.c | 6 +- libpcsxcore/new_dynarec/new_dynarec.c | 10 +-- libpcsxcore/new_dynarec/patches/trace_drc_chk | 63 +++++++++---------- libpcsxcore/new_dynarec/patches/trace_intr | 63 ++++++++++--------- 4 files changed, 69 insertions(+), 73 deletions(-) diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index a2c31f9a1..cc0bcdf1a 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -88,7 +88,8 @@ static void irq_test(void) void gen_interupt() { - evprintf(" +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt); + evprintf(" +ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle, + 
next_interupt, next_interupt - psxRegs.cycle); irq_test(); //psxBranchTest(); @@ -648,7 +649,8 @@ void do_insn_cmp(void) //if (psxRegs.cycle == 166172) breakme(); if (which_event >= 0 && event_cycles[which_event] != ev_cycles) { - printf("bad ev_cycles #%d: %08x %08x\n", which_event, event_cycles[which_event], ev_cycles); + printf("bad ev_cycles #%d: %u %u / %u\n", which_event, + event_cycles[which_event], ev_cycles, psxRegs.cycle); fatal = 1; } diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 45c3bff77..b160a4a07 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -2074,7 +2074,7 @@ static void cop0_alloc(struct regstat *current,int i) } else { - // TLBR/TLBWI/TLBWR/TLBP/ERET + // RFE assert(dops[i].opcode2==0x10); alloc_all(current,i); } @@ -5324,7 +5324,7 @@ static void rjump_assemble(int i, const struct regstat *i_regs) //assert(adj==0); emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), HOST_CCREG); add_stub(CC_STUB,out,NULL,0,i,-1,TAKEN,rs); - if(dops[i+1].itype==COP0&&(source[i+1]&0x3f)==0x10) + if(dops[i+1].itype==COP0 && dops[i+1].opcode2==0x10) // special case for RFE emit_jmp(0); else @@ -7026,9 +7026,9 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) // SYSCALL instruction (software interrupt) u=1; } - else if(dops[i].itype==COP0 && (source[i]&0x3f)==0x18) + else if(dops[i].itype==COP0 && dops[i].opcode2==0x10) { - // ERET instruction (return from interrupt) + // RFE u=1; } //u=1; // DEBUG @@ -8771,7 +8771,7 @@ static noinline void pass10_expire_blocks(void) u_int phase = (expirep >> (base_shift - 1)) & 1u; if (!(expirep & (MAX_OUTPUT_BLOCK_SIZE / 2 - 1))) { inv_debug("EXP: base_offs %x/%x phase %u\n", base_offs, - out - ndrc->translation_cache phase); + out - ndrc->translation_cache, phase); } if (!phase) { diff --git a/libpcsxcore/new_dynarec/patches/trace_drc_chk b/libpcsxcore/new_dynarec/patches/trace_drc_chk index 
e98a48e7f..414c22156 100644 --- a/libpcsxcore/new_dynarec/patches/trace_drc_chk +++ b/libpcsxcore/new_dynarec/patches/trace_drc_chk @@ -1,8 +1,8 @@ diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c -index f1005db..ebd1d4f 100644 +index b160a4a..0d91999 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c -@@ -235,7 +235,7 @@ static struct decoded_insn +@@ -285,7 +285,7 @@ static struct decoded_insn int new_dynarec_hacks_old; int new_dynarec_did_compile; @@ -11,25 +11,25 @@ index f1005db..ebd1d4f 100644 extern int cycle_count; // ... until end of the timeslice, counts -N -> 0 extern int last_count; // last absolute target, often = next_interupt -@@ -471,6 +471,7 @@ int cycle_multiplier_old; +@@ -532,6 +532,7 @@ static int cycle_multiplier_active; static int CLOCK_ADJUST(int x) { + return x * 2; - int m = cycle_multiplier_override && cycle_multiplier == CYCLE_MULT_DEFAULT - ? cycle_multiplier_override : cycle_multiplier; - int s=(x>>31)|1; -@@ -522,6 +523,9 @@ static int doesnt_expire_soon(void *tcaddr) + int m = cycle_multiplier_active; + int s = (x >> 31) | 1; + return (x * m + s * 50) / 100; +@@ -662,6 +663,9 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) // This is called from the recompiled JR/JALR instructions - void noinline *get_addr(u_int vaddr) + static void noinline *get_addr(u_int vaddr, int can_compile) { +#ifdef DRC_DBG +printf("get_addr %08x, pc=%08x\n", vaddr, psxRegs.pc); +#endif - u_int page=get_page(vaddr); - u_int vpage=get_vpage(vaddr); - struct ll_entry *head; -@@ -6248,7 +6252,7 @@ void unneeded_registers(int istart,int iend,int r) + u_int start_page = get_page_prev(vaddr); + u_int i, page, end_page = get_page(vaddr); + void *found_clean = NULL; +@@ -7046,7 +7050,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) // R0 is always unneeded u|=1; // Save it @@ -38,24 +38,16 @@ index f1005db..ebd1d4f 100644 
gte_unneeded[i]=gte_u; /* printf("ur (%d,%d) %x: ",istart,iend,start+i*4); -@@ -8794,6 +8798,7 @@ int new_recompile_block(u_int addr) - - // This allocates registers (if possible) one instruction prior - // to use, which can avoid a load-use penalty on certain CPUs. -+#if 0 +@@ -8236,6 +8240,7 @@ static noinline void pass5a_preallocate1(void) + static noinline void pass5b_preallocate2(void) + { + int i, hr; ++ return; for(i=0;i> 26; switch (tmp) { -@@ -499,13 +501,15 @@ static void doBranch(u32 tar) { +@@ -500,13 +502,15 @@ static void doBranch(u32 tar) { } break; } @@ -163,7 +163,7 @@ index f7898e9..1f125ed 100644 } /********************************************************* -@@ -615,12 +619,13 @@ void psxMULTU_stall() { +@@ -616,12 +620,13 @@ void psxMULTU_stall() { psxMULTU(); } @@ -179,7 +179,7 @@ index f7898e9..1f125ed 100644 void psxBGEZ() { RepZBranchi32(>=) } // Branch if Rs >= 0 void psxBGEZAL() { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link -@@ -702,7 +707,7 @@ void psxRFE() { +@@ -703,7 +708,7 @@ void psxRFE() { * Register branch logic * * Format: OP rs, rt, offset * *********************************************************/ @@ -188,15 +188,16 @@ index f7898e9..1f125ed 100644 void psxBEQ() { RepBranchi32(==) } // Branch if Rs == Rt void psxBNE() { RepBranchi32(!=) } // Branch if Rs != Rt -@@ -886,6 +891,7 @@ void MTC0(int reg, u32 val) { - case 12: // Status - psxRegs.CP0.r[12] = val; - psxTestSWInts(); -+ //psxBranchTest(); - break; +@@ -901,7 +907,7 @@ void MTC0(int reg, u32 val) { + } + } - case 13: // Cause -@@ -1027,6 +1033,23 @@ void intExecuteBlock() { +-void psxMTC0() { MTC0(_Rd_, _u32(_rRt_)); } ++void psxMTC0() { MTC0(_Rd_, _u32(_rRt_)); psxBranchTest(); } + void psxCTC0() { MTC0(_Rd_, _u32(_rRt_)); } + + /********************************************************* +@@ -1028,6 +1034,23 @@ void intExecuteBlock() { while (!branch2) execI(); } @@ -220,7 +221,7 @@ index f7898e9..1f125ed 100644 static void intClear(u32 Addr, u32 Size) { } -@@ 
-1049,7 +1072,7 @@ void intApplyConfig() { +@@ -1050,7 +1073,7 @@ void intApplyConfig() { assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall); assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall); @@ -229,7 +230,7 @@ index f7898e9..1f125ed 100644 psxBSC[18] = psxCOP2; psxBSC[50] = gteLWC2; psxBSC[58] = gteSWC2; -@@ -1091,9 +1114,10 @@ void execI() { +@@ -1092,9 +1115,10 @@ void execI() { if (Config.Debug) ProcessDebug(); psxRegs.pc += 4; @@ -242,10 +243,10 @@ index f7898e9..1f125ed 100644 R3000Acpu psxInt = { diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c -index 04aeec2..710a379 100644 +index 46cee0c..c814587 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c -@@ -217,11 +217,13 @@ void psxMemShutdown() { +@@ -218,11 +218,13 @@ void psxMemShutdown() { } static int writeok = 1; @@ -259,7 +260,7 @@ index 04aeec2..710a379 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -247,6 +249,7 @@ u16 psxMemRead16(u32 mem) { +@@ -248,6 +250,7 @@ u16 psxMemRead16(u32 mem) { char *p; u32 t; @@ -267,7 +268,7 @@ index 04aeec2..710a379 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -272,6 +275,7 @@ u32 psxMemRead32(u32 mem) { +@@ -273,6 +276,7 @@ u32 psxMemRead32(u32 mem) { char *p; u32 t; @@ -275,7 +276,7 @@ index 04aeec2..710a379 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -297,6 +301,7 @@ void psxMemWrite8(u32 mem, u8 value) { +@@ -298,6 +302,7 @@ void psxMemWrite8(u32 mem, u8 value) { char *p; u32 t; @@ -283,7 +284,7 @@ index 04aeec2..710a379 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -324,6 +329,7 @@ void psxMemWrite16(u32 mem, u16 value) { +@@ -325,6 +330,7 @@ void psxMemWrite16(u32 mem, u16 value) { char *p; u32 t; @@ -291,7 +292,7 @@ index 04aeec2..710a379 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 
0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -351,6 +357,7 @@ void psxMemWrite32(u32 mem, u32 value) { +@@ -352,6 +358,7 @@ void psxMemWrite32(u32 mem, u32 value) { char *p; u32 t; @@ -299,7 +300,7 @@ index 04aeec2..710a379 100644 // if ((mem&0x1fffff) == 0x71E18 || value == 0x48088800) SysPrintf("t2fix!!\n"); t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { -@@ -380,6 +387,8 @@ void psxMemWrite32(u32 mem, u32 value) { +@@ -381,6 +388,8 @@ void psxMemWrite32(u32 mem, u32 value) { } else { int i; From e78515041f52dad93f52e09195231d13b609487d Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 2 Jun 2022 23:22:19 +0300 Subject: [PATCH 148/597] psxcounters: try to support a dynarec with a very long timeslice The dynarec instead should probably not run for thousands of cycles doing no interrupt checks, but maybe this hack will be enough. libretro/pcsx_rearmed#658 --- libpcsxcore/psxcounters.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index ff0efbced..3342770e9 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -300,19 +300,19 @@ void psxRcntUpdate() cycle = psxRegs.cycle; // rcnt 0. - if( cycle - rcnts[0].cycleStart >= rcnts[0].cycle ) + while( cycle - rcnts[0].cycleStart >= rcnts[0].cycle ) { psxRcntReset( 0 ); } // rcnt 1. - if( cycle - rcnts[1].cycleStart >= rcnts[1].cycle ) + while( cycle - rcnts[1].cycleStart >= rcnts[1].cycle ) { psxRcntReset( 1 ); } // rcnt 2. 
- if( cycle - rcnts[2].cycleStart >= rcnts[2].cycle ) + while( cycle - rcnts[2].cycleStart >= rcnts[2].cycle ) { psxRcntReset( 2 ); } From 48aa3a5ac671d60663aa63d03abb189f55b18ea4 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 7 Jun 2022 00:45:57 +0300 Subject: [PATCH 149/597] cdrom: don't read and play simultaneously notaz/pcsx_rearmed#250 --- libpcsxcore/cdrom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 191a7373d..00e65a596 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -686,6 +686,7 @@ void cdrInterrupt() { ReadTrack(cdr.SetSectorPlay); cdr.TrackChanged = FALSE; + StopReading(); if (!Config.Cdda) CDR_play(cdr.SetSectorPlay); From 817d98eeb920176f2a4bd090482a76b986b036d6 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 7 Jun 2022 01:59:51 +0300 Subject: [PATCH 150/597] cdriso: hack .chd track handling to match .cue libretro/pcsx_rearmed#648 --- libpcsxcore/cdriso.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index 599e14011..9d74ef46a 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -931,7 +931,7 @@ static int handlecbin(const char *isofile) { #ifdef HAVE_CHD static int handlechd(const char *isofile) { - int frame_offset = 0; + int frame_offset = 150; int file_offset = 0; chd_img = calloc(1, sizeof(*chd_img)); @@ -983,25 +983,23 @@ static int handlechd(const char *isofile) { SysPrintf("chd: %s\n", meta); if (md.track == 1) { - md.pregap = 150; if (!strncmp(md.subtype, "RW", 2)) { subChanMixed = TRUE; if (!strcmp(md.subtype, "RW_RAW")) subChanRaw = TRUE; } } - else - sec2msf(msf2sec(ti[md.track-1].length) + md.pregap, ti[md.track-1].length); ti[md.track].type = !strncmp(md.type, "AUDIO", 5) ? 
CDDA : DATA; sec2msf(frame_offset + md.pregap, ti[md.track].start); sec2msf(md.frames, ti[md.track].length); - ti[md.track].start_offset = file_offset; + ti[md.track].start_offset = file_offset + md.pregap; - frame_offset += md.pregap + md.frames + md.postgap; - file_offset += md.frames + md.postgap; + // XXX: what about postgap? + frame_offset += md.frames; + file_offset += md.frames; numtracks++; } From 3039c914c0ac33ee89127db3bd4c53eb3c25cafd Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 15 Jul 2022 00:24:34 +0300 Subject: [PATCH 151/597] drc: some libnx support --- libpcsxcore/new_dynarec/assem_arm64.c | 12 ++-- libpcsxcore/new_dynarec/new_dynarec.c | 66 +++++++++++++++++--- libpcsxcore/new_dynarec/new_dynarec_config.h | 7 ++- 3 files changed, 71 insertions(+), 14 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 4eda43a9b..ff0d1a6c2 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -34,6 +34,8 @@ static void set_jump_target(void *addr, void *target) u_int *ptr = addr; intptr_t offset = (u_char *)target - (u_char *)addr; + ptr += ndrc_write_ofs / sizeof(ptr[0]); + if ((*ptr&0xFC000000) == 0x14000000) { // b assert(offset>=-134217728LL&&offset<134217728LL); *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff); @@ -142,7 +144,7 @@ static unused const char *condname[16] = { static void output_w32(u_int word) { - *((u_int *)out) = word; + *((u_int *)(out + ndrc_write_ofs)) = word; out += 4; } @@ -1943,13 +1945,13 @@ static void clear_cache_arm64(char *start, char *end) static void arch_init(void) { uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops; - struct tramp_insns *ops = ndrc->tramp.ops; + struct tramp_insns *ops = ndrc->tramp.ops, *opsw; size_t i; assert(!(diff & 3)); - start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); + opsw = start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); for (i = 0; i < 
ARRAY_SIZE(ndrc->tramp.ops); i++) { - ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val] - ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17 + opsw[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val] + opsw[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17 } end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); } diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index b160a4a07..cabf4871d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -29,6 +29,10 @@ #ifdef _3DS #include <3ds_utils.h> #endif +#ifdef HAVE_LIBNX +#include +static Jit g_jit; +#endif #include "new_dynarec_config.h" #include "../psxhle.h" @@ -107,6 +111,17 @@ static struct ndrc_mem *ndrc; static struct ndrc_mem ndrc_ __attribute__((aligned(4096))); static struct ndrc_mem *ndrc = &ndrc_; #endif +#ifdef NDRC_WRITE_OFFSET +# ifdef __GLIBC__ +# include +# include +# include +# include +# endif +static long ndrc_write_ofs; +#else +#define ndrc_write_ofs 0 +#endif // stubs enum stub_type { @@ -424,6 +439,16 @@ static void mprotect_w_x(void *start, void *end, int is_x) sceKernelCloseVMDomain(); else sceKernelOpenVMDomain(); + #elif defined(HAVE_LIBNX) + Result rc; + if (is_x) + rc = jitTransitionToExecutable(&g_jit); + else + rc = jitTransitionToWritable(&g_jit); + if (R_FAILED(rc)) + SysPrintf("jitTransition %d %08x\n", is_x, rc); + #elif defined(NDRC_WRITE_OFFSET) + // separated rx and rw areas are always available #else u_long mstart = (u_long)start & ~4095ul; u_long mend = (u_long)end; @@ -434,9 +459,10 @@ static void mprotect_w_x(void *start, void *end, int is_x) #endif } -static void start_tcache_write(void *start, void *end) +static void *start_tcache_write(void *start, void *end) { mprotect_w_x(start, end, 0); + return (char *)start + ndrc_write_ofs; } static void end_tcache_write(void *start, void *end) @@ -451,6 +477,8 @@ static void end_tcache_write(void 
*start, void *end) sceKernelSyncVMDomain(sceBlock, start, len); #elif defined(_3DS) ctr_flush_invalidate_cache(); + #elif defined(HAVE_LIBNX) + // handled in mprotect_w_x() #elif defined(__aarch64__) // as of 2021, __clear_cache() is still broken on arm64 // so here is a custom one :( @@ -1154,8 +1182,8 @@ static void *get_trampoline(const void *f) abort(); } if (ndrc->tramp.f[i] == NULL) { - start_tcache_write(&ndrc->tramp.f[i], &ndrc->tramp.f[i + 1]); - ndrc->tramp.f[i] = f; + const void **d = start_tcache_write(&ndrc->tramp.f[i], &ndrc->tramp.f[i + 1]); + *d = f; end_tcache_write(&ndrc->tramp.f[i], &ndrc->tramp.f[i + 1]); } return &ndrc->tramp.ops[i]; @@ -6057,7 +6085,7 @@ static void new_dynarec_test(void) } SysPrintf("testing if we can run recompiled code @%p...\n", out); - ((volatile u_int *)out)[0]++; // make cache dirty + ((volatile u_int *)(out + ndrc_write_ofs))[0]++; // make the cache dirty for (i = 0; i < ARRAY_SIZE(ret); i++) { out = ndrc->translation_cache; @@ -6128,19 +6156,39 @@ void new_dynarec_init(void) #elif defined(_MSC_VER) ndrc = VirtualAlloc(NULL, sizeof(*ndrc), MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); + #elif defined(HAVE_LIBNX) + Result rc = jitCreate(&g_jit, sizeof(*ndrc)); + if (R_FAILED(rc)) + SysPrintf("jitCreate failed: %08x\n", rc); + SysPrintf("jitCreate: RX: %p RW: %p type: %d\n", g_jit.rx_addr, g_jit.rw_addr, g_jit.type); + ndrc = g_jit.rx_addr; + ndrc_write_ofs = (char *)g_jit.rw_addr - (char *)ndrc; #else uintptr_t desired_addr = 0; + int prot = PROT_READ | PROT_WRITE | PROT_EXEC; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + int fd = -1; #ifdef __ELF__ extern char _end; desired_addr = ((uintptr_t)&_end + 0xffffff) & ~0xffffffl; #endif - ndrc = mmap((void *)desired_addr, sizeof(*ndrc), - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + #ifdef NDRC_WRITE_OFFSET + // mostly for testing + fd = open("/dev/shm/pcsxr", O_CREAT | O_RDWR, 0600); + ftruncate(fd, sizeof(*ndrc)); + void *mw = mmap(NULL, 
sizeof(*ndrc), PROT_READ | PROT_WRITE, + (flags = MAP_SHARED), fd, 0); + assert(mw != MAP_FAILED); + prot = PROT_READ | PROT_EXEC; + #endif + ndrc = mmap((void *)desired_addr, sizeof(*ndrc), prot, flags, fd, 0); if (ndrc == MAP_FAILED) { SysPrintf("mmap() failed: %s\n", strerror(errno)); abort(); } + #ifdef NDRC_WRITE_OFFSET + ndrc_write_ofs = (char *)mw - (char *)ndrc; + #endif #endif #else #ifndef NO_WRITE_EXEC @@ -6175,9 +6223,13 @@ void new_dynarec_cleanup(void) // sceBlock is managed by retroarch's bootstrap code //sceKernelFreeMemBlock(sceBlock); //sceBlock = -1; + #elif defined(HAVE_LIBNX) + jitClose(&g_jit); + ndrc = NULL; #else if (munmap(ndrc, sizeof(*ndrc)) < 0) SysPrintf("munmap() failed\n"); + ndrc = NULL; #endif #endif for (n = 0; n < ARRAY_SIZE(blocks); n++) diff --git a/libpcsxcore/new_dynarec/new_dynarec_config.h b/libpcsxcore/new_dynarec/new_dynarec_config.h index f93613ffa..64c58492a 100644 --- a/libpcsxcore/new_dynarec/new_dynarec_config.h +++ b/libpcsxcore/new_dynarec/new_dynarec_config.h @@ -6,9 +6,12 @@ #define USE_MINI_HT 1 //#define REG_PREFETCH 1 -#if defined(__MACH__) +#if defined(__MACH__) || defined(HAVE_LIBNX) #define NO_WRITE_EXEC 1 #endif -#ifdef VITA +#if defined(VITA) || defined(HAVE_LIBNX) #define BASE_ADDR_DYNAMIC 1 #endif +#if defined(HAVE_LIBNX) +#define NDRC_WRITE_OFFSET 1 +#endif From af700b411e17806c3afb9e5f607317adc00dd546 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 15 Jul 2022 00:41:58 +0300 Subject: [PATCH 152/597] drc: simplify cache flush for some platforms untested, may break 3DS, if it happens please report --- libpcsxcore/new_dynarec/new_dynarec.c | 24 +++++++++++++++++++- libpcsxcore/new_dynarec/new_dynarec_config.h | 3 +++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index cabf4871d..c9e168847 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -506,6 +506,28 @@ static 
void end_block(void *start) end_tcache_write(start, out); } +#ifdef NDRC_CACHE_FLUSH_ALL + +static int needs_clear_cache; + +static void mark_clear_cache(void *target) +{ + if (!needs_clear_cache) { + start_tcache_write(ndrc, ndrc + 1); + needs_clear_cache = 1; + } +} + +static void do_clear_cache(void) +{ + if (needs_clear_cache) { + end_tcache_write(ndrc, ndrc + 1); + needs_clear_cache = 0; + } +} + +#else + // also takes care of w^x mappings when patching code static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; @@ -549,7 +571,7 @@ static void do_clear_cache(void) } } -//#define DEBUG_CYCLE_COUNT 1 +#endif // NDRC_CACHE_FLUSH_ALL #define NO_CYCLE_PENALTY_THR 12 diff --git a/libpcsxcore/new_dynarec/new_dynarec_config.h b/libpcsxcore/new_dynarec/new_dynarec_config.h index 64c58492a..5aee85df1 100644 --- a/libpcsxcore/new_dynarec/new_dynarec_config.h +++ b/libpcsxcore/new_dynarec/new_dynarec_config.h @@ -15,3 +15,6 @@ #if defined(HAVE_LIBNX) #define NDRC_WRITE_OFFSET 1 #endif +#if defined(HAVE_LIBNX) || defined(_3DS) +#define NDRC_CACHE_FLUSH_ALL 1 +#endif From d9e2b173fb11fea4976fb0a6c5feda6b654b4b46 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 19 Jul 2022 00:21:44 +0300 Subject: [PATCH 153/597] drc: some more libnx support --- libpcsxcore/new_dynarec/assem_arm64.c | 16 ++-- libpcsxcore/new_dynarec/new_dynarec.c | 93 ++++++++++++-------- libpcsxcore/new_dynarec/new_dynarec_config.h | 4 +- 3 files changed, 64 insertions(+), 49 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index ff0d1a6c2..b2b8110d2 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -31,11 +31,9 @@ void do_memhandler_post(); /* Linker */ static void set_jump_target(void *addr, void *target) { - u_int *ptr = addr; + u_int *ptr = NDRC_WRITE_OFFSET(addr); intptr_t offset = (u_char *)target - (u_char *)addr; - ptr += ndrc_write_ofs / sizeof(ptr[0]); - if ((*ptr&0xFC000000) == 0x14000000) { // b 
assert(offset>=-134217728LL&&offset<134217728LL); *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff); @@ -144,7 +142,7 @@ static unused const char *condname[16] = { static void output_w32(u_int word) { - *((u_int *)(out + ndrc_write_ofs)) = word; + *((u_int *)NDRC_WRITE_OFFSET(out)) = word; out += 4; } @@ -1900,7 +1898,7 @@ static void do_miniht_insert(u_int return_address,u_int rt,int temp) { emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]); } -static void clear_cache_arm64(char *start, char *end) +static unused void clear_cache_arm64(char *start, char *end) { // Don't rely on GCC's __clear_cache implementation, as it caches // icache/dcache cache line sizes, that can vary between cores on @@ -1945,13 +1943,13 @@ static void clear_cache_arm64(char *start, char *end) static void arch_init(void) { uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops; - struct tramp_insns *ops = ndrc->tramp.ops, *opsw; + struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops); size_t i; assert(!(diff & 3)); - opsw = start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); + start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) { - opsw[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val] - opsw[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17 + ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val] + ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17 } end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); } diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index c9e168847..447023c68 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -41,7 +41,11 @@ static Jit g_jit; #include "emu_if.h" // emulator interface #include "arm_features.h" +#ifdef __clang__ +#define noinline __attribute__((noinline)) +#else #define noinline __attribute__((noinline,noclone)) +#endif 
#ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #endif @@ -95,14 +99,16 @@ static Jit g_jit; #define TC_REDUCE_BYTES 0 #endif +struct ndrc_tramp +{ + struct tramp_insns ops[2048 / sizeof(struct tramp_insns)]; + const void *f[2048 / sizeof(void *)]; +}; + struct ndrc_mem { u_char translation_cache[(1 << TARGET_SIZE_2) - TC_REDUCE_BYTES]; - struct - { - struct tramp_insns ops[2048 / sizeof(struct tramp_insns)]; - const void *f[2048 / sizeof(void *)]; - } tramp; + struct ndrc_tramp tramp; }; #ifdef BASE_ADDR_DYNAMIC @@ -111,7 +117,7 @@ static struct ndrc_mem *ndrc; static struct ndrc_mem ndrc_ __attribute__((aligned(4096))); static struct ndrc_mem *ndrc = &ndrc_; #endif -#ifdef NDRC_WRITE_OFFSET +#ifdef TC_WRITE_OFFSET # ifdef __GLIBC__ # include # include @@ -119,8 +125,9 @@ static struct ndrc_mem *ndrc = &ndrc_; # include # endif static long ndrc_write_ofs; +#define NDRC_WRITE_OFFSET(x) (void *)((char *)(x) + ndrc_write_ofs) #else -#define ndrc_write_ofs 0 +#define NDRC_WRITE_OFFSET(x) (x) #endif // stubs @@ -441,13 +448,16 @@ static void mprotect_w_x(void *start, void *end, int is_x) sceKernelOpenVMDomain(); #elif defined(HAVE_LIBNX) Result rc; - if (is_x) - rc = jitTransitionToExecutable(&g_jit); - else - rc = jitTransitionToWritable(&g_jit); - if (R_FAILED(rc)) - SysPrintf("jitTransition %d %08x\n", is_x, rc); - #elif defined(NDRC_WRITE_OFFSET) + // check to avoid the full flush in jitTransitionToExecutable() + if (g_jit.type != JitType_CodeMemory) { + if (is_x) + rc = jitTransitionToExecutable(&g_jit); + else + rc = jitTransitionToWritable(&g_jit); + if (R_FAILED(rc)) + ;//SysPrintf("jitTransition %d %08x\n", is_x, rc); + } + #elif defined(TC_WRITE_OFFSET) // separated rx and rw areas are always available #else u_long mstart = (u_long)start & ~4095ul; @@ -459,10 +469,9 @@ static void mprotect_w_x(void *start, void *end, int is_x) #endif } -static void *start_tcache_write(void *start, void *end) +static void start_tcache_write(void *start, 
void *end) { mprotect_w_x(start, end, 0); - return (char *)start + ndrc_write_ofs; } static void end_tcache_write(void *start, void *end) @@ -478,7 +487,10 @@ static void end_tcache_write(void *start, void *end) #elif defined(_3DS) ctr_flush_invalidate_cache(); #elif defined(HAVE_LIBNX) - // handled in mprotect_w_x() + if (g_jit.type == JitType_CodeMemory) { + armDCacheClean(start, len); + armICacheInvalidate((char *)start - ndrc_write_ofs, len); + } #elif defined(__aarch64__) // as of 2021, __clear_cache() is still broken on arm64 // so here is a custom one :( @@ -497,13 +509,13 @@ static void *start_block(void) u_char *end = out + MAX_OUTPUT_BLOCK_SIZE; if (end > ndrc->translation_cache + sizeof(ndrc->translation_cache)) end = ndrc->translation_cache + sizeof(ndrc->translation_cache); - start_tcache_write(out, end); + start_tcache_write(NDRC_WRITE_OFFSET(out), NDRC_WRITE_OFFSET(end)); return out; } static void end_block(void *start) { - end_tcache_write(start, out); + end_tcache_write(NDRC_WRITE_OFFSET(start), NDRC_WRITE_OFFSET(out)); } #ifdef NDRC_CACHE_FLUSH_ALL @@ -513,7 +525,7 @@ static int needs_clear_cache; static void mark_clear_cache(void *target) { if (!needs_clear_cache) { - start_tcache_write(ndrc, ndrc + 1); + start_tcache_write(NDRC_WRITE_OFFSET(ndrc), NDRC_WRITE_OFFSET(ndrc + 1)); needs_clear_cache = 1; } } @@ -521,7 +533,7 @@ static void mark_clear_cache(void *target) static void do_clear_cache(void) { if (needs_clear_cache) { - end_tcache_write(ndrc, ndrc + 1); + end_tcache_write(NDRC_WRITE_OFFSET(ndrc), NDRC_WRITE_OFFSET(ndrc + 1)); needs_clear_cache = 0; } } @@ -536,7 +548,7 @@ static void mark_clear_cache(void *target) uintptr_t offset = (u_char *)target - ndrc->translation_cache; u_int mask = 1u << ((offset >> 12) & 31); if (!(needs_clear_cache[offset >> 17] & mask)) { - char *start = (char *)((uintptr_t)target & ~4095l); + char *start = (char *)NDRC_WRITE_OFFSET((uintptr_t)target & ~4095l); start_tcache_write(start, start + 4095); 
needs_clear_cache[offset >> 17] |= mask; } @@ -565,7 +577,7 @@ static void do_clear_cache(void) break; end += 4096; } - end_tcache_write(start, end); + end_tcache_write(NDRC_WRITE_OFFSET(start), NDRC_WRITE_OFFSET(end)); } needs_clear_cache[i] = 0; } @@ -1193,20 +1205,25 @@ static const char *func_name(const void *a) static void *get_trampoline(const void *f) { + struct ndrc_tramp *tramp = NDRC_WRITE_OFFSET(&ndrc->tramp); size_t i; - for (i = 0; i < ARRAY_SIZE(ndrc->tramp.f); i++) { - if (ndrc->tramp.f[i] == f || ndrc->tramp.f[i] == NULL) + for (i = 0; i < ARRAY_SIZE(tramp->f); i++) { + if (tramp->f[i] == f || tramp->f[i] == NULL) break; } - if (i == ARRAY_SIZE(ndrc->tramp.f)) { + if (i == ARRAY_SIZE(tramp->f)) { SysPrintf("trampoline table is full, last func %p\n", f); abort(); } - if (ndrc->tramp.f[i] == NULL) { - const void **d = start_tcache_write(&ndrc->tramp.f[i], &ndrc->tramp.f[i + 1]); - *d = f; - end_tcache_write(&ndrc->tramp.f[i], &ndrc->tramp.f[i + 1]); + if (tramp->f[i] == NULL) { + start_tcache_write(&tramp->f[i], &tramp->f[i + 1]); + tramp->f[i] = f; + end_tcache_write(&tramp->f[i], &tramp->f[i + 1]); +#ifdef HAVE_LIBNX + // invalidate the RX mirror (unsure if necessary, but just in case...) 
+ armDCacheFlush(&ndrc->tramp.f[i], sizeof(ndrc->tramp.f[i])); +#endif } return &ndrc->tramp.ops[i]; } @@ -6093,7 +6110,7 @@ static void disassemble_inst(int i) {} #define DRC_TEST_VAL 0x74657374 -static void new_dynarec_test(void) +static noinline void new_dynarec_test(void) { int (*testfunc)(void); void *beginning; @@ -6106,8 +6123,9 @@ static void new_dynarec_test(void) SysPrintf("linkage_arm* miscompilation/breakage detected.\n"); } - SysPrintf("testing if we can run recompiled code @%p...\n", out); - ((volatile u_int *)(out + ndrc_write_ofs))[0]++; // make the cache dirty + SysPrintf("(%p) testing if we can run recompiled code @%p...\n", + new_dynarec_test, out); + ((volatile u_int *)NDRC_WRITE_OFFSET(out))[0]++; // make the cache dirty for (i = 0; i < ARRAY_SIZE(ret); i++) { out = ndrc->translation_cache; @@ -6183,8 +6201,10 @@ void new_dynarec_init(void) if (R_FAILED(rc)) SysPrintf("jitCreate failed: %08x\n", rc); SysPrintf("jitCreate: RX: %p RW: %p type: %d\n", g_jit.rx_addr, g_jit.rw_addr, g_jit.type); + jitTransitionToWritable(&g_jit); ndrc = g_jit.rx_addr; ndrc_write_ofs = (char *)g_jit.rw_addr - (char *)ndrc; + memset(NDRC_WRITE_OFFSET(&ndrc->tramp), 0, sizeof(ndrc->tramp)); #else uintptr_t desired_addr = 0; int prot = PROT_READ | PROT_WRITE | PROT_EXEC; @@ -6194,7 +6214,7 @@ void new_dynarec_init(void) extern char _end; desired_addr = ((uintptr_t)&_end + 0xffffff) & ~0xffffffl; #endif - #ifdef NDRC_WRITE_OFFSET + #ifdef TC_WRITE_OFFSET // mostly for testing fd = open("/dev/shm/pcsxr", O_CREAT | O_RDWR, 0600); ftruncate(fd, sizeof(*ndrc)); @@ -6208,7 +6228,7 @@ void new_dynarec_init(void) SysPrintf("mmap() failed: %s\n", strerror(errno)); abort(); } - #ifdef NDRC_WRITE_OFFSET + #ifdef TC_WRITE_OFFSET ndrc_write_ofs = (char *)mw - (char *)ndrc; #endif #endif @@ -6262,9 +6282,6 @@ void new_dynarec_cleanup(void) } stat_clear(stat_blocks); stat_clear(stat_links); - #ifdef ROM_COPY - if (munmap (ROM_COPY, 67108864) < 0) {SysPrintf("munmap() failed\n");} - 
#endif new_dynarec_print_stats(); } diff --git a/libpcsxcore/new_dynarec/new_dynarec_config.h b/libpcsxcore/new_dynarec/new_dynarec_config.h index 5aee85df1..9687aa975 100644 --- a/libpcsxcore/new_dynarec/new_dynarec_config.h +++ b/libpcsxcore/new_dynarec/new_dynarec_config.h @@ -13,8 +13,8 @@ #define BASE_ADDR_DYNAMIC 1 #endif #if defined(HAVE_LIBNX) -#define NDRC_WRITE_OFFSET 1 +#define TC_WRITE_OFFSET 1 #endif -#if defined(HAVE_LIBNX) || defined(_3DS) +#if defined(_3DS) #define NDRC_CACHE_FLUSH_ALL 1 #endif From 04bd10b132d06eff2a803125dc8da640be2454db Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 11 Sep 2019 17:33:14 +0200 Subject: [PATCH 154/597] Fix PCSX on big-endian systems The __BIGENDIAN__ macro was never defined anywhere, and the __BIG_ENDIAN__ macro isn't set anymore by recent versions of GCC. Replace them by checking __BYTE_ORDER__ against __ORDER_BIG_ENDIAN__. Signed-off-by: Paul Cercueil --- libpcsxcore/misc.c | 2 +- libpcsxcore/psxmem.h | 2 +- libpcsxcore/r3000a.h | 4 ++-- plugins/dfxvideo/draw.c | 8 ++++---- plugins/dfxvideo/gpu.h | 12 +++--------- plugins/dfxvideo/gpulib_if.c | 12 +++--------- 6 files changed, 14 insertions(+), 26 deletions(-) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 02d1761b5..3a0630673 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -61,7 +61,7 @@ void mmssdd( char *b, char *p ) #if defined(__arm__) unsigned char *u = (void *)b; int block = (u[3] << 24) | (u[2] << 16) | (u[1] << 8) | u[0]; -#elif defined(__BIGENDIAN__) +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ int block = (b[0] & 0xff) | ((b[1] & 0xff) << 8) | ((b[2] & 0xff) << 16) | (b[3] << 24); #else int block = *((int*)b); diff --git a/libpcsxcore/psxmem.h b/libpcsxcore/psxmem.h index fbf5f67c7..3d5317c18 100644 --- a/libpcsxcore/psxmem.h +++ b/libpcsxcore/psxmem.h @@ -26,7 +26,7 @@ extern "C" { #include "psxcommon.h" -#if defined(__BIGENDIAN__) +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define _SWAP16(b) ((((unsigned char 
*)&(b))[0] & 0xff) | (((unsigned char *)&(b))[1] & 0xff) << 8) #define _SWAP32(b) ((((unsigned char *)&(b))[0] & 0xff) | ((((unsigned char *)&(b))[1] & 0xff) << 8) | ((((unsigned char *)&(b))[2] & 0xff) << 16) | (((unsigned char *)&(b))[3] << 24)) diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index cb72bf362..7d8e260ce 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -51,7 +51,7 @@ extern R3000Acpu psxInt; extern R3000Acpu psxRec; typedef union { -#if defined(__BIGENDIAN__) +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ struct { u8 h3, h2, h, l; } b; struct { s8 h3, h2, h, l; } sb; struct { u16 h, l; } w; @@ -217,7 +217,7 @@ void new_dyna_freeze(void *f, int mode); } \ } -#if defined(__BIGENDIAN__) +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define _i32(x) *(s32 *)&x #define _u32(x) x diff --git a/plugins/dfxvideo/draw.c b/plugins/dfxvideo/draw.c index ad3f3a196..e68f1a19e 100644 --- a/plugins/dfxvideo/draw.c +++ b/plugins/dfxvideo/draw.c @@ -1053,7 +1053,7 @@ void CreateDisplay(void) //backup YUV mode //hmm, should I bother check guid == 55595659-0000-0010-8000-00aa00389b71? //and check byte order? 
fo[j].byte_order == LSBFirst -#ifdef __BIG_ENDIAN__ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ if ( fo[j].type == XvYUV && fo[j].bits_per_pixel == 16 && fo[j].format == XvPacked && strncmp("YUYV", fo[j].component_order, 5) == 0 ) #else if ( fo[j].type == XvYUV && fo[j].bits_per_pixel == 16 && fo[j].format == XvPacked && strncmp("UYVY", fo[j].component_order, 5) == 0 ) @@ -1473,7 +1473,7 @@ void BlitToYUV(unsigned char * surf,int32_t x,int32_t y) U = min(abs(R * -1214 + G * -2384 + B * 3598 + 4096 + 1048576) >> 13, 240); V = min(abs(R * 3598 + G * -3013 + B * -585 + 4096 + 1048576) >> 13, 240); -#ifdef __BIG_ENDIAN__ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ destpix[row] = Y << 24 | U << 16 | Y << 8 | V; #else destpix[row] = Y << 24 | V << 16 | Y << 8 | U; @@ -1500,7 +1500,7 @@ void BlitToYUV(unsigned char * surf,int32_t x,int32_t y) U = min(abs(R * -1214 + G * -2384 + B * 3598 + 4096 + 1048576) >> 13, 240); V = min(abs(R * 3598 + G * -3013 + B * -585 + 4096 + 1048576) >> 13, 240); -#ifdef __BIG_ENDIAN__ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ destpix[row] = Y << 24 | U << 16 | Y << 8 | V; #else destpix[row] = Y << 24 | V << 16 | Y << 8 | U; @@ -1534,7 +1534,7 @@ void RGB2YUV(uint32_t *s, int width, int height, uint32_t *d) Y2 = min(abs(R * 2104 + G * 4130 + B * 802 + 4096 + 131072) >> 13, 235); -#ifdef __BIG_ENDIAN__ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ *d = V | Y2 << 8 | U << 16 | Y1 << 24; #else *d = U | Y1 << 8 | V << 16 | Y2 << 24; diff --git a/plugins/dfxvideo/gpu.h b/plugins/dfxvideo/gpu.h index 9ee5f3e6f..25fcc3ce8 100644 --- a/plugins/dfxvideo/gpu.h +++ b/plugins/dfxvideo/gpu.h @@ -72,7 +72,7 @@ #define SWAP16(x) ({ uint16_t y=(x); (((y)>>8 & 0xff) | ((y)<<8 & 0xff00)); }) #define SWAP32(x) ({ uint32_t y=(x); (((y)>>24 & 0xfful) | ((y)>>8 & 0xff00ul) | ((y)<<8 & 0xff0000ul) | ((y)<<24 & 0xff000000ul)); }) -#ifdef __BIG_ENDIAN__ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ // big endian config #define HOST2LE32(x) SWAP32(x) @@ -251,18 +251,12 @@ 
extern int32_t drawH; #define KEY_BADTEXTURES 128 #define KEY_CHECKTHISOUT 256 -#if !defined(__BIG_ENDIAN__) || defined(__x86_64__) || defined(__i386__) -#ifndef __LITTLE_ENDIAN__ -#define __LITTLE_ENDIAN__ -#endif -#endif - -#ifdef __LITTLE_ENDIAN__ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define RED(x) (x & 0xff) #define BLUE(x) ((x>>16) & 0xff) #define GREEN(x) ((x>>8) & 0xff) #define COLOR(x) (x & 0xffffff) -#elif defined __BIG_ENDIAN__ +#else #define RED(x) ((x>>24) & 0xff) #define BLUE(x) ((x>>8) & 0xff) #define GREEN(x) ((x>>16) & 0xff) diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index ff0c96c70..d7d69a765 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -40,7 +40,7 @@ #define SWAP16(x) ({ uint16_t y=(x); (((y)>>8 & 0xff) | ((y)<<8 & 0xff00)); }) #define SWAP32(x) ({ uint32_t y=(x); (((y)>>24 & 0xfful) | ((y)>>8 & 0xff00ul) | ((y)<<8 & 0xff0000ul) | ((y)<<24 & 0xff000000ul)); }) -#ifdef __BIG_ENDIAN__ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ // big endian config #define HOST2LE32(x) SWAP32(x) @@ -219,18 +219,12 @@ extern int32_t drawH; #define KEY_BADTEXTURES 128 #define KEY_CHECKTHISOUT 256 -#if !defined(__BIG_ENDIAN__) || defined(__x86_64__) || defined(__i386__) -#ifndef __LITTLE_ENDIAN__ -#define __LITTLE_ENDIAN__ -#endif -#endif - -#ifdef __LITTLE_ENDIAN__ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define RED(x) (x & 0xff) #define BLUE(x) ((x>>16) & 0xff) #define GREEN(x) ((x>>8) & 0xff) #define COLOR(x) (x & 0xffffff) -#elif defined __BIG_ENDIAN__ +#else #define RED(x) ((x>>24) & 0xff) #define BLUE(x) ((x>>8) & 0xff) #define GREEN(x) ((x>>16) & 0xff) From 086adfff4fe6352e401d00c052071a6b91245b40 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 13 Sep 2019 23:01:57 +0200 Subject: [PATCH 155/597] Make sure hardware registers are manipulated as little-endian The hardware registers should be represented in little-endian format. 
Therefore, on big-endian systems the values need to be byte-swapped. Signed-off-by: Paul Cercueil --- libpcsxcore/gpu.h | 4 ++-- libpcsxcore/misc.c | 2 +- libpcsxcore/psxcounters.c | 8 ++++---- libpcsxcore/psxdma.c | 6 +++--- libpcsxcore/psxhw.c | 10 +++++----- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/libpcsxcore/gpu.h b/libpcsxcore/gpu.h index 63a557203..9dfe63475 100644 --- a/libpcsxcore/gpu.h +++ b/libpcsxcore/gpu.h @@ -35,6 +35,6 @@ #define PSXGPU_TIMING_BITS (PSXGPU_LCF | PSXGPU_nBUSY) #define gpuSyncPluginSR() { \ - HW_GPU_STATUS &= PSXGPU_TIMING_BITS; \ - HW_GPU_STATUS |= GPU_readStatus() & ~PSXGPU_TIMING_BITS; \ + HW_GPU_STATUS &= SWAP32(PSXGPU_TIMING_BITS); \ + HW_GPU_STATUS |= SWAP32(GPU_readStatus() & ~PSXGPU_TIMING_BITS); \ } diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 3a0630673..b2dd9a127 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -680,7 +680,7 @@ int LoadState(const char *file) { GPU_freeze(0, gpufP); free(gpufP); if (HW_GPU_STATUS == 0) - HW_GPU_STATUS = GPU_readStatus(); + HW_GPU_STATUS = SWAP32(GPU_readStatus()); // spu SaveFuncs.read(f, &Size, 4); diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index 3342770e9..5198646d9 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -328,7 +328,7 @@ void psxRcntUpdate() // VSync irq. 
if( hSyncCount == VBlankStart ) { - HW_GPU_STATUS &= ~PSXGPU_LCF; + HW_GPU_STATUS &= SWAP32(~PSXGPU_LCF); GPU_vBlank( 1, 0 ); setIrq( 0x01 ); @@ -348,9 +348,9 @@ void psxRcntUpdate() frame_counter++; gpuSyncPluginSR(); - if( (HW_GPU_STATUS & PSXGPU_ILACE_BITS) == PSXGPU_ILACE_BITS ) - HW_GPU_STATUS |= frame_counter << 31; - GPU_vBlank( 0, HW_GPU_STATUS >> 31 ); + if ((HW_GPU_STATUS & SWAP32(PSXGPU_ILACE_BITS)) == SWAP32(PSXGPU_ILACE_BITS)) + HW_GPU_STATUS |= SWAP32(frame_counter << 31); + GPU_vBlank(0, SWAP32(HW_GPU_STATUS) >> 31); } // Schedule next call, in hsyncs diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index cb84fbccf..d3b85724f 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -185,7 +185,7 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU size = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff); if ((int)size <= 0) size = gpuDmaChainSize(madr); - HW_GPU_STATUS &= ~PSXGPU_nBUSY; + HW_GPU_STATUS &= SWAP32(~PSXGPU_nBUSY); // we don't emulate progress, just busy flag and end irq, // so pretend we're already at the last block @@ -217,7 +217,7 @@ void gpuInterrupt() { HW_DMA2_CHCR &= SWAP32(~0x01000000); DMA_INTERRUPT(2); } - HW_GPU_STATUS |= PSXGPU_nBUSY; // GPU no longer busy + HW_GPU_STATUS |= SWAP32(PSXGPU_nBUSY); // GPU no longer busy } void psxDma6(u32 madr, u32 bcr, u32 chcr) { @@ -245,7 +245,7 @@ void psxDma6(u32 madr, u32 bcr, u32 chcr) { *mem-- = SWAP32((madr - 4) & 0xffffff); madr -= 4; } - mem++; *mem = 0xffffff; + *++mem = SWAP32(0xffffff); //GPUOTCDMA_INT(size); // halted diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index dbcb9892f..7b2401b18 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -38,7 +38,7 @@ void psxHwReset() { mdecInit(); // initialize mdec decoder cdrReset(); psxRcntInit(); - HW_GPU_STATUS = 0x14802000; + HW_GPU_STATUS = SWAP32(0x14802000); } u8 psxHwRead8(u32 add) { @@ -248,8 +248,8 @@ u32 psxHwRead32(u32 add) { return hard; case 0x1f801814: gpuSyncPluginSR(); - hard = HW_GPU_STATUS; - 
if (hSyncCount < 240 && (HW_GPU_STATUS & PSXGPU_ILACE_BITS) != PSXGPU_ILACE_BITS) + hard = SWAP32(HW_GPU_STATUS); + if (hSyncCount < 240 && (hard & PSXGPU_ILACE_BITS) != PSXGPU_ILACE_BITS) hard |= PSXGPU_LCF & (psxRegs.cycle << 20); #ifdef PSXHW_LOG PSXHW_LOG("GPU STATUS 32bit read %x\n", hard); @@ -446,7 +446,7 @@ void psxHwWrite16(u32 add, u16 value) { PSXHW_LOG("IMASK 16bit write %x\n", value); #endif psxHu16ref(0x1074) = SWAPu16(value); - if (psxHu16ref(0x1070) & value) + if (psxHu16ref(0x1070) & SWAPu16(value)) new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); return; @@ -560,7 +560,7 @@ void psxHwWrite32(u32 add, u32 value) { PSXHW_LOG("IMASK 32bit write %x\n", value); #endif psxHu32ref(0x1074) = SWAPu32(value); - if (psxHu32ref(0x1070) & value) + if (psxHu32ref(0x1070) & SWAPu32(value)) new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); return; From f23b103c8248c10855949bfb2185b6b10d4f0457 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 13 Sep 2019 23:09:11 +0200 Subject: [PATCH 156/597] Get rid of bit fields in union Long story short, bit fields aren't endian-safe. More info: http://mjfrazer.org/mjfrazer/bitfields/ Simplify that by just using a few macros to access the needed bits. 
Signed-off-by: Paul Cercueil --- plugins/gpu_neon/psx_gpu_if.c | 2 +- plugins/gpu_senquack/gpulib_if.cpp | 4 +-- plugins/gpulib/gpu.c | 41 +++++++++++++++++------------- plugins/gpulib/gpu.h | 38 ++++++++------------------- plugins/gpulib/vout_pl.c | 13 +++++----- 5 files changed, 44 insertions(+), 54 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index ad017614b..bb8bea0ad 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -133,7 +133,7 @@ void renderer_sync_ecmds(uint32_t *ecmds) void renderer_update_caches(int x, int y, int w, int h) { update_texture_cache_region(&egpu, x, y, x + w - 1, y + h - 1); - if (gpu.state.enhancement_active && !gpu.status.rgb24) + if (gpu.state.enhancement_active && !(gpu.status & PSX_GPU_STATUS_RGB24)) sync_enhancement_buffers(x, y, w, h); } diff --git a/plugins/gpu_senquack/gpulib_if.cpp b/plugins/gpu_senquack/gpulib_if.cpp index c8452a3d0..5efc7d9de 100644 --- a/plugins/gpu_senquack/gpulib_if.cpp +++ b/plugins/gpu_senquack/gpulib_if.cpp @@ -226,10 +226,10 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) gpu_senquack.ilace_mask = gpu_senquack.config.ilace_force; #ifdef HAVE_PRE_ARMV7 /* XXX */ - gpu_senquack.ilace_mask |= gpu.status.interlace; + gpu_senquack.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE); #endif if (gpu_senquack.config.scale_hires) { - gpu_senquack.ilace_mask |= gpu.status.interlace; + gpu_senquack.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE); } for (; list < list_end; list += 1 + len) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index d67df03c3..d0bb526b7 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -56,7 +56,7 @@ static noinline void do_reset(void) memset(gpu.regs, 0, sizeof(gpu.regs)); for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++) gpu.ex_regs[i] = (0xe0 + i) << 24; - gpu.status.reg = 0x14802000; + gpu.status = 0x14802000; gpu.gp0 = 0; gpu.regs[3] = 1; 
gpu.screen.hres = gpu.screen.w = 256; @@ -77,7 +77,7 @@ static noinline void update_height(void) { // TODO: emulate this properly.. int sh = gpu.screen.y2 - gpu.screen.y1; - if (gpu.status.dheight) + if (gpu.status & PSX_GPU_STATUS_DHEIGHT) sh *= 2; if (sh <= 0 || sh > gpu.screen.vres) sh = gpu.screen.vres; @@ -114,7 +114,7 @@ static noinline int decide_frameskip_allow(uint32_t cmd_e3) // but not for interlace since it'll most likely always do that uint32_t x = cmd_e3 & 0x3ff; uint32_t y = (cmd_e3 >> 10) & 0x3ff; - gpu.frameskip.allow = gpu.status.interlace || + gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) || (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w || (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h; return gpu.frameskip.allow; @@ -213,10 +213,14 @@ void GPUwriteStatus(uint32_t data) do_cmd_reset(); break; case 0x03: - gpu.status.blanking = data & 1; + if (data & 1) + gpu.status |= PSX_GPU_STATUS_BLANKING; + else + gpu.status &= ~PSX_GPU_STATUS_BLANKING; break; case 0x04: - gpu.status.dma = data & 3; + gpu.status &= ~PSX_GPU_STATUS_DMA_MASK; + gpu.status |= PSX_GPU_STATUS_DMA(data & 3); break; case 0x05: gpu.screen.x = data & 0x3ff; @@ -240,9 +244,9 @@ void GPUwriteStatus(uint32_t data) update_height(); break; case 0x08: - gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10); - gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7]; - gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3]; + gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10); + gpu.screen.hres = hres[(gpu.status >> 16) & 7]; + gpu.screen.vres = vres[(gpu.status >> 19) & 3]; update_width(); update_height(); renderer_notify_res_change(); @@ -354,7 +358,7 @@ static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_re renderer_flush_queues(); if (is_read) { - gpu.status.img = 1; + gpu.status |= PSX_GPU_STATUS_IMG; // XXX: wrong for width 1 memcpy(&gpu.gp0, 
VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4); gpu.state.last_vram_read_frame = *gpu.state.frame_count; @@ -367,7 +371,7 @@ static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_re static void finish_vram_transfer(int is_read) { if (is_read) - gpu.status.img = 0; + gpu.status &= ~PSX_GPU_STATUS_IMG; else renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y, gpu.dma_start.w, gpu.dma_start.h); @@ -482,9 +486,9 @@ static noinline int do_cmd_buffer(uint32_t *data, int count) break; } - gpu.status.reg &= ~0x1fff; - gpu.status.reg |= gpu.ex_regs[1] & 0x7ff; - gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11; + gpu.status &= ~0x1fff; + gpu.status |= gpu.ex_regs[1] & 0x7ff; + gpu.status |= (gpu.ex_regs[6] & 3) << 11; gpu.state.fb_dirty |= vram_dirty; @@ -622,7 +626,7 @@ uint32_t GPUreadStatus(void) if (unlikely(gpu.cmd_len > 0)) flush_cmd_buffer(); - ret = gpu.status.reg; + ret = gpu.status; log_io("gpu_read_status %08x\n", ret); return ret; } @@ -646,13 +650,13 @@ long GPUfreeze(uint32_t type, struct GPUFreeze *freeze) memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2); memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs)); memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs)); - freeze->ulStatus = gpu.status.reg; + freeze->ulStatus = gpu.status; break; case 0: // load memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2); memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs)); memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs)); - gpu.status.reg = freeze->ulStatus; + gpu.status = freeze->ulStatus; gpu.cmd_len = 0; for (i = 8; i > 0; i--) { gpu.regs[i] ^= 1; // avoid reg change detection @@ -672,7 +676,7 @@ void GPUupdateLace(void) flush_cmd_buffer(); renderer_flush_queues(); - if (gpu.status.blanking) { + if (gpu.status & PSX_GPU_STATUS_BLANKING) { if (!gpu.state.blanked) { vout_blank(); gpu.state.blanked = 1; @@ -701,7 +705,8 @@ void GPUupdateLace(void) void GPUvBlank(int is_vblank, int lcf) { int interlace = 
gpu.state.allow_interlace - && gpu.status.interlace && gpu.status.dheight; + && (gpu.status & PSX_GPU_STATUS_INTERLACE) + && (gpu.status & PSX_GPU_STATUS_DHEIGHT); // interlace doesn't look nice on progressive displays, // so we have this "auto" mode here for games that don't read vram if (gpu.state.allow_interlace == 2 diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index d11f991ca..3bab7fff3 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -16,37 +16,21 @@ extern "C" { #define CMD_BUFFER_LEN 1024 +#define BIT(x) (1 << (x)) + +#define PSX_GPU_STATUS_DHEIGHT BIT(19) +#define PSX_GPU_STATUS_RGB24 BIT(21) +#define PSX_GPU_STATUS_INTERLACE BIT(22) +#define PSX_GPU_STATUS_BLANKING BIT(23) +#define PSX_GPU_STATUS_IMG BIT(27) +#define PSX_GPU_STATUS_DMA(x) ((x) << 29) +#define PSX_GPU_STATUS_DMA_MASK (BIT(29) | BIT(30)) + struct psx_gpu { uint32_t cmd_buffer[CMD_BUFFER_LEN]; uint32_t regs[16]; uint16_t *vram; - union { - uint32_t reg; - struct { - uint32_t tx:4; // 0 texture page - uint32_t ty:1; - uint32_t abr:2; - uint32_t tp:2; // 7 t.p. 
mode (4,8,15bpp) - uint32_t dtd:1; // 9 dither - uint32_t dfe:1; - uint32_t md:1; // 11 set mask bit when drawing - uint32_t me:1; // 12 no draw on mask - uint32_t unkn:3; - uint32_t width1:1; // 16 - uint32_t width0:2; - uint32_t dheight:1; // 19 double height - uint32_t video:1; // 20 NTSC,PAL - uint32_t rgb24:1; - uint32_t interlace:1; // 22 interlace on - uint32_t blanking:1; // 23 display not enabled - uint32_t unkn2:2; - uint32_t busy:1; // 26 !busy drawing - uint32_t img:1; // 27 ready to DMA image data - uint32_t com:1; // 28 ready for commands - uint32_t dma:2; // 29 off, ?, to vram, from vram - uint32_t lcf:1; // 31 - }; - } status; + uint32_t status; uint32_t gp0; uint32_t ex_regs[8]; struct { diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index d1fdefbc5..1c98b55af 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -36,7 +36,7 @@ static void check_mode_change(int force) gpu.state.enhancement_active = gpu.get_enhancement_bufer != NULL && gpu.state.enhancement_enable - && w <= 512 && h <= 256 && !gpu.status.rgb24; + && w <= 512 && h <= 256 && !(gpu.status & PSX_GPU_STATUS_RGB24); if (gpu.state.enhancement_active) { w_out *= 2; @@ -44,12 +44,13 @@ static void check_mode_change(int force) } // width|rgb24 change? - if (force || (gpu.status.reg ^ old_status) & ((7<<16)|(1<<21)) || h != old_h) + if (force || (gpu.status ^ old_status) & ((7<<16)|(1<<21)) || h != old_h) { - old_status = gpu.status.reg; + old_status = gpu.status; old_h = h; - cbs->pl_vout_set_mode(w_out, h_out, w, h, gpu.status.rgb24 ? 24 : 16); + cbs->pl_vout_set_mode(w_out, h_out, w, h, + (gpu.status & PSX_GPU_STATUS_RGB24) ? 
24 : 16); } } @@ -82,7 +83,7 @@ void vout_update(void) vram += y * 1024 + x; - cbs->pl_vout_flip(vram, 1024, gpu.status.rgb24, w, h); + cbs->pl_vout_flip(vram, 1024, !!(gpu.status & PSX_GPU_STATUS_RGB24), w, h); } void vout_blank(void) @@ -95,7 +96,7 @@ void vout_blank(void) w *= 2; h *= 2; } - cbs->pl_vout_flip(NULL, 1024, gpu.status.rgb24, w, h); + cbs->pl_vout_flip(NULL, 1024, !!(gpu.status & PSX_GPU_STATUS_RGB24), w, h); } long GPUopen(void **unused) From 89df80c636609625bb2e89099805f49cfef3ead0 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 13 Sep 2019 23:13:43 +0200 Subject: [PATCH 157/597] gpulib: Add proper support for big-endian Update the gpulib code to work properly on big-endian architectures. Signed-off-by: Paul Cercueil --- plugins/gpulib/gpu.c | 32 ++++++++++++++++---------------- plugins/gpulib/gpu.h | 12 ++++++++++++ 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index d0bb526b7..8e92c782c 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -386,12 +386,12 @@ static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) while (pos < count && skip) { uint32_t *list = data + pos; - cmd = list[0] >> 24; + cmd = LE32TOH(list[0]) >> 24; len = 1 + cmd_lengths[cmd]; switch (cmd) { case 0x02: - if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h) + if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h) // clearing something large, don't skip do_cmd_list(list, 3, &dummy); else @@ -402,12 +402,12 @@ static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) case 0x34 ... 0x37: case 0x3c ... 0x3f: gpu.ex_regs[1] &= ~0x1ff; - gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff; + gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff; break; case 0x48 ... 
0x4F: for (v = 3; pos + v < count; v++) { - if ((list[v] & 0xf000f000) == 0x50005000) + if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000)) break; } len += v - 3; @@ -415,16 +415,16 @@ static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) case 0x58 ... 0x5F: for (v = 4; pos + v < count; v += 2) { - if ((list[v] & 0xf000f000) == 0x50005000) + if ((list[v] & HTOLE32(0xf000f000)) == HTOLE32(0x50005000)) break; } len += v - 4; break; default: if (cmd == 0xe3) - skip = decide_frameskip_allow(list[0]); + skip = decide_frameskip_allow(LE32TOH(list[0])); if ((cmd & 0xf8) == 0xe0) - gpu.ex_regs[cmd & 7] = list[0]; + gpu.ex_regs[cmd & 7] = LE32TOH(list[0]); break; } @@ -459,7 +459,7 @@ static noinline int do_cmd_buffer(uint32_t *data, int count) break; } - cmd = data[pos] >> 24; + cmd = LE32TOH(data[pos]) >> 24; if (0xa0 <= cmd && cmd <= 0xdf) { if (unlikely((pos+2) >= count)) { // incomplete vram write/read cmd, can't consume yet @@ -468,13 +468,13 @@ static noinline int do_cmd_buffer(uint32_t *data, int count) } // consume vram write/read cmd - start_vram_transfer(data[pos + 1], data[pos + 2], (cmd & 0xe0) == 0xc0); + start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0); pos += 3; continue; } // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip - if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0)) + if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0)) pos += do_cmd_list_skip(data + pos, count - pos, &cmd); else { pos += do_cmd_list(data + pos, count - pos, &cmd); @@ -523,7 +523,7 @@ void GPUwriteDataMem(uint32_t *mem, int count) void GPUwriteData(uint32_t data) { log_io("gpu_write %08x\n", data); - gpu.cmd_buffer[gpu.cmd_len++] = data; + gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data); if (gpu.cmd_len >= CMD_BUFFER_LEN) flush_cmd_buffer(); } @@ -544,8 +544,8 @@ long GPUdmaChain(uint32_t *rambase, uint32_t 
start_addr) for (count = 0; (addr & 0x800000) == 0; count++) { list = rambase + (addr & 0x1fffff) / 4; - len = list[0] >> 24; - addr = list[0] & 0xffffff; + len = LE32TOH(list[0]) >> 24; + addr = LE32TOH(list[0]) & 0xffffff; preload(rambase + (addr & 0x1fffff) / 4); cpu_cycles += 10; @@ -570,7 +570,7 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr) // loop detection marker // (bit23 set causes DMA error on real machine, so // unlikely to be ever set by the game) - list[0] |= 0x800000; + list[0] |= HTOLE32(0x800000); } } @@ -580,8 +580,8 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr) addr = ld_addr & 0x1fffff; while (count-- > 0) { list = rambase + addr / 4; - addr = list[0] & 0x1fffff; - list[0] &= ~0x800000; + addr = LE32TOH(list[0]) & 0x1fffff; + list[0] &= HTOLE32(~0x800000); } } diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 3bab7fff3..b1f4f1dbc 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -16,6 +16,18 @@ extern "C" { #define CMD_BUFFER_LEN 1024 +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define HTOLE32(x) __builtin_bswap32(x) +#define HTOLE16(x) __builtin_bswap16(x) +#define LE32TOH(x) __builtin_bswap32(x) +#define LE16TOH(x) __builtin_bswap16(x) +#else +#define HTOLE32(x) (x) +#define HTOLE16(x) (x) +#define LE32TOH(x) (x) +#define LE16TOH(x) (x) +#endif + #define BIT(x) (1 << (x)) #define PSX_GPU_STATUS_DHEIGHT BIT(19) From 96c6ec7055ecef55b3dd221c86b796512bf52107 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 26 May 2022 14:07:41 +0100 Subject: [PATCH 158/597] misc: Use GCC builtins for byte-swap operations Instead of using custom code to byte-swap values, use the built-in function provided by GCC. 
Signed-off-by: Paul Cercueil --- libpcsxcore/misc.c | 9 +-------- libpcsxcore/psxmem.h | 7 ++----- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index b2dd9a127..7aa4fef24 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -58,14 +58,7 @@ struct iso_directory_record { void mmssdd( char *b, char *p ) { int m, s, d; -#if defined(__arm__) - unsigned char *u = (void *)b; - int block = (u[3] << 24) | (u[2] << 16) | (u[1] << 8) | u[0]; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - int block = (b[0] & 0xff) | ((b[1] & 0xff) << 8) | ((b[2] & 0xff) << 16) | (b[3] << 24); -#else - int block = *((int*)b); -#endif + int block = SWAP32(*((uint32_t*) b)); block += 150; m = block / 4500; // minutes diff --git a/libpcsxcore/psxmem.h b/libpcsxcore/psxmem.h index 3d5317c18..ec4b970a6 100644 --- a/libpcsxcore/psxmem.h +++ b/libpcsxcore/psxmem.h @@ -28,11 +28,8 @@ extern "C" { #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define _SWAP16(b) ((((unsigned char *)&(b))[0] & 0xff) | (((unsigned char *)&(b))[1] & 0xff) << 8) -#define _SWAP32(b) ((((unsigned char *)&(b))[0] & 0xff) | ((((unsigned char *)&(b))[1] & 0xff) << 8) | ((((unsigned char *)&(b))[2] & 0xff) << 16) | (((unsigned char *)&(b))[3] << 24)) - -#define SWAP16(v) ((((v) & 0xff00) >> 8) +(((v) & 0xff) << 8)) -#define SWAP32(v) ((((v) & 0xff000000ul) >> 24) + (((v) & 0xff0000ul) >> 8) + (((v) & 0xff00ul)<<8) +(((v) & 0xfful) << 24)) +#define SWAP16(v) __builtin_bswap16(v) +#define SWAP32(v) __builtin_bswap32(v) #define SWAPu32(v) SWAP32((u32)(v)) #define SWAPs32(v) SWAP32((s32)(v)) From ae8f89db2a4746d6119aca9359bdee75ad122fa0 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sat, 21 May 2022 18:58:14 +0100 Subject: [PATCH 159/597] frontend: Fix colorspace conversion routines on big-endian The bgr555_to_rgb565() and bgr888_to_rgb565() functions were only working correctly on little-endian systems. 
Signed-off-by: Paul Cercueil --- frontend/cspace.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/frontend/cspace.c b/frontend/cspace.c index 33a981df2..f60026c5c 100644 --- a/frontend/cspace.c +++ b/frontend/cspace.c @@ -15,20 +15,29 @@ * in favor of NEON version or platform-specific conversion */ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define SWAP16(x) __builtin_bswap16(x) +#define LE16TOHx2(x) ((SWAP16((x) >> 16) << 16) | SWAP16(x)) +#else +#define LE16TOHx2(x) (x) +#endif + #ifndef __arm__ void bgr555_to_rgb565(void *dst_, const void *src_, int bytes) { const unsigned int *src = src_; unsigned int *dst = dst_; - unsigned int p; - int x; + unsigned int x, p, r, g, b; for (x = 0; x < bytes / 4; x++) { - p = src[x]; - p = ((p & 0x7c007c00) >> 10) | ((p & 0x03e003e0) << 1) - | ((p & 0x001f001f) << 11); - dst[x] = p; + p = LE16TOHx2(src[x]); + + r = (p & 0x001f001f) << 11; + g = (p & 0x03e003e0) << 1; + b = (p & 0x7c007c00) >> 10; + + dst[x] = r | g | b; } } @@ -49,8 +58,13 @@ void bgr888_to_rgb565(void *dst_, const void *src_, int bytes) r2 = src[3] & 0xf8; g2 = src[4] & 0xfc; b2 = src[5] & 0xf8; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + *dst = (r1 << 24) | (g1 << 19) | (b1 << 13) | + (r2 << 8) | (g2 << 3) | (b2 >> 3); +#else *dst = (r2 << 24) | (g2 << 19) | (b2 << 13) | (r1 << 8) | (g1 << 3) | (b1 >> 3); +#endif } } From ae097dfb64926c50902b08b681cbf805b98e3751 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sat, 21 May 2022 17:52:47 +0100 Subject: [PATCH 160/597] dfxvideo: Restore support for big-endian arch The hardware registers, emulated RAM or emulated VRAM are all in little-endian format. Therefore, each access must be done with one of the byte-swap macros, for the plugin to work on big-endian systems. 
Signed-off-by: Paul Cercueil --- plugins/dfxvideo/gpu.c | 2 +- plugins/dfxvideo/gpu.h | 6 +++--- plugins/dfxvideo/gpulib_if.c | 18 +++++++++--------- plugins/dfxvideo/prim.c | 2 +- plugins/dfxvideo/soft.c | 16 ++++++++-------- plugins/gpulib/gpu.c | 7 +++++-- 6 files changed, 27 insertions(+), 24 deletions(-) diff --git a/plugins/dfxvideo/gpu.c b/plugins/dfxvideo/gpu.c index 649cb429e..9356a6e93 100644 --- a/plugins/dfxvideo/gpu.c +++ b/plugins/dfxvideo/gpu.c @@ -985,7 +985,7 @@ void CALLBACK GPUwriteDataMem(uint32_t * pMem, int iSize) if((gpuDataC==254 && gpuDataP>=3) || (gpuDataC==255 && gpuDataP>=4 && !(gpuDataP&1))) { - if((gpuDataM[gpuDataP] & 0xF000F000) == 0x50005000) + if((gpuDataM[gpuDataP] & HOST2LE32(0xF000F000)) == HOST2LE32(0x50005000)) gpuDataP=gpuDataC-1; } } diff --git a/plugins/dfxvideo/gpu.h b/plugins/dfxvideo/gpu.h index 25fcc3ce8..7b5aaef00 100644 --- a/plugins/dfxvideo/gpu.h +++ b/plugins/dfxvideo/gpu.h @@ -69,8 +69,8 @@ // byteswappings -#define SWAP16(x) ({ uint16_t y=(x); (((y)>>8 & 0xff) | ((y)<<8 & 0xff00)); }) -#define SWAP32(x) ({ uint32_t y=(x); (((y)>>24 & 0xfful) | ((y)>>8 & 0xff00ul) | ((y)<<8 & 0xff0000ul) | ((y)<<24 & 0xff000000ul)); }) +#define SWAP16(x) __builtin_bswap16(x) +#define SWAP32(x) __builtin_bswap32(x) #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ @@ -107,7 +107,7 @@ #define GETLE32_(X) LE2HOST32(*(uint32_t *)X) #define GETLE16D(X) ({uint32_t val = GETLE32(X); (val<<16 | val >> 16);}) #define PUTLE16(X, Y) do{*((uint16_t *)X)=HOST2LE16((uint16_t)Y);}while(0) -#define PUTLE32_(X, Y) do{*((uint32_t *)X)=HOST2LE16((uint32_t)Y);}while(0) +#define PUTLE32_(X, Y) do{*((uint32_t *)X)=HOST2LE32((uint32_t)Y);}while(0) #ifdef __arm__ #define GETLE32(X) (*(uint16_t *)(X)|(((uint16_t *)(X))[1]<<16)) #define PUTLE32(X, Y) do{uint16_t *p_=(uint16_t *)(X);uint32_t y_=Y;p_[0]=y_;p_[1]=y_>>16;}while(0) diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index d7d69a765..c4e4cfeab 100644 --- 
a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -37,8 +37,8 @@ // byteswappings -#define SWAP16(x) ({ uint16_t y=(x); (((y)>>8 & 0xff) | ((y)<<8 & 0xff00)); }) -#define SWAP32(x) ({ uint32_t y=(x); (((y)>>24 & 0xfful) | ((y)>>8 & 0xff00ul) | ((y)<<8 & 0xff0000ul) | ((y)<<24 & 0xff000000ul)); }) +#define SWAP16(x) __builtin_bswap16(x) +#define SWAP32(x) __builtin_bswap32(x) #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ @@ -75,7 +75,7 @@ #define GETLE32_(X) LE2HOST32(*(uint32_t *)X) #define GETLE16D(X) ({uint32_t val = GETLE32(X); (val<<16 | val >> 16);}) #define PUTLE16(X, Y) do{*((uint16_t *)X)=HOST2LE16((uint16_t)Y);}while(0) -#define PUTLE32_(X, Y) do{*((uint32_t *)X)=HOST2LE16((uint32_t)Y);}while(0) +#define PUTLE32_(X, Y) do{*((uint32_t *)X)=HOST2LE32((uint32_t)Y);}while(0) #ifdef __arm__ #define GETLE32(X) (*(uint16_t *)(X)|(((uint16_t *)(X))[1]<<16)) #define PUTLE32(X, Y) do{uint16_t *p_=(uint16_t *)(X);uint32_t y_=Y;p_[0]=y_;p_[1]=y_>>16;}while(0) @@ -315,7 +315,7 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) for (; list < list_end; list += 1 + len) { - cmd = *list >> 24; + cmd = GETLE32(list) >> 24; len = cmd_lengths[cmd]; if (list + 1 + len > list_end) { cmd = -1; @@ -326,7 +326,7 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) if (cmd == 0xa0 || cmd == 0xc0) break; // image i/o, forward to upper layer else if ((cmd & 0xf8) == 0xe0) - gpu.ex_regs[cmd & 7] = list[0]; + gpu.ex_regs[cmd & 7] = GETLE32(list); #endif primTableJ[cmd]((void *)list); @@ -345,7 +345,7 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) goto breakloop; } - if((*list_position & 0xf000f000) == 0x50005000) + if((*list_position & HOST2LE32(0xf000f000)) == HOST2LE32(0x50005000)) break; list_position++; @@ -368,7 +368,7 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) goto breakloop; } - if((*list_position & 0xf000f000) == 0x50005000) + if((*list_position & HOST2LE32(0xf000f000)) == 
HOST2LE32(0x50005000)) break; list_position += 2; @@ -383,8 +383,8 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) case 0xA0: // sys -> vid { short *slist = (void *)list; - u32 load_width = slist[4]; - u32 load_height = slist[5]; + u32 load_width = LE2HOST32(slist[4]); + u32 load_height = LE2HOST32(slist[5]); u32 load_size = load_width * load_height; len += load_size / 2; diff --git a/plugins/dfxvideo/prim.c b/plugins/dfxvideo/prim.c index 7a71d4c81..c872c284d 100644 --- a/plugins/dfxvideo/prim.c +++ b/plugins/dfxvideo/prim.c @@ -1240,7 +1240,7 @@ static void primLineGEx(unsigned char *baseAddr) sly1=(short)(((int)sly1<<SIGNSHIFT)>>SIGNSHIFT); } - lc1 = gpuData[0] & 0xffffff; + lc1 = GETLE32(&gpuData[0]) & 0xffffff; DrawSemiTrans = (SEMITRANSBIT(GETLE32(&gpuData[0]))) ? TRUE : FALSE; diff --git a/plugins/dfxvideo/soft.c b/plugins/dfxvideo/soft.c index c1c3beff6..70cf50cd2 100644 --- a/plugins/dfxvideo/soft.c +++ b/plugins/dfxvideo/soft.c @@ -292,7 +292,7 @@ static inline void GetShadeTransCol32(uint32_t * pdest,uint32_t color) { int32_t sr,sb,sg,src,sbc,sgc,c; src=XCOL1(color);sbc=XCOL2(color);sgc=XCOL3(color); - c=GETLE32(pdest)>>16; + c=HIWORD(GETLE32(pdest)); sr=(XCOL1(c))-src; if(sr&0x8000) sr=0; sb=(XCOL2(c))-sbc; if(sb&0x8000) sb=0; sg=(XCOL3(c))-sgc; if(sg&0x8000) sg=0; @@ -327,8 +327,8 @@ static inline void GetShadeTransCol32(uint32_t * pdest,uint32_t color) { uint32_t ma=GETLE32(pdest); PUTLE32(pdest, (X32PSXCOL(r,g,b))|lSetMask);//0x80008000; - if(ma&0x80000000) PUTLE32(pdest, (ma&0xFFFF0000)|(*pdest&0xFFFF)); - if(ma&0x00008000) PUTLE32(pdest, (ma&0xFFFF) |(*pdest&0xFFFF0000)); + if(ma&0x80000000) PUTLE32(pdest, (ma&0xFFFF0000)|(GETLE32(pdest)&0xFFFF)); + if(ma&0x00008000) PUTLE32(pdest, (ma&0xFFFF) |(GETLE32(pdest)&0xFFFF0000)); return; } PUTLE32(pdest, (X32PSXCOL(r,g,b))|lSetMask);//0x80008000; @@ -950,7 +950,7 @@ static void FillSoftwareAreaTrans(short x0,short y0,short x1, // FILL AREA TRANS { static int iCheat=0; col+=iCheat; - if(iCheat==1) 
iCheat=0; else iCheat=1; + iCheat ^= 1; } @@ -971,7 +971,7 @@ static void FillSoftwareAreaTrans(short x0,short y0,short x1, // FILL AREA TRANS { uint32_t *DSTPtr; unsigned short LineOffset; - uint32_t lcol=lSetMask|(((uint32_t)(col))<<16)|col; + uint32_t lcol = HOST2LE32(lSetMask | (((uint32_t)(col)) << 16) | col); dx>>=1; DSTPtr = (uint32_t *)(psxVuw + (1024*y0) + x0); LineOffset = 512 - dx; @@ -980,7 +980,7 @@ static void FillSoftwareAreaTrans(short x0,short y0,short x1, // FILL AREA TRANS { for(i=0;i>=1; DSTPtr = (uint32_t *)(psxVuw + (1024*y0) + x0); LineOffset = 512 - dx; for(i=0;i Date: Tue, 24 May 2022 22:22:48 +0100 Subject: [PATCH 161/597] cdrom: Fix PBP support on big-endian platforms The data contained in the PBP is in little-endian format. Therefore, everything in the PBP's header must be read with the byte-swap macros, for PBP support to work on big-endian systems. Signed-off-by: Paul Cercueil --- libpcsxcore/cdriso.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index 9d74ef46a..be7300f5a 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -680,6 +680,7 @@ static int handlepbp(const char *isofile) { off_t psisoimg_offs, cdimg_base; unsigned int t, cd_length; unsigned int offsettab[8]; + unsigned int psar_offs, index_entry_size, index_entry_offset; const char *ext = NULL; int i, ret; @@ -698,21 +699,23 @@ static int handlepbp(const char *isofile) { goto fail_io; } - ret = fseeko(cdHandle, pbp_hdr.psar_offs, SEEK_SET); + psar_offs = SWAP32(pbp_hdr.psar_offs); + + ret = fseeko(cdHandle, psar_offs, SEEK_SET); if (ret != 0) { - SysPrintf("failed to seek to %x\n", pbp_hdr.psar_offs); + SysPrintf("failed to seek to %x\n", psar_offs); goto fail_io; } - psisoimg_offs = pbp_hdr.psar_offs; + psisoimg_offs = psar_offs; if (fread(psar_sig, 1, sizeof(psar_sig), cdHandle) != sizeof(psar_sig)) goto fail_io; psar_sig[10] = 0; if (strcmp(psar_sig, "PSTITLEIMG") == 0) { 
// multidisk image? - ret = fseeko(cdHandle, pbp_hdr.psar_offs + 0x200, SEEK_SET); + ret = fseeko(cdHandle, psar_offs + 0x200, SEEK_SET); if (ret != 0) { - SysPrintf("failed to seek to %x\n", pbp_hdr.psar_offs + 0x200); + SysPrintf("failed to seek to %x\n", psar_offs + 0x200); goto fail_io; } @@ -734,7 +737,7 @@ static int handlepbp(const char *isofile) { if (cdrIsoMultidiskSelect >= cdrIsoMultidiskCount) cdrIsoMultidiskSelect = 0; - psisoimg_offs += offsettab[cdrIsoMultidiskSelect]; + psisoimg_offs += SWAP32(offsettab[cdrIsoMultidiskSelect]); ret = fseeko(cdHandle, psisoimg_offs, SEEK_SET); if (ret != 0) { @@ -818,12 +821,15 @@ static int handlepbp(const char *isofile) { goto fail_index; } - if (index_entry.size == 0) + index_entry_size = SWAP32(index_entry.size); + index_entry_offset = SWAP32(index_entry.offset); + + if (index_entry_size == 0) break; - compr_img->index_table[i] = cdimg_base + index_entry.offset; + compr_img->index_table[i] = cdimg_base + index_entry_offset; } - compr_img->index_table[i] = cdimg_base + index_entry.offset + index_entry.size; + compr_img->index_table[i] = cdimg_base + index_entry_offset + index_entry_size; return 0; From 3d037671fa024a16ea55f029b5c83cca8021f9ea Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 29 Apr 2016 22:06:36 +0200 Subject: [PATCH 162/597] configure: Detect the toolchain's sdl-config tool Instead of calling the host's sdl-config, which doesn't work when cross-compiling, call the sdl-config program that's installed in the compiler's sysroot. 
Signed-off-by: Paul Cercueil --- configure | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/configure b/configure index 1df9aac08..cb124ac6d 100755 --- a/configure +++ b/configure @@ -72,6 +72,9 @@ AR="${AS-${CROSS_COMPILE}ar}" MAIN_LDLIBS="$LDLIBS -ldl -lm -lpthread" config_mak="config.mak" +SYSROOT="$(${CC} --print-sysroot)" +[ "x${SDL_CONFIG}" = "x" ] && SDL_CONFIG="${SYSROOT}/usr/bin/sdl-config" + fail() { echo "$@" @@ -398,7 +401,7 @@ if [ "x$sound_drivers" = "x" ]; then sound_drivers="$sound_drivers pulseaudio" MAIN_LDLIBS="-lpulse $MAIN_LDLIBS" fi - if [ "$need_sdl" = "yes" ] || check_sdl `sdl-config --cflags --libs`; then + if [ "$need_sdl" = "yes" ] || check_sdl `${SDL_CONFIG} --cflags --libs`; then sound_drivers="$sound_drivers sdl" need_sdl="yes" fi @@ -417,10 +420,10 @@ else fi if [ "$need_sdl" = "yes" ]; then - which sdl-config > /dev/null || \ + which ${SDL_CONFIG} > /dev/null || \ fail "sdl-config is missing; please install libsdl (libsdl1.2-dev)" - CFLAGS="$CFLAGS `sdl-config --cflags`" - MAIN_LDLIBS="`sdl-config --libs` $MAIN_LDLIBS" + CFLAGS="$CFLAGS `${SDL_CONFIG} --cflags`" + MAIN_LDLIBS="`${SDL_CONFIG} --libs` $MAIN_LDLIBS" check_sdl || fail "please install libsdl (libsdl1.2-dev)" fi From d2bbb7d055e9a403bdcde044be8762be57f49b12 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 3 Jun 2022 18:26:28 +0100 Subject: [PATCH 163/597] CI: Pass SDL_CONFIG to the ./configure script Signed-off-by: Paul Cercueil --- .github/workflows/ci-linux-arm64.yml | 2 +- .github/workflows/ci-linux-armhf.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-linux-arm64.yml b/.github/workflows/ci-linux-arm64.yml index de2f84cee..1666b9760 100644 --- a/.github/workflows/ci-linux-arm64.yml +++ b/.github/workflows/ci-linux-arm64.yml @@ -17,6 +17,6 @@ jobs: sudo apt-get install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu .github/extract-foreign-all.sh arm64 - name: configure - run: 
DUMP_CONFIG_LOG=1 CROSS_COMPILE=aarch64-linux-gnu- PATH=$PATH:usr/bin CFLAGS='-Iusr/include/ -Iusr/include/SDL' LDFLAGS='-Lusr/lib/aarch64-linux-gnu/ -Llib/aarch64-linux-gnu/ -Wl,-rpath-link=lib/aarch64-linux-gnu/,-rpath-link=usr/lib/aarch64-linux-gnu/,-rpath-link=usr/lib/aarch64-linux-gnu/pulseaudio/' ./configure + run: DUMP_CONFIG_LOG=1 CROSS_COMPILE=aarch64-linux-gnu- SDL_CONFIG=usr/bin/sdl-config PATH=$PATH:usr/bin CFLAGS='-Iusr/include/ -Iusr/include/SDL' LDFLAGS='-Lusr/lib/aarch64-linux-gnu/ -Llib/aarch64-linux-gnu/ -Wl,-rpath-link=lib/aarch64-linux-gnu/,-rpath-link=usr/lib/aarch64-linux-gnu/,-rpath-link=usr/lib/aarch64-linux-gnu/pulseaudio/' ./configure - name: make run: make diff --git a/.github/workflows/ci-linux-armhf.yml b/.github/workflows/ci-linux-armhf.yml index 0842bf36d..50858959f 100644 --- a/.github/workflows/ci-linux-armhf.yml +++ b/.github/workflows/ci-linux-armhf.yml @@ -17,6 +17,6 @@ jobs: sudo apt-get install -y gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf .github/extract-foreign-all.sh armhf - name: configure - run: DUMP_CONFIG_LOG=1 CROSS_COMPILE=arm-linux-gnueabihf- PATH=$PATH:usr/bin CFLAGS='-Iusr/include/ -Iusr/include/SDL' LDFLAGS='-Lusr/lib/arm-linux-gnueabihf/ -Llib/arm-linux-gnueabihf/ -Wl,-rpath-link=lib/arm-linux-gnueabihf/,-rpath-link=usr/lib/arm-linux-gnueabihf/,-rpath-link=usr/lib/arm-linux-gnueabihf/pulseaudio/' ./configure + run: DUMP_CONFIG_LOG=1 CROSS_COMPILE=arm-linux-gnueabihf- SDL_CONFIG=usr/bin/sdl-config PATH=$PATH:usr/bin CFLAGS='-Iusr/include/ -Iusr/include/SDL' LDFLAGS='-Lusr/lib/arm-linux-gnueabihf/ -Llib/arm-linux-gnueabihf/ -Wl,-rpath-link=lib/arm-linux-gnueabihf/,-rpath-link=usr/lib/arm-linux-gnueabihf/,-rpath-link=usr/lib/arm-linux-gnueabihf/pulseaudio/' ./configure - name: make run: make From 0a371c2c4a1097bbbe888ed00d5f2d1d1fafa1f5 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 30 Jul 2022 01:07:21 +0300 Subject: [PATCH 164/597] dfxvideo: patch up some unsafe macros (old?)-ARM version of GETLE32() was 
causing wrong shifting due to implicit signed int promotion. libretro/pcsx_rearmed#676 --- plugins/dfxvideo/gpu.h | 18 +++++++++--------- plugins/dfxvideo/gpulib_if.c | 18 +++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/plugins/dfxvideo/gpu.h b/plugins/dfxvideo/gpu.h index 7b5aaef00..745569288 100644 --- a/plugins/dfxvideo/gpu.h +++ b/plugins/dfxvideo/gpu.h @@ -64,6 +64,7 @@ #include #include #include +#include "../../include/arm_features.h" ///////////////////////////////////////////////////////////////////////////// @@ -100,16 +101,15 @@ #endif -#define GETLEs16(X) ((int16_t)GETLE16((uint16_t *)X)) -#define GETLEs32(X) ((int16_t)GETLE32((uint16_t *)X)) +#define GETLEs16(X) ((int16_t)GETLE16((uint16_t *)(X))) -#define GETLE16(X) LE2HOST16(*(uint16_t *)X) -#define GETLE32_(X) LE2HOST32(*(uint32_t *)X) -#define GETLE16D(X) ({uint32_t val = GETLE32(X); (val<<16 | val >> 16);}) -#define PUTLE16(X, Y) do{*((uint16_t *)X)=HOST2LE16((uint16_t)Y);}while(0) -#define PUTLE32_(X, Y) do{*((uint32_t *)X)=HOST2LE32((uint32_t)Y);}while(0) -#ifdef __arm__ -#define GETLE32(X) (*(uint16_t *)(X)|(((uint16_t *)(X))[1]<<16)) +#define GETLE16(X) LE2HOST16(*(uint16_t *)(X)) +#define GETLE32_(X) LE2HOST32(*(uint32_t *)(X)) +#define PUTLE16(X, Y) do{*((uint16_t *)(X))=HOST2LE16((uint16_t)(Y));}while(0) +#define PUTLE32_(X, Y) do{*((uint32_t *)(X))=HOST2LE32((uint32_t)(Y));}while(0) +#if defined(__arm__) && !defined(HAVE_ARMV6) +// for (very) old ARMs with no unaligned loads? 
+#define GETLE32(X) (*(uint16_t *)(X)|((uint32_t)((uint16_t *)(X))[1]<<16)) #define PUTLE32(X, Y) do{uint16_t *p_=(uint16_t *)(X);uint32_t y_=Y;p_[0]=y_;p_[1]=y_>>16;}while(0) #else #define GETLE32 GETLE32_ diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index c4e4cfeab..47cccedf0 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -16,6 +16,7 @@ #include #include #include "../gpulib/gpu.h" +#include "../../include/arm_features.h" #define u32 uint32_t @@ -68,16 +69,15 @@ #endif -#define GETLEs16(X) ((int16_t)GETLE16((uint16_t *)X)) -#define GETLEs32(X) ((int16_t)GETLE32((uint16_t *)X)) +#define GETLEs16(X) ((int16_t)GETLE16((uint16_t *)(X))) -#define GETLE16(X) LE2HOST16(*(uint16_t *)X) -#define GETLE32_(X) LE2HOST32(*(uint32_t *)X) -#define GETLE16D(X) ({uint32_t val = GETLE32(X); (val<<16 | val >> 16);}) -#define PUTLE16(X, Y) do{*((uint16_t *)X)=HOST2LE16((uint16_t)Y);}while(0) -#define PUTLE32_(X, Y) do{*((uint32_t *)X)=HOST2LE32((uint32_t)Y);}while(0) -#ifdef __arm__ -#define GETLE32(X) (*(uint16_t *)(X)|(((uint16_t *)(X))[1]<<16)) +#define GETLE16(X) LE2HOST16(*(uint16_t *)(X)) +#define GETLE32_(X) LE2HOST32(*(uint32_t *)(X)) +#define PUTLE16(X, Y) do{*((uint16_t *)(X))=HOST2LE16((uint16_t)(Y));}while(0) +#define PUTLE32_(X, Y) do{*((uint32_t *)(X))=HOST2LE32((uint32_t)(Y));}while(0) +#if defined(__arm__) && !defined(HAVE_ARMV6) +// for (very) old ARMs with no unaligned loads? +#define GETLE32(X) (*(uint16_t *)(X)|((uint32_t)((uint16_t *)(X))[1]<<16)) #define PUTLE32(X, Y) do{uint16_t *p_=(uint16_t *)(X);uint32_t y_=Y;p_[0]=y_;p_[1]=y_>>16;}while(0) #else #define GETLE32 GETLE32_ From 3f0189c68167c173b65b32963b0cf500d0d149b6 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 24 Jul 2022 01:31:08 +0300 Subject: [PATCH 165/597] gpu_neon: try to make the compiler save some callee-save regs ... 
which the asm isn't doing properly --- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 54 ++++++++++++++++++++- plugins/gpu_neon/psx_gpu_if.c | 9 ++++ 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index c0199a08a..d6907e4c6 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -34,6 +34,16 @@ .syntax unified .text +#if 0 +#define save_abi_regs() \ + vpush {q4-q7} +#define restore_abi_regs() \ + vpop {q4-q7} +#else +#define save_abi_regs() +#define restore_abi_regs() +#endif + #define psx_gpu r0 #define v_a r1 #define v_b r2 @@ -233,6 +243,7 @@ function(compute_all_gradients) @ r12 = psx_gpu->triangle_area ldr r12, [psx_gpu, #psx_gpu_triangle_area_offset] stmdb sp!, { r4 - r11, lr } + save_abi_regs() @ load exponent of 62 into upper half of double movw r4, #0 @@ -448,6 +459,7 @@ function(compute_all_gradients) stmia store_b, { g_bx0, g_bx, g_bx2, g_bx3, b_base, g_by } + restore_abi_regs() ldmia sp!, { r4 - r11, pc } @@ -578,6 +590,7 @@ function(compute_all_gradients) #define setup_spans_prologue() \ stmdb sp!, { r4 - r11, lr }; \ + save_abi_regs(); \ \ ldrsh x_a, [v_a, #8]; \ ldrsh x_b, [v_b, #8]; \ @@ -974,6 +987,7 @@ function(compute_all_gradients) #define setup_spans_epilogue() \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc } \ @@ -1348,6 +1362,7 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ bxeq lr; \ \ stmdb sp!, { r4 - r11, r14 }; \ + save_abi_regs(); \ vshl.u32 uvrg_dx4, uvrg_dx, #2; \ \ ldr b_dx, [psx_gpu, #psx_gpu_b_dx_offset]; \ @@ -1577,6 +1592,7 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ bne 0b; \ \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc }; \ \ 2: \ @@ -1617,6 +1633,7 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ bxeq lr; \ \ 
stmdb sp!, { r4 - r11, r14 }; \ + save_abi_regs(); \ vshl.u32 uvrg_dx4, uvrg_dx, #2; \ \ vshl.u32 uvrg_dx8, uvrg_dx, #3; \ @@ -1774,6 +1791,7 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ bne 0b; \ \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc }; \ \ 2: \ @@ -1810,6 +1828,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect) bxeq lr stmdb sp!, { r4 - r11, r14 } + save_abi_regs() vld1.u32 { test_mask }, [psx_gpu, :128] ldr color, [psx_gpu, #psx_gpu_triangle_color_offset] @@ -1892,6 +1911,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect) strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] bne 0b + restore_abi_regs() ldmia sp!, { r4 - r11, pc } 2: @@ -2114,6 +2134,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect) \ bxeq lr; \ \ stmdb sp!, { r4 - r11, r14 }; \ + save_abi_regs(); \ vshl.u32 rg_dx4, rg_dx, #2; \ \ ldr b_dx, [psx_gpu, #psx_gpu_b_dx_offset]; \ @@ -2306,6 +2327,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect) \ strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ bne 0b; \ \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc }; \ \ 2: \ @@ -2357,6 +2379,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct) \ bxeq lr; \ \ stmdb sp!, { r4 - r11, r14 }; \ + save_abi_regs(); \ vshl.u32 rg_dx4, rg_dx, #2; \ \ ldr b_dx, [psx_gpu, #psx_gpu_b_dx_offset]; \ @@ -2577,6 +2600,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct) \ \ bne 0b; \ \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc } \ setup_blocks_shaded_untextured_direct_builder(undithered) @@ -3152,6 +3176,7 @@ function(texture_blocks_16bpp) function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \ shade_blocks_textured_modulated_prologue_##shading(dithering, target); \ stmdb sp!, { r4 - r5, lr }; \ + save_abi_regs(); \ 
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ vld1.u32 { test_mask }, [psx_gpu, :128]; \ @@ -3267,6 +3292,7 @@ function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \ shade_blocks_textured_modulated_store_draw_mask_##target(28); \ shade_blocks_textured_modulated_store_pixels_##target(); \ \ + restore_abi_regs(); \ ldmia sp!, { r4 - r5, pc } \ @@ -3332,7 +3358,8 @@ shade_blocks_textured_modulated_builder(unshaded, undithered, indirect); .align 3 function(shade_blocks_textured_unmodulated_indirect) - str r14, [sp, #-4] + stmdb sp!, { r4, r14 } + save_abi_regs() add draw_mask_bits_ptr, psx_gpu, #(psx_gpu_blocks_offset + 40) ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] @@ -3375,13 +3402,15 @@ function(shade_blocks_textured_unmodulated_indirect) vorr.u16 draw_mask_combined, draw_mask, zero_mask vst1.u32 { draw_mask_combined }, [draw_mask_store_ptr, :128], c_64 - ldr pc, [sp, #-4] + restore_abi_regs() + ldmia sp!, { r4, pc } .align 3 function(shade_blocks_textured_unmodulated_direct) stmdb sp!, { r4, r14 } + save_abi_regs() add draw_mask_bits_ptr, psx_gpu, #(psx_gpu_blocks_offset + 40) ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] @@ -3443,6 +3472,7 @@ function(shade_blocks_textured_unmodulated_direct) vst1.u16 { fb_pixels_next }, [fb_ptr_next] + restore_abi_regs() ldmia sp!, { r4, pc } 4: @@ -3462,6 +3492,7 @@ function(shade_blocks_unshaded_untextured_indirect) function(shade_blocks_unshaded_untextured_direct) stmdb sp!, { r4, r14 } + save_abi_regs() add draw_mask_ptr, psx_gpu, #psx_gpu_blocks_offset ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] @@ -3508,6 +3539,7 @@ function(shade_blocks_unshaded_untextured_direct) vbif.u16 fb_pixels_next, pixels, draw_mask vst1.u16 { fb_pixels_next }, [fb_ptr_next] + restore_abi_regs() ldmia sp!, { r4, pc } 4: @@ -3613,6 +3645,7 @@ function(shade_blocks_unshaded_untextured_direct) \ function(blend_blocks_##texturing##_average_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ 
+ save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -3694,6 +3727,7 @@ function(blend_blocks_##texturing##_average_##mask_evaluate) \ vbif.u16 fb_pixels_next, blend_pixels, draw_mask_next; \ vst1.u16 { fb_pixels_next }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -3732,6 +3766,7 @@ blend_blocks_average_builder(untextured, on) \ function(blend_blocks_textured_add_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -3817,6 +3852,7 @@ function(blend_blocks_textured_add_##mask_evaluate) \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -3836,6 +3872,7 @@ function(blend_blocks_textured_add_##mask_evaluate) \ \ function(blend_blocks_untextured_add_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -3911,6 +3948,7 @@ function(blend_blocks_untextured_add_##mask_evaluate) \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -3968,6 +4006,7 @@ blend_blocks_add_untextured_builder(on) \ function(blend_blocks_##texturing##_subtract_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -4043,6 +4082,7 @@ function(blend_blocks_##texturing##_subtract_##mask_evaluate) \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -4067,6 +4107,7 @@ 
blend_blocks_subtract_builder(untextured, on) \ function(blend_blocks_textured_add_fourth_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -4152,6 +4193,7 @@ function(blend_blocks_textured_add_fourth_##mask_evaluate) \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -4171,6 +4213,7 @@ function(blend_blocks_textured_add_fourth_##mask_evaluate) \ \ function(blend_blocks_untextured_add_fourth_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -4250,6 +4293,7 @@ function(blend_blocks_untextured_add_fourth_##mask_evaluate) \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -4275,6 +4319,7 @@ blend_blocks_add_fourth_untextured_builder(on) function(blend_blocks_textured_unblended_on) stmdb sp!, { r4, r14 } + save_abi_regs() add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] @@ -4314,6 +4359,7 @@ function(blend_blocks_textured_unblended_on) vbif.u16 fb_pixels, pixels, draw_mask vst1.u16 { fb_pixels }, [fb_ptr] + restore_abi_regs() ldmia sp!, { r4, pc } @@ -4783,6 +4829,7 @@ setup_sprite_update_texture_8bpp_cache: setup_sprite_setup_left_draw_mask_fb_ptr##x4mode(); \ \ setup_sprite_tile_column_height_##multi_height(edge_mode, edge, tm, x4mode); \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc } \ #define setup_sprite_tiled_advance_column() \ @@ -4819,6 +4866,7 @@ setup_sprite_update_texture_8bpp_cache: \ setup_sprite_tiled_advance_column(); \ setup_sprite_tile_column_height_##multi_height(right_mode, left, tm, x4mode);\ + restore_abi_regs(); \ ldmia 
sp!, { r4 - r11, pc } \ @@ -5177,6 +5225,8 @@ function(setup_sprite_##texture_mode##x4mode) \ ldr height, [sp, #44]; \ add fb_ptr, fb_ptr, y, lsl #11; \ \ + save_abi_regs(); \ + \ add fb_ptr, fb_ptr, x, lsl #1; \ and offset_v, v, #0xF; \ \ diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index bb8bea0ad..353b603ce 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -31,11 +31,20 @@ int do_cmd_list(uint32_t *list, int count, int *last_cmd) { int ret; +#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD) + // the asm doesn't bother to save callee-save vector regs, so do it here + __asm__ __volatile__("":::"q4","q5","q6","q7"); +#endif + if (gpu.state.enhancement_active) ret = gpu_parse_enhanced(&egpu, list, count * 4, (u32 *)last_cmd); else ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd); +#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD) + __asm__ __volatile__("":::"q4","q5","q6","q7"); +#endif + ex_regs[1] &= ~0x1ff; ex_regs[1] |= egpu.texture_settings & 0x1ff; return ret; From aaece50824568f63d6993b6db6a1ea659ac5c7da Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 3 Aug 2022 00:51:35 +0300 Subject: [PATCH 166/597] drc: patch up some potential issues on the switch --- libpcsxcore/new_dynarec/assem_arm64.c | 41 +++++++++++++++++++-------- libpcsxcore/new_dynarec/new_dynarec.c | 2 ++ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index b2b8110d2..6f108bf82 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -399,6 +399,27 @@ static void emit_movimm(u_int imm, u_int rt) } } +static void emit_movimm64(uint64_t imm, u_int rt) +{ + u_int shift, op, imm16, insns = 0; + for (shift = 0; shift < 4; shift++) { + imm16 = (imm >> shift * 16) & 0xffff; + if (!imm16) + continue; + op = insns ? 
0xf2800000 : 0xd2800000; + assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16); + if (shift) + assem_debug(",lsl #%u", shift * 16); + assem_debug("\n"); + output_w32(op | (shift << 21) | imm16_rd(imm16, rt)); + insns++; + } + if (!insns) { + assem_debug("movz %s,#0\n", regname64[rt]); + output_w32(0xd2800000 | imm16_rd(0, rt)); + } +} + static void emit_readword(void *addr, u_int rt) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; @@ -1329,16 +1350,7 @@ static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int r } // just move the whole thing. At least on Linux all addresses // seem to be 48bit, so 3 insns - not great not terrible - assem_debug("movz %s,#%#lx\n", regname64[rt], rt_val & 0xffff); - output_w32(0xd2800000 | imm16_rd(rt_val & 0xffff, rt)); - assem_debug("movk %s,#%#lx,lsl #16\n", regname64[rt], (rt_val >> 16) & 0xffff); - output_w32(0xf2a00000 | imm16_rd((rt_val >> 16) & 0xffff, rt)); - assem_debug("movk %s,#%#lx,lsl #32\n", regname64[rt], (rt_val >> 32) & 0xffff); - output_w32(0xf2c00000 | imm16_rd((rt_val >> 32) & 0xffff, rt)); - if (rt_val >> 48) { - assem_debug("movk %s,#%#lx,lsl #48\n", regname64[rt], (rt_val >> 48) & 0xffff); - output_w32(0xf2e00000 | imm16_rd((rt_val >> 48) & 0xffff, rt)); - } + emit_movimm64(rt_val, rt); } // trashes x2 @@ -1513,8 +1525,13 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, emit_addimm(cc<0?2:cc,adj,2); if(is_dynamic) { uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1; - emit_adrp((void *)l1, 1); - emit_addimm64(1, l1 & 0xfff, 1); + intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl); + if (-4294967296l <= offset && offset < 4294967296l) { + emit_adrp((void *)l1, 1); + emit_addimm64(1, l1 & 0xfff, 1); + } + else + emit_movimm64(l1, 1); } else emit_far_call(do_memhandler_pre); diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 447023c68..0fafc60ad 100644 --- 
a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -490,6 +490,8 @@ static void end_tcache_write(void *start, void *end) if (g_jit.type == JitType_CodeMemory) { armDCacheClean(start, len); armICacheInvalidate((char *)start - ndrc_write_ofs, len); + // as of v4.2.1 libnx lacks isb + __asm__ volatile("isb" ::: "memory"); } #elif defined(__aarch64__) // as of 2021, __clear_cache() is still broken on arm64 From fab27ba2d1aab02ab99f416bb2da01123663af8c Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 2 Aug 2022 00:08:49 +0300 Subject: [PATCH 167/597] gpu_neon: fix some more abi violations --- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index d6907e4c6..9d342ae75 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -3174,9 +3174,9 @@ function(texture_blocks_16bpp) .align 3; \ \ function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \ + save_abi_regs(); \ shade_blocks_textured_modulated_prologue_##shading(dithering, target); \ stmdb sp!, { r4 - r5, lr }; \ - save_abi_regs(); \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ vld1.u32 { test_mask }, [psx_gpu, :128]; \ @@ -3292,8 +3292,9 @@ function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \ shade_blocks_textured_modulated_store_draw_mask_##target(28); \ shade_blocks_textured_modulated_store_pixels_##target(); \ \ + ldmia sp!, { r4 - r5, lr }; \ restore_abi_regs(); \ - ldmia sp!, { r4 - r5, pc } \ + bx lr \ shade_blocks_textured_modulated_builder(shaded, dithered, direct); @@ -5950,7 +5951,7 @@ setup_sprite_untextured_height_loop: #define texel_block_expanded_b q2 #define texel_block_expanded_ab q2 #define texel_block_expanded_c q3 -#define texel_block_expanded_d q4 +#define texel_block_expanded_d q0 #define 
texel_block_expanded_cd q3 function(update_texture_4bpp_cache) From a402136100279cd528661ca9eae70d12bedb8862 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 18 Jul 2022 01:18:52 +0300 Subject: [PATCH 168/597] gpu_neon: don't include vector_ops.h in the main header that stuff is only used in the C-only prototype --- plugins/gpu_neon/psx_gpu/common.h | 11 +---- plugins/gpu_neon/psx_gpu/psx_gpu.c | 6 +-- plugins/gpu_neon/psx_gpu/psx_gpu.h | 8 ++++ plugins/gpu_neon/psx_gpu/vector_ops.h | 28 +----------- plugins/gpu_neon/psx_gpu/vector_types.h | 57 +++++++++++++++++++++++++ 5 files changed, 70 insertions(+), 40 deletions(-) create mode 100644 plugins/gpu_neon/psx_gpu/vector_types.h diff --git a/plugins/gpu_neon/psx_gpu/common.h b/plugins/gpu_neon/psx_gpu/common.h index d5cf3e91f..820dfbefd 100644 --- a/plugins/gpu_neon/psx_gpu/common.h +++ b/plugins/gpu_neon/psx_gpu/common.h @@ -1,21 +1,12 @@ #ifndef COMMON_H #define COMMON_H -typedef signed char s8; -typedef unsigned char u8; -typedef signed short s16; -typedef unsigned short u16; -typedef signed int s32; -typedef unsigned int u32; -typedef signed long long int s64; -typedef unsigned long long int u64; - #include #include #include #include -#include "vector_ops.h" +#include "vector_types.h" #include "psx_gpu.h" #define unlikely(x) __builtin_expect((x), 0) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index a5e7aa181..b5aec1460 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -17,6 +17,9 @@ #include #include "common.h" +#ifndef NEON_BUILD +#include "vector_ops.h" +#endif u32 span_pixels = 0; u32 span_pixel_blocks = 0; @@ -515,9 +518,6 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu) } -void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, - vertex_struct *b, vertex_struct *c); - #ifndef NEON_BUILD #define setup_gradient_calculation_input(set, vertex) \ diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h 
b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 1eaa99a4b..6f89cacfe 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -15,6 +15,8 @@ #ifndef PSX_GPU_H #define PSX_GPU_H +#include "vector_types.h" + typedef enum { PRIMITIVE_TYPE_TRIANGLE = 0, @@ -222,6 +224,8 @@ typedef struct __attribute__((aligned(16))) s16 x; s16 y; + + u32 padding; } vertex_struct; void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, @@ -247,5 +251,9 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command); void triangle_benchmark(psx_gpu_struct *psx_gpu); +void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, + const vertex_struct * __restrict__ a, const vertex_struct * __restrict__ b, + const vertex_struct * __restrict__ c); + #endif diff --git a/plugins/gpu_neon/psx_gpu/vector_ops.h b/plugins/gpu_neon/psx_gpu/vector_ops.h index c91e7d950..189eb79d0 100644 --- a/plugins/gpu_neon/psx_gpu/vector_ops.h +++ b/plugins/gpu_neon/psx_gpu/vector_ops.h @@ -15,33 +15,7 @@ #ifndef VECTOR_OPS #define VECTOR_OPS -#define build_vector_type_pair(sign, size, count, count_x2) \ -typedef struct \ -{ \ - sign##size e[count]; \ -} vec_##count##x##size##sign; \ - \ -typedef struct \ -{ \ - union \ - { \ - sign##size e[count_x2]; \ - struct \ - { \ - vec_##count##x##size##sign low; \ - vec_##count##x##size##sign high; \ - }; \ - }; \ -} vec_##count_x2##x##size##sign \ - -#define build_vector_types(sign) \ - build_vector_type_pair(sign, 8, 8, 16); \ - build_vector_type_pair(sign, 16, 4, 8); \ - build_vector_type_pair(sign, 32, 2, 4); \ - build_vector_type_pair(sign, 64, 1, 2) \ - -build_vector_types(u); -build_vector_types(s); +#include "vector_types.h" #define foreach_element(iterations, operation) \ diff --git a/plugins/gpu_neon/psx_gpu/vector_types.h b/plugins/gpu_neon/psx_gpu/vector_types.h new file mode 100644 index 000000000..4b1213ebb --- /dev/null +++ b/plugins/gpu_neon/psx_gpu/vector_types.h @@ -0,0 +1,57 @@ +/* + 
* Copyright (C) 2011 Gilead Kutnick "Exophase" + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef VECTOR_TYPES +#define VECTOR_TYPES + +#include + +typedef int8_t s8; +typedef uint8_t u8; +typedef int16_t s16; +typedef uint16_t u16; +typedef int32_t s32; +typedef uint32_t u32; +typedef int64_t s64; +typedef uint64_t u64; + +#define build_vector_type_pair(sign, size, count, count_x2) \ +typedef struct \ +{ \ + sign##size e[count]; \ +} vec_##count##x##size##sign; \ + \ +typedef struct \ +{ \ + union \ + { \ + sign##size e[count_x2]; \ + struct \ + { \ + vec_##count##x##size##sign low; \ + vec_##count##x##size##sign high; \ + }; \ + }; \ +} vec_##count_x2##x##size##sign \ + +#define build_vector_types(sign) \ + build_vector_type_pair(sign, 8, 8, 16); \ + build_vector_type_pair(sign, 16, 4, 8); \ + build_vector_type_pair(sign, 32, 2, 4); \ + build_vector_type_pair(sign, 64, 1, 2) \ + +build_vector_types(u); +build_vector_types(s); + +#endif // VECTOR_TYPES From 37725e8cc9157e2e7819538ee5c98279b8dbefff Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 23 Jul 2022 18:22:33 +0300 Subject: [PATCH 169/597] gpu_neon: place asm func prototypes into a separate header for the upcoming intrinsics implementation --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 134 +---------- plugins/gpu_neon/psx_gpu/psx_gpu_simd.h | 281 ++++++++++++++++++++++++ 2 files changed, 283 insertions(+), 132 deletions(-) create mode 100644 plugins/gpu_neon/psx_gpu/psx_gpu_simd.h diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c 
b/plugins/gpu_neon/psx_gpu/psx_gpu.c index b5aec1460..a79254da9 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -20,6 +20,7 @@ #ifndef NEON_BUILD #include "vector_ops.h" #endif +#include "psx_gpu_simd.h" u32 span_pixels = 0; u32 span_pixel_blocks = 0; @@ -301,9 +302,6 @@ void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1, } } -void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu, - u32 texture_page); - #ifndef NEON_BUILD void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu) @@ -452,9 +450,6 @@ void update_texture_8bpp_cache(psx_gpu_struct *psx_gpu) } } -void setup_blocks_shaded_untextured_undithered_unswizzled_indirect( - psx_gpu_struct *psx_gpu); - void flush_render_block_buffer(psx_gpu_struct *psx_gpu) { if((psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) && @@ -1205,26 +1200,6 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, setup_spans_up(index_##major, index_##minor, minor, yes) \ -void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_up_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_down_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_down_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_up_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_up_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_down_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_down_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct 
*v_b, vertex_struct *v_c); - - #ifndef NEON_BUILD void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, @@ -1941,30 +1916,6 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \ psx_gpu->num_blocks = num_blocks; \ } \ -void setup_blocks_shaded_textured_dithered_unswizzled_indirect(psx_gpu_struct - *psx_gpu); - -void setup_blocks_shaded_untextured_dithered_unswizzled_indirect(psx_gpu_struct - *psx_gpu); -void setup_blocks_shaded_untextured_undithered_unswizzled_indirect( - psx_gpu_struct *psx_gpu); -void setup_blocks_shaded_untextured_dithered_unswizzled_direct(psx_gpu_struct - *psx_gpu); -void setup_blocks_shaded_untextured_undithered_unswizzled_direct( - psx_gpu_struct *psx_gpu); - -void setup_blocks_unshaded_textured_dithered_unswizzled_indirect(psx_gpu_struct - *psx_gpu); -void setup_blocks_unshaded_untextured_undithered_unswizzled_indirect( - psx_gpu_struct *psx_gpu); -void setup_blocks_unshaded_untextured_undithered_unswizzled_direct( - psx_gpu_struct *psx_gpu); - -void setup_blocks_shaded_textured_dithered_swizzled_indirect(psx_gpu_struct - *psx_gpu); -void setup_blocks_unshaded_textured_dithered_swizzled_indirect(psx_gpu_struct - *psx_gpu); - //setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct); @@ -1984,15 +1935,6 @@ setup_blocks_builder(shaded, untextured, dithered, unswizzled, direct); setup_blocks_builder(unshaded, untextured, undithered, unswizzled, indirect); setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct); -#endif - -void texture_blocks_untextured(psx_gpu_struct *psx_gpu); -void texture_blocks_4bpp(psx_gpu_struct *psx_gpu); -void texture_blocks_8bpp(psx_gpu_struct *psx_gpu); -void texture_blocks_16bpp(psx_gpu_struct *psx_gpu); - -#ifndef NEON_BUILD - void texture_blocks_untextured(psx_gpu_struct *psx_gpu) { if(psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE) @@ -2277,27 +2219,6 @@ void shade_blocks_##shading##_textured_modulated_##dithering##_##target( \ } \ 
} \ -void shade_blocks_shaded_textured_modulated_dithered_direct(psx_gpu_struct - *psx_gpu); -void shade_blocks_shaded_textured_modulated_undithered_direct(psx_gpu_struct - *psx_gpu); -void shade_blocks_unshaded_textured_modulated_dithered_direct(psx_gpu_struct - *psx_gpu); -void shade_blocks_unshaded_textured_modulated_undithered_direct(psx_gpu_struct - *psx_gpu); - -void shade_blocks_shaded_textured_modulated_dithered_indirect(psx_gpu_struct - *psx_gpu); -void shade_blocks_shaded_textured_modulated_undithered_indirect(psx_gpu_struct - *psx_gpu); -void shade_blocks_unshaded_textured_modulated_dithered_indirect(psx_gpu_struct - *psx_gpu); -void shade_blocks_unshaded_textured_modulated_undithered_indirect(psx_gpu_struct - *psx_gpu); - -void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu); -void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu); - #ifndef NEON_BUILD shade_blocks_textured_modulated_builder(shaded, dithered, direct); @@ -2383,14 +2304,6 @@ void shade_blocks_textured_unmodulated_dithered_##target(psx_gpu_struct \ shade_blocks_textured_unmodulated_builder(indirect) shade_blocks_textured_unmodulated_builder(direct) -#endif - - -void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu); -void shade_blocks_unshaded_untextured_direct(psx_gpu_struct *psx_gpu); - -#ifndef NEON_BUILD - void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu) { } @@ -2602,27 +2515,6 @@ void \ } \ } \ -void blend_blocks_textured_average_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_average_on(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_add_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_add_on(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_subtract_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_subtract_on(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_add_fourth_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_add_fourth_on(psx_gpu_struct *psx_gpu); 
- -void blend_blocks_untextured_average_off(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_average_on(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_add_off(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_add_on(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_subtract_off(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_subtract_on(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_add_fourth_off(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_add_fourth_on(psx_gpu_struct *psx_gpu); - -void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_unblended_on(psx_gpu_struct *psx_gpu); - #ifndef NEON_BUILD void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu) @@ -3175,9 +3067,6 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, render_triangle_p(psx_gpu, vertex_ptrs, flags); } - -void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu); - #ifndef NEON_BUILD void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) @@ -3880,25 +3769,6 @@ void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\ } \ } \ -void setup_sprite_4bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); -void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); -void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); - -void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); -void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); -void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); - -void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, - s32 v, s32 width, s32 height, u32 color); -void 
setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, - s32 u, s32 v, s32 width, s32 height, u32 color); - #ifndef NEON_BUILD setup_sprite_tiled_builder(4bpp,); setup_sprite_tiled_builder(8bpp,); @@ -5087,7 +4957,7 @@ u64 get_us(void) return (tv.tv_sec * 1000000ULL) + tv.tv_usec; } -#ifdef NEON_BUILD +#if 0 //def NEON_BUILD u32 get_counter() { diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.h b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.h new file mode 100644 index 000000000..a8080aff8 --- /dev/null +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.h @@ -0,0 +1,281 @@ +#include "vector_types.h" + +#ifdef ASM_PROTOTYPES +#define compute_all_gradients compute_all_gradients_ +#define update_texture_8bpp_cache_slice update_texture_8bpp_cache_slice_ +#define setup_spans_up_left setup_spans_up_left_ +#define setup_spans_up_right setup_spans_up_right_ +#define setup_spans_down_left setup_spans_down_left_ +#define setup_spans_down_right setup_spans_down_right_ +#define setup_spans_up_a setup_spans_up_a_ +#define setup_spans_up_b setup_spans_up_b_ +#define setup_spans_down_a setup_spans_down_a_ +#define setup_spans_down_b setup_spans_down_b_ +#define setup_spans_up_down setup_spans_up_down_ +#define setup_blocks_shaded_textured_dithered_unswizzled_indirect \ + setup_blocks_shaded_textured_dithered_unswizzled_indirect_ +#define setup_blocks_shaded_untextured_dithered_unswizzled_indirect \ + setup_blocks_shaded_untextured_dithered_unswizzled_indirect_ +#define setup_blocks_shaded_untextured_undithered_unswizzled_indirect \ + setup_blocks_shaded_untextured_undithered_unswizzled_indirect_ +#define setup_blocks_shaded_untextured_dithered_unswizzled_direct \ + setup_blocks_shaded_untextured_dithered_unswizzled_direct_ +#define setup_blocks_shaded_untextured_undithered_unswizzled_direct \ + setup_blocks_shaded_untextured_undithered_unswizzled_direct_ +#define setup_blocks_unshaded_textured_dithered_unswizzled_indirect \ + 
setup_blocks_unshaded_textured_dithered_unswizzled_indirect_ +#define setup_blocks_unshaded_untextured_undithered_unswizzled_indirect \ + setup_blocks_unshaded_untextured_undithered_unswizzled_indirect_ +#define setup_blocks_unshaded_untextured_undithered_unswizzled_direct \ + setup_blocks_unshaded_untextured_undithered_unswizzled_direct_ +#define setup_blocks_shaded_textured_dithered_swizzled_indirect \ + setup_blocks_shaded_textured_dithered_swizzled_indirect_ +#define setup_blocks_unshaded_textured_dithered_swizzled_indirect \ + setup_blocks_unshaded_textured_dithered_swizzled_indirect_ +#define texture_blocks_untextured texture_blocks_untextured_ +#define texture_blocks_4bpp texture_blocks_4bpp_ +#define texture_blocks_8bpp texture_blocks_8bpp_ +#define texture_blocks_16bpp texture_blocks_16bpp_ +#define shade_blocks_shaded_textured_modulated_dithered_direct \ + shade_blocks_shaded_textured_modulated_dithered_direct_ +#define shade_blocks_shaded_textured_modulated_undithered_direct \ + shade_blocks_shaded_textured_modulated_undithered_direct_ +#define shade_blocks_unshaded_textured_modulated_dithered_direct \ + shade_blocks_unshaded_textured_modulated_dithered_direct_ +#define shade_blocks_unshaded_textured_modulated_undithered_direct \ + shade_blocks_unshaded_textured_modulated_undithered_direct_ +#define shade_blocks_shaded_textured_modulated_dithered_indirect \ + shade_blocks_shaded_textured_modulated_dithered_indirect_ +#define shade_blocks_shaded_textured_modulated_undithered_indirect \ + shade_blocks_shaded_textured_modulated_undithered_indirect_ +#define shade_blocks_unshaded_textured_modulated_dithered_indirect \ + shade_blocks_unshaded_textured_modulated_dithered_indirect_ +#define shade_blocks_unshaded_textured_modulated_undithered_indirect \ + shade_blocks_unshaded_textured_modulated_undithered_indirect_ +#define shade_blocks_textured_unmodulated_indirect \ + shade_blocks_textured_unmodulated_indirect_ +#define 
shade_blocks_textured_unmodulated_direct \ + shade_blocks_textured_unmodulated_direct_ +#define shade_blocks_unshaded_untextured_indirect \ + shade_blocks_unshaded_untextured_indirect_ +#define shade_blocks_unshaded_untextured_direct shade_blocks_unshaded_untextured_direct_ +#define blend_blocks_textured_average_off blend_blocks_textured_average_off_ +#define blend_blocks_textured_average_on blend_blocks_textured_average_on_ +#define blend_blocks_textured_add_off blend_blocks_textured_add_off_ +#define blend_blocks_textured_add_on blend_blocks_textured_add_on_ +#define blend_blocks_textured_subtract_off blend_blocks_textured_subtract_off_ +#define blend_blocks_textured_subtract_on blend_blocks_textured_subtract_on_ +#define blend_blocks_textured_add_fourth_off blend_blocks_textured_add_fourth_off_ +#define blend_blocks_textured_add_fourth_on blend_blocks_textured_add_fourth_on_ +#define blend_blocks_untextured_average_off blend_blocks_untextured_average_off_ +#define blend_blocks_untextured_average_on blend_blocks_untextured_average_on_ +#define blend_blocks_untextured_add_off blend_blocks_untextured_add_off_ +#define blend_blocks_untextured_add_on blend_blocks_untextured_add_on_ +#define blend_blocks_untextured_subtract_off blend_blocks_untextured_subtract_off_ +#define blend_blocks_untextured_subtract_on blend_blocks_untextured_subtract_on_ +#define blend_blocks_untextured_add_fourth_off blend_blocks_untextured_add_fourth_off_ +#define blend_blocks_untextured_add_fourth_on blend_blocks_untextured_add_fourth_on_ +#define blend_blocks_textured_unblended_off blend_blocks_textured_unblended_off_ +#define blend_blocks_textured_unblended_on blend_blocks_textured_unblended_on_ +#define texture_sprite_blocks_8bpp texture_sprite_blocks_8bpp_ +#define setup_sprite_4bpp setup_sprite_4bpp_ +#define setup_sprite_8bpp setup_sprite_8bpp_ +#define setup_sprite_16bpp setup_sprite_16bpp_ +#define setup_sprite_4bpp_4x setup_sprite_4bpp_4x_ +#define setup_sprite_8bpp_4x 
setup_sprite_8bpp_4x_ +#define setup_sprite_16bpp_4x setup_sprite_16bpp_4x_ +#define setup_sprite_untextured setup_sprite_untextured_ +#define setup_sprite_untextured_simple setup_sprite_untextured_simple_ +#define scale2x_tiles8 scale2x_tiles8_ +#endif + +void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, + const vertex_struct * __restrict__ a, const vertex_struct * __restrict__ b, + const vertex_struct * __restrict__ c); + +void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu, + u32 texture_page); + +void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c); +void setup_spans_up_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c); +void setup_spans_down_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c); +void setup_spans_down_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c); +void setup_spans_up_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c); +void setup_spans_up_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c); +void setup_spans_down_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c); +void setup_spans_down_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c); +void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c); + +void setup_blocks_shaded_textured_dithered_unswizzled_indirect(psx_gpu_struct + *psx_gpu); + +void setup_blocks_shaded_untextured_dithered_unswizzled_indirect(psx_gpu_struct + *psx_gpu); +void setup_blocks_shaded_untextured_undithered_unswizzled_indirect( + psx_gpu_struct *psx_gpu); +void setup_blocks_shaded_untextured_dithered_unswizzled_direct(psx_gpu_struct + *psx_gpu); +void setup_blocks_shaded_untextured_undithered_unswizzled_direct( 
+ psx_gpu_struct *psx_gpu); + +void setup_blocks_unshaded_textured_dithered_unswizzled_indirect(psx_gpu_struct + *psx_gpu); +void setup_blocks_unshaded_untextured_undithered_unswizzled_indirect( + psx_gpu_struct *psx_gpu); +void setup_blocks_unshaded_untextured_undithered_unswizzled_direct( + psx_gpu_struct *psx_gpu); + +void setup_blocks_shaded_textured_dithered_swizzled_indirect(psx_gpu_struct + *psx_gpu); +void setup_blocks_unshaded_textured_dithered_swizzled_indirect(psx_gpu_struct + *psx_gpu); + +void texture_blocks_untextured(psx_gpu_struct *psx_gpu); +void texture_blocks_4bpp(psx_gpu_struct *psx_gpu); +void texture_blocks_8bpp(psx_gpu_struct *psx_gpu); +void texture_blocks_16bpp(psx_gpu_struct *psx_gpu); + +void shade_blocks_shaded_textured_modulated_dithered_direct(psx_gpu_struct + *psx_gpu); +void shade_blocks_shaded_textured_modulated_undithered_direct(psx_gpu_struct + *psx_gpu); +void shade_blocks_unshaded_textured_modulated_dithered_direct(psx_gpu_struct + *psx_gpu); +void shade_blocks_unshaded_textured_modulated_undithered_direct(psx_gpu_struct + *psx_gpu); + +void shade_blocks_shaded_textured_modulated_dithered_indirect(psx_gpu_struct + *psx_gpu); +void shade_blocks_shaded_textured_modulated_undithered_indirect(psx_gpu_struct + *psx_gpu); +void shade_blocks_unshaded_textured_modulated_dithered_indirect(psx_gpu_struct + *psx_gpu); +void shade_blocks_unshaded_textured_modulated_undithered_indirect(psx_gpu_struct + *psx_gpu); + +void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu); +void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu); + +void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu); +void shade_blocks_unshaded_untextured_direct(psx_gpu_struct *psx_gpu); + +void blend_blocks_textured_average_off(psx_gpu_struct *psx_gpu); +void blend_blocks_textured_average_on(psx_gpu_struct *psx_gpu); +void blend_blocks_textured_add_off(psx_gpu_struct *psx_gpu); +void 
blend_blocks_textured_add_on(psx_gpu_struct *psx_gpu); +void blend_blocks_textured_subtract_off(psx_gpu_struct *psx_gpu); +void blend_blocks_textured_subtract_on(psx_gpu_struct *psx_gpu); +void blend_blocks_textured_add_fourth_off(psx_gpu_struct *psx_gpu); +void blend_blocks_textured_add_fourth_on(psx_gpu_struct *psx_gpu); + +void blend_blocks_untextured_average_off(psx_gpu_struct *psx_gpu); +void blend_blocks_untextured_average_on(psx_gpu_struct *psx_gpu); +void blend_blocks_untextured_add_off(psx_gpu_struct *psx_gpu); +void blend_blocks_untextured_add_on(psx_gpu_struct *psx_gpu); +void blend_blocks_untextured_subtract_off(psx_gpu_struct *psx_gpu); +void blend_blocks_untextured_subtract_on(psx_gpu_struct *psx_gpu); +void blend_blocks_untextured_add_fourth_off(psx_gpu_struct *psx_gpu); +void blend_blocks_untextured_add_fourth_on(psx_gpu_struct *psx_gpu); + +void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu); +void blend_blocks_textured_unblended_on(psx_gpu_struct *psx_gpu); + +void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu); + +void setup_sprite_4bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); +void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); +void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); + +void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); +void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); +void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); + +void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color); +void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, + s32 u, s32 v, s32 width, s32 
height, u32 color); + +void scale2x_tiles8(void *dst, const void *src, int w8, int h); + +#ifdef ASM_PROTOTYPES +#undef compute_all_gradients +#undef update_texture_8bpp_cache_slice +#undef setup_spans_up_left +#undef setup_spans_up_right +#undef setup_spans_down_left +#undef setup_spans_down_right +#undef setup_spans_up_a +#undef setup_spans_up_b +#undef setup_spans_down_a +#undef setup_spans_down_b +#undef setup_spans_up_down +#undef setup_blocks_shaded_textured_dithered_unswizzled_indirect +#undef setup_blocks_shaded_untextured_dithered_unswizzled_indirect +#undef setup_blocks_shaded_untextured_undithered_unswizzled_indirect +#undef setup_blocks_shaded_untextured_dithered_unswizzled_direct +#undef setup_blocks_shaded_untextured_undithered_unswizzled_direct +#undef setup_blocks_unshaded_textured_dithered_unswizzled_indirect +#undef setup_blocks_unshaded_untextured_undithered_unswizzled_indirect +#undef setup_blocks_unshaded_untextured_undithered_unswizzled_direct +#undef setup_blocks_shaded_textured_dithered_swizzled_indirect +#undef setup_blocks_unshaded_textured_dithered_swizzled_indirect +#undef texture_blocks_untextured +#undef texture_blocks_4bpp +#undef texture_blocks_8bpp +#undef texture_blocks_16bpp +#undef shade_blocks_shaded_textured_modulated_dithered_direct +#undef shade_blocks_shaded_textured_modulated_undithered_direct +#undef shade_blocks_unshaded_textured_modulated_dithered_direct +#undef shade_blocks_unshaded_textured_modulated_undithered_direct +#undef shade_blocks_shaded_textured_modulated_dithered_indirect +#undef shade_blocks_shaded_textured_modulated_undithered_indirect +#undef shade_blocks_unshaded_textured_modulated_dithered_indirect +#undef shade_blocks_unshaded_textured_modulated_undithered_indirect +#undef shade_blocks_textured_unmodulated_indirect +#undef shade_blocks_textured_unmodulated_direct +#undef shade_blocks_unshaded_untextured_indirect +#undef shade_blocks_unshaded_untextured_direct +#undef blend_blocks_textured_average_off 
+#undef blend_blocks_textured_average_on +#undef blend_blocks_textured_add_off +#undef blend_blocks_textured_add_on +#undef blend_blocks_textured_subtract_off +#undef blend_blocks_textured_subtract_on +#undef blend_blocks_textured_add_fourth_off +#undef blend_blocks_textured_add_fourth_on +#undef blend_blocks_untextured_average_off +#undef blend_blocks_untextured_average_on +#undef blend_blocks_untextured_add_off +#undef blend_blocks_untextured_add_on +#undef blend_blocks_untextured_subtract_off +#undef blend_blocks_untextured_subtract_on +#undef blend_blocks_untextured_add_fourth_off +#undef blend_blocks_untextured_add_fourth_on +#undef blend_blocks_textured_unblended_off +#undef blend_blocks_textured_unblended_on +#undef texture_sprite_blocks_8bpp +#undef setup_sprite_4bpp +#undef setup_sprite_8bpp +#undef setup_sprite_16bpp +#undef setup_sprite_4bpp_4x +#undef setup_sprite_8bpp_4x +#undef setup_sprite_16bpp_4x +#undef setup_sprite_untextured +#undef setup_sprite_untextured_simple +#undef scale2x_tiles8 +#endif From a2cb152a937d0aecbf794e5ba36431e6a17b7483 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 18 Jul 2022 01:26:25 +0300 Subject: [PATCH 170/597] gpu_neon: new intrinsics-only implementation ~80-95% performance of the asm version on cortex-a72, but maybe less of a portability nightmare (+arm64 support) --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 31 +- plugins/gpu_neon/psx_gpu/psx_gpu.h | 1 + plugins/gpu_neon/psx_gpu/psx_gpu_4x.c | 28 +- plugins/gpu_neon/psx_gpu/psx_gpu_simd.c | 3525 +++++++++++++++++++++++ 4 files changed, 3569 insertions(+), 16 deletions(-) create mode 100644 plugins/gpu_neon/psx_gpu/psx_gpu_simd.c diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index a79254da9..80e9f129e 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -51,6 +51,8 @@ u32 zero_block_spans = 0; u32 texture_cache_loads = 0; u32 false_modulated_blocks = 0; +#define stats_add(stat, count) // stat += 
count + /* double size for enhancement */ u32 reciprocal_table[512 * 2]; @@ -1842,7 +1844,7 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, } \ #define setup_blocks_add_blocks_direct() \ - texel_blocks_untextured += span_num_blocks; \ + stats_add(texel_blocks_untextured, span_num_blocks); \ span_pixel_blocks += span_num_blocks \ @@ -1938,14 +1940,14 @@ setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct); void texture_blocks_untextured(psx_gpu_struct *psx_gpu) { if(psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE) - texel_blocks_untextured += psx_gpu->num_blocks; + stats_add(texel_blocks_untextured, psx_gpu->num_blocks); } void texture_blocks_4bpp(psx_gpu_struct *psx_gpu) { block_struct *block = psx_gpu->blocks; u32 num_blocks = psx_gpu->num_blocks; - texel_blocks_4bpp += num_blocks; + stats_add(texel_blocks_4bpp, num_blocks); vec_8x8u texels_low; vec_8x8u texels_high; @@ -1997,7 +1999,7 @@ void texture_blocks_8bpp(psx_gpu_struct *psx_gpu) block_struct *block = psx_gpu->blocks; u32 num_blocks = psx_gpu->num_blocks; - texel_blocks_8bpp += num_blocks; + stats_add(texel_blocks_8bpp, num_blocks); if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_8bpp_mask) update_texture_8bpp_cache(psx_gpu); @@ -2031,7 +2033,7 @@ void texture_blocks_16bpp(psx_gpu_struct *psx_gpu) block_struct *block = psx_gpu->blocks; u32 num_blocks = psx_gpu->num_blocks; - texel_blocks_16bpp += num_blocks; + stats_add(texel_blocks_16bpp, num_blocks); vec_8x16u texels; @@ -3067,7 +3069,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, render_triangle_p(psx_gpu, vertex_ptrs, flags); } -#ifndef NEON_BUILD +#if !defined(NEON_BUILD) || defined(SIMD_BUILD) void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) { @@ -3775,6 +3777,9 @@ setup_sprite_tiled_builder(8bpp,); setup_sprite_tiled_builder(4bpp,_4x); setup_sprite_tiled_builder(8bpp,_4x); +#endif + +#if !defined(NEON_BUILD) || defined(SIMD_BUILD) void 
setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) @@ -3803,7 +3808,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, texture_offset_base &= ~0x7; - sprites_16bpp++; + stats_add(sprites_16bpp, 1); if(block_width == 1) { @@ -3824,7 +3829,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, texture_block_ptr = texture_page_ptr + (texture_offset_base & texture_mask); - load_128b(block->texels, texture_block_ptr); + block->texels = *(vec_8x16u *)texture_block_ptr; block->draw_mask_bits = mask_bits; block->fb_ptr = fb_ptr; @@ -3858,7 +3863,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, texture_offset_base += 1024; texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); - load_128b(block->texels, texture_block_ptr); + block->texels = *(vec_8x16u *)texture_block_ptr; block->draw_mask_bits = left_mask_bits; block->fb_ptr = fb_ptr; @@ -3870,7 +3875,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, while(blocks_remaining) { texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); - load_128b(block->texels, texture_block_ptr); + block->texels = *(vec_8x16u *)texture_block_ptr; block->draw_mask_bits = 0; block->fb_ptr = fb_ptr; @@ -3883,7 +3888,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); - load_128b(block->texels, texture_block_ptr); + block->texels = *(vec_8x16u *)texture_block_ptr; block->draw_mask_bits = right_mask_bits; block->fb_ptr = fb_ptr; @@ -3897,6 +3902,10 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } } +#endif + +#ifndef NEON_BUILD + void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 
6f89cacfe..1500eea80 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -244,6 +244,7 @@ void render_line(psx_gpu_struct *gpu, vertex_struct *vertexes, u32 flags, u32 texture_region_mask(s32 x1, s32 y1, s32 x2, s32 y2); +void update_texture_8bpp_cache(psx_gpu_struct *psx_gpu); void flush_render_block_buffer(psx_gpu_struct *psx_gpu); void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c index 85e972c5c..942b3d30f 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c @@ -2,7 +2,21 @@ ((psx_gpu)->enhancement_buf_ptr + \ ((psx_gpu)->enhancement_buf_by_x16[(x) / 16] << 20)) -#ifndef NEON_BUILD +#if !defined(NEON_BUILD) || defined(SIMD_BUILD) + +#ifndef zip_4x32b + +#define vector_cast(vec_to, source) source + +#define zip_4x32b(dest, source_a, source_b) { \ + u32 _i; for(_i = 0; _i < 4; _i++) { \ + (dest).e[_i * 2 + 0] = (source_a).e[_i]; \ + (dest).e[_i * 2 + 1] = (source_b).e[_i]; \ + } \ +} + +#endif + void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { @@ -56,7 +70,8 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, texture_block_ptr = texture_page_ptr + (texture_offset_base & texture_mask); - load_128b(texels, texture_block_ptr); + //load_128b(texels, texture_block_ptr); + texels = *(vec_8x16u *)texture_block_ptr; zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); block->texels = texels_wide; @@ -117,7 +132,8 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); - load_128b(texels, texture_block_ptr); + //load_128b(texels, texture_block_ptr); + texels = *(vec_8x16u *)texture_block_ptr; zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); block->texels = texels_wide; @@ 
-147,7 +163,8 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, while(blocks_remaining) { texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); - load_128b(texels, texture_block_ptr); + //load_128b(texels, texture_block_ptr); + texels = *(vec_8x16u *)texture_block_ptr; zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); block->texels = texels_wide; @@ -178,7 +195,8 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); - load_128b(texels, texture_block_ptr); + //load_128b(texels, texture_block_ptr); + texels = *(vec_8x16u *)texture_block_ptr; zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); block->texels = texels_wide; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c new file mode 100644 index 000000000..335af35d6 --- /dev/null +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c @@ -0,0 +1,3525 @@ +/* + * Copyright (C) 2011 Gilead Kutnick "Exophase" + * Copyright (C) 2022 Gražvydas Ignotas "notaz" + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */
+
+#include <string.h>
+#include "psx_gpu.h"
+#include "psx_gpu_simd.h"
+//#define ASM_PROTOTYPES
+//#include "psx_gpu_simd.h"
+#ifndef SIMD_BUILD
+#error "please define SIMD_BUILD if you want this gpu_neon C simd implementation"
+#endif
+
+// 128-bit vectors (GCC/clang vector extension), one type per lane layout
+typedef u8  gvu8   __attribute__((vector_size(16)));
+typedef u16 gvu16  __attribute__((vector_size(16)));
+typedef u32 gvu32  __attribute__((vector_size(16)));
+typedef u64 gvu64  __attribute__((vector_size(16)));
+typedef s8  gvs8   __attribute__((vector_size(16)));
+typedef s16 gvs16  __attribute__((vector_size(16)));
+typedef s32 gvs32  __attribute__((vector_size(16)));
+typedef s64 gvs64  __attribute__((vector_size(16)));
+
+// 64-bit ("half") vectors, one type per lane layout
+typedef u8  gvhu8  __attribute__((vector_size(8)));
+typedef u16 gvhu16 __attribute__((vector_size(8)));
+typedef u32 gvhu32 __attribute__((vector_size(8)));
+typedef u64 gvhu64 __attribute__((vector_size(8)));
+typedef s8  gvhs8  __attribute__((vector_size(8)));
+typedef s16 gvhs16 __attribute__((vector_size(8)));
+typedef s32 gvhs32 __attribute__((vector_size(8)));
+typedef s64 gvhs64 __attribute__((vector_size(8)));
+
+// one 64-bit vector value, accessible through any of its lane layouts
+typedef union
+{
+  gvhu8  u8;
+  gvhu16 u16;
+  gvhu32 u32;
+  gvhu64 u64;
+  //u64 u64;
+  //uint64x1_t u64;
+  gvhs8  s8;
+  gvhs16 s16;
+  gvhs32 s32;
+  gvhs64 s64;
+  //s64 s64;
+  //int64x1_t s64;
+} gvhreg;
+
+// one 128-bit vector value, accessible through any of its lane layouts
+typedef union
+{
+  gvu8  u8;
+  gvu16 u16;
+  gvu32 u32;
+  gvu64 u64;
+  gvs8  s8;
+  gvs16 s16;
+  gvs32 s32;
+  gvs64 s64;
+  // this may be tempting, but it causes gcc to do lots of stack spills
+  //gvhreg h[2];
+} gvreg;
+
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+#include <arm_neon.h>
+
+// thin wrappers: pick the union member matching each intrinsic's lane type
+#define gvaddhn_u32(d, a, b)     d.u16 = vaddhn_u32(a.u32, b.u32)
+#define gvaddw_s32(d, a, b)      d.s64 = vaddw_s32(a.s64, b.s32)
+#define gvabsq_s32(d, s)         d.s32 = vabsq_s32(s.s32)
+#define gvbic_n_u16(d, n)        d.u16 = vbic_u16(d.u16, vmov_n_u16(n))
+#define gvbifq(d, a, b)          d.u8  = vbslq_u8(b.u8, d.u8, a.u8)
+#define gvbit(d, a, b)           d.u8  = vbsl_u8(b.u8, a.u8, d.u8)
+#define gvceqq_u16(d, a, b)      d.u16 = vceqq_u16(a.u16, b.u16)
+#define
gvcgt_s16(d, a, b) d.u16 = vcgt_s16(a.s16, b.s16) +#define gvclt_s16(d, a, b) d.u16 = vclt_s16(a.s16, b.s16) +#define gvcreate_s32(d, a, b) d.s32 = vcreate_s32((u32)(a) | ((u64)(b) << 32)) +#define gvcreate_u32(d, a, b) d.u32 = vcreate_u32((u32)(a) | ((u64)(b) << 32)) +#define gvcreate_s64(d, s) d.s64 = (gvhs64)vcreate_s64(s) +#define gvcreate_u64(d, s) d.u64 = (gvhu64)vcreate_u64(s) +#define gvcombine_u16(d, l, h) d.u16 = vcombine_u16(l.u16, h.u16) +#define gvcombine_u32(d, l, h) d.u32 = vcombine_u32(l.u32, h.u32) +#define gvcombine_s64(d, l, h) d.s64 = vcombine_s64((int64x1_t)l.s64, (int64x1_t)h.s64) +#define gvdup_l_u8(d, s, l) d.u8 = vdup_lane_u8(s.u8, l) +#define gvdup_l_u16(d, s, l) d.u16 = vdup_lane_u16(s.u16, l) +#define gvdup_l_u32(d, s, l) d.u32 = vdup_lane_u32(s.u32, l) +#define gvdupq_l_s64(d, s, l) d.s64 = vdupq_lane_s64((int64x1_t)s.s64, l) +#define gvdupq_l_u32(d, s, l) d.u32 = vdupq_lane_u32(s.u32, l) +#define gvdup_n_s64(d, n) d.s64 = vdup_n_s64(n) +#define gvdup_n_u8(d, n) d.u8 = vdup_n_u8(n) +#define gvdup_n_u16(d, n) d.u16 = vdup_n_u16(n) +#define gvdup_n_u32(d, n) d.u32 = vdup_n_u32(n) +#define gvdupq_n_u16(d, n) d.u16 = vdupq_n_u16(n) +#define gvdupq_n_u32(d, n) d.u32 = vdupq_n_u32(n) +#define gvdupq_n_s64(d, n) d.s64 = vdupq_n_s64(n) +#define gvhaddq_u16(d, a, b) d.u16 = vhaddq_u16(a.u16, b.u16) +#define gvmax_s16(d, a, b) d.s16 = vmax_s16(a.s16, b.s16) +#define gvmin_s16(d, a, b) d.s16 = vmin_s16(a.s16, b.s16) +#define gvminq_u8(d, a, b) d.u8 = vminq_u8(a.u8, b.u8) +#define gvminq_u16(d, a, b) d.u16 = vminq_u16(a.u16, b.u16) +#define gvmla_s32(d, a, b) d.s32 = vmla_s32(d.s32, a.s32, b.s32) +#define gvmla_u32(d, a, b) d.u32 = vmla_u32(d.u32, a.u32, b.u32) +#define gvmlaq_s32(d, a, b) d.s32 = vmlaq_s32(d.s32, a.s32, b.s32) +#define gvmlaq_u32(d, a, b) d.u32 = vmlaq_u32(d.u32, a.u32, b.u32) +#define gvmlal_s32(d, a, b) d.s64 = vmlal_s32(d.s64, a.s32, b.s32) +#define gvmlal_u8(d, a, b) d.u16 = vmlal_u8(d.u16, a.u8, b.u8) +#define gvmlsq_s32(d, 
a, b) d.s32 = vmlsq_s32(d.s32, a.s32, b.s32) +#define gvmlsq_l_s32(d, a, b, l) d.s32 = vmlsq_lane_s32(d.s32, a.s32, b.s32, l) +#define gvmov_l_s32(d, s, l) d.s32 = vset_lane_s32(s, d.s32, l) +#define gvmov_l_u32(d, s, l) d.u32 = vset_lane_u32(s, d.u32, l) +#define gvmovl_u8(d, s) d.u16 = vmovl_u8(s.u8) +#define gvmovl_s32(d, s) d.s64 = vmovl_s32(s.s32) +#define gvmovn_u16(d, s) d.u8 = vmovn_u16(s.u16) +#define gvmovn_u32(d, s) d.u16 = vmovn_u32(s.u32) +#define gvmovn_u64(d, s) d.u32 = vmovn_u64(s.u64) +#define gvmul_s32(d, a, b) d.s32 = vmul_s32(a.s32, b.s32) +#define gvmull_s16(d, a, b) d.s32 = vmull_s16(a.s16, b.s16) +#define gvmull_s32(d, a, b) d.s64 = vmull_s32(a.s32, b.s32) +#define gvmull_u8(d, a, b) d.u16 = vmull_u8(a.u8, b.u8) +#define gvmull_l_u32(d, a, b, l) d.u64 = vmull_lane_u32(a.u32, b.u32, l) +#define gvmlsl_s16(d, a, b) d.s32 = vmlsl_s16(d.s32, a.s16, b.s16) +#define gvneg_s32(d, s) d.s32 = vneg_s32(s.s32) +#define gvqadd_u8(d, a, b) d.u8 = vqadd_u8(a.u8, b.u8) +#define gvqsub_u8(d, a, b) d.u8 = vqsub_u8(a.u8, b.u8) +#define gvshl_u16(d, a, b) d.u16 = vshl_u16(a.u16, b.s16) +#define gvshlq_s64(d, a, b) d.s64 = vshlq_s64(a.s64, b.s64) +#define gvshlq_u32(d, a, b) d.u32 = vshlq_u32(a.u32, b.s32) +#define gvshlq_u64(d, a, b) d.u64 = vshlq_u64(a.u64, b.s64) +#define gvshrq_n_s16(d, s, n) d.s16 = vshrq_n_s16(s.s16, n) +#define gvshrq_n_u16(d, s, n) d.u16 = vshrq_n_u16(s.u16, n) +#define gvshl_n_u32(d, s, n) d.u32 = vshl_n_u32(s.u32, n) +#define gvshlq_n_u16(d, s, n) d.u16 = vshlq_n_u16(s.u16, n) +#define gvshlq_n_u32(d, s, n) d.u32 = vshlq_n_u32(s.u32, n) +#define gvshll_n_s8(d, s, n) d.s16 = vshll_n_s8(s.s8, n) +#define gvshll_n_u8(d, s, n) d.u16 = vshll_n_u8(s.u8, n) +#define gvshll_n_u16(d, s, n) d.u32 = vshll_n_u16(s.u16, n) +#define gvshr_n_u8(d, s, n) d.u8 = vshr_n_u8(s.u8, n) +#define gvshr_n_u16(d, s, n) d.u16 = vshr_n_u16(s.u16, n) +#define gvshr_n_u32(d, s, n) d.u32 = vshr_n_u32(s.u32, n) +#define gvshr_n_u64(d, s, n) d.u64 = 
(gvhu64)vshr_n_u64((uint64x1_t)s.u64, n) +#define gvshrn_n_s64(d, s, n) d.s32 = vshrn_n_s64(s.s64, n) +#define gvshrn_n_u16(d, s, n) d.u8 = vshrn_n_u16(s.u16, n) +#define gvshrn_n_u32(d, s, n) d.u16 = vshrn_n_u32(s.u32, n) +#define gvsli_n_u8(d, s, n) d.u8 = vsli_n_u8(d.u8, s.u8, n) +#define gvsri_n_u8(d, s, n) d.u8 = vsri_n_u8(d.u8, s.u8, n) +#define gvtstq_u16(d, a, b) d.u16 = vtstq_u16(a.u16, b.u16) +#define gvqshrun_n_s16(d, s, n) d.u8 = vqshrun_n_s16(s.s16, n) +#define gvqsubq_u8(d, a, b) d.u8 = vqsubq_u8(a.u8, b.u8) +#define gvqsubq_u16(d, a, b) d.u16 = vqsubq_u16(a.u16, b.u16) + +#define gvget_lo(d, s) d.u16 = vget_low_u16(s.u16) +#define gvget_hi(d, s) d.u16 = vget_high_u16(s.u16) +#define gvlo(s) ({gvhreg t_; gvget_lo(t_, s); t_;}) +#define gvhi(s) ({gvhreg t_; gvget_hi(t_, s); t_;}) + +#define gvset_lo(d, s) d.u16 = vcombine_u16(s.u16, gvhi(d).u16) +#define gvset_hi(d, s) d.u16 = vcombine_u16(gvlo(d).u16, s.u16) + +#define gvtbl2_u8(d, a, b) { \ + uint8x8x2_t v_; \ + v_.val[0] = vget_low_u8(a.u8); v_.val[1] = vget_high_u8(a.u8); \ + d.u8 = vtbl2_u8(v_, b.u8); \ +} + +#define gvzip_u8(d, a, b) { \ + uint8x8x2_t v_ = vzip_u8(a.u8, b.u8); \ + d.u8 = vcombine_u8(v_.val[0], v_.val[1]); \ +} +#define gvzipq_u16(d0, d1, s0, s1) { \ + uint16x8x2_t v_ = vzipq_u16(s0.u16, s1.u16); \ + d0.u16 = v_.val[0]; d1.u16 = v_.val[1]; \ +} + +#define gvld1_u8(d, s) d.u8 = vld1_u8(s) +#define gvld1_u32(d, s) d.u32 = vld1_u32((const u32 *)(s)) +#define gvld1q_u8(d, s) d.u8 = vld1q_u8(s) +#define gvld1q_u16(d, s) d.u16 = vld1q_u16(s) +#define gvld1q_u32(d, s) d.u32 = vld1q_u32((const u32 *)(s)) +#define gvld2_dup(v0, v1, p) { \ + uint8x8x2_t v_ = vld2_dup_u8(p); \ + v0.u8 = v_.val[0]; v1.u8 = v_.val[1]; \ +} +#define gvld2q_u8(v0, v1, p) { \ + uint8x16x2_t v_ = vld2q_u8(p); \ + v0.u8 = v_.val[0]; v1.u8 = v_.val[1]; \ +} + +#define gvst1_u8(v, p) \ + vst1_u8(p, v.u8) +#define gvst1q_u16(v, p) \ + vst1q_u16(p, v.u16) +#define gvst1q_inc_u32(v, p, i) { \ + vst1q_u32((u32 *)(p), 
v.u32); \ + p += (i) / sizeof(*p); \ +} +#define gvst2_u8(v0, v1, p) { \ + uint8x8x2_t v_; \ + v_.val[0] = v0.u8; v_.val[1] = v1.u8; \ + vst2_u8(p, v_); \ +} +#define gvst2_u16(v0, v1, p) { \ + uint16x4x2_t v_; \ + v_.val[0] = v0.u16; v_.val[1] = v1.u16; \ + vst2_u16(p, v_); \ +} +#define gvst2q_u8(v0, v1, p) { \ + uint8x16x2_t v_; \ + v_.val[0] = v0.u8; v_.val[1] = v1.u8; \ + vst2q_u8(p, v_); \ +} +#define gvst4_4_inc_u32(v0, v1, v2, v3, p, i) { \ + uint32x2x4_t v_; \ + v_.val[0] = v0.u32; v_.val[1] = v1.u32; v_.val[2] = v2.u32; v_.val[3] = v3.u32; \ + vst4_u32(p, v_); p += (i) / sizeof(*p); \ +} +#define gvst4_pi_u16(v0, v1, v2, v3, p) { \ + uint16x4x4_t v_; \ + v_.val[0] = v0.u16; v_.val[1] = v1.u16; v_.val[2] = v2.u16; v_.val[3] = v3.u16; \ + vst4_u16((u16 *)(p), v_); p += sizeof(v_) / sizeof(*p); \ +} +#define gvst1q_pi_u32(v, p) \ + gvst1q_inc_u32(v, p, sizeof(v)) +// could use vst1q_u32_x2 but that's not always available +#define gvst1q_2_pi_u32(v0, v1, p) { \ + gvst1q_inc_u32(v0, p, sizeof(v0)); \ + gvst1q_inc_u32(v1, p, sizeof(v1)); \ +} + +/* notes: + - gcc > 9: (arm32) int64x1_t type produces ops on gp regs + (also u64 __attribute__((vector_size(8))) :( ) + - gcc <11: (arm32) handles ' == 0' poorly +*/ + +/* +#elif defined(__SSE2__) +#include +*/ +#else +#error "arch not supported or SIMD support was not enabled by your compiler" +#endif + +// the below have intrinsics but they evaluate to basic operations on both gcc and clang +#define gvadd_s64(d, a, b) d.s64 = a.s64 + b.s64 +#define gvadd_u8(d, a, b) d.u8 = a.u8 + b.u8 +#define gvadd_u16(d, a, b) d.u16 = a.u16 + b.u16 +#define gvadd_u32(d, a, b) d.u32 = a.u32 + b.u32 +#define gvaddq_s64 gvadd_s64 +#define gvaddq_u16 gvadd_u16 +#define gvaddq_u32 gvadd_u32 +#define gvand(d, a, b) d.u32 = a.u32 & b.u32 +#define gvbic(d, a, b) d.u32 = a.u32 & ~b.u32 +#define gvbicq gvbic +#define gveor(d, a, b) d.u32 = a.u32 ^ b.u32 +#define gveorq gveor +#define gvceqz_u16(d, s) d.u16 = s.u16 == 0 +#define gvceqzq_u16 
gvceqz_u16 +#define gvcltz_s16(d, s) d.s16 = s.s16 < 0 +#define gvcltzq_s16 gvcltz_s16 +#define gvsub_u16(d, a, b) d.u16 = a.u16 - b.u16 +#define gvsub_u32(d, a, b) d.u32 = a.u32 - b.u32 +#define gvsubq_u16 gvsub_u16 +#define gvsubq_u32 gvsub_u32 +#define gvorr(d, a, b) d.u32 = a.u32 | b.u32 +#define gvorrq gvorr + +#if defined(__arm__) + +#define gssub16(d, a, b) asm("ssub16 %0,%1,%2" : "=r"(d) : "r"(a), "r"(b)) +#define gsmusdx(d, a, b) asm("smusdx %0,%1,%2" : "=r"(d) : "r"(a), "r"(b)) + +#if 0 +// gcc/config/arm/arm.c +#undef gvadd_s64 +#define gvadd_s64(d, a, b) asm("vadd.i64 %P0,%P1,%P2" : "=w"(d.s64) : "w"(a.s64), "w"(b.s64)) +#endif + +#else + +#define gssub16(d, a, b) d = (u16)((a) - (b)) | ((((a) >> 16) - ((b) >> 16)) << 16) +#define gsmusdx(d, a, b) d = ((s32)(s16)(a) * ((s32)(b) >> 16)) \ + - (((s32)(a) >> 16) * (s16)(b)) + +#endif + +// for compatibility with the original psx_gpu.c code +#define vec_2x64s gvreg +#define vec_2x64u gvreg +#define vec_4x32s gvreg +#define vec_4x32u gvreg +#define vec_8x16s gvreg +#define vec_8x16u gvreg +#define vec_16x8s gvreg +#define vec_16x8u gvreg +#define vec_1x64s gvhreg +#define vec_1x64u gvhreg +#define vec_2x32s gvhreg +#define vec_2x32u gvhreg +#define vec_4x16s gvhreg +#define vec_4x16u gvhreg +#define vec_8x8s gvhreg +#define vec_8x8u gvhreg + +#if 0 +#include +#include +#include +static int ccount; +void cmpp(const char *name, const void *a_, const void *b_, size_t len) +{ + const uint32_t *a = a_, *b = b_, masks[] = { 0, 0xff, 0xffff, 0xffffff }; + size_t i, left; + uint32_t mask; + for (i = 0; i < (len + 3)/4; i++) { + left = len - i*4; + mask = left >= 4 ? 
~0u : masks[left];
+    if ((a[i] ^ b[i]) & mask) {
+      printf("%s: %08x %08x [%03zx/%zu] #%d\n",
+        name, a[i] & mask, b[i] & mask, i*4, i, ccount);
+      exit(1);
+    }
+  }
+  ccount++;
+}
+#define ccmpf(n) cmpp(#n, &psx_gpu->n, &n##_c, sizeof(n##_c))
+#define ccmpa(n,c) cmpp(#n, &psx_gpu->n, &n##_c, sizeof(n##_c[0]) * c)
+
+void dump_r_(const char *name, void *dump, int is_q)
+{
+  unsigned long long *u = dump;
+  //if (ccount > 1) return;
+  printf("%10s %016llx ", name, u[0]);
+  if (is_q)
+    printf("%016llx", u[1]);
+  puts("");
+}
+void __attribute__((noinline,noclone)) dump_r_d(const char *name, void *dump)
+{ dump_r_(name, dump, 0); }
+void __attribute__((noinline,noclone)) dump_r_q(const char *name, void *dump)
+{ dump_r_(name, dump, 1); }
+#define dumprd(n) { u8 dump_[8]; gvst1_u8(n, dump_); dump_r_d(#n, dump_); }
+#define dumprq(n) { u16 dump_[8]; gvst1q_u16(n, dump_); dump_r_q(#n, dump_); }
+#endif
+
+// Compute the per-pixel x/y gradients of u, v, r, g (vectorised) and b
+// (scalar) for the triangle a-b-c, together with the interpolation base
+// values at x = 0, and store them starting at psx_gpu->uvrg.
+//
+// Reads psx_gpu->triangle_area, ->triangle_winding and ->uvrgb_phase.
+// Writes 136 bytes starting at psx_gpu->uvrg.e (byte offsets):
+//     0: uvrg base            16: uvrg x gradient     32: uvrg y gradient
+//    48: { 0, dx, 2*dx, 3*dx } for u and v
+//    80: { 0, dx, 2*dx, 3*dx } for r and g
+//   112: { 0, b_dx, 2*b_dx, 3*b_dx }, 128: b base, 132: b y gradient
+// NOTE(review): which struct fields follow uvrg is not visible here; the
+// layout presumably mirrors the asm implementation - confirm in psx_gpu.h.
+void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu,
+ const vertex_struct * __restrict__ a, const vertex_struct * __restrict__ b,
+ const vertex_struct * __restrict__ c)
+{
+  union { double d; struct { u32 l; u32 h; } i; } dividend, divider;
+  union { double d; gvhreg v; } d30;
+
+#if 0
+  compute_all_gradients_(psx_gpu, a, b, c);
+  return;
+#endif
+  // First compute the triangle area reciprocal and shift. The division will
+  // happen concurrently with much of the work which follows.
+
+  // NOTE(review): __builtin_clz(0) is undefined, so triangle_area must be
+  // non-zero here - presumably the caller rejects degenerate triangles;
+  // confirm.
+
+  // load exponent of 62 into upper half of double
+  u32 shift = __builtin_clz(psx_gpu->triangle_area);
+  u32 triangle_area_normalized = psx_gpu->triangle_area << shift;
+
+  // load area normalized into lower half of double
+  dividend.i.l = triangle_area_normalized >> 10;
+  dividend.i.h = (62 + 1023) << 20;
+
+  divider.i.l = triangle_area_normalized << 20;
+  divider.i.h = ((1022 + 31) << 20) + (triangle_area_normalized >> 11);
+
+  d30.d = dividend.d / divider.d;            // d30 = ((1 << 62) + ta_n) / ta_n
+
+  // ((x1 - x0) * (y2 - y1)) - ((x2 - x1) * (y1 - y0)) =
+  // ( d0 * d1 ) - ( d2 * d3 ) =
+  // ( m0 ) - ( m1 ) = gradient
+
+  // This is split to do 12 elements at a time over three sets: a, b, and c.
+  // Technically we only need to do 10 elements (uvrgb_x and uvrgb_y), so
+  // two of the slots are unused.
+
+  // Inputs are all 16-bit signed. The m0/m1 results are 32-bit signed, as
+  // is g.
+
+  // First type is: uvrg bxxx xxxx
+  // Second type is: yyyy ybyy uvrg
+  // Since x_a and y_c are the same the same variable is used for both.
+
+  gvreg v0;
+  gvreg v1;
+  gvreg v2;
+  gvreg uvrg_xxxx0;
+  gvreg uvrg_xxxx1;
+  gvreg uvrg_xxxx2;
+
+  gvreg y0_ab;
+  gvreg y1_ab;
+  gvreg y2_ab;
+
+  gvreg d0_ab;
+  gvreg d1_ab;
+  gvreg d2_ab;
+  gvreg d3_ab;
+
+  gvreg ga_uvrg_x;
+  gvreg ga_uvrg_y;
+  gvreg gw_rg_x;
+  gvreg gw_rg_y;
+  gvreg w_mask;
+  gvreg r_shift;
+  gvreg uvrg_dx2, uvrg_dx3;
+  gvreg uvrgb_phase;
+  gvhreg zero, tmp_lo, tmp_hi;
+
+  gvld1q_u8(v0, (u8 *)a);                    // v0 = { uvrg0, b0, x0, y0 }
+  gvld1q_u8(v1, (u8 *)b);                    // v1 = { uvrg1, b1, x1, y1 }
+  gvld1q_u8(v2, (u8 *)c);                    // v2 = { uvrg2, b2, x2, y2 }
+
+  gvmovl_u8(uvrg_xxxx0, gvlo(v0));           // uvrg_xxxx0 = { uv0, rg0, b0-, -- }
+  gvmovl_u8(uvrg_xxxx1, gvlo(v1));           // uvrg_xxxx1 = { uv1, rg1, b1-, -- }
+  gvmovl_u8(uvrg_xxxx2, gvlo(v2));           // uvrg_xxxx2 = { uv2, rg2, b2-, -- }
+
+  gvdup_l_u16(tmp_lo, gvhi(v0), 1);          // yyyy0 = { yy0, yy0 }
+  gvcombine_u16(y0_ab, tmp_lo, gvlo(uvrg_xxxx0));
+
+  gvdup_l_u16(tmp_lo, gvhi(v0), 0);          // xxxx0 = { xx0, xx0 }
+  gvset_hi(uvrg_xxxx0, tmp_lo);
+
+  // The (u32) casts keep "<< 16" well defined: shifting a negative int (or
+  // shifting a set bit 15 into the sign bit) is undefined behaviour in C.
+  u32 x1_x2 = (u16)b->x | ((u32)c->x << 16); // x1_x2 = { x1, x2 }
+  u32 x0_x1 = (u16)a->x | ((u32)b->x << 16); // x0_x1 = { x0, x1 }
+
+  gvdup_l_u16(tmp_lo, gvhi(v1), 1);          // yyyy1 = { yy1, yy1 }
+  gvcombine_u16(y1_ab, tmp_lo, gvlo(uvrg_xxxx1));
+
+  gvdup_l_u16(tmp_lo, gvhi(v1), 0);          // xxxx1 = { xx1, xx1 }
+  gvset_hi(uvrg_xxxx1, tmp_lo);
+
+  gvdup_l_u16(tmp_lo, gvhi(v2), 1);          // yyyy2 = { yy2, yy2 }
+  gvcombine_u16(y2_ab, tmp_lo, gvlo(uvrg_xxxx2));
+
+  gvdup_l_u16(tmp_lo, gvhi(v2), 0);          // xxxx2 = { xx2, xx2 }
+  gvset_hi(uvrg_xxxx2, tmp_lo);
+
+  u32 y0_y1 = (u16)a->y | ((u32)b->y << 16); // y0_y1 = { y0, y1 }
+  u32 y1_y2 = (u16)b->y | ((u32)c->y << 16); // y1_y2 = { y1, y2 }
+
+  gvsubq_u16(d0_ab, uvrg_xxxx1, uvrg_xxxx0);
+
+  u32 b1_b2 = b->b | (c->b << 16);           // b1_b2 = { b1, b2 }
+
+  gvsubq_u16(d2_ab, uvrg_xxxx2, uvrg_xxxx1);
+
+  gvsubq_u16(d1_ab, y2_ab, y1_ab);
+
+  u32 b0_b1 = a->b | (b->b << 16);           // b0_b1 = { b0, b1 }
+
+  u32 dx, dy, db;
+  gssub16(dx, x1_x2, x0_x1);                 // dx = { x1 - x0, x2 - x1 }
+  gssub16(dy, y1_y2, y0_y1);                 // dy = { y1 - y0, y2 - y1 }
+  gssub16(db, b1_b2, b0_b1);                 // db = { b1 - b0, b2 - b1 }
+
+  u32 ga_by, ga_bx;
+  gvsubq_u16(d3_ab, y1_ab, y0_ab);
+  gsmusdx(ga_by, dx, db);                    // ga_by = ((x1 - x0) * (b2 - b1)) -
+                                             //         ((x2 - X1) * (b1 - b0))
+  gvmull_s16(ga_uvrg_x, gvlo(d0_ab), gvlo(d1_ab));
+  gsmusdx(ga_bx, db, dy);                    // ga_bx = ((b1 - b0) * (y2 - y1)) -
+                                             //         ((b2 - b1) * (y1 - y0))
+  gvmlsl_s16(ga_uvrg_x, gvlo(d2_ab), gvlo(d3_ab));
+  u32 gs_bx = (s32)ga_bx >> 31;              // movs
+
+  gvmull_s16(ga_uvrg_y, gvhi(d0_ab), gvhi(d1_ab));
+  if ((s32)gs_bx < 0) ga_bx = -ga_bx;        // rsbmi
+
+  gvmlsl_s16(ga_uvrg_y, gvhi(d2_ab), gvhi(d3_ab));
+  u32 gs_by = (s32)ga_by >> 31;              // movs
+
+  gvhreg d0;
+  gvshr_n_u64(d0, d30.v, 22);                // note: on "d30 >> 22" gcc generates junk code
+
+  gvdupq_n_u32(uvrgb_phase, psx_gpu->uvrgb_phase);
+  u32 b_base = psx_gpu->uvrgb_phase + (a->b << 16);
+
+  if ((s32)gs_by < 0) ga_by = -ga_by;        // rsbmi
+  gvreg gs_uvrg_x, gs_uvrg_y;
+  gs_uvrg_x.s32 = ga_uvrg_x.s32 < 0;         // gs_uvrg_x = ga_uvrg_x < 0
+  gs_uvrg_y.s32 = ga_uvrg_y.s32 < 0;         // gs_uvrg_y = ga_uvrg_y < 0
+
+  gvdupq_n_u32(w_mask, -psx_gpu->triangle_winding); // w_mask = { -w, -w, -w, -w }
+  shift -= 62 - 12;                          // shift -= (62 - FIXED_BITS)
+
+  gvreg uvrg_base;
+  gvshll_n_u16(uvrg_base, gvlo(uvrg_xxxx0), 16); // uvrg_base = uvrg0 << 16
+  gvdupq_n_u32(r_shift, shift);              // r_shift = { shift, shift, shift, shift }
+
+  gvaddq_u32(uvrg_base, uvrg_base, uvrgb_phase);
+  gvabsq_s32(ga_uvrg_x, ga_uvrg_x);          // ga_uvrg_x = abs(ga_uvrg_x)
+
+  u32 area_r_s = d0.u32[0];                  // area_r_s = triangle_reciprocal
+  gvabsq_s32(ga_uvrg_y, ga_uvrg_y);          // ga_uvrg_y = abs(ga_uvrg_y)
+
+  gvmull_l_u32(gw_rg_x, gvhi(ga_uvrg_x), d0, 0);
+  gvmull_l_u32(ga_uvrg_x, gvlo(ga_uvrg_x), d0, 0);
+  gvmull_l_u32(gw_rg_y, gvhi(ga_uvrg_y), d0, 0);
+  gvmull_l_u32(ga_uvrg_y, gvlo(ga_uvrg_y), d0, 0);
+
+  gvshlq_u64(gw_rg_x, gw_rg_x, r_shift);
+  gvshlq_u64(ga_uvrg_x, ga_uvrg_x, r_shift);
+  gvshlq_u64(gw_rg_y, gw_rg_y, r_shift);
+  gvshlq_u64(ga_uvrg_y, ga_uvrg_y, r_shift);
+
+  gveorq(gs_uvrg_x, gs_uvrg_x, w_mask);
+  gvmovn_u64(tmp_lo, ga_uvrg_x);
+
+  gveorq(gs_uvrg_y, gs_uvrg_y, w_mask);
+  gvmovn_u64(tmp_hi, gw_rg_x);
+
+  gvcombine_u32(ga_uvrg_x, tmp_lo, tmp_hi);
+
+  // (v ^ sign) - sign re-applies the sign (combined with winding) to |v|
+  gveorq(ga_uvrg_x, ga_uvrg_x, gs_uvrg_x);
+  gvmovn_u64(tmp_lo, ga_uvrg_y);
+
+  gvsubq_u32(ga_uvrg_x, ga_uvrg_x, gs_uvrg_x);
+  gvmovn_u64(tmp_hi, gw_rg_y);
+
+  gvcombine_u32(ga_uvrg_y, tmp_lo, tmp_hi);
+
+  gveorq(ga_uvrg_y, ga_uvrg_y, gs_uvrg_y);
+  ga_bx = ga_bx << 13;
+
+  gvsubq_u32(ga_uvrg_y, ga_uvrg_y, gs_uvrg_y);
+  ga_by = ga_by << 13;
+
+  u32 gw_bx_h, gw_by_h;
+  gw_bx_h = (u64)ga_bx * area_r_s >> 32;
+
+  gvshlq_n_u32(ga_uvrg_x, ga_uvrg_x, 4);
+  gvshlq_n_u32(ga_uvrg_y, ga_uvrg_y, 4);
+
+  gw_by_h = (u64)ga_by * area_r_s >> 32;
+  gvdup_n_u32(tmp_lo, a->x);
+  gvmlsq_l_s32(uvrg_base, ga_uvrg_x, tmp_lo, 0);
+
+  gs_bx = gs_bx ^ -psx_gpu->triangle_winding;
+  gvaddq_u32(uvrg_dx2, ga_uvrg_x, ga_uvrg_x);
+
+  gs_by = gs_by ^ -psx_gpu->triangle_winding;
+
+  u32 r11 = -shift;                          // r11 = negative shift for scalar lsr
+  u32 *store_a = psx_gpu->uvrg.e;
+  r11 = r11 - (32 - 13);
+  u32 *store_b = store_a + 16 / sizeof(u32);
+
+  gvaddq_u32(uvrg_dx3, uvrg_dx2, ga_uvrg_x);
+  gvst1q_inc_u32(uvrg_base, store_a, 32);
+
+  gvst1q_inc_u32(ga_uvrg_x, store_b, 32);
+  u32 g_bx = (u32)gw_bx_h >> r11;
+
+  gvst1q_inc_u32(ga_uvrg_y, store_a, 32);
+  u32 g_by = (u32)gw_by_h >> r11;
+
+  gvdup_n_u32(zero, 0);
+
+  gvst4_4_inc_u32(zero, gvlo(ga_uvrg_x), gvlo(uvrg_dx2), gvlo(uvrg_dx3), store_b, 32);
+  g_bx = g_bx ^ gs_bx;
+
+  gvst4_4_inc_u32(zero, gvhi(ga_uvrg_x), gvhi(uvrg_dx2), gvhi(uvrg_dx3), store_b, 32);
+  g_bx = g_bx - gs_bx;
+
+  g_bx = g_bx << 4;
+  g_by = g_by ^ gs_by;
+
+  b_base -= g_bx * a->x;
+  g_by = g_by - gs_by;
+
+  g_by = g_by << 4;
+
+  u32 g_bx2 = g_bx + g_bx;
+  u32 g_bx3 = g_bx + g_bx2;
+
+  // 112
+  store_b[0] = 0;
+  store_b[1] = g_bx;
+  store_b[2] = g_bx2;
+  store_b[3] = g_bx3;
+  store_b[4] = b_base;
+  store_b[5] = g_by; // 132
+}
+
+#define
setup_spans_debug_check(span_edge_data_element) \ + +#define setup_spans_prologue_alternate_yes() \ + vec_2x64s alternate_x; \ + vec_2x64s alternate_dx_dy; \ + vec_4x32s alternate_x_32; \ + vec_2x32s alternate_x_16; \ + \ + vec_4x16u alternate_select; \ + vec_4x16s y_mid_point; \ + \ + s32 y_b = v_b->y; \ + s64 edge_alt; \ + s32 edge_dx_dy_alt; \ + u32 edge_shift_alt \ + +#define setup_spans_prologue_alternate_no() \ + +#define setup_spans_prologue(alternate_active) \ + edge_data_struct *span_edge_data; \ + vec_4x32u *span_uvrg_offset; \ + u32 *span_b_offset; \ + \ + s32 clip; \ + vec_4x32u v_clip; \ + \ + union { vec_2x64s full; vec_1x64s h[2]; } edges_xy; \ + vec_2x32s edges_dx_dy; \ + vec_2x32u edge_shifts; \ + \ + vec_2x64s left_x, right_x; \ + vec_2x64s left_dx_dy, right_dx_dy; \ + vec_4x32s left_x_32, right_x_32; \ + vec_2x32s left_x_32_lo, right_x_32_lo; \ + vec_2x32s left_x_32_hi, right_x_32_hi; \ + vec_4x16s left_right_x_16_lo, left_right_x_16_hi; \ + vec_4x16s y_x4; \ + vec_8x16s left_edge; \ + vec_8x16s right_edge; \ + vec_4x16u span_shift; \ + \ + vec_2x32u c_0x01; \ + vec_4x16u c_0x04; \ + vec_4x16u c_0xFFFE; \ + vec_4x16u c_0x07; \ + \ + vec_2x32s x_starts; \ + vec_2x32s x_ends; \ + \ + s32 x_a = v_a->x; \ + s32 x_b = v_b->x; \ + s32 x_c = v_c->x; \ + s32 y_a = v_a->y; \ + s32 y_c = v_c->y; \ + \ + vec_4x32u uvrg; \ + vec_4x32u uvrg_dy; \ + u32 b = psx_gpu->b; \ + u32 b_dy = psx_gpu->b_dy; \ + const u32 *reciprocal_table = psx_gpu->reciprocal_table_ptr; \ + \ + gvld1q_u32(uvrg, psx_gpu->uvrg.e); \ + gvld1q_u32(uvrg_dy, psx_gpu->uvrg_dy.e); \ + gvdup_n_u32(c_0x01, 0x01); \ + setup_spans_prologue_alternate_##alternate_active() \ + +#define setup_spans_prologue_b() \ + span_edge_data = psx_gpu->span_edge_data; \ + span_uvrg_offset = (vec_4x32u *)psx_gpu->span_uvrg_offset; \ + span_b_offset = psx_gpu->span_b_offset; \ + \ + vec_8x16u c_0x0001; \ + \ + gvdupq_n_u16(c_0x0001, 0x0001); \ + gvdupq_n_u16(left_edge, psx_gpu->viewport_start_x); \ + 
gvdupq_n_u16(right_edge, psx_gpu->viewport_end_x); \ + gvaddq_u16(right_edge, right_edge, c_0x0001); \ + gvdup_n_u16(c_0x04, 0x04); \ + gvdup_n_u16(c_0x07, 0x07); \ + gvdup_n_u16(c_0xFFFE, 0xFFFE); \ + + +#define compute_edge_delta_x2() \ +{ \ + vec_2x32s heights; \ + vec_2x32s height_reciprocals; \ + vec_2x32s heights_b; \ + vec_2x32u widths; \ + \ + u32 edge_shift = reciprocal_table[height]; \ + \ + gvdup_n_u32(heights, height); \ + gvsub_u32(widths, x_ends, x_starts); \ + \ + gvdup_n_u32(edge_shifts, edge_shift); \ + gvsub_u32(heights_b, heights, c_0x01); \ + gvshr_n_u32(height_reciprocals, edge_shifts, 10); \ + \ + gvmla_s32(heights_b, x_starts, heights); \ + gvbic_n_u16(edge_shifts, 0xE0); \ + gvmul_s32(edges_dx_dy, widths, height_reciprocals); \ + gvmull_s32(edges_xy.full, heights_b, height_reciprocals); \ +} \ + +#define compute_edge_delta_x3(start_c, height_a, height_b) \ +{ \ + vec_2x32s heights; \ + vec_2x32s height_reciprocals; \ + vec_2x32s heights_b; \ + vec_2x32u widths; \ + \ + u32 width_alt; \ + s32 height_b_alt; \ + u32 height_reciprocal_alt; \ + \ + gvcreate_u32(heights, height_a, height_b); \ + gvcreate_u32(edge_shifts, reciprocal_table[height_a], reciprocal_table[height_b]); \ + \ + edge_shift_alt = reciprocal_table[height_minor_b]; \ + \ + gvsub_u32(widths, x_ends, x_starts); \ + width_alt = x_c - start_c; \ + \ + gvshr_n_u32(height_reciprocals, edge_shifts, 10); \ + height_reciprocal_alt = edge_shift_alt >> 10; \ + \ + gvbic_n_u16(edge_shifts, 0xE0); \ + edge_shift_alt &= 0x1F; \ + \ + gvsub_u32(heights_b, heights, c_0x01); \ + height_b_alt = height_minor_b - 1; \ + \ + gvmla_s32(heights_b, x_starts, heights); \ + height_b_alt += height_minor_b * start_c; \ + \ + gvmull_s32(edges_xy.full, heights_b, height_reciprocals); \ + edge_alt = (s64)height_b_alt * height_reciprocal_alt; \ + \ + gvmul_s32(edges_dx_dy, widths, height_reciprocals); \ + edge_dx_dy_alt = width_alt * height_reciprocal_alt; \ +} \ + + +#define setup_spans_adjust_y_up() \ + 
gvsub_u32(y_x4, y_x4, c_0x04) \ + +#define setup_spans_adjust_y_down() \ + gvadd_u32(y_x4, y_x4, c_0x04) \ + +#define setup_spans_adjust_interpolants_up() \ + gvsubq_u32(uvrg, uvrg, uvrg_dy); \ + b -= b_dy \ + +#define setup_spans_adjust_interpolants_down() \ + gvaddq_u32(uvrg, uvrg, uvrg_dy); \ + b += b_dy \ + + +#define setup_spans_clip_interpolants_increment() \ + gvmlaq_s32(uvrg, uvrg_dy, v_clip); \ + b += b_dy * clip \ + +#define setup_spans_clip_interpolants_decrement() \ + gvmlsq_s32(uvrg, uvrg_dy, v_clip); \ + b -= b_dy * clip \ + +#define setup_spans_clip_alternate_yes() \ + edge_alt += edge_dx_dy_alt * (s64)(clip) \ + +#define setup_spans_clip_alternate_no() \ + +#define setup_spans_clip(direction, alternate_active) \ +{ \ + gvdupq_n_u32(v_clip, clip); \ + gvmlal_s32(edges_xy.full, edges_dx_dy, gvlo(v_clip)); \ + setup_spans_clip_alternate_##alternate_active(); \ + setup_spans_clip_interpolants_##direction(); \ +} \ + + +#define setup_spans_adjust_edges_alternate_no(left_index, right_index) \ +{ \ + vec_2x64s edge_shifts_64; \ + union { vec_2x64s full; vec_1x64s h[2]; } edges_dx_dy_64; \ + vec_1x64s left_x_hi, right_x_hi; \ + \ + gvmovl_s32(edge_shifts_64, edge_shifts); \ + gvshlq_s64(edges_xy.full, edges_xy.full, edge_shifts_64); \ + \ + gvmovl_s32(edges_dx_dy_64.full, edges_dx_dy); \ + gvshlq_s64(edges_dx_dy_64.full, edges_dx_dy_64.full, edge_shifts_64); \ + \ + gvdupq_l_s64(left_x, edges_xy.h[left_index], 0); \ + gvdupq_l_s64(right_x, edges_xy.h[right_index], 0); \ + \ + gvdupq_l_s64(left_dx_dy, edges_dx_dy_64.h[left_index], 0); \ + gvdupq_l_s64(right_dx_dy, edges_dx_dy_64.h[right_index], 0); \ + \ + gvadd_s64(left_x_hi, gvlo(left_x), gvlo(left_dx_dy)); \ + gvadd_s64(right_x_hi, gvlo(right_x), gvlo(right_dx_dy)); \ + \ + gvset_hi(left_x, left_x_hi); \ + gvset_hi(right_x, right_x_hi); \ + \ + gvaddq_s64(left_dx_dy, left_dx_dy, left_dx_dy); \ + gvaddq_s64(right_dx_dy, right_dx_dy, right_dx_dy); \ +} \ + +#define 
setup_spans_adjust_edges_alternate_yes(left_index, right_index) \ +{ \ + setup_spans_adjust_edges_alternate_no(left_index, right_index); \ + s64 edge_dx_dy_alt_64; \ + vec_1x64s alternate_x_hi; \ + \ + gvdup_n_u16(y_mid_point, y_b); \ + \ + edge_alt <<= edge_shift_alt; \ + edge_dx_dy_alt_64 = (s64)edge_dx_dy_alt << edge_shift_alt; \ + \ + gvdupq_n_s64(alternate_x, edge_alt); \ + gvdupq_n_s64(alternate_dx_dy, edge_dx_dy_alt_64); \ + \ + gvadd_s64(alternate_x_hi, gvlo(alternate_x), gvlo(alternate_dx_dy)); \ + gvaddq_s64(alternate_dx_dy, alternate_dx_dy, alternate_dx_dy); \ + gvset_hi(alternate_x, alternate_x_hi); \ +} \ + + +#define setup_spans_y_select_up() \ + gvclt_s16(alternate_select, y_x4, y_mid_point) \ + +#define setup_spans_y_select_down() \ + gvcgt_s16(alternate_select, y_x4, y_mid_point) \ + +#define setup_spans_y_select_alternate_yes(direction) \ + setup_spans_y_select_##direction() \ + +#define setup_spans_y_select_alternate_no(direction) \ + +#define setup_spans_alternate_select_left() \ + gvbit(left_right_x_16_lo, alternate_x_16, alternate_select); \ + +#define setup_spans_alternate_select_right() \ + gvbit(left_right_x_16_hi, alternate_x_16, alternate_select); \ + +#define setup_spans_alternate_select_none() \ + +#define setup_spans_increment_alternate_yes() \ +{ \ + vec_2x32s alternate_x_32_lo, alternate_x_32_hi; \ + gvshrn_n_s64(alternate_x_32_lo, alternate_x, 32); \ + gvaddq_s64(alternate_x, alternate_x, alternate_dx_dy); \ + gvshrn_n_s64(alternate_x_32_hi, alternate_x, 32); \ + gvaddq_s64(alternate_x, alternate_x, alternate_dx_dy); \ + gvcombine_u32(alternate_x_32, alternate_x_32_lo, alternate_x_32_hi); \ + gvmovn_u32(alternate_x_16, alternate_x_32); \ +} \ + +#define setup_spans_increment_alternate_no() \ + +#define setup_spans_set_x4(alternate, direction, alternate_active) \ +{ \ + gvst1q_pi_u32(uvrg, span_uvrg_offset); \ + *span_b_offset++ = b; \ + setup_spans_adjust_interpolants_##direction(); \ + \ + gvst1q_pi_u32(uvrg, span_uvrg_offset); \ + 
*span_b_offset++ = b; \ + setup_spans_adjust_interpolants_##direction(); \ + \ + gvst1q_pi_u32(uvrg, span_uvrg_offset); \ + *span_b_offset++ = b; \ + setup_spans_adjust_interpolants_##direction(); \ + \ + gvst1q_pi_u32(uvrg, span_uvrg_offset); \ + *span_b_offset++ = b; \ + setup_spans_adjust_interpolants_##direction(); \ + \ + gvshrn_n_s64(left_x_32_lo, left_x, 32); \ + gvshrn_n_s64(right_x_32_lo, right_x, 32); \ + \ + gvaddq_s64(left_x, left_x, left_dx_dy); \ + gvaddq_s64(right_x, right_x, right_dx_dy); \ + \ + gvshrn_n_s64(left_x_32_hi, left_x, 32); \ + gvshrn_n_s64(right_x_32_hi, right_x, 32); \ + \ + gvaddq_s64(left_x, left_x, left_dx_dy); \ + gvaddq_s64(right_x, right_x, right_dx_dy); \ + \ + gvcombine_s64(left_x_32, left_x_32_lo, left_x_32_hi); \ + gvcombine_s64(right_x_32, right_x_32_lo, right_x_32_hi); \ + \ + gvmovn_u32(left_right_x_16_lo, left_x_32); \ + gvmovn_u32(left_right_x_16_hi, right_x_32); \ + \ + setup_spans_increment_alternate_##alternate_active(); \ + setup_spans_y_select_alternate_##alternate_active(direction); \ + setup_spans_alternate_select_##alternate(); \ + \ + gvmax_s16(left_right_x_16_lo, left_right_x_16_lo, gvlo(left_edge)); \ + gvmax_s16(left_right_x_16_hi, left_right_x_16_hi, gvhi(left_edge)); \ + gvmin_s16(left_right_x_16_lo, left_right_x_16_lo, gvlo(right_edge)); \ + gvmin_s16(left_right_x_16_hi, left_right_x_16_hi, gvhi(right_edge)); \ + \ + gvsub_u16(left_right_x_16_hi, left_right_x_16_hi, left_right_x_16_lo); \ + gvadd_u16(left_right_x_16_hi, left_right_x_16_hi, c_0x07); \ + gvand(span_shift, left_right_x_16_hi, c_0x07); \ + gvshl_u16(span_shift, c_0xFFFE, span_shift); \ + gvshr_n_u16(left_right_x_16_hi, left_right_x_16_hi, 3); \ + \ + gvst4_pi_u16(left_right_x_16_lo, left_right_x_16_hi, span_shift, y_x4, \ + span_edge_data); \ + \ + setup_spans_adjust_y_##direction(); \ +} \ + + +#define setup_spans_alternate_adjust_yes() \ + edge_alt -= edge_dx_dy_alt * (s64)height_minor_a \ + +#define setup_spans_alternate_adjust_no() \ + + 
+#define setup_spans_down(left_index, right_index, alternate, alternate_active) \ + setup_spans_alternate_adjust_##alternate_active(); \ + if(y_c > psx_gpu->viewport_end_y) \ + height -= y_c - psx_gpu->viewport_end_y - 1; \ + \ + clip = psx_gpu->viewport_start_y - y_a; \ + if(clip > 0) \ + { \ + height -= clip; \ + y_a += clip; \ + setup_spans_clip(increment, alternate_active); \ + } \ + \ + setup_spans_prologue_b(); \ + \ + if(height > 0) \ + { \ + u64 y_x4_ = ((u64)(y_a + 3) << 48) | ((u64)(u16)(y_a + 2) << 32) \ + | (u32)((y_a + 1) << 16) | (u16)y_a; \ + gvcreate_u64(y_x4, y_x4_); \ + setup_spans_adjust_edges_alternate_##alternate_active(left_index, \ + right_index); \ + \ + psx_gpu->num_spans = height; \ + do \ + { \ + setup_spans_set_x4(alternate, down, alternate_active); \ + height -= 4; \ + } while(height > 0); \ + } \ + + +#define setup_spans_alternate_pre_increment_yes() \ + edge_alt += edge_dx_dy_alt \ + +#define setup_spans_alternate_pre_increment_no() \ + +#define setup_spans_up_decrement_height_yes() \ + height-- \ + +#define setup_spans_up_decrement_height_no() \ + {} \ + +#define setup_spans_up(left_index, right_index, alternate, alternate_active) \ + setup_spans_alternate_adjust_##alternate_active(); \ + y_a--; \ + \ + if(y_c < psx_gpu->viewport_start_y) \ + height -= psx_gpu->viewport_start_y - y_c; \ + else \ + setup_spans_up_decrement_height_##alternate_active(); \ + \ + clip = y_a - psx_gpu->viewport_end_y; \ + if(clip > 0) \ + { \ + height -= clip; \ + y_a -= clip; \ + setup_spans_clip(decrement, alternate_active); \ + } \ + \ + setup_spans_prologue_b(); \ + \ + if(height > 0) \ + { \ + u64 y_x4_ = ((u64)(y_a - 3) << 48) | ((u64)(u16)(y_a - 2) << 32) \ + | (u32)((y_a - 1) << 16) | (u16)y_a; \ + gvcreate_u64(y_x4, y_x4_); \ + gvaddw_s32(edges_xy.full, edges_xy.full, edges_dx_dy); \ + setup_spans_alternate_pre_increment_##alternate_active(); \ + setup_spans_adjust_edges_alternate_##alternate_active(left_index, \ + right_index); \ + 
setup_spans_adjust_interpolants_up(); \ + \ + psx_gpu->num_spans = height; \ + while(height > 0) \ + { \ + setup_spans_set_x4(alternate, up, alternate_active); \ + height -= 4; \ + } \ + } \ + +#define index_left 0 +#define index_right 1 + +#define setup_spans_up_up(minor, major) \ + setup_spans_prologue(yes); \ + s32 height_minor_a = y_a - y_b; \ + s32 height_minor_b = y_b - y_c; \ + s32 height = y_a - y_c; \ + \ + gvdup_n_u32(x_starts, x_a); \ + gvcreate_u32(x_ends, x_c, x_b); \ + \ + compute_edge_delta_x3(x_b, height, height_minor_a); \ + setup_spans_up(index_##major, index_##minor, minor, yes) \ + +void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c) +{ +#if 0 + setup_spans_up_left_(psx_gpu, v_a, v_b, v_c); + return; +#endif + setup_spans_up_up(left, right) +} + +void setup_spans_up_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c) +{ +#if 0 + setup_spans_up_right_(psx_gpu, v_a, v_b, v_c); + return; +#endif + setup_spans_up_up(right, left) +} + +#define setup_spans_down_down(minor, major) \ + setup_spans_prologue(yes); \ + s32 height_minor_a = y_b - y_a; \ + s32 height_minor_b = y_c - y_b; \ + s32 height = y_c - y_a; \ + \ + gvdup_n_u32(x_starts, x_a); \ + gvcreate_u32(x_ends, x_c, x_b); \ + \ + compute_edge_delta_x3(x_b, height, height_minor_a); \ + setup_spans_down(index_##major, index_##minor, minor, yes) \ + +void setup_spans_down_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c) +{ +#if 0 + setup_spans_down_left_(psx_gpu, v_a, v_b, v_c); + return; +#endif + setup_spans_down_down(left, right) +} + +void setup_spans_down_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c) +{ +#if 0 + setup_spans_down_right_(psx_gpu, v_a, v_b, v_c); + return; +#endif + setup_spans_down_down(right, left) +} + +#define setup_spans_up_flat() \ + s32 height = y_a - y_c; \ + \ + 
compute_edge_delta_x2(); \ + setup_spans_up(index_left, index_right, none, no) \ + +void setup_spans_up_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c) +{ +#if 0 + setup_spans_up_a_(psx_gpu, v_a, v_b, v_c); + return; +#endif + setup_spans_prologue(no); + + gvcreate_u32(x_starts, x_a, x_b); + gvdup_n_u32(x_ends, x_c); + + setup_spans_up_flat() +} + +void setup_spans_up_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c) +{ +#if 0 + setup_spans_up_b_(psx_gpu, v_a, v_b, v_c); + return; +#endif + setup_spans_prologue(no); + + gvdup_n_u32(x_starts, x_a); + gvcreate_u32(x_ends, x_b, x_c); + + setup_spans_up_flat() +} + +#define setup_spans_down_flat() \ + s32 height = y_c - y_a; \ + \ + compute_edge_delta_x2(); \ + setup_spans_down(index_left, index_right, none, no) \ + +void setup_spans_down_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c) +{ +#if 0 + setup_spans_down_a_(psx_gpu, v_a, v_b, v_c); + return; +#endif + setup_spans_prologue(no); + + gvcreate_u32(x_starts, x_a, x_b); + gvdup_n_u32(x_ends, x_c); + + setup_spans_down_flat() +} + +void setup_spans_down_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c) +{ +#if 0 + setup_spans_down_b_(psx_gpu, v_a, v_b, v_c); + return; +#endif + setup_spans_prologue(no) + + gvdup_n_u32(x_starts, x_a); + gvcreate_u32(x_ends, x_b, x_c); + + setup_spans_down_flat() +} + +void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, + vertex_struct *v_b, vertex_struct *v_c) +{ +#if 0 + setup_spans_up_down_(psx_gpu, v_a, v_b, v_c); + return; +#endif + setup_spans_prologue(no); + + s32 y_b = v_b->y; + s64 edge_alt; + s32 edge_dx_dy_alt; + u32 edge_shift_alt; + + s32 middle_y = y_a; + s32 height_minor_a = y_a - y_b; + s32 height_minor_b = y_c - y_a; + s32 height_major = y_c - y_b; + + vec_2x64s edges_xy_b; + vec_1x64s edges_xy_b_left; + vec_2x32s edges_dx_dy_b; + vec_2x32u 
edge_shifts_b; + + vec_2x32s height_increment; + + gvcreate_u32(x_starts, x_a, x_c); + gvdup_n_u32(x_ends, x_b); + + compute_edge_delta_x3(x_a, height_minor_a, height_major); + + gvcreate_s32(height_increment, 0, height_minor_b); + + gvmlal_s32(edges_xy.full, edges_dx_dy, height_increment); + + gvcreate_s64(edges_xy_b_left, edge_alt); + gvcombine_s64(edges_xy_b, edges_xy_b_left, gvhi(edges_xy.full)); + + edge_shifts_b = edge_shifts; + gvmov_l_u32(edge_shifts_b, edge_shift_alt, 0); + + gvneg_s32(edges_dx_dy_b, edges_dx_dy); + gvmov_l_s32(edges_dx_dy_b, edge_dx_dy_alt, 0); + + y_a--; + + if(y_b < psx_gpu->viewport_start_y) + height_minor_a -= psx_gpu->viewport_start_y - y_b; + + clip = y_a - psx_gpu->viewport_end_y; + if(clip > 0) + { + height_minor_a -= clip; + y_a -= clip; + setup_spans_clip(decrement, no); + } + + setup_spans_prologue_b(); + + if(height_minor_a > 0) + { + u64 y_x4_ = ((u64)(y_a - 3) << 48) | ((u64)(u16)(y_a - 2) << 32) + | (u32)((y_a - 1) << 16) | (u16)y_a; + gvcreate_u64(y_x4, y_x4_); + gvaddw_s32(edges_xy.full, edges_xy.full, edges_dx_dy); + setup_spans_adjust_edges_alternate_no(index_left, index_right); + setup_spans_adjust_interpolants_up(); + + psx_gpu->num_spans = height_minor_a; + while(height_minor_a > 0) + { + setup_spans_set_x4(none, up, no); + height_minor_a -= 4; + } + + span_edge_data += height_minor_a; + span_uvrg_offset += height_minor_a; + span_b_offset += height_minor_a; + } + + edges_xy.full = edges_xy_b; + edges_dx_dy = edges_dx_dy_b; + edge_shifts = edge_shifts_b; + + gvld1q_u32(uvrg, psx_gpu->uvrg.e); + b = psx_gpu->b; + + y_a = middle_y; + + if(y_c > psx_gpu->viewport_end_y) + height_minor_b -= y_c - psx_gpu->viewport_end_y - 1; + + clip = psx_gpu->viewport_start_y - y_a; + if(clip > 0) + { + height_minor_b -= clip; + y_a += clip; + setup_spans_clip(increment, no); + } + + if(height_minor_b > 0) + { + u64 y_x4_ = ((u64)(y_a + 3) << 48) | ((u64)(u16)(y_a + 2) << 32) + | (u32)((y_a + 1) << 16) | (u16)y_a; + gvcreate_u64(y_x4, 
y_x4_); + setup_spans_adjust_edges_alternate_no(index_left, index_right); + + // FIXME: overflow corner case + if(psx_gpu->num_spans + height_minor_b == MAX_SPANS) + height_minor_b &= ~3; + + psx_gpu->num_spans += height_minor_b; + while(height_minor_b > 0) + { + setup_spans_set_x4(none, down, no); + height_minor_b -= 4; + } + } +} + + +#define dither_table_entry_normal(value) \ + (value) \ + +#define setup_blocks_load_msb_mask_indirect() \ + +#define setup_blocks_load_msb_mask_direct() \ + vec_8x16u msb_mask; \ + gvdupq_n_u16(msb_mask, psx_gpu->mask_msb); \ + +#define setup_blocks_variables_shaded_textured(target) \ + vec_4x32u u_block; \ + vec_4x32u v_block; \ + vec_4x32u r_block; \ + vec_4x32u g_block; \ + vec_4x32u b_block; \ + vec_4x32u uvrg_dx; \ + vec_4x32u uvrg_dx4; \ + vec_4x32u uvrg_dx8; \ + vec_4x32u uvrg; \ + vec_16x8u texture_mask; \ + vec_8x8u texture_mask_lo, texture_mask_hi; \ + u32 b_dx = psx_gpu->b_block_span.e[1]; \ + u32 b_dx4 = b_dx << 2; \ + u32 b_dx8 = b_dx << 3; \ + u32 b; \ + \ + gvld1q_u32(uvrg_dx, psx_gpu->uvrg_dx.e); \ + gvshlq_n_u32(uvrg_dx4, uvrg_dx, 2); \ + gvshlq_n_u32(uvrg_dx8, uvrg_dx, 3); \ + gvld2_dup(texture_mask_lo, texture_mask_hi, &psx_gpu->texture_mask_width); \ + gvcombine_u16(texture_mask, texture_mask_lo, texture_mask_hi) \ + +#define setup_blocks_variables_shaded_untextured(target) \ + vec_4x32u r_block; \ + vec_4x32u g_block; \ + vec_4x32u b_block; \ + vec_4x32u rgb_dx; \ + vec_2x32u rgb_dx_lo, rgb_dx_hi; \ + vec_4x32u rgb_dx4; \ + vec_4x32u rgb_dx8; \ + vec_4x32u rgb; \ + vec_2x32u rgb_lo, rgb_hi; \ + \ + vec_8x8u d64_0x07; \ + vec_8x8u d64_1; \ + vec_8x8u d64_4; \ + vec_8x8u d64_128; \ + \ + gvdup_n_u8(d64_0x07, 0x07); \ + gvdup_n_u8(d64_1, 1); \ + gvdup_n_u8(d64_4, 4); \ + gvdup_n_u8(d64_128, 128); \ + \ + gvld1_u32(rgb_dx_lo, &psx_gpu->uvrg_dx.e[2]); \ + gvcreate_u32(rgb_dx_hi, psx_gpu->b_block_span.e[1], 0); \ + gvcombine_u32(rgb_dx, rgb_dx_lo, rgb_dx_hi); \ + gvshlq_n_u32(rgb_dx4, rgb_dx, 2); \ + 
gvshlq_n_u32(rgb_dx8, rgb_dx, 3) \ + +#define setup_blocks_variables_unshaded_textured(target) \ + vec_4x32u u_block; \ + vec_4x32u v_block; \ + vec_2x32u uv_dx; \ + vec_2x32u uv_dx4; \ + vec_2x32u uv_dx8; \ + vec_2x32u uv; \ + vec_16x8u texture_mask; \ + vec_8x8u texture_mask_lo, texture_mask_hi; \ + \ + gvld1_u32(uv_dx, psx_gpu->uvrg_dx.e); \ + gvld1_u32(uv, psx_gpu->uvrg.e); \ + gvshl_n_u32(uv_dx4, uv_dx, 2); \ + gvshl_n_u32(uv_dx8, uv_dx, 3); \ + gvld2_dup(texture_mask_lo, texture_mask_hi, &psx_gpu->texture_mask_width); \ + gvcombine_u16(texture_mask, texture_mask_lo, texture_mask_hi) \ + +#define setup_blocks_variables_unshaded_untextured_direct() \ + gvorrq(colors, colors, msb_mask) \ + +#define setup_blocks_variables_unshaded_untextured_indirect() \ + +#define setup_blocks_variables_unshaded_untextured(target) \ + u32 color = psx_gpu->triangle_color; \ + vec_8x16u colors; \ + \ + u32 color_r = color & 0xFF; \ + u32 color_g = (color >> 8) & 0xFF; \ + u32 color_b = (color >> 16) & 0xFF; \ + \ + color = (color_r >> 3) | ((color_g >> 3) << 5) | \ + ((color_b >> 3) << 10); \ + gvdupq_n_u16(colors, color); \ + setup_blocks_variables_unshaded_untextured_##target() \ + +#define setup_blocks_span_initialize_dithered_textured() \ + vec_8x16u dither_offsets; \ + gvshll_n_s8(dither_offsets, dither_offsets_short, 4) \ + +#define setup_blocks_span_initialize_dithered_untextured() \ + vec_8x8u dither_offsets; \ + gvadd_u8(dither_offsets, dither_offsets_short, d64_4) \ + +#define setup_blocks_span_initialize_dithered(texturing) \ + u32 dither_row = psx_gpu->dither_table[y & 0x3]; \ + u32 dither_shift = (span_edge_data->left_x & 0x3) * 8; \ + vec_8x8s dither_offsets_short; \ + \ + dither_row = \ + (dither_row >> dither_shift) | (dither_row << (32 - dither_shift)); \ + gvdup_n_u32(dither_offsets_short, dither_row); \ + setup_blocks_span_initialize_dithered_##texturing() \ + +#define setup_blocks_span_initialize_undithered(texturing) \ + +#define 
setup_blocks_span_initialize_shaded_textured() \ +{ \ + u32 left_x = span_edge_data->left_x; \ + vec_4x32u block_span; \ + vec_4x32u v_left_x; \ + \ + gvld1q_u32(uvrg, span_uvrg_offset); \ + gvdupq_n_u32(v_left_x, left_x); \ + gvmlaq_u32(uvrg, uvrg_dx, v_left_x); \ + b = *span_b_offset; \ + b += b_dx * left_x; \ + \ + gvdupq_l_u32(u_block, gvlo(uvrg), 0); \ + gvdupq_l_u32(v_block, gvlo(uvrg), 1); \ + gvdupq_l_u32(r_block, gvhi(uvrg), 0); \ + gvdupq_l_u32(g_block, gvhi(uvrg), 1); \ + gvdupq_n_u32(b_block, b); \ + \ + gvld1q_u32(block_span, psx_gpu->u_block_span.e); \ + gvaddq_u32(u_block, u_block, block_span); \ + gvld1q_u32(block_span, psx_gpu->v_block_span.e); \ + gvaddq_u32(v_block, v_block, block_span); \ + gvld1q_u32(block_span, psx_gpu->r_block_span.e); \ + gvaddq_u32(r_block, r_block, block_span); \ + gvld1q_u32(block_span, psx_gpu->g_block_span.e); \ + gvaddq_u32(g_block, g_block, block_span); \ + gvld1q_u32(block_span, psx_gpu->b_block_span.e); \ + gvaddq_u32(b_block, b_block, block_span); \ +} + +#define setup_blocks_span_initialize_shaded_untextured() \ +{ \ + u32 left_x = span_edge_data->left_x; \ + u32 *span_uvrg_offset_high = (u32 *)span_uvrg_offset + 2; \ + vec_4x32u block_span; \ + vec_4x32u v_left_x; \ + \ + gvld1_u32(rgb_lo, span_uvrg_offset_high); \ + gvcreate_u32(rgb_hi, *span_b_offset, 0); \ + gvcombine_u32(rgb, rgb_lo, rgb_hi); \ + gvdupq_n_u32(v_left_x, left_x); \ + gvmlaq_u32(rgb, rgb_dx, v_left_x); \ + \ + gvdupq_l_u32(r_block, gvlo(rgb), 0); \ + gvdupq_l_u32(g_block, gvlo(rgb), 1); \ + gvdupq_l_u32(b_block, gvhi(rgb), 0); \ + \ + gvld1q_u32(block_span, psx_gpu->r_block_span.e); \ + gvaddq_u32(r_block, r_block, block_span); \ + gvld1q_u32(block_span, psx_gpu->g_block_span.e); \ + gvaddq_u32(g_block, g_block, block_span); \ + gvld1q_u32(block_span, psx_gpu->b_block_span.e); \ + gvaddq_u32(b_block, b_block, block_span); \ +} \ + +#define setup_blocks_span_initialize_unshaded_textured() \ +{ \ + u32 left_x = span_edge_data->left_x; \ + 
vec_4x32u block_span; \ + vec_2x32u v_left_x; \ + \ + gvld1_u32(uv, span_uvrg_offset); \ + gvdup_n_u32(v_left_x, left_x); \ + gvmla_u32(uv, uv_dx, v_left_x); \ + \ + gvdupq_l_u32(u_block, uv, 0); \ + gvdupq_l_u32(v_block, uv, 1); \ + \ + gvld1q_u32(block_span, psx_gpu->u_block_span.e); \ + gvaddq_u32(u_block, u_block, block_span); \ + gvld1q_u32(block_span, psx_gpu->v_block_span.e); \ + gvaddq_u32(v_block, v_block, block_span); \ +} \ + +#define setup_blocks_span_initialize_unshaded_untextured() \ + +#define setup_blocks_texture_swizzled() \ +{ \ + vec_8x8u u_saved = u; \ + gvsli_n_u8(u, v, 4); \ + gvsri_n_u8(v, u_saved, 4); \ +} \ + +#define setup_blocks_texture_unswizzled() \ + +#define setup_blocks_store_shaded_textured(swizzling, dithering, target, \ + edge_type) \ +{ \ + vec_8x16u u_whole; \ + vec_8x16u v_whole; \ + vec_8x16u r_whole; \ + vec_8x16u g_whole; \ + vec_8x16u b_whole; \ + vec_4x16u u_whole_lo, u_whole_hi; \ + vec_4x16u v_whole_lo, v_whole_hi; \ + vec_4x16u r_whole_lo, r_whole_hi; \ + vec_4x16u g_whole_lo, g_whole_hi; \ + vec_4x16u b_whole_lo, b_whole_hi; \ + \ + vec_8x8u u; \ + vec_8x8u v; \ + vec_8x8u r; \ + vec_8x8u g; \ + vec_8x8u b; \ + \ + vec_4x32u dx4; \ + vec_4x32u dx8; \ + \ + gvshrn_n_u32(u_whole_lo, u_block, 16); \ + gvshrn_n_u32(v_whole_lo, v_block, 16); \ + gvshrn_n_u32(r_whole_lo, r_block, 16); \ + gvshrn_n_u32(g_whole_lo, g_block, 16); \ + gvshrn_n_u32(b_whole_lo, b_block, 16); \ + \ + gvdupq_l_u32(dx4, gvlo(uvrg_dx4), 0); \ + gvaddhn_u32(u_whole_hi, u_block, dx4); \ + gvdupq_l_u32(dx4, gvlo(uvrg_dx4), 1); \ + gvaddhn_u32(v_whole_hi, v_block, dx4); \ + gvdupq_l_u32(dx4, gvhi(uvrg_dx4), 0); \ + gvaddhn_u32(r_whole_hi, r_block, dx4); \ + gvdupq_l_u32(dx4, gvhi(uvrg_dx4), 1); \ + gvaddhn_u32(g_whole_hi, g_block, dx4); \ + gvdupq_n_u32(dx4, b_dx4); \ + gvaddhn_u32(b_whole_hi, b_block, dx4); \ + \ + gvcombine_u16(u_whole, u_whole_lo, u_whole_hi); \ + gvcombine_u16(v_whole, v_whole_lo, v_whole_hi); \ + gvcombine_u16(r_whole, r_whole_lo, 
r_whole_hi); \ + gvcombine_u16(g_whole, g_whole_lo, g_whole_hi); \ + gvcombine_u16(b_whole, b_whole_lo, b_whole_hi); \ + gvmovn_u16(u, u_whole); \ + gvmovn_u16(v, v_whole); \ + gvmovn_u16(r, r_whole); \ + gvmovn_u16(g, g_whole); \ + gvmovn_u16(b, b_whole); \ + \ + gvdupq_l_u32(dx8, gvlo(uvrg_dx8), 0); \ + gvaddq_u32(u_block, u_block, dx8); \ + gvdupq_l_u32(dx8, gvlo(uvrg_dx8), 1); \ + gvaddq_u32(v_block, v_block, dx8); \ + gvdupq_l_u32(dx8, gvhi(uvrg_dx8), 0); \ + gvaddq_u32(r_block, r_block, dx8); \ + gvdupq_l_u32(dx8, gvhi(uvrg_dx8), 1); \ + gvaddq_u32(g_block, g_block, dx8); \ + gvdupq_n_u32(dx8, b_dx8); \ + gvaddq_u32(b_block, b_block, dx8); \ + \ + gvand(u, u, gvlo(texture_mask)); \ + gvand(v, v, gvhi(texture_mask)); \ + setup_blocks_texture_##swizzling(); \ + \ + gvst2_u8(u, v, (u8 *)block->uv.e); \ + gvst1_u8(r, block->r.e); \ + gvst1_u8(g, block->g.e); \ + gvst1_u8(b, block->b.e); \ + gvst1q_u16(dither_offsets, (u16 *)block->dither_offsets.e); \ + block->fb_ptr = fb_ptr; \ +} \ + +#define setup_blocks_store_unshaded_textured(swizzling, dithering, target, \ + edge_type) \ +{ \ + vec_8x16u u_whole; \ + vec_8x16u v_whole; \ + vec_4x16u u_whole_lo, u_whole_hi; \ + vec_4x16u v_whole_lo, v_whole_hi; \ + \ + vec_8x8u u; \ + vec_8x8u v; \ + \ + vec_4x32u dx4; \ + vec_4x32u dx8; \ + \ + gvshrn_n_u32(u_whole_lo, u_block, 16); \ + gvshrn_n_u32(v_whole_lo, v_block, 16); \ + \ + gvdupq_l_u32(dx4, uv_dx4, 0); \ + gvaddhn_u32(u_whole_hi, u_block, dx4); \ + gvdupq_l_u32(dx4, uv_dx4, 1); \ + gvaddhn_u32(v_whole_hi, v_block, dx4); \ + \ + gvcombine_u16(u_whole, u_whole_lo, u_whole_hi); \ + gvcombine_u16(v_whole, v_whole_lo, v_whole_hi); \ + gvmovn_u16(u, u_whole); \ + gvmovn_u16(v, v_whole); \ + \ + gvdupq_l_u32(dx8, uv_dx8, 0); \ + gvaddq_u32(u_block, u_block, dx8); \ + gvdupq_l_u32(dx8, uv_dx8, 1); \ + gvaddq_u32(v_block, v_block, dx8); \ + \ + gvand(u, u, gvlo(texture_mask)); \ + gvand(v, v, gvhi(texture_mask)); \ + setup_blocks_texture_##swizzling(); \ + \ + gvst2_u8(u, 
v, (u8 *)block->uv.e); \ + gvst1q_u16(dither_offsets, (u16 *)block->dither_offsets.e); \ + block->fb_ptr = fb_ptr; \ +} \ + +#define setup_blocks_store_shaded_untextured_dithered() \ + gvqadd_u8(r, r, dither_offsets); \ + gvqadd_u8(g, g, dither_offsets); \ + gvqadd_u8(b, b, dither_offsets); \ + \ + gvqsub_u8(r, r, d64_4); \ + gvqsub_u8(g, g, d64_4); \ + gvqsub_u8(b, b, d64_4) \ + +#define setup_blocks_store_shaded_untextured_undithered() \ + +#define setup_blocks_store_untextured_pixels_indirect_full(_pixels) \ + gvst1q_u16(_pixels, block->pixels.e); \ + block->fb_ptr = fb_ptr \ + +#define setup_blocks_store_untextured_pixels_indirect_edge(_pixels) \ + gvst1q_u16(_pixels, block->pixels.e); \ + block->fb_ptr = fb_ptr \ + +#define setup_blocks_store_shaded_untextured_seed_pixels_indirect() \ + gvmull_u8(pixels, r, d64_1) \ + +#define setup_blocks_store_untextured_pixels_direct_full(_pixels) \ + gvst1q_u16(_pixels, fb_ptr) \ + +#define setup_blocks_store_untextured_pixels_direct_edge(_pixels) \ +{ \ + vec_8x16u fb_pixels; \ + vec_8x16u draw_mask; \ + vec_8x16u test_mask; \ + \ + gvld1q_u16(test_mask, psx_gpu->test_mask.e); \ + gvld1q_u16(fb_pixels, fb_ptr); \ + gvdupq_n_u16(draw_mask, span_edge_data->right_mask); \ + gvtstq_u16(draw_mask, draw_mask, test_mask); \ + gvbifq(fb_pixels, _pixels, draw_mask); \ + gvst1q_u16(fb_pixels, fb_ptr); \ +} \ + +#define setup_blocks_store_shaded_untextured_seed_pixels_direct() \ + pixels = msb_mask; \ + gvmlal_u8(pixels, r, d64_1) \ + +#define setup_blocks_store_shaded_untextured(swizzling, dithering, target, \ + edge_type) \ +{ \ + vec_8x16u r_whole; \ + vec_8x16u g_whole; \ + vec_8x16u b_whole; \ + vec_4x16u r_whole_lo, r_whole_hi; \ + vec_4x16u g_whole_lo, g_whole_hi; \ + vec_4x16u b_whole_lo, b_whole_hi; \ + \ + vec_8x8u r; \ + vec_8x8u g; \ + vec_8x8u b; \ + \ + vec_4x32u dx4; \ + vec_4x32u dx8; \ + \ + vec_8x16u pixels; \ + \ + gvshrn_n_u32(r_whole_lo, r_block, 16); \ + gvshrn_n_u32(g_whole_lo, g_block, 16); \ + 
gvshrn_n_u32(b_whole_lo, b_block, 16); \ + \ + gvdupq_l_u32(dx4, gvlo(rgb_dx4), 0); \ + gvaddhn_u32(r_whole_hi, r_block, dx4); \ + gvdupq_l_u32(dx4, gvlo(rgb_dx4), 1); \ + gvaddhn_u32(g_whole_hi, g_block, dx4); \ + gvdupq_l_u32(dx4, gvhi(rgb_dx4), 0); \ + gvaddhn_u32(b_whole_hi, b_block, dx4); \ + \ + gvcombine_u16(r_whole, r_whole_lo, r_whole_hi); \ + gvcombine_u16(g_whole, g_whole_lo, g_whole_hi); \ + gvcombine_u16(b_whole, b_whole_lo, b_whole_hi); \ + gvmovn_u16(r, r_whole); \ + gvmovn_u16(g, g_whole); \ + gvmovn_u16(b, b_whole); \ + \ + gvdupq_l_u32(dx8, gvlo(rgb_dx8), 0); \ + gvaddq_u32(r_block, r_block, dx8); \ + gvdupq_l_u32(dx8, gvlo(rgb_dx8), 1); \ + gvaddq_u32(g_block, g_block, dx8); \ + gvdupq_l_u32(dx8, gvhi(rgb_dx8), 0); \ + gvaddq_u32(b_block, b_block, dx8); \ + \ + setup_blocks_store_shaded_untextured_##dithering(); \ + \ + gvshr_n_u8(r, r, 3); \ + gvbic(g, g, d64_0x07); \ + gvbic(b, b, d64_0x07); \ + \ + setup_blocks_store_shaded_untextured_seed_pixels_##target(); \ + gvmlal_u8(pixels, g, d64_4); \ + gvmlal_u8(pixels, b, d64_128); \ + \ + setup_blocks_store_untextured_pixels_##target##_##edge_type(pixels); \ +} \ + +#define setup_blocks_store_unshaded_untextured(swizzling, dithering, target, \ + edge_type) \ + setup_blocks_store_untextured_pixels_##target##_##edge_type(colors) \ + +#define setup_blocks_store_draw_mask_textured_indirect(_block, bits) \ + (_block)->draw_mask_bits = bits \ + +#define setup_blocks_store_draw_mask_untextured_indirect(_block, bits) \ +{ \ + vec_8x16u bits_mask; \ + vec_8x16u test_mask; \ + \ + gvld1q_u16(test_mask, psx_gpu->test_mask.e); \ + gvdupq_n_u16(bits_mask, bits); \ + gvtstq_u16(bits_mask, bits_mask, test_mask); \ + gvst1q_u16(bits_mask, (_block)->draw_mask.e); \ +} \ + +#define setup_blocks_store_draw_mask_untextured_direct(_block, bits) \ + +#define setup_blocks_add_blocks_indirect() \ + num_blocks += span_num_blocks; \ + \ + if(num_blocks > MAX_BLOCKS) \ + { \ + psx_gpu->num_blocks = num_blocks - 
span_num_blocks; \ + flush_render_block_buffer(psx_gpu); \ + num_blocks = span_num_blocks; \ + block = psx_gpu->blocks; \ + } \ + +#define setup_blocks_add_blocks_direct() \ + +#define setup_blocks_do(shading, texturing, dithering, sw, target) \ + setup_blocks_load_msb_mask_##target(); \ + setup_blocks_variables_##shading##_##texturing(target); \ + \ + edge_data_struct *span_edge_data = psx_gpu->span_edge_data; \ + vec_4x32u *span_uvrg_offset = (vec_4x32u *)psx_gpu->span_uvrg_offset; \ + u32 *span_b_offset = psx_gpu->span_b_offset; \ + \ + block_struct *block = psx_gpu->blocks + psx_gpu->num_blocks; \ + \ + u32 num_spans = psx_gpu->num_spans; \ + \ + u16 *fb_ptr; \ + u32 y; \ + \ + u32 num_blocks = psx_gpu->num_blocks; \ + u32 span_num_blocks; \ + \ + while(num_spans) \ + { \ + span_num_blocks = span_edge_data->num_blocks; \ + if(span_num_blocks) \ + { \ + y = span_edge_data->y; \ + fb_ptr = psx_gpu->vram_out_ptr + span_edge_data->left_x + (y * 1024); \ + \ + setup_blocks_span_initialize_##shading##_##texturing(); \ + setup_blocks_span_initialize_##dithering(texturing); \ + \ + setup_blocks_add_blocks_##target(); \ + \ + s32 pixel_span = span_num_blocks * 8; \ + pixel_span -= __builtin_popcount(span_edge_data->right_mask & 0xFF); \ + \ + span_num_blocks--; \ + while(span_num_blocks) \ + { \ + setup_blocks_store_##shading##_##texturing(sw, dithering, target, \ + full); \ + setup_blocks_store_draw_mask_##texturing##_##target(block, 0x00); \ + \ + fb_ptr += 8; \ + block++; \ + span_num_blocks--; \ + } \ + \ + setup_blocks_store_##shading##_##texturing(sw, dithering, target, edge); \ + setup_blocks_store_draw_mask_##texturing##_##target(block, \ + span_edge_data->right_mask); \ + \ + block++; \ + } \ + \ + num_spans--; \ + span_edge_data++; \ + span_uvrg_offset++; \ + span_b_offset++; \ + } \ + \ + psx_gpu->num_blocks = num_blocks \ + +void setup_blocks_shaded_textured_dithered_swizzled_indirect(psx_gpu_struct + *psx_gpu) +{ +#if 0 + 
setup_blocks_shaded_textured_dithered_swizzled_indirect_(psx_gpu); + return; +#endif + setup_blocks_do(shaded, textured, dithered, swizzled, indirect); +} + +void setup_blocks_shaded_textured_dithered_unswizzled_indirect(psx_gpu_struct + *psx_gpu) +{ +#if 0 + setup_blocks_shaded_textured_dithered_unswizzled_indirect_(psx_gpu); + return; +#endif + setup_blocks_do(shaded, textured, dithered, unswizzled, indirect); +} + +void setup_blocks_unshaded_textured_dithered_swizzled_indirect(psx_gpu_struct + *psx_gpu) +{ +#if 0 + setup_blocks_unshaded_textured_dithered_swizzled_indirect_(psx_gpu); + return; +#endif + setup_blocks_do(unshaded, textured, dithered, swizzled, indirect); +} + +void setup_blocks_unshaded_textured_dithered_unswizzled_indirect(psx_gpu_struct + *psx_gpu) +{ +#if 0 + setup_blocks_unshaded_textured_dithered_unswizzled_indirect_(psx_gpu); + return; +#endif + setup_blocks_do(unshaded, textured, dithered, unswizzled, indirect); +} + +void setup_blocks_unshaded_untextured_undithered_unswizzled_indirect( + psx_gpu_struct *psx_gpu) +{ +#if 0 + setup_blocks_unshaded_untextured_undithered_unswizzled_indirect_(psx_gpu); + return; +#endif + setup_blocks_do(unshaded, untextured, undithered, unswizzled, indirect); +} + +void setup_blocks_unshaded_untextured_undithered_unswizzled_direct( + psx_gpu_struct *psx_gpu) +{ +#if 0 + setup_blocks_unshaded_untextured_undithered_unswizzled_direct_(psx_gpu); + return; +#endif + setup_blocks_do(unshaded, untextured, undithered, unswizzled, direct); +} + +void setup_blocks_shaded_untextured_undithered_unswizzled_indirect(psx_gpu_struct + *psx_gpu) +{ +#if 0 + setup_blocks_shaded_untextured_undithered_unswizzled_indirect_(psx_gpu); + return; +#endif + setup_blocks_do(shaded, untextured, undithered, unswizzled, indirect); +} + +void setup_blocks_shaded_untextured_dithered_unswizzled_indirect(psx_gpu_struct + *psx_gpu) +{ +#if 0 + setup_blocks_shaded_untextured_dithered_unswizzled_indirect_(psx_gpu); + return; +#endif + 
setup_blocks_do(shaded, untextured, dithered, unswizzled, indirect); +} + +void setup_blocks_shaded_untextured_undithered_unswizzled_direct( + psx_gpu_struct *psx_gpu) +{ +#if 0 + setup_blocks_shaded_untextured_undithered_unswizzled_direct_(psx_gpu); + return; +#endif + setup_blocks_do(shaded, untextured, undithered, unswizzled, direct); +} + +void setup_blocks_shaded_untextured_dithered_unswizzled_direct(psx_gpu_struct + *psx_gpu) +{ +#if 0 + setup_blocks_shaded_untextured_dithered_unswizzled_direct_(psx_gpu); + return; +#endif + setup_blocks_do(shaded, untextured, dithered, unswizzled, direct); +} + +static void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu) +{ + u32 current_texture_page = psx_gpu->current_texture_page; + u8 *texture_page_ptr = psx_gpu->texture_page_base; + const u16 *vram_ptr = psx_gpu->vram_ptr; + u32 tile_x, tile_y; + u32 sub_y; + vec_8x16u c_0x00f0; + + vram_ptr += (current_texture_page >> 4) * 256 * 1024; + vram_ptr += (current_texture_page & 0xF) * 64; + + gvdupq_n_u16(c_0x00f0, 0x00f0); + + psx_gpu->dirty_textures_4bpp_mask &= ~(psx_gpu->current_texture_mask); + + for (tile_y = 16; tile_y; tile_y--) + { + for (tile_x = 16; tile_x; tile_x--) + { + for (sub_y = 8; sub_y; sub_y--) + { + vec_8x8u texel_block_a, texel_block_b; + vec_8x16u texel_block_expanded_a, texel_block_expanded_b; + vec_8x16u texel_block_expanded_c, texel_block_expanded_d; + vec_8x16u texel_block_expanded_ab, texel_block_expanded_cd; + + gvld1_u8(texel_block_a, (u8 *)vram_ptr); vram_ptr += 1024; + gvld1_u8(texel_block_b, (u8 *)vram_ptr); vram_ptr += 1024; + + gvmovl_u8(texel_block_expanded_a, texel_block_a); + gvshll_n_u8(texel_block_expanded_b, texel_block_a, 4); + gvmovl_u8(texel_block_expanded_c, texel_block_b); + gvshll_n_u8(texel_block_expanded_d, texel_block_b, 4); + + gvbicq(texel_block_expanded_a, texel_block_expanded_a, c_0x00f0); + gvbicq(texel_block_expanded_b, texel_block_expanded_b, c_0x00f0); + gvbicq(texel_block_expanded_c, texel_block_expanded_c, 
c_0x00f0); + gvbicq(texel_block_expanded_d, texel_block_expanded_d, c_0x00f0); + + gvorrq(texel_block_expanded_ab, texel_block_expanded_a, texel_block_expanded_b); + gvorrq(texel_block_expanded_cd, texel_block_expanded_c, texel_block_expanded_d); + + gvst1q_2_pi_u32(texel_block_expanded_ab, texel_block_expanded_cd, texture_page_ptr); + } + + vram_ptr -= (1024 * 16) - 4; + } + + vram_ptr += (16 * 1024) - (4 * 16); + } +} + +void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu, + u32 texture_page) +{ +#if 0 + update_texture_8bpp_cache_slice_(psx_gpu, texture_page); + return; +#endif + u16 *texture_page_ptr = psx_gpu->texture_page_base; + u16 *vram_ptr = psx_gpu->vram_ptr; + + u32 tile_x, tile_y; + u32 sub_y; + + vram_ptr += (texture_page >> 4) * 256 * 1024; + vram_ptr += (texture_page & 0xF) * 64; + + if((texture_page ^ psx_gpu->current_texture_page) & 0x1) + texture_page_ptr += (8 * 16) * 8; + + for (tile_y = 16; tile_y; tile_y--) + { + for (tile_x = 8; tile_x; tile_x--) + { + for (sub_y = 4; sub_y; sub_y--) + { + vec_4x32u texels_a, texels_b, texels_c, texels_d = {}; + gvld1q_u32(texels_a, vram_ptr); vram_ptr += 1024; + gvld1q_u32(texels_b, vram_ptr); vram_ptr += 1024; + gvld1q_u32(texels_c, vram_ptr); vram_ptr += 1024; + gvld1q_u32(texels_d, vram_ptr); vram_ptr += 1024; + + gvst1q_2_pi_u32(texels_a, texels_b, texture_page_ptr); + gvst1q_2_pi_u32(texels_c, texels_d, texture_page_ptr); + } + + vram_ptr -= (1024 * 16) - 8; + } + + vram_ptr -= (8 * 8); + vram_ptr += (16 * 1024); + + texture_page_ptr += (8 * 16) * 8; + } +} + +void texture_blocks_untextured(psx_gpu_struct *psx_gpu) +{ +} + +void texture_blocks_4bpp(psx_gpu_struct *psx_gpu) +{ +#if 0 + texture_blocks_4bpp_(psx_gpu); + return; +#endif + block_struct *block = psx_gpu->blocks; + u32 num_blocks = psx_gpu->num_blocks; + + vec_8x8u texels_low; + vec_8x8u texels_high; + + vec_16x8u clut_low; + vec_16x8u clut_high; + + const u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr; + + gvld2q_u8(clut_low, 
clut_high, (u8 *)psx_gpu->clut_ptr); + + if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_4bpp_mask) + update_texture_4bpp_cache(psx_gpu); + + while(num_blocks) + { + vec_8x8u texels = + { + .u8 = + { + texture_ptr_8bpp[block->uv.e[0]], + texture_ptr_8bpp[block->uv.e[1]], + texture_ptr_8bpp[block->uv.e[2]], + texture_ptr_8bpp[block->uv.e[3]], + texture_ptr_8bpp[block->uv.e[4]], + texture_ptr_8bpp[block->uv.e[5]], + texture_ptr_8bpp[block->uv.e[6]], + texture_ptr_8bpp[block->uv.e[7]] + } + }; + + gvtbl2_u8(texels_low, clut_low, texels); + gvtbl2_u8(texels_high, clut_high, texels); + + gvst2_u8(texels_low, texels_high, (u8 *)block->texels.e); + + num_blocks--; + block++; + } +} + +void texture_blocks_8bpp(psx_gpu_struct *psx_gpu) +{ +#if 0 + texture_blocks_8bpp_(psx_gpu); + return; +#endif + u32 num_blocks = psx_gpu->num_blocks; + + if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_8bpp_mask) + update_texture_8bpp_cache(psx_gpu); + + const u8 * __restrict__ texture_ptr_8bpp = psx_gpu->texture_page_ptr; + const u16 * __restrict__ clut_ptr = psx_gpu->clut_ptr; + block_struct * __restrict__ block = psx_gpu->blocks; + + while(num_blocks) + { + u16 offset; + #define load_one(i_) \ + offset = block->uv.e[i_]; u16 texel##i_ = texture_ptr_8bpp[offset] + #define store_one(i_) \ + block->texels.e[i_] = clut_ptr[texel##i_] + load_one(0); load_one(1); load_one(2); load_one(3); + load_one(4); load_one(5); load_one(6); load_one(7); + store_one(0); store_one(1); store_one(2); store_one(3); + store_one(4); store_one(5); store_one(6); store_one(7); + #undef load_one + #undef store_one + + num_blocks--; + block++; + } +} + +void texture_blocks_16bpp(psx_gpu_struct *psx_gpu) +{ +#if 0 + texture_blocks_16bpp_(psx_gpu); + return; +#endif + u32 num_blocks = psx_gpu->num_blocks; + const u16 * __restrict__ texture_ptr_16bpp = psx_gpu->texture_page_ptr; + block_struct * __restrict__ block = psx_gpu->blocks; + + while(num_blocks) + { + u32 offset; + #define load_one(i_) \ + 
offset = block->uv.e[i_]; \ + offset += ((offset & 0xFF00) * 3); \ + u16 texel##i_ = texture_ptr_16bpp[offset] + #define store_one(i_) \ + block->texels.e[i_] = texel##i_ + load_one(0); load_one(1); load_one(2); load_one(3); + load_one(4); load_one(5); load_one(6); load_one(7); + store_one(0); store_one(1); store_one(2); store_one(3); + store_one(4); store_one(5); store_one(6); store_one(7); + #undef load_one + #undef store_one + + num_blocks--; + block++; + } +} + +#define shade_blocks_load_msb_mask_indirect() \ + +#define shade_blocks_load_msb_mask_direct() \ + vec_8x16u msb_mask; \ + gvdupq_n_u16(msb_mask, psx_gpu->mask_msb); \ + +#define shade_blocks_store_indirect(_draw_mask, _pixels) \ + gvst1q_u16(_draw_mask, block->draw_mask.e); \ + gvst1q_u16(_pixels, block->pixels.e); \ + +#define shade_blocks_store_direct(_draw_mask, _pixels) \ +{ \ + vec_8x16u fb_pixels; \ + gvorrq(_pixels, _pixels, msb_mask); \ + gvld1q_u16(fb_pixels, block->fb_ptr); \ + gvbifq(fb_pixels, _pixels, _draw_mask); \ + gvst1q_u16(fb_pixels, block->fb_ptr); \ +} \ + +#define shade_blocks_textured_false_modulated_check_dithered(target) \ + +#define shade_blocks_textured_false_modulated_check_undithered(target) \ + if(psx_gpu->triangle_color == 0x808080) \ + { \ + shade_blocks_textured_unmodulated_##target(psx_gpu); \ + return; \ + } \ + +#define shade_blocks_textured_modulated_shaded_primitive_load(dithering, \ + target) \ + +#define shade_blocks_textured_modulated_unshaded_primitive_load(dithering, \ + target) \ +{ \ + u32 color = psx_gpu->triangle_color; \ + gvdup_n_u8(colors_r, color); \ + gvdup_n_u8(colors_g, color >> 8); \ + gvdup_n_u8(colors_b, color >> 16); \ + shade_blocks_textured_false_modulated_check_##dithering(target); \ +} \ + +#define shade_blocks_textured_modulated_shaded_block_load() \ + gvld1_u8(colors_r, block->r.e); \ + gvld1_u8(colors_g, block->g.e); \ + gvld1_u8(colors_b, block->b.e) \ + +#define shade_blocks_textured_modulated_unshaded_block_load() \ + +#define 
shade_blocks_textured_modulate_dithered(component) \ + gvld1q_u16(pixels_##component, block->dither_offsets.e); \ + gvmlal_u8(pixels_##component, texels_##component, colors_##component) \ + +#define shade_blocks_textured_modulate_undithered(component) \ + gvmull_u8(pixels_##component, texels_##component, colors_##component) \ + +#define shade_blocks_textured_modulated_do(shading, dithering, target) \ + block_struct *block = psx_gpu->blocks; \ + u32 num_blocks = psx_gpu->num_blocks; \ + vec_8x16u texels; \ + \ + vec_8x8u texels_r; \ + vec_8x8u texels_g; \ + vec_8x8u texels_b; \ + \ + vec_8x8u colors_r; \ + vec_8x8u colors_g; \ + vec_8x8u colors_b; \ + \ + vec_8x8u pixels_r_low; \ + vec_8x8u pixels_g_low; \ + vec_8x8u pixels_b_low; \ + vec_8x16u pixels; \ + \ + vec_8x16u pixels_r; \ + vec_8x16u pixels_g; \ + vec_8x16u pixels_b; \ + \ + vec_8x16u draw_mask; \ + vec_8x16u zero_mask; \ + \ + vec_8x8u d64_0x07; \ + vec_8x8u d64_0x1F; \ + vec_8x8u d64_1; \ + vec_8x8u d64_4; \ + vec_8x8u d64_128; \ + \ + vec_8x16u d128_0x8000; \ + \ + vec_8x16u test_mask; \ + u32 draw_mask_bits; \ + \ + gvld1q_u16(test_mask, psx_gpu->test_mask.e); \ + shade_blocks_load_msb_mask_##target(); \ + \ + gvdup_n_u8(d64_0x07, 0x07); \ + gvdup_n_u8(d64_0x1F, 0x1F); \ + gvdup_n_u8(d64_1, 1); \ + gvdup_n_u8(d64_4, 4); \ + gvdup_n_u8(d64_128, 128); \ + \ + gvdupq_n_u16(d128_0x8000, 0x8000); \ + \ + shade_blocks_textured_modulated_##shading##_primitive_load(dithering, \ + target); \ + \ + while(num_blocks) \ + { \ + draw_mask_bits = block->draw_mask_bits; \ + gvdupq_n_u16(draw_mask, draw_mask_bits); \ + gvtstq_u16(draw_mask, draw_mask, test_mask); \ + \ + shade_blocks_textured_modulated_##shading##_block_load(); \ + \ + gvld1q_u16(texels, block->texels.e); \ + \ + gvmovn_u16(texels_r, texels); \ + gvshrn_n_u16(texels_g, texels, 5); \ + gvshrn_n_u16(texels_b, texels, 7); \ + \ + gvand(texels_r, texels_r, d64_0x1F); \ + gvand(texels_g, texels_g, d64_0x1F); \ + gvshr_n_u8(texels_b, texels_b, 3); \ + \ + 
shade_blocks_textured_modulate_##dithering(r); \ + shade_blocks_textured_modulate_##dithering(g); \ + shade_blocks_textured_modulate_##dithering(b); \ + \ + gvceqzq_u16(zero_mask, texels); \ + gvand(pixels, texels, d128_0x8000); \ + \ + gvqshrun_n_s16(pixels_r_low, pixels_r, 4); \ + gvqshrun_n_s16(pixels_g_low, pixels_g, 4); \ + gvqshrun_n_s16(pixels_b_low, pixels_b, 4); \ + \ + gvorrq(zero_mask, draw_mask, zero_mask); \ + \ + gvshr_n_u8(pixels_r_low, pixels_r_low, 3); \ + gvbic(pixels_g_low, pixels_g_low, d64_0x07); \ + gvbic(pixels_b_low, pixels_b_low, d64_0x07); \ + \ + gvmlal_u8(pixels, pixels_r_low, d64_1); \ + gvmlal_u8(pixels, pixels_g_low, d64_4); \ + gvmlal_u8(pixels, pixels_b_low, d64_128); \ + \ + shade_blocks_store_##target(zero_mask, pixels); \ + \ + num_blocks--; \ + block++; \ + } \ + +void shade_blocks_shaded_textured_modulated_dithered_direct(psx_gpu_struct + *psx_gpu) +{ +#if 0 + shade_blocks_shaded_textured_modulated_dithered_direct_(psx_gpu); + return; +#endif + shade_blocks_textured_modulated_do(shaded, dithered, direct); +} + +void shade_blocks_shaded_textured_modulated_undithered_direct(psx_gpu_struct + *psx_gpu) +{ +#if 0 + shade_blocks_shaded_textured_modulated_undithered_direct_(psx_gpu); + return; +#endif + shade_blocks_textured_modulated_do(shaded, undithered, direct); +} + +void shade_blocks_unshaded_textured_modulated_dithered_direct(psx_gpu_struct + *psx_gpu) +{ +#if 0 + shade_blocks_unshaded_textured_modulated_dithered_direct_(psx_gpu); + return; +#endif + shade_blocks_textured_modulated_do(unshaded, dithered, direct); +} + +void shade_blocks_unshaded_textured_modulated_undithered_direct(psx_gpu_struct + *psx_gpu) +{ +#if 0 + shade_blocks_unshaded_textured_modulated_undithered_direct_(psx_gpu); + return; +#endif + shade_blocks_textured_modulated_do(unshaded, undithered, direct); +} + +void shade_blocks_shaded_textured_modulated_dithered_indirect(psx_gpu_struct + *psx_gpu) +{ +#if 0 + 
shade_blocks_shaded_textured_modulated_dithered_indirect_(psx_gpu); + return; +#endif + shade_blocks_textured_modulated_do(shaded, dithered, indirect); +} + +void shade_blocks_shaded_textured_modulated_undithered_indirect(psx_gpu_struct + *psx_gpu) +{ +#if 0 + shade_blocks_shaded_textured_modulated_undithered_indirect_(psx_gpu); + return; +#endif + shade_blocks_textured_modulated_do(shaded, undithered, indirect); +} + +void shade_blocks_unshaded_textured_modulated_dithered_indirect(psx_gpu_struct + *psx_gpu) +{ +#if 0 + shade_blocks_unshaded_textured_modulated_dithered_indirect_(psx_gpu); + return; +#endif + shade_blocks_textured_modulated_do(unshaded, dithered, indirect); +} + +void shade_blocks_unshaded_textured_modulated_undithered_indirect(psx_gpu_struct + *psx_gpu) +{ +#if 0 + shade_blocks_unshaded_textured_modulated_undithered_indirect_(psx_gpu); + return; +#endif + shade_blocks_textured_modulated_do(unshaded, undithered, indirect); +} + +#define shade_blocks_textured_unmodulated_do(target) \ + block_struct *block = psx_gpu->blocks; \ + u32 num_blocks = psx_gpu->num_blocks; \ + vec_8x16u draw_mask; \ + vec_8x16u test_mask; \ + u32 draw_mask_bits; \ + \ + vec_8x16u pixels; \ + \ + gvld1q_u16(test_mask, psx_gpu->test_mask.e); \ + shade_blocks_load_msb_mask_##target(); \ + \ + while(num_blocks) \ + { \ + vec_8x16u zero_mask; \ + \ + draw_mask_bits = block->draw_mask_bits; \ + gvdupq_n_u16(draw_mask, draw_mask_bits); \ + gvtstq_u16(draw_mask, draw_mask, test_mask); \ + \ + gvld1q_u16(pixels, block->texels.e); \ + \ + gvceqzq_u16(zero_mask, pixels); \ + gvorrq(zero_mask, draw_mask, zero_mask); \ + \ + shade_blocks_store_##target(zero_mask, pixels); \ + \ + num_blocks--; \ + block++; \ + } \ + +void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu) +{ +#if 0 + shade_blocks_textured_unmodulated_indirect_(psx_gpu); + return; +#endif + shade_blocks_textured_unmodulated_do(indirect) +} + +void shade_blocks_textured_unmodulated_direct(psx_gpu_struct 
*psx_gpu) +{ +#if 0 + shade_blocks_textured_unmodulated_direct_(psx_gpu); + return; +#endif + shade_blocks_textured_unmodulated_do(direct) +} + +void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu) +{ +} + +void shade_blocks_unshaded_untextured_direct(psx_gpu_struct *psx_gpu) +{ +#if 0 + shade_blocks_unshaded_untextured_direct_(psx_gpu); + return; +#endif + block_struct *block = psx_gpu->blocks; + u32 num_blocks = psx_gpu->num_blocks; + + vec_8x16u pixels; + gvld1q_u16(pixels, block->texels.e); + shade_blocks_load_msb_mask_direct(); + + while(num_blocks) + { + vec_8x16u draw_mask; + gvld1q_u16(draw_mask, block->draw_mask.e); + shade_blocks_store_direct(draw_mask, pixels); + + num_blocks--; + block++; + } +} + +#define blend_blocks_mask_evaluate_on() \ + vec_8x16u mask_pixels; \ + gvcltzq_s16(mask_pixels, framebuffer_pixels); \ + gvorrq(draw_mask, draw_mask, mask_pixels) \ + +#define blend_blocks_mask_evaluate_off() \ + +#define blend_blocks_average() \ +{ \ + vec_8x16u pixels_no_msb; \ + vec_8x16u fb_pixels_no_msb; \ + \ + vec_8x16u d128_0x0421; \ + \ + gvdupq_n_u16(d128_0x0421, 0x0421); \ + \ + gveorq(blend_pixels, pixels, framebuffer_pixels); \ + gvbicq(pixels_no_msb, pixels, d128_0x8000); \ + gvand(blend_pixels, blend_pixels, d128_0x0421); \ + gvsubq_u16(blend_pixels, pixels_no_msb, blend_pixels); \ + gvbicq(fb_pixels_no_msb, framebuffer_pixels, d128_0x8000); \ + gvhaddq_u16(blend_pixels, fb_pixels_no_msb, blend_pixels); \ +} \ + +#define blend_blocks_add() \ +{ \ + vec_8x16u pixels_rb, pixels_g; \ + vec_8x16u fb_rb, fb_g; \ + \ + vec_8x16u d128_0x7C1F; \ + vec_8x16u d128_0x03E0; \ + \ + gvdupq_n_u16(d128_0x7C1F, 0x7C1F); \ + gvdupq_n_u16(d128_0x03E0, 0x03E0); \ + \ + gvand(pixels_rb, pixels, d128_0x7C1F); \ + gvand(pixels_g, pixels, d128_0x03E0); \ + \ + gvand(fb_rb, framebuffer_pixels, d128_0x7C1F); \ + gvand(fb_g, framebuffer_pixels, d128_0x03E0); \ + \ + gvaddq_u16(fb_rb, fb_rb, pixels_rb); \ + gvaddq_u16(fb_g, fb_g, pixels_g); \ + \ + 
gvminq_u8(fb_rb, fb_rb, d128_0x7C1F); \ + gvminq_u16(fb_g, fb_g, d128_0x03E0); \ + \ + gvorrq(blend_pixels, fb_rb, fb_g); \ +} \ + +#define blend_blocks_subtract() \ +{ \ + vec_8x16u pixels_rb, pixels_g; \ + vec_8x16u fb_rb, fb_g; \ + \ + vec_8x16u d128_0x7C1F; \ + vec_8x16u d128_0x03E0; \ + \ + gvdupq_n_u16(d128_0x7C1F, 0x7C1F); \ + gvdupq_n_u16(d128_0x03E0, 0x03E0); \ + \ + gvand(pixels_rb, pixels, d128_0x7C1F); \ + gvand(pixels_g, pixels, d128_0x03E0); \ + \ + gvand(fb_rb, framebuffer_pixels, d128_0x7C1F); \ + gvand(fb_g, framebuffer_pixels, d128_0x03E0); \ + \ + gvqsubq_u8(fb_rb, fb_rb, pixels_rb); \ + gvqsubq_u16(fb_g, fb_g, pixels_g); \ + \ + gvorrq(blend_pixels, fb_rb, fb_g); \ +} \ + +#define blend_blocks_add_fourth() \ +{ \ + vec_8x16u pixels_rb, pixels_g; \ + vec_8x16u pixels_fourth; \ + vec_8x16u fb_rb, fb_g; \ + \ + vec_8x16u d128_0x7C1F; \ + vec_8x16u d128_0x1C07; \ + vec_8x16u d128_0x03E0; \ + vec_8x16u d128_0x00E0; \ + \ + gvdupq_n_u16(d128_0x7C1F, 0x7C1F); \ + gvdupq_n_u16(d128_0x1C07, 0x1C07); \ + gvdupq_n_u16(d128_0x03E0, 0x03E0); \ + gvdupq_n_u16(d128_0x00E0, 0x00E0); \ + \ + gvshrq_n_u16(pixels_fourth, pixels, 2); \ + \ + gvand(fb_rb, framebuffer_pixels, d128_0x7C1F); \ + gvand(fb_g, framebuffer_pixels, d128_0x03E0); \ + \ + gvand(pixels_rb, pixels_fourth, d128_0x1C07); \ + gvand(pixels_g, pixels_fourth, d128_0x00E0); \ + \ + gvaddq_u16(fb_rb, fb_rb, pixels_rb); \ + gvaddq_u16(fb_g, fb_g, pixels_g); \ + \ + gvminq_u8(fb_rb, fb_rb, d128_0x7C1F); \ + gvminq_u16(fb_g, fb_g, d128_0x03E0); \ + \ + gvorrq(blend_pixels, fb_rb, fb_g); \ +} \ + +#define blend_blocks_blended_combine_textured() \ +{ \ + vec_8x16u blend_mask; \ + gvcltzq_s16(blend_mask, pixels); \ + \ + gvorrq(blend_pixels, blend_pixels, d128_0x8000); \ + gvbifq(blend_pixels, pixels, blend_mask); \ +} \ + +#define blend_blocks_blended_combine_untextured() \ + +#define blend_blocks_body_blend(blend_mode, texturing) \ +{ \ + blend_blocks_##blend_mode(); \ + 
blend_blocks_blended_combine_##texturing(); \ +} \ + +#define blend_blocks_body_average(texturing) \ + blend_blocks_body_blend(average, texturing) \ + +#define blend_blocks_body_add(texturing) \ + blend_blocks_body_blend(add, texturing) \ + +#define blend_blocks_body_subtract(texturing) \ + blend_blocks_body_blend(subtract, texturing) \ + +#define blend_blocks_body_add_fourth(texturing) \ + blend_blocks_body_blend(add_fourth, texturing) \ + +#define blend_blocks_body_unblended(texturing) \ + blend_pixels = pixels \ + +#define blend_blocks_do(texturing, blend_mode, mask_evaluate) \ + block_struct *block = psx_gpu->blocks; \ + u32 num_blocks = psx_gpu->num_blocks; \ + vec_8x16u draw_mask; \ + vec_8x16u pixels; \ + vec_8x16u blend_pixels; \ + vec_8x16u framebuffer_pixels; \ + vec_8x16u msb_mask; \ + vec_8x16u d128_0x8000; \ + \ + u16 *fb_ptr; \ + \ + gvdupq_n_u16(d128_0x8000, 0x8000); \ + gvdupq_n_u16(msb_mask, psx_gpu->mask_msb); \ + (void)d128_0x8000; /* sometimes unused */ \ + \ + while(num_blocks) \ + { \ + gvld1q_u16(pixels, block->pixels.e); \ + gvld1q_u16(draw_mask, block->draw_mask.e); \ + fb_ptr = block->fb_ptr; \ + \ + gvld1q_u16(framebuffer_pixels, fb_ptr); \ + \ + blend_blocks_mask_evaluate_##mask_evaluate(); \ + blend_blocks_body_##blend_mode(texturing); \ + \ + gvorrq(blend_pixels, blend_pixels, msb_mask); \ + gvbifq(framebuffer_pixels, blend_pixels, draw_mask); \ + gvst1q_u16(framebuffer_pixels, fb_ptr); \ + \ + num_blocks--; \ + block++; \ + } \ + + +void blend_blocks_textured_average_off(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_textured_average_off_(psx_gpu); + return; +#endif + blend_blocks_do(textured, average, off); +} + +void blend_blocks_untextured_average_off(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_untextured_average_off_(psx_gpu); + return; +#endif + blend_blocks_do(untextured, average, off); +} + +void blend_blocks_textured_average_on(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_textured_average_on_(psx_gpu); + return; 
+#endif + blend_blocks_do(textured, average, on); +} + +void blend_blocks_untextured_average_on(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_untextured_average_on_(psx_gpu); + return; +#endif + blend_blocks_do(untextured, average, on); +} + +void blend_blocks_textured_add_off(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_textured_add_off_(psx_gpu); + return; +#endif + blend_blocks_do(textured, add, off); +} + +void blend_blocks_textured_add_on(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_textured_add_on_(psx_gpu); + return; +#endif + blend_blocks_do(textured, add, on); +} + +void blend_blocks_untextured_add_off(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_untextured_add_off_(psx_gpu); + return; +#endif + blend_blocks_do(untextured, add, off); +} + +void blend_blocks_untextured_add_on(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_untextured_add_on_(psx_gpu); + return; +#endif + blend_blocks_do(untextured, add, on); +} + +void blend_blocks_textured_subtract_off(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_textured_subtract_off_(psx_gpu); + return; +#endif + blend_blocks_do(textured, subtract, off); +} + +void blend_blocks_textured_subtract_on(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_textured_subtract_on_(psx_gpu); + return; +#endif + blend_blocks_do(textured, subtract, on); +} + +void blend_blocks_untextured_subtract_off(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_untextured_subtract_off_(psx_gpu); + return; +#endif + blend_blocks_do(untextured, subtract, off); +} + +void blend_blocks_untextured_subtract_on(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_untextured_subtract_on_(psx_gpu); + return; +#endif + blend_blocks_do(untextured, subtract, on); +} + +void blend_blocks_textured_add_fourth_off(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_textured_add_fourth_off_(psx_gpu); + return; +#endif + blend_blocks_do(textured, add_fourth, off); +} + +void blend_blocks_textured_add_fourth_on(psx_gpu_struct *psx_gpu) +{ +#if 0 
+ blend_blocks_textured_add_fourth_on_(psx_gpu); + return; +#endif + blend_blocks_do(textured, add_fourth, on); +} + +void blend_blocks_untextured_add_fourth_off(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_untextured_add_fourth_off_(psx_gpu); + return; +#endif + blend_blocks_do(untextured, add_fourth, off); +} + +void blend_blocks_untextured_add_fourth_on(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_untextured_add_fourth_on_(psx_gpu); + return; +#endif + blend_blocks_do(untextured, add_fourth, on); +} + +void blend_blocks_textured_unblended_on(psx_gpu_struct *psx_gpu) +{ +#if 0 + blend_blocks_textured_unblended_on_(psx_gpu); + return; +#endif + blend_blocks_do(textured, unblended, on); +} + +void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu) +{ +} + +void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color) +{ + if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE | + RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 && + (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0) + { + setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color); + return; + } + +#if 0 + setup_sprite_untextured_(psx_gpu, x, y, u, v, width, height, color); + return; +#endif + u32 right_width = ((width - 1) & 0x7) + 1; + u32 right_mask_bits = (0xFF << right_width); + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x; + u32 block_width = (width + 7) / 8; + u32 fb_ptr_pitch = 1024 - ((block_width - 1) * 8); + u32 blocks_remaining; + u32 num_blocks = psx_gpu->num_blocks; + block_struct *block = psx_gpu->blocks + num_blocks; + + u32 color_r = color & 0xFF; + u32 color_g = (color >> 8) & 0xFF; + u32 color_b = (color >> 16) & 0xFF; + vec_8x16u colors; + vec_8x16u right_mask; + vec_8x16u test_mask; + vec_8x16u zero_mask; + + gvld1q_u16(test_mask, psx_gpu->test_mask.e); + color = (color_r >> 3) | ((color_g >> 3) << 5) | ((color_b >> 3) << 10); + + gvdupq_n_u16(colors, color); + 
gvdupq_n_u16(zero_mask, 0x00); + gvdupq_n_u16(right_mask, right_mask_bits); + gvtstq_u16(right_mask, right_mask, test_mask); + + while(height) + { + blocks_remaining = block_width - 1; + num_blocks += block_width; + + if(num_blocks > MAX_BLOCKS) + { + flush_render_block_buffer(psx_gpu); + num_blocks = block_width; + block = psx_gpu->blocks; + } + + while(blocks_remaining) + { + gvst1q_u16(colors, block->pixels.e); + gvst1q_u16(zero_mask, block->draw_mask.e); + block->fb_ptr = fb_ptr; + + fb_ptr += 8; + block++; + blocks_remaining--; + } + + gvst1q_u16(colors, block->pixels.e); + gvst1q_u16(right_mask, block->draw_mask.e); + block->fb_ptr = fb_ptr; + + block++; + fb_ptr += fb_ptr_pitch; + + height--; + psx_gpu->num_blocks = num_blocks; + } +} + +#define setup_sprite_tiled_initialize_4bpp_clut() \ + vec_16x8u clut_low, clut_high; \ + \ + gvld2q_u8(clut_low, clut_high, (u8 *)psx_gpu->clut_ptr) \ + +#define setup_sprite_tiled_initialize_4bpp() \ + setup_sprite_tiled_initialize_4bpp_clut(); \ + \ + if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_4bpp_mask) \ + update_texture_4bpp_cache(psx_gpu) \ + +#define setup_sprite_tiled_initialize_8bpp() \ + if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_8bpp_mask) \ + update_texture_8bpp_cache(psx_gpu) \ + +#define setup_sprite_tile_fetch_texel_block_8bpp(offset) \ + texture_block_ptr = psx_gpu->texture_page_ptr + \ + ((texture_offset + offset) & texture_mask); \ + \ + gvld1_u8(texels, (u8 *)texture_block_ptr) \ + +#define setup_sprite_tile_add_blocks(tile_num_blocks) \ + num_blocks += tile_num_blocks; \ + \ + if(num_blocks > MAX_BLOCKS) \ + { \ + flush_render_block_buffer(psx_gpu); \ + num_blocks = tile_num_blocks; \ + block = psx_gpu->blocks; \ + } \ + +#define setup_sprite_tile_full_4bpp(edge) \ +{ \ + vec_8x8u texels_low, texels_high; \ + setup_sprite_tile_add_blocks(sub_tile_height * 2); \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + gvtbl2_u8(texels_low, 
clut_low, texels); \ + gvtbl2_u8(texels_high, clut_high, texels); \ + \ + gvst2_u8(texels_low, texels_high, (u8 *)block->texels.e); \ + block->draw_mask_bits = left_mask_bits; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + setup_sprite_tile_fetch_texel_block_8bpp(8); \ + gvtbl2_u8(texels_low, clut_low, texels); \ + gvtbl2_u8(texels_high, clut_high, texels); \ + \ + gvst2_u8(texels_low, texels_high, (u8 *)block->texels.e); \ + block->draw_mask_bits = right_mask_bits; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + fb_ptr += 1024; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + +#define setup_sprite_tile_half_4bpp(edge) \ +{ \ + vec_8x8u texels_low, texels_high; \ + setup_sprite_tile_add_blocks(sub_tile_height); \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + gvtbl2_u8(texels_low, clut_low, texels); \ + gvtbl2_u8(texels_high, clut_high, texels); \ + \ + gvst2_u8(texels_low, texels_high, (u8 *)block->texels.e); \ + block->draw_mask_bits = edge##_mask_bits; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + fb_ptr += 1024; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + +#define setup_sprite_tile_full_8bpp(edge) \ +{ \ + setup_sprite_tile_add_blocks(sub_tile_height * 2); \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + gvst1_u8(texels, block->r.e); \ + block->draw_mask_bits = left_mask_bits; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + setup_sprite_tile_fetch_texel_block_8bpp(8); \ + gvst1_u8(texels, block->r.e); \ + block->draw_mask_bits = right_mask_bits; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + fb_ptr += 1024; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + +#define setup_sprite_tile_half_8bpp(edge) \ +{ \ + 
setup_sprite_tile_add_blocks(sub_tile_height); /* half tile: one block per row */ \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + gvst1_u8(texels, block->r.e); \ + block->draw_mask_bits = edge##_mask_bits; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + fb_ptr += 1024; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_right() \ + texture_offset = texture_offset_base + 8; \ + fb_ptr += 8 \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_left() \ + texture_offset = texture_offset_base \ + +#define setup_sprite_tile_column_edge_pre_adjust_half(edge) \ + setup_sprite_tile_column_edge_pre_adjust_half_##edge() \ + +#define setup_sprite_tile_column_edge_pre_adjust_full(edge) \ + texture_offset = texture_offset_base \ + +#define setup_sprite_tile_column_edge_post_adjust_half_right() \ + fb_ptr -= 8 \ + +#define setup_sprite_tile_column_edge_post_adjust_half_left() \ + +#define setup_sprite_tile_column_edge_post_adjust_half(edge) \ + setup_sprite_tile_column_edge_post_adjust_half_##edge() \ + +#define setup_sprite_tile_column_edge_post_adjust_full(edge) \ + + +#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode, \ + x4mode) \ +do \ +{ \ + sub_tile_height = column_data; \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \ +} while(0) \ + +#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode, \ + x4mode) \ +do \ +{ \ + u32 tiles_remaining = column_data >> 16; \ + sub_tile_height = column_data & 0xFF; \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + tiles_remaining -= 1; \ + \ + while(tiles_remaining) \ + { \ + 
sub_tile_height = 16; \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + tiles_remaining--; \ + } \ + \ + sub_tile_height = (column_data >> 8) & 0xFF; \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \ +} while(0) \ + + +#define setup_sprite_column_data_single() \ + column_data = height \ + +#define setup_sprite_column_data_multi() \ + column_data = 16 - offset_v; \ + column_data |= ((height_rounded & 0xF) + 1) << 8; \ + column_data |= (tile_height - 1) << 16 \ + +#define RIGHT_MASK_BIT_SHIFT 8 +#define RIGHT_MASK_BIT_SHIFT_4x 16 + +#define setup_sprite_tile_column_width_single(texture_mode, multi_height, \ + edge_mode, edge, x4mode) \ +{ \ + setup_sprite_column_data_##multi_height(); \ + left_mask_bits = left_block_mask | right_block_mask; \ + right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ + \ + setup_sprite_tile_column_height_##multi_height(edge_mode, edge, \ + texture_mode, x4mode); \ +} \ + +#define setup_sprite_tiled_advance_column() \ + texture_offset_base += 0x100; \ + if((texture_offset_base & 0xF00) == 0) \ + texture_offset_base -= (0x100 + 0xF00) \ + +#define FB_PTR_MULTIPLIER 1 +#define FB_PTR_MULTIPLIER_4x 2 + +#define setup_sprite_tile_column_width_multi(texture_mode, multi_height, \ + left_mode, right_mode, x4mode) \ +{ \ + setup_sprite_column_data_##multi_height(); \ + s32 fb_ptr_advance_column = (16 - (1024 * height)) \ + * FB_PTR_MULTIPLIER##x4mode; \ + \ + tile_width -= 2; \ + left_mask_bits = left_block_mask; \ + right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ + \ + setup_sprite_tile_column_height_##multi_height(left_mode, right, \ + texture_mode, x4mode); \ + fb_ptr += fb_ptr_advance_column; \ + \ + left_mask_bits = 0x00; \ + right_mask_bits = 0x00; \ + \ + while(tile_width) \ + { \ + setup_sprite_tiled_advance_column(); \ + setup_sprite_tile_column_height_##multi_height(full, none, \ + 
texture_mode, x4mode); \ + fb_ptr += fb_ptr_advance_column; \ + tile_width--; \ + } \ + \ + left_mask_bits = right_block_mask; \ + right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ + \ + setup_sprite_tiled_advance_column(); \ + setup_sprite_tile_column_height_##multi_height(right_mode, left, \ + texture_mode, x4mode); \ +} \ + + +/* 4x stuff */ +#define setup_sprite_tiled_initialize_4bpp_4x() \ + setup_sprite_tiled_initialize_4bpp_clut() \ + +#define setup_sprite_tiled_initialize_8bpp_4x() \ + +#define setup_sprite_tile_full_4bpp_4x(edge) \ +{ \ + vec_8x8u texels_low, texels_high; \ + vec_8x16u pixels; \ + vec_4x16u pixels_half; \ + setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \ + u32 left_mask_bits_a = left_mask_bits & 0xFF; \ + u32 left_mask_bits_b = left_mask_bits >> 8; \ + u32 right_mask_bits_a = right_mask_bits & 0xFF; \ + u32 right_mask_bits_b = right_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + gvtbl2_u8(texels_low, clut_low, texels); \ + gvtbl2_u8(texels_high, clut_high, texels); \ + gvzip_u8(pixels, texels_low, texels_high); \ + \ + gvget_lo(pixels_half, pixels); \ + gvst2_u16(pixels_half, pixels_half, block->texels.e); \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + gvst2_u16(pixels_half, pixels_half, block->texels.e); \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + gvget_hi(pixels_half, pixels); \ + gvst2_u16(pixels_half, pixels_half, block->texels.e); \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + gvst2_u16(pixels_half, pixels_half, block->texels.e); \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + setup_sprite_tile_fetch_texel_block_8bpp(8); \ + gvtbl2_u8(texels_low, clut_low, texels); \ + gvtbl2_u8(texels_high, clut_high, texels); \ + gvzip_u8(pixels, 
texels_low, texels_high); \ + \ + gvget_lo(pixels_half, pixels); \ + gvst2_u16(pixels_half, pixels_half, block->texels.e); \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 16; \ + block++; \ + \ + gvst2_u16(pixels_half, pixels_half, block->texels.e); \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024 + 16; \ + block++; \ + \ + gvget_hi(pixels_half, pixels); \ + gvst2_u16(pixels_half, pixels_half, block->texels.e); \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24; \ + block++; \ + \ + gvst2_u16(pixels_half, pixels_half, block->texels.e); \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 24; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + +#define setup_sprite_tile_half_4bpp_4x(edge) \ +{ \ + vec_8x8u texels_low, texels_high; \ + vec_8x16u pixels; \ + vec_4x16u pixels_half; \ + setup_sprite_tile_add_blocks(sub_tile_height * 4); \ + u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ + u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + gvtbl2_u8(texels_low, clut_low, texels); \ + gvtbl2_u8(texels_high, clut_high, texels); \ + gvzip_u8(pixels, texels_low, texels_high); \ + \ + gvget_lo(pixels_half, pixels); \ + gvst2_u16(pixels_half, pixels_half, block->texels.e); \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + gvst2_u16(pixels_half, pixels_half, block->texels.e); \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + gvget_hi(pixels_half, pixels); \ + gvst2_u16(pixels_half, pixels_half, block->texels.e); \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + gvst2_u16(pixels_half, pixels_half, 
block->texels.e); \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + +#define setup_sprite_tile_full_8bpp_4x(edge) \ +{ \ + setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \ + vec_8x16u texels_wide; \ + vec_4x16u texels_half; \ + u32 left_mask_bits_a = left_mask_bits & 0xFF; \ + u32 left_mask_bits_b = left_mask_bits >> 8; \ + u32 right_mask_bits_a = right_mask_bits & 0xFF; \ + u32 right_mask_bits_b = right_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + gvzip_u8(texels_wide, texels, texels); \ + gvget_lo(texels_half, texels_wide); \ + gvst1_u8(texels_half, block->r.e); \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + gvst1_u8(texels_half, block->r.e); \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + gvget_hi(texels_half, texels_wide); \ + gvst1_u8(texels_half, block->r.e); \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + gvst1_u8(texels_half, block->r.e); \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + setup_sprite_tile_fetch_texel_block_8bpp(8); \ + gvzip_u8(texels_wide, texels, texels); \ + gvget_lo(texels_half, texels_wide); \ + gvst1_u8(texels_half, block->r.e); \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 16; \ + block++; \ + \ + gvst1_u8(texels_half, block->r.e); \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024 + 16; \ + block++; \ + \ + gvget_hi(texels_half, texels_wide); \ + gvst1_u8(texels_half, block->r.e); \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24; \ + block++; \ + \ + 
gvst1_u8(texels_half, block->r.e); \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24 + 1024; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + +#define setup_sprite_tile_half_8bpp_4x(edge) \ +{ \ + setup_sprite_tile_add_blocks(sub_tile_height * 4); \ + vec_8x16u texels_wide; \ + vec_4x16u texels_half; \ + u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ + u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + gvzip_u8(texels_wide, texels, texels); \ + gvget_lo(texels_half, texels_wide); \ + gvst1_u8(texels_half, block->r.e); \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + gvst1_u8(texels_half, block->r.e); \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + gvget_hi(texels_half, texels_wide); \ + gvst1_u8(texels_half, block->r.e); \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + gvst1_u8(texels_half, block->r.e); \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8 + 1024; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \ + texture_offset = texture_offset_base + 8; \ + fb_ptr += 16 \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \ + texture_offset = texture_offset_base \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \ + texture_offset = texture_offset_base \ + +#define 
setup_sprite_tile_column_edge_post_adjust_half_right_4x() \ + fb_ptr -= 16 \ + +#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \ + +#define setup_sprite_offset_u_adjust() \ + +#define setup_sprite_comapre_left_block_mask() \ + ((left_block_mask & 0xFF) == 0xFF) \ + +#define setup_sprite_comapre_right_block_mask() \ + (((right_block_mask >> 8) & 0xFF) == 0xFF) \ + +#define setup_sprite_offset_u_adjust_4x() \ + offset_u *= 2; \ + offset_u_right = offset_u_right * 2 + 1 \ + +#define setup_sprite_comapre_left_block_mask_4x() \ + ((left_block_mask & 0xFFFF) == 0xFFFF) \ + +#define setup_sprite_comapre_right_block_mask_4x() \ + (((right_block_mask >> 16) & 0xFFFF) == 0xFFFF) \ + + +#define setup_sprite_tiled_do(texture_mode, x4mode) \ + s32 offset_u = u & 0xF; \ + s32 offset_v = v & 0xF; \ + \ + s32 width_rounded = offset_u + width + 15; \ + s32 height_rounded = offset_v + height + 15; \ + s32 tile_height = height_rounded / 16; \ + s32 tile_width = width_rounded / 16; \ + u32 offset_u_right = width_rounded & 0xF; \ + \ + setup_sprite_offset_u_adjust##x4mode(); \ + \ + u32 left_block_mask = ~(0xFFFFFFFF << offset_u); \ + u32 right_block_mask = 0xFFFFFFFE << offset_u_right; \ + \ + u32 left_mask_bits; \ + u32 right_mask_bits; \ + \ + u32 sub_tile_height; \ + u32 column_data; \ + \ + u32 texture_mask = (psx_gpu->texture_mask_width & 0xF) | \ + ((psx_gpu->texture_mask_height & 0xF) << 4) | \ + ((psx_gpu->texture_mask_width >> 4) << 8) | \ + ((psx_gpu->texture_mask_height >> 4) << 12); \ + u32 texture_offset = ((v & 0xF) << 4) | ((u & 0xF0) << 4) | \ + ((v & 0xF0) << 8); \ + u32 texture_offset_base = texture_offset; \ + u32 control_mask; \ + \ + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (x - offset_u); \ + u32 num_blocks = psx_gpu->num_blocks; \ + 
block_struct *block = psx_gpu->blocks + num_blocks; \ + \ + u16 *texture_block_ptr; \ + vec_8x8u texels; \ + \ + setup_sprite_tiled_initialize_##texture_mode##x4mode(); \ + \ + control_mask = tile_width == 1; \ + control_mask |= (tile_height == 1) << 1; \ + control_mask |= setup_sprite_comapre_left_block_mask##x4mode() << 2; \ + control_mask |= setup_sprite_comapre_right_block_mask##x4mode() << 3; \ + \ + switch(control_mask) \ + { \ + default: \ + case 0x0: \ + setup_sprite_tile_column_width_multi(texture_mode, multi, full, full, \ + x4mode); \ + break; \ + \ + case 0x1: \ + setup_sprite_tile_column_width_single(texture_mode, multi, full, none, \ + x4mode); \ + break; \ + \ + case 0x2: \ + setup_sprite_tile_column_width_multi(texture_mode, single, full, full, \ + x4mode); \ + break; \ + \ + case 0x3: \ + setup_sprite_tile_column_width_single(texture_mode, single, full, none, \ + x4mode); \ + break; \ + \ + case 0x4: \ + setup_sprite_tile_column_width_multi(texture_mode, multi, half, full, \ + x4mode); \ + break; \ + \ + case 0x5: \ + setup_sprite_tile_column_width_single(texture_mode, multi, half, right, \ + x4mode); \ + break; \ + \ + case 0x6: \ + setup_sprite_tile_column_width_multi(texture_mode, single, half, full, \ + x4mode); \ + break; \ + \ + case 0x7: \ + setup_sprite_tile_column_width_single(texture_mode, single, half, right, \ + x4mode); \ + break; \ + \ + case 0x8: \ + setup_sprite_tile_column_width_multi(texture_mode, multi, full, half, \ + x4mode); \ + break; \ + \ + case 0x9: \ + setup_sprite_tile_column_width_single(texture_mode, multi, half, left, \ + x4mode); \ + break; \ + \ + case 0xA: \ + setup_sprite_tile_column_width_multi(texture_mode, single, full, half, \ + x4mode); \ + break; \ + \ + case 0xB: \ + setup_sprite_tile_column_width_single(texture_mode, single, half, left, \ + x4mode); \ + break; \ + \ + case 0xC: \ + setup_sprite_tile_column_width_multi(texture_mode, multi, half, half, \ + x4mode); \ + break; \ + \ + case 0xE: \ + 
setup_sprite_tile_column_width_multi(texture_mode, single, half, half, \ + x4mode); \ + break; \ + } \ + +void setup_sprite_4bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color) +{ +#if 0 + setup_sprite_4bpp_(psx_gpu, x, y, u, v, width, height, color); + return; +#endif + setup_sprite_tiled_do(4bpp,) +} + +void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color) +{ +#if 0 + setup_sprite_8bpp_(psx_gpu, x, y, u, v, width, height, color); + return; +#endif + setup_sprite_tiled_do(8bpp,) +} + +#undef draw_mask_fb_ptr_left +#undef draw_mask_fb_ptr_right + +void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color) +{ +#if 0 + setup_sprite_4bpp_4x_(psx_gpu, x, y, u, v, width, height, color); + return; +#endif + setup_sprite_tiled_do(4bpp, _4x) +} + +void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color) +{ +#if 0 + setup_sprite_8bpp_4x_(psx_gpu, x, y, u, v, width, height, color); + return; +#endif + setup_sprite_tiled_do(8bpp, _4x) +} + + +void scale2x_tiles8(void * __restrict__ dst_, const void * __restrict__ src_, int w8, int h) +{ +#if 0 + scale2x_tiles8_(dst, src_, w8, h); + return; +#endif + const u16 * __restrict__ src = src_; + const u16 * __restrict__ src1; + u16 * __restrict__ dst = dst_; + u16 * __restrict__ dst1; + gvreg a, b; + int w; + for (; h > 0; h--, src += 1024, dst += 1024*2) + { + src1 = src; + dst1 = dst; + for (w = w8; w > 0; w--, src1 += 8, dst1 += 8*2) + { + gvld1q_u16(a, src1); + gvzipq_u16(a, b, a, a); + gvst1q_u16(a, dst1); + gvst1q_u16(b, dst1 + 8); + gvst1q_u16(a, dst1 + 1024); + gvst1q_u16(b, dst1 + 1024 + 8); + } + } +} + +// vim:ts=2:sw=2:expandtab From 57467c77b7045642afd8de1679f1149acc9c1ab5 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 8 Aug 2022 21:16:59 +0300 Subject: [PATCH 171/597] gpu_neon: integration for arm64 --- 
Makefile | 20 +++++++++++++++----- configure | 17 +++++++++++++++-- frontend/cspace.c | 4 ++-- frontend/menu.c | 8 ++++---- frontend/plugin_lib.c | 8 ++++++-- plugins/gpu_neon/Makefile | 9 +++++++-- 6 files changed, 49 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index fa5d279bc..585480d1d 100644 --- a/Makefile +++ b/Makefile @@ -50,7 +50,7 @@ OBJS += libpcsxcore/gte.o libpcsxcore/gte_nf.o libpcsxcore/gte_divider.o ifeq "$(ARCH)" "arm" OBJS += libpcsxcore/gte_arm.o endif -ifeq "$(HAVE_NEON)" "1" +ifeq "$(HAVE_NEON_ASM)" "1" OBJS += libpcsxcore/gte_neon.o endif libpcsxcore/psxbios.o: CFLAGS += -Wno-nonnull @@ -62,7 +62,7 @@ OBJS += libpcsxcore/new_dynarec/pcsxmem.o ifeq "$(ARCH)" "arm" OBJS += libpcsxcore/new_dynarec/linkage_arm.o libpcsxcore/new_dynarec/new_dynarec.o: libpcsxcore/new_dynarec/assem_arm.c - else ifeq "$(ARCH)" "aarch64" + else ifneq (,$(findstring $(ARCH),aarch64 arm64)) OBJS += libpcsxcore/new_dynarec/linkage_arm64.o libpcsxcore/new_dynarec/new_dynarec.o: libpcsxcore/new_dynarec/assem_arm64.c else @@ -119,9 +119,17 @@ endif # builtin gpu OBJS += plugins/gpulib/gpu.o plugins/gpulib/vout_pl.o ifeq "$(BUILTIN_GPU)" "neon" -OBJS += plugins/gpu_neon/psx_gpu_if.o plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.o +OBJS += plugins/gpu_neon/psx_gpu_if.o plugins/gpu_neon/psx_gpu_if.o: CFLAGS += -DNEON_BUILD -DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP plugins/gpu_neon/psx_gpu_if.o: plugins/gpu_neon/psx_gpu/*.c +frontend/menu.o frontend/plugin_lib.o: CFLAGS += -DBUILTIN_GPU_NEON + ifeq "$(HAVE_NEON_ASM)" "1" + OBJS += plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.o + else + OBJS += plugins/gpu_neon/psx_gpu/psx_gpu_simd.o + plugins/gpu_neon/psx_gpu_if.o: CFLAGS += -DSIMD_BUILD + plugins/gpu_neon/psx_gpu/psx_gpu_simd.o: CFLAGS += -DSIMD_BUILD + endif endif ifeq "$(BUILTIN_GPU)" "peops" # note: code is not safe for strict-aliasing? 
(Castlevania problems) @@ -168,11 +176,13 @@ OBJS += plugins/dfinput/main.o plugins/dfinput/pad.o plugins/dfinput/guncon.o # frontend/gui OBJS += frontend/cspace.o -ifeq "$(HAVE_NEON)" "1" +ifeq "$(HAVE_NEON_ASM)" "1" OBJS += frontend/cspace_neon.o +frontend/cspace.o: CFLAGS += -DHAVE_bgr555_to_rgb565 -DHAVE_bgr888_to_x else ifeq "$(ARCH)" "arm" OBJS += frontend/cspace_arm.o +frontend/cspace.o: CFLAGS += -DHAVE_bgr555_to_rgb565 endif endif @@ -235,7 +245,7 @@ OBJS += frontend/plugin_lib.o OBJS += frontend/libpicofe/linux/plat.o OBJS += frontend/libpicofe/readpng.o frontend/libpicofe/fonts.o frontend/libpicofe/linux/plat.o: CFLAGS += -DNO_HOME_DIR -ifeq "$(HAVE_NEON)" "1" +ifeq "$(HAVE_NEON_ASM)" "1" OBJS += frontend/libpicofe/arm/neon_scale2x.o OBJS += frontend/libpicofe/arm/neon_eagle2x.o frontend/libpicofe/arm/neon_scale2x.o: CFLAGS += -DDO_BGR_TO_RGB diff --git a/configure b/configure index cb124ac6d..8ef59cedb 100755 --- a/configure +++ b/configure @@ -50,6 +50,7 @@ have_armv5="" have_armv6="" have_armv7="" have_arm_neon="" +have_arm_neon_asm="" have_tslib="" have_gles="" have_c64x_dsp="" @@ -213,7 +214,7 @@ arm*) fi if [ "x$have_arm_neon" = "x" ]; then - # detect NEON from user-supplied cflags to enable asm code + # detect NEON from user-supplied cflags to enable neon code have_arm_neon=`check_define __ARM_NEON__ && echo yes` || true fi if [ "x$have_armv7" = "x" ]; then @@ -278,8 +279,14 @@ arm*) echo "You probably want to specify -mcpu= or -march= like this:" echo " CFLAGS=-march=armv7-a ./configure ..." 
fi + have_arm_neon_asm=$have_arm_neon ;; aarch64) + have_arm_neon="yes" + have_arm_neon_asm="no" + if [ "x$builtin_gpu" = "x" ]; then + builtin_gpu="neon" + fi ;; *) # dynarec only available on ARM @@ -508,6 +515,7 @@ done test "x$have_armv6" != "x" || have_armv6="no" test "x$have_armv7" != "x" || have_armv7="no" test "x$have_arm_neon" != "x" || have_arm_neon="no" +test "x$have_arm_neon_asm" != "x" || have_arm_neon_asm="no" test "x$have_gles" != "x" || have_gles="no" test "x$have_c64x_dsp" != "x" || have_c64x_dsp="no" @@ -521,9 +529,11 @@ echo "C compiler flags $CFLAGS" echo "libraries $MAIN_LDLIBS" echo "linker flags $LDFLAGS$MAIN_LDFLAGS" echo "enable dynarec $enable_dynarec" +if [ "$ARCH" = "arm" -o "$ARCH" = "aarch64" ]; then + echo "enable ARM NEON $have_arm_neon" +fi if [ "$ARCH" = "arm" ]; then echo "ARMv7 optimizations $have_armv7" - echo "enable ARM NEON $have_arm_neon" echo "TI C64x DSP support $have_c64x_dsp" fi echo "tslib support $have_tslib" @@ -558,6 +568,9 @@ echo "PLUGINS = $plugins" >> $config_mak if [ "$have_arm_neon" = "yes" ]; then echo "HAVE_NEON = 1" >> $config_mak fi +if [ "$have_arm_neon_asm" = "yes" ]; then + echo "HAVE_NEON_ASM = 1" >> $config_mak +fi if [ "$have_tslib" = "yes" ]; then echo "HAVE_TSLIB = 1" >> $config_mak fi diff --git a/frontend/cspace.c b/frontend/cspace.c index f60026c5c..8426a1339 100644 --- a/frontend/cspace.c +++ b/frontend/cspace.c @@ -22,7 +22,7 @@ #define LE16TOHx2(x) (x) #endif -#ifndef __arm__ +#ifndef HAVE_bgr555_to_rgb565 void bgr555_to_rgb565(void *dst_, const void *src_, int bytes) { @@ -43,7 +43,7 @@ void bgr555_to_rgb565(void *dst_, const void *src_, int bytes) #endif -#ifndef __ARM_NEON__ +#ifndef HAVE_bgr888_to_x void bgr888_to_rgb565(void *dst_, const void *src_, int bytes) { diff --git a/frontend/menu.c b/frontend/menu.c index 85f7b7f93..4808e9c8c 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1343,7 +1343,7 @@ static int menu_loop_gfx_options(int id, int keys) // ------------ bios/plugins 
------------ -#ifdef __ARM_NEON__ +#ifdef BUILTIN_GPU_NEON static const char h_gpu_neon[] = "Configure built-in NEON GPU plugin"; @@ -1502,7 +1502,7 @@ static const char h_bios[] = "HLE is simulated BIOS. BIOS selection is sav "savestates and can't be changed there. Must save\n" "config and reload the game for change to take effect"; static const char h_plugin_gpu[] = -#ifdef __ARM_NEON__ +#ifdef BUILTIN_GPU_NEON "builtin_gpu is the NEON GPU, very fast and accurate\n" #endif "gpu_peops is Pete's soft GPU, slow but accurate\n" @@ -1523,7 +1523,7 @@ static menu_entry e_menu_plugin_options[] = mee_enum_h ("BIOS", 0, bios_sel, bioses, h_bios), mee_enum_h ("GPU plugin", 0, gpu_plugsel, gpu_plugins, h_plugin_gpu), mee_enum_h ("SPU plugin", 0, spu_plugsel, spu_plugins, h_plugin_spu), -#ifdef __ARM_NEON__ +#ifdef BUILTIN_GPU_NEON mee_handler_h ("Configure built-in GPU plugin", menu_loop_plugin_gpu_neon, h_gpu_neon), #endif mee_handler_h ("Configure gpu_peops plugin", menu_loop_plugin_gpu_peops, h_gpu_peops), @@ -1979,7 +1979,7 @@ static const char credits_text[] = "(C) 2005-2009 PCSX-df Team\n" "(C) 2009-2011 PCSX-Reloaded Team\n\n" "ARM recompiler (C) 2009-2011 Ari64\n" -#ifdef __ARM_NEON__ +#ifdef BUILTIN_GPU_NEON "ARM NEON GPU (c) 2011-2012 Exophase\n" #endif "PEOpS GPU and SPU by Pete Bernert\n" diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index d215636f4..cc3576bfb 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -468,7 +468,7 @@ static int dispmode_default(void) return 1; } -#ifdef __ARM_NEON__ +#ifdef BUILTIN_GPU_NEON static int dispmode_doubleres(void) { if (!(pl_rearmed_cbs.gpu_caps & GPU_CAP_SUPPORTS_2X) @@ -480,7 +480,9 @@ static int dispmode_doubleres(void) snprintf(hud_msg, sizeof(hud_msg), "double resolution"); return 1; } +#endif +#ifdef __ARM_NEON__ static int dispmode_scale2x(void) { if (!resolution_ok(psx_w * 2, psx_h * 2) || psx_bpp != 16) @@ -506,8 +508,10 @@ static int dispmode_eagle2x(void) static int 
(*dispmode_switchers[])(void) = { dispmode_default, -#ifdef __ARM_NEON__ +#ifdef BUILTIN_GPU_NEON dispmode_doubleres, +#endif +#ifdef __ARM_NEON__ dispmode_scale2x, dispmode_eagle2x, #endif diff --git a/plugins/gpu_neon/Makefile b/plugins/gpu_neon/Makefile index 08bf0ee6a..955feab6a 100644 --- a/plugins/gpu_neon/Makefile +++ b/plugins/gpu_neon/Makefile @@ -5,9 +5,14 @@ include ../../config.mak SRC += psx_gpu_if.c CFLAGS += -DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP -ifeq "$(HAVE_NEON)" "1" -SRC += psx_gpu/psx_gpu_arm_neon.S CFLAGS += -DNEON_BUILD +ifeq "$(HAVE_NEON)" "1" + ifeq "$(HAVE_NEON_ASM)" "1" + SRC += psx_gpu/psx_gpu_arm_neon.S + else + SRC += psx_gpu/psx_gpu_simd.c + CFLAGS += -DSIMD_BUILD + endif else CFLAGS += -fno-strict-aliasing endif From d639fa7fe4be413006e13209587fa710b98820f5 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 9 Aug 2022 00:07:27 +0300 Subject: [PATCH 172/597] cspace: generic implementation with vector extensions --- frontend/cspace.c | 71 ++++++++++++++++++++++++++++++++++++++++-- frontend/cspace_neon.S | 10 ++++++ 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/frontend/cspace.c b/frontend/cspace.c index 8426a1339..2b528a5a1 100644 --- a/frontend/cspace.c +++ b/frontend/cspace.c @@ -1,5 +1,5 @@ /* - * (C) Gražvydas "notaz" Ignotas, 2011,2012 + * (C) Gražvydas "notaz" Ignotas, 2011,2012,2022 * * This work is licensed under the terms of any of these licenses * (at your option): @@ -22,7 +22,74 @@ #define LE16TOHx2(x) (x) #endif -#ifndef HAVE_bgr555_to_rgb565 +#if defined(HAVE_bgr555_to_rgb565) + +/* have bgr555_to_rgb565 somewhere else */ + +#elif ((defined(__clang_major__) && __clang_major__ >= 4) \ + || (defined(__GNUC__) && __GNUC__ >= 5)) \ + && __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ + +#include <stdint.h> +#include <assert.h> + +#if defined(__ARM_NEON) || defined(__ARM_NEON__) +#include <arm_neon.h> +#define gsli(d_, s_, n_) d_ = vsliq_n_u16(d_, s_, n_) +#define gsri(d_, s_, n_) d_ = vsriq_n_u16(d_, s_, n_) +#else +#define gsli(d_, s_, n_) d_ |= s_ 
<< n_ +#define gsri(d_, s_, n_) d_ |= s_ >> n_ +#endif + +typedef uint16_t gvu16 __attribute__((vector_size(16),aligned(16))); +typedef uint16_t gvu16u __attribute__((vector_size(16),aligned(2))); +#define gdup(v_) {v_, v_, v_, v_, v_, v_, v_, v_} +#define do_one(s) ({ \ + uint16_t d_ = (s) << 1; d_ = (d_ & 0x07c0) | (d_ << 10) | (d_ >> 11); d_; \ +}) +#define do_one_simd(d_, s_, c0x07c0_) { \ + gvu16 s1 = s_ << 1; \ + d_ = s1 & c0x07c0_; \ + gsli(d_, s_, 11); \ + gsri(d_, s1, 11); \ +} + +void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_, int bytes) +{ + const uint16_t * __restrict__ src = src_; + uint16_t * __restrict__ dst = dst_; + gvu16 c0x07c0 = gdup(0x07c0); + + assert(!(((uintptr_t)dst | (uintptr_t)src | bytes) & 1)); + + // align the destination; the unaligned 16-byte load/store needs a full vector in range + if (((uintptr_t)dst & 0x0e) && bytes >= 16) + { + uintptr_t left = 0x10 - ((uintptr_t)dst & 0x0e); + gvu16 d, s = *(const gvu16u *)src; + do_one_simd(d, s, c0x07c0); + *(gvu16u *)dst = d; + dst += left / 2; + src += left / 2; + bytes -= left; + } + // go + for (; bytes >= 16; dst += 8, src += 8, bytes -= 16) + { + gvu16 d, s = *(const gvu16u *)src; + do_one_simd(d, s, c0x07c0); + *(gvu16 *)dst = d; + __builtin_prefetch(src + 128/2); + } + // finish it + for (; bytes > 0; dst++, src++, bytes -= 2) + *dst = do_one(*src); +} +#undef do_one +#undef do_one_simd + +#else void bgr555_to_rgb565(void *dst_, const void *src_, int bytes) { diff --git a/frontend/cspace_neon.S b/frontend/cspace_neon.S index 4cb3d4c8f..651ab8424 100644 --- a/frontend/cspace_neon.S +++ b/frontend/cspace_neon.S @@ -23,6 +23,16 @@ FUNCTION(bgr555_to_rgb565): @ dst, src, bytes pld [r1] mov r3, #0x07c0 vdup.16 q15, r3 + tst r0, #8 + beq 0f + @ align the dst + vld1.16 {d0}, [r1]! + vshl.u16 d0, d0, #1 + vshl.u16 d1, d0, #10 + vsri.u16 d1, d0, #11 + vbit d1, d0, d30 + vst1.16 {d1}, [r0]! 
+0: subs r2, r2, #64 blt btr16_end64 0: From 4f7c7e7a5726cad26a83f44c6ecc751d1fc18d60 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 9 Aug 2022 01:50:54 +0300 Subject: [PATCH 173/597] mark libretro makefile as unmaintained here --- Makefile.libretro | 208 +--------------------------------------------- 1 file changed, 1 insertion(+), 207 deletions(-) diff --git a/Makefile.libretro b/Makefile.libretro index 223ba9f90..03ccff7ea 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -1,207 +1 @@ -# Makefile for PCSX ReARMed (libretro) - -ifeq ($(platform),) - platform = unix - ifeq ($(shell uname -a),) - platform = win - else ifneq ($(findstring MINGW,$(shell uname -a)),) - platform = win - else ifneq ($(findstring Darwin,$(shell uname -a)),) - platform = osx - else ifneq ($(findstring win,$(shell uname -a)),) - platform = win - endif -endif - -CC ?= gcc -CXX ?= g++ -AS ?= as -CC_AS ?= $(CC) -CFLAGS ?= - -TARGET_NAME := pcsx_rearmed - -MMAP_WIN32=0 - -# Unix -ifeq ($(platform), unix) - TARGET := $(TARGET_NAME)_libretro.so - fpic := -fPIC - SHARED := -shared -Wl,--version-script=libretro/link.T - -# OS X -else ifeq ($(platform), osx) - TARGET := $(TARGET_NAME)_libretro.dylib - fpic := -fPIC - SHARED := -dynamiclib - OSXVER = `sw_vers -productVersion | cut -d. 
-f 2` - OSX_LT_MAVERICKS = `(( $(OSXVER) <= 9)) && echo "YES"` - ifeq ($(OSX_LT_MAVERICKS),"YES") - fpic += -mmacosx-version-min=10.5 - endif - -# iOS -else ifeq ($(platform), ios) - ARCH := arm - TARGET := $(TARGET_NAME)_libretro_ios.dylib - fpic := -fPIC - SHARED := -dynamiclib - - ifeq ($(IOSSDK),) - IOSSDK := $(shell xcrun -sdk iphoneos -show-sdk-path) - endif - - CC = clang -arch armv7 -isysroot $(IOSSDK) - CXX = clang++ -arch armv7 -isysroot $(IOSSDK) - CC_AS = perl ./tools/gas-preprocessor.pl $(CC) - CFLAGS += -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon -marm - ASFLAGS += -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon - HAVE_NEON = 1 - BUILTIN_GPU = neon - USE_DYNAREC = 1 - CFLAGS += -DIOS - OSXVER = `sw_vers -productVersion | cut -d. -f 2` - OSX_LT_MAVERICKS = `(( $(OSXVER) <= 9)) && echo "YES"` - ifeq ($(OSX_LT_MAVERICKS),"YES") - CC += -miphoneos-version-min=5.0 - CXX += -miphoneos-version-min=5.0 - CC_AS += -miphoneos-version-min=5.0 - CFLAGS += -miphoneos-version-min=5.0 - endif - -# PS3 -else ifeq ($(platform), ps3) - TARGET := $(TARGET_NAME)_libretro_ps3.a - CC = $(CELL_SDK)/host-win32/ppu/bin/ppu-lv2-gcc.exe - AR = $(CELL_SDK)/host-win32/ppu/bin/ppu-lv2-ar.exe - CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ - -# sncps3 -else ifeq ($(platform), sncps3) - TARGET := $(TARGET_NAME)_libretro_ps3.a - CC = $(CELL_SDK)/host-win32/sn/bin/ps3ppusnc.exe - AR = $(CELL_SDK)/host-win32/sn/bin/ps3snarl.exe - CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ - -# Lightweight PS3 Homebrew SDK -else ifeq ($(platform), psl1ght) - TARGET := $(TARGET_NAME)_libretro_psl1ght.a - CC = $(PS3DEV)/ppu/bin/ppu-gcc$(EXE_EXT) - AR = $(PS3DEV)/ppu/bin/ppu-ar$(EXE_EXT) - CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ - -# PSP -else ifeq ($(platform), psp1) - TARGET := $(TARGET_NAME)_libretro_psp1.a - CC = psp-gcc$(EXE_EXT) - AR = psp-ar$(EXE_EXT) - CFLAGS += -DPSP -G0 - -# Xbox 360 -else ifeq ($(platform), xenon) - TARGET := $(TARGET_NAME)_libretro_xenon360.a - CC = xenon-gcc$(EXE_EXT) - AR = 
xenon-ar$(EXE_EXT) - CFLAGS += -D__LIBXENON__ -m32 -D__ppc__ - -# Nintendo Game Cube -else ifeq ($(platform), ngc) - TARGET := $(TARGET_NAME)_libretro_ngc.a - CC = $(DEVKITPPC)/bin/powerpc-eabi-gcc$(EXE_EXT) - AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) - CFLAGS += -DGEKKO -DHW_DOL -mrvl -mcpu=750 -meabi -mhard-float -DBLARGG_BIG_ENDIAN=1 -D__ppc__ - -# Nintendo Wii -else ifeq ($(platform), wii) - TARGET := libretro_$(TARGET_NAME)_wii.a - CC = $(DEVKITPPC)/bin/powerpc-eabi-gcc$(EXE_EXT) - AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) - CFLAGS += -DGEKKO -DHW_RVL -mrvl -mcpu=750 -meabi -mhard-float -DBLARGG_BIG_ENDIAN=1 -D__ppc__ - -# QNX -else ifeq ($(platform), qnx) - TARGET := $(TARGET_NAME)_libretro_qnx.so - CC = qcc -Vgcc_ntoarmv7le - CC_AS = $(CC) - HAVE_NEON = 1 - USE_DYNAREC = 1 - DRC_CACHE_BASE = 0 - BUILTIN_GPU = neon - ARCH = arm - CFLAGS += -D__BLACKBERRY_QNX__ -marm -mcpu=cortex-a9 -mtune=cortex-a9 -mfpu=neon -mfloat-abi=softfp - ASFLAGS += -mcpu=cortex-a9 -mfpu=neon -mfloat-abi=softfp - -# ARM -else ifneq (,$(findstring armv,$(platform))) - TARGET := $(TARGET_NAME)_libretro.so - SHARED := -shared -Wl,--no-undefined - DRC_CACHE_BASE = 0 - ifneq (,$(findstring cortexa8,$(platform))) - CFLAGS += -marm -mcpu=cortex-a8 - ASFLAGS += -mcpu=cortex-a8 - else ifneq (,$(findstring cortexa9,$(platform))) - CFLAGS += -marm -mcpu=cortex-a9 - ASFLAGS += -mcpu=cortex-a9 - endif - CFLAGS += -marm - ifneq (,$(findstring neon,$(platform))) - CFLAGS += -mfpu=neon - ASFLAGS += -mfpu=neon - HAVE_NEON = 1 - BUILTIN_GPU = neon - endif - ifneq (,$(findstring softfloat,$(platform))) - CFLAGS += -mfloat-abi=softfp - ASFLAGS += -mfloat-abi=softfp - else ifneq (,$(findstring hardfloat,$(platform))) - CFLAGS += -mfloat-abi=hard - ASFLAGS += -mfloat-abi=hard - endif - ARCH = arm - USE_DYNAREC = 1 - -# Windows -else - TARGET := $(TARGET_NAME)_libretro.dll - CC = gcc - fpic := -fPIC - LD_FLAGS := -fPIC - SHARED := -shared -static-libgcc -static-libstdc++ -s 
-Wl,--version-script=libretro/link.T - CFLAGS += -D__WIN32__ -D__WIN32_LIBRETRO__ - MMAP_WIN32=1 -endif - -CFLAGS += -fPIC -ifeq ($(platform),win) - MAIN_LDLIBS += -lws2_32 -else ifneq ($(platform),qnx) - LDLIBS += -lpthread - MAIN_LDLIBS += -ldl -endif -MAIN_LDFLAGS += -shared -MAIN_LDLIBS += -lm -lz -EXTRA_LDFLAGS = - -# try to autodetect stuff for the lazy -ifndef ARCH -ARCH = $(shell $(CC) -dumpmachine | awk -F- '{print $$1}') -endif -ifndef HAVE_NEON -HAVE_NEON = $(shell $(CC) -E -dD - < /dev/null 2> /dev/null | grep -q __ARM_NEON__ && echo 1 || echo 0) -endif -ifeq ($(shell ld -v 2> /dev/null | awk '{print $$1}'),GNU) -MAIN_LDFLAGS += -Wl,--no-undefined -endif - -TARGET ?= libretro.so -PLATFORM = libretro -BUILTIN_GPU ?= peops -SOUND_DRIVERS = libretro -PLUGINS = -NO_CONFIG_MAK = yes - -include Makefile - -# no special AS needed for gpu_neon -plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.o: plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S - $(CC) $(CFLAGS) -c $^ -o $@ +$(error This file is unmaintained. Please use the libretro fork: https://github.com/libretro/pcsx_rearmed) From dcb2e057c41c8324afc9cd83e45c2c9ca350e42f Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 9 Aug 2022 20:42:39 +0300 Subject: [PATCH 174/597] cspace: add forgotten length decrement --- frontend/cspace_neon.S | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/cspace_neon.S b/frontend/cspace_neon.S index 651ab8424..4928b44ab 100644 --- a/frontend/cspace_neon.S +++ b/frontend/cspace_neon.S @@ -27,6 +27,7 @@ FUNCTION(bgr555_to_rgb565): @ dst, src, bytes beq 0f @ align the dst vld1.16 {d0}, [r1]! 
+ sub r2, r2, #8 vshl.u16 d0, d0, #1 vshl.u16 d1, d0, #10 vsri.u16 d1, d0, #11 From 4a2e3735ee8c0741a63d860a489a2a099f53e9e5 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 10 Aug 2022 22:32:11 +0300 Subject: [PATCH 175/597] drc: fix wrong masking in set_jump_target --- libpcsxcore/new_dynarec/assem_arm64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 6f108bf82..b7d82c12f 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -45,7 +45,7 @@ static void set_jump_target(void *addr, void *target) // should only happen when jumping to an already compiled block (see add_jump_out) // a workaround would be to do a trampoline jump via a stub at the end of the block assert(-1048576 <= offset && offset < 1048576); - *ptr=(*ptr&0xFF00000F)|(((offset>>2)&0x7ffff)<<5); + *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5); } else if((*ptr&0x9f000000)==0x10000000) { // adr // generated by do_miniht_insert From 9b495f6ec3f28cf5ed1d41f6af16a9967fcf3e64 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 10 Aug 2022 22:39:41 +0300 Subject: [PATCH 176/597] drc: rework smc checks again the way it was done before wasn't good enough for Mega Man Legends 2 at least --- libpcsxcore/new_dynarec/assem_arm.c | 20 ++- libpcsxcore/new_dynarec/assem_arm64.c | 21 +-- libpcsxcore/new_dynarec/linkage_arm.S | 4 +- libpcsxcore/new_dynarec/new_dynarec.c | 229 ++++++++++++++++---------- 4 files changed, 170 insertions(+), 104 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 6570f1e80..95007dfaf 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -27,8 +27,6 @@ #include "pcnt.h" #include "arm_features.h" -#define unused __attribute__((unused)) - #ifdef DRC_DBG #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wunused-variable" @@ 
-1033,6 +1031,15 @@ static void emit_jcc(const void *a_) output_w32(0x3a000000|offset); } +static void *emit_cbz(int rs, const void *a) +{ + void *ret; + emit_test(rs, rs); + ret = out; + emit_jeq(a); + return ret; +} + static unused void emit_callreg(u_int r) { assert(r<15); @@ -1392,13 +1399,10 @@ static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) } // special case for checking invalid_code -static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) +static void emit_ldrb_indexedsr12_reg(int base, int r, int rt) { - assert(imm<128&&imm>=0); - assert(r>=0&&r<16); - assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]); - output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620); - emit_cmpimm(HOST_TEMPREG,imm); + assem_debug("ldrb %s,%s,%s lsr #12\n",regname[rt],regname[base],regname[r]); + output_w32(0xe7d00000|rd_rn_rm(rt,base,r)|0x620); } static void emit_callne(int a) diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index b7d82c12f..271bee580 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -23,8 +23,6 @@ #include "pcnt.h" #include "arm_features.h" -#define unused __attribute__((unused)) - void do_memhandler_pre(); void do_memhandler_post(); @@ -619,6 +617,10 @@ static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt static void emit_addimm(u_int rs, uintptr_t imm, u_int rt) { + if (imm == 0) { + emit_mov(rs, rt); + return; + } emit_addimm_s(0, 0, rs, imm, rt); } @@ -988,9 +990,11 @@ static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r) output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r)); } -static unused void emit_cbz(const void *a, u_int r) +static void *emit_cbz(u_int r, const void *a) { + void *ret = out; emit_cb(0, 0, a, r); + return ret; } static void emit_jmpreg(u_int r) @@ -1198,14 +1202,11 @@ static void emit_clz(u_int rs, u_int rt) } // special case for checking invalid_code -static 
void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm) +static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt) { - host_tempreg_acquire(); - emit_shrimm(r, 12, HOST_TEMPREG); - assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]); - output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG)); - emit_cmpimm(HOST_TEMPREG, imm); - host_tempreg_release(); + emit_shrimm(r, 12, rt); + assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]); + output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt)); } // special for loadlr_assemble, rs2 is destroyed diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 513911cab..7e0db2d7a 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -30,7 +30,7 @@ #define ndrc_try_restore_block ESYM(ndrc_try_restore_block) #define ndrc_get_addr_ht ESYM(ndrc_get_addr_ht) #define ndrc_get_addr_ht_param ESYM(ndrc_get_addr_ht_param) -#define ndrc_invalidate_addr ESYM(ndrc_invalidate_addr) +#define ndrc_write_invalidate_one ESYM(ndrc_write_invalidate_one) #define gen_interupt ESYM(gen_interupt) #define gteCheckStallRaw ESYM(gteCheckStallRaw) #define psxException ESYM(psxException) @@ -401,7 +401,7 @@ invalidate_addr_call: ldr lr, [fp, #LO_inv_code_end] cmp r0, r12 cmpcs lr, r0 - blcc ndrc_invalidate_addr + blcc ndrc_write_invalidate_one ldmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, pc} .size invalidate_addr_call, .-invalidate_addr_call diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 0fafc60ad..b9e7c1448 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -41,6 +41,7 @@ static Jit g_jit; #include "emu_if.h" // emulator interface #include "arm_features.h" +#define unused __attribute__((unused)) #ifdef __clang__ #define noinline __attribute__((noinline)) #else @@ -58,6 +59,7 
@@ static Jit g_jit; //#define DISASM //#define ASSEM_PRINT +//#define INV_DEBUG_W //#define STAT_PRINT #ifdef ASSEM_PRINT @@ -163,7 +165,7 @@ struct regstat u_int wasconst; // before; for example 'lw r2, (r2)' wasconst is true u_int isconst; // ... but isconst is false when r2 is known u_int loadedconst; // host regs that have constants loaded - u_int waswritten; // MIPS regs that were used as store base before + //u_int waswritten; // MIPS regs that were used as store base before }; struct ht_entry @@ -397,8 +399,9 @@ void new_dyna_leave(); void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile); void *ndrc_get_addr_ht(u_int vaddr); -void ndrc_invalidate_addr(u_int addr); void ndrc_add_jump_out(u_int vaddr, void *src); +void ndrc_write_invalidate_one(u_int addr); +static void ndrc_write_invalidate_many(u_int addr, u_int end); static int new_recompile_block(u_int addr); static void invalidate_block(struct block_info *block); @@ -687,6 +690,28 @@ static int doesnt_expire_soon(u_char *tcaddr) return diff > EXPIRITY_OFFSET + MAX_OUTPUT_BLOCK_SIZE; } +static unused void check_for_block_changes(u_int start, u_int end) +{ + u_int start_page = get_page_prev(start); + u_int end_page = get_page(end - 1); + u_int page; + + for (page = start_page; page <= end_page; page++) { + struct block_info *block; + for (block = blocks[page]; block != NULL; block = block->next) { + if (block->is_dirty) + continue; + if (memcmp(block->source, block->copy, block->len)) { + printf("bad block %08x-%08x %016llx %016llx @%08x\n", + block->start, block->start + block->len, + *(long long *)block->source, *(long long *)block->copy, psxRegs.pc); + fflush(stdout); + abort(); + } + } + } +} + static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) { void *found_clean = NULL; @@ -770,6 +795,7 @@ static void noinline *get_addr(u_int vaddr, int can_compile) // Look up address in hash table first void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile) { + 
//check_for_block_changes(vaddr, vaddr + MAXBLOCK); const struct ht_entry *ht_bin = hash_table_get(vaddr); stat_inc(stat_ht_lookups); if (ht_bin->vaddr[0] == vaddr) return ht_bin->tcaddr[0]; @@ -1165,7 +1191,8 @@ static const struct { FUNCNAME(jump_handler_write8), FUNCNAME(jump_handler_write16), FUNCNAME(jump_handler_write32), - FUNCNAME(ndrc_invalidate_addr), + FUNCNAME(ndrc_write_invalidate_one), + FUNCNAME(ndrc_write_invalidate_many), FUNCNAME(jump_to_new_pc), FUNCNAME(jump_break), FUNCNAME(jump_break_ds), @@ -1332,7 +1359,7 @@ static int blocks_remove_matching_addrs(struct block_info **head, int hit = 0; while (*head) { if ((((*head)->tc_offs ^ base_offs) >> shift) == 0) { - inv_debug("EXP: rm block %08x (tc_offs %zx)\n", (*head)->start, (*head)->tc_offs); + inv_debug("EXP: rm block %08x (tc_offs %x)\n", (*head)->start, (*head)->tc_offs); invalidate_block(*head); next = (*head)->next; free(*head); @@ -1393,7 +1420,7 @@ static void unlink_jumps_tc_range(struct jump_info *ji, u_int base_offs, int shi continue; } - inv_debug("EXP: rm link to %08x (tc_offs %zx)\n", ji->e[i].target_vaddr, tc_offs); + inv_debug("EXP: rm link to %08x (tc_offs %x)\n", ji->e[i].target_vaddr, tc_offs); stat_dec(stat_links); ji->count--; if (i < ji->count) { @@ -1428,7 +1455,7 @@ static int invalidate_range(u_int start, u_int end, int hit = 0; // additional area without code (to supplement invalid_code[]), [start, end) - // avoids excessive ndrc_invalidate_addr() calls + // avoids excessive ndrc_write_invalidate*() calls inv_start = start_m & ~0xfff; inv_end = end_m | 0xfff; @@ -1487,16 +1514,28 @@ void new_dynarec_invalidate_range(unsigned int start, unsigned int end) invalidate_range(start, end, NULL, NULL); } -void ndrc_invalidate_addr(u_int addr) +static void ndrc_write_invalidate_many(u_int start, u_int end) { // this check is done by the caller //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; } - int ret = invalidate_range(addr, addr + 4, &inv_code_start, 
&inv_code_end); + int ret = invalidate_range(start, end, &inv_code_start, &inv_code_end); +#ifdef INV_DEBUG_W + int invc = invalid_code[start >> 12]; + u_int len = end - start; if (ret) - inv_debug("INV ADDR: %08x hit %d blocks\n", addr, ret); + printf("INV ADDR: %08x/%02x hit %d blocks\n", start, len, ret); else - inv_debug("INV ADDR: %08x miss, inv %08x-%08x\n", addr, inv_code_start, inv_code_end); + printf("INV ADDR: %08x/%02x miss, inv %08x-%08x invc %d->%d\n", start, len, + inv_code_start, inv_code_end, invc, invalid_code[start >> 12]); + check_for_block_changes(start, end); +#endif stat_inc(stat_inv_addr_calls); + (void)ret; +} + +void ndrc_write_invalidate_one(u_int addr) +{ + ndrc_write_invalidate_many(addr, addr + 4); } // This is called when loading a save state. @@ -1521,26 +1560,6 @@ void new_dynarec_invalidate_all_pages(void) do_clear_cache(); } -static void do_invstub(int n) -{ - literal_pool(20); - u_int reglist = stubs[n].a; - set_jump_target(stubs[n].addr, out); - save_regs(reglist); - if (stubs[n].b != 0) - emit_mov(stubs[n].b, 0); - emit_readword(&inv_code_start, 1); - emit_readword(&inv_code_end, 2); - emit_cmp(0, 1); - emit_cmpcs(2, 0); - void *jaddr = out; - emit_jc(0); - emit_far_call(ndrc_invalidate_addr); - set_jump_target(jaddr, out); - restore_regs(reglist); - emit_jmp(stubs[n].retaddr); // return address -} - // Add an entry to jump_out after making a link // src should point to code by emit_extjump() void ndrc_add_jump_out(u_int vaddr, void *src) @@ -3147,6 +3166,89 @@ static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) } #endif +static void do_invstub(int n) +{ + literal_pool(20); + assem_debug("do_invstub\n"); + u_int reglist = stubs[n].a; + u_int addrr = stubs[n].b; + int ofs_start = stubs[n].c; + int ofs_end = stubs[n].d; + int len = ofs_end - ofs_start; + u_int rightr = 0; + + set_jump_target(stubs[n].addr, out); + save_regs(reglist); + emit_readword(&inv_code_start, 2); + emit_readword(&inv_code_end, 3); + 
if (addrr != 0 || ofs_start != 0) + emit_addimm(addrr, ofs_start, 0); + if (len != 0) + emit_addimm(0, len + 4, (rightr = 1)); + emit_cmp(0, 2); + emit_cmpcs(3, rightr); + void *jaddr = out; + emit_jc(0); + void *func = (len != 0) + ? (void *)ndrc_write_invalidate_many + : (void *)ndrc_write_invalidate_one; + emit_far_call(func); + set_jump_target(jaddr, out); + restore_regs(reglist); + emit_jmp(stubs[n].retaddr); +} + +static void do_store_smc_check(int i, const struct regstat *i_regs, u_int reglist, int addr) +{ + if (HACK_ENABLED(NDHACK_NO_SMC_CHECK)) + return; + // this can't be used any more since we started to check exact + // block boundaries in invalidate_range() + //if (i_regs->waswritten & (1<= 0; j--) { + if (!dops[j].is_store || dops[j].rs1 != dops[i].rs1 + || abs(imm[j] - imm[j+1]) > imm_maxdiff) + break; + count++; + if (imm_min > imm[j]) + imm_min = imm[j]; + if (imm_max < imm[j]) + imm_max = imm[j]; + } +#if defined(HOST_IMM8) + int ir = get_reg(i_regs->regmap, INVCP); + assert(ir >= 0); + host_tempreg_acquire(); + emit_ldrb_indexedsr12_reg(ir, addr, HOST_TEMPREG); +#else + emit_cmpmem_indexedsr12_imm(invalid_code, addr, 1); + #error not handled +#endif +#ifdef INVALIDATE_USE_COND_CALL + if (count == 1) { + emit_cmpimm(HOST_TEMPREG, 1); + emit_callne(invalidate_addr_reg[addr]); + host_tempreg_release(); + return; + } +#endif + void *jaddr = emit_cbz(HOST_TEMPREG, 0); + host_tempreg_release(); + imm_min -= imm[i]; + imm_max -= imm[i]; + add_stub(INVCODE_STUB, jaddr, out, reglist|(1<waswritten&(1<regmap,INVCP); - assert(ir>=0); - emit_cmpmem_indexedsr12_reg(ir,addr,1); - #else - emit_cmpmem_indexedsr12_imm(invalid_code,addr,1); - #endif - #ifdef INVALIDATE_USE_COND_CALL - emit_callne(invalidate_addr_reg[addr]); - #else - void *jaddr2 = out; - emit_jne(0); - add_stub(INVCODE_STUB,jaddr2,out,reglist|(1<waswritten&(1<regmap,INVCP); - assert(ir>=0); - emit_cmpmem_indexedsr12_reg(ir,temp,1); - #else - emit_cmpmem_indexedsr12_imm(invalid_code,temp,1); - 
#endif - #ifdef INVALIDATE_USE_COND_CALL - emit_callne(invalidate_addr_reg[temp]); - #else - void *jaddr2 = out; - emit_jne(0); - add_stub(INVCODE_STUB,jaddr2,out,reglist|(1<waswritten&(1<regmap,INVCP); - assert(ir>=0); - emit_cmpmem_indexedsr12_reg(ir,ar,1); -#else - emit_cmpmem_indexedsr12_imm(invalid_code,ar,1); -#endif - #ifdef INVALIDATE_USE_COND_CALL - emit_callne(invalidate_addr_reg[ar]); - #else - void *jaddr3 = out; - emit_jne(0); - add_stub(INVCODE_STUB,jaddr3,out,reglist|(1<0&&(dops[i-1].itype==STORE||dops[i-1].itype==STORELR||(dops[i-1].itype==C2LS&&dops[i-1].opcode==0x3a))&&(u_int)imm[i-1]<0x800) current.waswritten|=1<=0x800) current.waswritten&=~(1<0) @@ -7810,7 +7871,7 @@ static noinline void pass3_register_alloc(u_int addr) } } if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1; - regs[i].waswritten=current.waswritten; + //regs[i].waswritten=current.waswritten; } } @@ -8863,8 +8924,8 @@ static noinline void pass10_expire_blocks(void) u_int block_i = expirep / step & (PAGE_COUNT - 1); u_int phase = (expirep >> (base_shift - 1)) & 1u; if (!(expirep & (MAX_OUTPUT_BLOCK_SIZE / 2 - 1))) { - inv_debug("EXP: base_offs %x/%x phase %u\n", base_offs, - out - ndrc->translation_cache, phase); + inv_debug("EXP: base_offs %x/%lx phase %u\n", base_offs, + (long)(out - ndrc->translation_cache), phase); } if (!phase) { From 3033d89853e237b09bcfbc7f1db238335eef4477 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 11 Aug 2022 22:52:54 +0300 Subject: [PATCH 177/597] drc: fix a silly mistake of overwriting a reg --- libpcsxcore/new_dynarec/new_dynarec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index b9e7c1448..1c7085661 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -3179,10 +3179,10 @@ static void do_invstub(int n) set_jump_target(stubs[n].addr, out); save_regs(reglist); - 
emit_readword(&inv_code_start, 2); - emit_readword(&inv_code_end, 3); if (addrr != 0 || ofs_start != 0) emit_addimm(addrr, ofs_start, 0); + emit_readword(&inv_code_start, 2); + emit_readword(&inv_code_end, 3); if (len != 0) emit_addimm(0, len + 4, (rightr = 1)); emit_cmp(0, 2); From df740cdce57508b93e158391e2331eebacd24cb4 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 14 Aug 2022 00:05:07 +0300 Subject: [PATCH 178/597] gpu_neon: fix wrong block counting --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 6 +++--- plugins/gpu_neon/psx_gpu/psx_gpu_simd.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 80e9f129e..1d513d8be 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -3122,7 +3122,7 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) #define setup_sprite_tile_fetch_texel_block_8bpp(offset) \ - texture_block_ptr = psx_gpu->texture_page_ptr + \ + texture_block_ptr = (u8 *)psx_gpu->texture_page_ptr + \ ((texture_offset + offset) & texture_mask); \ \ load_64b(texels, texture_block_ptr) \ @@ -3230,7 +3230,7 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) #define setup_sprite_tile_half_8bpp(edge) \ { \ - setup_sprite_tile_add_blocks(sub_tile_height * 2); \ + setup_sprite_tile_add_blocks(sub_tile_height); \ \ while(sub_tile_height) \ { \ @@ -3684,7 +3684,7 @@ void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\ u32 num_blocks = psx_gpu->num_blocks; \ block_struct *block = psx_gpu->blocks + num_blocks; \ \ - u16 *texture_block_ptr; \ + u8 *texture_block_ptr; \ vec_8x8u texels; \ \ setup_sprite_tiled_initialize_##texture_mode##x4mode(); \ diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c index 335af35d6..5c05b14a8 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c @@ -2912,7 +2912,7 @@ void 
setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, #define setup_sprite_tile_half_8bpp(edge) \ { \ - setup_sprite_tile_add_blocks(sub_tile_height * 2); \ + setup_sprite_tile_add_blocks(sub_tile_height); \ \ while(sub_tile_height) \ { \ @@ -3497,7 +3497,7 @@ void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, void scale2x_tiles8(void * __restrict__ dst_, const void * __restrict__ src_, int w8, int h) { #if 0 - scale2x_tiles8_(dst, src_, w8, h); + scale2x_tiles8_(dst_, src_, w8, h); return; #endif const u16 * __restrict__ src = src_; From 26e3e2aa7525fd4e63e64192dfbb68950e0e4c5a Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 14 Aug 2022 00:07:11 +0300 Subject: [PATCH 179/597] gpu_neon: fix another abi violation before the flush_render_block_buffer call the stack is misaligned, so push an odd num of regs to realign --- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 9d342ae75..da47756ef 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -1600,9 +1600,9 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ vpush { texture_mask }; \ vpush { uvrg_dx4 }; \ \ - stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \ bl flush_render_block_buffer; \ - ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \ \ vpop { uvrg_dx4 }; \ vpop { texture_mask }; \ @@ -1799,9 +1799,9 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ vpush { texture_mask }; \ vpush { uvrg_dx4 }; \ \ - stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \ bl flush_render_block_buffer; \ - ldmia 
sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \ \ vpop { uvrg_dx4 }; \ vpop { texture_mask }; \ From 1c654475393e6595bc00245178ae5bf253e318ef Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 28 Aug 2022 23:51:40 +0300 Subject: [PATCH 180/597] misc: get rid of an unaligned read 96c6ec7055ecef55b3dd221c86b796512bf52107 introduced an unaligned read which is undefined behavior in C, even if most hardware allows it (but some very old ARMs don't). Perf. doesn't matter here so read byte-by-byte. notaz/pcsx_rearmed#261 --- libpcsxcore/misc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 7aa4fef24..c06a8a4fa 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -55,10 +55,11 @@ struct iso_directory_record { char name [1]; }; -void mmssdd( char *b, char *p ) +static void mmssdd( char *b, char *p ) { int m, s, d; - int block = SWAP32(*((uint32_t*) b)); + unsigned char *ub = (void *)b; + int block = (ub[3] << 24) | (ub[2] << 16) | (ub[1] << 8) | ub[0]; block += 150; m = block / 4500; // minutes From 609d9ea53d1b2d4c5398066ca7899179b466040a Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 30 Aug 2022 03:00:51 +0300 Subject: [PATCH 181/597] spu: get rid of iXAPitch it makes no sense after SPU was fully synced to the core, and now it just pulls unwanted windows.h dependency on win32. 
--- frontend/menu.c | 2 +- plugins/dfsound/xa.c | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/frontend/menu.c b/frontend/menu.c index 4808e9c8c..341a429cc 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1486,7 +1486,7 @@ static menu_entry e_menu_plugin_spu[] = mee_range_h ("Volume boost", 0, volume_boost, -5, 30, h_spu_volboost), mee_onoff ("Reverb", 0, spu_config.iUseReverb, 1), mee_enum ("Interpolation", 0, spu_config.iUseInterpolation, men_spu_interp), - mee_onoff ("Adjust XA pitch", 0, spu_config.iXAPitch, 1), + //mee_onoff ("Adjust XA pitch", 0, spu_config.iXAPitch, 1), mee_onoff_h ("Adjust tempo", 0, spu_config.iTempo, 1, h_spu_tempo), mee_end, }; diff --git a/plugins/dfsound/xa.c b/plugins/dfsound/xa.c index c3658af41..d63e83cc5 100644 --- a/plugins/dfsound/xa.c +++ b/plugins/dfsound/xa.c @@ -94,6 +94,7 @@ INLINE void MixXA(int *SSumLR, int ns_to, int decode_pos) // small linux time helper... only used for watchdog //////////////////////////////////////////////////////////////////////// +#if 0 static unsigned long timeGetTime_spu() { #if defined(NO_OS) @@ -106,6 +107,7 @@ static unsigned long timeGetTime_spu() return tv.tv_sec * 1000 + tv.tv_usec/1000; // to do that, but at least it works #endif } +#endif //////////////////////////////////////////////////////////////////////// // FEED XA @@ -133,6 +135,7 @@ INLINE void FeedXA(xa_decode_t *xap) if(iPlace==0) return; // no place at all //----------------------------------------------------// +#if 0 if(spu_config.iXAPitch) // pitch change option? 
{ static DWORD dwLT=0; @@ -169,6 +172,7 @@ INLINE void FeedXA(xa_decode_t *xap) if(iLastSize) iSize=iLastSize; } } +#endif //----------------------------------------------------// spos=0x10000L; @@ -179,6 +183,7 @@ INLINE void FeedXA(xa_decode_t *xap) uint32_t * pS=(uint32_t *)xap->pcm; uint32_t l=0; +#if 0 if(spu_config.iXAPitch) { int32_t l1,l2;short s; @@ -238,6 +243,7 @@ INLINE void FeedXA(xa_decode_t *xap) } } else +#endif { for(i=0;ipcm; uint32_t l;short s=0; +#if 0 if(spu_config.iXAPitch) { int32_t l1; @@ -337,6 +344,7 @@ INLINE void FeedXA(xa_decode_t *xap) } } else +#endif { for(i=0;i Date: Tue, 30 Aug 2022 02:58:32 +0300 Subject: [PATCH 182/597] plugins: try to untangle the CALLBACK mess on ARM it doesn't matter so calling conventions were never maintained, so just get rid of __stdcall --- frontend/main.c | 2 +- frontend/plugin.c | 13 +++++-------- include/psemu_plugin_defs.h | 4 ++++ libpcsxcore/plugins.h | 15 +-------------- plugins/cdrcimg/cdrcimg.c | 9 ++++----- plugins/dfinput/main.c | 7 ------- plugins/dfsound/stdafx.h | 5 ----- plugins/gpu-gles/gpuExternals.h | 2 +- 8 files changed, 16 insertions(+), 41 deletions(-) diff --git a/frontend/main.c b/frontend/main.c index 234364504..671068d3d 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -712,7 +712,7 @@ void SysRunGui() { printf("SysRunGui\n"); } -static void dummy_lace() +static void CALLBACK dummy_lace() { } diff --git a/frontend/plugin.c b/frontend/plugin.c index d9eb04a40..196c98028 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -15,14 +15,11 @@ #include "../libpcsxcore/system.h" #include "../plugins/cdrcimg/cdrcimg.h" -#ifndef _WIN32 +// this can't be __stdcall like it was in PSEmu API as too many functions are mixed up +#undef CALLBACK #define CALLBACK -#else -#define WIN32_LEAN_AND_MEAN -#include -#endif -static int dummy_func() { +static long CALLBACK dummy_func() { return 0; } @@ -49,7 +46,7 @@ extern void CALLBACK SPUasync(unsigned int, unsigned int); extern int CALLBACK 
SPUplayCDDAchannel(short *, int); /* PAD */ -static long PADreadPort1(PadDataS *pad) +static long CALLBACK PADreadPort1(PadDataS *pad) { pad->controllerType = in_type1; pad->buttonStatus = ~in_keystate; @@ -62,7 +59,7 @@ static long PADreadPort1(PadDataS *pad) return 0; } -static long PADreadPort2(PadDataS *pad) +static long CALLBACK PADreadPort2(PadDataS *pad) { pad->controllerType = in_type2; pad->buttonStatus = ~in_keystate >> 16; diff --git a/include/psemu_plugin_defs.h b/include/psemu_plugin_defs.h index 998665406..fa626272a 100644 --- a/include/psemu_plugin_defs.h +++ b/include/psemu_plugin_defs.h @@ -5,6 +5,10 @@ extern "C" { #endif +// this can't be __stdcall like it was in PSEmu API as too many functions are mixed up +#undef CALLBACK +#define CALLBACK + // header version #define _PPDK_HEADER_VERSION 3 diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index e3bffc776..34f062e2e 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -25,21 +25,10 @@ extern "C" { #endif #include "psxcommon.h" +#include "psemu_plugin_defs.h" //#define ENABLE_SIO1API 1 -#ifndef _WIN32 - -typedef void* HWND; -#define CALLBACK - -#else - -#define WIN32_LEAN_AND_MEAN -#include - -#endif - typedef long (CALLBACK *GPUopen)(unsigned long *, char *, char *); typedef long (CALLBACK *SPUopen)(void); typedef long (CALLBACK *PADopen)(unsigned long *); @@ -47,8 +36,6 @@ typedef long (CALLBACK *NETopen)(unsigned long *); typedef long (CALLBACK *SIO1open)(unsigned long *); #include "spu.h" - -#include "psemu_plugin_defs.h" #include "decode_xa.h" int LoadPlugins(); diff --git a/plugins/cdrcimg/cdrcimg.c b/plugins/cdrcimg/cdrcimg.c index 91cf1cad1..225451628 100644 --- a/plugins/cdrcimg/cdrcimg.c +++ b/plugins/cdrcimg/cdrcimg.c @@ -12,16 +12,15 @@ #include #include #include -#ifndef _WIN32 -#define CALLBACK +#if !defined(_WIN32) && !defined(NO_DYLIB) #include -#else -#define WIN32_LEAN_AND_MEAN -#include #endif #include "cdrcimg.h" +#undef CALLBACK +#define CALLBACK + 
#define PFX "cdrcimg: " #define err(f, ...) fprintf(stderr, PFX f, ##__VA_ARGS__) diff --git a/plugins/dfinput/main.c b/plugins/dfinput/main.c index 475ea073a..937f78823 100644 --- a/plugins/dfinput/main.c +++ b/plugins/dfinput/main.c @@ -8,13 +8,6 @@ * See the COPYING file in the top-level directory. */ -#ifndef _WIN32 -#define CALLBACK -#else -#define WIN32_LEAN_AND_MEAN -#include -#endif - #include "main.h" unsigned char CurPad, CurByte, CurCmd, CmdLen; diff --git a/plugins/dfsound/stdafx.h b/plugins/dfsound/stdafx.h index 7e2202906..82b0d7e58 100644 --- a/plugins/dfsound/stdafx.h +++ b/plugins/dfsound/stdafx.h @@ -19,16 +19,11 @@ #include #include -#ifdef _WIN32 -#define WIN32_LEAN_AND_MEAN -#include -#else #undef CALLBACK #define CALLBACK #define DWORD unsigned int #define LOWORD(l) ((unsigned short)(l)) #define HIWORD(l) ((unsigned short)(((unsigned int)(l) >> 16) & 0xFFFF)) -#endif #ifndef INLINE #define INLINE static inline diff --git a/plugins/gpu-gles/gpuExternals.h b/plugins/gpu-gles/gpuExternals.h index 126016727..0a8acf594 100644 --- a/plugins/gpu-gles/gpuExternals.h +++ b/plugins/gpu-gles/gpuExternals.h @@ -288,7 +288,7 @@ extern unsigned char gl_vy[8]; extern OGLVertex vertex[4]; extern short sprtY,sprtX,sprtH,sprtW; #ifdef _WINDOWS -extern HWND hWWindow; +//extern HWND hWWindow; #endif extern BOOL bIsFirstFrame; extern int iWinSize; From af93c8bee18e35d9316d6f64ab4ede52479108d6 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 1 Sep 2022 02:02:21 +0300 Subject: [PATCH 183/597] cdrom: some debugging aid --- libpcsxcore/cdrom.c | 130 ++++++++++++++++++++++++++++++++++---------- libpcsxcore/cdrom.h | 72 ------------------------ 2 files changed, 102 insertions(+), 100 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 00e65a596..1431dac65 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -44,7 +44,75 @@ #endif //#define CDR_LOG_CMD_IRQ -cdrStruct cdr; +static struct { + unsigned char OCUP; + unsigned char Reg1Mode; + 
unsigned char Reg2; + unsigned char CmdProcess; + unsigned char Ctrl; + unsigned char Stat; + + unsigned char StatP; + + unsigned char Transfer[DATA_SIZE]; + struct { + unsigned char Track; + unsigned char Index; + unsigned char Relative[3]; + unsigned char Absolute[3]; + } subq; + unsigned char TrackChanged; + boolean m_locationChanged; + unsigned char pad1[2]; + unsigned int freeze_ver; + + unsigned char Prev[4]; + unsigned char Param[8]; + unsigned char Result[16]; + + unsigned char ParamC; + unsigned char ParamP; + unsigned char ResultC; + unsigned char ResultP; + unsigned char ResultReady; + unsigned char Cmd; + unsigned char Readed; + unsigned char SetlocPending; + u32 Reading; + + unsigned char ResultTN[6]; + unsigned char ResultTD[4]; + unsigned char SetSectorPlay[4]; + unsigned char SetSectorEnd[4]; + unsigned char SetSector[4]; + unsigned char Track; + boolean Play, Muted; + int CurTrack; + int Mode, File, Channel; + int Reset; + int NoErr; + int FirstSector; + + xa_decode_t Xa; + + int Init; + + u16 Irq; + u8 IrqRepeated; + u32 eCycle; + + u8 Seeked; + + u8 DriveState; + u8 FastForward; + u8 FastBackward; + u8 pad; + + u8 AttenuatorLeftToLeft, AttenuatorLeftToRight; + u8 AttenuatorRightToRight, AttenuatorRightToLeft; + u8 AttenuatorLeftToLeftT, AttenuatorLeftToRightT; + u8 AttenuatorRightToRightT, AttenuatorRightToLeftT; +} cdr; static unsigned char *pTransfer; static s16 read_buf[CD_FRAMESIZE_RAW/2]; @@ -81,7 +149,8 @@ static s16 read_buf[CD_FRAMESIZE_RAW/2]; #define CdlGetQ 29 #define CdlReadToc 30 -char *CmdName[0x100]= { +#ifdef CDR_LOG_CMD_IRQ +static const char * const CmdName[0x100] = { "CdlSync", "CdlNop", "CdlSetloc", "CdlPlay", "CdlForward", "CdlBackward", "CdlReadN", "CdlStandby", "CdlStop", "CdlPause", "CdlReset", "CdlMute", @@ -91,6 +160,7 @@ char *CmdName[0x100]= { "CdlGetclock", "CdlTest", "CdlID", "CdlReadS", "CdlInit", NULL, "CDlReadToc", NULL }; +#endif unsigned char Test04[] = { 0 }; unsigned char Test05[] = { 0 }; @@ -227,10 +297,21 
@@ static void sec2msf(unsigned int s, u8 *msf) { cdr.ResultReady = 1; \ } -static void setIrq(void) +static void setIrq(int log_cmd) { if (cdr.Stat & cdr.Reg2) psxHu32ref(0x1070) |= SWAP32((u32)0x4); + +#ifdef CDR_LOG_CMD_IRQ + { + int i; + SysPrintf("CDR IRQ=%d cmd %02x stat %02x: ", + !!(cdr.Stat & cdr.Reg2), log_cmd, cdr.Stat); + for (i = 0; i < cdr.ResultC; i++) + SysPrintf("%02x ", cdr.Result[i]); + SysPrintf("\n"); + } +#endif } // timing used in this function was taken from tests on real hardware @@ -446,7 +527,7 @@ static void cdrPlayInterrupt_Autopause() //cdr.ResultReady = 1; //cdr.Stat = DataReady; cdr.Stat = DataEnd; - setIrq(); + setIrq(0x200); StopCdda(); } @@ -484,7 +565,7 @@ static void cdrPlayInterrupt_Autopause() cdr.Stat = DataReady; SetResultSize(8); - setIrq(); + setIrq(0x201); } } @@ -504,7 +585,7 @@ void cdrPlayInterrupt() cdr.Seeked = SEEK_DONE; if (cdr.Irq == 0) { cdr.Stat = Complete; - setIrq(); + setIrq(0x202); } if (cdr.SetlocPending) { @@ -1087,19 +1168,8 @@ void cdrInterrupt() { } finish: - setIrq(); + setIrq(Irq); cdr.ParamC = 0; - -#ifdef CDR_LOG_CMD_IRQ - { - int i; - SysPrintf("CDR IRQ %d cmd %02x stat %02x: ", - !!(cdr.Stat & cdr.Reg2), Irq, cdr.Stat); - for (i = 0; i < cdr.ResultC; i++) - SysPrintf("%02x ", cdr.Result[i]); - SysPrintf("\n"); - } -#endif } #ifdef HAVE_ARMV7 @@ -1256,7 +1326,7 @@ void cdrReadInterrupt() { if (!(cdr.Mode & MODE_STRSND) || !(cdr.Transfer[4+2] & 0x4)) { cdr.Stat = DataReady; - setIrq(); + setIrq(0x203); } // update for CdlGetlocP @@ -1288,13 +1358,13 @@ unsigned char cdrRead0(void) { // What means the 0x10 and the 0x08 bits? 
I only saw it used by the bios cdr.Ctrl |= 0x18; - CDR_LOG_IO("cdr r0: %02x\n", cdr.Ctrl); + CDR_LOG_IO("cdr r0.sta: %02x\n", cdr.Ctrl); return psxHu8(0x1800) = cdr.Ctrl; } void cdrWrite0(unsigned char rt) { - CDR_LOG_IO("cdr w0: %02x\n", rt); + CDR_LOG_IO("cdr w0.idx: %02x\n", rt); cdr.Ctrl = (rt & 3) | (cdr.Ctrl & ~3); } @@ -1308,13 +1378,14 @@ unsigned char cdrRead1(void) { if (cdr.ResultP == cdr.ResultC) cdr.ResultReady = 0; - CDR_LOG_IO("cdr r1: %02x\n", psxHu8(0x1801)); + CDR_LOG_IO("cdr r1.rsp: %02x #%u\n", psxHu8(0x1801), cdr.ResultP - 1); return psxHu8(0x1801); } void cdrWrite1(unsigned char rt) { - CDR_LOG_IO("cdr w1: %02x\n", rt); + const char *rnames[] = { "cmd", "smd", "smc", "arr" }; (void)rnames; + CDR_LOG_IO("cdr w1.%s: %02x\n", rnames[cdr.Ctrl & 3], rt); switch (cdr.Ctrl & 3) { case 0: @@ -1332,6 +1403,7 @@ void cdrWrite1(unsigned char rt) { #ifdef CDR_LOG_CMD_IRQ SysPrintf("CD1 write: %x (%s)", rt, CmdName[rt]); if (cdr.ParamC) { + int i; SysPrintf(" Param[%d] = {", cdr.ParamC); for (i = 0; i < cdr.ParamC; i++) SysPrintf(" %x,", cdr.Param[i]); @@ -1384,12 +1456,13 @@ unsigned char cdrRead2(void) { ret = *pTransfer++; } - CDR_LOG_IO("cdr r2: %02x\n", ret); + CDR_LOG_IO("cdr r2.dat: %02x\n", ret); return ret; } void cdrWrite2(unsigned char rt) { - CDR_LOG_IO("cdr w2: %02x\n", rt); + const char *rnames[] = { "prm", "ien", "all", "arl" }; (void)rnames; + CDR_LOG_IO("cdr w2.%s: %02x\n", rnames[cdr.Ctrl & 3], rt); switch (cdr.Ctrl & 3) { case 0: @@ -1398,7 +1471,7 @@ void cdrWrite2(unsigned char rt) { return; case 1: cdr.Reg2 = rt; - setIrq(); + setIrq(0x204); return; case 2: cdr.AttenuatorLeftToLeftT = rt; @@ -1415,12 +1488,13 @@ unsigned char cdrRead3(void) { else psxHu8(0x1803) = cdr.Reg2 | 0xE0; - CDR_LOG_IO("cdr r3: %02x\n", psxHu8(0x1803)); + CDR_LOG_IO("cdr r3.%s: %02x\n", (cdr.Ctrl & 1) ? 
"ifl" : "ien", psxHu8(0x1803)); return psxHu8(0x1803); } void cdrWrite3(unsigned char rt) { - CDR_LOG_IO("cdr w3: %02x\n", rt); + const char *rnames[] = { "req", "ifl", "alr", "ava" }; (void)rnames; + CDR_LOG_IO("cdr w3.%s: %02x\n", rnames[cdr.Ctrl & 3], rt); switch (cdr.Ctrl & 3) { case 0: diff --git a/libpcsxcore/cdrom.h b/libpcsxcore/cdrom.h index 2ec10545a..e12c39aa6 100644 --- a/libpcsxcore/cdrom.h +++ b/libpcsxcore/cdrom.h @@ -45,78 +45,6 @@ extern "C" { #define SUB_FRAMESIZE 96 -typedef struct { - unsigned char OCUP; - unsigned char Reg1Mode; - unsigned char Reg2; - unsigned char CmdProcess; - unsigned char Ctrl; - unsigned char Stat; - - unsigned char StatP; - - unsigned char Transfer[DATA_SIZE]; - struct { - unsigned char Track; - unsigned char Index; - unsigned char Relative[3]; - unsigned char Absolute[3]; - } subq; - unsigned char TrackChanged; - boolean m_locationChanged; - unsigned char pad1[2]; - unsigned int freeze_ver; - - unsigned char Prev[4]; - unsigned char Param[8]; - unsigned char Result[16]; - - unsigned char ParamC; - unsigned char ParamP; - unsigned char ResultC; - unsigned char ResultP; - unsigned char ResultReady; - unsigned char Cmd; - unsigned char Readed; - unsigned char SetlocPending; - u32 Reading; - - unsigned char ResultTN[6]; - unsigned char ResultTD[4]; - unsigned char SetSectorPlay[4]; - unsigned char SetSectorEnd[4]; - unsigned char SetSector[4]; - unsigned char Track; - boolean Play, Muted; - int CurTrack; - int Mode, File, Channel; - int Reset; - int NoErr; - int FirstSector; - - xa_decode_t Xa; - - int Init; - - u16 Irq; - u8 IrqRepeated; - u32 eCycle; - - u8 Seeked; - - u8 DriveState; - u8 FastForward; - u8 FastBackward; - u8 pad; - - u8 AttenuatorLeftToLeft, AttenuatorLeftToRight; - u8 AttenuatorRightToRight, AttenuatorRightToLeft; - u8 AttenuatorLeftToLeftT, AttenuatorLeftToRightT; - u8 AttenuatorRightToRightT, AttenuatorRightToLeftT; -} cdrStruct; - -extern cdrStruct cdr; - void cdrReset(); void cdrAttenuate(s16 *buf, 
int samples, int stereo); From 92d79a62c3ce10befde9911e7a9a5e7101bf82e2 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 1 Sep 2022 02:09:49 +0300 Subject: [PATCH 184/597] cdrom: should use the last Setloc loc and SeekL notaz/pcsx_rearmed#253 --- libpcsxcore/cdrom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 1431dac65..5475a6b60 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -985,6 +985,7 @@ void cdrInterrupt() { */ CDRMISC_INT(cdr.Seeked == SEEK_DONE ? 0x800 : cdReadTime * 4); cdr.Seeked = SEEK_PENDING; + memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); start_rotating = 1; break; From f522e63c1182d8a7f1ca7de3b5c06c47a5d22bbb Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 2 Sep 2022 00:47:56 +0300 Subject: [PATCH 185/597] cdrom: try to clean up the seeking mess regressions are likely (this is cdrom code - pcsx's weak spot), so feel free to revert if I'm not responsive --- libpcsxcore/cdrom.c | 254 ++++++++++++++++---------------------------- 1 file changed, 92 insertions(+), 162 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 5475a6b60..5dfa658dc 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -62,8 +62,7 @@ static struct { unsigned char Absolute[3]; } subq; unsigned char TrackChanged; - boolean m_locationChanged; - unsigned char pad1[2]; + unsigned char pad1[3]; unsigned int freeze_ver; unsigned char Prev[4]; @@ -101,7 +100,7 @@ static struct { u8 IrqRepeated; u32 eCycle; - u8 Seeked; + u8 pad2; u8 DriveState; u8 FastForward; @@ -208,19 +207,13 @@ unsigned char Test23[] = { 0x43, 0x58, 0x44, 0x32, 0x39 ,0x34, 0x30, 0x51 }; #define cdReadTime (PSXCLK / 75) enum drive_state { - DRIVESTATE_STANDBY = 0, + DRIVESTATE_STANDBY = 0, // pause, play, read DRIVESTATE_LID_OPEN, DRIVESTATE_RESCAN_CD, DRIVESTATE_PREPARE_CD, DRIVESTATE_STOPPED, }; -// for cdr.Seeked -enum seeked_state { - SEEK_PENDING = 0, - SEEK_DONE = 1, -}; - static struct CdrStat stat; static unsigned 
int msf2sec(const u8 *msf) { @@ -265,7 +258,7 @@ static void sec2msf(unsigned int s, u8 *msf) { } // cdrPlayInterrupt -#define CDRMISC_INT(eCycle) { \ +#define CDRSEEKPLAY_INT(eCycle) { \ psxRegs.interrupt |= (1 << PSXINT_CDRPLAY); \ psxRegs.intCycle[PSXINT_CDRPLAY].cycle = eCycle; \ psxRegs.intCycle[PSXINT_CDRPLAY].sCycle = psxRegs.cycle; \ @@ -273,22 +266,20 @@ static void sec2msf(unsigned int s, u8 *msf) { } #define StopReading() { \ - if (cdr.Reading) { \ - cdr.Reading = 0; \ - psxRegs.interrupt &= ~(1 << PSXINT_CDREAD); \ - } \ - cdr.StatP &= ~(STATUS_READ|STATUS_SEEK);\ + cdr.Reading = 0; \ + psxRegs.interrupt &= ~(1 << PSXINT_CDREAD); \ } #define StopCdda() { \ - if (cdr.Play) { \ - if (!Config.Cdda) CDR_stop(); \ - cdr.StatP &= ~STATUS_PLAY; \ - cdr.Play = FALSE; \ - cdr.FastForward = 0; \ - cdr.FastBackward = 0; \ - /*SPU_registerCallback( SPUirq );*/ \ - } \ + if (cdr.Play && !Config.Cdda) CDR_stop(); \ + cdr.Play = FALSE; \ + cdr.FastForward = 0; \ + cdr.FastBackward = 0; \ +} + +#define SetPlaySeekRead(x, f) { \ + x &= ~(STATUS_PLAY | STATUS_SEEK | STATUS_READ); \ + x |= f; \ } #define SetResultSize(size) { \ @@ -321,14 +312,15 @@ void cdrLidSeekInterrupt() switch (cdr.DriveState) { default: case DRIVESTATE_STANDBY: - cdr.StatP &= ~STATUS_SEEK; + StopCdda(); + StopReading(); + SetPlaySeekRead(cdr.StatP, 0); if (CDR_getStatus(&stat) == -1) return; if (stat.Status & STATUS_SHELLOPEN) { - StopCdda(); cdr.DriveState = DRIVESTATE_LID_OPEN; CDRLID_INT(0x800); } @@ -340,7 +332,6 @@ void cdrLidSeekInterrupt() // 02, 12, 10 if (!(cdr.StatP & STATUS_SHELLOPEN)) { - StopReading(); cdr.StatP |= STATUS_SHELLOPEN; // could generate error irq here, but real hardware @@ -530,6 +521,7 @@ static void cdrPlayInterrupt_Autopause() setIrq(0x200); StopCdda(); + SetPlaySeekRead(cdr.StatP, 0); } else if (((cdr.Mode & MODE_REPORT) || cdr.FastForward || cdr.FastBackward)) { cdr.Result[0] = cdr.StatP; @@ -569,30 +561,47 @@ static void cdrPlayInterrupt_Autopause() } } +static int 
cdrSeekTime(unsigned char *target) +{ + int seekTime = abs(msf2sec(cdr.SetSectorPlay) - msf2sec(target)) * (cdReadTime / 200); + /* + * Gameblabla : + * It was originally set to 1000000 for Driver, however it is not high enough for Worms Pinball + * and was unreliable for that game. + * I also tested it against Mednafen and Driver's titlescreen music starts 25 frames later, not immediatly. + * + * Obviously, this isn't perfect but right now, it should be a bit better. + * Games to test this against if you change that setting : + * - Driver (titlescreen music delay and retry mission) + * - Worms Pinball (Will either not boot or crash in the memory card screen) + * - Viewpoint (short pauses if the delay in the ingame music is too long) + * + * It seems that 3386880 * 5 is too much for Driver's titlescreen and it starts skipping. + * However, 1000000 is not enough for Worms Pinball to reliably boot. + */ + if(seekTime > 3386880 * 2) seekTime = 3386880 * 2; + CDR_LOG("seek: %.2f %.2f\n", (float)seekTime / PSXCLK, (float)seekTime / cdReadTime); + return seekTime; +} + // also handles seek void cdrPlayInterrupt() { - if (cdr.Seeked == SEEK_PENDING) { + if (cdr.StatP & STATUS_SEEK) { if (cdr.Stat) { CDR_LOG_I("cdrom: seek stat hack\n"); - CDRMISC_INT(0x1000); + CDRSEEKPLAY_INT(0x1000); return; } SetResultSize(1); cdr.StatP |= STATUS_ROTATING; - cdr.StatP &= ~STATUS_SEEK; + SetPlaySeekRead(cdr.StatP, cdr.Play ? 
STATUS_PLAY : 0); cdr.Result[0] = cdr.StatP; - cdr.Seeked = SEEK_DONE; if (cdr.Irq == 0) { cdr.Stat = Complete; setIrq(0x202); } - if (cdr.SetlocPending) { - memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); - cdr.SetlocPending = 0; - cdr.m_locationChanged = TRUE; - } Find_CurTrack(cdr.SetSectorPlay); ReadTrack(cdr.SetSectorPlay); cdr.TrackChanged = FALSE; @@ -605,6 +614,7 @@ void cdrPlayInterrupt() if (memcmp(cdr.SetSectorPlay, cdr.SetSectorEnd, 3) == 0) { StopCdda(); + SetPlaySeekRead(cdr.StatP, 0); cdr.TrackChanged = TRUE; } else { @@ -630,15 +640,7 @@ void cdrPlayInterrupt() } } - if (cdr.m_locationChanged) - { - CDRMISC_INT(cdReadTime * 30); - cdr.m_locationChanged = FALSE; - } - else - { - CDRMISC_INT(cdReadTime); - } + CDRSEEKPLAY_INT(cdReadTime); // update for CdlGetlocP/autopause generate_subq(cdr.SetSectorPlay); @@ -698,15 +700,7 @@ void cdrInterrupt() { else { for (i = 0; i < 3; i++) - { set_loc[i] = btoi(cdr.Param[i]); - } - - i = msf2sec(cdr.SetSectorPlay); - i = abs(i - msf2sec(set_loc)); - if (i > 16) - cdr.Seeked = SEEK_PENDING; - memcpy(cdr.SetSector, set_loc, 3); cdr.SetSector[3] = 0; cdr.SetlocPending = 1; @@ -716,29 +710,15 @@ void cdrInterrupt() { do_CdlPlay: case CdlPlay: StopCdda(); - if (cdr.Seeked == SEEK_PENDING) { - // XXX: wrong, should seek instead.. - cdr.Seeked = SEEK_DONE; - } - + StopReading(); + cdr.FastBackward = 0; cdr.FastForward = 0; - if (cdr.SetlocPending) { - memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); - cdr.SetlocPending = 0; - cdr.m_locationChanged = TRUE; - } - // BIOS CD Player // - Pause player, hit Track 01/02/../xx (Setloc issued!!) 
- if (cdr.ParamC == 0 || cdr.Param[0] == 0) { - CDR_LOG("PLAY Resume @ %d:%d:%d\n", - cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); - } - else - { + if (cdr.ParamC != 0 && cdr.Param[0] != 0) { int track = btoi( cdr.Param[0] ); if (track <= cdr.ResultTN[1]) @@ -747,11 +727,21 @@ void cdrInterrupt() { CDR_LOG("PLAY track %d\n", cdr.CurTrack); if (CDR_getTD((u8)cdr.CurTrack, cdr.ResultTD) != -1) { - cdr.SetSectorPlay[0] = cdr.ResultTD[2]; - cdr.SetSectorPlay[1] = cdr.ResultTD[1]; - cdr.SetSectorPlay[2] = cdr.ResultTD[0]; + for (i = 0; i < 3; i++) + set_loc[i] = cdr.ResultTD[2 - i]; + seekTime = cdrSeekTime(set_loc); + memcpy(cdr.SetSectorPlay, set_loc, 3); } } + else if (cdr.SetlocPending) { + seekTime = cdrSeekTime(cdr.SetSector); + memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); + } + else { + CDR_LOG("PLAY Resume @ %d:%d:%d\n", + cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); + } + cdr.SetlocPending = 0; /* Rayman: detect track changes @@ -767,20 +757,15 @@ void cdrInterrupt() { ReadTrack(cdr.SetSectorPlay); cdr.TrackChanged = FALSE; - StopReading(); if (!Config.Cdda) CDR_play(cdr.SetSectorPlay); - // Vib Ribbon: gameplay checks flag - cdr.StatP &= ~STATUS_SEEK; - cdr.Result[0] = cdr.StatP; - - cdr.StatP |= STATUS_PLAY; + SetPlaySeekRead(cdr.StatP, STATUS_SEEK | STATUS_ROTATING); // BIOS player - set flag again cdr.Play = TRUE; - CDRMISC_INT( cdReadTime ); + CDRSEEKPLAY_INT(cdReadTime + seekTime); start_rotating = 1; break; @@ -826,6 +811,8 @@ void cdrInterrupt() { StopCdda(); StopReading(); + SetPlaySeekRead(cdr.StatP, 0); + cdr.StatP &= ~STATUS_ROTATING; delay = 0x800; if (cdr.DriveState == DRIVESTATE_STANDBY) @@ -836,12 +823,12 @@ void cdrInterrupt() { break; case CdlStop + 0x100: - cdr.StatP &= ~STATUS_ROTATING; - cdr.Result[0] = cdr.StatP; cdr.Stat = Complete; break; case CdlPause: + StopCdda(); + StopReading(); /* Gundam Battle Assault 2: much slower (*) - Fixes boot, gameplay @@ -857,26 +844,27 @@ void cdrInterrupt() { * 
Mednafen's timing don't work for Gundam Battle Assault 2 in PAL/50hz mode, * seems to be timing sensitive as it can depend on the CPU's clock speed. * */ - if (cdr.DriveState == DRIVESTATE_STANDBY) + if (!(cdr.StatP & (STATUS_PLAY | STATUS_READ))) { delay = 7000; } else { delay = (((cdr.Mode & MODE_SPEED) ? 2 : 1) * (1000000)); - CDRMISC_INT((cdr.Mode & MODE_SPEED) ? cdReadTime / 2 : cdReadTime); } AddIrqQueue(CdlPause + 0x100, delay); + SetPlaySeekRead(cdr.StatP, 0); cdr.Ctrl |= 0x80; break; case CdlPause + 0x100: - cdr.StatP &= ~STATUS_READ; - cdr.Result[0] = cdr.StatP; cdr.Stat = Complete; break; case CdlReset: + StopCdda(); + StopReading(); + SetPlaySeekRead(cdr.StatP, 0); cdr.Muted = FALSE; cdr.Mode = 0x20; /* This fixes This is Football 2, Pooh's Party lockups */ AddIrqQueue(CdlReset + 0x100, 4100000); @@ -968,8 +956,10 @@ void cdrInterrupt() { case CdlSeekP: StopCdda(); StopReading(); - cdr.StatP |= STATUS_SEEK; + SetPlaySeekRead(cdr.StatP, STATUS_SEEK); + seekTime = cdrSeekTime(cdr.SetSector); + memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); /* Crusaders of Might and Magic = 0.5x-4x - fix cutscene speech start @@ -983,9 +973,7 @@ void cdrInterrupt() { Rockman X5 = 0.5-4x - fix capcom logo */ - CDRMISC_INT(cdr.Seeked == SEEK_DONE ? 
0x800 : cdReadTime * 4); - cdr.Seeked = SEEK_PENDING; - memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); + CDRSEEKPLAY_INT(cdReadTime + seekTime); start_rotating = 1; break; @@ -1035,6 +1023,9 @@ void cdrInterrupt() { break; case CdlInit: + StopCdda(); + StopReading(); + SetPlaySeekRead(cdr.StatP, 0); // yes, it really sets STATUS_SHELLOPEN cdr.StatP |= STATUS_SHELLOPEN; cdr.DriveState = DRIVESTATE_RESCAN_CD; @@ -1060,34 +1051,18 @@ void cdrInterrupt() { case CdlReadN: case CdlReadS: - if (cdr.SetlocPending) { - seekTime = abs(msf2sec(cdr.SetSectorPlay) - msf2sec(cdr.SetSector)) * (cdReadTime / 200); - /* - * Gameblabla : - * It was originally set to 1000000 for Driver, however it is not high enough for Worms Pinball - * and was unreliable for that game. - * I also tested it against Mednafen and Driver's titlescreen music starts 25 frames later, not immediatly. - * - * Obviously, this isn't perfect but right now, it should be a bit better. - * Games to test this against if you change that setting : - * - Driver (titlescreen music delay and retry mission) - * - Worms Pinball (Will either not boot or crash in the memory card screen) - * - Viewpoint (short pauses if the delay in the ingame music is too long) - * - * It seems that 3386880 * 5 is too much for Driver's titlescreen and it starts skipping. - * However, 1000000 is not enough for Worms Pinball to reliably boot. - */ - if(seekTime > 3386880 * 2) seekTime = 3386880 * 2; - memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); - cdr.SetlocPending = 0; - cdr.m_locationChanged = TRUE; - } - Find_CurTrack(cdr.SetSectorPlay); + Find_CurTrack(cdr.SetlocPending ? 
cdr.SetSector : cdr.SetSectorPlay); if ((cdr.Mode & MODE_CDDA) && cdr.CurTrack > 1) // Read* acts as play for cdda tracks in cdda mode goto do_CdlPlay; + StopCdda(); + if (cdr.SetlocPending) { + seekTime = cdrSeekTime(cdr.SetSector); + memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); + cdr.SetlocPending = 0; + } cdr.Reading = 1; cdr.FirstSector = 1; @@ -1128,12 +1103,9 @@ void cdrInterrupt() { Gameblabla additional notes : This still needs the "+ seekTime" that PCSX Redux doesn't have for the Driver "retry" mission error. */ - cdr.StatP |= STATUS_READ; - cdr.StatP &= ~STATUS_SEEK; - CDREAD_INT(((cdr.Mode & 0x80) ? (cdReadTime) : cdReadTime * 2) + seekTime); - cdr.Result[0] = cdr.StatP; + SetPlaySeekRead(cdr.StatP, STATUS_SEEK); start_rotating = 1; break; case CdlSync: @@ -1229,16 +1201,14 @@ void cdrReadInterrupt() { if (cdr.Irq || cdr.Stat) { CDR_LOG_I("cdrom: read stat hack %02x %x\n", cdr.Irq, cdr.Stat); - CDREAD_INT(0x1000); + CDREAD_INT(2048); return; } cdr.OCUP = 1; SetResultSize(1); - cdr.StatP |= STATUS_READ|STATUS_ROTATING; - cdr.StatP &= ~STATUS_SEEK; + SetPlaySeekRead(cdr.StatP, STATUS_READ | STATUS_ROTATING); cdr.Result[0] = cdr.StatP; - cdr.Seeked = SEEK_DONE; ReadTrack(cdr.SetSectorPlay); @@ -1279,19 +1249,6 @@ void cdrReadInterrupt() { int ret = xa_decode_sector(&cdr.Xa, cdr.Transfer+4, cdr.FirstSector); if (!ret) { cdrAttenuate(cdr.Xa.pcm, cdr.Xa.nsamples, cdr.Xa.stereo); - /* - * Gameblabla - - * This is a hack for Megaman X4, Castlevania etc... - * that regressed from the new m_locationChanged and CDROM timings changes. - * It is mostly noticeable in Castevania however and the stuttering can be very jarring. - * - * According to PCSX redux authors, we shouldn't cause a location change if - * the sector difference is too small. - * I attempted to go with that approach but came empty handed. - * So for now, let's just set cdr.m_locationChanged to false when playing back any ADPCM samples. - * This does not regress Crash Team Racing's intro at least. 
- */ - cdr.m_locationChanged = FALSE; SPU_playADPCMchannel(&cdr.Xa); cdr.FirstSector = 0; } @@ -1311,13 +1268,7 @@ void cdrReadInterrupt() { cdr.Readed = 0; - uint32_t delay = (cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime; - if (cdr.m_locationChanged) { - CDREAD_INT(delay * 30); - cdr.m_locationChanged = FALSE; - } else { - CDREAD_INT(delay); - } + CDREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime); /* Croc 2: $40 - only FORM1 (*) @@ -1420,30 +1371,10 @@ void cdrWrite1(unsigned char rt) { AddIrqQueue(cdr.Cmd, 0x800); switch (cdr.Cmd) { - - case CdlReadN: - case CdlReadS: - case CdlPause: - StopCdda(); - StopReading(); - break; - - case CdlInit: - case CdlReset: - cdr.Seeked = SEEK_DONE; - StopCdda(); - StopReading(); - break; - case CdlSetmode: CDR_LOG("cdrWrite1() Log: Setmode %x\n", cdr.Param[0]); cdr.Mode = cdr.Param[0]; - - // Squaresoft on PlayStation 1998 Collector's CD Vol. 1 - // - fixes choppy movie sound - if( cdr.Play && (cdr.Mode & MODE_CDDA) == 0 ) - StopCdda(); break; } } @@ -1711,6 +1642,5 @@ int cdrFreeze(void *f, int Mode) { void LidInterrupt() { getCdInfo(); - StopCdda(); cdrLidSeekInterrupt(); } From d9a0249331e3f0da8820e158baaafa69e3a534ac Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 2 Sep 2022 23:13:44 +0300 Subject: [PATCH 186/597] cdrom: get rid of cdrPlayInterrupt unify with cdrReadInterrupt and name it cdrPlaySeekReadInterrupt ... 
although these are not actually interrupts, more like events, but keep PCSX's weird naming convention --- libpcsxcore/cdrom.c | 60 ++++++++++++++++---------------- libpcsxcore/cdrom.h | 12 +++---- libpcsxcore/new_dynarec/emu_if.c | 8 +++-- libpcsxcore/r3000a.c | 8 +---- libpcsxcore/r3000a.h | 2 +- 5 files changed, 43 insertions(+), 47 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 5dfa658dc..2b30e89d4 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -241,8 +241,8 @@ static void sec2msf(unsigned int s, u8 *msf) { new_dyna_set_event(PSXINT_CDR, eCycle); \ } -// cdrReadInterrupt -#define CDREAD_INT(eCycle) { \ +// cdrPlaySeekReadInterrupt +#define CDRPLAYSEEKREAD_INT(eCycle) { \ psxRegs.interrupt |= (1 << PSXINT_CDREAD); \ psxRegs.intCycle[PSXINT_CDREAD].cycle = eCycle; \ psxRegs.intCycle[PSXINT_CDREAD].sCycle = psxRegs.cycle; \ @@ -257,14 +257,6 @@ static void sec2msf(unsigned int s, u8 *msf) { new_dyna_set_event(PSXINT_CDRLID, eCycle); \ } -// cdrPlayInterrupt -#define CDRSEEKPLAY_INT(eCycle) { \ - psxRegs.interrupt |= (1 << PSXINT_CDRPLAY); \ - psxRegs.intCycle[PSXINT_CDRPLAY].cycle = eCycle; \ - psxRegs.intCycle[PSXINT_CDRPLAY].sCycle = psxRegs.cycle; \ - new_dyna_set_event(PSXINT_CDRPLAY, eCycle); \ -} - #define StopReading() { \ cdr.Reading = 0; \ psxRegs.interrupt &= ~(1 << PSXINT_CDREAD); \ @@ -307,7 +299,7 @@ static void setIrq(int log_cmd) // timing used in this function was taken from tests on real hardware // (yes it's slow, but you probably don't want to modify it) -void cdrLidSeekInterrupt() +void cdrLidSeekInterrupt(void) { switch (cdr.DriveState) { default: @@ -584,18 +576,24 @@ static int cdrSeekTime(unsigned char *target) return seekTime; } -// also handles seek -void cdrPlayInterrupt() +static void cdrReadInterrupt(void); + +void cdrPlaySeekReadInterrupt(void) { - if (cdr.StatP & STATUS_SEEK) { + if (cdr.Reading) { + cdrReadInterrupt(); + return; + } + + if (!cdr.Play && (cdr.StatP & STATUS_SEEK)) { if 
(cdr.Stat) { CDR_LOG_I("cdrom: seek stat hack\n"); - CDRSEEKPLAY_INT(0x1000); + CDRPLAYSEEKREAD_INT(0x1000); return; } SetResultSize(1); cdr.StatP |= STATUS_ROTATING; - SetPlaySeekRead(cdr.StatP, cdr.Play ? STATUS_PLAY : 0); + SetPlaySeekRead(cdr.StatP, 0); cdr.Result[0] = cdr.StatP; if (cdr.Irq == 0) { cdr.Stat = Complete; @@ -605,6 +603,7 @@ void cdrPlayInterrupt() Find_CurTrack(cdr.SetSectorPlay); ReadTrack(cdr.SetSectorPlay); cdr.TrackChanged = FALSE; + return; } if (!cdr.Play) return; @@ -612,6 +611,7 @@ void cdrPlayInterrupt() CDR_LOG( "CDDA - %d:%d:%d\n", cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2] ); + SetPlaySeekRead(cdr.StatP, STATUS_PLAY); if (memcmp(cdr.SetSectorPlay, cdr.SetSectorEnd, 3) == 0) { StopCdda(); SetPlaySeekRead(cdr.StatP, 0); @@ -640,13 +640,13 @@ void cdrPlayInterrupt() } } - CDRSEEKPLAY_INT(cdReadTime); + CDRPLAYSEEKREAD_INT(cdReadTime); // update for CdlGetlocP/autopause generate_subq(cdr.SetSectorPlay); } -void cdrInterrupt() { +void cdrInterrupt(void) { u16 Irq = cdr.Irq; int no_busy_error = 0; int start_rotating = 0; @@ -765,7 +765,7 @@ void cdrInterrupt() { // BIOS player - set flag again cdr.Play = TRUE; - CDRSEEKPLAY_INT(cdReadTime + seekTime); + CDRPLAYSEEKREAD_INT(cdReadTime + seekTime); start_rotating = 1; break; @@ -973,7 +973,7 @@ void cdrInterrupt() { Rockman X5 = 0.5-4x - fix capcom logo */ - CDRSEEKPLAY_INT(cdReadTime + seekTime); + CDRPLAYSEEKREAD_INT(cdReadTime + seekTime); start_rotating = 1; break; @@ -1103,7 +1103,7 @@ void cdrInterrupt() { Gameblabla additional notes : This still needs the "+ seekTime" that PCSX Redux doesn't have for the Driver "retry" mission error. */ - CDREAD_INT(((cdr.Mode & 0x80) ? (cdReadTime) : cdReadTime * 2) + seekTime); + CDRPLAYSEEKREAD_INT(((cdr.Mode & 0x80) ? 
(cdReadTime) : cdReadTime * 2) + seekTime); SetPlaySeekRead(cdr.StatP, STATUS_SEEK); start_rotating = 1; @@ -1193,15 +1193,13 @@ void cdrAttenuate(s16 *buf, int samples, int stereo) } } -void cdrReadInterrupt() { +static void cdrReadInterrupt(void) +{ u8 *buf; - if (!cdr.Reading) - return; - if (cdr.Irq || cdr.Stat) { CDR_LOG_I("cdrom: read stat hack %02x %x\n", cdr.Irq, cdr.Stat); - CDREAD_INT(2048); + CDRPLAYSEEKREAD_INT(2048); return; } @@ -1221,7 +1219,7 @@ void cdrReadInterrupt() { memset(cdr.Transfer, 0, DATA_SIZE); cdr.Stat = DiskError; cdr.Result[0] |= STATUS_ERROR; - CDREAD_INT((cdr.Mode & 0x80) ? (cdReadTime / 2) : cdReadTime); + setIrq(0x205); return; } @@ -1268,7 +1266,7 @@ void cdrReadInterrupt() { cdr.Readed = 0; - CDREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime); + CDRPLAYSEEKREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime); /* Croc 2: $40 - only FORM1 (*) @@ -1544,7 +1542,7 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { DMA_INTERRUPT(3); } -void cdrDmaInterrupt() +void cdrDmaInterrupt(void) { if (HW_DMA3_CHCR & SWAP32(0x01000000)) { @@ -1620,6 +1618,8 @@ int cdrFreeze(void *f, int Mode) { Find_CurTrack(cdr.SetSectorPlay); if (!Config.Cdda) CDR_play(cdr.SetSectorPlay); + if (psxRegs.interrupt & (1 << PSXINT_CDRPLAY_OLD)) + CDRPLAYSEEKREAD_INT((cdr.Mode & 0x80) ? 
(cdReadTime / 2) : cdReadTime); } if ((cdr.freeze_ver & 0xffffff00) != 0x63647200) { @@ -1640,7 +1640,7 @@ int cdrFreeze(void *f, int Mode) { return 0; } -void LidInterrupt() { +void LidInterrupt(void) { getCdInfo(); cdrLidSeekInterrupt(); } diff --git a/libpcsxcore/cdrom.h b/libpcsxcore/cdrom.h index e12c39aa6..0cd6c5f6e 100644 --- a/libpcsxcore/cdrom.h +++ b/libpcsxcore/cdrom.h @@ -48,13 +48,11 @@ extern "C" { void cdrReset(); void cdrAttenuate(s16 *buf, int samples, int stereo); -void cdrInterrupt(); -void cdrReadInterrupt(); -void cdrRepplayInterrupt(); -void cdrLidSeekInterrupt(); -void cdrPlayInterrupt(); -void cdrDmaInterrupt(); -void LidInterrupt(); +void cdrInterrupt(void); +void cdrPlaySeekReadInterrupt(void); +void cdrLidSeekInterrupt(void); +void cdrDmaInterrupt(void); +void LidInterrupt(void); unsigned char cdrRead0(void); unsigned char cdrRead1(void); unsigned char cdrRead2(void); diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index cc0bcdf1a..bf64e0f0d 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -46,12 +46,16 @@ static void schedule_timeslice(void) next_interupt = c + min; } +static void unusedInterrupt() +{ +} + typedef void (irq_func)(); static irq_func * const irq_funcs[] = { [PSXINT_SIO] = sioInterrupt, [PSXINT_CDR] = cdrInterrupt, - [PSXINT_CDREAD] = cdrReadInterrupt, + [PSXINT_CDREAD] = cdrPlaySeekReadInterrupt, [PSXINT_GPUDMA] = gpuInterrupt, [PSXINT_MDECOUTDMA] = mdec1Interrupt, [PSXINT_SPUDMA] = spuInterrupt, @@ -59,7 +63,7 @@ static irq_func * const irq_funcs[] = { [PSXINT_GPUOTCDMA] = gpuotcInterrupt, [PSXINT_CDRDMA] = cdrDmaInterrupt, [PSXINT_CDRLID] = cdrLidSeekInterrupt, - [PSXINT_CDRPLAY] = cdrPlayInterrupt, + [PSXINT_CDRPLAY_OLD] = unusedInterrupt, [PSXINT_SPU_UPDATE] = spuUpdate, [PSXINT_RCNT] = psxRcntUpdate, }; diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 7e6f16b48..a9b800e39 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c 
@@ -139,7 +139,7 @@ void psxBranchTest() { if (psxRegs.interrupt & (1 << PSXINT_CDREAD)) { // cdr read if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_CDREAD].sCycle) >= psxRegs.intCycle[PSXINT_CDREAD].cycle) { psxRegs.interrupt &= ~(1 << PSXINT_CDREAD); - cdrReadInterrupt(); + cdrPlaySeekReadInterrupt(); } } if (psxRegs.interrupt & (1 << PSXINT_GPUDMA)) { // gpu dma @@ -178,12 +178,6 @@ void psxBranchTest() { cdrDmaInterrupt(); } } - if (psxRegs.interrupt & (1 << PSXINT_CDRPLAY)) { // cdr play timing - if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_CDRPLAY].sCycle) >= psxRegs.intCycle[PSXINT_CDRPLAY].cycle) { - psxRegs.interrupt &= ~(1 << PSXINT_CDRPLAY); - cdrPlayInterrupt(); - } - } if (psxRegs.interrupt & (1 << PSXINT_CDRLID)) { // cdr lid states if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_CDRLID].sCycle) >= psxRegs.intCycle[PSXINT_CDRLID].cycle) { psxRegs.interrupt &= ~(1 << PSXINT_CDRLID); diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 7d8e260ce..ea6f0e75f 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -164,7 +164,7 @@ enum { PSXINT_NEWDRC_CHECK, PSXINT_RCNT, PSXINT_CDRLID, - PSXINT_CDRPLAY, + PSXINT_CDRPLAY_OLD, /* unused */ PSXINT_SPU_UPDATE, PSXINT_COUNT }; From d0ea0d8aac524732bddf244f73e57ade28a442c5 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 3 Sep 2022 01:54:28 +0300 Subject: [PATCH 187/597] cdrom: take a minor change from libretro --- libpcsxcore/cdrom.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 2b30e89d4..bc5e2ae78 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1018,7 +1018,8 @@ void cdrInterrupt(void) { } cdr.Result[0] |= (cdr.Result[1] >> 4) & 0x08; - strncpy((char *)&cdr.Result[4], "PCSX", 4); + /* This adds the string "PCSX" in Playstation bios boot screen */ + memcpy((char *)&cdr.Result[4], "PCSX", 4); cdr.Stat = Complete; break; From 0c49c8ae1e4b96792f77aa2868c4865b172d7b42 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 3 
Sep 2022 18:33:09 +0300 Subject: [PATCH 188/597] cdrom: remove another hack The hack just makes Worms Pinball unstable, and Crusaders of Might and Magic no longer seems to need it. --- libpcsxcore/cdrom.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index bc5e2ae78..84b05a419 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1071,15 +1071,6 @@ void cdrInterrupt(void) { // - fixes new game ReadTrack(cdr.SetSectorPlay); - - // Crusaders of Might and Magic - update getlocl now - // - fixes cutscene speech - { - u8 *buf = CDR_getBuffer(); - if (buf != NULL) - memcpy(cdr.Transfer, buf, 8); - } - /* Duke Nukem: Land of the Babes - seek then delay read for one frame - fixes cutscenes From e9b207f1b9c79001aaa75abfc4cec353f006a57d Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 3 Sep 2022 18:37:53 +0300 Subject: [PATCH 189/597] cdrom: remove confusing comment No need for a wall-of-text comment for something as basic as seeking. It also said "disabling it with `&& false` for now" when there is no such code here. --- libpcsxcore/cdrom.c | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 84b05a419..07b13649c 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1071,30 +1071,6 @@ void cdrInterrupt(void) { // - fixes new game ReadTrack(cdr.SetSectorPlay); - /* - Duke Nukem: Land of the Babes - seek then delay read for one frame - - fixes cutscenes - C-12 - Final Resistance - doesn't like seek - */ - - /* - By nicolasnoble from PCSX Redux : - "It LOOKS like this logic is wrong, therefore disabling it with `&& false` for now. - For "PoPoLoCrois Monogatari II", the game logic will soft lock and will never issue GetLocP to detect - the end of its XA streams, as it seems to assume ReadS will not return a status byte with the SEEK - flag set. 
I think the reasonning is that since it's invalid to call GetLocP while seeking, the game - tries to protect itself against errors by preventing from issuing a GetLocP while it knows the - last status was "seek". But this makes the logic just softlock as it'll never get a notification - about the fact the drive is done seeking and the read actually started. - - In other words, this state machine here is probably wrong in assuming the response to ReadS/ReadN is - done right away. It's rather when it's done seeking, and the read has actually started. This probably - requires a bit more work to make sure seek delays are processed properly. - Checked with a few games, this seems to work fine." - - Gameblabla additional notes : - This still needs the "+ seekTime" that PCSX Redux doesn't have for the Driver "retry" mission error. - */ CDRPLAYSEEKREAD_INT(((cdr.Mode & 0x80) ? (cdReadTime) : cdReadTime * 2) + seekTime); SetPlaySeekRead(cdr.StatP, STATUS_SEEK); From f3fa20c2fc7cff9352b5bcbe23e8b682bdcf0b2c Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 5 Sep 2022 00:02:28 +0300 Subject: [PATCH 190/597] spu: remove some strange rounding Seems wrong. Also deal with now possible div by 0 in scan_for_irq(). 
--- plugins/dfsound/registers.c | 9 ++++----- plugins/dfsound/spu.c | 2 ++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index e0693064d..61d0b810f 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -484,11 +484,10 @@ static void SetPitch(int ch,unsigned short val) // SET PITCH if(val>0x3fff) NP=0x3fff; // get pitch val else NP=val; - spu.s_chan[ch].iRawPitch=NP; - spu.s_chan[ch].sinc=(NP<<4)|8; - spu.s_chan[ch].sinc_inv=0; - if (spu_config.iUseInterpolation == 1) - spu.SB[ch * SB_SIZE + 32] = 1; // -> freq change in simple interpolation mode: set flag + spu.s_chan[ch].iRawPitch = NP; + spu.s_chan[ch].sinc = NP << 4; + spu.s_chan[ch].sinc_inv = 0; + spu.SB[ch * SB_SIZE + 32] = 1; // -> freq change in simple interpolation mode: set flag } //////////////////////////////////////////////////////////////////////// diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index b0b083dd2..35d890d97 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1230,6 +1230,8 @@ void schedule_next_irq(void) if ((unsigned long)(spu.pSpuIrq - spu.s_chan[ch].pCurr) > IRQ_NEAR_BLOCKS * 16 && (unsigned long)(spu.pSpuIrq - spu.s_chan[ch].pLoop) > IRQ_NEAR_BLOCKS * 16) continue; + if (spu.s_chan[ch].sinc == 0) + continue; scan_for_irq(ch, &upd_samples); } From 5aa94fa080e1b0a661b23aa912022dd464d41110 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 5 Sep 2022 00:40:58 +0300 Subject: [PATCH 191/597] spu: rename dwChannelOn to something more suitable --- plugins/dfsound/adsr.h | 19 ------------------- plugins/dfsound/externals.h | 2 +- plugins/dfsound/freeze.c | 8 ++++---- plugins/dfsound/registers.c | 6 +++++- plugins/dfsound/spu.c | 20 +++++++++++--------- 5 files changed, 21 insertions(+), 34 deletions(-) delete mode 100644 plugins/dfsound/adsr.h diff --git a/plugins/dfsound/adsr.h b/plugins/dfsound/adsr.h deleted file mode 100644 index ff2af1ff6..000000000 --- 
a/plugins/dfsound/adsr.h +++ /dev/null @@ -1,19 +0,0 @@ -/*************************************************************************** - adsr.h - description - ------------------- - begin : Wed May 15 2002 - copyright : (C) 2002 by Pete Bernert - email : BlackDove@addcom.de - ***************************************************************************/ -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. See also the license.txt file for * - * additional informations. * - * * - ***************************************************************************/ - -INLINE void StartADSR(int ch); -INLINE int MixADSR(int ch); diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index 5ec941525..094266471 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -194,7 +194,7 @@ typedef struct unsigned int dwNoiseVal; // global noise generator unsigned int dwNoiseCount; unsigned int dwNewChannel; // flags for faster testing, if new channel starts - unsigned int dwChannelOn; // not silent channels + unsigned int dwChannelsAudible; // not silent channels unsigned int dwChannelDead; // silent+not useful channels unsigned char * pSpuBuffer; diff --git a/plugins/dfsound/freeze.c b/plugins/dfsound/freeze.c index 3bdbab1bd..a9843aeb2 100644 --- a/plugins/dfsound/freeze.c +++ b/plugins/dfsound/freeze.c @@ -148,7 +148,7 @@ static void save_channel(SPUCHAN_orig *d, const SPUCHAN *s, int ch) d->iStart = (regAreaGet(ch,6)&~1)<<3; d->iCurr = 0; // set by the caller d->iLoop = 0; // set by the caller - d->bOn = !!(spu.dwChannelOn & (1<bOn = !!(spu.dwChannelsAudible & (1<bStop = s->ADSRX.State == ADSR_RELEASE; d->bReverb = s->bReverb; d->iActFreq = 1; @@ -209,7 +209,7 
@@ static void load_channel(SPUCHAN *d, const SPUCHAN_orig *s, int ch) d->ADSRX.ReleaseModeExp = s->ADSRX.ReleaseModeExp; d->ADSRX.ReleaseRate = s->ADSRX.ReleaseRate; d->ADSRX.EnvelopeVol = s->ADSRX.EnvelopeVol; - if (s->bOn) spu.dwChannelOn |= 1<bOn) spu.dwChannelsAudible |= 1<ADSRX.EnvelopeVol = 0; } @@ -334,7 +334,7 @@ void LoadStateV5(SPUFreeze_t * pF) spu.decode_pos = pFO->decode_pos & 0x1ff; spu.dwNewChannel=0; - spu.dwChannelOn=0; + spu.dwChannelsAudible=0; spu.dwChannelDead=0; for(i=0;i>4)-0xc0; if(spu.dwNewChannel&(1<>16); @@ -488,6 +488,10 @@ static void SetPitch(int ch,unsigned short val) // SET PITCH spu.s_chan[ch].sinc = NP << 4; spu.s_chan[ch].sinc_inv = 0; spu.SB[ch * SB_SIZE + 32] = 1; // -> freq change in simple interpolation mode: set flag + if (val) + spu.dwChannelsAudible |= 1u << ch; + else + spu.dwChannelsAudible &= ~(1u << ch); } //////////////////////////////////////////////////////////////////////// diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 35d890d97..de7d2f250 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -250,8 +250,9 @@ static void StartSoundMain(int ch) s_chan->pCurr = spu.spuMemC+((regAreaGet(ch,6)&~1)<<3); spu.dwNewChannel&=~(1<iRawPitch) + spu.dwChannelsAudible|=1<>= 1) // loop em all... { if (!(mask & 1)) continue; // channel not playing? 
next @@ -800,7 +801,8 @@ static void do_channels(int ns_to) d = MixADSR(&s_chan->ADSRX, d); if (d < ns_to) { - spu.dwChannelOn &= ~(1 << ch); + spu.dwChannelsAudible &= ~(1 << ch); + s_chan->ADSRX.State = ADSR_RELEASE; s_chan->ADSRX.EnvelopeVol = 0; memset(&ChanBuf[d], 0, (ns_to - d) * sizeof(ChanBuf[0])); } @@ -937,7 +939,7 @@ static void queue_channel_work(int ns_to, unsigned int silentch) StartSoundMain(ch); } - mask = work->channels_on = spu.dwChannelOn & 0xffffff; + mask = work->channels_on = spu.dwChannelsAudible & 0xffffff; spu.decode_dirty_ch |= mask & 0x0a; for (ch = 0; mask != 0; ch++, mask >>= 1) @@ -962,7 +964,7 @@ static void queue_channel_work(int ns_to, unsigned int silentch) // note: d is not accurate on skip d = SkipADSR(&s_chan->ADSRX, d); if (d < ns_to) { - spu.dwChannelOn &= ~(1 << ch); + spu.dwChannelsAudible &= ~(1 << ch); s_chan->ADSRX.EnvelopeVol = 0; } } @@ -1105,7 +1107,7 @@ void do_samples(unsigned int cycles_to, int do_direct) return; } - silentch = ~(spu.dwChannelOn | spu.dwNewChannel) & 0xffffff; + silentch = ~(spu.dwChannelsAudible | spu.dwNewChannel) & 0xffffff; do_direct |= (silentch == 0xffffff); if (worker != NULL) @@ -1623,7 +1625,7 @@ void spu_get_debug_info(int *chans_out, int *run_chans, int *fmod_chans_out, int for(;ch Date: Mon, 5 Sep 2022 02:03:24 +0300 Subject: [PATCH 192/597] spu: support master volume --- plugins/dfsound/externals.h | 4 +++- plugins/dfsound/freeze.c | 2 +- plugins/dfsound/registers.c | 2 +- plugins/dfsound/spu.c | 18 ++++++++++++------ 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index 094266471..2bc2fc0e7 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -232,7 +232,9 @@ typedef struct unsigned short regArea[0x400]; } SPUInfo; -#define regAreaGet(ch,offset) \ +#define regAreaGet(offset) \ + spu.regArea[((offset) - 0xc00)>>1] +#define regAreaGetCh(ch, offset) \ spu.regArea[((ch<<4)|(offset))>>1] 
/////////////////////////////////////////////////////////// diff --git a/plugins/dfsound/freeze.c b/plugins/dfsound/freeze.c index a9843aeb2..4866df894 100644 --- a/plugins/dfsound/freeze.c +++ b/plugins/dfsound/freeze.c @@ -145,7 +145,7 @@ static void save_channel(SPUCHAN_orig *d, const SPUCHAN *s, int ch) d->spos = s->spos; d->sinc = s->sinc; memcpy(d->SB, spu.SB + ch * SB_SIZE, sizeof(d->SB[0]) * SB_SIZE); - d->iStart = (regAreaGet(ch,6)&~1)<<3; + d->iStart = (regAreaGetCh(ch, 6) & ~1) << 3; d->iCurr = 0; // set by the caller d->iLoop = 0; // set by the caller d->bOn = !!(spu.dwChannelsAudible & (1<>=1) // loop channels { - if((val&1) && regAreaGet(ch,6)) // mmm... start has to be set before key on !?! + if((val&1) && regAreaGetCh(ch, 6)) // mmm... start has to be set before key on !?! { spu.s_chan[ch].bIgnoreLoop = 0; spu.dwNewChannel|=(1<iSBPos=27; s_chan->spos=0; - s_chan->pCurr = spu.spuMemC+((regAreaGet(ch,6)&~1)<<3); + s_chan->pCurr = spu.spuMemC + ((regAreaGetCh(ch, 6) & ~1) << 3); spu.dwNewChannel&=~(1<> 17; + int vol_r = ((int)regAreaGet(H_SPUmvolR) << 17) >> 17; int ns; int d; @@ -1192,23 +1193,28 @@ static void do_samples_finish(int *SSumLR, int ns_to, } MixXA(SSumLR, ns_to, decode_pos); - - if((spu.spuCtrl&0x4000)==0) // muted? (rare, don't optimize for this) + + vol_l = vol_l * spu_config.iVolume >> 10; + vol_r = vol_r * spu_config.iVolume >> 10; + + if (!(spu.spuCtrl & 0x4000) || !(vol_l | vol_r)) { + // muted? 
(rare) memset(spu.pS, 0, ns_to * 2 * sizeof(spu.pS[0])); + memset(SSumLR, 0, ns_to * 2 * sizeof(SSumLR[0])); spu.pS += ns_to * 2; } else for (ns = 0; ns < ns_to * 2; ) { d = SSumLR[ns]; SSumLR[ns] = 0; - d = d * volmult >> 10; + d = d * vol_l >> 15; ssat32_to_16(d); *spu.pS++ = d; ns++; d = SSumLR[ns]; SSumLR[ns] = 0; - d = d * volmult >> 10; + d = d * vol_r >> 15; ssat32_to_16(d); *spu.pS++ = d; ns++; From 9cf790343a0788535b4ab2b2ce576662860d0188 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 6 Sep 2022 18:25:42 +0300 Subject: [PATCH 193/597] spu: sync on xa playback start this avoids nasty underflows at the start of the stream --- frontend/plugin.c | 8 ++++---- libpcsxcore/cdrom.c | 9 +++++---- libpcsxcore/plugins.h | 4 ++-- plugins/dfsound/freeze.c | 2 +- plugins/dfsound/spu.c | 14 ++++++++++---- plugins/dfsound/spu.h | 4 ++-- plugins/dfsound/xa.c | 2 +- 7 files changed, 25 insertions(+), 18 deletions(-) diff --git a/frontend/plugin.c b/frontend/plugin.c index 196c98028..5f9c5ffb0 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -35,7 +35,7 @@ extern void CALLBACK SPUwriteDMA(unsigned short); extern unsigned short CALLBACK SPUreadDMA(void); extern void CALLBACK SPUwriteDMAMem(unsigned short *, int, unsigned int); extern void CALLBACK SPUreadDMAMem(unsigned short *, int, unsigned int); -extern void CALLBACK SPUplayADPCMchannel(void *); +extern void CALLBACK SPUplayADPCMchannel(void *, unsigned int, int); extern void CALLBACK SPUregisterCallback(void (*cb)(void)); extern void CALLBACK SPUregisterScheduleCb(void (*cb)(unsigned int)); extern long CALLBACK SPUconfigure(void); @@ -43,7 +43,7 @@ extern long CALLBACK SPUtest(void); extern void CALLBACK SPUabout(void); extern long CALLBACK SPUfreeze(unsigned int, void *, unsigned int); extern void CALLBACK SPUasync(unsigned int, unsigned int); -extern int CALLBACK SPUplayCDDAchannel(short *, int); +extern int CALLBACK SPUplayCDDAchannel(short *, int, unsigned int, int); /* PAD */ static long CALLBACK 
PADreadPort1(PadDataS *pad) @@ -266,9 +266,9 @@ pc_hook_func (SPU_writeDMA, (unsigned short a0), (a0), PCNT_SPU) pc_hook_func_ret(unsigned short,SPU_readDMA, (void), (), PCNT_SPU) pc_hook_func (SPU_writeDMAMem, (unsigned short *a0, int a1, uint32_t a2), (a0, a1, a2), PCNT_SPU) pc_hook_func (SPU_readDMAMem, (unsigned short *a0, int a1, uint32_t a2), (a0, a1, a2), PCNT_SPU) -pc_hook_func (SPU_playADPCMchannel, (void *a0), (a0), PCNT_SPU) +pc_hook_func (SPU_playADPCMchannel, (void *a0, unsigned int a1, int a2), (a0, a1, a2), PCNT_SPU) pc_hook_func (SPU_async, (uint32_t a0, uint32_t a1), (a0, a1), PCNT_SPU) -pc_hook_func_ret(int, SPU_playCDDAchannel, (short *a0, int a1), (a0, a1), PCNT_SPU) +pc_hook_func_ret(int, SPU_playCDDAchannel, (short *a0, int a1, unsigned int a2, int a3), (a0, a1, a2, a3), PCNT_SPU) #define hook_it(name) { \ o_##name = name; \ diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 07b13649c..fb526b407 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -624,10 +624,10 @@ void cdrPlaySeekReadInterrupt(void) if (!cdr.Irq && !cdr.Stat && (cdr.Mode & (MODE_AUTOPAUSE|MODE_REPORT))) cdrPlayInterrupt_Autopause(); - if (CDR_readCDDA && !cdr.Muted && !Config.Cdda) { + if (!cdr.Muted && !Config.Cdda) { cdrAttenuate(read_buf, CD_FRAMESIZE_RAW / 4, 1); - if (SPU_playCDDAchannel) - SPU_playCDDAchannel(read_buf, CD_FRAMESIZE_RAW); + SPU_playCDDAchannel(read_buf, CD_FRAMESIZE_RAW, psxRegs.cycle, cdr.FirstSector); + cdr.FirstSector = 0; } cdr.SetSectorPlay[2]++; @@ -756,6 +756,7 @@ void cdrInterrupt(void) { Find_CurTrack(cdr.SetSectorPlay); ReadTrack(cdr.SetSectorPlay); cdr.TrackChanged = FALSE; + cdr.FirstSector = 1; if (!Config.Cdda) CDR_play(cdr.SetSectorPlay); @@ -1215,7 +1216,7 @@ static void cdrReadInterrupt(void) int ret = xa_decode_sector(&cdr.Xa, cdr.Transfer+4, cdr.FirstSector); if (!ret) { cdrAttenuate(cdr.Xa.pcm, cdr.Xa.nsamples, cdr.Xa.stereo); - SPU_playADPCMchannel(&cdr.Xa); + SPU_playADPCMchannel(&cdr.Xa, psxRegs.cycle, 
cdr.FirstSector); cdr.FirstSector = 0; } else cdr.FirstSector = -1; diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index 34f062e2e..38c41ca73 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -172,7 +172,7 @@ typedef void (CALLBACK* SPUwriteDMA)(unsigned short); typedef unsigned short (CALLBACK* SPUreadDMA)(void); typedef void (CALLBACK* SPUwriteDMAMem)(unsigned short *, int, unsigned int); typedef void (CALLBACK* SPUreadDMAMem)(unsigned short *, int, unsigned int); -typedef void (CALLBACK* SPUplayADPCMchannel)(xa_decode_t *); +typedef void (CALLBACK* SPUplayADPCMchannel)(xa_decode_t *, unsigned int, int); typedef void (CALLBACK* SPUregisterCallback)(void (CALLBACK *callback)(void)); typedef void (CALLBACK* SPUregisterScheduleCb)(void (CALLBACK *callback)(unsigned int cycles_after)); typedef long (CALLBACK* SPUconfigure)(void); @@ -189,7 +189,7 @@ typedef struct { } SPUFreeze_t; typedef long (CALLBACK* SPUfreeze)(uint32_t, SPUFreeze_t *, uint32_t); typedef void (CALLBACK* SPUasync)(uint32_t, uint32_t); -typedef int (CALLBACK* SPUplayCDDAchannel)(short *, int); +typedef int (CALLBACK* SPUplayCDDAchannel)(short *, int, unsigned int, int); // SPU function pointers extern SPUconfigure SPU_configure; diff --git a/plugins/dfsound/freeze.c b/plugins/dfsound/freeze.c index 4866df894..a400cf7cb 100644 --- a/plugins/dfsound/freeze.c +++ b/plugins/dfsound/freeze.c @@ -284,7 +284,7 @@ long CALLBACK SPUfreeze(uint32_t ulFreezeMode, SPUFreeze_t * pF, spu.bMemDirty = 1; if(pF->xaS.nsamples<=4032) // start xa again - SPUplayADPCMchannel(&pF->xaS); + SPUplayADPCMchannel(&pF->xaS, spu.cycles_played, 0); spu.xapGlobal=0; diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index fbd555b17..3aaf53a46 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1298,20 +1298,26 @@ void CALLBACK SPUupdate(void) // XA AUDIO -void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap) +void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycle, 
int is_start) { if(!xap) return; - if(!xap->freq) return; // no xa freq ? bye + if(!xap->freq) return; // no xa freq ? bye - FeedXA(xap); // call main XA feeder + if (is_start) + do_samples(cycle, 1); // catch up to prevent source underflows later + + FeedXA(xap); // call main XA feeder } // CDDA AUDIO -int CALLBACK SPUplayCDDAchannel(short *pcm, int nbytes) +int CALLBACK SPUplayCDDAchannel(short *pcm, int nbytes, unsigned int cycle, int is_start) { if (!pcm) return -1; if (nbytes<=0) return -1; + if (is_start) + do_samples(cycle, 1); // catch up to prevent source underflows later + return FeedCDDA((unsigned char *)pcm, nbytes); } diff --git a/plugins/dfsound/spu.h b/plugins/dfsound/spu.h index d42425d6f..8a0f2d2e4 100644 --- a/plugins/dfsound/spu.h +++ b/plugins/dfsound/spu.h @@ -16,5 +16,5 @@ ***************************************************************************/ void ClearWorkingState(void); -void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap); -int CALLBACK SPUplayCDDAchannel(short *pcm, int bytes); +void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycle, int is_start); +int CALLBACK SPUplayCDDAchannel(short *pcm, int bytes, unsigned int cycle, int is_start); diff --git a/plugins/dfsound/xa.c b/plugins/dfsound/xa.c index d63e83cc5..f62a12d20 100644 --- a/plugins/dfsound/xa.c +++ b/plugins/dfsound/xa.c @@ -120,7 +120,7 @@ INLINE void FeedXA(xa_decode_t *xap) if(!spu.bSPUIsOpen) return; spu.xapGlobal = xap; // store info for save states - spu.XARepeat = 100; // set up repeat + spu.XARepeat = 3; // set up repeat #if 0//def XA_HACK iSize=((45500*xap->nsamples)/xap->freq); // get size From 3113a160828f9353715e4b8cc1041d9c86162df2 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 8 Sep 2022 02:50:36 +0300 Subject: [PATCH 194/597] spu: cleanup some irq hacks Note that bIgnoreLoop is still needed or "Misadventures Of Tron Bonne" may hang after cutscenes. Before this commit the game will sometimes cut off dialogues. 
--- plugins/dfsound/spu.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 3aaf53a46..f4426abe6 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -426,8 +426,8 @@ static int decode_block(void *unused, int ch, int *SB) start = s_chan->pLoop; } - else - check_irq(ch, start); // hack, see check_irq below.. + + check_irq(ch, start); predict_nr = start[0]; shift_factor = predict_nr & 0xf; @@ -436,19 +436,11 @@ static int decode_block(void *unused, int ch, int *SB) decode_block_data(SB, start + 2, predict_nr, shift_factor); flags = start[1]; - if (flags & 4 && (!s_chan->bIgnoreLoop)) + if (flags & 4 && !s_chan->bIgnoreLoop) s_chan->pLoop = start; // loop adress start += 16; - if (flags & 1) { // 1: stop/loop - start = s_chan->pLoop; - check_irq(ch, start); // hack.. :( - } - - if (start - spu.spuMemC >= 0x80000) - start = spu.spuMemC; - s_chan->pCurr = start; // store values for next cycle s_chan->prevflags = flags; @@ -469,20 +461,15 @@ static int skip_block(int ch) start = s_chan->pLoop; } - else - check_irq(ch, start); + + check_irq(ch, start); flags = start[1]; - if (flags & 4) + if (flags & 4 && !s_chan->bIgnoreLoop) s_chan->pLoop = start; start += 16; - if (flags & 1) { - start = s_chan->pLoop; - check_irq(ch, start); - } - s_chan->pCurr = start; s_chan->prevflags = flags; @@ -511,8 +498,6 @@ static void scan_for_irq(int ch, unsigned int *upd_samples) block += 16; if (flags & 1) { // 1: stop/loop block = s_chan->pLoop; - if (block == spu.pSpuIrq) // hack.. 
(see decode_block) - break; } pos += 28 << 16; } From ff2c28226e4ca1eea4cdbc9e9e7efa119c623196 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 7 Sep 2022 02:09:08 +0300 Subject: [PATCH 195/597] psxcounters: try to eliminate another source of audio drift --- libpcsxcore/psxcounters.c | 49 ++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index 5198646d9..fba2f5c9e 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -73,7 +73,6 @@ Rcnt rcnts[ CounterQuantity ]; u32 hSyncCount = 0; u32 frame_counter = 0; static u32 hsync_steps = 0; -static u32 base_cycle = 0; u32 psxNextCounter = 0, psxNextsCounter = 0; @@ -293,6 +292,27 @@ void psxRcntReset( u32 index ) } } +static void scheduleRcntBase(void) +{ + // Schedule next call, in hsyncs + if (hSyncCount < VBlankStart) + hsync_steps = VBlankStart - hSyncCount; + else + hsync_steps = HSyncTotal[Config.PsxType] - hSyncCount; + + if (hSyncCount + hsync_steps == HSyncTotal[Config.PsxType]) + { + rcnts[3].cycle = Config.PsxType ? PSXCLK / 50 : PSXCLK / 60; + } + else + { + // clk / 50 / 314 ~= 2157.25 + // clk / 60 / 263 ~= 2146.31 + u32 mult = Config.PsxType ? 8836089 : 8791293; + rcnts[3].cycle = hsync_steps * mult >> 12; + } +} + void psxRcntUpdate() { u32 cycle; @@ -320,9 +340,6 @@ void psxRcntUpdate() // rcnt base. if( cycle - rcnts[3].cycleStart >= rcnts[3].cycle ) { - u32 leftover_cycles = cycle - rcnts[3].cycleStart - rcnts[3].cycle; - u32 next_vsync; - hSyncCount += hsync_steps; // VSync irq. @@ -344,6 +361,7 @@ void psxRcntUpdate() // Update lace. (with InuYasha fix) if( hSyncCount >= (Config.VSyncWA ? HSyncTotal[Config.PsxType] / BIAS : HSyncTotal[Config.PsxType]) ) { + rcnts[3].cycleStart += Config.PsxType ? 
PSXCLK / 50 : PSXCLK / 60; hSyncCount = 0; frame_counter++; @@ -353,21 +371,7 @@ void psxRcntUpdate() GPU_vBlank(0, SWAP32(HW_GPU_STATUS) >> 31); } - // Schedule next call, in hsyncs - hsync_steps = HSyncTotal[Config.PsxType] - hSyncCount; - next_vsync = VBlankStart - hSyncCount; // ok to overflow - if( next_vsync && next_vsync < hsync_steps ) - hsync_steps = next_vsync; - - rcnts[3].cycleStart = cycle - leftover_cycles; - if (Config.PsxType) - // 20.12 precision, clk / 50 / 313 ~= 2164.14 - base_cycle += hsync_steps * 8864320; - else - // clk / 60 / 263 ~= 2146.31 - base_cycle += hsync_steps * 8791293; - rcnts[3].cycle = base_cycle >> 12; - base_cycle &= 0xfff; + scheduleRcntBase(); } psxRcntSet(); @@ -510,15 +514,12 @@ s32 psxRcntFreeze( void *f, s32 Mode ) count = (psxRegs.cycle - rcnts[i].cycleStart) / rcnts[i].rate; _psxRcntWcount( i, count ); } - hsync_steps = 0; - if (rcnts[3].target) - hsync_steps = (psxRegs.cycle - rcnts[3].cycleStart) / rcnts[3].target; + scheduleRcntBase(); psxRcntSet(); - - base_cycle = 0; } return 0; } /******************************************************************************/ +// vim:ts=4:shiftwidth=4:expandtab From df717ca9bf196fd40134d16f94b7f3c774f2c897 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 5 Sep 2022 19:41:52 +0300 Subject: [PATCH 196/597] cdrom: try to eliminate playback timing drifting --- libpcsxcore/cdrom.c | 28 ++++++++++++++++------------ libpcsxcore/r3000a.h | 14 ++++++++------ 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index fb526b407..184d07ab0 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -242,11 +242,15 @@ static void sec2msf(unsigned int s, u8 *msf) { } // cdrPlaySeekReadInterrupt -#define CDRPLAYSEEKREAD_INT(eCycle) { \ +#define CDRPLAYSEEKREAD_INT(eCycle, isFirst) { \ + u32 e_ = eCycle; \ psxRegs.interrupt |= (1 << PSXINT_CDREAD); \ - psxRegs.intCycle[PSXINT_CDREAD].cycle = eCycle; \ - psxRegs.intCycle[PSXINT_CDREAD].sCycle 
= psxRegs.cycle; \ - new_dyna_set_event(PSXINT_CDREAD, eCycle); \ + if (isFirst) \ + psxRegs.intCycle[PSXINT_CDREAD].sCycle = psxRegs.cycle; \ + else \ + psxRegs.intCycle[PSXINT_CDREAD].sCycle += psxRegs.intCycle[PSXINT_CDREAD].cycle; \ + psxRegs.intCycle[PSXINT_CDREAD].cycle = e_; \ + new_dyna_set_event_abs(PSXINT_CDREAD, psxRegs.intCycle[PSXINT_CDREAD].sCycle + e_); \ } // cdrLidSeekInterrupt @@ -588,7 +592,7 @@ void cdrPlaySeekReadInterrupt(void) if (!cdr.Play && (cdr.StatP & STATUS_SEEK)) { if (cdr.Stat) { CDR_LOG_I("cdrom: seek stat hack\n"); - CDRPLAYSEEKREAD_INT(0x1000); + CDRPLAYSEEKREAD_INT(0x1000, 1); return; } SetResultSize(1); @@ -640,7 +644,7 @@ void cdrPlaySeekReadInterrupt(void) } } - CDRPLAYSEEKREAD_INT(cdReadTime); + CDRPLAYSEEKREAD_INT(cdReadTime, 0); // update for CdlGetlocP/autopause generate_subq(cdr.SetSectorPlay); @@ -766,7 +770,7 @@ void cdrInterrupt(void) { // BIOS player - set flag again cdr.Play = TRUE; - CDRPLAYSEEKREAD_INT(cdReadTime + seekTime); + CDRPLAYSEEKREAD_INT(cdReadTime + seekTime, 1); start_rotating = 1; break; @@ -974,7 +978,7 @@ void cdrInterrupt(void) { Rockman X5 = 0.5-4x - fix capcom logo */ - CDRPLAYSEEKREAD_INT(cdReadTime + seekTime); + CDRPLAYSEEKREAD_INT(cdReadTime + seekTime, 1); start_rotating = 1; break; @@ -1072,7 +1076,7 @@ void cdrInterrupt(void) { // - fixes new game ReadTrack(cdr.SetSectorPlay); - CDRPLAYSEEKREAD_INT(((cdr.Mode & 0x80) ? (cdReadTime) : cdReadTime * 2) + seekTime); + CDRPLAYSEEKREAD_INT(((cdr.Mode & 0x80) ? (cdReadTime) : cdReadTime * 2) + seekTime, 1); SetPlaySeekRead(cdr.StatP, STATUS_SEEK); start_rotating = 1; @@ -1168,7 +1172,7 @@ static void cdrReadInterrupt(void) if (cdr.Irq || cdr.Stat) { CDR_LOG_I("cdrom: read stat hack %02x %x\n", cdr.Irq, cdr.Stat); - CDRPLAYSEEKREAD_INT(2048); + CDRPLAYSEEKREAD_INT(2048, 1); return; } @@ -1235,7 +1239,7 @@ static void cdrReadInterrupt(void) cdr.Readed = 0; - CDRPLAYSEEKREAD_INT((cdr.Mode & MODE_SPEED) ? 
(cdReadTime / 2) : cdReadTime); + CDRPLAYSEEKREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime, 0); /* Croc 2: $40 - only FORM1 (*) @@ -1588,7 +1592,7 @@ int cdrFreeze(void *f, int Mode) { if (!Config.Cdda) CDR_play(cdr.SetSectorPlay); if (psxRegs.interrupt & (1 << PSXINT_CDRPLAY_OLD)) - CDRPLAYSEEKREAD_INT((cdr.Mode & 0x80) ? (cdReadTime / 2) : cdReadTime); + CDRPLAYSEEKREAD_INT((cdr.Mode & 0x80) ? (cdReadTime / 2) : cdReadTime, 1); } if ((cdr.freeze_ver & 0xffffff00) != 0x63647200) { diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index ea6f0e75f..49afcb1cd 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -206,17 +206,19 @@ void new_dyna_before_save(void); void new_dyna_after_save(void); void new_dyna_freeze(void *f, int mode); -#define new_dyna_set_event(e, c) { \ - s32 c_ = c; \ - u32 abs_ = psxRegs.cycle + c_; \ - s32 odi_ = next_interupt - psxRegs.cycle; \ +#define new_dyna_set_event_abs(e, abs) { \ + u32 abs_ = abs; \ + s32 di_ = next_interupt - abs_; \ event_cycles[e] = abs_; \ - if (c_ < odi_) { \ - /*printf("%u: next_interupt %d -> %d (%u)\n", psxRegs.cycle, odi_, c_, abs_);*/ \ + if (di_ > 0) { \ + /*printf("%u: next_interupt %u -> %u\n", psxRegs.cycle, next_interupt, abs_);*/ \ next_interupt = abs_; \ } \ } +#define new_dyna_set_event(e, c) \ + new_dyna_set_event_abs(e, psxRegs.cycle + (c)) + #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define _i32(x) *(s32 *)&x From 7f2576b26c2372738c528741d7cf8c307736e623 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 5 Sep 2022 23:47:10 +0300 Subject: [PATCH 197/597] cdrom: clean up command handling --- libpcsxcore/cdrom.c | 148 +++++++++++++++++++++----------------------- 1 file changed, 71 insertions(+), 77 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 184d07ab0..772e76f03 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -45,10 +45,11 @@ //#define CDR_LOG_CMD_IRQ static struct { - unsigned char OCUP; - unsigned char Reg1Mode; + // 
unused members maintain savesate compatibility + unsigned char unused0; + unsigned char unused1; unsigned char Reg2; - unsigned char CmdProcess; + unsigned char unused2; unsigned char Ctrl; unsigned char Stat; @@ -62,7 +63,7 @@ static struct { unsigned char Absolute[3]; } subq; unsigned char TrackChanged; - unsigned char pad1[3]; + unsigned char unused3[3]; unsigned int freeze_ver; unsigned char Prev[4]; @@ -94,18 +95,18 @@ static struct { xa_decode_t Xa; - int Init; + u32 unused4; - u16 Irq; - u8 IrqRepeated; - u32 eCycle; + u16 CmdInProgress; + u16 unused5; + u32 unused6; - u8 pad2; + u8 unused7; u8 DriveState; u8 FastForward; u8 FastBackward; - u8 pad; + u8 unused8; u8 AttenuatorLeftToLeft, AttenuatorLeftToRight; u8 AttenuatorRightToRight, AttenuatorRightToLeft; @@ -290,6 +291,7 @@ static void setIrq(int log_cmd) psxHu32ref(0x1070) |= SWAP32((u32)0x4); #ifdef CDR_LOG_CMD_IRQ + if (cdr.Stat) { int i; SysPrintf("CDR IRQ=%d cmd %02x stat %02x: ", @@ -481,23 +483,6 @@ static void ReadTrack(const u8 *time) { cdr.subq.Absolute[0], cdr.subq.Absolute[1], cdr.subq.Absolute[2]); } -static void AddIrqQueue(unsigned short irq, unsigned long ecycle) { - if (cdr.Irq != 0) { - if (irq == cdr.Irq || irq + 0x100 == cdr.Irq) { - cdr.IrqRepeated = 1; - CDR_INT(ecycle); - return; - } - - CDR_LOG_I("cdr: override cmd %02x -> %02x\n", cdr.Irq, irq); - } - - cdr.Irq = irq; - cdr.eCycle = ecycle; - - CDR_INT(ecycle); -} - static void cdrPlayInterrupt_Autopause() { u32 abs_lev_max = 0; @@ -599,7 +584,7 @@ void cdrPlaySeekReadInterrupt(void) cdr.StatP |= STATUS_ROTATING; SetPlaySeekRead(cdr.StatP, 0); cdr.Result[0] = cdr.StatP; - if (cdr.Irq == 0) { + if (cdr.Stat == 0) { cdr.Stat = Complete; setIrq(0x202); } @@ -625,7 +610,7 @@ void cdrPlaySeekReadInterrupt(void) CDR_readCDDA(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2], (u8 *)read_buf); } - if (!cdr.Irq && !cdr.Stat && (cdr.Mode & (MODE_AUTOPAUSE|MODE_REPORT))) + if (!cdr.Stat && (cdr.Mode & 
(MODE_AUTOPAUSE|MODE_REPORT))) cdrPlayInterrupt_Autopause(); if (!cdr.Muted && !Config.Cdda) { @@ -651,19 +636,20 @@ void cdrPlaySeekReadInterrupt(void) } void cdrInterrupt(void) { - u16 Irq = cdr.Irq; int no_busy_error = 0; int start_rotating = 0; int error = 0; - int delay; unsigned int seekTime = 0; + u32 second_resp_time = 0; + u8 ParamC; u8 set_loc[3]; + u16 Cmd; int i; // Reschedule IRQ if (cdr.Stat) { - CDR_LOG_I("cdrom: stat hack: %02x %x\n", cdr.Irq, cdr.Stat); - CDR_INT(0x1000); + CDR_LOG_I("cdrom: cmd %02x with irqstat %x\n", cdr.CmdInProgress, cdr.Stat); + CDR_INT(1000); return; } @@ -674,17 +660,16 @@ void cdrInterrupt(void) { cdr.Result[0] = cdr.StatP; cdr.Stat = Acknowledge; - if (cdr.IrqRepeated) { - cdr.IrqRepeated = 0; - if (cdr.eCycle > psxRegs.cycle) { - CDR_INT(cdr.eCycle); - goto finish; - } - } + Cmd = cdr.CmdInProgress; + cdr.CmdInProgress = 0; + ParamC = cdr.ParamC; - cdr.Irq = 0; + if (Cmd < 0x100) { + cdr.Cmd = 0; + cdr.ParamC = 0; + } - switch (Irq) { + switch (Cmd) { case CdlNop: if (cdr.DriveState != DRIVESTATE_LID_OPEN) cdr.StatP &= ~STATUS_SHELLOPEN; @@ -722,7 +707,7 @@ void cdrInterrupt(void) { // BIOS CD Player // - Pause player, hit Track 01/02/../xx (Setloc issued!!) 
- if (cdr.ParamC != 0 && cdr.Param[0] != 0) { + if (ParamC != 0 && cdr.Param[0] != 0) { int track = btoi( cdr.Param[0] ); if (track <= cdr.ResultTN[1]) @@ -796,7 +781,7 @@ void cdrInterrupt(void) { error = ERROR_INVALIDARG; goto set_error; } - AddIrqQueue(CdlStandby + 0x100, cdReadTime * 125 / 2); + second_resp_time = cdReadTime * 125 / 2; start_rotating = 1; break; @@ -819,12 +804,11 @@ void cdrInterrupt(void) { SetPlaySeekRead(cdr.StatP, 0); cdr.StatP &= ~STATUS_ROTATING; - delay = 0x800; + second_resp_time = 0x800; if (cdr.DriveState == DRIVESTATE_STANDBY) - delay = cdReadTime * 30 / 2; + second_resp_time = cdReadTime * 30 / 2; cdr.DriveState = DRIVESTATE_STOPPED; - AddIrqQueue(CdlStop + 0x100, delay); break; case CdlStop + 0x100: @@ -851,13 +835,12 @@ void cdrInterrupt(void) { * */ if (!(cdr.StatP & (STATUS_PLAY | STATUS_READ))) { - delay = 7000; + second_resp_time = 7000; } else { - delay = (((cdr.Mode & MODE_SPEED) ? 2 : 1) * (1000000)); + second_resp_time = (((cdr.Mode & MODE_SPEED) ? 2 : 1) * 1000000); } - AddIrqQueue(CdlPause + 0x100, delay); SetPlaySeekRead(cdr.StatP, 0); cdr.Ctrl |= 0x80; break; @@ -872,7 +855,7 @@ void cdrInterrupt(void) { SetPlaySeekRead(cdr.StatP, 0); cdr.Muted = FALSE; cdr.Mode = 0x20; /* This fixes This is Football 2, Pooh's Party lockups */ - AddIrqQueue(CdlReset + 0x100, 4100000); + second_resp_time = 4100000; no_busy_error = 1; start_rotating = 1; break; @@ -895,6 +878,8 @@ void cdrInterrupt(void) { break; case CdlSetmode: + CDR_LOG("cdrWrite1() Log: Setmode %x\n", cdr.Param[0]); + cdr.Mode = cdr.Param[0]; no_busy_error = 1; break; @@ -920,7 +905,7 @@ void cdrInterrupt(void) { case CdlReadT: // SetSession? 
// really long - AddIrqQueue(CdlReadT + 0x100, cdReadTime * 290 / 4); + second_resp_time = cdReadTime * 290 / 4; start_rotating = 1; break; @@ -1001,7 +986,7 @@ void cdrInterrupt(void) { break; case CdlID: - AddIrqQueue(CdlID + 0x100, 20480); + second_resp_time = 20480; break; case CdlID + 0x100: @@ -1045,7 +1030,7 @@ void cdrInterrupt(void) { break; case CdlReadToc: - AddIrqQueue(CdlReadToc + 0x100, cdReadTime * 180 / 4); + second_resp_time = cdReadTime * 180 / 4; no_busy_error = 1; start_rotating = 1; break; @@ -1083,7 +1068,7 @@ void cdrInterrupt(void) { break; case CdlSync: default: - CDR_LOG_I("Invalid command: %02x\n", Irq); + CDR_LOG_I("Invalid command: %02x\n", Cmd); error = ERROR_INVALIDCMD; // FALLTHROUGH @@ -1113,9 +1098,17 @@ void cdrInterrupt(void) { } } -finish: - setIrq(Irq); - cdr.ParamC = 0; + if (second_resp_time) { + cdr.CmdInProgress = Cmd | 0x100; + CDR_INT(second_resp_time); + } + else if (cdr.Cmd && cdr.Cmd != (Cmd & 0xff)) { + cdr.CmdInProgress = cdr.Cmd; + CDR_LOG_I("cdrom: cmd %02x came before %02x finished\n", cdr.Cmd, Cmd); + CDR_INT(256); + } + + setIrq(Cmd); } #ifdef HAVE_ARMV7 @@ -1170,13 +1163,12 @@ static void cdrReadInterrupt(void) { u8 *buf; - if (cdr.Irq || cdr.Stat) { - CDR_LOG_I("cdrom: read stat hack %02x %x\n", cdr.Irq, cdr.Stat); + if (cdr.Stat) { + CDR_LOG_I("cdrom: read stat hack %02x %02x\n", cdr.Cmd, cdr.Stat); CDRPLAYSEEKREAD_INT(2048, 1); return; } - cdr.OCUP = 1; SetResultSize(1); SetPlaySeekRead(cdr.StatP, STATUS_READ | STATUS_ROTATING); cdr.Result[0] = cdr.StatP; @@ -1273,10 +1265,7 @@ unsigned char cdrRead0(void) { else cdr.Ctrl &= ~0x20; - if (cdr.OCUP) - cdr.Ctrl |= 0x40; -// else -// cdr.Ctrl &= ~0x40; + cdr.Ctrl |= 0x40; // data fifo not empty // What means the 0x10 and the 0x08 bits? 
I only saw it used by the bios cdr.Ctrl |= 0x18; @@ -1320,9 +1309,6 @@ void cdrWrite1(unsigned char rt) { return; } - cdr.Cmd = rt; - cdr.OCUP = 0; - #ifdef CDR_LOG_CMD_IRQ SysPrintf("CD1 write: %x (%s)", rt, CmdName[rt]); if (cdr.ParamC) { @@ -1338,16 +1324,20 @@ void cdrWrite1(unsigned char rt) { cdr.ResultReady = 0; cdr.Ctrl |= 0x80; - // cdr.Stat = NoIntr; - AddIrqQueue(cdr.Cmd, 0x800); - - switch (cdr.Cmd) { - case CdlSetmode: - CDR_LOG("cdrWrite1() Log: Setmode %x\n", cdr.Param[0]); - cdr.Mode = cdr.Param[0]; - break; + if (!cdr.CmdInProgress) { + cdr.CmdInProgress = rt; + // should be something like 12k + controller delays + CDR_INT(5000); } + else { + CDR_LOG_I("cdr: cmd while busy: %02x, prev %02x, busy %02x\n", + rt, cdr.Cmd, cdr.CmdInProgress); + if (cdr.CmdInProgress < 0x100) // no pending 2nd response + cdr.CmdInProgress = rt; + } + + cdr.Cmd = rt; } unsigned char cdrRead2(void) { @@ -1403,6 +1393,10 @@ void cdrWrite3(unsigned char rt) { case 0: break; // transfer case 1: +#ifdef CDR_LOG_CMD_IRQ + if (cdr.Stat & rt) + SysPrintf("ack %02x\n", cdr.Stat & rt); +#endif cdr.Stat &= ~rt; if (rt & 0x40) @@ -1414,7 +1408,7 @@ void cdrWrite3(unsigned char rt) { case 3: if (rt & 0x20) { memcpy(&cdr.AttenuatorLeftToLeft, &cdr.AttenuatorLeftToLeftT, 4); - CDR_LOG_I("CD-XA Volume: %02x %02x | %02x %02x\n", + CDR_LOG("CD-XA Volume: %02x %02x | %02x %02x\n", cdr.AttenuatorLeftToLeft, cdr.AttenuatorLeftToRight, cdr.AttenuatorRightToLeft, cdr.AttenuatorRightToRight); } From f5450cfbaf8651524f23579832ddb5bc08fe545a Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 7 Sep 2022 00:47:55 +0300 Subject: [PATCH 198/597] cdrom: don't reschedule irqs trigger shortly after ack instead, like nocash describes --- libpcsxcore/cdrom.c | 119 +++++++++++++++++++++++++++----------------- 1 file changed, 72 insertions(+), 47 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 772e76f03..3c016dfd2 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -98,7 
+98,8 @@ static struct { u32 unused4; u16 CmdInProgress; - u16 unused5; + u8 Irq1Pending; + u8 unused5; u32 unused6; u8 unused7; @@ -584,10 +585,8 @@ void cdrPlaySeekReadInterrupt(void) cdr.StatP |= STATUS_ROTATING; SetPlaySeekRead(cdr.StatP, 0); cdr.Result[0] = cdr.StatP; - if (cdr.Stat == 0) { - cdr.Stat = Complete; - setIrq(0x202); - } + cdr.Stat = Complete; + setIrq(0x202); Find_CurTrack(cdr.SetSectorPlay); ReadTrack(cdr.SetSectorPlay); @@ -629,10 +628,10 @@ void cdrPlaySeekReadInterrupt(void) } } - CDRPLAYSEEKREAD_INT(cdReadTime, 0); - // update for CdlGetlocP/autopause generate_subq(cdr.SetSectorPlay); + + CDRPLAYSEEKREAD_INT(cdReadTime, 0); } void cdrInterrupt(void) { @@ -646,10 +645,8 @@ void cdrInterrupt(void) { u16 Cmd; int i; - // Reschedule IRQ if (cdr.Stat) { CDR_LOG_I("cdrom: cmd %02x with irqstat %x\n", cdr.CmdInProgress, cdr.Stat); - CDR_INT(1000); return; } @@ -1105,7 +1102,6 @@ void cdrInterrupt(void) { else if (cdr.Cmd && cdr.Cmd != (Cmd & 0xff)) { cdr.CmdInProgress = cdr.Cmd; CDR_LOG_I("cdrom: cmd %02x came before %02x finished\n", cdr.Cmd, Cmd); - CDR_INT(256); } setIrq(Cmd); @@ -1159,46 +1155,59 @@ void cdrAttenuate(s16 *buf, int samples, int stereo) } } -static void cdrReadInterrupt(void) +static void cdrReadInterruptSetResult(unsigned char result) { - u8 *buf; - if (cdr.Stat) { - CDR_LOG_I("cdrom: read stat hack %02x %02x\n", cdr.Cmd, cdr.Stat); - CDRPLAYSEEKREAD_INT(2048, 1); + CDR_LOG_I("cdrom: %d:%02d:%02d irq miss, cmd=%02x irqstat=%02x\n", + cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2], + cdr.CmdInProgress, cdr.Stat); + cdr.Irq1Pending = result; return; } - SetResultSize(1); + cdr.Result[0] = result; + cdr.Stat = (result & STATUS_ERROR) ? 
DiskError : DataReady; + setIrq(0x203); +} + +static void cdrUpdateTransferBuf(const u8 *buf) +{ + if (!buf) + return; + memcpy(cdr.Transfer, buf, DATA_SIZE); + CheckPPFCache(cdr.Transfer, cdr.Prev[0], cdr.Prev[1], cdr.Prev[2]); + CDR_LOG("cdr.Transfer %x:%x:%x\n", cdr.Transfer[0], cdr.Transfer[1], cdr.Transfer[2]); + cdr.Readed = 0; +} + +static void cdrReadInterrupt(void) +{ + u8 *buf = NULL, *hdr; + SetPlaySeekRead(cdr.StatP, STATUS_READ | STATUS_ROTATING); - cdr.Result[0] = cdr.StatP; ReadTrack(cdr.SetSectorPlay); - - buf = CDR_getBuffer(); + if (cdr.NoErr) + buf = CDR_getBuffer(); if (buf == NULL) cdr.NoErr = 0; if (!cdr.NoErr) { CDR_LOG_I("cdrReadInterrupt() Log: err\n"); memset(cdr.Transfer, 0, DATA_SIZE); - cdr.Stat = DiskError; - cdr.Result[0] |= STATUS_ERROR; - setIrq(0x205); + cdrReadInterruptSetResult(cdr.StatP | STATUS_ERROR); return; } - memcpy(cdr.Transfer, buf, DATA_SIZE); - CheckPPFCache(cdr.Transfer, cdr.Prev[0], cdr.Prev[1], cdr.Prev[2]); - - - CDR_LOG("cdrReadInterrupt() Log: cdr.Transfer %x:%x:%x\n", cdr.Transfer[0], cdr.Transfer[1], cdr.Transfer[2]); + if (!cdr.Irq1Pending) + cdrUpdateTransferBuf(buf); if ((!cdr.Muted) && (cdr.Mode & MODE_STRSND) && (!Config.Xa) && (cdr.FirstSector != -1)) { // CD-XA + hdr = buf + 4; // Firemen 2: Multi-XA files - briefings, cutscenes if( cdr.FirstSector == 1 && (cdr.Mode & MODE_SF)==0 ) { - cdr.File = cdr.Transfer[4 + 0]; - cdr.Channel = cdr.Transfer[4 + 1]; + cdr.File = hdr[0]; + cdr.Channel = hdr[1]; } /* Gameblabla @@ -1206,10 +1215,8 @@ static void cdrReadInterrupt(void) * Fixes missing audio in Blue's Clues : Blue's Big Musical. (Should also fix Taxi 2) * TODO : Check if this is the proper behaviour. 
* */ - if((cdr.Transfer[4 + 2] & 0x4) && - (cdr.Transfer[4 + 1] == cdr.Channel) && - (cdr.Transfer[4 + 0] == cdr.File) && cdr.Channel != 255) { - int ret = xa_decode_sector(&cdr.Xa, cdr.Transfer+4, cdr.FirstSector); + if ((hdr[2] & 0x4) && hdr[0] == cdr.File && hdr[1] == cdr.Channel && cdr.Channel != 255) { + int ret = xa_decode_sector(&cdr.Xa, buf + 4, cdr.FirstSector); if (!ret) { cdrAttenuate(cdr.Xa.pcm, cdr.Xa.nsamples, cdr.Xa.stereo); SPU_playADPCMchannel(&cdr.Xa, psxRegs.cycle, cdr.FirstSector); @@ -1219,6 +1226,15 @@ static void cdrReadInterrupt(void) } } + /* + Croc 2: $40 - only FORM1 (*) + Judge Dredd: $C8 - only FORM1 (*) + Sim Theme Park - no adpcm at all (zero) + */ + + if (!(cdr.Mode & MODE_STRSND) || !(buf[4+2] & 0x4)) + cdrReadInterruptSetResult(cdr.StatP); + cdr.SetSectorPlay[2]++; if (cdr.SetSectorPlay[2] == 75) { cdr.SetSectorPlay[2] = 0; @@ -1229,23 +1245,31 @@ static void cdrReadInterrupt(void) } } - cdr.Readed = 0; + if (!cdr.Irq1Pending) { + // update for CdlGetlocP + ReadTrack(cdr.SetSectorPlay); + } CDRPLAYSEEKREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime, 0); +} - /* - Croc 2: $40 - only FORM1 (*) - Judge Dredd: $C8 - only FORM1 (*) - Sim Theme Park - no adpcm at all (zero) - */ - - if (!(cdr.Mode & MODE_STRSND) || !(cdr.Transfer[4+2] & 0x4)) { - cdr.Stat = DataReady; - setIrq(0x203); +static void doMissedIrqs(void) +{ + if (cdr.Irq1Pending) + { + // hand out the "newest" sector, according to nocash + cdrUpdateTransferBuf(CDR_getBuffer()); + CDR_LOG_I("cdrom: %x:%02x:%02x loaded on ack\n", + cdr.Transfer[0], cdr.Transfer[1], cdr.Transfer[2]); + SetResultSize(1); + cdr.Result[0] = cdr.Irq1Pending; + cdr.Stat = (cdr.Irq1Pending & STATUS_ERROR) ? 
DiskError : DataReady; + cdr.Irq1Pending = 0; + setIrq(0x205); + return; } - - // update for CdlGetlocP - ReadTrack(cdr.SetSectorPlay); + if (!(psxRegs.interrupt & (1 << PSXINT_CDR)) && cdr.CmdInProgress) + CDR_INT(256); } /* @@ -1401,6 +1425,7 @@ void cdrWrite3(unsigned char rt) { if (rt & 0x40) cdr.ParamC = 0; + doMissedIrqs(); return; case 2: cdr.AttenuatorLeftToRightT = rt; @@ -1446,7 +1471,7 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { case 0x11000000: case 0x11400100: if (cdr.Readed == 0) { - CDR_LOG("psxDma3() Log: *** DMA 3 *** NOT READY\n"); + CDR_LOG_I("psxDma3() Log: *** DMA 3 *** NOT READY\n"); break; } From 79cd4919a2ceb70170da6eae1882d1738b4e54a9 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 8 Sep 2022 18:01:34 +0300 Subject: [PATCH 199/597] cdrom: get rid of pTransfer it could easily crash the emu (misbehaving game or even malicious ISO) --- libpcsxcore/cdrom.c | 56 +++++++++++++++------------------------------ 1 file changed, 18 insertions(+), 38 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 3c016dfd2..000e54829 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -95,7 +95,7 @@ static struct { xa_decode_t Xa; - u32 unused4; + u32 FifoOffset; u16 CmdInProgress; u8 Irq1Pending; @@ -114,7 +114,6 @@ static struct { u8 AttenuatorLeftToLeftT, AttenuatorLeftToRightT; u8 AttenuatorRightToRightT, AttenuatorRightToLeftT; } cdr; -static unsigned char *pTransfer; static s16 read_buf[CD_FRAMESIZE_RAW/2]; /* CD-ROM magic numbers */ @@ -1365,13 +1364,12 @@ void cdrWrite1(unsigned char rt) { } unsigned char cdrRead2(void) { - unsigned char ret; + unsigned char ret = 0; - if (cdr.Readed == 0) { - ret = 0; - } else { - ret = *pTransfer++; - } + if (cdr.Readed && cdr.FifoOffset < DATA_SIZE) + ret = cdr.Transfer[cdr.FifoOffset++]; + else + CDR_LOG_I("cdrom: read empty fifo\n"); CDR_LOG_IO("cdr r2.dat: %02x\n", ret); return ret; @@ -1442,19 +1440,16 @@ void cdrWrite3(unsigned char rt) { if ((rt & 0x80) && cdr.Readed == 0) { 
cdr.Readed = 1; - pTransfer = cdr.Transfer; switch (cdr.Mode & 0x30) { case MODE_SIZE_2328: case 0x00: - pTransfer += 12; + cdr.FifoOffset = 12; break; case MODE_SIZE_2340: - pTransfer += 0; - break; - default: + cdr.FifoOffset = 0; break; } } @@ -1474,45 +1469,30 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { CDR_LOG_I("psxDma3() Log: *** DMA 3 *** NOT READY\n"); break; } - - cdsize = (bcr & 0xffff) * 4; - - // Ape Escape: bcr = 0001 / 0000 - // - fix boot - if( cdsize == 0 ) - { - switch (cdr.Mode & (MODE_SIZE_2340|MODE_SIZE_2328)) { - case MODE_SIZE_2340: cdsize = 2340; break; - case MODE_SIZE_2328: cdsize = 2328; break; - default: - case MODE_SIZE_2048: cdsize = 2048; break; - } - } - - ptr = (u8 *)PSXM(madr); if (ptr == NULL) { - CDR_LOG("psxDma3() Log: *** DMA 3 *** NULL Pointer!\n"); + CDR_LOG_I("psxDma3() Log: *** DMA 3 *** NULL Pointer!\n"); break; } + cdsize = (((bcr - 1) & 0xffff) + 1) * 4; + /* GS CDX: Enhancement CD crash - Setloc 0:0:0 - CdlPlay - Spams DMA3 and gets buffer overrun */ - size = CD_FRAMESIZE_RAW - (pTransfer - cdr.Transfer); + size = DATA_SIZE - cdr.FifoOffset; if (size > cdsize) size = cdsize; if (size > 0) { - memcpy(ptr, pTransfer, size); + memcpy(ptr, cdr.Transfer + cdr.FifoOffset, size); + cdr.FifoOffset += size; + psxCpu->Clear(madr, size / 4); } - psxCpu->Clear(madr, cdsize / 4); - pTransfer += cdsize; - if( chcr == 0x11400100 ) { HW_DMA3_MADR = SWAPu32(madr + cdsize); CDRDMA_INT( (cdsize/4) / 4 ); @@ -1563,7 +1543,7 @@ void cdrReset() { cdr.Stat = NoIntr; cdr.DriveState = DRIVESTATE_STANDBY; cdr.StatP = STATUS_ROTATING; - pTransfer = cdr.Transfer; + cdr.FifoOffset = DATA_SIZE; // fifo empty // BIOS player - default values cdr.AttenuatorLeftToLeft = 0x80; @@ -1586,7 +1566,7 @@ int cdrFreeze(void *f, int Mode) { if (Mode == 1) { cdr.ParamP = cdr.ParamC; - tmp = pTransfer - cdr.Transfer; + tmp = cdr.FifoOffset; } gzfreeze(&tmp, sizeof(tmp)); @@ -1594,7 +1574,7 @@ int cdrFreeze(void *f, int Mode) { if (Mode == 0) { getCdInfo(); - 
pTransfer = cdr.Transfer + tmp; + cdr.FifoOffset = tmp; // read right sub data tmpp[0] = btoi(cdr.Prev[0]); From 4f329f16f58b969df7ec9081549a7bbd70814f55 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 8 Sep 2022 18:17:06 +0300 Subject: [PATCH 200/597] dma: try more accurate timings seems to help Legend of Mana --- libpcsxcore/cdrom.c | 16 ++++++++-------- libpcsxcore/mdec.c | 35 ++++++++++++++--------------------- libpcsxcore/psxdma.c | 4 ++-- 3 files changed, 24 insertions(+), 31 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 000e54829..310ea4f27 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1462,9 +1462,8 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { CDR_LOG("psxDma3() Log: *** DMA 3 *** %x addr = %x size = %x\n", chcr, madr, bcr); - switch (chcr) { + switch (chcr & 0x71000000) { case 0x11000000: - case 0x11400100: if (cdr.Readed == 0) { CDR_LOG_I("psxDma3() Log: *** DMA 3 *** NOT READY\n"); break; @@ -1493,15 +1492,16 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { psxCpu->Clear(madr, size / 4); } - if( chcr == 0x11400100 ) { + CDRDMA_INT((cdsize/4) * 24); + + HW_DMA3_CHCR &= SWAPu32(~0x10000000); + if (chcr & 0x100) { HW_DMA3_MADR = SWAPu32(madr + cdsize); - CDRDMA_INT( (cdsize/4) / 4 ); + HW_DMA3_BCR &= SWAPu32(0xffff0000); } - else if( chcr == 0x11000000 ) { - // CDRDMA_INT( (cdsize/4) * 1 ); + else { // halted - psxRegs.cycle += (cdsize/4) * 24/2; - CDRDMA_INT(16); + psxRegs.cycle += (cdsize/4) * 24 - 20; } return; diff --git a/libpcsxcore/mdec.c b/libpcsxcore/mdec.c index 61ed5ea53..4b93ffa57 100644 --- a/libpcsxcore/mdec.c +++ b/libpcsxcore/mdec.c @@ -33,12 +33,10 @@ * so 2.0 to 4.0 should be fine. */ -/* Was set to 2 before but it would cause issues in R-types and Vandal Hearts videos. - * Setting it to 6 as dmitrysmagin did fix those... except for Galerians. - * Galerians needs this to be set to 10 (!!) before it looks properly. 
- * I've tried this with a few other games (including R-Types) and so far, this - * has not backfired. - * */ +/* + * >= 10 for Galerians + * <= 18 for "Disney's Treasure Planet" + */ #define MDEC_BIAS 10 #define DSIZE 8 @@ -487,7 +485,7 @@ void psxDma0(u32 adr, u32 bcr, u32 chcr) { size = (bcr >> 16) * (bcr & 0xffff); switch (cmd >> 28) { - case 0x3: // decode + case 0x3: // decode 15/24bpp mdec.rl = (u16 *) PSXM(adr); /* now the mdec is busy till all data are decoded */ mdec.reg1 |= MDEC1_BUSY; @@ -495,10 +493,8 @@ void psxDma0(u32 adr, u32 bcr, u32 chcr) { mdec.rl_end = mdec.rl + (size * 2); /* sanity check */ - if(mdec.rl_end <= mdec.rl) { - MDECINDMA_INT( size / 4 ); - return; - } + if(mdec.rl_end <= mdec.rl) + break; /* process the pending dma1 */ if(mdec.pending_dma1.adr){ @@ -517,23 +513,18 @@ void psxDma0(u32 adr, u32 bcr, u32 chcr) { iqtab_init(iq_y, p); iqtab_init(iq_uv, p + 64); } - - MDECINDMA_INT( size / 4 ); - return; + break; case 0x6: // cosine table // printf("mdec cosine table\n"); - - MDECINDMA_INT( size / 4 ); - return; + break; default: // printf("mdec unknown command\n"); break; } - HW_DMA0_CHCR &= SWAP32(~0x01000000); - DMA_INTERRUPT(0); + MDECINDMA_INT(size); } void mdec0Interrupt() @@ -629,8 +620,10 @@ void psxDma1(u32 adr, u32 bcr, u32 chcr) { } } - /* define the power of mdec */ - MDECOUTDMA_INT(words * MDEC_BIAS); + /* define the power of mdec */ + MDECOUTDMA_INT(words * MDEC_BIAS); + /* some CPU stalling */ + psxRegs.cycle += words; } } diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index d3b85724f..e6f68fc2a 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -54,7 +54,7 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU words = (bcr >> 16) * (bcr & 0xffff); SPU_writeDMAMem(ptr, words * 2, psxRegs.cycle); HW_DMA4_MADR = SWAPu32(madr + words * 4); - SPUDMA_INT(words / 2); + SPUDMA_INT(words * 4); return; case 0x01000200: //spu to cpu transfer @@ -73,7 +73,7 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU 
psxCpu->Clear(madr, words); HW_DMA4_MADR = SWAPu32(madr + words * 4); - SPUDMA_INT(words / 2); + SPUDMA_INT(words * 4); return; #ifdef PSXDMA_LOG From 742a21f278bff9a6c5c7f07a3fd8026fef47676a Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 8 Sep 2022 21:13:25 +0300 Subject: [PATCH 201/597] cdrom: partially emulate the fifo --- libpcsxcore/cdrom.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 310ea4f27..a3ddb4473 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -76,7 +76,7 @@ static struct { unsigned char ResultP; unsigned char ResultReady; unsigned char Cmd; - unsigned char Readed; + unsigned char unused4; unsigned char SetlocPending; u32 Reading; @@ -95,7 +95,8 @@ static struct { xa_decode_t Xa; - u32 FifoOffset; + u16 FifoOffset; + u16 FifoSize; u16 CmdInProgress; u8 Irq1Pending; @@ -1176,7 +1177,8 @@ static void cdrUpdateTransferBuf(const u8 *buf) memcpy(cdr.Transfer, buf, DATA_SIZE); CheckPPFCache(cdr.Transfer, cdr.Prev[0], cdr.Prev[1], cdr.Prev[2]); CDR_LOG("cdr.Transfer %x:%x:%x\n", cdr.Transfer[0], cdr.Transfer[1], cdr.Transfer[2]); - cdr.Readed = 0; + if (cdr.FifoOffset < 2048 + 12) + CDR_LOG("cdrom: FifoOffset(1) %d/%d\n", cdr.FifoOffset, cdr.FifoSize); } static void cdrReadInterrupt(void) @@ -1366,10 +1368,10 @@ void cdrWrite1(unsigned char rt) { unsigned char cdrRead2(void) { unsigned char ret = 0; - if (cdr.Readed && cdr.FifoOffset < DATA_SIZE) + if (cdr.FifoOffset < cdr.FifoSize) ret = cdr.Transfer[cdr.FifoOffset++]; else - CDR_LOG_I("cdrom: read empty fifo\n"); + CDR_LOG_I("cdrom: read empty fifo (%d)\n", cdr.FifoSize); CDR_LOG_IO("cdr r2.dat: %02x\n", ret); return ret; @@ -1438,21 +1440,27 @@ void cdrWrite3(unsigned char rt) { return; } - if ((rt & 0x80) && cdr.Readed == 0) { - cdr.Readed = 1; - + // test: Viewpoint + if ((rt & 0x80) && cdr.FifoOffset < cdr.FifoSize) { + CDR_LOG("cdrom: FifoOffset(2) %d/%d\n", cdr.FifoOffset, 
cdr.FifoSize); + } + else if (rt & 0x80) { switch (cdr.Mode & 0x30) { case MODE_SIZE_2328: case 0x00: cdr.FifoOffset = 12; + cdr.FifoSize = 2048 + 12; break; case MODE_SIZE_2340: default: cdr.FifoOffset = 0; + cdr.FifoSize = 2340; break; } } + else if (!(rt & 0xc0)) + cdr.FifoOffset = DATA_SIZE; // fifo empty } void psxDma3(u32 madr, u32 bcr, u32 chcr) { @@ -1464,10 +1472,6 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { switch (chcr & 0x71000000) { case 0x11000000: - if (cdr.Readed == 0) { - CDR_LOG_I("psxDma3() Log: *** DMA 3 *** NOT READY\n"); - break; - } ptr = (u8 *)PSXM(madr); if (ptr == NULL) { CDR_LOG_I("psxDma3() Log: *** DMA 3 *** NULL Pointer!\n"); @@ -1575,6 +1579,7 @@ int cdrFreeze(void *f, int Mode) { getCdInfo(); cdr.FifoOffset = tmp; + cdr.FifoSize = (cdr.Mode & 0x20) ? 2340 : 2048 + 12; // read right sub data tmpp[0] = btoi(cdr.Prev[0]); From 6c9db47c7c54b925e00a96b17faa05e17d6af262 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 8 Sep 2022 20:09:04 +0300 Subject: [PATCH 202/597] easier logging of unhandled stuff --- libpcsxcore/cdrom.c | 6 ++++-- libpcsxcore/mdec.c | 2 +- libpcsxcore/psxdma.c | 12 +++--------- libpcsxcore/psxhw.c | 7 +++++++ libpcsxcore/system.h | 4 ++++ plugins/dfsound/externals.h | 3 +++ plugins/dfsound/registers.c | 17 +++++++++++++++++ 7 files changed, 39 insertions(+), 12 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index a3ddb4473..3fafc1985 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -35,7 +35,7 @@ #if 0 #define CDR_LOG_I SysPrintf #else -#define CDR_LOG_I(...) 
+#define CDR_LOG_I log_unhandled #endif #if 0 #define CDR_LOG_IO SysPrintf @@ -1495,6 +1495,8 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { cdr.FifoOffset += size; psxCpu->Clear(madr, size / 4); } + if (size < cdsize) + CDR_LOG_I("cdrom: dma3 %d/%d\n", size, cdsize); CDRDMA_INT((cdsize/4) * 24); @@ -1510,7 +1512,7 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { return; default: - CDR_LOG("psxDma3() Log: Unknown cddma %x\n", chcr); + CDR_LOG_I("psxDma3() Log: Unknown cddma %x\n", chcr); break; } diff --git a/libpcsxcore/mdec.c b/libpcsxcore/mdec.c index 4b93ffa57..ca4245856 100644 --- a/libpcsxcore/mdec.c +++ b/libpcsxcore/mdec.c @@ -520,7 +520,7 @@ void psxDma0(u32 adr, u32 bcr, u32 chcr) { break; default: - // printf("mdec unknown command\n"); + log_unhandled("mdec: unknown command %08x\n", cmd); break; } diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index e6f68fc2a..70e12f6e1 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -76,11 +76,9 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU SPUDMA_INT(words * 4); return; -#ifdef PSXDMA_LOG default: - PSXDMA_LOG("*** DMA4 SPU - unknown *** %x addr = %x size = %x\n", chcr, madr, bcr); + log_unhandled("*** DMA4 SPU - unknown *** %x addr = %x size = %x\n", chcr, madr, bcr); break; -#endif } HW_DMA4_CHCR &= SWAP32(~0x01000000); @@ -200,11 +198,9 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU GPUDMA_INT(size); return; -#ifdef PSXDMA_LOG default: - PSXDMA_LOG("*** DMA 2 - GPU unknown *** %lx addr = %lx size = %lx\n", chcr, madr, bcr); + log_unhandled("*** DMA 2 - GPU unknown *** %x addr = %x size = %x\n", chcr, madr, bcr); break; -#endif } HW_DMA2_CHCR &= SWAP32(~0x01000000); @@ -253,12 +249,10 @@ void psxDma6(u32 madr, u32 bcr, u32 chcr) { GPUOTCDMA_INT(16); return; } -#ifdef PSXDMA_LOG else { // Unknown option - PSXDMA_LOG("*** DMA6 OT - unknown *** %x addr = %x size = %x\n", chcr, madr, bcr); + log_unhandled("*** DMA6 OT - unknown *** %x addr = %x size = %x\n", chcr, madr, bcr); } 
-#endif HW_DMA6_CHCR &= SWAP32(~0x01000000); DMA_INTERRUPT(6); diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index 7b2401b18..b7540dfcf 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -190,6 +190,10 @@ u16 psxHwRead16(u32 add) { //case 0x1f802030: hard = //int_2000???? //case 0x1f802040: hard =//dip switches...?? + case 0x1f801800: + case 0x1f801802: + log_unhandled("cdrom r16 %x\n", add); + // falthrough default: if (add >= 0x1f801c00 && add < 0x1f801e00) { hard = SPU_readRegister(add); @@ -348,6 +352,9 @@ u32 psxHwRead32(u32 add) { #endif return hard; + case 0x1f801800: + log_unhandled("cdrom r32 %x\n", add); + // falthrough default: hard = psxHu32(add); #ifdef PSXHW_LOG diff --git a/libpcsxcore/system.h b/libpcsxcore/system.h index c869fdf1d..c380aa473 100644 --- a/libpcsxcore/system.h +++ b/libpcsxcore/system.h @@ -36,6 +36,10 @@ void SysUpdate(); // Called on VBlank (to update i.e. pads) void SysRunGui(); // Returns to the Gui void SysClose(); // Close mem and plugins +// log if the game does something we don't handle (well) +//#define log_unhandled printf +#define log_unhandled(...) + #ifdef __cplusplus } #endif diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index 2bc2fc0e7..1cfef6614 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -21,6 +21,9 @@ // generic defines ///////////////////////////////////////////////////////// +//#define log_unhandled printf +#define log_unhandled(...) 
+ #ifdef __GNUC__ #define noinline __attribute__((noinline)) #define unlikely(x) __builtin_expect((x), 0) diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index 6b62247f8..e75f70861 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -161,6 +161,17 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, break; //-------------------------------------------------// + case H_SPUmvolL: + case H_SPUmvolR: + if (val & 0x8000) + log_unhandled("w master sweep: %08lx %04x\n", reg, val); + break; + + case 0x0dac: + if (val != 4) + log_unhandled("1f801dac %04x\n", val); + break; + /* case H_ExtLeft: //auxprintf("EL %d\n",val); @@ -335,6 +346,10 @@ unsigned short CALLBACK SPUreadRegister(unsigned long reg) //case H_SPUIsOn2: // return IsSoundOn(16,24); + case H_SPUMute1: + case H_SPUMute2: + log_unhandled("r isOn: %08lx\n", reg); + break; } return spu.regArea[(r-0xc00)>>1]; @@ -431,6 +446,7 @@ static void SetVolumeL(unsigned char ch,short vol) // LEFT VOLUME if(vol&0x8000) // sweep? { short sInc=1; // -> sweep up? + log_unhandled("ch%d sweepl %04x\n", ch, vol); if(vol&0x2000) sInc=-1; // -> or down? if(vol&0x1000) vol^=0xffff; // -> mmm... phase inverted? have to investigate this vol=((vol&0x7f)+1)/2; // -> sweep: 0..127 -> 0..64 @@ -457,6 +473,7 @@ static void SetVolumeR(unsigned char ch,short vol) // RIGHT VOLUME if(vol&0x8000) // comments... 
see above :) { short sInc=1; + log_unhandled("ch%d sweepr %04x\n", ch, vol); if(vol&0x2000) sInc=-1; if(vol&0x1000) vol^=0xffff; vol=((vol&0x7f)+1)/2; From 4cc373dd225bc4c0db251b3689571e97a1e4ec5a Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 9 Sep 2022 22:11:30 +0300 Subject: [PATCH 203/597] psxinterpreter: reduce the use of globals they induce penalties with -fPIC --- libpcsxcore/gte.c | 60 +-- libpcsxcore/gte.h | 11 +- libpcsxcore/new_dynarec/emu_if.c | 11 +- libpcsxcore/new_dynarec/new_dynarec.c | 2 +- libpcsxcore/psxinterpreter.c | 614 ++++++++++++++------------ libpcsxcore/psxinterpreter.h | 10 +- libpcsxcore/r3000a.c | 5 +- libpcsxcore/r3000a.h | 73 --- 8 files changed, 364 insertions(+), 422 deletions(-) diff --git a/libpcsxcore/gte.c b/libpcsxcore/gte.c index 6b3b299fd..03261ee22 100644 --- a/libpcsxcore/gte.c +++ b/libpcsxcore/gte.c @@ -301,8 +301,7 @@ void gteCheckStall(u32 op) { gteCheckStallRaw(gte_cycletab[op], &psxRegs); } -static inline u32 MFC2(int reg) { - psxCP2Regs *regs = &psxRegs.CP2; +u32 MFC2(struct psxCP2Regs *regs, int reg) { switch (reg) { case 1: case 3: @@ -311,7 +310,7 @@ static inline u32 MFC2(int reg) { case 9: case 10: case 11: - psxRegs.CP2D.r[reg] = (s32)psxRegs.CP2D.p[reg].sw.l; + regs->CP2D.r[reg] = (s32)regs->CP2D.p[reg].sw.l; break; case 7: @@ -319,25 +318,24 @@ static inline u32 MFC2(int reg) { case 17: case 18: case 19: - psxRegs.CP2D.r[reg] = (u32)psxRegs.CP2D.p[reg].w.l; + regs->CP2D.r[reg] = (u32)regs->CP2D.p[reg].w.l; break; case 15: - psxRegs.CP2D.r[reg] = gteSXY2; + regs->CP2D.r[reg] = gteSXY2; break; case 28: case 29: - psxRegs.CP2D.r[reg] = LIM(gteIR1 >> 7, 0x1f, 0, 0) | + regs->CP2D.r[reg] = LIM(gteIR1 >> 7, 0x1f, 0, 0) | (LIM(gteIR2 >> 7, 0x1f, 0, 0) << 5) | (LIM(gteIR3 >> 7, 0x1f, 0, 0) << 10); break; } - return psxRegs.CP2D.r[reg]; + return regs->CP2D.r[reg]; } -static inline void MTC2(u32 value, int reg) { - psxCP2Regs *regs = &psxRegs.CP2; +void MTC2(struct psxCP2Regs *regs, u32 value, int reg) { switch (reg) 
{ case 15: gteSXY0 = gteSXY1; @@ -379,11 +377,11 @@ static inline void MTC2(u32 value, int reg) { return; default: - psxRegs.CP2D.r[reg] = value; + regs->CP2D.r[reg] = value; } } -static inline void CTC2(u32 value, int reg) { +void CTC2(struct psxCP2Regs *regs, u32 value, int reg) { switch (reg) { case 4: case 12: @@ -401,45 +399,7 @@ static inline void CTC2(u32 value, int reg) { break; } - psxRegs.CP2C.r[reg] = value; -} - -void gteMFC2() { - if (!_Rt_) return; - psxRegs.GPR.r[_Rt_] = MFC2(_Rd_); -} - -void gteCFC2() { - if (!_Rt_) return; - psxRegs.GPR.r[_Rt_] = psxRegs.CP2C.r[_Rd_]; -} - -void gteMTC2() { - MTC2(psxRegs.GPR.r[_Rt_], _Rd_); -} - -void gteCTC2() { - CTC2(psxRegs.GPR.r[_Rt_], _Rd_); -} - -#define _oB_ (psxRegs.GPR.r[_Rs_] + _Imm_) - -void gteLWC2() { - MTC2(psxMemRead32(_oB_), _Rt_); -} - -void gteSWC2() { - psxMemWrite32(_oB_, MFC2(_Rt_)); -} - -void gteLWC2_stall() { - gteCheckStall(0); - gteLWC2(); -} - -void gteSWC2_stall() { - gteCheckStall(0); - gteSWC2(); + regs->CP2C.r[reg] = value; } #endif // FLAGLESS diff --git a/libpcsxcore/gte.h b/libpcsxcore/gte.h index 75e9e5b37..f1dcc66a6 100644 --- a/libpcsxcore/gte.h +++ b/libpcsxcore/gte.h @@ -72,14 +72,9 @@ extern const unsigned char gte_cycletab[64]; int gteCheckStallRaw(u32 op_cycles, psxRegisters *regs); void gteCheckStall(u32 op); -void gteMFC2(); -void gteCFC2(); -void gteMTC2(); -void gteCTC2(); -void gteLWC2(); -void gteSWC2(); -void gteLWC2_stall(); -void gteSWC2_stall(); +u32 MFC2(struct psxCP2Regs *regs, int reg); +void MTC2(struct psxCP2Regs *regs, u32 value, int reg); +void CTC2(struct psxCP2Regs *regs, u32 value, int reg); void gteRTPS(struct psxCP2Regs *regs); void gteOP(struct psxCP2Regs *regs); diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index bf64e0f0d..aa093564d 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -105,13 +105,10 @@ void gen_interupt() next_interupt, next_interupt - psxRegs.cycle); } -// 
from interpreter -extern void MTC0(int reg, u32 val); - void pcsx_mtc0(u32 reg, u32 val) { evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); - MTC0(reg, val); + MTC0(&psxRegs, reg, val); gen_interupt(); if (Cause & Status & 0x0300) // possible sw irq pending_exception = 1; @@ -120,7 +117,7 @@ void pcsx_mtc0(u32 reg, u32 val) void pcsx_mtc0_ds(u32 reg, u32 val) { evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); - MTC0(reg, val); + MTC0(&psxRegs, reg, val); } void new_dyna_before_save(void) @@ -299,15 +296,13 @@ const uint64_t gte_reg_writes[64] = { static int ari64_init() { static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); - extern void (*psxCP2[64])(); - extern void psxNULL(); size_t i; new_dynarec_init(); new_dyna_pcsx_mem_init(); for (i = 0; i < ARRAY_SIZE(gte_handlers); i++) - if (psxCP2[i] != psxNULL) + if (psxCP2[i] != gteNULL) gte_handlers[i] = psxCP2[i]; #if defined(__arm__) && !defined(DRC_DBG) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 1c7085661..2b57e59da 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -4152,7 +4152,7 @@ static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_) static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_) { - void *hlefunc = psxNULL; + void *hlefunc = gteNULL; uint32_t hleCode = source[i] & 0x03ffffff; if (hleCode < ARRAY_SIZE(psxHLEt)) hlefunc = psxHLEt[hleCode]; diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index e7e32690b..ea20cab97 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -26,6 +26,7 @@ #include "gte.h" #include "psxhle.h" #include "psxinterpreter.h" +#include #include //#include "debug.h" #define ProcessDebug() @@ -42,18 +43,26 @@ static u32 branchPC; #define debugI() #endif +#ifdef __i386__ +#define INT_ATTR __attribute__((regparm(2))) +#else +#define 
INT_ATTR +#endif +#ifndef INVALID_PTR +#define INVALID_PTR NULL +#endif + // Subsets -void (*psxBSC[64])(); -void (*psxSPC[64])(); -void (*psxREG[32])(); -void (*psxCP0[32])(); -void (*psxCP2[64])(struct psxCP2Regs *regs); -void (*psxCP2BSC[32])(); - -static u32 fetchNoCache(u32 pc) +static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code); +static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code); + +static u32 INT_ATTR fetchNoCache(u8 **memRLUT, u32 pc) { - u32 *code = (u32 *)PSXM(pc); - return ((code == NULL) ? 0 : SWAP32(*code)); + u8 *base = memRLUT[pc >> 16]; + if (base == INVALID_PTR) + return 0; + u32 *code = (u32 *)(base + (pc & 0xfffc)); + return SWAP32(*code); } /* @@ -65,7 +74,7 @@ static struct cache_entry { u32 data[4]; } ICache[256]; -static u32 fetchICache(u32 pc) +static u32 INT_ATTR fetchICache(u8 **memRLUT, u32 pc) { // cached? if (pc < 0xa0000000) @@ -75,9 +84,11 @@ static u32 fetchICache(u32 pc) if (((entry->tag ^ pc) & 0xfffffff0) != 0 || pc < entry->tag) { - u32 *code = (u32 *)PSXM(pc & ~0x0f); - if (!code) + const u8 *base = memRLUT[pc >> 16]; + const u32 *code; + if (base == INVALID_PTR) return 0; + code = (u32 *)(base + (pc & 0xfff0)); entry->tag = pc; // treat as 4 words, although other configurations are said to be possible @@ -92,10 +103,10 @@ static u32 fetchICache(u32 pc) return entry->data[(pc & 0x0f) >> 2]; } - return fetchNoCache(pc); + return fetchNoCache(memRLUT, pc); } -u32 (*fetch)(u32 pc) = fetchNoCache; +static u32 (INT_ATTR *fetch)(u8 **memRLUT, u32 pc) = fetchNoCache; static void delayRead(int reg, u32 bpc) { u32 rold, rnew; @@ -103,7 +114,7 @@ static void delayRead(int reg, u32 bpc) { // SysPrintf("delayRead at %x!\n", psxRegs.pc); rold = psxRegs.GPR.r[reg]; - psxBSC[psxRegs.code >> 26](); // branch delay load + psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); // branch delay load rnew = psxRegs.GPR.r[reg]; psxRegs.pc = bpc; @@ -126,7 +137,7 @@ static void delayWrite(int reg, u32 bpc) { // no changes 
from normal behavior - psxBSC[psxRegs.code >> 26](); + psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); branch = 0; psxRegs.pc = bpc; @@ -146,6 +157,50 @@ static void delayReadWrite(int reg, u32 bpc) { psxBranchTest(); } +/**** R3000A Instruction Macros ****/ +#define _PC_ regs_->pc // The next PC to be executed + +#define _fOp_(code) ((code >> 26) ) // The opcode part of the instruction register +#define _fFunct_(code) ((code ) & 0x3F) // The funct part of the instruction register +#define _fRd_(code) ((code >> 11) & 0x1F) // The rd part of the instruction register +#define _fRt_(code) ((code >> 16) & 0x1F) // The rt part of the instruction register +#define _fRs_(code) ((code >> 21) & 0x1F) // The rs part of the instruction register +#define _fSa_(code) ((code >> 6) & 0x1F) // The sa part of the instruction register +#define _fIm_(code) ((u16)code) // The immediate part of the instruction register +#define _fTarget_(code) (code & 0x03ffffff) // The target part of the instruction register + +#define _fImm_(code) ((s16)code) // sign-extended immediate +#define _fImmU_(code) (code&0xffff) // zero-extended immediate + +#define _Op_ _fOp_(code) +#define _Funct_ _fFunct_(code) +#define _Rd_ _fRd_(code) +#define _Rt_ _fRt_(code) +#define _Rs_ _fRs_(code) +#define _Sa_ _fSa_(code) +#define _Im_ _fIm_(code) +#define _Target_ _fTarget_(code) + +#define _Imm_ _fImm_(code) +#define _ImmU_ _fImmU_(code) + +#define _rRs_ regs_->GPR.r[_Rs_] // Rs register +#define _rRt_ regs_->GPR.r[_Rt_] // Rt register +#define _rRd_ regs_->GPR.r[_Rd_] // Rd register +#define _rSa_ regs_->GPR.r[_Sa_] // Sa register +#define _rFs_ regs_->CP0.r[_Rd_] // Fs register + +#define _rHi_ regs_->GPR.n.hi // The HI register +#define _rLo_ regs_->GPR.n.lo // The LO register + +#define _JumpTarget_ ((_Target_ * 4) + (_PC_ & 0xf0000000)) // Calculates the target during a jump instruction +#define _BranchTarget_ ((s16)_Im_ * 4 + _PC_) // Calculates the target during a branch instruction + +#define 
_SetLink(x) regs_->GPR.r[x] = _PC_ + 4; // Sets the return address in the link register + +#define OP(name) \ + static inline INT_ATTR void name(psxRegisters *regs_, u32 code) + // this defines shall be used with the tmp // of the next func (instead of _Funct_...) #define _tFunct_ ((tmp ) & 0x3F) // The funct part of the instruction register @@ -154,7 +209,10 @@ static void delayReadWrite(int reg, u32 bpc) { #define _tRs_ ((tmp >> 21) & 0x1F) // The rs part of the instruction register #define _tSa_ ((tmp >> 6) & 0x1F) // The sa part of the instruction register -int psxTestLoadDelay(int reg, u32 tmp) { +#define _i32(x) (s32)(x) +#define _u32(x) (u32)(x) + +static int psxTestLoadDelay(int reg, u32 tmp) { if (tmp == 0) return 0; // NOP switch (tmp >> 26) { case 0x00: // SPECIAL @@ -310,8 +368,8 @@ int psxTestLoadDelay(int reg, u32 tmp) { return 0; } -void psxDelayTest(int reg, u32 bpc) { - u32 tmp = fetch(bpc); +static void psxDelayTest(int reg, u32 bpc) { + u32 tmp = fetch(psxMemRLUT, bpc); branch = 1; switch (psxTestLoadDelay(reg, tmp)) { @@ -322,7 +380,7 @@ void psxDelayTest(int reg, u32 bpc) { case 3: delayWrite(reg, bpc); return; } - psxBSC[psxRegs.code >> 26](); + psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); branch = 0; psxRegs.pc = bpc; @@ -330,10 +388,10 @@ void psxDelayTest(int reg, u32 bpc) { psxBranchTest(); } -static u32 psxBranchNoDelay(void) { - u32 temp; +static u32 psxBranchNoDelay(psxRegisters *regs_) { + u32 temp, code; - psxRegs.code = fetch(psxRegs.pc); + regs_->code = code = fetch(psxMemRLUT, regs_->pc); switch (_Op_) { case 0x00: // SPECIAL switch (_Funct_) { @@ -408,7 +466,7 @@ static int psxDelayBranchExec(u32 tar) { static int psxDelayBranchTest(u32 tar1) { u32 tar2, tmp1, tmp2; - tar2 = psxBranchNoDelay(); + tar2 = psxBranchNoDelay(&psxRegs); if (tar2 == (u32)-1) return 0; @@ -421,7 +479,7 @@ static int psxDelayBranchTest(u32 tar1) { * has no normal delay slot, instruction at tar1 was fetched instead) */ psxRegs.pc = tar1; - tmp1 = 
psxBranchNoDelay(); + tmp1 = psxBranchNoDelay(&psxRegs); if (tmp1 == (u32)-1) { return psxDelayBranchExec(tar2); } @@ -434,7 +492,7 @@ static int psxDelayBranchTest(u32 tar1) { * - jump to target of that branch (tmp1) */ psxRegs.pc = tar2; - tmp2 = psxBranchNoDelay(); + tmp2 = psxBranchNoDelay(&psxRegs); if (tmp2 == (u32)-1) { return psxDelayBranchExec(tmp1); } @@ -451,7 +509,7 @@ static int psxDelayBranchTest(u32 tar1) { } static void doBranch(u32 tar) { - u32 tmp; + u32 tmp, code; branch2 = branch = 1; branchPC = tar; @@ -460,7 +518,7 @@ static void doBranch(u32 tar) { if (psxDelayBranchTest(tar)) return; - psxRegs.code = fetch(psxRegs.pc); + psxRegs.code = code = fetch(psxMemRLUT, psxRegs.pc); debugI(); @@ -501,7 +559,7 @@ static void doBranch(u32 tar) { break; } - psxBSC[psxRegs.code >> 26](); + psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); branch = 0; psxRegs.pc = branchPC; @@ -513,107 +571,104 @@ static void doBranch(u32 tar) { * Arithmetic with immediate operand * * Format: OP rt, rs, immediate * *********************************************************/ -void psxADDI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im (Exception on Integer Overflow) -void psxADDIU() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im -void psxANDI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) & _ImmU_; } // Rt = Rs And Im -void psxORI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) | _ImmU_; } // Rt = Rs Or Im -void psxXORI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) ^ _ImmU_; } // Rt = Rs Xor Im -void psxSLTI() { if (!_Rt_) return; _rRt_ = _i32(_rRs_) < _Imm_ ; } // Rt = Rs < Im (Signed) -void psxSLTIU() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) < ((u32)_Imm_); } // Rt = Rs < Im (Unsigned) +OP(psxADDI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im (Exception on Integer Overflow) +OP(psxADDIU) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im +OP(psxANDI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) & 
_ImmU_; } // Rt = Rs And Im +OP(psxORI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) | _ImmU_; } // Rt = Rs Or Im +OP(psxXORI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) ^ _ImmU_; } // Rt = Rs Xor Im +OP(psxSLTI) { if (!_Rt_) return; _rRt_ = _i32(_rRs_) < _Imm_ ; } // Rt = Rs < Im (Signed) +OP(psxSLTIU) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) < ((u32)_Imm_); } // Rt = Rs < Im (Unsigned) /********************************************************* * Register arithmetic * * Format: OP rd, rs, rt * *********************************************************/ -void psxADD() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); } // Rd = Rs + Rt (Exception on Integer Overflow) -void psxADDU() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); } // Rd = Rs + Rt -void psxSUB() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); } // Rd = Rs - Rt (Exception on Integer Overflow) -void psxSUBU() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); } // Rd = Rs - Rt -void psxAND() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) & _u32(_rRt_); } // Rd = Rs And Rt -void psxOR() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) | _u32(_rRt_); } // Rd = Rs Or Rt -void psxXOR() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) ^ _u32(_rRt_); } // Rd = Rs Xor Rt -void psxNOR() { if (!_Rd_) return; _rRd_ =~(_u32(_rRs_) | _u32(_rRt_)); }// Rd = Rs Nor Rt -void psxSLT() { if (!_Rd_) return; _rRd_ = _i32(_rRs_) < _i32(_rRt_); } // Rd = Rs < Rt (Signed) -void psxSLTU() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) < _u32(_rRt_); } // Rd = Rs < Rt (Unsigned) +OP(psxADD) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); } // Rd = Rs + Rt (Exception on Integer Overflow) +OP(psxADDU) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); } // Rd = Rs + Rt +OP(psxSUB) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); } // Rd = Rs - Rt (Exception on Integer Overflow) +OP(psxSUBU) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); } // Rd = Rs - Rt +OP(psxAND) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) 
& _u32(_rRt_); } // Rd = Rs And Rt +OP(psxOR) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) | _u32(_rRt_); } // Rd = Rs Or Rt +OP(psxXOR) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) ^ _u32(_rRt_); } // Rd = Rs Xor Rt +OP(psxNOR) { if (!_Rd_) return; _rRd_ =~(_u32(_rRs_) | _u32(_rRt_)); }// Rd = Rs Nor Rt +OP(psxSLT) { if (!_Rd_) return; _rRd_ = _i32(_rRs_) < _i32(_rRt_); } // Rd = Rs < Rt (Signed) +OP(psxSLTU) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) < _u32(_rRt_); } // Rd = Rs < Rt (Unsigned) /********************************************************* * Register mult/div & Register trap logic * * Format: OP rs, rt * *********************************************************/ -void psxDIV() { - if (!_i32(_rRt_)) { - _i32(_rHi_) = _i32(_rRs_); - if (_i32(_rRs_) & 0x80000000) { - _i32(_rLo_) = 1; - } else { - _i32(_rLo_) = 0xFFFFFFFF; - } -/* - * Notaz said that this was "not needed" for ARM platforms and could slow it down so let's disable for ARM. - * This fixes a crash issue that can happen when running Amidog's CPU test. 
- * (It still stays stuck to a black screen but at least it doesn't crash anymore) - */ +OP(psxDIV) { + if (!_rRt_) { + _rHi_ = _rRs_; + if (_rRs_ & 0x80000000) { + _rLo_ = 1; + } else { + _rLo_ = 0xFFFFFFFF; + } + } #if !defined(__arm__) && !defined(__aarch64__) - } else if (_i32(_rRs_) == 0x80000000 && _i32(_rRt_) == 0xFFFFFFFF) { - _i32(_rLo_) = 0x80000000; - _i32(_rHi_) = 0; + else if (_rRs_ == 0x80000000 && _rRt_ == 0xFFFFFFFF) { + _rLo_ = 0x80000000; + _rHi_ = 0; + } #endif - } else { - _i32(_rLo_) = _i32(_rRs_) / _i32(_rRt_); - _i32(_rHi_) = _i32(_rRs_) % _i32(_rRt_); - } + else { + _rLo_ = _i32(_rRs_) / _i32(_rRt_); + _rHi_ = _i32(_rRs_) % _i32(_rRt_); + } } -void psxDIV_stall() { - psxRegs.muldivBusyCycle = psxRegs.cycle + 37; - psxDIV(); +OP(psxDIV_stall) { + regs_->muldivBusyCycle = regs_->cycle + 37; + psxDIV(regs_, code); } -void psxDIVU() { +OP(psxDIVU) { if (_rRt_ != 0) { _rLo_ = _rRs_ / _rRt_; _rHi_ = _rRs_ % _rRt_; } else { - _i32(_rLo_) = 0xffffffff; - _i32(_rHi_) = _i32(_rRs_); + _rLo_ = 0xffffffff; + _rHi_ = _rRs_; } } -void psxDIVU_stall() { - psxRegs.muldivBusyCycle = psxRegs.cycle + 37; - psxDIVU(); +OP(psxDIVU_stall) { + regs_->muldivBusyCycle = regs_->cycle + 37; + psxDIVU(regs_, code); } -void psxMULT() { - u64 res = (s64)((s64)_i32(_rRs_) * (s64)_i32(_rRt_)); +OP(psxMULT) { + u64 res = (s64)_i32(_rRs_) * _i32(_rRt_); - psxRegs.GPR.n.lo = (u32)(res & 0xffffffff); - psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff); + regs_->GPR.n.lo = (u32)res; + regs_->GPR.n.hi = (u32)(res >> 32); } -void psxMULT_stall() { +OP(psxMULT_stall) { // approximate, but maybe good enough u32 rs = _rRs_; u32 lz = __builtin_clz(((rs ^ ((s32)rs >> 21)) | 1)); u32 c = 7 + (2 - (lz / 11)) * 4; - psxRegs.muldivBusyCycle = psxRegs.cycle + c; - psxMULT(); + regs_->muldivBusyCycle = regs_->cycle + c; + psxMULT(regs_, code); } -void psxMULTU() { - u64 res = (u64)((u64)_u32(_rRs_) * (u64)_u32(_rRt_)); +OP(psxMULTU) { + u64 res = (u64)_u32(_rRs_) * _u32(_rRt_); - 
psxRegs.GPR.n.lo = (u32)(res & 0xffffffff); - psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff); + regs_->GPR.n.lo = (u32)(res & 0xffffffff); + regs_->GPR.n.hi = (u32)((res >> 32) & 0xffffffff); } -void psxMULTU_stall() { +OP(psxMULTU_stall) { // approximate, but maybe good enough u32 lz = __builtin_clz(_rRs_ | 1); u32 c = 7 + (2 - (lz / 11)) * 4; - psxRegs.muldivBusyCycle = psxRegs.cycle + c; - psxMULTU(); + regs_->muldivBusyCycle = regs_->cycle + c; + psxMULTU(regs_, code); } /********************************************************* @@ -623,80 +678,88 @@ void psxMULTU_stall() { #define RepZBranchi32(op) if(_i32(_rRs_) op 0) doBranch(_BranchTarget_); #define RepZBranchLinki32(op) { _SetLink(31); if(_i32(_rRs_) op 0) { doBranch(_BranchTarget_); } } -void psxBGEZ() { RepZBranchi32(>=) } // Branch if Rs >= 0 -void psxBGEZAL() { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link -void psxBGTZ() { RepZBranchi32(>) } // Branch if Rs > 0 -void psxBLEZ() { RepZBranchi32(<=) } // Branch if Rs <= 0 -void psxBLTZ() { RepZBranchi32(<) } // Branch if Rs < 0 -void psxBLTZAL() { RepZBranchLinki32(<) } // Branch if Rs < 0 and link +OP(psxBGEZ) { RepZBranchi32(>=) } // Branch if Rs >= 0 +OP(psxBGEZAL) { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link +OP(psxBGTZ) { RepZBranchi32(>) } // Branch if Rs > 0 +OP(psxBLEZ) { RepZBranchi32(<=) } // Branch if Rs <= 0 +OP(psxBLTZ) { RepZBranchi32(<) } // Branch if Rs < 0 +OP(psxBLTZAL) { RepZBranchLinki32(<) } // Branch if Rs < 0 and link /********************************************************* * Shift arithmetic with constant shift * * Format: OP rd, rt, sa * *********************************************************/ -void psxSLL() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) << _Sa_; } // Rd = Rt << sa -void psxSRA() { if (!_Rd_) return; _i32(_rRd_) = _i32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (arithmetic) -void psxSRL() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (logical) +OP(psxSLL) { if 
(!_Rd_) return; _rRd_ = _u32(_rRt_) << _Sa_; } // Rd = Rt << sa +OP(psxSRA) { if (!_Rd_) return; _rRd_ = _i32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (arithmetic) +OP(psxSRL) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (logical) /********************************************************* * Shift arithmetic with variant register shift * * Format: OP rd, rt, rs * *********************************************************/ -void psxSLLV() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) << (_u32(_rRs_) & 0x1F); } // Rd = Rt << rs -void psxSRAV() { if (!_Rd_) return; _i32(_rRd_) = _i32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (arithmetic) -void psxSRLV() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (logical) +OP(psxSLLV) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) << (_u32(_rRs_) & 0x1F); } // Rd = Rt << rs +OP(psxSRAV) { if (!_Rd_) return; _rRd_ = _i32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (arithmetic) +OP(psxSRLV) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (logical) /********************************************************* * Load higher 16 bits of the first word in GPR with imm * * Format: OP rt, immediate * *********************************************************/ -void psxLUI() { if (!_Rt_) return; _u32(_rRt_) = psxRegs.code << 16; } // Upper halfword of Rt = Im +OP(psxLUI) { if (!_Rt_) return; _rRt_ = code << 16; } // Upper halfword of Rt = Im /********************************************************* * Move from HI/LO to GPR * * Format: OP rd * *********************************************************/ -void psxMFHI() { if (!_Rd_) return; _rRd_ = _rHi_; } // Rd = Hi -void psxMFLO() { if (!_Rd_) return; _rRd_ = _rLo_; } // Rd = Lo +OP(psxMFHI) { if (!_Rd_) return; _rRd_ = _rHi_; } // Rd = Hi +OP(psxMFLO) { if (!_Rd_) return; _rRd_ = _rLo_; } // Rd = Lo -static void mflohiCheckStall(void) +static void mflohiCheckStall(psxRegisters *regs_) { 
- u32 left = psxRegs.muldivBusyCycle - psxRegs.cycle; + u32 left = regs_->muldivBusyCycle - regs_->cycle; if (left <= 37) { //printf("muldiv stall %u\n", left); - psxRegs.cycle = psxRegs.muldivBusyCycle; + regs_->cycle = regs_->muldivBusyCycle; } } -void psxMFHI_stall() { mflohiCheckStall(); psxMFHI(); } -void psxMFLO_stall() { mflohiCheckStall(); psxMFLO(); } +OP(psxMFHI_stall) { mflohiCheckStall(regs_); psxMFHI(regs_, code); } +OP(psxMFLO_stall) { mflohiCheckStall(regs_); psxMFLO(regs_, code); } /********************************************************* * Move to GPR to HI/LO & Register jump * * Format: OP rs * *********************************************************/ -void psxMTHI() { _rHi_ = _rRs_; } // Hi = Rs -void psxMTLO() { _rLo_ = _rRs_; } // Lo = Rs +OP(psxMTHI) { _rHi_ = _rRs_; } // Hi = Rs +OP(psxMTLO) { _rLo_ = _rRs_; } // Lo = Rs /********************************************************* * Special purpose instructions * * Format: OP * *********************************************************/ -void psxBREAK() { - psxRegs.pc -= 4; +OP(psxBREAK) { + regs_->pc -= 4; psxException(0x24, branch); } -void psxSYSCALL() { - psxRegs.pc -= 4; +OP(psxSYSCALL) { + regs_->pc -= 4; psxException(0x20, branch); } -void psxRFE() { +static inline void psxTestSWInts(psxRegisters *regs_) { + if (regs_->CP0.n.Cause & regs_->CP0.n.Status & 0x0300 && + regs_->CP0.n.Status & 0x1) { + regs_->CP0.n.Cause &= ~0x7c; + psxException(regs_->CP0.n.Cause, branch); + } +} + +OP(psxRFE) { // SysPrintf("psxRFE\n"); - psxRegs.CP0.n.Status = (psxRegs.CP0.n.Status & 0xfffffff0) | - ((psxRegs.CP0.n.Status & 0x3c) >> 2); - psxTestSWInts(); + regs_->CP0.n.Status = (regs_->CP0.n.Status & 0xfffffff0) | + ((regs_->CP0.n.Status & 0x3c) >> 2); + psxTestSWInts(regs_); } /********************************************************* @@ -705,26 +768,26 @@ void psxRFE() { *********************************************************/ #define RepBranchi32(op) if(_i32(_rRs_) op _i32(_rRt_)) 
doBranch(_BranchTarget_); -void psxBEQ() { RepBranchi32(==) } // Branch if Rs == Rt -void psxBNE() { RepBranchi32(!=) } // Branch if Rs != Rt +OP(psxBEQ) { RepBranchi32(==) } // Branch if Rs == Rt +OP(psxBNE) { RepBranchi32(!=) } // Branch if Rs != Rt /********************************************************* * Jump to target * * Format: OP target * *********************************************************/ -void psxJ() { doBranch(_JumpTarget_); } -void psxJAL() { _SetLink(31); doBranch(_JumpTarget_); } +OP(psxJ) { doBranch(_JumpTarget_); } +OP(psxJAL) { _SetLink(31); doBranch(_JumpTarget_); } /********************************************************* * Register jump * * Format: OP rs, rd * *********************************************************/ -void psxJR() { +OP(psxJR) { doBranch(_rRs_ & ~3); psxJumpTest(); } -void psxJALR() { +OP(psxJALR) { u32 temp = _u32(_rRs_); if (_Rd_) { _SetLink(_Rd_); } doBranch(temp & ~3); @@ -735,59 +798,23 @@ void psxJALR() { * Format: OP rt, offset(base) * *********************************************************/ -#define _oB_ (_u32(_rRs_) + _Imm_) - -void psxLB() { - if (_Rt_) { - _i32(_rRt_) = (signed char)psxMemRead8(_oB_); - } else { - psxMemRead8(_oB_); - } -} - -void psxLBU() { - if (_Rt_) { - _u32(_rRt_) = psxMemRead8(_oB_); - } else { - psxMemRead8(_oB_); - } -} - -void psxLH() { - if (_Rt_) { - _i32(_rRt_) = (short)psxMemRead16(_oB_); - } else { - psxMemRead16(_oB_); - } -} - -void psxLHU() { - if (_Rt_) { - _u32(_rRt_) = psxMemRead16(_oB_); - } else { - psxMemRead16(_oB_); - } -} +#define _oB_ (regs_->GPR.r[_Rs_] + _Imm_) -void psxLW() { - if (_Rt_) { - _u32(_rRt_) = psxMemRead32(_oB_); - } else { - psxMemRead32(_oB_); - } -} - -u32 LWL_MASK[4] = { 0xffffff, 0xffff, 0xff, 0 }; -u32 LWL_SHIFT[4] = { 24, 16, 8, 0 }; +OP(psxLB) { u32 v = (s8)psxMemRead8(_oB_); if (_Rt_) _rRt_ = v; } +OP(psxLBU) { u32 v = psxMemRead8(_oB_); if (_Rt_) _rRt_ = v; } +OP(psxLH) { u32 v = (s16)psxMemRead16(_oB_); if (_Rt_) _rRt_ = v; } 
+OP(psxLHU) { u32 v = psxMemRead16(_oB_); if (_Rt_) _rRt_ = v; } +OP(psxLW) { u32 v = psxMemRead32(_oB_); if (_Rt_) _rRt_ = v; } -void psxLWL() { +OP(psxLWL) { + static const u32 LWL_MASK[4] = { 0xffffff, 0xffff, 0xff, 0 }; + static const u32 LWL_SHIFT[4] = { 24, 16, 8, 0 }; u32 addr = _oB_; u32 shift = addr & 3; u32 mem = psxMemRead32(addr & ~3); if (!_Rt_) return; - _u32(_rRt_) = ( _u32(_rRt_) & LWL_MASK[shift]) | - ( mem << LWL_SHIFT[shift]); + _rRt_ = (_u32(_rRt_) & LWL_MASK[shift]) | (mem << LWL_SHIFT[shift]); /* Mem = 1234. Reg = abcd @@ -799,17 +826,15 @@ void psxLWL() { */ } -u32 LWR_MASK[4] = { 0, 0xff000000, 0xffff0000, 0xffffff00 }; -u32 LWR_SHIFT[4] = { 0, 8, 16, 24 }; - -void psxLWR() { +OP(psxLWR) { + static const u32 LWR_MASK[4] = { 0, 0xff000000, 0xffff0000, 0xffffff00 }; + static const u32 LWR_SHIFT[4] = { 0, 8, 16, 24 }; u32 addr = _oB_; u32 shift = addr & 3; u32 mem = psxMemRead32(addr & ~3); if (!_Rt_) return; - _u32(_rRt_) = ( _u32(_rRt_) & LWR_MASK[shift]) | - ( mem >> LWR_SHIFT[shift]); + _rRt_ = (_u32(_rRt_) & LWR_MASK[shift]) | (mem >> LWR_SHIFT[shift]); /* Mem = 1234. 
Reg = abcd @@ -821,14 +846,13 @@ void psxLWR() { */ } -void psxSB() { psxMemWrite8 (_oB_, _rRt_ & 0xff); } -void psxSH() { psxMemWrite16(_oB_, _rRt_ & 0xffff); } -void psxSW() { psxMemWrite32(_oB_, _rRt_); } - -u32 SWL_MASK[4] = { 0xffffff00, 0xffff0000, 0xff000000, 0 }; -u32 SWL_SHIFT[4] = { 24, 16, 8, 0 }; +OP(psxSB) { psxMemWrite8 (_oB_, _rRt_ & 0xff); } +OP(psxSH) { psxMemWrite16(_oB_, _rRt_ & 0xffff); } +OP(psxSW) { psxMemWrite32(_oB_, _rRt_); } -void psxSWL() { +OP(psxSWL) { + static const u32 SWL_MASK[4] = { 0xffffff00, 0xffff0000, 0xff000000, 0 }; + static const u32 SWL_SHIFT[4] = { 24, 16, 8, 0 }; u32 addr = _oB_; u32 shift = addr & 3; u32 mem = psxMemRead32(addr & ~3); @@ -845,10 +869,9 @@ void psxSWL() { */ } -u32 SWR_MASK[4] = { 0, 0xff, 0xffff, 0xffffff }; -u32 SWR_SHIFT[4] = { 0, 8, 16, 24 }; - -void psxSWR() { +OP(psxSWR) { + static const u32 SWR_MASK[4] = { 0, 0xff, 0xffff, 0xffffff }; + static const u32 SWR_SHIFT[4] = { 0, 8, 16, 24 }; u32 addr = _oB_; u32 shift = addr & 3; u32 mem = psxMemRead32(addr & ~3); @@ -870,88 +893,139 @@ void psxSWR() { * Moves between GPR and COPx * * Format: OP rt, fs * *********************************************************/ -void psxMFC0() { if (!_Rt_) return; _i32(_rRt_) = (int)_rFs_; } -void psxCFC0() { if (!_Rt_) return; _i32(_rRt_) = (int)_rFs_; } - -void psxTestSWInts() { - if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.Status & 0x0300 && - psxRegs.CP0.n.Status & 0x1) { - psxRegs.CP0.n.Cause &= ~0x7c; - psxException(psxRegs.CP0.n.Cause, branch); - } -} +OP(psxMFC0) { if (!_Rt_) return; _rRt_ = _rFs_; } +OP(psxCFC0) { if (!_Rt_) return; _rRt_ = _rFs_; } -void MTC0(int reg, u32 val) { +void MTC0(psxRegisters *regs_, int reg, u32 val) { // SysPrintf("MTC0 %d: %x\n", reg, val); switch (reg) { case 12: // Status - psxRegs.CP0.r[12] = val; - psxTestSWInts(); + regs_->CP0.r[12] = val; + psxTestSWInts(regs_); break; case 13: // Cause - psxRegs.CP0.n.Cause &= ~0x0300; - psxRegs.CP0.n.Cause |= val & 0x0300; - psxTestSWInts(); + 
regs_->CP0.n.Cause &= ~0x0300; + regs_->CP0.n.Cause |= val & 0x0300; + psxTestSWInts(regs_); break; default: - psxRegs.CP0.r[reg] = val; + regs_->CP0.r[reg] = val; break; } } -void psxMTC0() { MTC0(_Rd_, _u32(_rRt_)); } -void psxCTC0() { MTC0(_Rd_, _u32(_rRt_)); } +OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); } +OP(psxCTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); } /********************************************************* * Unknow instruction (would generate an exception) * * Format: ? * *********************************************************/ -void psxNULL() { +static inline void psxNULL_(void) { #ifdef PSXCPU_LOG PSXCPU_LOG("psx: Unimplemented op %x\n", psxRegs.code); #endif } -void psxSPECIAL() { - psxSPC[_Funct_](); -} +OP(psxNULL) { psxNULL_(); } +void gteNULL(struct psxCP2Regs *regs) { psxNULL_(); } -void psxREGIMM() { - psxREG[_Rt_](); +OP(psxSPECIAL) { + psxSPC[_Funct_](regs_, code); } -void psxCOP0() { - psxCP0[_Rs_](); +OP(psxCOP0) { + switch (_Rs_) { + case 0x00: psxMFC0(regs_, code); break; + case 0x02: psxCFC0(regs_, code); break; + case 0x04: psxMTC0(regs_, code); break; + case 0x06: psxCTC0(regs_, code); break; + case 0x10: psxRFE(regs_, code); break; + default: psxNULL_(); break; + } } -void psxCOP2() { - psxCP2[_Funct_]((struct psxCP2Regs *)&psxRegs.CP2D); +OP(psxCOP2) { + psxCP2[_Funct_](&regs_->CP2); } -void psxCOP2_stall() { +OP(psxCOP2_stall) { u32 f = _Funct_; gteCheckStall(f); - psxCP2[f]((struct psxCP2Regs *)&psxRegs.CP2D); + psxCP2[f](&regs_->CP2); +} + +OP(gteMFC2) { + if (!_Rt_) return; + regs_->GPR.r[_Rt_] = MFC2(&regs_->CP2, _Rd_); +} + +OP(gteCFC2) { + if (!_Rt_) return; + regs_->GPR.r[_Rt_] = regs_->CP2C.r[_Rd_]; +} + +OP(gteMTC2) { + MTC2(&regs_->CP2, regs_->GPR.r[_Rt_], _Rd_); +} + +OP(gteCTC2) { + CTC2(&regs_->CP2, regs_->GPR.r[_Rt_], _Rd_); +} + +OP(gteLWC2) { + MTC2(&regs_->CP2, psxMemRead32(_oB_), _Rt_); +} + +OP(gteSWC2) { + psxMemWrite32(_oB_, MFC2(&regs_->CP2, _Rt_)); +} + +OP(gteLWC2_stall) { + gteCheckStall(0); + gteLWC2(regs_, code); } -void
psxBASIC(struct psxCP2Regs *regs) { - psxCP2BSC[_Rs_](); +OP(gteSWC2_stall) { + gteCheckStall(0); + gteSWC2(regs_, code); +} + +static void psxBASIC(struct psxCP2Regs *cp2regs) { + psxRegisters *regs_ = (void *)((char *)cp2regs - offsetof(psxRegisters, CP2)); + u32 code = regs_->code; + assert(regs_ == &psxRegs); + switch (_Rs_) { + case 0x00: gteMFC2(regs_, code); break; + case 0x02: gteCFC2(regs_, code); break; + case 0x04: gteMTC2(regs_, code); break; + case 0x06: gteCTC2(regs_, code); break; + default: psxNULL_(); break; + } +} + +OP(psxREGIMM) { + switch (_Rt_) { + case 0x00: psxBLTZ(regs_, code); break; + case 0x01: psxBGEZ(regs_, code); break; + case 0x10: psxBLTZAL(regs_, code); break; + case 0x11: psxBGEZAL(regs_, code); break; + default: psxNULL_(); break; + } } -void psxHLE() { -// psxHLEt[psxRegs.code & 0xffff](); -// psxHLEt[psxRegs.code & 0x07](); // HDHOSHY experimental patch - uint32_t hleCode = psxRegs.code & 0x03ffffff; +OP(psxHLE) { + uint32_t hleCode = code & 0x03ffffff; if (hleCode >= (sizeof(psxHLEt) / sizeof(psxHLEt[0]))) { - psxNULL(); + psxNULL_(); } else { psxHLEt[hleCode](); } } -void (*psxBSC[64])() = { +static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = { psxSPECIAL, psxREGIMM, psxJ , psxJAL , psxBEQ , psxBNE , psxBLEZ, psxBGTZ, psxADDI , psxADDIU , psxSLTI, psxSLTIU, psxANDI, psxORI , psxXORI, psxLUI , psxCOP0 , psxNULL , psxCOP2, psxNULL , psxNULL, psxNULL, psxNULL, psxNULL, @@ -962,8 +1036,7 @@ void (*psxBSC[64])() = { psxNULL , psxNULL , gteSWC2, psxHLE , psxNULL, psxNULL, psxNULL, psxNULL }; - -void (*psxSPC[64])() = { +static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code) = { psxSLL , psxNULL , psxSRL , psxSRA , psxSLLV , psxNULL , psxSRLV, psxSRAV, psxJR , psxJALR , psxNULL, psxNULL, psxSYSCALL, psxBREAK, psxNULL, psxNULL, psxMFHI, psxMTHI , psxMFLO, psxMTLO, psxNULL , psxNULL , psxNULL, psxNULL, @@ -974,39 +1047,17 @@ void (*psxSPC[64])() = { psxNULL, psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , 
psxNULL, psxNULL }; -void (*psxREG[32])() = { - psxBLTZ , psxBGEZ , psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, - psxNULL , psxNULL , psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, - psxBLTZAL, psxBGEZAL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, - psxNULL , psxNULL , psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL -}; - -void (*psxCP0[32])() = { - psxMFC0, psxNULL, psxCFC0, psxNULL, psxMTC0, psxNULL, psxCTC0, psxNULL, - psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, - psxRFE , psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, - psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL -}; - void (*psxCP2[64])(struct psxCP2Regs *regs) = { - psxBASIC, gteRTPS , psxNULL , psxNULL, psxNULL, psxNULL , gteNCLIP, psxNULL, // 00 - psxNULL , psxNULL , psxNULL , psxNULL, gteOP , psxNULL , psxNULL , psxNULL, // 08 - gteDPCS , gteINTPL, gteMVMVA, gteNCDS, gteCDP , psxNULL , gteNCDT , psxNULL, // 10 - psxNULL , psxNULL , psxNULL , gteNCCS, gteCC , psxNULL , gteNCS , psxNULL, // 18 - gteNCT , psxNULL , psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, // 20 - gteSQR , gteDCPL , gteDPCT , psxNULL, psxNULL, gteAVSZ3, gteAVSZ4, psxNULL, // 28 - gteRTPT , psxNULL , psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, // 30 - psxNULL , psxNULL , psxNULL , psxNULL, psxNULL, gteGPF , gteGPL , gteNCCT // 38 -}; - -void (*psxCP2BSC[32])() = { - gteMFC2, psxNULL, gteCFC2, psxNULL, gteMTC2, psxNULL, gteCTC2, psxNULL, - psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, - psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, - psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL + psxBASIC, gteRTPS , gteNULL , gteNULL, gteNULL, gteNULL , gteNCLIP, gteNULL, // 00 + gteNULL , gteNULL , gteNULL , gteNULL, gteOP , gteNULL , gteNULL , gteNULL, // 08 + gteDPCS , gteINTPL, gteMVMVA, gteNCDS, gteCDP , gteNULL , gteNCDT , gteNULL, // 10 + gteNULL , 
gteNULL , gteNULL , gteNCCS, gteCC , gteNULL , gteNCS , gteNULL, // 18 + gteNCT , gteNULL , gteNULL , gteNULL, gteNULL, gteNULL , gteNULL , gteNULL, // 20 + gteSQR , gteDCPL , gteDPCT , gteNULL, gteNULL, gteAVSZ3, gteAVSZ4, gteNULL, // 28 + gteRTPT , gteNULL , gteNULL , gteNULL, gteNULL, gteNULL , gteNULL , gteNULL, // 30 + gteNULL , gteNULL , gteNULL , gteNULL, gteNULL, gteGPF , gteGPL , gteNCCT // 38 }; - /////////////////////////////////////////// static int intInit() { @@ -1017,15 +1068,35 @@ static void intReset() { memset(&ICache, 0xff, sizeof(ICache)); } -void intExecute() { +static inline void execI_(u8 **memRLUT, psxRegisters *regs_) { + regs_->code = fetch(memRLUT, regs_->pc); + + debugI(); + + if (Config.Debug) ProcessDebug(); + + regs_->pc += 4; + regs_->cycle += BIAS; + + psxBSC[regs_->code >> 26](regs_, regs_->code); +} + +static void intExecute() { + psxRegisters *regs_ = &psxRegs; + u8 **memRLUT = psxMemRLUT; extern int stop; - for (;!stop;) - execI(); + + while (!stop) + execI_(memRLUT, regs_); } -void intExecuteBlock() { +static void intExecuteBlock() { + psxRegisters *regs_ = &psxRegs; + u8 **memRLUT = psxMemRLUT; + branch2 = 0; - while (!branch2) execI(); + while (!branch2) + execI_(memRLUT, regs_); } static void intClear(u32 Addr, u32 Size) { @@ -1083,18 +1154,9 @@ void intApplyConfig() { static void intShutdown() { } -// interpreter execution +// single step (may do several ops in case of a branch) void execI() { - psxRegs.code = fetch(psxRegs.pc); - - debugI(); - - if (Config.Debug) ProcessDebug(); - - psxRegs.pc += 4; - psxRegs.cycle += BIAS; - - psxBSC[psxRegs.code >> 26](); + execI_(psxMemRLUT, &psxRegs); } R3000Acpu psxInt = { diff --git a/libpcsxcore/psxinterpreter.h b/libpcsxcore/psxinterpreter.h index 89dd7ea16..87c7a324b 100644 --- a/libpcsxcore/psxinterpreter.h +++ b/libpcsxcore/psxinterpreter.h @@ -1,7 +1,11 @@ - -extern u32 (*fetch)(u32 pc); +#ifndef __PSXINTERPRETER_H__ +#define __PSXINTERPRETER_H__ // called by "new_dynarec" void 
execI(); -void psxNULL(); void intApplyConfig(); +void MTC0(psxRegisters *regs_, int reg, u32 val); +void gteNULL(struct psxCP2Regs *regs); +void (*psxCP2[64])(struct psxCP2Regs *regs); + +#endif // __PSXINTERPRETER_H__ diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index a9b800e39..ba22d45fb 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -83,15 +83,14 @@ void psxShutdown() { } void psxException(u32 code, u32 bd) { - psxRegs.code = fetch(psxRegs.pc); + psxRegs.code = PSXMu32(psxRegs.pc); if (!Config.HLE && ((((psxRegs.code) >> 24) & 0xfe) == 0x4a)) { // "hokuto no ken" / "Crash Bandicot 2" ... // BIOS does not allow to return to GTE instructions // (just skips it, supposedly because it's scheduled already) // so we execute it here - extern void (*psxCP2[64])(void *cp2regs); - psxCP2[psxRegs.code & 0x3f](&psxRegs.CP2D); + psxCP2[psxRegs.code & 0x3f](&psxRegs.CP2); } // Set the Cause diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 49afcb1cd..f99e03ba0 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -219,85 +219,12 @@ void new_dyna_freeze(void *f, int mode); #define new_dyna_set_event(e, c) \ new_dyna_set_event_abs(e, psxRegs.cycle + (c)) -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - -#define _i32(x) *(s32 *)&x -#define _u32(x) x - -#define _i16(x) (((short *)&x)[1]) -#define _u16(x) (((unsigned short *)&x)[1]) - -#define _i8(x) (((char *)&x)[3]) -#define _u8(x) (((unsigned char *)&x)[3]) - -#else - -#define _i32(x) *(s32 *)&x -#define _u32(x) x - -#define _i16(x) *(short *)&x -#define _u16(x) *(unsigned short *)&x - -#define _i8(x) *(char *)&x -#define _u8(x) *(unsigned char *)&x - -#endif - -/**** R3000A Instruction Macros ****/ -#define _PC_ psxRegs.pc // The next PC to be executed - -#define _fOp_(code) ((code >> 26) ) // The opcode part of the instruction register -#define _fFunct_(code) ((code ) & 0x3F) // The funct part of the instruction register -#define _fRd_(code) ((code >> 11) & 0x1F) // The rd part 
of the instruction register -#define _fRt_(code) ((code >> 16) & 0x1F) // The rt part of the instruction register -#define _fRs_(code) ((code >> 21) & 0x1F) // The rs part of the instruction register -#define _fSa_(code) ((code >> 6) & 0x1F) // The sa part of the instruction register -#define _fIm_(code) ((u16)code) // The immediate part of the instruction register -#define _fTarget_(code) (code & 0x03ffffff) // The target part of the instruction register - -#define _fImm_(code) ((s16)code) // sign-extended immediate -#define _fImmU_(code) (code&0xffff) // zero-extended immediate - -#define _Op_ _fOp_(psxRegs.code) -#define _Funct_ _fFunct_(psxRegs.code) -#define _Rd_ _fRd_(psxRegs.code) -#define _Rt_ _fRt_(psxRegs.code) -#define _Rs_ _fRs_(psxRegs.code) -#define _Sa_ _fSa_(psxRegs.code) -#define _Im_ _fIm_(psxRegs.code) -#define _Target_ _fTarget_(psxRegs.code) - -#define _Imm_ _fImm_(psxRegs.code) -#define _ImmU_ _fImmU_(psxRegs.code) - -#define _rRs_ psxRegs.GPR.r[_Rs_] // Rs register -#define _rRt_ psxRegs.GPR.r[_Rt_] // Rt register -#define _rRd_ psxRegs.GPR.r[_Rd_] // Rd register -#define _rSa_ psxRegs.GPR.r[_Sa_] // Sa register -#define _rFs_ psxRegs.CP0.r[_Rd_] // Fs register - -#define _c2dRs_ psxRegs.CP2D.r[_Rs_] // Rs cop2 data register -#define _c2dRt_ psxRegs.CP2D.r[_Rt_] // Rt cop2 data register -#define _c2dRd_ psxRegs.CP2D.r[_Rd_] // Rd cop2 data register -#define _c2dSa_ psxRegs.CP2D.r[_Sa_] // Sa cop2 data register - -#define _rHi_ psxRegs.GPR.n.hi // The HI register -#define _rLo_ psxRegs.GPR.n.lo // The LO register - -#define _JumpTarget_ ((_Target_ * 4) + (_PC_ & 0xf0000000)) // Calculates the target during a jump instruction -#define _BranchTarget_ ((s16)_Im_ * 4 + _PC_) // Calculates the target during a branch instruction - -#define _SetLink(x) psxRegs.GPR.r[x] = _PC_ + 4; // Sets the return address in the link register - int psxInit(); void psxReset(); void psxShutdown(); void psxException(u32 code, u32 bd); void psxBranchTest(); void 
psxExecuteBios(); -int psxTestLoadDelay(int reg, u32 tmp); -void psxDelayTest(int reg, u32 bpc); -void psxTestSWInts(); void psxJumpTest(); #ifdef __cplusplus From 3d08b9fe3d0e3608b1217cdc86cbb868e29a9afa Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 10 Sep 2022 02:39:24 +0300 Subject: [PATCH 204/597] gte: minor diff from libretro --- libpcsxcore/gte.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/gte.c b/libpcsxcore/gte.c index 03261ee22..991a4452c 100644 --- a/libpcsxcore/gte.c +++ b/libpcsxcore/gte.c @@ -154,8 +154,8 @@ // sign-extended by bug in original hardware, according to Nocash docs // GTE section 'Screen Offset and Distance'. The emulator does this // sign extension when it is loaded to GTE by CTC2. -//#define gteH (psxRegs.CP2C.p[26].sw.l) -#define gteH (psxRegs.CP2C.p[26].w.l) +//#define gteH (regs->CP2C.p[26].sw.l) +#define gteH (regs->CP2C.p[26].w.l) #define gteDQA (regs->CP2C.p[27].sw.l) #define gteDQB (((s32 *)regs->CP2C.r)[28]) #define gteZSF3 (regs->CP2C.p[29].sw.l) From 8a0521eef6fe31b9db78b1fef1c64aa49ef8f9f9 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 10 Sep 2022 02:37:50 +0300 Subject: [PATCH 205/597] fix build forgot to add 'extern' --- libpcsxcore/psxinterpreter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/psxinterpreter.h b/libpcsxcore/psxinterpreter.h index 87c7a324b..f8581b825 100644 --- a/libpcsxcore/psxinterpreter.h +++ b/libpcsxcore/psxinterpreter.h @@ -6,6 +6,6 @@ void execI(); void intApplyConfig(); void MTC0(psxRegisters *regs_, int reg, u32 val); void gteNULL(struct psxCP2Regs *regs); -void (*psxCP2[64])(struct psxCP2Regs *regs); +extern void (*psxCP2[64])(struct psxCP2Regs *regs); #endif // __PSXINTERPRETER_H__ From eaa5336dc779b706fa5a49fdec3eaa5c829113d3 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 10 Sep 2022 16:46:05 +0300 Subject: [PATCH 206/597] spu: fix wrong volume shift libretro/pcsx_rearmed#685 --- plugins/dfsound/spu.c | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index f4426abe6..66c7651ef 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1193,13 +1193,13 @@ static void do_samples_finish(int *SSumLR, int ns_to, for (ns = 0; ns < ns_to * 2; ) { d = SSumLR[ns]; SSumLR[ns] = 0; - d = d * vol_l >> 15; + d = d * vol_l >> 14; ssat32_to_16(d); *spu.pS++ = d; ns++; d = SSumLR[ns]; SSumLR[ns] = 0; - d = d * vol_r >> 15; + d = d * vol_r >> 14; ssat32_to_16(d); *spu.pS++ = d; ns++; From 378592c4dbc0e2e2b75da0f49202969f82c0787a Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 10 Sep 2022 02:26:04 +0300 Subject: [PATCH 207/597] standalone: allow lightrec for testing lightrec is not (yet?) here, just to reduce diff from libretro fork --- frontend/menu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/menu.c b/frontend/menu.c index 341a429cc..1c9ed6aba 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1617,7 +1617,7 @@ static menu_entry e_menu_adv_options[] = mee_onoff_h ("Rootcounter hack", 0, Config.RCntFix, 1, h_cfg_rcnt1), #endif mee_onoff_h ("Rootcounter hack 2", 0, Config.VSyncWA, 1, h_cfg_rcnt2), -#ifndef DRC_DISABLE +#if !defined(DRC_DISABLE) || defined(LIGHTREC) mee_onoff_h ("Disable dynarec (slow!)",0, Config.Cpu, 1, h_cfg_nodrc), #endif mee_handler_h ("[Speed hacks]", menu_loop_speed_hacks, h_cfg_shacks), @@ -2633,7 +2633,7 @@ void menu_prepare_emu(void) plat_video_menu_leave(); - #ifndef DRC_DISABLE + #if !defined(DRC_DISABLE) || defined(LIGHTREC) psxCpu = (Config.Cpu == CPU_INTERPRETER) ? 
&psxInt : &psxRec; #else psxCpu = &psxInt; From 4ad17db3c1b6bfa042832d613369eda9c7ffff4f Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 10 Sep 2022 17:53:54 +0300 Subject: [PATCH 208/597] some big endian fixes "kinda sucks but it works" kind of thing --- frontend/libpicofe | 2 +- frontend/menu.c | 41 ++++++++++++++++++++++++++++-------- libpcsxcore/cdrom.c | 16 +++++++++++++- libpcsxcore/cdrom.h | 1 - plugins/dfxvideo/gpu.c | 9 -------- plugins/dfxvideo/gpu.h | 4 ---- plugins/dfxvideo/gpulib_if.c | 20 ++++++++---------- plugins/dfxvideo/soft.c | 15 +++++++------ 8 files changed, 65 insertions(+), 43 deletions(-) diff --git a/frontend/libpicofe b/frontend/libpicofe index 33787db41..7167e5f33 160000 --- a/frontend/libpicofe +++ b/frontend/libpicofe @@ -1 +1 @@ -Subproject commit 33787db41d955f8dcafe833097f2cc87d70186ec +Subproject commit 7167e5f3376f0d0692ae102ed2df1ef5d2cc199a diff --git a/frontend/menu.c b/frontend/menu.c index 1c9ed6aba..4816eceae 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -96,6 +96,7 @@ static int config_save_counter, region, in_type_sel1, in_type_sel2; static int psx_clock; static int memcard1_sel = -1, memcard2_sel = -1; extern int g_autostateld_opt; +static int menu_iopts[8]; int g_opts, g_scaler, g_gamma = 100; int scanlines, scanline_level = 20; int soft_scaling, analog_deadzone; // for Caanoo @@ -1571,14 +1572,16 @@ static menu_entry e_menu_speed_hacks[] = mee_onoff_h ("Assume GTE regs unneeded", 0, new_dynarec_hacks, NDHACK_GTE_UNNEEDED, h_cfg_gteunn), mee_onoff_h ("Disable GTE flags", 0, new_dynarec_hacks, NDHACK_GTE_NO_FLAGS, h_cfg_gteflgs), #endif - mee_onoff_h ("Disable CPU/GTE stalls", 0, Config.DisableStalls, 1, h_cfg_stalls), + mee_onoff_h ("Disable CPU/GTE stalls", 0, menu_iopts[0], 1, h_cfg_stalls), mee_end, }; static int menu_loop_speed_hacks(int id, int keys) { static int sel = 0; + menu_iopts[0] = Config.DisableStalls; me_loop(e_menu_speed_hacks, &sel); + Config.DisableStalls = menu_iopts[0]; return 0; } @@ -1603,22 
+1606,24 @@ static const char h_cfg_nodrc[] = "Disable dynamic recompiler and use interpret static const char h_cfg_shacks[] = "Breaks games but may give better performance"; static const char h_cfg_icache[] = "Support F1 games (only when dynarec is off)"; +enum { AMO_XA, AMO_CDDA, AMO_SIO, AMO_SPUI, AMO_IC, AMO_RCNT, AMO_WA, AMO_CPU }; + static menu_entry e_menu_adv_options[] = { mee_onoff_h ("Show CPU load", 0, g_opts, OPT_SHOWCPU, h_cfg_cpul), mee_onoff_h ("Show SPU channels", 0, g_opts, OPT_SHOWSPU, h_cfg_spu), mee_onoff_h ("Disable Frame Limiter", 0, g_opts, OPT_NO_FRAMELIM, h_cfg_fl), - mee_onoff_h ("Disable XA Decoding", 0, Config.Xa, 1, h_cfg_xa), - mee_onoff_h ("Disable CD Audio", 0, Config.Cdda, 1, h_cfg_cdda), - //mee_onoff_h ("SIO IRQ Always Enabled", 0, Config.Sio, 1, h_cfg_sio), - mee_onoff_h ("SPU IRQ Always Enabled", 0, Config.SpuIrq, 1, h_cfg_spuirq), - mee_onoff_h ("ICache emulation", 0, Config.icache_emulation, 1, h_cfg_icache), + mee_onoff_h ("Disable XA Decoding", 0, menu_iopts[AMO_XA], 1, h_cfg_xa), + mee_onoff_h ("Disable CD Audio", 0, menu_iopts[AMO_CDDA], 1, h_cfg_cdda), + //mee_onoff_h ("SIO IRQ Always Enabled", 0, menu_iopts[AMO_SIO], 1, h_cfg_sio), + mee_onoff_h ("SPU IRQ Always Enabled", 0, menu_iopts[AMO_SPUI], 1, h_cfg_spuirq), + mee_onoff_h ("ICache emulation", 0, menu_iopts[AMO_IC], 1, h_cfg_icache), #ifdef DRC_DISABLE - mee_onoff_h ("Rootcounter hack", 0, Config.RCntFix, 1, h_cfg_rcnt1), + mee_onoff_h ("Rootcounter hack", 0, menu_iopts[AMO_RCNT], 1, h_cfg_rcnt1), #endif - mee_onoff_h ("Rootcounter hack 2", 0, Config.VSyncWA, 1, h_cfg_rcnt2), + mee_onoff_h ("Rootcounter hack 2", 0, menu_iopts[AMO_WA], 1, h_cfg_rcnt2), #if !defined(DRC_DISABLE) || defined(LIGHTREC) - mee_onoff_h ("Disable dynarec (slow!)",0, Config.Cpu, 1, h_cfg_nodrc), + mee_onoff_h ("Disable dynarec (slow!)",0, menu_iopts[AMO_CPU], 1, h_cfg_nodrc), #endif mee_handler_h ("[Speed hacks]", menu_loop_speed_hacks, h_cfg_shacks), mee_end, @@ -1627,7 +1632,25 @@ static 
menu_entry e_menu_adv_options[] = static int menu_loop_adv_options(int id, int keys) { static int sel = 0; + static struct { + boolean *opt; + int *mopt; + } opts[] = { + { &Config.Xa, &menu_iopts[AMO_XA] }, + { &Config.Cdda, &menu_iopts[AMO_CDDA] }, + { &Config.Sio, &menu_iopts[AMO_SIO] }, + { &Config.SpuIrq, &menu_iopts[AMO_SPUI] }, + { &Config.icache_emulation, &menu_iopts[AMO_IC] }, + { &Config.RCntFix, &menu_iopts[AMO_RCNT] }, + { &Config.VSyncWA, &menu_iopts[AMO_WA] }, + { &Config.Cpu, &menu_iopts[AMO_CPU] }, + }; + int i; + for (i = 0; i < ARRAY_SIZE(opts); i++) + *opts[i].mopt = *opts[i].opt; me_loop(e_menu_adv_options, &sel); + for (i = 0; i < ARRAY_SIZE(opts); i++) + *opts[i].opt = *opts[i].mopt; return 0; } diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 3fafc1985..cdb7a1ee9 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -567,6 +567,8 @@ static int cdrSeekTime(unsigned char *target) } static void cdrReadInterrupt(void); +static void cdrPrepCdda(s16 *buf, int samples); +static void cdrAttenuate(s16 *buf, int samples, int stereo); void cdrPlaySeekReadInterrupt(void) { @@ -613,6 +615,7 @@ void cdrPlaySeekReadInterrupt(void) cdrPlayInterrupt_Autopause(); if (!cdr.Muted && !Config.Cdda) { + cdrPrepCdda(read_buf, CD_FRAMESIZE_RAW / 4); cdrAttenuate(read_buf, CD_FRAMESIZE_RAW / 4, 1); SPU_playCDDAchannel(read_buf, CD_FRAMESIZE_RAW, psxRegs.cycle, cdr.FirstSector); cdr.FirstSector = 0; @@ -1117,7 +1120,18 @@ void cdrInterrupt(void) { } while (0) #endif -void cdrAttenuate(s16 *buf, int samples, int stereo) +static void cdrPrepCdda(s16 *buf, int samples) +{ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + int i; + for (i = 0; i < samples; i++) { + buf[i * 2 + 0] = SWAP16(buf[i * 2 + 0]); + buf[i * 2 + 1] = SWAP16(buf[i * 2 + 1]); + } +#endif +} + +static void cdrAttenuate(s16 *buf, int samples, int stereo) { int i, l, r; int ll = cdr.AttenuatorLeftToLeft; diff --git a/libpcsxcore/cdrom.h b/libpcsxcore/cdrom.h index 0cd6c5f6e..52bd21c08 
100644 --- a/libpcsxcore/cdrom.h +++ b/libpcsxcore/cdrom.h @@ -46,7 +46,6 @@ extern "C" { #define SUB_FRAMESIZE 96 void cdrReset(); -void cdrAttenuate(s16 *buf, int samples, int stereo); void cdrInterrupt(void); void cdrPlaySeekReadInterrupt(void); diff --git a/plugins/dfxvideo/gpu.c b/plugins/dfxvideo/gpu.c index 9356a6e93..1a3f25ac1 100644 --- a/plugins/dfxvideo/gpu.c +++ b/plugins/dfxvideo/gpu.c @@ -24,12 +24,8 @@ //////////////////////////////////////////////////////////////////////// unsigned char *psxVub; -signed char *psxVsb; unsigned short *psxVuw; unsigned short *psxVuw_eom; -signed short *psxVsw; -uint32_t *psxVul; -int32_t *psxVsl; //////////////////////////////////////////////////////////////////////// // GPU globals @@ -96,12 +92,7 @@ long CALLBACK GPUinit(void) // GPU INIT //!!! ATTENTION !!! psxVub=vram + 512 * 1024; // security offset into double sized psx vram! - psxVsb=(signed char *)psxVub; // different ways of accessing PSX VRAM - psxVsw=(signed short *)psxVub; - psxVsl=(int32_t *)psxVub; psxVuw=(unsigned short *)psxVub; - psxVul=(uint32_t *)psxVub; - psxVuw_eom=psxVuw+1024*512; // pre-calc of end of vram memset(vram,0x00,(512*2)*1024 + (1024*1024)); diff --git a/plugins/dfxvideo/gpu.h b/plugins/dfxvideo/gpu.h index 745569288..224bc6548 100644 --- a/plugins/dfxvideo/gpu.h +++ b/plugins/dfxvideo/gpu.h @@ -277,11 +277,7 @@ extern BOOL bSkipNextFrame; extern long lGPUstatusRet; extern unsigned char * psxVSecure; extern unsigned char * psxVub; -extern signed char * psxVsb; extern unsigned short * psxVuw; -extern signed short * psxVsw; -extern uint32_t * psxVul; -extern int32_t * psxVsl; extern unsigned short * psxVuw_eom; extern BOOL bChangeWinMode; extern long lSelectedSlot; diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index 47cccedf0..86cfd2684 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -233,12 +233,8 @@ extern int32_t drawH; PSXDisplay_t PSXDisplay; unsigned char *psxVub; -signed 
char *psxVsb; unsigned short *psxVuw; unsigned short *psxVuw_eom; -signed short *psxVsw; -uint32_t *psxVul; -int32_t *psxVsl; long lGPUstatusRet; uint32_t lGPUInfoVals[16]; @@ -266,13 +262,7 @@ long lLowerpart; static void set_vram(void *vram) { psxVub=vram; - - psxVsb=(signed char *)psxVub; // different ways of accessing PSX VRAM - psxVsw=(signed short *)psxVub; - psxVsl=(int32_t *)psxVub; psxVuw=(unsigned short *)psxVub; - psxVul=(uint32_t *)psxVub; - psxVuw_eom=psxVuw+1024*512; // pre-calc of end of vram } @@ -402,8 +392,16 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) return list - list_start; } -void renderer_sync_ecmds(uint32_t *ecmds) +void renderer_sync_ecmds(uint32_t *ecmds_) { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + // the funcs below expect LE + uint32_t i, ecmds[8]; + for (i = 1; i <= 6; i++) + ecmds[i] = HTOLE32(ecmds_[i]); +#else + uint32_t *ecmds = ecmds_; +#endif cmdTexturePage((unsigned char *)&ecmds[1]); cmdTextureWindow((unsigned char *)&ecmds[2]); cmdDrawAreaStart((unsigned char *)&ecmds[3]); diff --git a/plugins/dfxvideo/soft.c b/plugins/dfxvideo/soft.c index 70cf50cd2..a9d9e0426 100644 --- a/plugins/dfxvideo/soft.c +++ b/plugins/dfxvideo/soft.c @@ -971,13 +971,14 @@ static void FillSoftwareAreaTrans(short x0,short y0,short x1, // FILL AREA TRANS { uint32_t *DSTPtr; unsigned short LineOffset; - uint32_t lcol = HOST2LE32(lSetMask | (((uint32_t)(col)) << 16) | col); + uint32_t lcol = lSetMask | ((uint32_t)col << 16) | col; dx>>=1; DSTPtr = (uint32_t *)(psxVuw + (1024*y0) + x0); LineOffset = 512 - dx; if(!bCheckMask && !DrawSemiTrans) { + lcol = HOST2LE32(lcol); for(i=0;i> 16; if(drawX>xmin) xmin=drawX; @@ -2409,9 +2410,9 @@ static inline void drawPoly3Fi(short x1,short y1,short x2,short y2,short x3,shor for(j=xmin;j> 16; if(drawX>xmin) xmin=drawX; @@ -2489,9 +2490,9 @@ static void drawPoly4F(int32_t rgb) for(j=xmin;j Date: Thu, 15 Sep 2022 19:25:01 +0300 Subject: [PATCH 209/597] cdrom: delay the missed irq more --- 
libpcsxcore/cdrom.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index cdb7a1ee9..3d3807857 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -566,6 +566,7 @@ static int cdrSeekTime(unsigned char *target) return seekTime; } +static void cdrUpdateTransferBuf(const u8 *buf); static void cdrReadInterrupt(void); static void cdrPrepCdda(s16 *buf, int samples); static void cdrAttenuate(s16 *buf, int samples, int stereo); @@ -652,6 +653,18 @@ void cdrInterrupt(void) { CDR_LOG_I("cdrom: cmd %02x with irqstat %x\n", cdr.CmdInProgress, cdr.Stat); return; } + if (cdr.Irq1Pending) { + // hand out the "newest" sector, according to nocash + cdrUpdateTransferBuf(CDR_getBuffer()); + CDR_LOG_I("cdrom: %x:%02x:%02x loaded on ack\n", + cdr.Transfer[0], cdr.Transfer[1], cdr.Transfer[2]); + SetResultSize(1); + cdr.Result[0] = cdr.Irq1Pending; + cdr.Stat = (cdr.Irq1Pending & STATUS_ERROR) ? DiskError : DataReady; + cdr.Irq1Pending = 0; + setIrq(0x205); + return; + } cdr.Ctrl &= ~0x80; @@ -1268,25 +1281,6 @@ static void cdrReadInterrupt(void) CDRPLAYSEEKREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime, 0); } -static void doMissedIrqs(void) -{ - if (cdr.Irq1Pending) - { - // hand out the "newest" sector, according to nocash - cdrUpdateTransferBuf(CDR_getBuffer()); - CDR_LOG_I("cdrom: %x:%02x:%02x loaded on ack\n", - cdr.Transfer[0], cdr.Transfer[1], cdr.Transfer[2]); - SetResultSize(1); - cdr.Result[0] = cdr.Irq1Pending; - cdr.Stat = (cdr.Irq1Pending & STATUS_ERROR) ? 
DiskError : DataReady; - cdr.Irq1Pending = 0; - setIrq(0x205); - return; - } - if (!(psxRegs.interrupt & (1 << PSXINT_CDR)) && cdr.CmdInProgress) - CDR_INT(256); -} - /* cdrRead0: bit 0,1 - mode @@ -1431,15 +1425,18 @@ void cdrWrite3(unsigned char rt) { case 0: break; // transfer case 1: + if (cdr.Stat & rt) { #ifdef CDR_LOG_CMD_IRQ - if (cdr.Stat & rt) - SysPrintf("ack %02x\n", cdr.Stat & rt); + SysPrintf("ack %02x (w %02x)\n", cdr.Stat & rt, rt); #endif + if (!(psxRegs.interrupt & (1 << PSXINT_CDR)) && + (cdr.CmdInProgress || cdr.Irq1Pending)) + CDR_INT(2000); // 710+ + } cdr.Stat &= ~rt; if (rt & 0x40) cdr.ParamC = 0; - doMissedIrqs(); return; case 2: cdr.AttenuatorLeftToRightT = rt; From aafce833d9d30a8709b6e07891b8ec2145242fd8 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 17 Aug 2022 00:11:39 +0300 Subject: [PATCH 210/597] gpu_neon: adjust some comments and things --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 17 ++++++++++++----- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 4 ++-- plugins/gpu_neon/psx_gpu/psx_gpu_simd.c | 9 +++++---- plugins/gpu_neon/psx_gpu/vector_ops.h | 2 +- 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 1d513d8be..51ad152dd 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -22,6 +22,13 @@ #endif #include "psx_gpu_simd.h" +#if 0 +void dump_r_d(const char *name, void *dump); +void dump_r_q(const char *name, void *dump); +#define dumprd(n) dump_r_d(#n, n.e) +#define dumprq(n) dump_r_q(#n, n.e) +#endif + u32 span_pixels = 0; u32 span_pixel_blocks = 0; u32 spans = 0; @@ -769,13 +776,13 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, { \ u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data; \ if (_num_spans > MAX_SPANS) \ - *(int *)0 = 1; \ + *(volatile int *)0 = 1; \ if (_num_spans < psx_gpu->num_spans) \ { \ if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW) \ - 
*(int *)0 = 1; \ - if(span_edge_data_element.y > 2048) \ - *(int *)0 = 1; \ + *(volatile int *)0 = 2; \ + if(span_edge_data_element.y >= 2048) \ + *(volatile int *)0 = 3; \ } \ } \ @@ -788,7 +795,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, vec_2x64s alternate_x; \ vec_2x64s alternate_dx_dy; \ vec_4x32s alternate_x_32; \ - vec_2x32s alternate_x_16; \ + vec_4x16u alternate_x_16; \ \ vec_4x16u alternate_select; \ vec_4x16s y_mid_point; \ diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index da47756ef..c62c1baa6 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -369,8 +369,8 @@ function(compute_all_gradients) sub r14, r14, #(62 - 12) @ r14 = shift - (62 - FIXED_BITS) vshll.u16 uvrg_base, uvrg0, #16 @ uvrg_base = uvrg0 << 16 - vdup.u32 r_shift, r14 @ r_shift = { shift, shift, shift, shift } - + vdup.u32 r_shift, r14 @ r_shift = { shift, shift*, shift, shift* } + @ * - vshl.u64: ignored by hw vadd.u32 uvrg_base, uvrgb_phase vabs.s32 ga_uvrg_x, ga_uvrg_x @ ga_uvrg_x = abs(ga_uvrg_x) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c index 5c05b14a8..bbeccb719 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c @@ -313,7 +313,7 @@ typedef union #include #include #include -static int ccount; +static int ccount, dump_enabled; void cmpp(const char *name, const void *a_, const void *b_, size_t len) { const uint32_t *a = a_, *b = b_, masks[] = { 0, 0xff, 0xffff, 0xffffff }; @@ -336,8 +336,9 @@ void cmpp(const char *name, const void *a_, const void *b_, size_t len) void dump_r_(const char *name, void *dump, int is_q) { unsigned long long *u = dump; + if (!dump_enabled) return; //if (ccount > 1) return; - printf("%10s %016llx ", name, u[0]); + printf("%20s %016llx ", name, u[0]); if (is_q) printf("%016llx", u[1]); puts(""); @@ -497,7 +498,7 @@ 
void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, gvreg uvrg_base; gvshll_n_u16(uvrg_base, gvlo(uvrg_xxxx0), 16); // uvrg_base = uvrg0 << 16 - gvdupq_n_u32(r_shift, shift); // r_shift = { shift, shift, shift, shift } + gvdupq_n_s64(r_shift, shift); // r_shift = { shift, shift } gvaddq_u32(uvrg_base, uvrg_base, uvrgb_phase); gvabsq_s32(ga_uvrg_x, ga_uvrg_x); // ga_uvrg_x = abs(ga_uvrg_x) @@ -600,7 +601,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, vec_2x64s alternate_x; \ vec_2x64s alternate_dx_dy; \ vec_4x32s alternate_x_32; \ - vec_2x32s alternate_x_16; \ + vec_4x16u alternate_x_16; \ \ vec_4x16u alternate_select; \ vec_4x16s y_mid_point; \ diff --git a/plugins/gpu_neon/psx_gpu/vector_ops.h b/plugins/gpu_neon/psx_gpu/vector_ops.h index 189eb79d0..6f2bcbf7b 100644 --- a/plugins/gpu_neon/psx_gpu/vector_ops.h +++ b/plugins/gpu_neon/psx_gpu/vector_ops.h @@ -103,7 +103,7 @@ foreach_element(2, (dest).e[_i] = (u32)(source).e[_i] >> (shift)) \ #define shr_4x16b(dest, source, shift) \ - foreach_element(4, (dest).e[_i] = (source).e[_i] >> (shift)) \ + foreach_element(4, (dest).e[_i] = (u16)(source).e[_i] >> (shift)) \ #define shl_4x16b(dest, source, shift) \ foreach_element(4, (dest).e[_i] = (u32)(source).e[_i] << (shift)) \ From 9088aca124430b38f473c931492b333cd998dfda Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 12 Sep 2022 01:40:24 +0300 Subject: [PATCH 211/597] gpu_neon: brand new x86 SSE2+ implementation --- plugins/gpu_neon/psx_gpu/psx_gpu_simd.c | 595 +++++++++++++++++++++--- 1 file changed, 520 insertions(+), 75 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c index bbeccb719..86d1cf1e5 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c @@ -18,6 +18,9 @@ #include "psx_gpu_simd.h" //#define ASM_PROTOTYPES //#include "psx_gpu_simd.h" +#ifdef __SSE2__ +#include +#endif #ifndef SIMD_BUILD #error "please define SIMD_BUILD 
if you want this gpu_neon C simd implementation" #endif @@ -40,22 +43,6 @@ typedef s16 gvhs16 __attribute__((vector_size(8))); typedef s32 gvhs32 __attribute__((vector_size(8))); typedef s64 gvhs64 __attribute__((vector_size(8))); -typedef union -{ - gvhu8 u8; - gvhu16 u16; - gvhu32 u32; - gvhu64 u64; - //u64 u64; - //uint64x1_t u64; - gvhs8 s8; - gvhs16 s16; - gvhs32 s32; - gvhs64 s64; - //s64 s64; - //int64x1_t s64; -} gvhreg; - typedef union { gvu8 u8; @@ -66,13 +53,37 @@ typedef union gvs16 s16; gvs32 s32; gvs64 s64; +#ifdef __SSE2__ + __m128i m; +#endif // this may be tempting, but it causes gcc to do lots of stack spills //gvhreg h[2]; } gvreg; +typedef gvreg gvreg_ua __attribute__((aligned(1))); +typedef uint64_t uint64_t_ua __attribute__((aligned(1))); +typedef gvu8 gvu8_ua __attribute__((aligned(1))); +typedef gvu16 gvu16_ua __attribute__((aligned(1))); + #if defined(__ARM_NEON) || defined(__ARM_NEON__) #include +typedef union +{ + gvhu8 u8; + gvhu16 u16; + gvhu32 u32; + gvhu64 u64; + //u64 u64; + //uint64x1_t u64; + gvhs8 s8; + gvhs16 s16; + gvhs32 s32; + gvhs64 s64; + //s64 s64; + //int64x1_t s64; +} gvhreg; + #define gvaddhn_u32(d, a, b) d.u16 = vaddhn_u32(a.u32, b.u32) #define gvaddw_s32(d, a, b) d.s64 = vaddw_s32(a.s64, b.s32) #define gvabsq_s32(d, s) d.s32 = vabsq_s32(s.s32) @@ -131,8 +142,6 @@ typedef union #define gvqadd_u8(d, a, b) d.u8 = vqadd_u8(a.u8, b.u8) #define gvqsub_u8(d, a, b) d.u8 = vqsub_u8(a.u8, b.u8) #define gvshl_u16(d, a, b) d.u16 = vshl_u16(a.u16, b.s16) -#define gvshlq_s64(d, a, b) d.s64 = vshlq_s64(a.s64, b.s64) -#define gvshlq_u32(d, a, b) d.u32 = vshlq_u32(a.u32, b.s32) #define gvshlq_u64(d, a, b) d.u64 = vshlq_u64(a.u64, b.s64) #define gvshrq_n_s16(d, s, n) d.s16 = vshrq_n_s16(s.s16, n) #define gvshrq_n_u16(d, s, n) d.u16 = vshrq_n_u16(s.u16, n) @@ -146,7 +155,6 @@ typedef union #define gvshr_n_u16(d, s, n) d.u16 = vshr_n_u16(s.u16, n) #define gvshr_n_u32(d, s, n) d.u32 = vshr_n_u32(s.u32, n) #define gvshr_n_u64(d, s, n) d.u64 
= (gvhu64)vshr_n_u64((uint64x1_t)s.u64, n) -#define gvshrn_n_s64(d, s, n) d.s32 = vshrn_n_s64(s.s64, n) #define gvshrn_n_u16(d, s, n) d.u8 = vshrn_n_u16(s.u16, n) #define gvshrn_n_u32(d, s, n) d.u16 = vshrn_n_u32(s.u32, n) #define gvsli_n_u8(d, s, n) d.u8 = vsli_n_u8(d.u8, s.u8, n) @@ -156,6 +164,8 @@ typedef union #define gvqsubq_u8(d, a, b) d.u8 = vqsubq_u8(a.u8, b.u8) #define gvqsubq_u16(d, a, b) d.u16 = vqsubq_u16(a.u16, b.u16) +#define gvmovn_top_u64(d, s) d.u32 = vshrn_n_u64(s.u64, 32) + #define gvget_lo(d, s) d.u16 = vget_low_u16(s.u16) #define gvget_hi(d, s) d.u16 = vget_high_u16(s.u16) #define gvlo(s) ({gvhreg t_; gvget_lo(t_, s); t_;}) @@ -184,7 +194,7 @@ typedef union #define gvld1q_u8(d, s) d.u8 = vld1q_u8(s) #define gvld1q_u16(d, s) d.u16 = vld1q_u16(s) #define gvld1q_u32(d, s) d.u32 = vld1q_u32((const u32 *)(s)) -#define gvld2_dup(v0, v1, p) { \ +#define gvld2_u8_dup(v0, v1, p) { \ uint8x8x2_t v_ = vld2_dup_u8(p); \ v0.u8 = v_.val[0]; v1.u8 = v_.val[1]; \ } @@ -240,10 +250,161 @@ typedef union - gcc <11: (arm32) handles ' == 0' poorly */ -/* #elif defined(__SSE2__) -#include -*/ + +// use a full reg and discard the upper half +#define gvhreg gvreg + +#define gv0() _mm_setzero_si128() + +#ifdef __x86_64__ +#define gvcreate_s32(d, a, b) d.m = _mm_cvtsi64_si128((u32)(a) | ((u64)(b) << 32)) +#define gvcreate_s64(d, s) d.m = _mm_cvtsi64_si128(s) +#else +#define gvcreate_s32(d, a, b) d.m = _mm_set_epi32(0, 0, b, a) +#define gvcreate_s64(d, s) d.m = _mm_loadu_si64(&(s)) +#endif + +#define gvbic_n_u16(d, n) d.m = _mm_andnot_si128(_mm_set1_epi16(n), d.m) +#define gvceqq_u16(d, a, b) d.u16 = vceqq_u16(a.u16, b.u16) +#define gvcgt_s16(d, a, b) d.m = _mm_cmpgt_epi16(a.m, b.m) +#define gvclt_s16(d, a, b) d.m = _mm_cmpgt_epi16(b.m, a.m) +#define gvcreate_u32 gvcreate_s32 +#define gvcreate_u64 gvcreate_s64 +#define gvcombine_u16(d, l, h) d.m = _mm_unpacklo_epi64(l.m, h.m) +#define gvcombine_u32 gvcombine_u16 +#define gvcombine_s64 gvcombine_u16 +#define 
gvdup_l_u8(d, s, l) d.u8 = vdup_lane_u8(s.u8, l) +#define gvdup_l_u16(d, s, l) d.m = _mm_shufflelo_epi16(s.m, (l)|((l)<<2)|((l)<<4)|((l)<<6)) +#define gvdup_l_u32(d, s, l) d.m = vdup_lane_u32(s.u32, l) +#define gvdupq_l_s64(d, s, l) d.m = _mm_unpacklo_epi64(s.m, s.m) +#define gvdupq_l_u32(d, s, l) d.m = _mm_shuffle_epi32(s.m, (l)|((l)<<2)|((l)<<4)|((l)<<6)) +#define gvdup_n_s64(d, n) d.m = _mm_set1_epi64x(n) +#define gvdup_n_u8(d, n) d.m = _mm_set1_epi8(n) +#define gvdup_n_u16(d, n) d.m = _mm_set1_epi16(n) +#define gvdup_n_u32(d, n) d.m = _mm_set1_epi32(n) +#define gvdupq_n_u16(d, n) d.m = _mm_set1_epi16(n) +#define gvdupq_n_u32(d, n) d.m = _mm_set1_epi32(n) +#define gvdupq_n_s64(d, n) d.m = _mm_set1_epi64x(n) +#define gvmax_s16(d, a, b) d.m = _mm_max_epi16(a.m, b.m) +#define gvmin_s16(d, a, b) d.m = _mm_min_epi16(a.m, b.m) +#define gvminq_u8(d, a, b) d.m = _mm_min_epu8(a.m, b.m) +#define gvmovn_u64(d, s) d.m = _mm_shuffle_epi32(s.m, 0 | (2 << 2)) +#define gvmovn_top_u64(d, s) d.m = _mm_shuffle_epi32(s.m, 1 | (3 << 2)) +#define gvmull_s16(d, a, b) { \ + __m128i lo_ = _mm_mullo_epi16(a.m, b.m); \ + __m128i hi_ = _mm_mulhi_epi16(a.m, b.m); \ + d.m = _mm_unpacklo_epi16(lo_, hi_); \ +} +#define gvmull_l_u32(d, a, b, l) { \ + __m128i a_ = _mm_unpacklo_epi32(a.m, a.m); /* lanes 0,1 -> 0,2 */ \ + __m128i b_ = _mm_shuffle_epi32(b.m, (l) | ((l) << 4)); \ + d.m = _mm_mul_epu32(a_, b_); \ +} +#define gvmlsl_s16(d, a, b) { \ + gvreg tmp_; \ + gvmull_s16(tmp_, a, b); \ + d.m = _mm_sub_epi32(d.m, tmp_.m); \ +} +#define gvqadd_u8(d, a, b) d.m = _mm_adds_epu8(a.m, b.m) +#define gvqsub_u8(d, a, b) d.m = _mm_subs_epu8(a.m, b.m) +#define gvshrq_n_s16(d, s, n) d.m = _mm_srai_epi16(s.m, n) +#define gvshrq_n_u16(d, s, n) d.m = _mm_srli_epi16(s.m, n) +#define gvshrq_n_u32(d, s, n) d.m = _mm_srli_epi32(s.m, n) +#define gvshl_n_u32(d, s, n) d.m = _mm_slli_epi32(s.m, n) +#define gvshlq_n_u16(d, s, n) d.m = _mm_slli_epi16(s.m, n) +#define gvshlq_n_u32(d, s, n) d.m = _mm_slli_epi32(s.m, n) 
+#define gvshll_n_u16(d, s, n) d.m = _mm_slli_epi32(_mm_unpacklo_epi16(s.m, gv0()), n) +#define gvshr_n_u16(d, s, n) d.m = _mm_srli_epi16(s.m, n) +#define gvshr_n_u32(d, s, n) d.m = _mm_srli_epi32(s.m, n) +#define gvshr_n_u64(d, s, n) d.m = _mm_srli_epi64(s.m, n) +#define gvshrn_n_s64(d, s, n) { \ + gvreg tmp_; \ + gvshrq_n_s64(tmp_, s, n); \ + d.m = _mm_shuffle_epi32(tmp_.m, 0 | (2 << 2)); \ +} +#define gvqshrun_n_s16(d, s, n) { \ + __m128i t_ = _mm_srai_epi16(s.m, n); \ + d.m = _mm_packus_epi16(t_, t_); \ +} +#define gvqsubq_u8(d, a, b) d.m = _mm_subs_epu8(a.m, b.m) +#define gvqsubq_u16(d, a, b) d.m = _mm_subs_epu16(a.m, b.m) + +#ifdef __SSSE3__ +#define gvabsq_s32(d, s) d.m = _mm_abs_epi32(s.m) +#define gvtbl2_u8(d, a, b) d.m = _mm_shuffle_epi8(a.m, b.m) +#else +// must supply these here or else gcc will produce something terrible with __builtin_shuffle +#define gvmovn_u16(d, s) { \ + __m128i t2_ = _mm_and_si128(s.m, _mm_set1_epi16(0xff)); \ + d.m = _mm_packus_epi16(t2_, t2_); \ +} +#define gvmovn_u32(d, s) { \ + __m128i t2_; \ + t2_ = _mm_shufflelo_epi16(s.m, (0 << 0) | (2 << 2)); \ + t2_ = _mm_shufflehi_epi16(t2_, (0 << 0) | (2 << 2)); \ + d.m = _mm_shuffle_epi32(t2_, (0 << 0) | (2 << 2)); \ +} +#define gvmovn_top_u32(d, s) { \ + __m128i t2_; \ + t2_ = _mm_shufflelo_epi16(s.m, (1 << 0) | (3 << 2)); \ + t2_ = _mm_shufflehi_epi16(t2_, (1 << 0) | (3 << 2)); \ + d.m = _mm_shuffle_epi32(t2_, (0 << 0) | (2 << 2)); \ +} +#endif // !__SSSE3__ +#ifdef __SSE4_1__ +#define gvminq_u16(d, a, b) d.m = _mm_min_epu16(a.m, b.m) +#define gvmovl_u8(d, s) d.m = _mm_cvtepu8_epi16(s.m) +#define gvmovl_s8(d, s) d.m = _mm_cvtepi8_epi16(s.m) +#define gvmovl_s32(d, s) d.m = _mm_cvtepi32_epi64(s.m) +#define gvmull_s32(d, a, b) { \ + __m128i a_ = _mm_unpacklo_epi32(a.m, a.m); /* lanes 0,1 -> 0,2 */ \ + __m128i b_ = _mm_unpacklo_epi32(b.m, b.m); \ + d.m = _mm_mul_epi32(a_, b_); \ +} +#else +#define gvmovl_u8(d, s) d.m = _mm_unpacklo_epi8(s.m, gv0()) +#define gvmovl_s8(d, s) d.m = 
_mm_unpacklo_epi8(s.m, _mm_cmpgt_epi8(gv0(), s.m)) +#define gvmovl_s32(d, s) d.m = _mm_unpacklo_epi32(s.m, _mm_srai_epi32(s.m, 31)) +#endif // !__SSE4_1__ +#ifndef __AVX2__ +#define gvshlq_u64(d, a, b) { \ + gvreg t1_, t2_; \ + t1_.m = _mm_sll_epi64(a.m, b.m); \ + t2_.m = _mm_sll_epi64(a.m, _mm_shuffle_epi32(b.m, (2 << 0) | (3 << 2))); \ + d.u64 = (gvu64){ t1_.u64[0], t2_.u64[1] }; \ +} +#endif // __AVX2__ + +#define gvlo(s) s +#define gvhi(s) ((gvreg)_mm_shuffle_epi32(s.m, (2 << 0) | (3 << 2))) +#define gvget_lo(d, s) d = gvlo(s) +#define gvget_hi(d, s) d = gvhi(s) + +#define gvset_lo(d, s) d.m = _mm_unpacklo_epi64(s.m, gvhi(d).m) +#define gvset_hi(d, s) d.m = _mm_unpacklo_epi64(d.m, s.m) + +#define gvld1_u8(d, s) d.m = _mm_loadu_si64(s) +#define gvld1_u32 gvld1_u8 +#define gvld1q_u8(d, s) d.m = _mm_loadu_si128((__m128i *)(s)) +#define gvld1q_u16 gvld1q_u8 +#define gvld1q_u32 gvld1q_u8 + +#define gvst4_4_inc_u32(v0, v1, v2, v3, p, i) { \ + __m128i t0 = _mm_unpacklo_epi32(v0.m, v1.m); \ + __m128i t1 = _mm_unpacklo_epi32(v2.m, v3.m); \ + _mm_storeu_si128(((__m128i *)(p)) + 0, _mm_unpacklo_epi64(t0, t1)); \ + _mm_storeu_si128(((__m128i *)(p)) + 1, _mm_unpackhi_epi64(t0, t1)); \ + p += (i) / sizeof(*p); \ +} +#define gvst4_pi_u16(v0, v1, v2, v3, p) { \ + __m128i t0 = _mm_unpacklo_epi16(v0.m, v1.m); \ + __m128i t1 = _mm_unpacklo_epi16(v2.m, v3.m); \ + _mm_storeu_si128(((__m128i *)(p)) + 0, _mm_unpacklo_epi32(t0, t1)); \ + _mm_storeu_si128(((__m128i *)(p)) + 1, _mm_unpackhi_epi32(t0, t1)); \ + p += sizeof(t0) * 2 / sizeof(*p); \ +} + #else #error "arch not supported or SIMD support was not enabled by your compiler" #endif @@ -257,6 +418,7 @@ typedef union #define gvaddq_u16 gvadd_u16 #define gvaddq_u32 gvadd_u32 #define gvand(d, a, b) d.u32 = a.u32 & b.u32 +#define gvand_n_u32(d, n) d.u32 &= n #define gvbic(d, a, b) d.u32 = a.u32 & ~b.u32 #define gvbicq gvbic #define gveor(d, a, b) d.u32 = a.u32 ^ b.u32 @@ -271,6 +433,256 @@ typedef union #define gvsubq_u32 gvsub_u32 
#define gvorr(d, a, b) d.u32 = a.u32 | b.u32 #define gvorrq gvorr +#define gvorr_n_u16(d, n) d.u16 |= n + +// fallbacks +#if 1 + +#ifndef gvaddhn_u32 +#define gvaddhn_u32(d, a, b) { \ + gvreg tmp1_ = { .u32 = a.u32 + b.u32 }; \ + gvmovn_top_u32(d, tmp1_); \ +} +#endif +#ifndef gvabsq_s32 +#define gvabsq_s32(d, s) { \ + gvreg tmp1_ = { .s32 = (gvs32){} - s.s32 }; \ + gvreg mask_ = { .s32 = s.s32 >> 31 }; \ + gvbslq_(d, mask_, tmp1_, s); \ +} +#endif +#ifndef gvbit +#define gvbslq_(d, s, a, b) d.u32 = (a.u32 & s.u32) | (b.u32 & ~s.u32) +#define gvbifq(d, a, b) gvbslq_(d, b, d, a) +#define gvbit(d, a, b) gvbslq_(d, b, a, d) +#endif +#ifndef gvaddw_s32 +#define gvaddw_s32(d, a, b) {gvreg t_; gvmovl_s32(t_, b); d.s64 += t_.s64;} +#endif +#ifndef gvhaddq_u16 +// can do this because the caller needs the msb clear +#define gvhaddq_u16(d, a, b) d.u16 = (a.u16 + b.u16) >> 1 +#endif +#ifndef gvminq_u16 +#define gvminq_u16(d, a, b) { \ + gvu16 t_ = a.u16 < b.u16; \ + d.u16 = (a.u16 & t_) | (b.u16 & ~t_); \ +} +#endif +#ifndef gvmlsq_s32 +#define gvmlsq_s32(d, a, b) d.s32 -= a.s32 * b.s32 +#endif +#ifndef gvmlsq_l_s32 +#define gvmlsq_l_s32(d, a, b, l){gvreg t_; gvdupq_l_u32(t_, b, l); d.s32 -= a.s32 * t_.s32;} +#endif +#ifndef gvmla_s32 +#define gvmla_s32(d, a, b) d.s32 += a.s32 * b.s32 +#endif +#ifndef gvmla_u32 +#define gvmla_u32 gvmla_s32 +#endif +#ifndef gvmlaq_s32 +#define gvmlaq_s32(d, a, b) d.s32 += a.s32 * b.s32 +#endif +#ifndef gvmlaq_u32 +#define gvmlaq_u32 gvmlaq_s32 +#endif +#ifndef gvmlal_u8 +#define gvmlal_u8(d, a, b) {gvreg t_; gvmull_u8(t_, a, b); d.u16 += t_.u16;} +#endif +#ifndef gvmlal_s32 +#define gvmlal_s32(d, a, b) {gvreg t_; gvmull_s32(t_, a, b); d.s64 += t_.s64;} +#endif +#ifndef gvmov_l_s32 +#define gvmov_l_s32(d, s, l) d.s32[l] = s +#endif +#ifndef gvmov_l_u32 +#define gvmov_l_u32(d, s, l) d.u32[l] = s +#endif +#ifndef gvmul_s32 +#define gvmul_s32(d, a, b) d.s32 = a.s32 * b.s32 +#endif +#ifndef gvmull_u8 +#define gvmull_u8(d, a, b) { \ + gvreg t1_, 
t2_; \ + gvmovl_u8(t1_, a); \ + gvmovl_u8(t2_, b); \ + d.u16 = t1_.u16 * t2_.u16; \ +} +#endif +#ifndef gvmull_s32 +// note: compilers tend to use int regs here +#define gvmull_s32(d, a, b) { \ + d.s64[0] = (s64)a.s32[0] * b.s32[0]; \ + d.s64[1] = (s64)a.s32[1] * b.s32[1]; \ +} +#endif +#ifndef gvneg_s32 +#define gvneg_s32(d, s) d.s32 = -s.s32 +#endif +// x86 note: needs _mm_sllv_epi16 (avx512), else this sucks terribly +#ifndef gvshl_u16 +#define gvshl_u16(d, a, b) d.u16 = a.u16 << b.u16 +#endif +// x86 note: needs _mm_sllv_* (avx2) +#ifndef gvshlq_u64 +#define gvshlq_u64(d, a, b) d.u64 = a.u64 << b.u64 +#endif +#ifndef gvshll_n_s8 +#define gvshll_n_s8(d, s, n) {gvreg t_; gvmovl_s8(t_, s); gvshlq_n_u16(d, t_, n);} +#endif +#ifndef gvshll_n_u8 +#define gvshll_n_u8(d, s, n) {gvreg t_; gvmovl_u8(t_, s); gvshlq_n_u16(d, t_, n);} +#endif +#ifndef gvshr_n_u8 +#define gvshr_n_u8(d, s, n) d.u8 = s.u8 >> (n) +#endif +#ifndef gvshrq_n_s64 +#define gvshrq_n_s64(d, s, n) d.s64 = s.s64 >> (n) +#endif +#ifndef gvshrn_n_u16 +#define gvshrn_n_u16(d, s, n) {gvreg t_; gvshrq_n_u16(t_, s, n); gvmovn_u16(d, t_);} +#endif +#ifndef gvshrn_n_u32 +#define gvshrn_n_u32(d, s, n) {gvreg t_; gvshrq_n_u32(t_, s, n); gvmovn_u32(d, t_);} +#endif +#ifndef gvsli_n_u8 +#define gvsli_n_u8(d, s, n) d.u8 = (s.u8 << (n)) | (d.u8 & ((1u << (n)) - 1u)) +#endif +#ifndef gvsri_n_u8 +#define gvsri_n_u8(d, s, n) d.u8 = (s.u8 >> (n)) | (d.u8 & ((0xff00u >> (n)) & 0xffu)) +#endif +#ifndef gvtstq_u16 +#define gvtstq_u16(d, a, b) d.u16 = (a.u16 & b.u16) != 0 +#endif + +#ifndef gvld2_u8_dup +#define gvld2_u8_dup(v0, v1, p) { \ + gvdup_n_u8(v0, ((const u8 *)(p))[0]); \ + gvdup_n_u8(v1, ((const u8 *)(p))[1]); \ +} +#endif +#ifndef gvst1_u8 +#define gvst1_u8(v, p) *(uint64_t_ua *)(p) = v.u64[0] +#endif +#ifndef gvst1q_u16 +#define gvst1q_u16(v, p) *(gvreg_ua *)(p) = v +#endif +#ifndef gvst1q_inc_u32 +#define gvst1q_inc_u32(v, p, i) {*(gvreg_ua *)(p) = v; p += (i) / sizeof(*p);} +#endif +#ifndef gvst1q_pi_u32 
+#define gvst1q_pi_u32(v, p) gvst1q_inc_u32(v, p, sizeof(v)) +#endif +#ifndef gvst1q_2_pi_u32 +#define gvst1q_2_pi_u32(v0, v1, p) { \ + gvst1q_inc_u32(v0, p, sizeof(v0)); \ + gvst1q_inc_u32(v1, p, sizeof(v1)); \ +} +#endif +#ifndef gvst2_u8 +#define gvst2_u8(v0, v1, p) {gvreg t_; gvzip_u8(t_, v0, v1); *(gvu8_ua *)(p) = t_.u8;} +#endif +#ifndef gvst2_u16 +#define gvst2_u16(v0, v1, p) {gvreg t_; gvzip_u16(t_, v0, v1); *(gvu16_ua *)(p) = t_.u16;} +#endif + +// note: these shuffles assume sizeof(gvhreg) == 16 && sizeof(gvreg) == 16 +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +// prefer __builtin_shuffle on gcc as it handles -1 poorly +#if __has_builtin(__builtin_shufflevector) && !__has_builtin(__builtin_shuffle) + +#ifndef gvld2q_u8 +#define gvld2q_u8(v0, v1, p) { \ + gvu8 v0_ = ((gvu8_ua *)(p))[0]; \ + gvu8 v1_ = ((gvu8_ua *)(p))[1]; \ + v0.u8 = __builtin_shufflevector(v0_, v1_, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30); \ + v1.u8 = __builtin_shufflevector(v0_, v1_, 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31); \ +} +#endif +#ifndef gvmovn_u16 +#define gvmovn_u16(d, s) \ + d.u8 = __builtin_shufflevector(s.u8, s.u8, 0,2,4,6,8,10,12,14,-1,-1,-1,-1,-1,-1,-1,-1) +#endif +#ifndef gvmovn_u32 +#define gvmovn_u32(d, s) \ + d.u16 = __builtin_shufflevector(s.u16, s.u16, 0,2,4,6,-1,-1,-1,-1) +#endif +#ifndef gvmovn_top_u32 +#define gvmovn_top_u32(d, s) \ + d.u16 = __builtin_shufflevector(s.u16, s.u16, 1,3,5,7,-1,-1,-1,-1) +#endif +#ifndef gvzip_u8 +#define gvzip_u8(d, a, b) \ + d.u8 = __builtin_shufflevector(a.u8, b.u8, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23) +#endif +#ifndef gvzip_u16 +#define gvzip_u16(d, a, b) \ + d.u16 = __builtin_shufflevector(a.u16, b.u16, 0,8,1,9,2,10,3,11) +#endif +#ifndef gvzipq_u16 +#define gvzipq_u16(d0, d1, s0, s1) { \ + gvu16 t_ = __builtin_shufflevector(s0.u16, s1.u16, 0, 8, 1, 9, 2, 10, 3, 11); \ + d1.u16 = __builtin_shufflevector(s0.u16, s1.u16, 4,12, 5,13, 6, 14, 7, 15); \ + d0.u16 = t_; \ +} +#endif + +#else // 
!__has_builtin(__builtin_shufflevector) + +#ifndef gvld2q_u8 +#define gvld2q_u8(v0, v1, p) { \ + gvu8 v0_ = ((gvu8_ua *)(p))[0]; \ + gvu8 v1_ = ((gvu8_ua *)(p))[1]; \ + v0.u8 = __builtin_shuffle(v0_, v1_, (gvu8){0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30}); \ + v1.u8 = __builtin_shuffle(v0_, v1_, (gvu8){1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31}); \ +} +#endif +#ifndef gvmovn_u16 +#define gvmovn_u16(d, s) \ + d.u8 = __builtin_shuffle(s.u8, (gvu8){0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14}) +#endif +#ifndef gvmovn_u32 +#define gvmovn_u32(d, s) \ + d.u16 = __builtin_shuffle(s.u16, (gvu16){0,2,4,6,0,2,4,6}) +#endif +#ifndef gvmovn_top_u32 +#define gvmovn_top_u32(d, s) \ + d.u16 = __builtin_shuffle(s.u16, (gvu16){1,3,5,7,1,3,5,7}) +#endif +#ifndef gvtbl2_u8 +#define gvtbl2_u8(d, a, b) d.u8 = __builtin_shuffle(a.u8, b.u8) +#endif +#ifndef gvzip_u8 +#define gvzip_u8(d, a, b) \ + d.u8 = __builtin_shuffle(a.u8, b.u8, (gvu8){0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23}) +#endif +#ifndef gvzip_u16 +#define gvzip_u16(d, a, b) \ + d.u16 = __builtin_shuffle(a.u16, b.u16, (gvu16){0,8,1,9,2,10,3,11}) +#endif +#ifndef gvzipq_u16 +#define gvzipq_u16(d0, d1, s0, s1) { \ + gvu16 t_ = __builtin_shuffle(s0.u16, s1.u16, (gvu16){0, 8, 1, 9, 2, 10, 3, 11}); \ + d1.u16 = __builtin_shuffle(s0.u16, s1.u16, (gvu16){4,12, 5,13, 6, 14, 7, 15}); \ + d0.u16 = t_; \ +} +#endif + +#endif // __builtin_shufflevector || __builtin_shuffle + +#ifndef gvtbl2_u8 +#define gvtbl2_u8(d, a, b) { \ + int i_; \ + for (i_ = 0; i_ < 16; i_++) \ + d.u8[i_] = a.u8[b.u8[i_]]; \ +} +#endif + +#endif // fallbacks #if defined(__arm__) @@ -498,7 +910,6 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, gvreg uvrg_base; gvshll_n_u16(uvrg_base, gvlo(uvrg_xxxx0), 16); // uvrg_base = uvrg0 << 16 - gvdupq_n_s64(r_shift, shift); // r_shift = { shift, shift } gvaddq_u32(uvrg_base, uvrg_base, uvrgb_phase); gvabsq_s32(ga_uvrg_x, ga_uvrg_x); // ga_uvrg_x = abs(ga_uvrg_x) @@ -511,10 +922,25 @@ void 
compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, gvmull_l_u32(gw_rg_y, gvhi(ga_uvrg_y), d0, 0); gvmull_l_u32(ga_uvrg_y, gvlo(ga_uvrg_y), d0, 0); +#if defined(__ARM_NEON) || defined(__ARM_NEON__) + gvdupq_n_s64(r_shift, shift); // r_shift = { shift, shift } gvshlq_u64(gw_rg_x, gw_rg_x, r_shift); gvshlq_u64(ga_uvrg_x, ga_uvrg_x, r_shift); gvshlq_u64(gw_rg_y, gw_rg_y, r_shift); gvshlq_u64(ga_uvrg_y, ga_uvrg_y, r_shift); +#elif defined(__SSE2__) + r_shift.m = _mm_cvtsi32_si128(-shift); + gw_rg_x.m = _mm_srl_epi64(gw_rg_x.m, r_shift.m); + ga_uvrg_x.m = _mm_srl_epi64(ga_uvrg_x.m, r_shift.m); + gw_rg_y.m = _mm_srl_epi64(gw_rg_y.m, r_shift.m); + ga_uvrg_y.m = _mm_srl_epi64(ga_uvrg_y.m, r_shift.m); +#else + gvdupq_n_s64(r_shift, -shift); // r_shift = { shift, shift } + gvshrq_u64(gw_rg_x, gw_rg_x, r_shift); + gvshrq_u64(ga_uvrg_x, ga_uvrg_x, r_shift); + gvshrq_u64(gw_rg_y, gw_rg_y, r_shift); + gvshrq_u64(ga_uvrg_y, ga_uvrg_y, r_shift); +#endif gveorq(gs_uvrg_x, gs_uvrg_x, w_mask); gvmovn_u64(tmp_lo, ga_uvrg_x); @@ -621,7 +1047,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, s32 clip; \ vec_4x32u v_clip; \ \ - union { vec_2x64s full; vec_1x64s h[2]; } edges_xy; \ + vec_2x64s edges_xy; \ vec_2x32s edges_dx_dy; \ vec_2x32u edge_shifts; \ \ @@ -676,6 +1102,14 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, gvdup_n_u16(c_0x07, 0x07); \ gvdup_n_u16(c_0xFFFE, 0xFFFE); \ +#if defined(__ARM_NEON) || defined(__ARM_NEON__) +// better encoding, remaining bits are unused anyway +#define mask_edge_shifts(edge_shifts) \ + gvbic_n_u16(edge_shifts, 0xE0) +#else +#define mask_edge_shifts(edge_shifts) \ + gvand_n_u32(edge_shifts, 0x1F) +#endif #define compute_edge_delta_x2() \ { \ @@ -694,9 +1128,9 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, gvshr_n_u32(height_reciprocals, edge_shifts, 10); \ \ gvmla_s32(heights_b, x_starts, heights); \ - gvbic_n_u16(edge_shifts, 0xE0); \ + mask_edge_shifts(edge_shifts); \ 
gvmul_s32(edges_dx_dy, widths, height_reciprocals); \ - gvmull_s32(edges_xy.full, heights_b, height_reciprocals); \ + gvmull_s32(edges_xy, heights_b, height_reciprocals); \ } \ #define compute_edge_delta_x3(start_c, height_a, height_b) \ @@ -721,7 +1155,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, gvshr_n_u32(height_reciprocals, edge_shifts, 10); \ height_reciprocal_alt = edge_shift_alt >> 10; \ \ - gvbic_n_u16(edge_shifts, 0xE0); \ + mask_edge_shifts(edge_shifts); \ edge_shift_alt &= 0x1F; \ \ gvsub_u32(heights_b, heights, c_0x01); \ @@ -730,7 +1164,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, gvmla_s32(heights_b, x_starts, heights); \ height_b_alt += height_minor_b * start_c; \ \ - gvmull_s32(edges_xy.full, heights_b, height_reciprocals); \ + gvmull_s32(edges_xy, heights_b, height_reciprocals); \ edge_alt = (s64)height_b_alt * height_reciprocal_alt; \ \ gvmul_s32(edges_dx_dy, widths, height_reciprocals); \ @@ -769,29 +1203,29 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, #define setup_spans_clip(direction, alternate_active) \ { \ gvdupq_n_u32(v_clip, clip); \ - gvmlal_s32(edges_xy.full, edges_dx_dy, gvlo(v_clip)); \ + gvmlal_s32(edges_xy, edges_dx_dy, gvlo(v_clip)); \ setup_spans_clip_alternate_##alternate_active(); \ setup_spans_clip_interpolants_##direction(); \ } \ -#define setup_spans_adjust_edges_alternate_no(left_index, right_index) \ +#define setup_spans_adjust_edges_alternate_no(left_half, right_half) \ { \ vec_2x64s edge_shifts_64; \ - union { vec_2x64s full; vec_1x64s h[2]; } edges_dx_dy_64; \ + vec_2x64s edges_dx_dy_64; \ vec_1x64s left_x_hi, right_x_hi; \ \ gvmovl_s32(edge_shifts_64, edge_shifts); \ - gvshlq_s64(edges_xy.full, edges_xy.full, edge_shifts_64); \ + gvshlq_u64(edges_xy, edges_xy, edge_shifts_64); \ \ - gvmovl_s32(edges_dx_dy_64.full, edges_dx_dy); \ - gvshlq_s64(edges_dx_dy_64.full, edges_dx_dy_64.full, edge_shifts_64); \ + gvmovl_s32(edges_dx_dy_64, edges_dx_dy); 
\ + gvshlq_u64(edges_dx_dy_64, edges_dx_dy_64, edge_shifts_64); \ \ - gvdupq_l_s64(left_x, edges_xy.h[left_index], 0); \ - gvdupq_l_s64(right_x, edges_xy.h[right_index], 0); \ + gvdupq_l_s64(left_x, gv##left_half(edges_xy), 0); \ + gvdupq_l_s64(right_x, gv##right_half(edges_xy), 0); \ \ - gvdupq_l_s64(left_dx_dy, edges_dx_dy_64.h[left_index], 0); \ - gvdupq_l_s64(right_dx_dy, edges_dx_dy_64.h[right_index], 0); \ + gvdupq_l_s64(left_dx_dy, gv##left_half(edges_dx_dy_64), 0); \ + gvdupq_l_s64(right_dx_dy, gv##right_half(edges_dx_dy_64), 0); \ \ gvadd_s64(left_x_hi, gvlo(left_x), gvlo(left_dx_dy)); \ gvadd_s64(right_x_hi, gvlo(right_x), gvlo(right_dx_dy)); \ @@ -803,9 +1237,9 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, gvaddq_s64(right_dx_dy, right_dx_dy, right_dx_dy); \ } \ -#define setup_spans_adjust_edges_alternate_yes(left_index, right_index) \ +#define setup_spans_adjust_edges_alternate_yes(left_half, right_half) \ { \ - setup_spans_adjust_edges_alternate_no(left_index, right_index); \ + setup_spans_adjust_edges_alternate_no(left_half, right_half); \ s64 edge_dx_dy_alt_64; \ vec_1x64s alternate_x_hi; \ \ @@ -845,9 +1279,9 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, #define setup_spans_increment_alternate_yes() \ { \ vec_2x32s alternate_x_32_lo, alternate_x_32_hi; \ - gvshrn_n_s64(alternate_x_32_lo, alternate_x, 32); \ + gvmovn_top_u64(alternate_x_32_lo, alternate_x); \ gvaddq_s64(alternate_x, alternate_x, alternate_dx_dy); \ - gvshrn_n_s64(alternate_x_32_hi, alternate_x, 32); \ + gvmovn_top_u64(alternate_x_32_hi, alternate_x); \ gvaddq_s64(alternate_x, alternate_x, alternate_dx_dy); \ gvcombine_u32(alternate_x_32, alternate_x_32_lo, alternate_x_32_hi); \ gvmovn_u32(alternate_x_16, alternate_x_32); \ @@ -855,6 +1289,18 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, #define setup_spans_increment_alternate_no() \ +#if defined(__SSE2__) && !(defined(__AVX512BW__) && defined(__AVX512VL__)) +#define 
setup_spans_make_span_shift(span_shift) { \ + gvreg tab1_ = { .u8 = { 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00 } }; \ + gvtbl2_u8(span_shift, tab1_, span_shift); \ + gvorr_n_u16(span_shift, 0xff00); \ + (void)c_0xFFFE; \ +} +#else +#define setup_spans_make_span_shift(span_shift) \ + gvshl_u16(span_shift, c_0xFFFE, span_shift) +#endif + #define setup_spans_set_x4(alternate, direction, alternate_active) \ { \ gvst1q_pi_u32(uvrg, span_uvrg_offset); \ @@ -873,14 +1319,14 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, *span_b_offset++ = b; \ setup_spans_adjust_interpolants_##direction(); \ \ - gvshrn_n_s64(left_x_32_lo, left_x, 32); \ - gvshrn_n_s64(right_x_32_lo, right_x, 32); \ + gvmovn_top_u64(left_x_32_lo, left_x); \ + gvmovn_top_u64(right_x_32_lo, right_x); \ \ gvaddq_s64(left_x, left_x, left_dx_dy); \ gvaddq_s64(right_x, right_x, right_dx_dy); \ \ - gvshrn_n_s64(left_x_32_hi, left_x, 32); \ - gvshrn_n_s64(right_x_32_hi, right_x, 32); \ + gvmovn_top_u64(left_x_32_hi, left_x); \ + gvmovn_top_u64(right_x_32_hi, right_x); \ \ gvaddq_s64(left_x, left_x, left_dx_dy); \ gvaddq_s64(right_x, right_x, right_dx_dy); \ @@ -903,7 +1349,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, gvsub_u16(left_right_x_16_hi, left_right_x_16_hi, left_right_x_16_lo); \ gvadd_u16(left_right_x_16_hi, left_right_x_16_hi, c_0x07); \ gvand(span_shift, left_right_x_16_hi, c_0x07); \ - gvshl_u16(span_shift, c_0xFFFE, span_shift); \ + setup_spans_make_span_shift(span_shift); \ gvshr_n_u16(left_right_x_16_hi, left_right_x_16_hi, 3); \ \ gvst4_pi_u16(left_right_x_16_lo, left_right_x_16_hi, span_shift, y_x4, \ @@ -919,7 +1365,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, #define setup_spans_alternate_adjust_no() \ -#define setup_spans_down(left_index, right_index, alternate, alternate_active) \ +#define setup_spans_down(left_half, right_half, alternate, alternate_active) \ setup_spans_alternate_adjust_##alternate_active(); \ 
if(y_c > psx_gpu->viewport_end_y) \ height -= y_c - psx_gpu->viewport_end_y - 1; \ @@ -939,8 +1385,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, u64 y_x4_ = ((u64)(y_a + 3) << 48) | ((u64)(u16)(y_a + 2) << 32) \ | (u32)((y_a + 1) << 16) | (u16)y_a; \ gvcreate_u64(y_x4, y_x4_); \ - setup_spans_adjust_edges_alternate_##alternate_active(left_index, \ - right_index); \ + setup_spans_adjust_edges_alternate_##alternate_active(left_half, right_half); \ \ psx_gpu->num_spans = height; \ do \ @@ -962,7 +1407,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, #define setup_spans_up_decrement_height_no() \ {} \ -#define setup_spans_up(left_index, right_index, alternate, alternate_active) \ +#define setup_spans_up(left_half, right_half, alternate, alternate_active) \ setup_spans_alternate_adjust_##alternate_active(); \ y_a--; \ \ @@ -986,10 +1431,9 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, u64 y_x4_ = ((u64)(y_a - 3) << 48) | ((u64)(u16)(y_a - 2) << 32) \ | (u32)((y_a - 1) << 16) | (u16)y_a; \ gvcreate_u64(y_x4, y_x4_); \ - gvaddw_s32(edges_xy.full, edges_xy.full, edges_dx_dy); \ + gvaddw_s32(edges_xy, edges_xy, edges_dx_dy); \ setup_spans_alternate_pre_increment_##alternate_active(); \ - setup_spans_adjust_edges_alternate_##alternate_active(left_index, \ - right_index); \ + setup_spans_adjust_edges_alternate_##alternate_active(left_half, right_half); \ setup_spans_adjust_interpolants_up(); \ \ psx_gpu->num_spans = height; \ @@ -1000,8 +1444,8 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, } \ } \ -#define index_left 0 -#define index_right 1 +#define half_left lo +#define half_right hi #define setup_spans_up_up(minor, major) \ setup_spans_prologue(yes); \ @@ -1013,7 +1457,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, gvcreate_u32(x_ends, x_c, x_b); \ \ compute_edge_delta_x3(x_b, height, height_minor_a); \ - setup_spans_up(index_##major, index_##minor, minor, 
yes) \ + setup_spans_up(half_##major, half_##minor, minor, yes) \ void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, vertex_struct *v_b, vertex_struct *v_c) @@ -1045,7 +1489,7 @@ void setup_spans_up_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a, gvcreate_u32(x_ends, x_c, x_b); \ \ compute_edge_delta_x3(x_b, height, height_minor_a); \ - setup_spans_down(index_##major, index_##minor, minor, yes) \ + setup_spans_down(half_##major, half_##minor, minor, yes) \ void setup_spans_down_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, vertex_struct *v_b, vertex_struct *v_c) @@ -1071,7 +1515,7 @@ void setup_spans_down_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a, s32 height = y_a - y_c; \ \ compute_edge_delta_x2(); \ - setup_spans_up(index_left, index_right, none, no) \ + setup_spans_up(half_left, half_right, none, no) \ void setup_spans_up_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a, vertex_struct *v_b, vertex_struct *v_c) @@ -1107,7 +1551,7 @@ void setup_spans_up_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a, s32 height = y_c - y_a; \ \ compute_edge_delta_x2(); \ - setup_spans_down(index_left, index_right, none, no) \ + setup_spans_down(half_left, half_right, none, no) \ void setup_spans_down_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a, vertex_struct *v_b, vertex_struct *v_c) @@ -1172,10 +1616,10 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, gvcreate_s32(height_increment, 0, height_minor_b); - gvmlal_s32(edges_xy.full, edges_dx_dy, height_increment); + gvmlal_s32(edges_xy, edges_dx_dy, height_increment); gvcreate_s64(edges_xy_b_left, edge_alt); - gvcombine_s64(edges_xy_b, edges_xy_b_left, gvhi(edges_xy.full)); + gvcombine_s64(edges_xy_b, edges_xy_b_left, gvhi(edges_xy)); edge_shifts_b = edge_shifts; gvmov_l_u32(edge_shifts_b, edge_shift_alt, 0); @@ -1203,8 +1647,8 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, u64 y_x4_ = ((u64)(y_a - 3) << 48) | ((u64)(u16)(y_a - 2) << 32) | (u32)((y_a - 
1) << 16) | (u16)y_a; gvcreate_u64(y_x4, y_x4_); - gvaddw_s32(edges_xy.full, edges_xy.full, edges_dx_dy); - setup_spans_adjust_edges_alternate_no(index_left, index_right); + gvaddw_s32(edges_xy, edges_xy, edges_dx_dy); + setup_spans_adjust_edges_alternate_no(lo, hi); setup_spans_adjust_interpolants_up(); psx_gpu->num_spans = height_minor_a; @@ -1219,7 +1663,7 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, span_b_offset += height_minor_a; } - edges_xy.full = edges_xy_b; + edges_xy = edges_xy_b; edges_dx_dy = edges_dx_dy_b; edge_shifts = edge_shifts_b; @@ -1244,7 +1688,7 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, u64 y_x4_ = ((u64)(y_a + 3) << 48) | ((u64)(u16)(y_a + 2) << 32) | (u32)((y_a + 1) << 16) | (u16)y_a; gvcreate_u64(y_x4, y_x4_); - setup_spans_adjust_edges_alternate_no(index_left, index_right); + setup_spans_adjust_edges_alternate_no(lo, hi); // FIXME: overflow corner case if(psx_gpu->num_spans + height_minor_b == MAX_SPANS) @@ -1289,7 +1733,7 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, gvld1q_u32(uvrg_dx, psx_gpu->uvrg_dx.e); \ gvshlq_n_u32(uvrg_dx4, uvrg_dx, 2); \ gvshlq_n_u32(uvrg_dx8, uvrg_dx, 3); \ - gvld2_dup(texture_mask_lo, texture_mask_hi, &psx_gpu->texture_mask_width); \ + gvld2_u8_dup(texture_mask_lo, texture_mask_hi, &psx_gpu->texture_mask_width); \ gvcombine_u16(texture_mask, texture_mask_lo, texture_mask_hi) \ #define setup_blocks_variables_shaded_untextured(target) \ @@ -1311,7 +1755,7 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, gvdup_n_u8(d64_0x07, 0x07); \ gvdup_n_u8(d64_1, 1); \ gvdup_n_u8(d64_4, 4); \ - gvdup_n_u8(d64_128, 128); \ + gvdup_n_u8(d64_128, 128u); \ \ gvld1_u32(rgb_dx_lo, &psx_gpu->uvrg_dx.e[2]); \ gvcreate_u32(rgb_dx_hi, psx_gpu->b_block_span.e[1], 0); \ @@ -1333,7 +1777,7 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, gvld1_u32(uv, psx_gpu->uvrg.e); \ gvshl_n_u32(uv_dx4, uv_dx, 2); \ 
gvshl_n_u32(uv_dx8, uv_dx, 3); \ - gvld2_dup(texture_mask_lo, texture_mask_hi, &psx_gpu->texture_mask_width); \ + gvld2_u8_dup(texture_mask_lo, texture_mask_hi, &psx_gpu->texture_mask_width); \ gvcombine_u16(texture_mask, texture_mask_lo, texture_mask_hi) \ #define setup_blocks_variables_unshaded_untextured_direct() \ @@ -1719,7 +2163,7 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, \ u32 num_spans = psx_gpu->num_spans; \ \ - u16 *fb_ptr; \ + u16 * __restrict__ fb_ptr; \ u32 y; \ \ u32 num_blocks = psx_gpu->num_blocks; \ @@ -2096,11 +2540,12 @@ void texture_blocks_16bpp(psx_gpu_struct *psx_gpu) #define shade_blocks_store_direct(_draw_mask, _pixels) \ { \ + u16 * __restrict__ fb_ptr = block->fb_ptr; \ vec_8x16u fb_pixels; \ + gvld1q_u16(fb_pixels, fb_ptr); \ gvorrq(_pixels, _pixels, msb_mask); \ - gvld1q_u16(fb_pixels, block->fb_ptr); \ gvbifq(fb_pixels, _pixels, _draw_mask); \ - gvst1q_u16(fb_pixels, block->fb_ptr); \ + gvst1q_u16(fb_pixels, fb_ptr); \ } \ #define shade_blocks_textured_false_modulated_check_dithered(target) \ @@ -2140,7 +2585,7 @@ void texture_blocks_16bpp(psx_gpu_struct *psx_gpu) gvmull_u8(pixels_##component, texels_##component, colors_##component) \ #define shade_blocks_textured_modulated_do(shading, dithering, target) \ - block_struct *block = psx_gpu->blocks; \ + const block_struct * __restrict__ block = psx_gpu->blocks; \ u32 num_blocks = psx_gpu->num_blocks; \ vec_8x16u texels; \ \ @@ -2182,7 +2627,7 @@ void texture_blocks_16bpp(psx_gpu_struct *psx_gpu) gvdup_n_u8(d64_0x1F, 0x1F); \ gvdup_n_u8(d64_1, 1); \ gvdup_n_u8(d64_4, 4); \ - gvdup_n_u8(d64_128, 128); \ + gvdup_n_u8(d64_128, 128u); \ \ gvdupq_n_u16(d128_0x8000, 0x8000); \ \ From af486d6e6500b327e3fe34848848f6331ef777b2 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 15 Sep 2022 19:11:11 +0300 Subject: [PATCH 212/597] frontend: handle double res rendering, enable on x86_64 --- configure | 6 ++++++ frontend/plat_sdl.c | 5 +++-- frontend/plugin_lib.c | 5 +++-- 3 
files changed, 12 insertions(+), 4 deletions(-) diff --git a/configure b/configure index 8ef59cedb..2352ec6aa 100755 --- a/configure +++ b/configure @@ -288,6 +288,12 @@ aarch64) builtin_gpu="neon" fi ;; +x86_64) + enable_dynarec="no" + if [ "x$builtin_gpu" = "x" ]; then + builtin_gpu="neon" + fi + ;; *) # dynarec only available on ARM enable_dynarec="no" diff --git a/frontend/plat_sdl.c b/frontend/plat_sdl.c index 5e11cf843..5f29b90c8 100644 --- a/frontend/plat_sdl.c +++ b/frontend/plat_sdl.c @@ -147,8 +147,9 @@ void plat_init(void) SDL_WM_SetCaption("PCSX-ReARMed " REV, NULL); shadow_size = g_menuscreen_w * g_menuscreen_h * 2; - if (shadow_size < 640 * 512 * 2) - shadow_size = 640 * 512 * 2; + // alloc enough for double res. rendering + if (shadow_size < 1024 * 512 * 2) + shadow_size = 1024 * 512 * 2; shadow_fb = malloc(shadow_size); menubg_img = malloc(shadow_size); diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index cc3576bfb..588f13370 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -308,6 +308,7 @@ static void pl_vout_flip(const void *vram, int stride, int bgr24, int w, int h) unsigned char *dest = pl_vout_buf; const unsigned short *src = vram; int dstride = pl_vout_w, h1 = h; + int h_full = pl_vout_h - pl_vout_yoffset; int doffs; pcnt_start(PCNT_BLIT); @@ -318,7 +319,7 @@ static void pl_vout_flip(const void *vram, int stride, int bgr24, int w, int h) pl_plat_clear(); else memset(pl_vout_buf, 0, - dstride * pl_vout_h * pl_vout_bpp / 8); + dstride * h_full * pl_vout_bpp / 8); goto out_hud; } @@ -334,7 +335,7 @@ static void pl_vout_flip(const void *vram, int stride, int bgr24, int w, int h) pl_plat_clear(); else memset(pl_vout_buf, 0, - dstride * pl_vout_h * pl_vout_bpp / 8); + dstride * h_full * pl_vout_bpp / 8); clear_counter--; } From 415213c97e0e35ea392114c71d8a10371c64d6fd Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 15 Sep 2022 21:12:10 +0300 Subject: [PATCH 213/597] misc: patch some issues indicated by clang --- 
libpcsxcore/cdriso.c | 2 +- libpcsxcore/cdrom.c | 3 ++- libpcsxcore/misc.c | 2 +- plugins/dfxvideo/gpulib_if.c | 4 ++++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index be7300f5a..246b6a062 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -1547,7 +1547,7 @@ static boolean CALLBACK ISOreadTrack(unsigned char *time) { fseek(subHandle, sector * SUB_FRAMESIZE, SEEK_SET); if (fread(subbuffer, 1, SUB_FRAMESIZE, subHandle) != SUB_FRAMESIZE) /* Faulty subchannel data shouldn't cause a read failure */ - return 0; + return 1; if (subChanRaw) DecodeRawSubData(); } diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 3d3807857..487dfb8d2 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -545,7 +545,8 @@ static void cdrPlayInterrupt_Autopause() static int cdrSeekTime(unsigned char *target) { - int seekTime = abs(msf2sec(cdr.SetSectorPlay) - msf2sec(target)) * (cdReadTime / 200); + int diff = msf2sec(cdr.SetSectorPlay) - msf2sec(target); + int seekTime = abs(diff) * (cdReadTime / 200); /* * Gameblabla : * It was originally set to 1000000 for Driver, however it is not high enough for Worms Pinball diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index c06a8a4fa..b3dfdf533 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -90,7 +90,7 @@ static void mmssdd( char *b, char *p ) time[0] = itob(time[0]); time[1] = itob(time[1]); time[2] = itob(time[2]); #define READTRACK() \ - if (CDR_readTrack(time) == -1) return -1; \ + if (!CDR_readTrack(time)) return -1; \ buf = (void *)CDR_getBuffer(); \ if (buf == NULL) return -1; \ else CheckPPFCache((u8 *)buf, time[0], time[1], time[2]); diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index 86cfd2684..245d2274b 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -18,6 +18,10 @@ #include "../gpulib/gpu.h" #include "../../include/arm_features.h" +#if defined(__GNUC__) && (__GNUC__ >= 
6 || (defined(__clang_major__) && __clang_major__ >= 10)) +#pragma GCC diagnostic ignored "-Wmisleading-indentation" +#endif + #define u32 uint32_t #define INFO_TW 0 From 77e1e47949d469acab865d38fe7493a4a295139e Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 16 Sep 2022 00:03:40 +0300 Subject: [PATCH 214/597] don't cast between long and pointers for win64 long is 32 bit there --- frontend/main.c | 6 +++--- libpcsxcore/psxmem.c | 8 ++++---- plugins/dfsound/freeze.c | 10 +++++----- plugins/gpu_neon/psx_gpu/psx_gpu.c | 3 ++- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/frontend/main.c b/frontend/main.c index 671068d3d..60ec51cdc 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -989,7 +989,7 @@ void *SysLoadLibrary(const char *lib) { tmp++; for (i = 0; i < ARRAY_SIZE(builtin_plugins); i++) if (strcmp(tmp, builtin_plugins[i]) == 0) - return (void *)(long)(PLUGIN_DL_BASE + builtin_plugin_ids[i]); + return (void *)(uintptr_t)(PLUGIN_DL_BASE + builtin_plugin_ids[i]); } #ifndef _WIN32 @@ -1004,7 +1004,7 @@ void *SysLoadLibrary(const char *lib) { } void *SysLoadSym(void *lib, const char *sym) { - unsigned int plugid = (unsigned int)(long)lib; + unsigned int plugid = (unsigned int)(uintptr_t)lib; if (PLUGIN_DL_BASE <= plugid && plugid < PLUGIN_DL_BASE + ARRAY_SIZE(builtin_plugins)) return plugin_link(plugid - PLUGIN_DL_BASE, sym); @@ -1025,7 +1025,7 @@ const char *SysLibError() { } void SysCloseLibrary(void *lib) { - unsigned int plugid = (unsigned int)(long)lib; + unsigned int plugid = (unsigned int)(uintptr_t)lib; if (PLUGIN_DL_BASE <= plugid && plugid < PLUGIN_DL_BASE + ARRAY_SIZE(builtin_plugins)) return; diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 46cee0cab..2a9633348 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -59,13 +59,13 @@ void *psxMap(unsigned long addr, size_t size, int is_fixed, /* if (is_fixed) flags |= MAP_FIXED; */ - req = (void *)addr; + req = (void *)(uintptr_t)addr; ret = mmap(req, size, 
PROT_READ | PROT_WRITE, flags, -1, 0); if (ret == MAP_FAILED) return NULL; } - if (addr != 0 && ret != (void *)addr) { + if (addr != 0 && ret != (void *)(uintptr_t)addr) { SysMessage("psxMap: warning: wanted to map @%08x, got %p\n", addr, ret); @@ -74,14 +74,14 @@ void *psxMap(unsigned long addr, size_t size, int is_fixed, return NULL; } - if (((addr ^ (unsigned long)ret) & ~0xff000000l) && try_ < 2) + if (((addr ^ (unsigned long)(uintptr_t)ret) & ~0xff000000l) && try_ < 2) { psxUnmap(ret, size, tag); // try to use similarly aligned memory instead // (recompiler needs this) mask = try_ ? 0xffff : 0xffffff; - addr = ((unsigned long)ret + mask) & ~mask; + addr = ((uintptr_t)ret + mask) & ~mask; try_++; goto retry; } diff --git a/plugins/dfsound/freeze.c b/plugins/dfsound/freeze.c index a400cf7cb..51e9fd775 100644 --- a/plugins/dfsound/freeze.c +++ b/plugins/dfsound/freeze.c @@ -187,8 +187,8 @@ static void load_channel(SPUCHAN *d, const SPUCHAN_orig *s, int ch) d->sinc = s->sinc; d->sinc_inv = 0; memcpy(spu.SB + ch * SB_SIZE, s->SB, sizeof(spu.SB[0]) * SB_SIZE); - d->pCurr = (void *)((long)s->iCurr & 0x7fff0); - d->pLoop = (void *)((long)s->iLoop & 0x7fff0); + d->pCurr = (void *)((uintptr_t)s->iCurr & 0x7fff0); + d->pLoop = (void *)((uintptr_t)s->iLoop & 0x7fff0); d->bReverb = s->bReverb; d->iLeftVolume = s->iLeftVolume; d->iRightVolume = s->iRightVolume; @@ -258,7 +258,7 @@ long CALLBACK SPUfreeze(uint32_t ulFreezeMode, SPUFreeze_t * pF, pFO=(SPUOSSFreeze_t *)(pF+1); // store special stuff pFO->spuIrq = spu.regArea[(H_SPUirqAddr - 0x0c00) / 2]; - if(spu.pSpuIrq) pFO->pSpuIrq = (unsigned long)spu.pSpuIrq-(unsigned long)spu.spuMemC; + if(spu.pSpuIrq) pFO->pSpuIrq = spu.pSpuIrq - spu.spuMemC; pFO->spuAddr=spu.spuAddr; if(pFO->spuAddr==0) pFO->spuAddr=0xbaadf00d; @@ -340,8 +340,8 @@ void LoadStateV5(SPUFreeze_t * pF) { load_channel(&spu.s_chan[i],&pFO->s_chan[i],i); - spu.s_chan[i].pCurr+=(unsigned long)spu.spuMemC; - spu.s_chan[i].pLoop+=(unsigned long)spu.spuMemC; + 
spu.s_chan[i].pCurr+=(uintptr_t)spu.spuMemC; + spu.s_chan[i].pLoop+=(uintptr_t)spu.spuMemC; } } diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 51ad152dd..85cf89faa 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -14,6 +14,7 @@ #include #include +#include #include #include "common.h" @@ -4016,7 +4017,7 @@ void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, num_width = width; vram_ptr = (void *)vram_ptr16; - if((long)vram_ptr16 & 2) + if((uintptr_t)vram_ptr16 & 2) { *vram_ptr16 = color_32bpp; vram_ptr = (void *)(vram_ptr16 + 1); From b1eb84bc59c07b110e36714bf0222197f298ec09 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 16 Sep 2022 02:31:37 +0300 Subject: [PATCH 215/597] cdrom: adjust timing --- libpcsxcore/cdrom.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 487dfb8d2..3f4057954 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -295,8 +295,8 @@ static void setIrq(int log_cmd) if (cdr.Stat) { int i; - SysPrintf("CDR IRQ=%d cmd %02x stat %02x: ", - !!(cdr.Stat & cdr.Reg2), log_cmd, cdr.Stat); + SysPrintf("%u cdrom: CDR IRQ=%d cmd %02x stat %02x: ", + psxRegs.cycle, !!(cdr.Stat & cdr.Reg2), log_cmd, cdr.Stat); for (i = 0; i < cdr.ResultC; i++) SysPrintf("%02x ", cdr.Result[i]); SysPrintf("\n"); @@ -651,7 +651,8 @@ void cdrInterrupt(void) { int i; if (cdr.Stat) { - CDR_LOG_I("cdrom: cmd %02x with irqstat %x\n", cdr.CmdInProgress, cdr.Stat); + CDR_LOG_I("%u cdrom: cmd %02x with irqstat %x\n", + psxRegs.cycle, cdr.CmdInProgress, cdr.Stat); return; } if (cdr.Irq1Pending) { @@ -1118,7 +1119,8 @@ void cdrInterrupt(void) { } else if (cdr.Cmd && cdr.Cmd != (Cmd & 0xff)) { cdr.CmdInProgress = cdr.Cmd; - CDR_LOG_I("cdrom: cmd %02x came before %02x finished\n", cdr.Cmd, Cmd); + CDR_LOG_I("%u cdrom: cmd %02x came before %02x finished\n", + psxRegs.cycle, cdr.Cmd, 
Cmd); } setIrq(Cmd); @@ -1344,7 +1346,7 @@ void cdrWrite1(unsigned char rt) { } #ifdef CDR_LOG_CMD_IRQ - SysPrintf("CD1 write: %x (%s)", rt, CmdName[rt]); + SysPrintf("%u cdrom: CD1 write: %x (%s)", psxRegs.cycle, rt, CmdName[rt]); if (cdr.ParamC) { int i; SysPrintf(" Param[%d] = {", cdr.ParamC); @@ -1365,8 +1367,8 @@ void cdrWrite1(unsigned char rt) { CDR_INT(5000); } else { - CDR_LOG_I("cdr: cmd while busy: %02x, prev %02x, busy %02x\n", - rt, cdr.Cmd, cdr.CmdInProgress); + CDR_LOG_I("%u cdrom: cmd while busy: %02x, prev %02x, busy %02x\n", + psxRegs.cycle, rt, cdr.Cmd, cdr.CmdInProgress); if (cdr.CmdInProgress < 0x100) // no pending 2nd response cdr.CmdInProgress = rt; } @@ -1428,11 +1430,13 @@ void cdrWrite3(unsigned char rt) { case 1: if (cdr.Stat & rt) { #ifdef CDR_LOG_CMD_IRQ - SysPrintf("ack %02x (w %02x)\n", cdr.Stat & rt, rt); + SysPrintf("%u cdrom: ack %02x (w %02x)\n", + psxRegs.cycle, cdr.Stat & rt, rt); #endif + // note: Croc vs Discworld Noir if (!(psxRegs.interrupt & (1 << PSXINT_CDR)) && (cdr.CmdInProgress || cdr.Irq1Pending)) - CDR_INT(2000); // 710+ + CDR_INT(850); // 711-993 } cdr.Stat &= ~rt; From 4ebb76b301b3cffd1639a06146e8875397cce197 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 16 Sep 2022 02:34:58 +0300 Subject: [PATCH 216/597] gpu_neon: rm wrong cost qualifier --- plugins/gpu_neon/psx_gpu/psx_gpu_simd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c index 86d1cf1e5..486897f71 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c @@ -2585,7 +2585,7 @@ void texture_blocks_16bpp(psx_gpu_struct *psx_gpu) gvmull_u8(pixels_##component, texels_##component, colors_##component) \ #define shade_blocks_textured_modulated_do(shading, dithering, target) \ - const block_struct * __restrict__ block = psx_gpu->blocks; \ + block_struct * __restrict__ block = psx_gpu->blocks; \ u32 num_blocks = 
psx_gpu->num_blocks; \ vec_8x16u texels; \ \ From 1f1128d05bddab7552b94a8963c050c75decc384 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 16 Sep 2022 22:59:25 +0300 Subject: [PATCH 217/597] main: log missing CPU features will probably be too late but ohwell --- frontend/main.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/frontend/main.c b/frontend/main.c index 60ec51cdc..144ce490a 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -29,6 +29,14 @@ #include "arm_features.h" #include "revision.h" +#if defined(__has_builtin) +#define DO_CPU_CHECKS __has_builtin(__builtin_cpu_init) +#elif defined(__x86_64__) || defined(__i386__) +#define DO_CPU_CHECKS 1 +#else +#define DO_CPU_CHECKS 0 +#endif + #ifndef NO_FRONTEND #include "libpicofe/input.h" #include "libpicofe/plat.h" @@ -405,6 +413,24 @@ void emu_on_new_cd(int show_hud_msg) } } +static void log_wrong_cpu(void) +{ +#if DO_CPU_CHECKS + __builtin_cpu_init(); + #define CHECK_CPU(name) if (!__builtin_cpu_supports(name)) \ + SysPrintf("ERROR: compiled for " name ", which is unsupported by the CPU\n") +#ifdef __SSE2__ + CHECK_CPU("sse2"); +#endif +#ifdef __SSSE3__ + CHECK_CPU("ssse3"); +#endif +#ifdef __SSE4_1__ + CHECK_CPU("sse4.1"); +#endif +#endif // DO_CPU_CHECKS +} + int emu_core_preinit(void) { // what is the name of the config file? 
@@ -419,6 +445,8 @@ int emu_core_preinit(void) #endif emuLog = stdout; + log_wrong_cpu(); + SetIsoFile(NULL); memset(&Config, 0, sizeof(Config)); From 8aeb66dcc917d0271e4574799b1793da7f3426c4 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 17 Sep 2022 22:46:03 +0300 Subject: [PATCH 218/597] cdrom: actually reject commands when not ready before it would still execute them and just do an error response --- libpcsxcore/cdrom.c | 89 +++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 3f4057954..e01d7debd 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -500,7 +500,7 @@ static void cdrPlayInterrupt_Autopause() //cdr.ResultReady = 1; //cdr.Stat = DataReady; cdr.Stat = DataEnd; - setIrq(0x200); + setIrq(0x1000); // 0x1000 just for logging purposes StopCdda(); SetPlaySeekRead(cdr.StatP, 0); @@ -539,7 +539,7 @@ static void cdrPlayInterrupt_Autopause() cdr.Stat = DataReady; SetResultSize(8); - setIrq(0x201); + setIrq(0x1001); } } @@ -590,7 +590,7 @@ void cdrPlaySeekReadInterrupt(void) SetPlaySeekRead(cdr.StatP, 0); cdr.Result[0] = cdr.StatP; cdr.Stat = Complete; - setIrq(0x202); + setIrq(0x1002); Find_CurTrack(cdr.SetSectorPlay); ReadTrack(cdr.SetSectorPlay); @@ -639,14 +639,17 @@ void cdrPlaySeekReadInterrupt(void) CDRPLAYSEEKREAD_INT(cdReadTime, 0); } +#define CMD_PART2 0x100 +#define CMD_WHILE_NOT_READY 0x200 + void cdrInterrupt(void) { - int no_busy_error = 0; int start_rotating = 0; int error = 0; unsigned int seekTime = 0; u32 second_resp_time = 0; u8 ParamC; u8 set_loc[3]; + u16 not_ready = 0; u16 Cmd; int i; @@ -664,12 +667,10 @@ void cdrInterrupt(void) { cdr.Result[0] = cdr.Irq1Pending; cdr.Stat = (cdr.Irq1Pending & STATUS_ERROR) ? 
DiskError : DataReady; cdr.Irq1Pending = 0; - setIrq(0x205); + setIrq(0x1003); return; } - cdr.Ctrl &= ~0x80; - // default response SetResultSize(1); cdr.Result[0] = cdr.StatP; @@ -680,18 +681,29 @@ void cdrInterrupt(void) { ParamC = cdr.ParamC; if (Cmd < 0x100) { - cdr.Cmd = 0; + cdr.Ctrl &= ~0x80; cdr.ParamC = 0; + cdr.Cmd = 0; + } + + switch (cdr.DriveState) { + case DRIVESTATE_LID_OPEN: + case DRIVESTATE_RESCAN_CD: + case DRIVESTATE_PREPARE_CD: + // no disk or busy with the initial scan, allowed cmds are limited + not_ready = CMD_WHILE_NOT_READY; + break; } - switch (Cmd) { + switch (Cmd | not_ready) { case CdlNop: + case CdlNop + CMD_WHILE_NOT_READY: if (cdr.DriveState != DRIVESTATE_LID_OPEN) cdr.StatP &= ~STATUS_SHELLOPEN; - no_busy_error = 1; break; case CdlSetloc: + case CdlSetloc + CMD_WHILE_NOT_READY: CDR_LOG("CDROM setloc command (%02X, %02X, %02X)\n", cdr.Param[0], cdr.Param[1], cdr.Param[2]); // MM must be BCD, SS must be BCD and <0x60, FF must be BCD and <0x75 @@ -800,7 +812,7 @@ void cdrInterrupt(void) { start_rotating = 1; break; - case CdlStandby + 0x100: + case CdlStandby + CMD_PART2: cdr.Stat = Complete; break; @@ -826,7 +838,7 @@ void cdrInterrupt(void) { cdr.DriveState = DRIVESTATE_STOPPED; break; - case CdlStop + 0x100: + case CdlStop + CMD_PART2: cdr.Stat = Complete; break; @@ -857,25 +869,25 @@ void cdrInterrupt(void) { second_resp_time = (((cdr.Mode & MODE_SPEED) ? 2 : 1) * 1000000); } SetPlaySeekRead(cdr.StatP, 0); - cdr.Ctrl |= 0x80; break; - case CdlPause + 0x100: + case CdlPause + CMD_PART2: cdr.Stat = Complete; break; case CdlReset: + case CdlReset + CMD_WHILE_NOT_READY: StopCdda(); StopReading(); SetPlaySeekRead(cdr.StatP, 0); cdr.Muted = FALSE; cdr.Mode = 0x20; /* This fixes This is Football 2, Pooh's Party lockups */ - second_resp_time = 4100000; - no_busy_error = 1; + second_resp_time = not_ready ? 
70000 : 4100000; start_rotating = 1; break; - case CdlReset + 0x100: + case CdlReset + CMD_PART2: + case CdlReset + CMD_PART2 + CMD_WHILE_NOT_READY: cdr.Stat = Complete; break; @@ -893,19 +905,19 @@ void cdrInterrupt(void) { break; case CdlSetmode: + case CdlSetmode + CMD_WHILE_NOT_READY: CDR_LOG("cdrWrite1() Log: Setmode %x\n", cdr.Param[0]); cdr.Mode = cdr.Param[0]; - no_busy_error = 1; break; case CdlGetparam: + case CdlGetparam + CMD_WHILE_NOT_READY: /* Gameblabla : According to mednafen, Result size should be 5 and done this way. */ SetResultSize(5); cdr.Result[1] = cdr.Mode; cdr.Result[2] = 0; cdr.Result[3] = cdr.File; cdr.Result[4] = cdr.Channel; - no_busy_error = 1; break; case CdlGetlocL: @@ -924,7 +936,7 @@ void cdrInterrupt(void) { start_rotating = 1; break; - case CdlReadT + 0x100: + case CdlReadT + CMD_PART2: cdr.Stat = Complete; break; @@ -983,6 +995,7 @@ void cdrInterrupt(void) { break; case CdlTest: + case CdlTest + CMD_WHILE_NOT_READY: switch (cdr.Param[0]) { case 0x20: // System Controller ROM Version SetResultSize(4); @@ -997,14 +1010,13 @@ void cdrInterrupt(void) { memcpy(cdr.Result, Test23, 4); break; } - no_busy_error = 1; break; case CdlID: second_resp_time = 20480; break; - case CdlID + 0x100: + case CdlID + CMD_PART2: SetResultSize(8); cdr.Result[0] = cdr.StatP; cdr.Result[1] = 0; @@ -1029,6 +1041,7 @@ void cdrInterrupt(void) { break; case CdlInit: + case CdlInit + CMD_WHILE_NOT_READY: StopCdda(); StopReading(); SetPlaySeekRead(cdr.StatP, 0); @@ -1036,23 +1049,22 @@ void cdrInterrupt(void) { cdr.StatP |= STATUS_SHELLOPEN; cdr.DriveState = DRIVESTATE_RESCAN_CD; CDRLID_INT(20480); - no_busy_error = 1; start_rotating = 1; break; case CdlGetQ: - no_busy_error = 1; + case CdlGetQ + CMD_WHILE_NOT_READY: break; case CdlReadToc: + case CdlReadToc + CMD_WHILE_NOT_READY: second_resp_time = cdReadTime * 180 / 4; - no_busy_error = 1; start_rotating = 1; break; - case CdlReadToc + 0x100: + case CdlReadToc + CMD_PART2: + case CdlReadToc + CMD_PART2 + 
CMD_WHILE_NOT_READY: cdr.Stat = Complete; - no_busy_error = 1; break; case CdlReadN: @@ -1083,36 +1095,25 @@ void cdrInterrupt(void) { break; case CdlSync: default: - CDR_LOG_I("Invalid command: %02x\n", Cmd); + CDR_LOG_I("Invalid command: %02x%s\n", + Cmd, not_ready ? " (not_ready)" : ""); error = ERROR_INVALIDCMD; // FALLTHROUGH set_error: SetResultSize(2); cdr.Result[0] = cdr.StatP | STATUS_ERROR; - cdr.Result[1] = error; + cdr.Result[1] = not_ready ? ERROR_NOTREADY : error; cdr.Stat = DiskError; break; } if (cdr.DriveState == DRIVESTATE_STOPPED && start_rotating) { + printf("cdr.DriveState %d->%d\n", cdr.DriveState, DRIVESTATE_STANDBY); cdr.DriveState = DRIVESTATE_STANDBY; cdr.StatP |= STATUS_ROTATING; } - if (!no_busy_error) { - switch (cdr.DriveState) { - case DRIVESTATE_LID_OPEN: - case DRIVESTATE_RESCAN_CD: - case DRIVESTATE_PREPARE_CD: - SetResultSize(2); - cdr.Result[0] = cdr.StatP | STATUS_ERROR; - cdr.Result[1] = ERROR_NOTREADY; - cdr.Stat = DiskError; - break; - } - } - if (second_resp_time) { cdr.CmdInProgress = Cmd | 0x100; CDR_INT(second_resp_time); @@ -1197,7 +1198,7 @@ static void cdrReadInterruptSetResult(unsigned char result) SetResultSize(1); cdr.Result[0] = result; cdr.Stat = (result & STATUS_ERROR) ? DiskError : DataReady; - setIrq(0x203); + setIrq(0x1004); } static void cdrUpdateTransferBuf(const u8 *buf) @@ -1399,7 +1400,7 @@ void cdrWrite2(unsigned char rt) { return; case 1: cdr.Reg2 = rt; - setIrq(0x204); + setIrq(0x1005); return; case 2: cdr.AttenuatorLeftToLeftT = rt; From 871bedc6237736d01d3e0fd83e0998810b8302ab Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 17 Sep 2022 23:18:47 +0300 Subject: [PATCH 219/597] cdrom: allow to interrupt initial scan sequence probably wrong but ohwell... 
--- libpcsxcore/cdrom.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index e01d7debd..f168a0fce 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -308,6 +308,8 @@ static void setIrq(int log_cmd) // (yes it's slow, but you probably don't want to modify it) void cdrLidSeekInterrupt(void) { + CDR_LOG_I("%u %s cdr.DriveState=%d\n", psxRegs.cycle, __func__, cdr.DriveState); + switch (cdr.DriveState) { default: case DRIVESTATE_STANDBY: @@ -369,10 +371,14 @@ void cdrLidSeekInterrupt(void) break; case DRIVESTATE_PREPARE_CD: - cdr.StatP |= STATUS_SEEK; - - cdr.DriveState = DRIVESTATE_STANDBY; - CDRLID_INT(cdReadTime * 26); + if (cdr.StatP & STATUS_SEEK) { + SetPlaySeekRead(cdr.StatP, 0); + cdr.DriveState = DRIVESTATE_STANDBY; + } + else { + SetPlaySeekRead(cdr.StatP, STATUS_SEEK); + CDRLID_INT(cdReadTime * 26); + } break; } } @@ -687,9 +693,18 @@ void cdrInterrupt(void) { } switch (cdr.DriveState) { + case DRIVESTATE_PREPARE_CD: + if (Cmd > 2) { + // Syphon filter 2 expects commands to work shortly after it sees + // STATUS_ROTATING, so give up trying to emulate the startup seq + cdr.DriveState = DRIVESTATE_STANDBY; + cdr.StatP &= ~STATUS_SEEK; + psxRegs.interrupt &= ~(1 << PSXINT_CDRLID); + break; + } + // fallthrough case DRIVESTATE_LID_OPEN: case DRIVESTATE_RESCAN_CD: - case DRIVESTATE_PREPARE_CD: // no disk or busy with the initial scan, allowed cmds are limited not_ready = CMD_WHILE_NOT_READY; break; @@ -1564,9 +1579,15 @@ void cdrReset() { cdr.Channel = 1; cdr.Reg2 = 0x1f; cdr.Stat = NoIntr; - cdr.DriveState = DRIVESTATE_STANDBY; - cdr.StatP = STATUS_ROTATING; cdr.FifoOffset = DATA_SIZE; // fifo empty + if (CdromId[0] == '\0') { + cdr.DriveState = DRIVESTATE_STOPPED; + cdr.StatP = 0; + } + else { + cdr.DriveState = DRIVESTATE_STANDBY; + cdr.StatP = STATUS_ROTATING; + } // BIOS player - default values cdr.AttenuatorLeftToLeft = 0x80; From 
480e570b6f27909ecdb3f7b2769171b7a7b7f484 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 17 Sep 2022 23:28:46 +0300 Subject: [PATCH 220/597] cdrom: treat seek the same as other commands much simpler this way --- libpcsxcore/cdrom.c | 49 +++++++++++++------------------- libpcsxcore/cdrom.h | 2 +- libpcsxcore/new_dynarec/emu_if.c | 2 +- libpcsxcore/r3000a.c | 2 +- 4 files changed, 23 insertions(+), 32 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index f168a0fce..3159e90bb 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -243,8 +243,8 @@ static void sec2msf(unsigned int s, u8 *msf) { new_dyna_set_event(PSXINT_CDR, eCycle); \ } -// cdrPlaySeekReadInterrupt -#define CDRPLAYSEEKREAD_INT(eCycle, isFirst) { \ +// cdrPlayReadInterrupt +#define CDRPLAYREAD_INT(eCycle, isFirst) { \ u32 e_ = eCycle; \ psxRegs.interrupt |= (1 << PSXINT_CDREAD); \ if (isFirst) \ @@ -578,32 +578,13 @@ static void cdrReadInterrupt(void); static void cdrPrepCdda(s16 *buf, int samples); static void cdrAttenuate(s16 *buf, int samples, int stereo); -void cdrPlaySeekReadInterrupt(void) +void cdrPlayReadInterrupt(void) { if (cdr.Reading) { cdrReadInterrupt(); return; } - if (!cdr.Play && (cdr.StatP & STATUS_SEEK)) { - if (cdr.Stat) { - CDR_LOG_I("cdrom: seek stat hack\n"); - CDRPLAYSEEKREAD_INT(0x1000, 1); - return; - } - SetResultSize(1); - cdr.StatP |= STATUS_ROTATING; - SetPlaySeekRead(cdr.StatP, 0); - cdr.Result[0] = cdr.StatP; - cdr.Stat = Complete; - setIrq(0x1002); - - Find_CurTrack(cdr.SetSectorPlay); - ReadTrack(cdr.SetSectorPlay); - cdr.TrackChanged = FALSE; - return; - } - if (!cdr.Play) return; CDR_LOG( "CDDA - %d:%d:%d\n", @@ -642,7 +623,7 @@ void cdrPlaySeekReadInterrupt(void) // update for CdlGetlocP/autopause generate_subq(cdr.SetSectorPlay); - CDRPLAYSEEKREAD_INT(cdReadTime, 0); + CDRPLAYREAD_INT(cdReadTime, 0); } #define CMD_PART2 0x100 @@ -797,7 +778,7 @@ void cdrInterrupt(void) { // BIOS player - set flag again cdr.Play = TRUE; - 
CDRPLAYSEEKREAD_INT(cdReadTime + seekTime, 1); + CDRPLAYREAD_INT(cdReadTime + seekTime, 1); start_rotating = 1; break; @@ -988,7 +969,7 @@ void cdrInterrupt(void) { case CdlSeekP: StopCdda(); StopReading(); - SetPlaySeekRead(cdr.StatP, STATUS_SEEK); + SetPlaySeekRead(cdr.StatP, STATUS_SEEK | STATUS_ROTATING); seekTime = cdrSeekTime(cdr.SetSector); memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); @@ -1005,10 +986,20 @@ void cdrInterrupt(void) { Rockman X5 = 0.5-4x - fix capcom logo */ - CDRPLAYSEEKREAD_INT(cdReadTime + seekTime, 1); + second_resp_time = cdReadTime + seekTime; start_rotating = 1; break; + case CdlSeekL + CMD_PART2: + case CdlSeekP + CMD_PART2: + SetPlaySeekRead(cdr.StatP, 0); + cdr.Stat = Complete; + + Find_CurTrack(cdr.SetSectorPlay); + ReadTrack(cdr.SetSectorPlay); + cdr.TrackChanged = FALSE; + break; + case CdlTest: case CdlTest + CMD_WHILE_NOT_READY: switch (cdr.Param[0]) { @@ -1103,7 +1094,7 @@ void cdrInterrupt(void) { // - fixes new game ReadTrack(cdr.SetSectorPlay); - CDRPLAYSEEKREAD_INT(((cdr.Mode & 0x80) ? (cdReadTime) : cdReadTime * 2) + seekTime, 1); + CDRPLAYREAD_INT(((cdr.Mode & 0x80) ? (cdReadTime) : cdReadTime * 2) + seekTime, 1); SetPlaySeekRead(cdr.StatP, STATUS_SEEK); start_rotating = 1; @@ -1297,7 +1288,7 @@ static void cdrReadInterrupt(void) ReadTrack(cdr.SetSectorPlay); } - CDRPLAYSEEKREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime, 0); + CDRPLAYREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime, 0); } /* @@ -1636,7 +1627,7 @@ int cdrFreeze(void *f, int Mode) { if (!Config.Cdda) CDR_play(cdr.SetSectorPlay); if (psxRegs.interrupt & (1 << PSXINT_CDRPLAY_OLD)) - CDRPLAYSEEKREAD_INT((cdr.Mode & 0x80) ? (cdReadTime / 2) : cdReadTime, 1); + CDRPLAYREAD_INT((cdr.Mode & 0x80) ? 
(cdReadTime / 2) : cdReadTime, 1); } if ((cdr.freeze_ver & 0xffffff00) != 0x63647200) { diff --git a/libpcsxcore/cdrom.h b/libpcsxcore/cdrom.h index 52bd21c08..ee0b4d4bb 100644 --- a/libpcsxcore/cdrom.h +++ b/libpcsxcore/cdrom.h @@ -48,7 +48,7 @@ extern "C" { void cdrReset(); void cdrInterrupt(void); -void cdrPlaySeekReadInterrupt(void); +void cdrPlayReadInterrupt(void); void cdrLidSeekInterrupt(void); void cdrDmaInterrupt(void); void LidInterrupt(void); diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index aa093564d..86bf0d270 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -55,7 +55,7 @@ typedef void (irq_func)(); static irq_func * const irq_funcs[] = { [PSXINT_SIO] = sioInterrupt, [PSXINT_CDR] = cdrInterrupt, - [PSXINT_CDREAD] = cdrPlaySeekReadInterrupt, + [PSXINT_CDREAD] = cdrPlayReadInterrupt, [PSXINT_GPUDMA] = gpuInterrupt, [PSXINT_MDECOUTDMA] = mdec1Interrupt, [PSXINT_SPUDMA] = spuInterrupt, diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index ba22d45fb..818cc91a6 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -138,7 +138,7 @@ void psxBranchTest() { if (psxRegs.interrupt & (1 << PSXINT_CDREAD)) { // cdr read if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_CDREAD].sCycle) >= psxRegs.intCycle[PSXINT_CDREAD].cycle) { psxRegs.interrupt &= ~(1 << PSXINT_CDREAD); - cdrPlaySeekReadInterrupt(); + cdrPlayReadInterrupt(); } } if (psxRegs.interrupt & (1 << PSXINT_GPUDMA)) { // gpu dma From 70c1043e63dafcf92b0f4dc0932326a6d042fbb4 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 22 Sep 2022 00:35:28 +0300 Subject: [PATCH 221/597] cdrom: change GetlocL behavior notaz/pcsx_rearmed#263 --- libpcsxcore/cdrom.c | 48 +++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 3159e90bb..42f13af21 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -89,8 +89,7 @@ static struct 
{ boolean Play, Muted; int CurTrack; int Mode, File, Channel; - int Reset; - int NoErr; + unsigned char LocL[8]; int FirstSector; xa_decode_t Xa; @@ -208,6 +207,8 @@ unsigned char Test23[] = { 0x43, 0x58, 0x44, 0x32, 0x39 ,0x34, 0x30, 0x51 }; // so (PSXCLK / 75) = cdr read time (linuzappz) #define cdReadTime (PSXCLK / 75) +#define LOCL_INVALID 0xff + enum drive_state { DRIVESTATE_STANDBY = 0, // pause, play, read DRIVESTATE_LID_OPEN, @@ -446,9 +447,10 @@ static void generate_subq(const u8 *time) cdr.subq.Absolute[2] = itob(time[2]); } -static void ReadTrack(const u8 *time) { +static int ReadTrack(const u8 *time) { unsigned char tmp[3]; struct SubQ *subq; + int read_ok; u16 crc; tmp[0] = itob(time[0]); @@ -456,15 +458,15 @@ static void ReadTrack(const u8 *time) { tmp[2] = itob(time[2]); if (memcmp(cdr.Prev, tmp, 3) == 0) - return; + return 1; CDR_LOG("ReadTrack *** %02x:%02x:%02x\n", tmp[0], tmp[1], tmp[2]); - cdr.NoErr = CDR_readTrack(tmp); + read_ok = CDR_readTrack(tmp); memcpy(cdr.Prev, tmp, 3); if (CheckSBI(time)) - return; + return read_ok; subq = (struct SubQ *)CDR_getBufferSub(); if (subq != NULL && cdr.CurTrack == 1) { @@ -488,6 +490,8 @@ static void ReadTrack(const u8 *time) { cdr.subq.Track, cdr.subq.Index, cdr.subq.Relative[0], cdr.subq.Relative[1], cdr.subq.Relative[2], cdr.subq.Absolute[0], cdr.subq.Absolute[1], cdr.subq.Absolute[2]); + + return read_ok; } static void cdrPlayInterrupt_Autopause() @@ -634,8 +638,10 @@ void cdrInterrupt(void) { int error = 0; unsigned int seekTime = 0; u32 second_resp_time = 0; + const void *buf; u8 ParamC; u8 set_loc[3]; + int read_ok; u16 not_ready = 0; u16 Cmd; int i; @@ -767,6 +773,7 @@ void cdrInterrupt(void) { */ Find_CurTrack(cdr.SetSectorPlay); ReadTrack(cdr.SetSectorPlay); + cdr.LocL[0] = LOCL_INVALID; cdr.TrackChanged = FALSE; cdr.FirstSector = 1; @@ -826,6 +833,7 @@ void cdrInterrupt(void) { StopReading(); SetPlaySeekRead(cdr.StatP, 0); cdr.StatP &= ~STATUS_ROTATING; + cdr.LocL[0] = LOCL_INVALID; 
second_resp_time = 0x800; if (cdr.DriveState == DRIVESTATE_STANDBY) @@ -876,6 +884,7 @@ void cdrInterrupt(void) { StopCdda(); StopReading(); SetPlaySeekRead(cdr.StatP, 0); + cdr.LocL[0] = LOCL_INVALID; cdr.Muted = FALSE; cdr.Mode = 0x20; /* This fixes This is Football 2, Pooh's Party lockups */ second_resp_time = not_ready ? 70000 : 4100000; @@ -917,8 +926,12 @@ void cdrInterrupt(void) { break; case CdlGetlocL: + if (cdr.LocL[0] == LOCL_INVALID) { + error = 0x80; + goto set_error; + } SetResultSize(8); - memcpy(cdr.Result, cdr.Transfer, 8); + memcpy(cdr.Result, cdr.LocL, 8); break; case CdlGetlocP: @@ -996,7 +1009,9 @@ void cdrInterrupt(void) { cdr.Stat = Complete; Find_CurTrack(cdr.SetSectorPlay); - ReadTrack(cdr.SetSectorPlay); + read_ok = ReadTrack(cdr.SetSectorPlay); + if (read_ok && (buf = CDR_getBuffer())) + memcpy(cdr.LocL, buf, 8); cdr.TrackChanged = FALSE; break; @@ -1064,6 +1079,7 @@ void cdrInterrupt(void) { case CdlReadToc: case CdlReadToc + CMD_WHILE_NOT_READY: + cdr.LocL[0] = LOCL_INVALID; second_resp_time = cdReadTime * 180 / 4; start_rotating = 1; break; @@ -1093,20 +1109,21 @@ void cdrInterrupt(void) { // Fighting Force 2 - update subq time immediately // - fixes new game ReadTrack(cdr.SetSectorPlay); + cdr.LocL[0] = LOCL_INVALID; CDRPLAYREAD_INT(((cdr.Mode & 0x80) ? (cdReadTime) : cdReadTime * 2) + seekTime, 1); SetPlaySeekRead(cdr.StatP, STATUS_SEEK); start_rotating = 1; break; + case CdlSync: default: - CDR_LOG_I("Invalid command: %02x%s\n", - Cmd, not_ready ? " (not_ready)" : ""); error = ERROR_INVALIDCMD; // FALLTHROUGH set_error: + CDR_LOG_I("cdrom: cmd %02x error %02x\n", Cmd, error); SetResultSize(2); cdr.Result[0] = cdr.StatP | STATUS_ERROR; cdr.Result[1] = not_ready ? 
ERROR_NOTREADY : error; @@ -1115,7 +1132,6 @@ void cdrInterrupt(void) { } if (cdr.DriveState == DRIVESTATE_STOPPED && start_rotating) { - printf("cdr.DriveState %d->%d\n", cdr.DriveState, DRIVESTATE_STANDBY); cdr.DriveState = DRIVESTATE_STANDBY; cdr.StatP |= STATUS_ROTATING; } @@ -1221,21 +1237,23 @@ static void cdrUpdateTransferBuf(const u8 *buf) static void cdrReadInterrupt(void) { u8 *buf = NULL, *hdr; + int read_ok; SetPlaySeekRead(cdr.StatP, STATUS_READ | STATUS_ROTATING); - ReadTrack(cdr.SetSectorPlay); - if (cdr.NoErr) + read_ok = ReadTrack(cdr.SetSectorPlay); + if (read_ok) buf = CDR_getBuffer(); if (buf == NULL) - cdr.NoErr = 0; + read_ok = 0; - if (!cdr.NoErr) { + if (!read_ok) { CDR_LOG_I("cdrReadInterrupt() Log: err\n"); memset(cdr.Transfer, 0, DATA_SIZE); cdrReadInterruptSetResult(cdr.StatP | STATUS_ERROR); return; } + memcpy(cdr.LocL, buf, 8); if (!cdr.Irq1Pending) cdrUpdateTransferBuf(buf); From ececcc61fa9115edcc51892a27aaa720d983e23e Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 29 Sep 2022 21:36:54 +0300 Subject: [PATCH 222/597] cdrom: ignore repeated read cmds libretro/pcsx_rearmed#691 --- libpcsxcore/cdrom.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 42f13af21..cace6d90b 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -711,7 +711,7 @@ void cdrInterrupt(void) { // MM must be BCD, SS must be BCD and <0x60, FF must be BCD and <0x75 if (((cdr.Param[0] & 0x0F) > 0x09) || (cdr.Param[0] > 0x99) || ((cdr.Param[1] & 0x0F) > 0x09) || (cdr.Param[1] >= 0x60) || ((cdr.Param[2] & 0x0F) > 0x09) || (cdr.Param[2] >= 0x75)) { - CDR_LOG("Invalid/out of range seek to %02X:%02X:%02X\n", cdr.Param[0], cdr.Param[1], cdr.Param[2]); + CDR_LOG_I("Invalid/out of range seek to %02X:%02X:%02X\n", cdr.Param[0], cdr.Param[1], cdr.Param[2]); error = ERROR_INVALIDARG; goto set_error; } @@ -1091,6 +1091,9 @@ void cdrInterrupt(void) { case CdlReadN: case CdlReadS: + if (cdr.Reading && 
!cdr.SetlocPending) + break; + Find_CurTrack(cdr.SetlocPending ? cdr.SetSector : cdr.SetSectorPlay); if ((cdr.Mode & MODE_CDDA) && cdr.CurTrack > 1) From ca1683d0864e3549bc522bdb04a3b778d34e22b4 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 29 Sep 2022 22:45:23 +0300 Subject: [PATCH 223/597] cdrom: update status immediately after seek Philosoma wants it --- libpcsxcore/cdrom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index cace6d90b..7715a2b7f 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1006,6 +1006,7 @@ void cdrInterrupt(void) { case CdlSeekL + CMD_PART2: case CdlSeekP + CMD_PART2: SetPlaySeekRead(cdr.StatP, 0); + cdr.Result[0] = cdr.StatP; cdr.Stat = Complete; Find_CurTrack(cdr.SetSectorPlay); From d014a47167b28b19f87546bca0b0c53f08b1daff Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 1 Oct 2022 22:32:01 +0300 Subject: [PATCH 224/597] remove all the hack options They just confuse users, like: libretro/pcsx_rearmed#693 For Parasite Eve 2 and others, adjust "PSX CPU clock" instead if needed. 
--- frontend/main.c | 4 ++-- frontend/menu.c | 25 ++----------------------- libpcsxcore/misc.c | 18 ++++++++++++------ libpcsxcore/new_dynarec/pcsxmem.c | 4 ---- libpcsxcore/psxcommon.h | 4 ---- libpcsxcore/psxcounters.c | 16 ++-------------- libpcsxcore/psxhw.c | 7 ------- libpcsxcore/r3000a.c | 2 +- libpcsxcore/sio.c | 10 ++++------ maemo/main.c | 4 ---- 10 files changed, 23 insertions(+), 71 deletions(-) diff --git a/frontend/main.c b/frontend/main.c index 144ce490a..d81210903 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -129,8 +129,8 @@ static void set_default_paths(void) void emu_set_default_config(void) { // try to set sane config on which most games work - Config.Xa = Config.Cdda = Config.Sio = - Config.icache_emulation = Config.SpuIrq = Config.RCntFix = Config.VSyncWA = 0; + Config.Xa = Config.Cdda = 0; + Config.icache_emulation = 0; Config.PsxAuto = 1; pl_rearmed_cbs.gpu_neon.allow_interlace = 2; // auto diff --git a/frontend/menu.c b/frontend/menu.c index 4816eceae..a494c00d9 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -390,14 +390,10 @@ static const struct { CE_CONFIG_STR(Spu), // CE_CONFIG_STR(Cdr), CE_CONFIG_VAL(Xa), -// CE_CONFIG_VAL(Sio), CE_CONFIG_VAL(Mdec), CE_CONFIG_VAL(Cdda), CE_CONFIG_VAL(Debug), CE_CONFIG_VAL(PsxOut), - CE_CONFIG_VAL(SpuIrq), - CE_CONFIG_VAL(RCntFix), - CE_CONFIG_VAL(VSyncWA), CE_CONFIG_VAL(icache_emulation), CE_CONFIG_VAL(DisableStalls), CE_CONFIG_VAL(Cpu), @@ -1592,21 +1588,14 @@ static const char h_cfg_fl[] = "Frame Limiter keeps the game from running to static const char h_cfg_xa[] = "Disables XA sound, which can sometimes improve performance"; static const char h_cfg_cdda[] = "Disable CD Audio for a performance boost\n" "(proper .cue/.bin dump is needed otherwise)"; -//static const char h_cfg_sio[] = "You should not need this, breaks games"; -static const char h_cfg_spuirq[] = "Compatibility tweak; should be left off"; -static const char h_cfg_rcnt2[] = "InuYasha Sengoku Battle Fix\n" - "(timing hack, 
breaks other games)"; -#ifdef DRC_DISABLE -static const char h_cfg_rcnt1[] = "Parasite Eve 2, Vandal Hearts 1/2 Fix\n" - "(timing hack, breaks other games)"; -#else +#ifndef DRC_DISABLE static const char h_cfg_nodrc[] = "Disable dynamic recompiler and use interpreter\n" "Might be useful to overcome some dynarec bugs"; #endif static const char h_cfg_shacks[] = "Breaks games but may give better performance"; static const char h_cfg_icache[] = "Support F1 games (only when dynarec is off)"; -enum { AMO_XA, AMO_CDDA, AMO_SIO, AMO_SPUI, AMO_IC, AMO_RCNT, AMO_WA, AMO_CPU }; +enum { AMO_XA, AMO_CDDA, AMO_IC, AMO_CPU }; static menu_entry e_menu_adv_options[] = { @@ -1615,13 +1604,7 @@ static menu_entry e_menu_adv_options[] = mee_onoff_h ("Disable Frame Limiter", 0, g_opts, OPT_NO_FRAMELIM, h_cfg_fl), mee_onoff_h ("Disable XA Decoding", 0, menu_iopts[AMO_XA], 1, h_cfg_xa), mee_onoff_h ("Disable CD Audio", 0, menu_iopts[AMO_CDDA], 1, h_cfg_cdda), - //mee_onoff_h ("SIO IRQ Always Enabled", 0, menu_iopts[AMO_SIO], 1, h_cfg_sio), - mee_onoff_h ("SPU IRQ Always Enabled", 0, menu_iopts[AMO_SPUI], 1, h_cfg_spuirq), mee_onoff_h ("ICache emulation", 0, menu_iopts[AMO_IC], 1, h_cfg_icache), -#ifdef DRC_DISABLE - mee_onoff_h ("Rootcounter hack", 0, menu_iopts[AMO_RCNT], 1, h_cfg_rcnt1), -#endif - mee_onoff_h ("Rootcounter hack 2", 0, menu_iopts[AMO_WA], 1, h_cfg_rcnt2), #if !defined(DRC_DISABLE) || defined(LIGHTREC) mee_onoff_h ("Disable dynarec (slow!)",0, menu_iopts[AMO_CPU], 1, h_cfg_nodrc), #endif @@ -1638,11 +1621,7 @@ static int menu_loop_adv_options(int id, int keys) } opts[] = { { &Config.Xa, &menu_iopts[AMO_XA] }, { &Config.Cdda, &menu_iopts[AMO_CDDA] }, - { &Config.Sio, &menu_iopts[AMO_SIO] }, - { &Config.SpuIrq, &menu_iopts[AMO_SPUI] }, { &Config.icache_emulation, &menu_iopts[AMO_IC] }, - { &Config.RCntFix, &menu_iopts[AMO_RCNT] }, - { &Config.VSyncWA, &menu_iopts[AMO_WA] }, { &Config.Cpu, &menu_iopts[AMO_CPU] }, }; int i; diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c 
index b3dfdf533..d83ee194c 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -722,10 +722,13 @@ int SendPcsxInfo() { if (NET_recvData == NULL || NET_sendData == NULL) return 0; + boolean Sio_old = 0; + boolean SpuIrq_old = 0; + boolean RCntFix_old = 0; NET_sendData(&Config.Xa, sizeof(Config.Xa), PSE_NET_BLOCKING); - NET_sendData(&Config.Sio, sizeof(Config.Sio), PSE_NET_BLOCKING); - NET_sendData(&Config.SpuIrq, sizeof(Config.SpuIrq), PSE_NET_BLOCKING); - NET_sendData(&Config.RCntFix, sizeof(Config.RCntFix), PSE_NET_BLOCKING); + NET_sendData(&Sio_old, sizeof(Sio_old), PSE_NET_BLOCKING); + NET_sendData(&SpuIrq_old, sizeof(SpuIrq_old), PSE_NET_BLOCKING); + NET_sendData(&RCntFix_old, sizeof(RCntFix_old), PSE_NET_BLOCKING); NET_sendData(&Config.PsxType, sizeof(Config.PsxType), PSE_NET_BLOCKING); NET_sendData(&Config.Cpu, sizeof(Config.Cpu), PSE_NET_BLOCKING); @@ -738,10 +741,13 @@ int RecvPcsxInfo() { if (NET_recvData == NULL || NET_sendData == NULL) return 0; + boolean Sio_old = 0; + boolean SpuIrq_old = 0; + boolean RCntFix_old = 0; NET_recvData(&Config.Xa, sizeof(Config.Xa), PSE_NET_BLOCKING); - NET_recvData(&Config.Sio, sizeof(Config.Sio), PSE_NET_BLOCKING); - NET_recvData(&Config.SpuIrq, sizeof(Config.SpuIrq), PSE_NET_BLOCKING); - NET_recvData(&Config.RCntFix, sizeof(Config.RCntFix), PSE_NET_BLOCKING); + NET_recvData(&Sio_old, sizeof(Sio_old), PSE_NET_BLOCKING); + NET_recvData(&SpuIrq_old, sizeof(SpuIrq_old), PSE_NET_BLOCKING); + NET_recvData(&RCntFix_old, sizeof(RCntFix_old), PSE_NET_BLOCKING); NET_recvData(&Config.PsxType, sizeof(Config.PsxType), PSE_NET_BLOCKING); SysUpdate(); diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index bb471b6a9..69a4c99df 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -152,8 +152,6 @@ make_rcnt_funcs(2) static void io_write_ireg16(u32 value) { - //if (Config.Sio) psxHu16ref(0x1070) |= 0x80; - if (Config.SpuIrq) psxHu16ref(0x1070) |= 0x200; 
psxHu16ref(0x1070) &= value; } @@ -166,8 +164,6 @@ static void io_write_imask16(u32 value) static void io_write_ireg32(u32 value) { - //if (Config.Sio) psxHu32ref(0x1070) |= 0x80; - if (Config.SpuIrq) psxHu32ref(0x1070) |= 0x200; psxHu32ref(0x1070) &= value; } diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index c0c2c9fb2..a549eb67e 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -125,7 +125,6 @@ typedef struct { char PluginsDir[MAXPATHLEN]; char PatchesDir[MAXPATHLEN]; boolean Xa; - boolean Sio; boolean Mdec; boolean PsxAuto; boolean Cdda; @@ -133,10 +132,7 @@ typedef struct { boolean HLE; boolean Debug; boolean PsxOut; - boolean SpuIrq; - boolean RCntFix; boolean UseNet; - boolean VSyncWA; boolean icache_emulation; boolean DisableStalls; u8 Cpu; // CPU_DYNAREC or CPU_INTERPRETER diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index fba2f5c9e..32a18475e 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -358,8 +358,8 @@ void psxRcntUpdate() } } - // Update lace. (with InuYasha fix) - if( hSyncCount >= (Config.VSyncWA ? HSyncTotal[Config.PsxType] / BIAS : HSyncTotal[Config.PsxType]) ) + // Update lace. + if( hSyncCount >= HSyncTotal[Config.PsxType] ) { rcnts[3].cycleStart += Config.PsxType ? PSXCLK / 50 : PSXCLK / 60; hSyncCount = 0; @@ -420,18 +420,6 @@ u32 psxRcntRcount( u32 index ) count = _psxRcntRcount( index ); - // Parasite Eve 2 fix. 
- if( Config.RCntFix ) - { - if( index == 2 ) - { - if( rcnts[index].counterState == CountToTarget ) - { - count /= BIAS; - } - } - } - verboseLog( 2, "[RCNT %i] rcount: %x\n", index, count ); return count; diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index b7540dfcf..483f4962d 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -30,9 +30,6 @@ //#define PSXHW_LOG printf void psxHwReset() { - if (Config.Sio) psxHu32ref(0x1070) |= SWAP32(0x80); - if (Config.SpuIrq) psxHu32ref(0x1070) |= SWAP32(0x200); - memset(psxH, 0, 0x10000); mdecInit(); // initialize mdec decoder @@ -443,8 +440,6 @@ void psxHwWrite16(u32 add, u16 value) { #ifdef PSXHW_LOG PSXHW_LOG("IREG 16bit write %x\n", value); #endif - if (Config.Sio) psxHu16ref(0x1070) |= SWAPu16(0x80); - if (Config.SpuIrq) psxHu16ref(0x1070) |= SWAPu16(0x200); psxHu16ref(0x1070) &= SWAPu16(value); return; @@ -558,8 +553,6 @@ void psxHwWrite32(u32 add, u32 value) { #ifdef PSXHW_LOG PSXHW_LOG("IREG 32bit write %x\n", value); #endif - if (Config.Sio) psxHu32ref(0x1070) |= SWAPu32(0x80); - if (Config.SpuIrq) psxHu32ref(0x1070) |= SWAPu32(0x200); psxHu32ref(0x1070) &= SWAPu32(value); return; case 0x1f801074: diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 818cc91a6..e7557a740 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -123,7 +123,7 @@ void psxBranchTest() { psxRcntUpdate(); if (psxRegs.interrupt) { - if ((psxRegs.interrupt & (1 << PSXINT_SIO)) && !Config.Sio) { // sio + if ((psxRegs.interrupt & (1 << PSXINT_SIO))) { // sio if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_SIO].sCycle) >= psxRegs.intCycle[PSXINT_SIO].cycle) { psxRegs.interrupt &= ~(1 << PSXINT_SIO); sioInterrupt(); diff --git a/libpcsxcore/sio.c b/libpcsxcore/sio.c index b3732d298..329bc36ba 100644 --- a/libpcsxcore/sio.c +++ b/libpcsxcore/sio.c @@ -68,12 +68,10 @@ char Mcd1Data[MCD_SIZE], Mcd2Data[MCD_SIZE]; char McdDisable[2]; #define SIO_INT(eCycle) { \ - if (!Config.Sio) { \ - psxRegs.interrupt |= (1 << 
PSXINT_SIO); \ - psxRegs.intCycle[PSXINT_SIO].cycle = eCycle; \ - psxRegs.intCycle[PSXINT_SIO].sCycle = psxRegs.cycle; \ - new_dyna_set_event(PSXINT_SIO, eCycle); \ - } \ + psxRegs.interrupt |= (1 << PSXINT_SIO); \ + psxRegs.intCycle[PSXINT_SIO].cycle = eCycle; \ + psxRegs.intCycle[PSXINT_SIO].sCycle = psxRegs.cycle; \ + new_dyna_set_event(PSXINT_SIO, eCycle); \ } // clk cycle byte diff --git a/maemo/main.c b/maemo/main.c index 564e8ed5a..91aa2e785 100644 --- a/maemo/main.c +++ b/maemo/main.c @@ -251,10 +251,6 @@ int main(int argc, char **argv) else if (!strcmp(argv[i], "-unai")) strcpy(Config.Gpu, "gpu_unai.so"); else if (!strcmp(argv[i], "-cdda")) Config.Cdda = 1; else if (!strcmp(argv[i], "-xa")) Config.Xa = 1; - else if (!strcmp(argv[i], "-rcnt")) Config.RCntFix = 1 ; - else if (!strcmp(argv[i], "-sio")) Config.Sio = 1; - else if (!strcmp(argv[i], "-spuirq")) Config.SpuIrq = 1; - else if (!strcmp(argv[i], "-vsync")) Config.VSyncWA = 1; else if (!strcmp(argv[i], "-fps")) g_opts |=OPT_SHOWFPS; else if (!strcmp(argv[i], "-cpu")) g_opts |=OPT_SHOWCPU; else if (!strcmp(argv[i], "-spu")) g_opts |=OPT_SHOWSPU; From d5aeda23720ba9374312f8d387f299024fedb7e6 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 1 Oct 2022 23:43:39 +0300 Subject: [PATCH 225/597] psxinterpreter: use cycle_multiplier also not just ari64 --- frontend/main.c | 2 +- frontend/menu.c | 10 +++++----- libpcsxcore/database.c | 21 ++++++++++++++------- libpcsxcore/new_dynarec/emu_if.c | 4 +--- libpcsxcore/new_dynarec/new_dynarec.c | 11 ++++------- libpcsxcore/new_dynarec/new_dynarec.h | 3 --- libpcsxcore/psxcommon.h | 10 +++++----- libpcsxcore/psxinterpreter.c | 27 ++++++++++++++++++++++----- libpcsxcore/r3000a.c | 1 + libpcsxcore/r3000a.h | 5 +++-- maemo/main.c | 3 +-- 11 files changed, 57 insertions(+), 40 deletions(-) diff --git a/frontend/main.c b/frontend/main.c index d81210903..3440e3886 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -132,6 +132,7 @@ void emu_set_default_config(void) 
Config.Xa = Config.Cdda = 0; Config.icache_emulation = 0; Config.PsxAuto = 1; + Config.cycle_multiplier = CYCLE_MULT_DEFAULT; pl_rearmed_cbs.gpu_neon.allow_interlace = 2; // auto pl_rearmed_cbs.gpu_neon.enhancement_enable = @@ -163,7 +164,6 @@ void emu_set_default_config(void) spu_config.iTempo = 1; #endif new_dynarec_hacks = 0; - cycle_multiplier = 200; in_type1 = PSE_PAD_TYPE_STANDARD; in_type2 = PSE_PAD_TYPE_STANDARD; diff --git a/frontend/menu.c b/frontend/menu.c index a494c00d9..2e4091c38 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -306,7 +306,7 @@ static void menu_sync_config(void) Config.PsxAuto = 0; Config.PsxType = region - 1; } - cycle_multiplier = 10000 / psx_clock; + Config.cycle_multiplier = 10000 / psx_clock; switch (in_type_sel1) { case 1: in_type1 = PSE_PAD_TYPE_ANALOGPAD; break; @@ -1550,8 +1550,6 @@ static int menu_loop_plugin_options(int id, int keys) // ------------ adv options menu ------------ #ifndef DRC_DISABLE -static const char h_cfg_psxclk[] = "Over/under-clock the PSX, default is " DEFAULT_PSX_CLOCK_S "\n" - "(lower value - less work for the emu, may be faster)"; static const char h_cfg_noch[] = "Disables game-specific compatibility hacks"; static const char h_cfg_nosmc[] = "Will cause crashes when loading, break memcards"; static const char h_cfg_gteunn[] = "May cause graphical glitches"; @@ -1562,7 +1560,6 @@ static const char h_cfg_stalls[] = "Will cause some games to run too fast"; static menu_entry e_menu_speed_hacks[] = { #ifndef DRC_DISABLE - mee_range_h ("PSX CPU clock, %%", 0, psx_clock, 1, 500, h_cfg_psxclk), mee_onoff_h ("Disable compat hacks", 0, new_dynarec_hacks, NDHACK_NO_COMPAT_HACKS, h_cfg_noch), mee_onoff_h ("Disable SMC checks", 0, new_dynarec_hacks, NDHACK_NO_SMC_CHECK, h_cfg_nosmc), mee_onoff_h ("Assume GTE regs unneeded", 0, new_dynarec_hacks, NDHACK_GTE_UNNEEDED, h_cfg_gteunn), @@ -1594,6 +1591,8 @@ static const char h_cfg_nodrc[] = "Disable dynamic recompiler and use interpret #endif static const char 
h_cfg_shacks[] = "Breaks games but may give better performance"; static const char h_cfg_icache[] = "Support F1 games (only when dynarec is off)"; +static const char h_cfg_psxclk[] = "Over/under-clock the PSX, default is " DEFAULT_PSX_CLOCK_S "\n" + "(adjust this if the game is too slow/too fast/hangs)"; enum { AMO_XA, AMO_CDDA, AMO_IC, AMO_CPU }; @@ -1608,6 +1607,7 @@ static menu_entry e_menu_adv_options[] = #if !defined(DRC_DISABLE) || defined(LIGHTREC) mee_onoff_h ("Disable dynarec (slow!)",0, menu_iopts[AMO_CPU], 1, h_cfg_nodrc), #endif + mee_range_h ("PSX CPU clock, %", 0, psx_clock, 1, 500, h_cfg_psxclk), mee_handler_h ("[Speed hacks]", menu_loop_speed_hacks, h_cfg_shacks), mee_end, }; @@ -2647,6 +2647,7 @@ void menu_prepare_emu(void) psxCpu->Reset(); } + menu_sync_config(); psxCpu->ApplyConfig(); // core doesn't care about Config.Cdda changes, @@ -2654,7 +2655,6 @@ void menu_prepare_emu(void) if (Config.Cdda) CDR_stop(); - menu_sync_config(); if (cpu_clock > 0) plat_target_cpu_clock_set(cpu_clock); diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 52d17a7e6..561aedeed 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -22,7 +22,7 @@ static const struct const char * const id; int mult; } -new_dynarec_clock_overrides[] = +cycle_multiplier_overrides[] = { /* Internal Section - fussy about timings */ { "SLPS01868", 202 }, @@ -30,6 +30,13 @@ new_dynarec_clock_overrides[] = * changing memcard settings is enough to break/unbreak it */ { "SLPS02528", 190 }, { "SLPS02636", 190 }, +#ifdef DRC_DISABLE /* new_dynarec has a hack for this game */ + /* Parasite Eve II - internal timer checks */ + { "SLUS01042", 125 }, + { "SLUS01055", 125 }, + { "SLES02558", 125 }, + { "SLES12558", 125 }, +#endif }; /* Function for automatic patching according to GameID. 
*/ @@ -51,16 +58,16 @@ void Apply_Hacks_Cdrom() /* Dynarec game-specific hacks */ new_dynarec_hacks_pergame = 0; - cycle_multiplier_override = 0; + Config.cycle_multiplier_override = 0; - for (i = 0; i < ARRAY_SIZE(new_dynarec_clock_overrides); i++) + for (i = 0; i < ARRAY_SIZE(cycle_multiplier_overrides); i++) { - if (strcmp(CdromId, new_dynarec_clock_overrides[i].id) == 0) + if (strcmp(CdromId, cycle_multiplier_overrides[i].id) == 0) { - cycle_multiplier_override = new_dynarec_clock_overrides[i].mult; + Config.cycle_multiplier_override = cycle_multiplier_overrides[i].mult; new_dynarec_hacks_pergame |= NDHACK_OVERRIDE_CYCLE_M; - SysPrintf("using new_dynarec clock override: %d\n", - cycle_multiplier_override); + SysPrintf("using cycle_multiplier_override: %d\n", + Config.cycle_multiplier_override); break; } } diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 86bf0d270..c09e9eca6 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -394,7 +394,7 @@ static void ari64_apply_config() else new_dynarec_hacks &= ~NDHACK_NO_STALLS; - if (cycle_multiplier != cycle_multiplier_old + if (Config.cycle_multiplier != cycle_multiplier_old || new_dynarec_hacks != new_dynarec_hacks_old) { new_dynarec_clear_full(); @@ -424,8 +424,6 @@ unsigned int address; int pending_exception, stop; unsigned int next_interupt; int new_dynarec_did_compile; -int cycle_multiplier; -int cycle_multiplier_override; int cycle_multiplier_old; int new_dynarec_hacks_pergame; int new_dynarec_hacks_old; diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 2b57e59da..276ef8afd 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -592,8 +592,6 @@ static void do_clear_cache(void) #define NO_CYCLE_PENALTY_THR 12 -int cycle_multiplier = CYCLE_MULT_DEFAULT; // 100 for 1.0 -int cycle_multiplier_override; int cycle_multiplier_old; static int 
cycle_multiplier_active; @@ -6233,7 +6231,7 @@ void new_dynarec_clear_full(void) stat_clear(stat_blocks); stat_clear(stat_links); - cycle_multiplier_old = cycle_multiplier; + cycle_multiplier_old = Config.cycle_multiplier; new_dynarec_hacks_old = new_dynarec_hacks; } @@ -6303,7 +6301,6 @@ void new_dynarec_init(void) #endif #endif out = ndrc->translation_cache; - cycle_multiplier=200; new_dynarec_clear_full(); #ifdef HOST_IMM8 // Copy this into local area so we don't have to put it in every literal pool @@ -6360,7 +6357,7 @@ static u_int *get_source_start(u_int addr, u_int *limit) (0xbfc00000 <= addr && addr < 0xbfc80000))) { // BIOS. The multiplier should be much higher as it's uncached 8bit mem, - // but timings in PCSX are too tied to the interpreter's BIAS + // but timings in PCSX are too tied to the interpreter's 2-per-insn assumption if (!HACK_ENABLED(NDHACK_OVERRIDE_CYCLE_M)) cycle_multiplier_active = 200; @@ -9031,8 +9028,8 @@ static int new_recompile_block(u_int addr) return 0; } - cycle_multiplier_active = cycle_multiplier_override && cycle_multiplier == CYCLE_MULT_DEFAULT - ? cycle_multiplier_override : cycle_multiplier; + cycle_multiplier_active = Config.cycle_multiplier_override && Config.cycle_multiplier == CYCLE_MULT_DEFAULT + ? 
Config.cycle_multiplier_override : Config.cycle_multiplier; source = get_source_start(start, &pagelimit); if (source == NULL) { diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index e32846543..d18ff6309 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -5,9 +5,6 @@ extern int pending_exception; extern int stop; extern int new_dynarec_did_compile; -#define CYCLE_MULT_DEFAULT 175 -extern int cycle_multiplier; // 100 for 1.0 -extern int cycle_multiplier_override; extern int cycle_multiplier_old; #define NDHACK_NO_SMC_CHECK (1<<0) diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index a549eb67e..382d91949 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -110,6 +110,8 @@ extern int Log; void __Log(char *fmt, ...); +#define CYCLE_MULT_DEFAULT 175 + typedef struct { char Gpu[MAXPATHLEN]; char Spu[MAXPATHLEN]; @@ -117,7 +119,7 @@ typedef struct { char Pad1[MAXPATHLEN]; char Pad2[MAXPATHLEN]; char Net[MAXPATHLEN]; - char Sio1[MAXPATHLEN]; + char Sio1[MAXPATHLEN]; char Mcd1[MAXPATHLEN]; char Mcd2[MAXPATHLEN]; char Bios[MAXPATHLEN]; @@ -135,6 +137,8 @@ typedef struct { boolean UseNet; boolean icache_emulation; boolean DisableStalls; + int cycle_multiplier; // 100 for 1.0 + int cycle_multiplier_override; u8 Cpu; // CPU_DYNAREC or CPU_INTERPRETER u8 PsxType; // PSX_TYPE_NTSC or PSX_TYPE_PAL #ifdef _WIN32 @@ -159,10 +163,6 @@ extern struct PcsxSaveFuncs SaveFuncs; if (Mode == 0) SaveFuncs.read(f, ptr, size); \ } -// Make the timing events trigger faster as we are currently assuming everything -// takes one cycle, which is not the case on real hardware. 
-// FIXME: Count the proper cycle and get rid of this -#define BIAS 2 #define PSXCLK 33868800 /* 33.8688 MHz */ enum { diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index ea20cab97..4ae9417a8 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -108,6 +108,17 @@ static u32 INT_ATTR fetchICache(u8 **memRLUT, u32 pc) static u32 (INT_ATTR *fetch)(u8 **memRLUT, u32 pc) = fetchNoCache; +// Make the timing events trigger faster as we are currently assuming everything +// takes one cycle, which is not the case on real hardware. +// FIXME: count cache misses, memory latencies, stalls to get rid of this +static inline void addCycle(void) +{ + assert(psxRegs.subCycleStep >= 0x10000); + psxRegs.subCycle += psxRegs.subCycleStep; + psxRegs.cycle += psxRegs.subCycle >> 16; + psxRegs.subCycle &= 0xffff; +} + static void delayRead(int reg, u32 bpc) { u32 rold, rnew; @@ -458,7 +469,7 @@ static int psxDelayBranchExec(u32 tar) { branch = 0; psxRegs.pc = tar; - psxRegs.cycle += BIAS; + addCycle(); psxBranchTest(); return 1; } @@ -484,7 +495,7 @@ static int psxDelayBranchTest(u32 tar1) { return psxDelayBranchExec(tar2); } debugI(); - psxRegs.cycle += BIAS; + addCycle(); /* * Got a branch at tar1: @@ -497,7 +508,7 @@ static int psxDelayBranchTest(u32 tar1) { return psxDelayBranchExec(tmp1); } debugI(); - psxRegs.cycle += BIAS; + addCycle(); /* * Got a branch at tar2: @@ -523,7 +534,7 @@ static void doBranch(u32 tar) { debugI(); psxRegs.pc += 4; - psxRegs.cycle += BIAS; + addCycle(); // check for load delay tmp = psxRegs.code >> 26; @@ -1076,7 +1087,7 @@ static inline void execI_(u8 **memRLUT, psxRegisters *regs_) { if (Config.Debug) ProcessDebug(); regs_->pc += 4; - regs_->cycle += BIAS; + addCycle(); psxBSC[regs_->code >> 26](regs_, regs_->code); } @@ -1111,6 +1122,8 @@ void intNotify (int note, void *data) { } void intApplyConfig() { + int cycle_mult; + assert(psxBSC[18] == psxCOP2 || psxBSC[18] == psxCOP2_stall); assert(psxBSC[50] 
== gteLWC2 || psxBSC[50] == gteLWC2_stall); assert(psxBSC[58] == gteSWC2 || psxBSC[58] == gteSWC2_stall); @@ -1149,6 +1162,10 @@ void intApplyConfig() { fetch = fetchNoCache; else fetch = fetchICache; + + cycle_mult = Config.cycle_multiplier_override && Config.cycle_multiplier == CYCLE_MULT_DEFAULT + ? Config.cycle_multiplier_override : Config.cycle_multiplier; + psxRegs.subCycleStep = 0x10000 * cycle_mult / 100; } static void intShutdown() { diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index e7557a740..d8268e2a6 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -61,6 +61,7 @@ void psxReset() { psxRegs.CP0.r[12] = 0x10900000; // COP0 enabled | BEV = 1 | TS = 1 psxRegs.CP0.r[15] = 0x00000002; // PRevID = Revision ID, same as R3000A + psxCpu->ApplyConfig(); psxCpu->Reset(); psxHwReset(); diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index f99e03ba0..2339d5957 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -191,9 +191,10 @@ typedef struct { struct { u32 sCycle, cycle; } intCycle[32]; u32 gteBusyCycle; u32 muldivBusyCycle; + u32 subCycle; /* interpreter cycle counting */ + u32 subCycleStep; // warning: changing anything in psxRegisters requires update of all - // asm in libpcsxcore/new_dynarec/, but this member can be replaced - u32 reserved[2]; + // asm in libpcsxcore/new_dynarec/ } psxRegisters; extern psxRegisters psxRegs; diff --git a/maemo/main.c b/maemo/main.c index 91aa2e785..77dbcd6be 100644 --- a/maemo/main.c +++ b/maemo/main.c @@ -22,7 +22,6 @@ #include "maemo_common.h" extern int in_enable_vibration; -extern int cycle_multiplier; extern int in_type1, in_type2; accel_option accelOptions; @@ -258,7 +257,7 @@ int main(int argc, char **argv) else if (!strcmp(argv[i], "-mcd1")) sprintf(Config.Mcd1, "%s", argv[++i]); else if (!strcmp(argv[i], "-mcd2")) sprintf(Config.Mcd2, "%s", argv[++i]); - else if (!strcmp(argv[i], "-cpuclock")) cycle_multiplier = 10000 / atol(argv[++i]); + else if (!strcmp(argv[i], 
"-cpuclock")) Config.cycle_multiplier = 10000 / atol(argv[++i]); else if (!strcmp(argv[i], "-guncon")) in_type1 = PSE_PAD_TYPE_GUNCON; else if (!strcmp(argv[i], "-gunnotrigger")) g_opts |= OPT_TSGUN_NOTRIGGER; else if (!strcmp(argv[i], "-analog")) in_type1 = PSE_PAD_TYPE_ANALOGPAD; From acc415b3040edfcc91226955f39b405f09cca430 Mon Sep 17 00:00:00 2001 From: Sakitoshi Date: Wed, 23 Jan 2019 22:19:49 -0300 Subject: [PATCH 226/597] updated gaussian interpolation filter the previous gaussian filter was pretty old and inaccurate, the new filter was taken from the updated peops sound plugin 1.10b. --- plugins/dfsound/gauss_i.h | 421 ++++++++++++++++++++++++++------------ plugins/dfsound/spu.c | 10 +- plugins/dfsound/xa.c | 60 +++--- 3 files changed, 326 insertions(+), 165 deletions(-) diff --git a/plugins/dfsound/gauss_i.h b/plugins/dfsound/gauss_i.h index 4405e57a0..012cf701d 100644 --- a/plugins/dfsound/gauss_i.h +++ b/plugins/dfsound/gauss_i.h @@ -5,6 +5,7 @@ copyright : (C) 2003 by Chris Moeller, eh, whatever email : chris@kode54.tk ***************************************************************************/ + /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * @@ -15,136 +16,296 @@ * * ***************************************************************************/ +//*************************************************************************// +// History of changes: +// +// 2003/02/08 - kode54 +// - generated by interleaving table from gauss.h from the libopenspc +// project; a gaussian bell curve table logged from the SPC-700, +// though Neill says he logged the same curve from a PSX SPU. Also +// says that interleaving the coefficients together runs faster. Meh. 
+// +//*************************************************************************// + #ifndef GAUSS_H #define GAUSS_H -static const short gauss[]={ - 0x172, 0x519, 0x176, 0x000, 0x16E, 0x519, 0x17A, 0x000, - 0x16A, 0x518, 0x17D, 0x000, 0x166, 0x518, 0x181, 0x000, - 0x162, 0x518, 0x185, 0x000, 0x15F, 0x518, 0x189, 0x000, - 0x15B, 0x518, 0x18D, 0x000, 0x157, 0x517, 0x191, 0x000, - 0x153, 0x517, 0x195, 0x000, 0x150, 0x517, 0x19A, 0x000, - 0x14C, 0x516, 0x19E, 0x000, 0x148, 0x516, 0x1A2, 0x000, - 0x145, 0x515, 0x1A6, 0x000, 0x141, 0x514, 0x1AA, 0x000, - 0x13E, 0x514, 0x1AE, 0x000, 0x13A, 0x513, 0x1B2, 0x000, - 0x137, 0x512, 0x1B7, 0x001, 0x133, 0x511, 0x1BB, 0x001, - 0x130, 0x511, 0x1BF, 0x001, 0x12C, 0x510, 0x1C3, 0x001, - 0x129, 0x50F, 0x1C8, 0x001, 0x125, 0x50E, 0x1CC, 0x001, - 0x122, 0x50D, 0x1D0, 0x001, 0x11E, 0x50C, 0x1D5, 0x001, - 0x11B, 0x50B, 0x1D9, 0x001, 0x118, 0x50A, 0x1DD, 0x001, - 0x114, 0x508, 0x1E2, 0x001, 0x111, 0x507, 0x1E6, 0x002, - 0x10E, 0x506, 0x1EB, 0x002, 0x10B, 0x504, 0x1EF, 0x002, - 0x107, 0x503, 0x1F3, 0x002, 0x104, 0x502, 0x1F8, 0x002, - 0x101, 0x500, 0x1FC, 0x002, 0x0FE, 0x4FF, 0x201, 0x002, - 0x0FB, 0x4FD, 0x205, 0x003, 0x0F8, 0x4FB, 0x20A, 0x003, - 0x0F5, 0x4FA, 0x20F, 0x003, 0x0F2, 0x4F8, 0x213, 0x003, - 0x0EF, 0x4F6, 0x218, 0x003, 0x0EC, 0x4F5, 0x21C, 0x004, - 0x0E9, 0x4F3, 0x221, 0x004, 0x0E6, 0x4F1, 0x226, 0x004, - 0x0E3, 0x4EF, 0x22A, 0x004, 0x0E0, 0x4ED, 0x22F, 0x004, - 0x0DD, 0x4EB, 0x233, 0x005, 0x0DA, 0x4E9, 0x238, 0x005, - 0x0D7, 0x4E7, 0x23D, 0x005, 0x0D4, 0x4E5, 0x241, 0x005, - 0x0D2, 0x4E3, 0x246, 0x006, 0x0CF, 0x4E0, 0x24B, 0x006, - 0x0CC, 0x4DE, 0x250, 0x006, 0x0C9, 0x4DC, 0x254, 0x006, - 0x0C7, 0x4D9, 0x259, 0x007, 0x0C4, 0x4D7, 0x25E, 0x007, - 0x0C1, 0x4D5, 0x263, 0x007, 0x0BF, 0x4D2, 0x267, 0x008, - 0x0BC, 0x4D0, 0x26C, 0x008, 0x0BA, 0x4CD, 0x271, 0x008, - 0x0B7, 0x4CB, 0x276, 0x009, 0x0B4, 0x4C8, 0x27B, 0x009, - 0x0B2, 0x4C5, 0x280, 0x009, 0x0AF, 0x4C3, 0x284, 0x00A, - 0x0AD, 0x4C0, 0x289, 0x00A, 0x0AB, 0x4BD, 0x28E, 
0x00A, - 0x0A8, 0x4BA, 0x293, 0x00B, 0x0A6, 0x4B7, 0x298, 0x00B, - 0x0A3, 0x4B5, 0x29D, 0x00B, 0x0A1, 0x4B2, 0x2A2, 0x00C, - 0x09F, 0x4AF, 0x2A6, 0x00C, 0x09C, 0x4AC, 0x2AB, 0x00D, - 0x09A, 0x4A9, 0x2B0, 0x00D, 0x098, 0x4A6, 0x2B5, 0x00E, - 0x096, 0x4A2, 0x2BA, 0x00E, 0x093, 0x49F, 0x2BF, 0x00F, - 0x091, 0x49C, 0x2C4, 0x00F, 0x08F, 0x499, 0x2C9, 0x00F, - 0x08D, 0x496, 0x2CE, 0x010, 0x08B, 0x492, 0x2D3, 0x010, - 0x089, 0x48F, 0x2D8, 0x011, 0x086, 0x48C, 0x2DC, 0x011, - 0x084, 0x488, 0x2E1, 0x012, 0x082, 0x485, 0x2E6, 0x013, - 0x080, 0x481, 0x2EB, 0x013, 0x07E, 0x47E, 0x2F0, 0x014, - 0x07C, 0x47A, 0x2F5, 0x014, 0x07A, 0x477, 0x2FA, 0x015, - 0x078, 0x473, 0x2FF, 0x015, 0x076, 0x470, 0x304, 0x016, - 0x075, 0x46C, 0x309, 0x017, 0x073, 0x468, 0x30E, 0x017, - 0x071, 0x465, 0x313, 0x018, 0x06F, 0x461, 0x318, 0x018, - 0x06D, 0x45D, 0x31D, 0x019, 0x06B, 0x459, 0x322, 0x01A, - 0x06A, 0x455, 0x326, 0x01B, 0x068, 0x452, 0x32B, 0x01B, - 0x066, 0x44E, 0x330, 0x01C, 0x064, 0x44A, 0x335, 0x01D, - 0x063, 0x446, 0x33A, 0x01D, 0x061, 0x442, 0x33F, 0x01E, - 0x05F, 0x43E, 0x344, 0x01F, 0x05E, 0x43A, 0x349, 0x020, - 0x05C, 0x436, 0x34E, 0x020, 0x05A, 0x432, 0x353, 0x021, - 0x059, 0x42E, 0x357, 0x022, 0x057, 0x42A, 0x35C, 0x023, - 0x056, 0x425, 0x361, 0x024, 0x054, 0x421, 0x366, 0x024, - 0x053, 0x41D, 0x36B, 0x025, 0x051, 0x419, 0x370, 0x026, - 0x050, 0x415, 0x374, 0x027, 0x04E, 0x410, 0x379, 0x028, - 0x04D, 0x40C, 0x37E, 0x029, 0x04C, 0x408, 0x383, 0x02A, - 0x04A, 0x403, 0x388, 0x02B, 0x049, 0x3FF, 0x38C, 0x02C, - 0x047, 0x3FB, 0x391, 0x02D, 0x046, 0x3F6, 0x396, 0x02E, - 0x045, 0x3F2, 0x39B, 0x02F, 0x043, 0x3ED, 0x39F, 0x030, - 0x042, 0x3E9, 0x3A4, 0x031, 0x041, 0x3E5, 0x3A9, 0x032, - 0x040, 0x3E0, 0x3AD, 0x033, 0x03E, 0x3DC, 0x3B2, 0x034, - 0x03D, 0x3D7, 0x3B7, 0x035, 0x03C, 0x3D2, 0x3BB, 0x036, - 0x03B, 0x3CE, 0x3C0, 0x037, 0x03A, 0x3C9, 0x3C5, 0x038, - 0x038, 0x3C5, 0x3C9, 0x03A, 0x037, 0x3C0, 0x3CE, 0x03B, - 0x036, 0x3BB, 0x3D2, 0x03C, 0x035, 0x3B7, 0x3D7, 0x03D, - 0x034, 0x3B2, 
0x3DC, 0x03E, 0x033, 0x3AD, 0x3E0, 0x040, - 0x032, 0x3A9, 0x3E5, 0x041, 0x031, 0x3A4, 0x3E9, 0x042, - 0x030, 0x39F, 0x3ED, 0x043, 0x02F, 0x39B, 0x3F2, 0x045, - 0x02E, 0x396, 0x3F6, 0x046, 0x02D, 0x391, 0x3FB, 0x047, - 0x02C, 0x38C, 0x3FF, 0x049, 0x02B, 0x388, 0x403, 0x04A, - 0x02A, 0x383, 0x408, 0x04C, 0x029, 0x37E, 0x40C, 0x04D, - 0x028, 0x379, 0x410, 0x04E, 0x027, 0x374, 0x415, 0x050, - 0x026, 0x370, 0x419, 0x051, 0x025, 0x36B, 0x41D, 0x053, - 0x024, 0x366, 0x421, 0x054, 0x024, 0x361, 0x425, 0x056, - 0x023, 0x35C, 0x42A, 0x057, 0x022, 0x357, 0x42E, 0x059, - 0x021, 0x353, 0x432, 0x05A, 0x020, 0x34E, 0x436, 0x05C, - 0x020, 0x349, 0x43A, 0x05E, 0x01F, 0x344, 0x43E, 0x05F, - 0x01E, 0x33F, 0x442, 0x061, 0x01D, 0x33A, 0x446, 0x063, - 0x01D, 0x335, 0x44A, 0x064, 0x01C, 0x330, 0x44E, 0x066, - 0x01B, 0x32B, 0x452, 0x068, 0x01B, 0x326, 0x455, 0x06A, - 0x01A, 0x322, 0x459, 0x06B, 0x019, 0x31D, 0x45D, 0x06D, - 0x018, 0x318, 0x461, 0x06F, 0x018, 0x313, 0x465, 0x071, - 0x017, 0x30E, 0x468, 0x073, 0x017, 0x309, 0x46C, 0x075, - 0x016, 0x304, 0x470, 0x076, 0x015, 0x2FF, 0x473, 0x078, - 0x015, 0x2FA, 0x477, 0x07A, 0x014, 0x2F5, 0x47A, 0x07C, - 0x014, 0x2F0, 0x47E, 0x07E, 0x013, 0x2EB, 0x481, 0x080, - 0x013, 0x2E6, 0x485, 0x082, 0x012, 0x2E1, 0x488, 0x084, - 0x011, 0x2DC, 0x48C, 0x086, 0x011, 0x2D8, 0x48F, 0x089, - 0x010, 0x2D3, 0x492, 0x08B, 0x010, 0x2CE, 0x496, 0x08D, - 0x00F, 0x2C9, 0x499, 0x08F, 0x00F, 0x2C4, 0x49C, 0x091, - 0x00F, 0x2BF, 0x49F, 0x093, 0x00E, 0x2BA, 0x4A2, 0x096, - 0x00E, 0x2B5, 0x4A6, 0x098, 0x00D, 0x2B0, 0x4A9, 0x09A, - 0x00D, 0x2AB, 0x4AC, 0x09C, 0x00C, 0x2A6, 0x4AF, 0x09F, - 0x00C, 0x2A2, 0x4B2, 0x0A1, 0x00B, 0x29D, 0x4B5, 0x0A3, - 0x00B, 0x298, 0x4B7, 0x0A6, 0x00B, 0x293, 0x4BA, 0x0A8, - 0x00A, 0x28E, 0x4BD, 0x0AB, 0x00A, 0x289, 0x4C0, 0x0AD, - 0x00A, 0x284, 0x4C3, 0x0AF, 0x009, 0x280, 0x4C5, 0x0B2, - 0x009, 0x27B, 0x4C8, 0x0B4, 0x009, 0x276, 0x4CB, 0x0B7, - 0x008, 0x271, 0x4CD, 0x0BA, 0x008, 0x26C, 0x4D0, 0x0BC, - 0x008, 0x267, 0x4D2, 0x0BF, 0x007, 0x263, 
0x4D5, 0x0C1, - 0x007, 0x25E, 0x4D7, 0x0C4, 0x007, 0x259, 0x4D9, 0x0C7, - 0x006, 0x254, 0x4DC, 0x0C9, 0x006, 0x250, 0x4DE, 0x0CC, - 0x006, 0x24B, 0x4E0, 0x0CF, 0x006, 0x246, 0x4E3, 0x0D2, - 0x005, 0x241, 0x4E5, 0x0D4, 0x005, 0x23D, 0x4E7, 0x0D7, - 0x005, 0x238, 0x4E9, 0x0DA, 0x005, 0x233, 0x4EB, 0x0DD, - 0x004, 0x22F, 0x4ED, 0x0E0, 0x004, 0x22A, 0x4EF, 0x0E3, - 0x004, 0x226, 0x4F1, 0x0E6, 0x004, 0x221, 0x4F3, 0x0E9, - 0x004, 0x21C, 0x4F5, 0x0EC, 0x003, 0x218, 0x4F6, 0x0EF, - 0x003, 0x213, 0x4F8, 0x0F2, 0x003, 0x20F, 0x4FA, 0x0F5, - 0x003, 0x20A, 0x4FB, 0x0F8, 0x003, 0x205, 0x4FD, 0x0FB, - 0x002, 0x201, 0x4FF, 0x0FE, 0x002, 0x1FC, 0x500, 0x101, - 0x002, 0x1F8, 0x502, 0x104, 0x002, 0x1F3, 0x503, 0x107, - 0x002, 0x1EF, 0x504, 0x10B, 0x002, 0x1EB, 0x506, 0x10E, - 0x002, 0x1E6, 0x507, 0x111, 0x001, 0x1E2, 0x508, 0x114, - 0x001, 0x1DD, 0x50A, 0x118, 0x001, 0x1D9, 0x50B, 0x11B, - 0x001, 0x1D5, 0x50C, 0x11E, 0x001, 0x1D0, 0x50D, 0x122, - 0x001, 0x1CC, 0x50E, 0x125, 0x001, 0x1C8, 0x50F, 0x129, - 0x001, 0x1C3, 0x510, 0x12C, 0x001, 0x1BF, 0x511, 0x130, - 0x001, 0x1BB, 0x511, 0x133, 0x001, 0x1B7, 0x512, 0x137, - 0x000, 0x1B2, 0x513, 0x13A, 0x000, 0x1AE, 0x514, 0x13E, - 0x000, 0x1AA, 0x514, 0x141, 0x000, 0x1A6, 0x515, 0x145, - 0x000, 0x1A2, 0x516, 0x148, 0x000, 0x19E, 0x516, 0x14C, - 0x000, 0x19A, 0x517, 0x150, 0x000, 0x195, 0x517, 0x153, - 0x000, 0x191, 0x517, 0x157, 0x000, 0x18D, 0x518, 0x15B, - 0x000, 0x189, 0x518, 0x15F, 0x000, 0x185, 0x518, 0x162, - 0x000, 0x181, 0x518, 0x166, 0x000, 0x17D, 0x518, 0x16A, - 0x000, 0x17A, 0x519, 0x16E, 0x000, 0x176, 0x519, 0x172}; -#endif + +/* +128 * 4 table +- 0 = past #3 +- 1 = past #2 +- 2 = past #1 +- 3 = past #0 + + +offset 0 +for(0) + for(256) + rev(256) + rev(0) +*/ + + +// NOTE: Dr. 
Hell +// - Excel NORMDIST($A6,2,0.567,FALSE) [0-4] = 98% + + +// Mednafen's table (PSX) 99-100% +const int gauss[]={ + 0x12c7, 0x59b3, 0x1307, 0xffffffff, + 0x1288, 0x59b2, 0x1347, 0xffffffff, + 0x1249, 0x59b0, 0x1388, 0xffffffff, + 0x120b, 0x59ad, 0x13c9, 0xffffffff, + 0x11cd, 0x59a9, 0x140b, 0xffffffff, + 0x118f, 0x59a4, 0x144d, 0xffffffff, + 0x1153, 0x599e, 0x1490, 0xffffffff, + 0x1116, 0x5997, 0x14d4, 0xffffffff, + 0x10db, 0x598f, 0x1517, 0xffffffff, + 0x109f, 0x5986, 0x155c, 0xffffffff, + 0x1065, 0x597c, 0x15a0, 0xffffffff, + 0x102a, 0x5971, 0x15e6, 0xffffffff, + 0x0ff1, 0x5965, 0x162c, 0xffffffff, + 0x0fb7, 0x5958, 0x1672, 0xffffffff, + 0x0f7f, 0x5949, 0x16b9, 0xffffffff, + 0x0f46, 0x593a, 0x1700, 0xffffffff, + 0x0f0f, 0x592a, 0x1747, 0x0000, + 0x0ed7, 0x5919, 0x1790, 0x0000, + 0x0ea1, 0x5907, 0x17d8, 0x0000, + 0x0e6b, 0x58f4, 0x1821, 0x0000, + 0x0e35, 0x58e0, 0x186b, 0x0000, + 0x0e00, 0x58cb, 0x18b5, 0x0000, + 0x0dcb, 0x58b5, 0x1900, 0x0000, + 0x0d97, 0x589e, 0x194b, 0x0001, + 0x0d63, 0x5886, 0x1996, 0x0001, + 0x0d30, 0x586d, 0x19e2, 0x0001, + 0x0cfd, 0x5853, 0x1a2e, 0x0001, + 0x0ccb, 0x5838, 0x1a7b, 0x0002, + 0x0c99, 0x581c, 0x1ac8, 0x0002, + 0x0c68, 0x57ff, 0x1b16, 0x0002, + 0x0c38, 0x57e2, 0x1b64, 0x0003, + 0x0c07, 0x57c3, 0x1bb3, 0x0003, + 0x0bd8, 0x57a3, 0x1c02, 0x0003, + 0x0ba9, 0x5782, 0x1c51, 0x0004, + 0x0b7a, 0x5761, 0x1ca1, 0x0004, + 0x0b4c, 0x573e, 0x1cf1, 0x0005, + 0x0b1e, 0x571b, 0x1d42, 0x0005, + 0x0af1, 0x56f6, 0x1d93, 0x0006, + 0x0ac4, 0x56d1, 0x1de5, 0x0007, + 0x0a98, 0x56ab, 0x1e37, 0x0007, + 0x0a6c, 0x5684, 0x1e89, 0x0008, + 0x0a40, 0x565b, 0x1edc, 0x0009, + 0x0a16, 0x5632, 0x1f2f, 0x0009, + 0x09eb, 0x5609, 0x1f82, 0x000a, + 0x09c1, 0x55de, 0x1fd6, 0x000b, + 0x0998, 0x55b2, 0x202a, 0x000c, + 0x096f, 0x5585, 0x207f, 0x000d, + 0x0946, 0x5558, 0x20d4, 0x000e, + 0x091e, 0x5529, 0x2129, 0x000f, + 0x08f7, 0x54fa, 0x217f, 0x0010, + 0x08d0, 0x54ca, 0x21d5, 0x0011, + 0x08a9, 0x5499, 0x222c, 0x0012, + 0x0883, 0x5467, 0x2282, 0x0013, + 0x085d, 
0x5434, 0x22da, 0x0015, + 0x0838, 0x5401, 0x2331, 0x0016, + 0x0813, 0x53cc, 0x2389, 0x0018, + 0x07ef, 0x5397, 0x23e1, 0x0019, + 0x07cb, 0x5361, 0x2439, 0x001b, + 0x07a7, 0x532a, 0x2492, 0x001c, + 0x0784, 0x52f3, 0x24eb, 0x001e, + 0x0762, 0x52ba, 0x2545, 0x0020, + 0x0740, 0x5281, 0x259e, 0x0021, + 0x071e, 0x5247, 0x25f8, 0x0023, + 0x06fd, 0x520c, 0x2653, 0x0025, + 0x06dc, 0x51d0, 0x26ad, 0x0027, + 0x06bb, 0x5194, 0x2708, 0x0029, + 0x069b, 0x5156, 0x2763, 0x002c, + 0x067c, 0x5118, 0x27be, 0x002e, + 0x065c, 0x50da, 0x281a, 0x0030, + 0x063e, 0x509a, 0x2876, 0x0033, + 0x061f, 0x505a, 0x28d2, 0x0035, + 0x0601, 0x5019, 0x292e, 0x0038, + 0x05e4, 0x4fd7, 0x298b, 0x003a, + 0x05c7, 0x4f95, 0x29e7, 0x003d, + 0x05aa, 0x4f52, 0x2a44, 0x0040, + 0x058e, 0x4f0e, 0x2aa1, 0x0043, + 0x0572, 0x4ec9, 0x2aff, 0x0046, + 0x0556, 0x4e84, 0x2b5c, 0x0049, + 0x053b, 0x4e3e, 0x2bba, 0x004d, + 0x0520, 0x4df7, 0x2c18, 0x0050, + 0x0506, 0x4db0, 0x2c76, 0x0054, + 0x04ec, 0x4d68, 0x2cd4, 0x0057, + 0x04d2, 0x4d20, 0x2d33, 0x005b, + 0x04b9, 0x4cd7, 0x2d91, 0x005f, + 0x04a0, 0x4c8d, 0x2df0, 0x0063, + 0x0488, 0x4c42, 0x2e4f, 0x0067, + 0x0470, 0x4bf7, 0x2eae, 0x006b, + 0x0458, 0x4bac, 0x2f0d, 0x006f, + 0x0441, 0x4b5f, 0x2f6c, 0x0074, + 0x042a, 0x4b13, 0x2fcc, 0x0078, + 0x0413, 0x4ac5, 0x302b, 0x007d, + 0x03fc, 0x4a77, 0x308b, 0x0082, + 0x03e7, 0x4a29, 0x30ea, 0x0087, + 0x03d1, 0x49d9, 0x314a, 0x008c, + 0x03bc, 0x498a, 0x31aa, 0x0091, + 0x03a7, 0x493a, 0x3209, 0x0096, + 0x0392, 0x48e9, 0x3269, 0x009c, + 0x037e, 0x4898, 0x32c9, 0x00a1, + 0x036a, 0x4846, 0x3329, 0x00a7, + 0x0356, 0x47f4, 0x3389, 0x00ad, + 0x0343, 0x47a1, 0x33e9, 0x00b3, + 0x0330, 0x474e, 0x3449, 0x00ba, + 0x031d, 0x46fa, 0x34a9, 0x00c0, + 0x030b, 0x46a6, 0x3509, 0x00c7, + 0x02f9, 0x4651, 0x3569, 0x00cd, + 0x02e7, 0x45fc, 0x35c9, 0x00d4, + 0x02d6, 0x45a6, 0x3629, 0x00db, + 0x02c4, 0x4550, 0x3689, 0x00e3, + 0x02b4, 0x44fa, 0x36e8, 0x00ea, + 0x02a3, 0x44a3, 0x3748, 0x00f2, + 0x0293, 0x444c, 0x37a8, 0x00fa, + 0x0283, 0x43f4, 0x3807, 0x0101, + 
0x0273, 0x439c, 0x3867, 0x010a, + 0x0264, 0x4344, 0x38c6, 0x0112, + 0x0255, 0x42eb, 0x3926, 0x011b, + 0x0246, 0x4292, 0x3985, 0x0123, + 0x0237, 0x4239, 0x39e4, 0x012c, + 0x0229, 0x41df, 0x3a43, 0x0135, + 0x021b, 0x4185, 0x3aa2, 0x013f, + 0x020d, 0x412a, 0x3b00, 0x0148, + 0x0200, 0x40d0, 0x3b5f, 0x0152, + 0x01f2, 0x4074, 0x3bbd, 0x015c, + 0x01e5, 0x4019, 0x3c1b, 0x0166, + 0x01d9, 0x3fbd, 0x3c79, 0x0171, + 0x01cc, 0x3f62, 0x3cd7, 0x017b, + 0x01c0, 0x3f05, 0x3d35, 0x0186, + 0x01b4, 0x3ea9, 0x3d92, 0x0191, + 0x01a8, 0x3e4c, 0x3def, 0x019c, + 0x019c, 0x3def, 0x3e4c, 0x01a8, + 0x0191, 0x3d92, 0x3ea9, 0x01b4, + 0x0186, 0x3d35, 0x3f05, 0x01c0, + 0x017b, 0x3cd7, 0x3f62, 0x01cc, + 0x0171, 0x3c79, 0x3fbd, 0x01d9, + 0x0166, 0x3c1b, 0x4019, 0x01e5, + 0x015c, 0x3bbd, 0x4074, 0x01f2, + 0x0152, 0x3b5f, 0x40d0, 0x0200, + 0x0148, 0x3b00, 0x412a, 0x020d, + 0x013f, 0x3aa2, 0x4185, 0x021b, + 0x0135, 0x3a43, 0x41df, 0x0229, + 0x012c, 0x39e4, 0x4239, 0x0237, + 0x0123, 0x3985, 0x4292, 0x0246, + 0x011b, 0x3926, 0x42eb, 0x0255, + 0x0112, 0x38c6, 0x4344, 0x0264, + 0x010a, 0x3867, 0x439c, 0x0273, + 0x0101, 0x3807, 0x43f4, 0x0283, + 0x00fa, 0x37a8, 0x444c, 0x0293, + 0x00f2, 0x3748, 0x44a3, 0x02a3, + 0x00ea, 0x36e8, 0x44fa, 0x02b4, + 0x00e3, 0x3689, 0x4550, 0x02c4, + 0x00db, 0x3629, 0x45a6, 0x02d6, + 0x00d4, 0x35c9, 0x45fc, 0x02e7, + 0x00cd, 0x3569, 0x4651, 0x02f9, + 0x00c7, 0x3509, 0x46a6, 0x030b, + 0x00c0, 0x34a9, 0x46fa, 0x031d, + 0x00ba, 0x3449, 0x474e, 0x0330, + 0x00b3, 0x33e9, 0x47a1, 0x0343, + 0x00ad, 0x3389, 0x47f4, 0x0356, + 0x00a7, 0x3329, 0x4846, 0x036a, + 0x00a1, 0x32c9, 0x4898, 0x037e, + 0x009c, 0x3269, 0x48e9, 0x0392, + 0x0096, 0x3209, 0x493a, 0x03a7, + 0x0091, 0x31aa, 0x498a, 0x03bc, + 0x008c, 0x314a, 0x49d9, 0x03d1, + 0x0087, 0x30ea, 0x4a29, 0x03e7, + 0x0082, 0x308b, 0x4a77, 0x03fc, + 0x007d, 0x302b, 0x4ac5, 0x0413, + 0x0078, 0x2fcc, 0x4b13, 0x042a, + 0x0074, 0x2f6c, 0x4b5f, 0x0441, + 0x006f, 0x2f0d, 0x4bac, 0x0458, + 0x006b, 0x2eae, 0x4bf7, 0x0470, + 0x0067, 0x2e4f, 0x4c42, 
0x0488, + 0x0063, 0x2df0, 0x4c8d, 0x04a0, + 0x005f, 0x2d91, 0x4cd7, 0x04b9, + 0x005b, 0x2d33, 0x4d20, 0x04d2, + 0x0057, 0x2cd4, 0x4d68, 0x04ec, + 0x0054, 0x2c76, 0x4db0, 0x0506, + 0x0050, 0x2c18, 0x4df7, 0x0520, + 0x004d, 0x2bba, 0x4e3e, 0x053b, + 0x0049, 0x2b5c, 0x4e84, 0x0556, + 0x0046, 0x2aff, 0x4ec9, 0x0572, + 0x0043, 0x2aa1, 0x4f0e, 0x058e, + 0x0040, 0x2a44, 0x4f52, 0x05aa, + 0x003d, 0x29e7, 0x4f95, 0x05c7, + 0x003a, 0x298b, 0x4fd7, 0x05e4, + 0x0038, 0x292e, 0x5019, 0x0601, + 0x0035, 0x28d2, 0x505a, 0x061f, + 0x0033, 0x2876, 0x509a, 0x063e, + 0x0030, 0x281a, 0x50da, 0x065c, + 0x002e, 0x27be, 0x5118, 0x067c, + 0x002c, 0x2763, 0x5156, 0x069b, + 0x0029, 0x2708, 0x5194, 0x06bb, + 0x0027, 0x26ad, 0x51d0, 0x06dc, + 0x0025, 0x2653, 0x520c, 0x06fd, + 0x0023, 0x25f8, 0x5247, 0x071e, + 0x0021, 0x259e, 0x5281, 0x0740, + 0x0020, 0x2545, 0x52ba, 0x0762, + 0x001e, 0x24eb, 0x52f3, 0x0784, + 0x001c, 0x2492, 0x532a, 0x07a7, + 0x001b, 0x2439, 0x5361, 0x07cb, + 0x0019, 0x23e1, 0x5397, 0x07ef, + 0x0018, 0x2389, 0x53cc, 0x0813, + 0x0016, 0x2331, 0x5401, 0x0838, + 0x0015, 0x22da, 0x5434, 0x085d, + 0x0013, 0x2282, 0x5467, 0x0883, + 0x0012, 0x222c, 0x5499, 0x08a9, + 0x0011, 0x21d5, 0x54ca, 0x08d0, + 0x0010, 0x217f, 0x54fa, 0x08f7, + 0x000f, 0x2129, 0x5529, 0x091e, + 0x000e, 0x20d4, 0x5558, 0x0946, + 0x000d, 0x207f, 0x5585, 0x096f, + 0x000c, 0x202a, 0x55b2, 0x0998, + 0x000b, 0x1fd6, 0x55de, 0x09c1, + 0x000a, 0x1f82, 0x5609, 0x09eb, + 0x0009, 0x1f2f, 0x5632, 0x0a16, + 0x0009, 0x1edc, 0x565b, 0x0a40, + 0x0008, 0x1e89, 0x5684, 0x0a6c, + 0x0007, 0x1e37, 0x56ab, 0x0a98, + 0x0007, 0x1de5, 0x56d1, 0x0ac4, + 0x0006, 0x1d93, 0x56f6, 0x0af1, + 0x0005, 0x1d42, 0x571b, 0x0b1e, + 0x0005, 0x1cf1, 0x573e, 0x0b4c, + 0x0004, 0x1ca1, 0x5761, 0x0b7a, + 0x0004, 0x1c51, 0x5782, 0x0ba9, + 0x0003, 0x1c02, 0x57a3, 0x0bd8, + 0x0003, 0x1bb3, 0x57c3, 0x0c07, + 0x0003, 0x1b64, 0x57e2, 0x0c38, + 0x0002, 0x1b16, 0x57ff, 0x0c68, + 0x0002, 0x1ac8, 0x581c, 0x0c99, + 0x0002, 0x1a7b, 0x5838, 0x0ccb, + 0x0001, 0x1a2e, 
0x5853, 0x0cfd, + 0x0001, 0x19e2, 0x586d, 0x0d30, + 0x0001, 0x1996, 0x5886, 0x0d63, + 0x0001, 0x194b, 0x589e, 0x0d97, + 0x0000, 0x1900, 0x58b5, 0x0dcb, + 0x0000, 0x18b5, 0x58cb, 0x0e00, + 0x0000, 0x186b, 0x58e0, 0x0e35, + 0x0000, 0x1821, 0x58f4, 0x0e6b, + 0x0000, 0x17d8, 0x5907, 0x0ea1, + 0x0000, 0x1790, 0x5919, 0x0ed7, + 0x0000, 0x1747, 0x592a, 0x0f0f, + 0xffffffff, 0x1700, 0x593a, 0x0f46, + 0xffffffff, 0x16b9, 0x5949, 0x0f7f, + 0xffffffff, 0x1672, 0x5958, 0x0fb7, + 0xffffffff, 0x162c, 0x5965, 0x0ff1, + 0xffffffff, 0x15e6, 0x5971, 0x102a, + 0xffffffff, 0x15a0, 0x597c, 0x1065, + 0xffffffff, 0x155c, 0x5986, 0x109f, + 0xffffffff, 0x1517, 0x598f, 0x10db, + 0xffffffff, 0x14d4, 0x5997, 0x1116, + 0xffffffff, 0x1490, 0x599e, 0x1153, + 0xffffffff, 0x144d, 0x59a4, 0x118f, + 0xffffffff, 0x140b, 0x59a9, 0x11cd, + 0xffffffff, 0x13c9, 0x59ad, 0x120b, + 0xffffffff, 0x1388, 0x59b0, 0x1249, + 0xffffffff, 0x1347, 0x59b2, 0x1288, + 0xffffffff, 0x1307, 0x59b3, 0x12c7, +}; + +#endif \ No newline at end of file diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 66c7651ef..3b4c051df 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -348,11 +348,11 @@ INLINE int iGetInterpolationVal(int *SB, int sinc, int spos, int fmod_freq) int vl, vr;int gpos; vl = (spos >> 6) & ~3; gpos = SB[28]; - vr=(gauss[vl]*(int)gval0)&~2047; - vr+=(gauss[vl+1]*gval(1))&~2047; - vr+=(gauss[vl+2]*gval(2))&~2047; - vr+=(gauss[vl+3]*gval(3))&~2047; - fa = vr>>11; + vr=(gauss[vl]*(int)gval0) >> 15; + vr+=(gauss[vl+1]*gval(1)) >> 15; + vr+=(gauss[vl+2]*gval(2)) >> 15; + vr+=(gauss[vl+3]*gval(3)) >> 15; + fa = vr; } break; //--------------------------------------------------// case 1: // simple interpolation diff --git a/plugins/dfsound/xa.c b/plugins/dfsound/xa.c index f62a12d20..c7a84fd01 100644 --- a/plugins/dfsound/xa.c +++ b/plugins/dfsound/xa.c @@ -200,16 +200,16 @@ INLINE void FeedXA(xa_decode_t *xap) spos -= 0x10000L; } vl = (spos >> 6) & ~3; - vr=(gauss[vl]*gvall0)&~2047; - 
vr+=(gauss[vl+1]*gvall(1))&~2047; - vr+=(gauss[vl+2]*gvall(2))&~2047; - vr+=(gauss[vl+3]*gvall(3))&~2047; - l= (vr >> 11) & 0xffff; - vr=(gauss[vl]*gvalr0)&~2047; - vr+=(gauss[vl+1]*gvalr(1))&~2047; - vr+=(gauss[vl+2]*gvalr(2))&~2047; - vr+=(gauss[vl+3]*gvalr(3))&~2047; - l |= vr << 5; + vr=(gauss[vl]*gvall0) >> 15; + vr+=(gauss[vl+1]*gvall(1)) >> 15; + vr+=(gauss[vl+2]*gvall(2)) >> 15; + vr+=(gauss[vl+3]*gvall(3)) >> 15; + l= vr & 0xffff; + vr=(gauss[vl]*gvalr0) >> 15; + vr+=(gauss[vl+1]*gvalr(1)) >> 15; + vr+=(gauss[vl+2]*gvalr(2)) >> 15; + vr+=(gauss[vl+3]*gvalr(3)) >> 15; + l |= vr << 16; } else { @@ -258,16 +258,16 @@ INLINE void FeedXA(xa_decode_t *xap) spos -= 0x10000L; } vl = (spos >> 6) & ~3; - vr=(gauss[vl]*gvall0)&~2047; - vr+=(gauss[vl+1]*gvall(1))&~2047; - vr+=(gauss[vl+2]*gvall(2))&~2047; - vr+=(gauss[vl+3]*gvall(3))&~2047; - l= (vr >> 11) & 0xffff; - vr=(gauss[vl]*gvalr0)&~2047; - vr+=(gauss[vl+1]*gvalr(1))&~2047; - vr+=(gauss[vl+2]*gvalr(2))&~2047; - vr+=(gauss[vl+3]*gvalr(3))&~2047; - l |= vr << 5; + vr=(gauss[vl]*gvall0) >> 15; + vr+=(gauss[vl+1]*gvall(1)) >> 15; + vr+=(gauss[vl+2]*gvall(2)) >> 15; + vr+=(gauss[vl+3]*gvall(3)) >> 15; + l= vr & 0xffff; + vr=(gauss[vl]*gvalr0) >> 15; + vr+=(gauss[vl+1]*gvalr(1)) >> 15; + vr+=(gauss[vl+2]*gvalr(2)) >> 15; + vr+=(gauss[vl+3]*gvalr(3)) >> 15; + l |= vr << 16; } else { @@ -311,11 +311,11 @@ INLINE void FeedXA(xa_decode_t *xap) spos -= 0x10000L; } vl = (spos >> 6) & ~3; - vr=(gauss[vl]*gvall0)&~2047; - vr+=(gauss[vl+1]*gvall(1))&~2047; - vr+=(gauss[vl+2]*gvall(2))&~2047; - vr+=(gauss[vl+3]*gvall(3))&~2047; - l1=s= vr >> 11; + vr=(gauss[vl]*gvall0) >> 15; + vr+=(gauss[vl+1]*gvall(1)) >> 15; + vr+=(gauss[vl+2]*gvall(2)) >> 15; + vr+=(gauss[vl+3]*gvall(3)) >> 15; + l1=s= vr; l1 &= 0xffff; } else @@ -357,11 +357,11 @@ INLINE void FeedXA(xa_decode_t *xap) spos -= 0x10000L; } vl = (spos >> 6) & ~3; - vr=(gauss[vl]*gvall0)&~2047; - vr+=(gauss[vl+1]*gvall(1))&~2047; - vr+=(gauss[vl+2]*gvall(2))&~2047; - 
vr+=(gauss[vl+3]*gvall(3))&~2047; - l=s= vr >> 11; + vr=(gauss[vl]*gvall0) >> 15; + vr+=(gauss[vl+1]*gvall(1)) >> 15; + vr+=(gauss[vl+2]*gvall(2)) >> 15; + vr+=(gauss[vl+3]*gvall(3)) >> 15; + l=s= vr; } else { From 7a8d521fba9c86ae7b51369ce061bf63112b745f Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 13 Oct 2022 23:55:21 +0300 Subject: [PATCH 227/597] merge from libretro fork + minor modifications Reducing the diff as it's too hard to track down breakage when one works but not the other. --- frontend/blit320.h | 5 + frontend/cspace.h | 5 + frontend/in_tsbutton.h | 5 + frontend/libretro.c | 3403 +++++++++++++++----- frontend/libretro.h | 1926 ----------- frontend/libretro_core_options.h | 1557 +++++++++ frontend/libretro_core_options_intl.h | 552 ++++ frontend/link.T | 5 + frontend/main.c | 38 +- frontend/menu.c | 12 +- frontend/menu.h | 5 + frontend/nopic.h | 5 +- frontend/pl_gun_ts.h | 5 + frontend/plat.h | 5 + frontend/plat_omap.h | 3 + frontend/plugin.c | 18 +- frontend/plugin.h | 5 + frontend/plugin_lib.c | 18 +- frontend/plugin_lib.h | 19 +- include/config.h | 7 +- include/pcnt.h | 4 + include/psemu_plugin_defs.h | 17 +- jni/Android.mk | 281 +- jni/Application.mk | 2 +- libpcsxcore/cdrom.c | 2 +- libpcsxcore/database.c | 2 +- libpcsxcore/debug.c | 2 +- libpcsxcore/disr3000a.c | 22 +- libpcsxcore/gpu.h | 5 + libpcsxcore/gte_arm.h | 5 + libpcsxcore/gte_divider.h | 5 + libpcsxcore/gte_neon.h | 5 + libpcsxcore/lightrec/mem.h | 28 + libpcsxcore/misc.c | 75 +- libpcsxcore/new_dynarec/emu_if.c | 2 +- libpcsxcore/plugins.c | 6 +- libpcsxcore/ppf.c | 53 +- libpcsxcore/psxbios.c | 219 +- libpcsxcore/psxcommon.h | 4 + libpcsxcore/psxcounters.c | 2 +- libpcsxcore/psxdma.c | 10 +- libpcsxcore/psxhle.c | 14 +- libpcsxcore/psxinterpreter.c | 2 +- libpcsxcore/psxinterpreter.h | 3 + libpcsxcore/psxmem.c | 116 +- libpcsxcore/psxmem.h | 8 +- libpcsxcore/r3000a.c | 5 +- libpcsxcore/sio.c | 24 +- libpcsxcore/sjisfont.h | 5 + libpcsxcore/socket.c | 19 +- plugins/cdrcimg/cdrcimg.c 
| 2 +- plugins/cdrcimg/cdrcimg.h | 4 + plugins/dfinput/externals.h | 4 + plugins/dfinput/main.c | 14 + plugins/dfinput/main.h | 5 + plugins/dfinput/pad.c | 4 + plugins/dfsound/dma.h | 4 + plugins/dfsound/externals.h | 4 + plugins/dfsound/out.h | 4 + plugins/dfsound/registers.h | 4 + plugins/dfsound/spu.h | 5 + plugins/dfsound/spu_c64x.h | 5 + plugins/dfsound/spu_config.h | 5 + plugins/dfsound/stdafx.h | 5 + plugins/dfsound/xa.h | 7 +- plugins/dfxvideo/gpulib_if.c | 6 +- plugins/dfxvideo/soft.c | 10 +- plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h | 5 + plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 67 +- plugins/gpulib/gpu.h | 5 + plugins/gpulib/vout_sdl.c | 2 +- plugins/spunull/register.h | 4 + 72 files changed, 5664 insertions(+), 3056 deletions(-) delete mode 100755 frontend/libretro.h create mode 100644 frontend/libretro_core_options.h create mode 100644 frontend/libretro_core_options_intl.h create mode 100644 frontend/link.T create mode 100644 libpcsxcore/lightrec/mem.h diff --git a/frontend/blit320.h b/frontend/blit320.h index 434b52a09..ea1d2a54d 100644 --- a/frontend/blit320.h +++ b/frontend/blit320.h @@ -1,3 +1,8 @@ +#ifndef __BLIT320_H__ +#define __BLIT320_H__ + void blit320_640(void *dst, const void *src, int unused); void blit320_512(void *dst, const void *src, int unused); void blit320_368(void *dst, const void *src, int unused); + +#endif /* __BLIT320_H__ */ diff --git a/frontend/cspace.h b/frontend/cspace.h index 8c92d2d95..6dbd5e0f5 100644 --- a/frontend/cspace.h +++ b/frontend/cspace.h @@ -1,3 +1,6 @@ +#ifndef __CSPACE_H__ +#define __CSPACE_H__ + #ifdef __cplusplus extern "C" { @@ -19,3 +22,5 @@ void bgr888_to_uyvy(void *d, const void *s, int pixels); #ifdef __cplusplus } #endif + +#endif /* __CSPACE_H__ */ diff --git a/frontend/in_tsbutton.h b/frontend/in_tsbutton.h index 82fab29cc..65a178fb0 100644 --- a/frontend/in_tsbutton.h +++ b/frontend/in_tsbutton.h @@ -1 +1,6 @@ +#ifndef __IN_TSBUTTON_H__ +#define __IN_TSBUTTON_H__ + void in_tsbutton_init(void); 
+ +#endif /* __IN_TSBUTTON_H__ */ diff --git a/frontend/libretro.c b/frontend/libretro.c index 4c285cfbc..42f6151fe 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -15,6 +15,10 @@ #include #endif +#ifdef SWITCH +#include +#endif + #include "../libpcsxcore/misc.h" #include "../libpcsxcore/psxcounters.h" #include "../libpcsxcore/psxmem_map.h" @@ -22,190 +26,499 @@ #include "../libpcsxcore/cdrom.h" #include "../libpcsxcore/cdriso.h" #include "../libpcsxcore/cheat.h" +#include "../libpcsxcore/r3000a.h" #include "../plugins/dfsound/out.h" #include "../plugins/dfsound/spu_config.h" #include "../plugins/dfinput/externals.h" #include "cspace.h" #include "main.h" +#include "menu.h" #include "plugin.h" #include "plugin_lib.h" #include "arm_features.h" #include "revision.h" -#include "libretro.h" + +#include +#include "libretro_core_options.h" + +#ifdef USE_LIBRETRO_VFS +#include +#endif + +#ifdef _3DS +#include "3ds/3ds_utils.h" +#endif + +#define PORTS_NUMBER 8 + +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#ifndef MAX +#define MAX(a, b) ((a) > (b) ? 
(a) : (b)) +#endif + +#define ISHEXDEC ((buf[cursor] >= '0') && (buf[cursor] <= '9')) || ((buf[cursor] >= 'a') && (buf[cursor] <= 'f')) || ((buf[cursor] >= 'A') && (buf[cursor] <= 'F')) + +#define INTERNAL_FPS_SAMPLE_PERIOD 64 + +//hack to prevent retroarch freezing when reseting in the menu but not while running with the hot key +static int rebootemu = 0; static retro_video_refresh_t video_cb; static retro_input_poll_t input_poll_cb; static retro_input_state_t input_state_cb; static retro_environment_t environ_cb; static retro_audio_sample_batch_t audio_batch_cb; -static struct retro_rumble_interface rumble; +static retro_set_rumble_state_t rumble_cb; +static struct retro_log_callback logging; +static retro_log_printf_t log_cb; + +static unsigned msg_interface_version = 0; static void *vout_buf; +static void *vout_buf_ptr; static int vout_width, vout_height; static int vout_doffs_old, vout_fb_dirty; static bool vout_can_dupe; static bool duping_enable; +static bool found_bios; +static bool display_internal_fps = false; +static unsigned frame_count = 0; +static bool libretro_supports_bitmasks = false; +static bool libretro_supports_option_categories = false; +static bool show_input_settings = true; +#ifdef GPU_PEOPS +static bool show_advanced_gpu_peops_settings = true; +#endif +#ifdef GPU_UNAI +static bool show_advanced_gpu_unai_settings = true; +#endif +static float mouse_sensitivity = 1.0f; + +typedef enum +{ + FRAMESKIP_NONE = 0, + FRAMESKIP_AUTO, + FRAMESKIP_AUTO_THRESHOLD, + FRAMESKIP_FIXED_INTERVAL +} frameskip_type_t; + +static unsigned frameskip_type = FRAMESKIP_NONE; +static unsigned frameskip_threshold = 0; +static unsigned frameskip_interval = 0; +static unsigned frameskip_counter = 0; + +static int retro_audio_buff_active = false; +static unsigned retro_audio_buff_occupancy = 0; +static int retro_audio_buff_underrun = false; + +static unsigned retro_audio_latency = 0; +static int update_audio_latency = false; + +static unsigned previous_width = 0; 
+static unsigned previous_height = 0; static int plugins_opened; static int is_pal_mode; /* memory card data */ extern char Mcd1Data[MCD_SIZE]; +extern char Mcd2Data[MCD_SIZE]; extern char McdDisable[2]; /* PCSX ReARMed core calls and stuff */ -int in_type1, in_type2; -int in_a1[2] = { 127, 127 }, in_a2[2] = { 127, 127 }; -int in_keystate; +int in_type[8] = { + PSE_PAD_TYPE_NONE, PSE_PAD_TYPE_NONE, + PSE_PAD_TYPE_NONE, PSE_PAD_TYPE_NONE, + PSE_PAD_TYPE_NONE, PSE_PAD_TYPE_NONE, + PSE_PAD_TYPE_NONE, PSE_PAD_TYPE_NONE +}; +int in_analog_left[8][2] = { { 127, 127 }, { 127, 127 }, { 127, 127 }, { 127, 127 }, { 127, 127 }, { 127, 127 }, { 127, 127 }, { 127, 127 } }; +int in_analog_right[8][2] = { { 127, 127 }, { 127, 127 }, { 127, 127 }, { 127, 127 }, { 127, 127 }, { 127, 127 }, { 127, 127 }, { 127, 127 } }; +unsigned short in_keystate[PORTS_NUMBER]; +int in_mouse[8][2]; +int multitap1 = 0; +int multitap2 = 0; int in_enable_vibration = 1; +// NegCon adjustment parameters +// > The NegCon 'twist' action is somewhat awkward when mapped +// to a standard analog stick -> user should be able to tweak +// response/deadzone for comfort +// > When response is linear, 'additional' deadzone (set here) +// may be left at zero, since this is normally handled via in-game +// options menus +// > When response is non-linear, deadzone should be set to match the +// controller being used (otherwise precision may be lost) +// > negcon_linearity: +// - 1: Response is linear - recommended when using racing wheel +// peripherals, not recommended for standard gamepads +// - 2: Response is quadratic - optimal setting for gamepads +// - 3: Response is cubic - enables precise fine control, but +// difficult to use... 
+#define NEGCON_RANGE 0x7FFF +static int negcon_deadzone = 0; +static int negcon_linearity = 1; + +static bool axis_bounds_modifier; + /* PSX max resolution is 640x512, but with enhancement it's 1024x512 */ -#define VOUT_MAX_WIDTH 1024 +#define VOUT_MAX_WIDTH 1024 #define VOUT_MAX_HEIGHT 512 +//Dummy functions +bool retro_load_game_special(unsigned game_type, const struct retro_game_info *info, size_t num_info) { return false; } +void retro_unload_game(void) {} +static int vout_open(void) { return 0; } +static void vout_close(void) {} +static int snd_init(void) { return 0; } +static void snd_finish(void) {} +static int snd_busy(void) { return 0; } + +#define GPU_PEOPS_ODD_EVEN_BIT (1 << 0) +#define GPU_PEOPS_EXPAND_SCREEN_WIDTH (1 << 1) +#define GPU_PEOPS_IGNORE_BRIGHTNESS (1 << 2) +#define GPU_PEOPS_DISABLE_COORD_CHECK (1 << 3) +#define GPU_PEOPS_LAZY_SCREEN_UPDATE (1 << 6) +#define GPU_PEOPS_OLD_FRAME_SKIP (1 << 7) +#define GPU_PEOPS_REPEATED_TRIANGLES (1 << 8) +#define GPU_PEOPS_QUADS_WITH_TRIANGLES (1 << 9) +#define GPU_PEOPS_FAKE_BUSY_STATE (1 << 10) + static void init_memcard(char *mcd_data) { - unsigned off = 0; - unsigned i; + unsigned off = 0; + unsigned i; - memset(mcd_data, 0, MCD_SIZE); + memset(mcd_data, 0, MCD_SIZE); - mcd_data[off++] = 'M'; - mcd_data[off++] = 'C'; - off += 0x7d; - mcd_data[off++] = 0x0e; + mcd_data[off++] = 'M'; + mcd_data[off++] = 'C'; + off += 0x7d; + mcd_data[off++] = 0x0e; - for (i = 0; i < 15; i++) { - mcd_data[off++] = 0xa0; - off += 0x07; - mcd_data[off++] = 0xff; - mcd_data[off++] = 0xff; - off += 0x75; - mcd_data[off++] = 0xa0; - } + for (i = 0; i < 15; i++) + { + mcd_data[off++] = 0xa0; + off += 0x07; + mcd_data[off++] = 0xff; + mcd_data[off++] = 0xff; + off += 0x75; + mcd_data[off++] = 0xa0; + } - for (i = 0; i < 20; i++) { - mcd_data[off++] = 0xff; - mcd_data[off++] = 0xff; - mcd_data[off++] = 0xff; - mcd_data[off++] = 0xff; - off += 0x04; - mcd_data[off++] = 0xff; - mcd_data[off++] = 0xff; - off += 0x76; - } + for (i = 
0; i < 20; i++) + { + mcd_data[off++] = 0xff; + mcd_data[off++] = 0xff; + mcd_data[off++] = 0xff; + mcd_data[off++] = 0xff; + off += 0x04; + mcd_data[off++] = 0xff; + mcd_data[off++] = 0xff; + off += 0x76; + } } -static int vout_open(void) +static void set_vout_fb() { - return 0; + struct retro_framebuffer fb = { 0 }; + + fb.width = vout_width; + fb.height = vout_height; + fb.access_flags = RETRO_MEMORY_ACCESS_WRITE; + + if (environ_cb(RETRO_ENVIRONMENT_GET_CURRENT_SOFTWARE_FRAMEBUFFER, &fb) && fb.format == RETRO_PIXEL_FORMAT_RGB565) + vout_buf_ptr = (uint16_t *)fb.data; + else + vout_buf_ptr = vout_buf; } static void vout_set_mode(int w, int h, int raw_w, int raw_h, int bpp) { - vout_width = w; - vout_height = h; + vout_width = w; + vout_height = h; + + if (previous_width != vout_width || previous_height != vout_height) + { + previous_width = vout_width; + previous_height = vout_height; + + struct retro_system_av_info info; + retro_get_system_av_info(&info); + environ_cb(RETRO_ENVIRONMENT_SET_GEOMETRY, &info.geometry); + } + + set_vout_fb(); } #ifndef FRONTEND_SUPPORTS_RGB565 static void convert(void *buf, size_t bytes) { - unsigned int i, v, *p = buf; + unsigned int i, v, *p = buf; - for (i = 0; i < bytes / 4; i++) { - v = p[i]; - p[i] = (v & 0x001f001f) | ((v >> 1) & 0x7fe07fe0); - } + for (i = 0; i < bytes / 4; i++) + { + v = p[i]; + p[i] = (v & 0x001f001f) | ((v >> 1) & 0x7fe07fe0); + } } #endif static void vout_flip(const void *vram, int stride, int bgr24, int w, int h) { - unsigned short *dest = vout_buf; - const unsigned short *src = vram; - int dstride = vout_width, h1 = h; - int doffs; - - if (vram == NULL) { - // blanking - memset(vout_buf, 0, dstride * h * 2); - goto out; - } - - doffs = (vout_height - h) * dstride; - doffs += (dstride - w) / 2 & ~1; - if (doffs != vout_doffs_old) { - // clear borders - memset(vout_buf, 0, dstride * h * 2); - vout_doffs_old = doffs; - } - dest += doffs; - - if (bgr24) - { - // XXX: could we switch to 
RETRO_PIXEL_FORMAT_XRGB8888 here? - for (; h1-- > 0; dest += dstride, src += stride) - { - bgr888_to_rgb565(dest, src, w * 3); - } - } - else - { - for (; h1-- > 0; dest += dstride, src += stride) - { - bgr555_to_rgb565(dest, src, w * 2); - } - } + unsigned short *dest = vout_buf_ptr; + const unsigned short *src = vram; + int dstride = vout_width, h1 = h; + int doffs; + + if (vram == NULL) + { + // blanking + memset(vout_buf_ptr, 0, dstride * h * 2); + goto out; + } + + doffs = (vout_height - h) * dstride; + doffs += (dstride - w) / 2 & ~1; + if (doffs != vout_doffs_old) + { + // clear borders + memset(vout_buf_ptr, 0, dstride * h * 2); + vout_doffs_old = doffs; + } + dest += doffs; + + if (bgr24) + { + // XXX: could we switch to RETRO_PIXEL_FORMAT_XRGB8888 here? + for (; h1-- > 0; dest += dstride, src += stride) + { + bgr888_to_rgb565(dest, src, w * 3); + } + } + else + { + for (; h1-- > 0; dest += dstride, src += stride) + { + bgr555_to_rgb565(dest, src, w * 2); + } + } out: #ifndef FRONTEND_SUPPORTS_RGB565 - convert(vout_buf, vout_width * vout_height * 2); + convert(vout_buf_ptr, vout_width * vout_height * 2); +#endif + vout_fb_dirty = 1; + pl_rearmed_cbs.flip_cnt++; +} + +#ifdef _3DS +typedef struct +{ + void *buffer; + uint32_t target_map; + size_t size; + enum psxMapTag tag; +} psx_map_t; + +psx_map_t custom_psx_maps[] = { + { NULL, 0x13000000, 0x210000, MAP_TAG_RAM }, // 0x80000000 + { NULL, 0x12800000, 0x010000, MAP_TAG_OTHER }, // 0x1f800000 + { NULL, 0x12c00000, 0x080000, MAP_TAG_OTHER }, // 0x1fc00000 + { NULL, 0x11000000, 0x800000, MAP_TAG_LUTS }, // 0x08000000 + { NULL, 0x12000000, 0x200000, MAP_TAG_VRAM }, // 0x00000000 +}; + +void *pl_3ds_mmap(unsigned long addr, size_t size, int is_fixed, + enum psxMapTag tag) +{ + (void)is_fixed; + (void)addr; + + if (__ctr_svchax) + { + psx_map_t *custom_map = custom_psx_maps; + + for (; custom_map->size; custom_map++) + { + if ((custom_map->size == size) && (custom_map->tag == tag)) + { + uint32_t ptr_aligned, 
tmp; + + custom_map->buffer = malloc(size + 0x1000); + ptr_aligned = (((u32)custom_map->buffer) + 0xFFF) & ~0xFFF; + + if (svcControlMemory(&tmp, (void *)custom_map->target_map, (void *)ptr_aligned, size, MEMOP_MAP, 0x3) < 0) + { + SysPrintf("could not map memory @0x%08X\n", custom_map->target_map); + exit(1); + } + + return (void *)custom_map->target_map; + } + } + } + + return malloc(size); +} + +void pl_3ds_munmap(void *ptr, size_t size, enum psxMapTag tag) +{ + (void)tag; + + if (__ctr_svchax) + { + psx_map_t *custom_map = custom_psx_maps; + + for (; custom_map->size; custom_map++) + { + if ((custom_map->target_map == (uint32_t)ptr)) + { + uint32_t ptr_aligned, tmp; + + ptr_aligned = (((u32)custom_map->buffer) + 0xFFF) & ~0xFFF; + + svcControlMemory(&tmp, (void *)custom_map->target_map, (void *)ptr_aligned, size, MEMOP_UNMAP, 0x3); + + free(custom_map->buffer); + custom_map->buffer = NULL; + return; + } + } + } + + free(ptr); +} +#endif + +#ifdef VITA +typedef struct +{ + void *buffer; + uint32_t target_map; + size_t size; + enum psxMapTag tag; +} psx_map_t; + +void *addr = NULL; + +psx_map_t custom_psx_maps[] = { + { NULL, NULL, 0x210000, MAP_TAG_RAM }, // 0x80000000 + { NULL, NULL, 0x010000, MAP_TAG_OTHER }, // 0x1f800000 + { NULL, NULL, 0x080000, MAP_TAG_OTHER }, // 0x1fc00000 + { NULL, NULL, 0x800000, MAP_TAG_LUTS }, // 0x08000000 + { NULL, NULL, 0x200000, MAP_TAG_VRAM }, // 0x00000000 +}; + +int init_vita_mmap() +{ + int n; + void *tmpaddr; + addr = malloc(64 * 1024 * 1024); + if (addr == NULL) + return -1; + tmpaddr = ((u32)(addr + 0xFFFFFF)) & ~0xFFFFFF; + custom_psx_maps[0].buffer = tmpaddr + 0x2000000; + custom_psx_maps[1].buffer = tmpaddr + 0x1800000; + custom_psx_maps[2].buffer = tmpaddr + 0x1c00000; + custom_psx_maps[3].buffer = tmpaddr + 0x0000000; + custom_psx_maps[4].buffer = tmpaddr + 0x1000000; +#if 0 + for(n = 0; n < 5; n++){ + sceClibPrintf("addr reserved %x\n",custom_psx_maps[n].buffer); + } #endif - vout_fb_dirty = 1; - 
pl_rearmed_cbs.flip_cnt++; + return 0; +} + +void deinit_vita_mmap() +{ + free(addr); +} + +void *pl_vita_mmap(unsigned long addr, size_t size, int is_fixed, + enum psxMapTag tag) +{ + (void)is_fixed; + (void)addr; + + psx_map_t *custom_map = custom_psx_maps; + + for (; custom_map->size; custom_map++) + { + if ((custom_map->size == size) && (custom_map->tag == tag)) + { + return custom_map->buffer; + } + } + + return malloc(size); } -static void vout_close(void) +void pl_vita_munmap(void *ptr, size_t size, enum psxMapTag tag) { + (void)tag; + + psx_map_t *custom_map = custom_psx_maps; + + for (; custom_map->size; custom_map++) + { + if ((custom_map->buffer == ptr)) + { + return; + } + } + + free(ptr); } +#endif static void *pl_mmap(unsigned int size) { - return psxMap(0, size, 0, MAP_TAG_VRAM); + return psxMap(0, size, 0, MAP_TAG_VRAM); } static void pl_munmap(void *ptr, unsigned int size) { - psxUnmap(ptr, size, MAP_TAG_VRAM); + psxUnmap(ptr, size, MAP_TAG_VRAM); } struct rearmed_cbs pl_rearmed_cbs = { - .pl_vout_open = vout_open, - .pl_vout_set_mode = vout_set_mode, - .pl_vout_flip = vout_flip, - .pl_vout_close = vout_close, - .mmap = pl_mmap, - .munmap = pl_munmap, - /* from psxcounters */ - .gpu_hcnt = &hSyncCount, - .gpu_frame_count = &frame_counter, + .pl_vout_open = vout_open, + .pl_vout_set_mode = vout_set_mode, + .pl_vout_flip = vout_flip, + .pl_vout_close = vout_close, + .mmap = pl_mmap, + .munmap = pl_munmap, + /* from psxcounters */ + .gpu_hcnt = &hSyncCount, + .gpu_frame_count = &frame_counter, }; void pl_frame_limit(void) { - /* called once per frame, make psxCpu->Execute() above return */ - stop = 1; + /* called once per frame, make psxCpu->Execute() above return */ + stop = 1; } void pl_timing_prepare(int is_pal) { - is_pal_mode = is_pal; + is_pal_mode = is_pal; } void plat_trigger_vibrate(int pad, int low, int high) { - rumble.set_rumble_state(pad, RETRO_RUMBLE_STRONG, high << 8); - rumble.set_rumble_state(pad, RETRO_RUMBLE_WEAK, low ? 
0xffff : 0x0); + if (!rumble_cb) + return; + + if (in_enable_vibration) + { + rumble_cb(pad, RETRO_RUMBLE_STRONG, high << 8); + rumble_cb(pad, RETRO_RUMBLE_WEAK, low ? 0xffff : 0x0); + } } void pl_update_gun(int *xn, int *yn, int *xres, int *yres, int *in) @@ -213,60 +526,271 @@ void pl_update_gun(int *xn, int *yn, int *xres, int *yres, int *in) } /* sound calls */ -static int snd_init(void) +static void snd_feed(void *buf, int bytes) { - return 0; + if (audio_batch_cb != NULL) + audio_batch_cb(buf, bytes / 4); } -static void snd_finish(void) +void out_register_libretro(struct out_driver *drv) { + drv->name = "libretro"; + drv->init = snd_init; + drv->finish = snd_finish; + drv->busy = snd_busy; + drv->feed = snd_feed; } -static int snd_busy(void) +#define RETRO_DEVICE_PSE_STANDARD RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_JOYPAD, 0) +#define RETRO_DEVICE_PSE_ANALOG RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_ANALOG, 0) +#define RETRO_DEVICE_PSE_DUALSHOCK RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_ANALOG, 1) +#define RETRO_DEVICE_PSE_NEGCON RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_ANALOG, 2) +#define RETRO_DEVICE_PSE_GUNCON RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_LIGHTGUN, 0) +#define RETRO_DEVICE_PSE_MOUSE RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_MOUSE, 0) + +static char *get_pse_pad_label[] = { + "none", "mouse", "negcon", "konami gun", "standard", "analog", "guncon", "dualshock" +}; + +static const struct retro_controller_description pads[7] = { - return 0; -} + { "standard", RETRO_DEVICE_JOYPAD }, + { "analog", RETRO_DEVICE_PSE_ANALOG }, + { "dualshock", RETRO_DEVICE_PSE_DUALSHOCK }, + { "negcon", RETRO_DEVICE_PSE_NEGCON }, + { "guncon", RETRO_DEVICE_PSE_GUNCON }, + { "mouse", RETRO_DEVICE_PSE_MOUSE }, + { NULL, 0 }, +}; -static void snd_feed(void *buf, int bytes) +static const struct retro_controller_info ports[9] = { - if (audio_batch_cb != NULL) - audio_batch_cb(buf, bytes / 4); -} + { pads, 7 }, + { pads, 7 }, + { pads, 7 }, + { pads, 7 }, + { pads, 7 }, + { pads, 7 }, + { pads, 7 }, + { pads, 7 
}, + { NULL, 0 }, +}; -void out_register_libretro(struct out_driver *drv) +/* libretro */ + +static bool update_option_visibility(void) { - drv->name = "libretro"; - drv->init = snd_init; - drv->finish = snd_finish; - drv->busy = snd_busy; - drv->feed = snd_feed; + struct retro_variable var = {0}; + struct retro_core_option_display option_display = {0}; + bool updated = false; + unsigned i; + + /* If frontend supports core option categories + * then show/hide core option entries are ignored + * and no options should be hidden */ + if (libretro_supports_option_categories) + return false; + + var.key = "pcsx_rearmed_show_input_settings"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + bool show_input_settings_prev = + show_input_settings; + + show_input_settings = true; + if (strcmp(var.value, "disabled") == 0) + show_input_settings = false; + + if (show_input_settings != + show_input_settings_prev) + { + char input_option[][50] = { + "pcsx_rearmed_analog_axis_modifier", + "pcsx_rearmed_vibration", + "pcsx_rearmed_multitap", + "pcsx_rearmed_negcon_deadzone", + "pcsx_rearmed_negcon_response", + "pcsx_rearmed_input_sensitivity", + "pcsx_rearmed_gunconadjustx", + "pcsx_rearmed_gunconadjusty", + "pcsx_rearmed_gunconadjustratiox", + "pcsx_rearmed_gunconadjustratioy" + }; + + option_display.visible = show_input_settings; + + for (i = 0; + i < (sizeof(input_option) / + sizeof(input_option[0])); + i++) + { + option_display.key = input_option[i]; + environ_cb(RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY, + &option_display); + } + + updated = true; + } + } +#ifdef GPU_PEOPS + var.key = "pcsx_rearmed_show_gpu_peops_settings"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + bool show_advanced_gpu_peops_settings_prev = + show_advanced_gpu_peops_settings; + + show_advanced_gpu_peops_settings = true; + if (strcmp(var.value, "disabled") == 0) + show_advanced_gpu_peops_settings = false; + 
+ if (show_advanced_gpu_peops_settings != + show_advanced_gpu_peops_settings_prev) + { + unsigned i; + struct retro_core_option_display option_display; + char gpu_peops_option[][45] = { + "pcsx_rearmed_gpu_peops_odd_even_bit", + "pcsx_rearmed_gpu_peops_expand_screen_width", + "pcsx_rearmed_gpu_peops_ignore_brightness", + "pcsx_rearmed_gpu_peops_disable_coord_check", + "pcsx_rearmed_gpu_peops_lazy_screen_update", + "pcsx_rearmed_gpu_peops_repeated_triangles", + "pcsx_rearmed_gpu_peops_quads_with_triangles", + "pcsx_rearmed_gpu_peops_fake_busy_state" + }; + + option_display.visible = show_advanced_gpu_peops_settings; + + for (i = 0; + i < (sizeof(gpu_peops_option) / + sizeof(gpu_peops_option[0])); + i++) + { + option_display.key = gpu_peops_option[i]; + environ_cb(RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY, + &option_display); + } + + updated = true; + } + } +#endif +#ifdef GPU_UNAI + var.key = "pcsx_rearmed_show_gpu_unai_settings"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + bool show_advanced_gpu_unai_settings_prev = + show_advanced_gpu_unai_settings; + + show_advanced_gpu_unai_settings = true; + if (strcmp(var.value, "disabled") == 0) + show_advanced_gpu_unai_settings = false; + + if (show_advanced_gpu_unai_settings != + show_advanced_gpu_unai_settings_prev) + { + unsigned i; + struct retro_core_option_display option_display; + char gpu_unai_option[][40] = { + "pcsx_rearmed_gpu_unai_blending", + "pcsx_rearmed_gpu_unai_lighting", + "pcsx_rearmed_gpu_unai_fast_lighting", + "pcsx_rearmed_gpu_unai_scale_hires", + }; + + option_display.visible = show_advanced_gpu_unai_settings; + + for (i = 0; + i < (sizeof(gpu_unai_option) / + sizeof(gpu_unai_option[0])); + i++) + { + option_display.key = gpu_unai_option[i]; + environ_cb(RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY, + &option_display); + } + + updated = true; + } + } +#endif + return updated; } -/* libretro */ void retro_set_environment(retro_environment_t cb) { - 
static const struct retro_variable vars[] = { - { "pcsx_rearmed_frameskip", "Frameskip; 0|1|2|3" }, - { "pcsx_rearmed_region", "Region; Auto|NTSC|PAL" }, - { "pcsx_rearmed_pad1type", "Pad 1 Type; standard|analog" }, - { "pcsx_rearmed_pad2type", "Pad 2 Type; standard|analog" }, -#ifndef DRC_DISABLE - { "pcsx_rearmed_drc", "Dynamic recompiler; enabled|disabled" }, -#endif -#ifdef __ARM_NEON__ - { "pcsx_rearmed_neon_interlace_enable", "Enable interlacing mode(s); disabled|enabled" }, - { "pcsx_rearmed_neon_enhancement_enable", "Enhanced resolution (slow); disabled|enabled" }, - { "pcsx_rearmed_neon_enhancement_no_main", "Enhanced resolution speed hack; disabled|enabled" }, + bool option_categories = false; +#ifdef USE_LIBRETRO_VFS + struct retro_vfs_interface_info vfs_iface_info; #endif - { "pcsx_rearmed_duping_enable", "Frame duping; on|off" }, - { "pcsx_rearmed_spu_reverb", "Sound: Reverb; on|off" }, - { "pcsx_rearmed_spu_interpolation", "Sound: Interpolation; simple|gaussian|cubic|off" }, - { NULL, NULL }, - }; environ_cb = cb; - cb(RETRO_ENVIRONMENT_SET_VARIABLES, (void*)vars); + if (cb(RETRO_ENVIRONMENT_GET_LOG_INTERFACE, &logging)) + log_cb = logging.log; + + environ_cb(RETRO_ENVIRONMENT_SET_CONTROLLER_INFO, (void*)ports); + + /* Set core options + * An annoyance: retro_set_environment() can be called + * multiple times, and depending upon the current frontend + * state various environment callbacks may be disabled. + * This means the reported 'categories_supported' status + * may change on subsequent iterations. 
We therefore have + * to record whether 'categories_supported' is true on any + * iteration, and latch the result */ + libretro_set_core_options(environ_cb, &option_categories); + libretro_supports_option_categories |= option_categories; + + /* If frontend supports core option categories, + * any show/hide core option entries are unused + * and should be hidden */ + if (libretro_supports_option_categories) + { + struct retro_core_option_display option_display; + option_display.visible = false; + + option_display.key = "pcsx_rearmed_show_input_settings"; + environ_cb(RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY, + &option_display); + +#ifdef GPU_PEOPS + option_display.key = "pcsx_rearmed_show_gpu_peops_settings"; + environ_cb(RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY, + &option_display); +#endif +#ifdef GPU_UNAI + option_display.key = "pcsx_rearmed_show_gpu_unai_settings"; + environ_cb(RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY, + &option_display); +#endif + } + /* If frontend does not support core option + * categories, core options may be shown/hidden + * at runtime. 
In this case, register 'update + * display' callback, so frontend can update + * core options menu without calling retro_run() */ + else + { + struct retro_core_options_update_display_callback update_display_cb; + update_display_cb.callback = update_option_visibility; + + environ_cb(RETRO_ENVIRONMENT_SET_CORE_OPTIONS_UPDATE_DISPLAY_CALLBACK, + &update_display_cb); + } + +#ifdef USE_LIBRETRO_VFS + vfs_iface_info.required_interface_version = 1; + vfs_iface_info.iface = NULL; + if (environ_cb(RETRO_ENVIRONMENT_GET_VFS_INTERFACE, &vfs_iface_info)) + filestream_vfs_init(&vfs_iface_info); +#endif } void retro_set_video_refresh(retro_video_refresh_t cb) { video_cb = cb; } @@ -277,331 +801,565 @@ void retro_set_input_state(retro_input_state_t cb) { input_state_cb = cb; } unsigned retro_api_version(void) { - return RETRO_API_VERSION; + return RETRO_API_VERSION; +} + +static void update_multitap(void) +{ + struct retro_variable var = { 0 }; + + multitap1 = 0; + multitap2 = 0; + + var.value = NULL; + var.key = "pcsx_rearmed_multitap"; + if (environ_cb && (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)) + { + if (strcmp(var.value, "port 1") == 0) + multitap1 = 1; + else if (strcmp(var.value, "port 2") == 0) + multitap2 = 1; + else if (strcmp(var.value, "ports 1 and 2") == 0) + { + multitap1 = 1; + multitap2 = 1; + } + } } void retro_set_controller_port_device(unsigned port, unsigned device) { + if (port >= PORTS_NUMBER) + return; + + switch (device) + { + case RETRO_DEVICE_JOYPAD: + case RETRO_DEVICE_PSE_STANDARD: + in_type[port] = PSE_PAD_TYPE_STANDARD; + break; + case RETRO_DEVICE_PSE_ANALOG: + in_type[port] = PSE_PAD_TYPE_ANALOGJOY; + break; + case RETRO_DEVICE_PSE_DUALSHOCK: + in_type[port] = PSE_PAD_TYPE_ANALOGPAD; + break; + case RETRO_DEVICE_PSE_MOUSE: + in_type[port] = PSE_PAD_TYPE_MOUSE; + break; + case RETRO_DEVICE_PSE_NEGCON: + in_type[port] = PSE_PAD_TYPE_NEGCON; + break; + case RETRO_DEVICE_PSE_GUNCON: + in_type[port] = PSE_PAD_TYPE_GUNCON; + 
break; + case RETRO_DEVICE_NONE: + default: + in_type[port] = PSE_PAD_TYPE_NONE; + break; + } + + SysPrintf("port: %u device: %s\n", port + 1, get_pse_pad_label[in_type[port]]); } void retro_get_system_info(struct retro_system_info *info) { - memset(info, 0, sizeof(*info)); - info->library_name = "PCSX-ReARMed"; - info->library_version = "r22"; - info->valid_extensions = "bin|cue|img|mdf|pbp|toc|cbn|m3u"; - info->need_fullpath = true; +#ifndef GIT_VERSION +#define GIT_VERSION "" +#endif + memset(info, 0, sizeof(*info)); + info->library_name = "PCSX-ReARMed"; + info->library_version = "r23l" GIT_VERSION; + info->valid_extensions = "bin|cue|img|mdf|pbp|toc|cbn|m3u|chd"; + info->need_fullpath = true; } void retro_get_system_av_info(struct retro_system_av_info *info) { - memset(info, 0, sizeof(*info)); - info->timing.fps = is_pal_mode ? 50 : 60; - info->timing.sample_rate = 44100; - info->geometry.base_width = 320; - info->geometry.base_height = 240; - info->geometry.max_width = VOUT_MAX_WIDTH; - info->geometry.max_height = VOUT_MAX_HEIGHT; - info->geometry.aspect_ratio = 4.0 / 3.0; + unsigned geom_height = vout_height > 0 ? vout_height : 240; + unsigned geom_width = vout_width > 0 ? vout_width : 320; + + memset(info, 0, sizeof(*info)); + info->timing.fps = is_pal_mode ? 
50.0 : 60.0; + info->timing.sample_rate = 44100.0; + info->geometry.base_width = geom_width; + info->geometry.base_height = geom_height; + info->geometry.max_width = VOUT_MAX_WIDTH; + info->geometry.max_height = VOUT_MAX_HEIGHT; + info->geometry.aspect_ratio = 4.0 / 3.0; } /* savestates */ -size_t retro_serialize_size(void) -{ - // it's currently 4380651-4397047 bytes, - // but have some reserved for future - return 0x440000; +size_t retro_serialize_size(void) +{ + // it's currently 4380651-4397047 bytes, + // but have some reserved for future + return 0x440000; } -struct save_fp { - char *buf; - size_t pos; - int is_write; +struct save_fp +{ + char *buf; + size_t pos; + int is_write; }; static void *save_open(const char *name, const char *mode) { - struct save_fp *fp; + struct save_fp *fp; - if (name == NULL || mode == NULL) - return NULL; + if (name == NULL || mode == NULL) + return NULL; - fp = malloc(sizeof(*fp)); - if (fp == NULL) - return NULL; + fp = malloc(sizeof(*fp)); + if (fp == NULL) + return NULL; - fp->buf = (char *)name; - fp->pos = 0; - fp->is_write = (mode[0] == 'w' || mode[1] == 'w'); + fp->buf = (char *)name; + fp->pos = 0; + fp->is_write = (mode[0] == 'w' || mode[1] == 'w'); - return fp; + return fp; } static int save_read(void *file, void *buf, u32 len) { - struct save_fp *fp = file; - if (fp == NULL || buf == NULL) - return -1; + struct save_fp *fp = file; + if (fp == NULL || buf == NULL) + return -1; - memcpy(buf, fp->buf + fp->pos, len); - fp->pos += len; - return len; + memcpy(buf, fp->buf + fp->pos, len); + fp->pos += len; + return len; } static int save_write(void *file, const void *buf, u32 len) { - struct save_fp *fp = file; - if (fp == NULL || buf == NULL) - return -1; + struct save_fp *fp = file; + if (fp == NULL || buf == NULL) + return -1; - memcpy(fp->buf + fp->pos, buf, len); - fp->pos += len; - return len; + memcpy(fp->buf + fp->pos, buf, len); + fp->pos += len; + return len; } static long save_seek(void *file, long offs, int 
whence) { - struct save_fp *fp = file; - if (fp == NULL) - return -1; + struct save_fp *fp = file; + if (fp == NULL) + return -1; - switch (whence) { - case SEEK_CUR: - fp->pos += offs; - return fp->pos; - case SEEK_SET: - fp->pos = offs; - return fp->pos; - default: - return -1; - } + switch (whence) + { + case SEEK_CUR: + fp->pos += offs; + return fp->pos; + case SEEK_SET: + fp->pos = offs; + return fp->pos; + default: + return -1; + } } static void save_close(void *file) { - struct save_fp *fp = file; - size_t r_size = retro_serialize_size(); - if (fp == NULL) - return; - - if (fp->pos > r_size) - SysPrintf("ERROR: save buffer overflow detected\n"); - else if (fp->is_write && fp->pos < r_size) - // make sure we don't save trash in leftover space - memset(fp->buf + fp->pos, 0, r_size - fp->pos); - free(fp); + struct save_fp *fp = file; + size_t r_size = retro_serialize_size(); + if (fp == NULL) + return; + + if (fp->pos > r_size) + SysPrintf("ERROR: save buffer overflow detected\n"); + else if (fp->is_write && fp->pos < r_size) + // make sure we don't save trash in leftover space + memset(fp->buf + fp->pos, 0, r_size - fp->pos); + free(fp); } bool retro_serialize(void *data, size_t size) -{ - int ret = SaveState(data); - return ret == 0 ? true : false; +{ + int ret = SaveState(data); + return ret == 0 ? true : false; } bool retro_unserialize(const void *data, size_t size) { - int ret = LoadState(data); - return ret == 0 ? true : false; + int ret = LoadState(data); + return ret == 0 ? true : false; } /* cheats */ void retro_cheat_reset(void) { - ClearAllCheats(); + ClearAllCheats(); } void retro_cheat_set(unsigned index, bool enabled, const char *code) { - char buf[256]; - int ret; + char buf[256]; + int ret; + + // cheat funcs are destructive, need a copy.. + strncpy(buf, code, sizeof(buf)); + buf[sizeof(buf) - 1] = 0; - // cheat funcs are destructive, need a copy.. 
- strncpy(buf, code, sizeof(buf)); - buf[sizeof(buf) - 1] = 0; + //Prepare buffered cheat for PCSX's AddCheat fucntion. + int cursor = 0; + int nonhexdec = 0; + while (buf[cursor]) + { + if (!(ISHEXDEC)) + { + if (++nonhexdec % 2) + { + buf[cursor] = ' '; + } + else + { + buf[cursor] = '\n'; + } + } + cursor++; + } - if (index < NumCheats) - ret = EditCheat(index, "", buf); - else - ret = AddCheat("", buf); + if (index < NumCheats) + ret = EditCheat(index, "", buf); + else + ret = AddCheat("", buf); - if (ret != 0) - SysPrintf("Failed to set cheat %#u\n", index); - else if (index < NumCheats) - Cheats[index].Enabled = enabled; + if (ret != 0) + SysPrintf("Failed to set cheat %#u\n", index); + else if (index < NumCheats) + Cheats[index].Enabled = enabled; } +// just in case, maybe a win-rt port in the future? +#ifdef _WIN32 +#define SLASH '\\' +#else +#define SLASH '/' +#endif + +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + /* multidisk support */ +static unsigned int disk_initial_index; +static char disk_initial_path[PATH_MAX]; static bool disk_ejected; static unsigned int disk_current_index; static unsigned int disk_count; -static struct disks_state { - char *fname; - int internal_index; // for multidisk eboots +static struct disks_state +{ + char *fname; + char *flabel; + int internal_index; // for multidisk eboots } disks[8]; +static void get_disk_label(char *disk_label, const char *disk_path, size_t len) +{ + const char *base = NULL; + + if (!disk_path || (*disk_path == '\0')) + return; + + base = strrchr(disk_path, SLASH); + if (!base) + base = disk_path; + + if (*base == SLASH) + base++; + + strncpy(disk_label, base, len - 1); + disk_label[len - 1] = '\0'; + + char *ext = strrchr(disk_label, '.'); + if (ext) + *ext = '\0'; +} + +static void disk_init(void) +{ + size_t i; + + disk_ejected = false; + disk_current_index = 0; + disk_count = 0; + + for (i = 0; i < sizeof(disks) / sizeof(disks[0]); i++) + { + if (disks[i].fname != NULL) + { + 
free(disks[i].fname); + disks[i].fname = NULL; + } + if (disks[i].flabel != NULL) + { + free(disks[i].flabel); + disks[i].flabel = NULL; + } + disks[i].internal_index = 0; + } +} + static bool disk_set_eject_state(bool ejected) { - // weird PCSX API.. - SetCdOpenCaseTime(ejected ? -1 : (time(NULL) + 2)); - LidInterrupt(); + // weird PCSX API.. + SetCdOpenCaseTime(ejected ? -1 : (time(NULL) + 2)); + LidInterrupt(); - disk_ejected = ejected; - return true; + disk_ejected = ejected; + return true; } static bool disk_get_eject_state(void) { - /* can't be controlled by emulated software */ - return disk_ejected; + /* can't be controlled by emulated software */ + return disk_ejected; } static unsigned int disk_get_image_index(void) { - return disk_current_index; + return disk_current_index; } static bool disk_set_image_index(unsigned int index) { - if (index >= sizeof(disks) / sizeof(disks[0])) - return false; + if (index >= sizeof(disks) / sizeof(disks[0])) + return false; - CdromId[0] = '\0'; - CdromLabel[0] = '\0'; + CdromId[0] = '\0'; + CdromLabel[0] = '\0'; - if (disks[index].fname == NULL) { - SysPrintf("missing disk #%u\n", index); - CDR_shutdown(); + if (disks[index].fname == NULL) + { + SysPrintf("missing disk #%u\n", index); + CDR_shutdown(); - // RetroArch specifies "no disk" with index == count, - // so don't fail here.. - disk_current_index = index; - return true; - } + // RetroArch specifies "no disk" with index == count, + // so don't fail here.. 
+ disk_current_index = index; + return true; + } - SysPrintf("switching to disk %u: \"%s\" #%d\n", index, - disks[index].fname, disks[index].internal_index); + SysPrintf("switching to disk %u: \"%s\" #%d\n", index, + disks[index].fname, disks[index].internal_index); - cdrIsoMultidiskSelect = disks[index].internal_index; - set_cd_image(disks[index].fname); - if (ReloadCdromPlugin() < 0) { - SysPrintf("failed to load cdr plugin\n"); - return false; - } - if (CDR_open() < 0) { - SysPrintf("failed to open cdr plugin\n"); - return false; - } + cdrIsoMultidiskSelect = disks[index].internal_index; + set_cd_image(disks[index].fname); + if (ReloadCdromPlugin() < 0) + { + SysPrintf("failed to load cdr plugin\n"); + return false; + } + if (CDR_open() < 0) + { + SysPrintf("failed to open cdr plugin\n"); + return false; + } - if (!disk_ejected) { - SetCdOpenCaseTime(time(NULL) + 2); - LidInterrupt(); - } + if (!disk_ejected) + { + SetCdOpenCaseTime(time(NULL) + 2); + LidInterrupt(); + } - disk_current_index = index; - return true; + disk_current_index = index; + return true; } static unsigned int disk_get_num_images(void) { - return disk_count; + return disk_count; } static bool disk_replace_image_index(unsigned index, - const struct retro_game_info *info) + const struct retro_game_info *info) { - char *old_fname; - bool ret = true; + char *old_fname = NULL; + char *old_flabel = NULL; + bool ret = true; + + if (index >= sizeof(disks) / sizeof(disks[0])) + return false; + + old_fname = disks[index].fname; + old_flabel = disks[index].flabel; + + disks[index].fname = NULL; + disks[index].flabel = NULL; + disks[index].internal_index = 0; + + if (info != NULL) + { + char disk_label[PATH_MAX]; + disk_label[0] = '\0'; + + disks[index].fname = strdup(info->path); - if (index >= sizeof(disks) / sizeof(disks[0])) - return false; + get_disk_label(disk_label, info->path, PATH_MAX); + disks[index].flabel = strdup(disk_label); - old_fname = disks[index].fname; - disks[index].fname = NULL; - 
disks[index].internal_index = 0; + if (index == disk_current_index) + ret = disk_set_image_index(index); + } - if (info != NULL) { - disks[index].fname = strdup(info->path); - if (index == disk_current_index) - ret = disk_set_image_index(index); - } + if (old_fname != NULL) + free(old_fname); - if (old_fname != NULL) - free(old_fname); + if (old_flabel != NULL) + free(old_flabel); - return ret; + return ret; } static bool disk_add_image_index(void) { - if (disk_count >= 8) - return false; + if (disk_count >= 8) + return false; - disk_count++; - return true; + disk_count++; + return true; } -static struct retro_disk_control_callback disk_control = { - .set_eject_state = disk_set_eject_state, - .get_eject_state = disk_get_eject_state, - .get_image_index = disk_get_image_index, - .set_image_index = disk_set_image_index, - .get_num_images = disk_get_num_images, - .replace_image_index = disk_replace_image_index, - .add_image_index = disk_add_image_index, -}; +static bool disk_set_initial_image(unsigned index, const char *path) +{ + if (index >= sizeof(disks) / sizeof(disks[0])) + return false; -// just in case, maybe a win-rt port in the future? 
-#ifdef _WIN32 -#define SLASH '\\' -#else -#define SLASH '/' -#endif + if (!path || (*path == '\0')) + return false; -static char base_dir[PATH_MAX]; + disk_initial_index = index; -static bool read_m3u(const char *file) -{ - char line[PATH_MAX]; - char name[PATH_MAX]; - FILE *f = fopen(file, "r"); - if (!f) - return false; - - while (fgets(line, sizeof(line), f) && disk_count < sizeof(disks) / sizeof(disks[0])) { - if (line[0] == '#') - continue; - char *carrige_return = strchr(line, '\r'); - if (carrige_return) - *carrige_return = '\0'; - char *newline = strchr(line, '\n'); - if (newline) - *newline = '\0'; - - if (line[0] != '\0') - { - snprintf(name, sizeof(name), "%s%c%s", base_dir, SLASH, line); - disks[disk_count++].fname = strdup(name); - } - } - - fclose(f); - return (disk_count != 0); + strncpy(disk_initial_path, path, sizeof(disk_initial_path) - 1); + disk_initial_path[sizeof(disk_initial_path) - 1] = '\0'; + + return true; } -static void extract_directory(char *buf, const char *path, size_t size) +static bool disk_get_image_path(unsigned index, char *path, size_t len) { - char *base; - strncpy(buf, path, size - 1); - buf[size - 1] = '\0'; + const char *fname = NULL; - base = strrchr(buf, '/'); - if (!base) - base = strrchr(buf, '\\'); + if (len < 1) + return false; - if (base) - *base = '\0'; - else - { + if (index >= sizeof(disks) / sizeof(disks[0])) + return false; + + fname = disks[index].fname; + + if (!fname || (*fname == '\0')) + return false; + + strncpy(path, fname, len - 1); + path[len - 1] = '\0'; + + return true; +} + +static bool disk_get_image_label(unsigned index, char *label, size_t len) +{ + const char *flabel = NULL; + + if (len < 1) + return false; + + if (index >= sizeof(disks) / sizeof(disks[0])) + return false; + + flabel = disks[index].flabel; + + if (!flabel || (*flabel == '\0')) + return false; + + strncpy(label, flabel, len - 1); + label[len - 1] = '\0'; + + return true; +} + +static struct retro_disk_control_callback 
disk_control = { + .set_eject_state = disk_set_eject_state, + .get_eject_state = disk_get_eject_state, + .get_image_index = disk_get_image_index, + .set_image_index = disk_set_image_index, + .get_num_images = disk_get_num_images, + .replace_image_index = disk_replace_image_index, + .add_image_index = disk_add_image_index, +}; + +static struct retro_disk_control_ext_callback disk_control_ext = { + .set_eject_state = disk_set_eject_state, + .get_eject_state = disk_get_eject_state, + .get_image_index = disk_get_image_index, + .set_image_index = disk_set_image_index, + .get_num_images = disk_get_num_images, + .replace_image_index = disk_replace_image_index, + .add_image_index = disk_add_image_index, + .set_initial_image = disk_set_initial_image, + .get_image_path = disk_get_image_path, + .get_image_label = disk_get_image_label, +}; + +static char base_dir[1024]; + +static bool read_m3u(const char *file) +{ + char line[1024]; + char name[PATH_MAX]; + FILE *fp = fopen(file, "r"); + if (!fp) + return false; + + while (fgets(line, sizeof(line), fp) && disk_count < sizeof(disks) / sizeof(disks[0])) + { + if (line[0] == '#') + continue; + char *carrige_return = strchr(line, '\r'); + if (carrige_return) + *carrige_return = '\0'; + char *newline = strchr(line, '\n'); + if (newline) + *newline = '\0'; + + if (line[0] != '\0') + { + char disk_label[PATH_MAX]; + disk_label[0] = '\0'; + + snprintf(name, sizeof(name), "%s%c%s", base_dir, SLASH, line); + disks[disk_count].fname = strdup(name); + + get_disk_label(disk_label, name, PATH_MAX); + disks[disk_count].flabel = strdup(disk_label); + + disk_count++; + } + } + + fclose(fp); + return (disk_count != 0); +} + +static void extract_directory(char *buf, const char *path, size_t size) +{ + char *base; + strncpy(buf, path, size - 1); + buf[size - 1] = '\0'; + + base = strrchr(buf, '/'); + if (!base) + base = strrchr(buf, '\\'); + + if (base) + *base = '\0'; + else + { buf[0] = '.'; buf[1] = '\0'; } @@ -614,361 +1372,430 @@ static void 
extract_directory(char *buf, const char *path, size_t size) * Find the first occurrence of find in s, ignore case. */ char * -strcasestr(const char *s, const char*find) -{ - char c, sc; - size_t len; - - if ((c = *find++) != 0) { - c = tolower((unsigned char)c); - len = strlen(find); - do { - do { - if ((sc = *s++) == 0) - return (NULL); - } while ((char)tolower((unsigned char)sc) != c); - } while (strncasecmp(s, find, len) != 0); - s--; - } - return ((char *)s); +strcasestr(const char *s, const char *find) +{ + char c, sc; + size_t len; + + if ((c = *find++) != 0) + { + c = tolower((unsigned char)c); + len = strlen(find); + do + { + do + { + if ((sc = *s++) == 0) + return (NULL); + } while ((char)tolower((unsigned char)sc) != c); + } while (strncasecmp(s, find, len) != 0); + s--; + } + return ((char *)s); } #endif +static void set_retro_memmap(void) +{ +#ifndef NDEBUG + struct retro_memory_map retromap = { 0 }; + struct retro_memory_descriptor mmap = { + 0, psxM, 0, 0, 0, 0, 0x200000 + }; + + retromap.descriptors = &mmap; + retromap.num_descriptors = 1; + + environ_cb(RETRO_ENVIRONMENT_SET_MEMORY_MAPS, &retromap); +#endif +} + +static void retro_audio_buff_status_cb( + bool active, unsigned occupancy, bool underrun_likely) +{ + retro_audio_buff_active = active; + retro_audio_buff_occupancy = occupancy; + retro_audio_buff_underrun = underrun_likely; +} + +static void retro_set_audio_buff_status_cb(void) +{ + if (frameskip_type == FRAMESKIP_NONE) + { + environ_cb(RETRO_ENVIRONMENT_SET_AUDIO_BUFFER_STATUS_CALLBACK, NULL); + retro_audio_latency = 0; + } + else + { + bool calculate_audio_latency = true; + + if (frameskip_type == FRAMESKIP_FIXED_INTERVAL) + environ_cb(RETRO_ENVIRONMENT_SET_AUDIO_BUFFER_STATUS_CALLBACK, NULL); + else + { + struct retro_audio_buffer_status_callback buf_status_cb; + buf_status_cb.callback = retro_audio_buff_status_cb; + if (!environ_cb(RETRO_ENVIRONMENT_SET_AUDIO_BUFFER_STATUS_CALLBACK, + &buf_status_cb)) + { + retro_audio_buff_active = 
false; + retro_audio_buff_occupancy = 0; + retro_audio_buff_underrun = false; + retro_audio_latency = 0; + calculate_audio_latency = false; + } + } + + if (calculate_audio_latency) + { + /* Frameskip is enabled - increase frontend + * audio latency to minimise potential + * buffer underruns */ + uint32_t frame_time_usec = 1000000.0 / (is_pal_mode ? 50.0 : 60.0); + + /* Set latency to 6x current frame time... */ + retro_audio_latency = (unsigned)(6 * frame_time_usec / 1000); + + /* ...then round up to nearest multiple of 32 */ + retro_audio_latency = (retro_audio_latency + 0x1F) & ~0x1F; + } + } + + update_audio_latency = true; + frameskip_counter = 0; +} + +static void update_variables(bool in_flight); bool retro_load_game(const struct retro_game_info *info) { - size_t i; - bool is_m3u = (strcasestr(info->path, ".m3u") != NULL); + size_t i; + unsigned int cd_index = 0; + bool is_m3u = (strcasestr(info->path, ".m3u") != NULL); struct retro_input_descriptor desc[] = { - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "Cross" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "Circle" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "Triangle" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "Square" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "L1" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L2, "L2" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L3, "L3" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "R1" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R2, "R2" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R3, "R3" }, - { 0, RETRO_DEVICE_JOYPAD, 0, 
RETRO_DEVICE_ID_JOYPAD_SELECT, "Select" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, - { 0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Left Analog X" }, - { 0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Left Analog Y" }, - { 0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_X, "Right Analog X" }, - { 0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_Y, "Right Analog Y" }, - - - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "Cross" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "Circle" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "Triangle" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "Square" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "L1" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L2, "L2" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L3, "L3" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "R1" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R2, "R2" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R3, "R3" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT, "Select" }, - { 1, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, - { 1, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Left Analog X" }, - { 1, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Left Analog Y" }, - { 1, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_X, "Right Analog X" }, - { 1, 
RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_Y, "Right Analog Y" }, - - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "Cross" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "Circle" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "Triangle" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "Square" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "L1" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L2, "L2" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L3, "L3" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "R1" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R2, "R2" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R3, "R3" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT, "Select" }, - { 2, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, - { 2, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Left Analog X" }, - { 2, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Left Analog Y" }, - { 2, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_X, "Right Analog X" }, - { 2, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_Y, "Right Analog Y" }, - - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, - { 3, RETRO_DEVICE_JOYPAD, 0, 
RETRO_DEVICE_ID_JOYPAD_B, "Cross" }, - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "Circle" }, - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "Triangle" }, - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "Square" }, - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "L1" }, - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L2, "L2" }, - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L3, "L3" }, - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "R1" }, - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R2, "R2" }, - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R3, "R3" }, - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT, "Select" }, - { 3, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, - { 3, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Left Analog X" }, - { 3, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Left Analog Y" }, - { 3, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_X, "Right Analog X" }, - { 3, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_Y, "Right Analog Y" }, - - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "Cross" }, - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "Circle" }, - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "Triangle" }, - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "Square" }, - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "L1" }, - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L2, "L2" }, - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L3, "L3" }, - { 4, 
RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "R1" }, - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R2, "R2" }, - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R3, "R3" }, - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT, "Select" }, - { 4, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, - { 4, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Left Analog X" }, - { 4, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Left Analog Y" }, - { 4, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_X, "Right Analog X" }, - { 4, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_Y, "Right Analog Y" }, - - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "Cross" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "Circle" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "Triangle" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "Square" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "L1" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L2, "L2" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L3, "L3" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "R1" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R2, "R2" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R3, "R3" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT, "Select" }, - { 5, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, - { 5, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Left Analog X" }, - { 5, 
RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Left Analog Y" }, - { 5, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_X, "Right Analog X" }, - { 5, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_Y, "Right Analog Y" }, - - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "Cross" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "Circle" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "Triangle" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "Square" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "L1" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L2, "L2" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L3, "L3" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "R1" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R2, "R2" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R3, "R3" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT, "Select" }, - { 6, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, - { 6, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Left Analog X" }, - { 6, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Left Analog Y" }, - { 6, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_X, "Right Analog X" }, - { 6, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_Y, "Right Analog Y" }, - - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad 
Up" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "Cross" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "Circle" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "Triangle" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "Square" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "L1" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L2, "L2" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L3, "L3" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "R1" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R2, "R2" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R3, "R3" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT, "Select" }, - { 7, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, - { 7, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Left Analog X" }, - { 7, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Left Analog Y" }, - { 7, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_X, "Right Analog X" }, - { 7, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_Y, "Right Analog Y" }, +#define JOYP(port) \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "Cross" }, \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "Circle" }, \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "Triangle" }, \ + { port, RETRO_DEVICE_JOYPAD, 
0, RETRO_DEVICE_ID_JOYPAD_Y, "Square" }, \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "L1" }, \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L2, "L2" }, \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L3, "L3" }, \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "R1" }, \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R2, "R2" }, \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R3, "R3" }, \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT, "Select" }, \ + { port, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, \ + { port, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Left Analog X" }, \ + { port, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Left Analog Y" }, \ + { port, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_X, "Right Analog X" }, \ + { port, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_Y, "Right Analog Y" }, \ + { port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_TRIGGER, "Gun Trigger" }, \ + { port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_RELOAD, "Gun Reload" }, \ + { port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_AUX_A, "Gun Aux A" }, \ + { port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_AUX_B, "Gun Aux B" }, + + JOYP(0) + JOYP(1) + JOYP(2) + JOYP(3) + JOYP(4) + JOYP(5) + JOYP(6) + JOYP(7) { 0 }, }; + frame_count = 0; + environ_cb(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, desc); #ifdef FRONTEND_SUPPORTS_RGB565 - enum retro_pixel_format fmt = RETRO_PIXEL_FORMAT_RGB565; - if (environ_cb(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &fmt)) { - SysPrintf("RGB565 supported, using it\n"); - } + enum retro_pixel_format fmt = RETRO_PIXEL_FORMAT_RGB565; + if (environ_cb(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &fmt)) + { + SysPrintf("RGB565 supported, using it\n"); + } #endif - if (info == NULL || info->path == 
NULL) { - SysPrintf("info->path required\n"); - return false; - } + if (info == NULL || info->path == NULL) + { + SysPrintf("info->path required\n"); + return false; + } - if (plugins_opened) { - ClosePlugins(); - plugins_opened = 0; - } + update_variables(false); - for (i = 0; i < sizeof(disks) / sizeof(disks[0]); i++) { - if (disks[i].fname != NULL) { - free(disks[i].fname); - disks[i].fname = NULL; - } - disks[i].internal_index = 0; - } + if (plugins_opened) + { + ClosePlugins(); + plugins_opened = 0; + } - disk_current_index = 0; - extract_directory(base_dir, info->path, sizeof(base_dir)); + disk_init(); - if (is_m3u) { - if (!read_m3u(info->path)) { - SysPrintf("failed to read m3u file\n"); - return false; - } - } else { - disk_count = 1; - disks[0].fname = strdup(info->path); - } + extract_directory(base_dir, info->path, sizeof(base_dir)); - set_cd_image(disks[0].fname); + if (is_m3u) + { + if (!read_m3u(info->path)) + { + log_cb(RETRO_LOG_INFO, "failed to read m3u file\n"); + return false; + } + } + else + { + char disk_label[PATH_MAX]; + disk_label[0] = '\0'; - /* have to reload after set_cd_image for correct cdr plugin */ - if (LoadPlugins() == -1) { - SysPrintf("failed to load plugins\n"); - return false; - } + disk_count = 1; + disks[0].fname = strdup(info->path); - plugins_opened = 1; - NetOpened = 0; + get_disk_label(disk_label, info->path, PATH_MAX); + disks[0].flabel = strdup(disk_label); + } - if (OpenPlugins() == -1) { - SysPrintf("failed to open plugins\n"); - return false; - } + /* If this is an M3U file, attempt to set the + * initial disk image */ + if (is_m3u && (disk_initial_index > 0) && (disk_initial_index < disk_count)) + { + const char *fname = disks[disk_initial_index].fname; - plugin_call_rearmed_cbs(); - dfinput_activate(); + if (fname && (*fname != '\0')) + if (strcmp(disk_initial_path, fname) == 0) + cd_index = disk_initial_index; + } - Config.PsxAuto = 1; - if (CheckCdrom() == -1) { - SysPrintf("unsupported/invalid CD image: %s\n", 
info->path); - return false; - } + set_cd_image(disks[cd_index].fname); + disk_current_index = cd_index; - SysReset(); + /* have to reload after set_cd_image for correct cdr plugin */ + if (LoadPlugins() == -1) + { + log_cb(RETRO_LOG_INFO, "failed to load plugins\n"); + return false; + } - if (LoadCdrom() == -1) { - SysPrintf("could not load CD-ROM!\n"); - return false; - } - emu_on_new_cd(0); + plugins_opened = 1; + NetOpened = 0; - // multidisk images - if (!is_m3u) { - disk_count = cdrIsoMultidiskCount < 8 ? cdrIsoMultidiskCount : 8; - for (i = 1; i < sizeof(disks) / sizeof(disks[0]) && i < cdrIsoMultidiskCount; i++) { - disks[i].fname = strdup(info->path); - disks[i].internal_index = i; - } - } + if (OpenPlugins() == -1) + { + log_cb(RETRO_LOG_INFO, "failed to open plugins\n"); + return false; + } - return true; -} + /* Handle multi-disk images (i.e. PBP) + * > Cannot do this until after OpenPlugins() is + * called (since this sets the value of + * cdrIsoMultidiskCount) */ + if (!is_m3u && (cdrIsoMultidiskCount > 1)) + { + disk_count = cdrIsoMultidiskCount < 8 ? 
cdrIsoMultidiskCount : 8; -bool retro_load_game_special(unsigned game_type, const struct retro_game_info *info, size_t num_info) -{ - return false; -} + /* Small annoyance: We need to change the label + * of disk 0, so have to clear existing entries */ + if (disks[0].fname != NULL) + free(disks[0].fname); + disks[0].fname = NULL; -void retro_unload_game(void) -{ + if (disks[0].flabel != NULL) + free(disks[0].flabel); + disks[0].flabel = NULL; + + for (i = 0; i < sizeof(disks) / sizeof(disks[0]) && i < cdrIsoMultidiskCount; i++) + { + char disk_name[PATH_MAX - 16] = { 0 }; + char disk_label[PATH_MAX] = { 0 }; + + disks[i].fname = strdup(info->path); + + get_disk_label(disk_name, info->path, sizeof(disk_name)); + snprintf(disk_label, sizeof(disk_label), "%s #%u", disk_name, (unsigned)i + 1); + disks[i].flabel = strdup(disk_label); + + disks[i].internal_index = i; + } + + /* This is not an M3U file, so initial disk + * image has not yet been set - attempt to + * do so now */ + if ((disk_initial_index > 0) && (disk_initial_index < disk_count)) + { + const char *fname = disks[disk_initial_index].fname; + + if (fname && (*fname != '\0')) + if (strcmp(disk_initial_path, fname) == 0) + cd_index = disk_initial_index; + } + + if (cd_index > 0) + { + CdromId[0] = '\0'; + CdromLabel[0] = '\0'; + + cdrIsoMultidiskSelect = disks[cd_index].internal_index; + disk_current_index = cd_index; + set_cd_image(disks[cd_index].fname); + + if (ReloadCdromPlugin() < 0) + { + log_cb(RETRO_LOG_INFO, "failed to reload cdr plugins\n"); + return false; + } + if (CDR_open() < 0) + { + log_cb(RETRO_LOG_INFO, "failed to open cdr plugin\n"); + return false; + } + } + } + + /* set ports to use "standard controller" initially */ + for (i = 0; i < 8; ++i) + in_type[i] = PSE_PAD_TYPE_STANDARD; + + plugin_call_rearmed_cbs(); + /* dfinput_activate(); */ + + if (CheckCdrom() == -1) + { + log_cb(RETRO_LOG_INFO, "unsupported/invalid CD image: %s\n", info->path); + return false; + } + + SysReset(); + + if 
(LoadCdrom() == -1) + { + log_cb(RETRO_LOG_INFO, "could not load CD\n"); + return false; + } + emu_on_new_cd(0); + + set_retro_memmap(); + retro_set_audio_buff_status_cb(); + + return true; } unsigned retro_get_region(void) { - return is_pal_mode ? RETRO_REGION_PAL : RETRO_REGION_NTSC; + return is_pal_mode ? RETRO_REGION_PAL : RETRO_REGION_NTSC; } void *retro_get_memory_data(unsigned id) { - if (id == RETRO_MEMORY_SAVE_RAM) - return Mcd1Data; - else - return NULL; + if (id == RETRO_MEMORY_SAVE_RAM) + return Mcd1Data; + else if (id == RETRO_MEMORY_SYSTEM_RAM) + return psxM; + else + return NULL; } size_t retro_get_memory_size(unsigned id) { - if (id == RETRO_MEMORY_SAVE_RAM) - return MCD_SIZE; - else - return 0; + if (id == RETRO_MEMORY_SAVE_RAM) + return MCD_SIZE; + else if (id == RETRO_MEMORY_SYSTEM_RAM) + return 0x200000; + else + return 0; } void retro_reset(void) { - SysReset(); + //hack to prevent retroarch freezing when reseting in the menu but not while running with the hot key + rebootemu = 1; + //SysReset(); } static const unsigned short retro_psx_map[] = { - [RETRO_DEVICE_ID_JOYPAD_B] = 1 << DKEY_CROSS, - [RETRO_DEVICE_ID_JOYPAD_Y] = 1 << DKEY_SQUARE, - [RETRO_DEVICE_ID_JOYPAD_SELECT] = 1 << DKEY_SELECT, - [RETRO_DEVICE_ID_JOYPAD_START] = 1 << DKEY_START, - [RETRO_DEVICE_ID_JOYPAD_UP] = 1 << DKEY_UP, - [RETRO_DEVICE_ID_JOYPAD_DOWN] = 1 << DKEY_DOWN, - [RETRO_DEVICE_ID_JOYPAD_LEFT] = 1 << DKEY_LEFT, - [RETRO_DEVICE_ID_JOYPAD_RIGHT] = 1 << DKEY_RIGHT, - [RETRO_DEVICE_ID_JOYPAD_A] = 1 << DKEY_CIRCLE, - [RETRO_DEVICE_ID_JOYPAD_X] = 1 << DKEY_TRIANGLE, - [RETRO_DEVICE_ID_JOYPAD_L] = 1 << DKEY_L1, - [RETRO_DEVICE_ID_JOYPAD_R] = 1 << DKEY_R1, - [RETRO_DEVICE_ID_JOYPAD_L2] = 1 << DKEY_L2, - [RETRO_DEVICE_ID_JOYPAD_R2] = 1 << DKEY_R2, - [RETRO_DEVICE_ID_JOYPAD_L3] = 1 << DKEY_L3, - [RETRO_DEVICE_ID_JOYPAD_R3] = 1 << DKEY_R3, + [RETRO_DEVICE_ID_JOYPAD_B] = 1 << DKEY_CROSS, + [RETRO_DEVICE_ID_JOYPAD_Y] = 1 << DKEY_SQUARE, + [RETRO_DEVICE_ID_JOYPAD_SELECT] = 1 << 
DKEY_SELECT, + [RETRO_DEVICE_ID_JOYPAD_START] = 1 << DKEY_START, + [RETRO_DEVICE_ID_JOYPAD_UP] = 1 << DKEY_UP, + [RETRO_DEVICE_ID_JOYPAD_DOWN] = 1 << DKEY_DOWN, + [RETRO_DEVICE_ID_JOYPAD_LEFT] = 1 << DKEY_LEFT, + [RETRO_DEVICE_ID_JOYPAD_RIGHT] = 1 << DKEY_RIGHT, + [RETRO_DEVICE_ID_JOYPAD_A] = 1 << DKEY_CIRCLE, + [RETRO_DEVICE_ID_JOYPAD_X] = 1 << DKEY_TRIANGLE, + [RETRO_DEVICE_ID_JOYPAD_L] = 1 << DKEY_L1, + [RETRO_DEVICE_ID_JOYPAD_R] = 1 << DKEY_R1, + [RETRO_DEVICE_ID_JOYPAD_L2] = 1 << DKEY_L2, + [RETRO_DEVICE_ID_JOYPAD_R2] = 1 << DKEY_R2, + [RETRO_DEVICE_ID_JOYPAD_L3] = 1 << DKEY_L3, + [RETRO_DEVICE_ID_JOYPAD_R3] = 1 << DKEY_R3, }; #define RETRO_PSX_MAP_LEN (sizeof(retro_psx_map) / sizeof(retro_psx_map[0])) +//Percentage distance of screen to adjust +static int GunconAdjustX = 0; +static int GunconAdjustY = 0; + +//Used when out by a percentage +static float GunconAdjustRatioX = 1; +static float GunconAdjustRatioY = 1; + static void update_variables(bool in_flight) { struct retro_variable var; +#ifdef GPU_PEOPS + // Always enable GPU_PEOPS_OLD_FRAME_SKIP flag + // (this is set in standalone, with no option + // to change it) + int gpu_peops_fix = GPU_PEOPS_OLD_FRAME_SKIP; +#endif + frameskip_type_t prev_frameskip_type; + + var.value = NULL; + var.key = "pcsx_rearmed_frameskip_type"; + + prev_frameskip_type = frameskip_type; + frameskip_type = FRAMESKIP_NONE; + pl_rearmed_cbs.frameskip = 0; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "auto") == 0) + frameskip_type = FRAMESKIP_AUTO; + if (strcmp(var.value, "auto_threshold") == 0) + frameskip_type = FRAMESKIP_AUTO_THRESHOLD; + if (strcmp(var.value, "fixed_interval") == 0) + frameskip_type = FRAMESKIP_FIXED_INTERVAL; + } + + if (frameskip_type != 0) + pl_rearmed_cbs.frameskip = -1; var.value = NULL; - var.key = "pcsx_rearmed_frameskip"; + var.key = "pcsx_rearmed_frameskip_threshold"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + 
frameskip_threshold = strtol(var.value, NULL, 10); + } - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) - pl_rearmed_cbs.frameskip = atoi(var.value); + var.value = NULL; + var.key = "pcsx_rearmed_frameskip_interval"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + frameskip_interval = strtol(var.value, NULL, 10); + } var.value = NULL; var.key = "pcsx_rearmed_region"; - - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { Config.PsxAuto = 0; - if (strcmp(var.value, "Automatic") == 0) + if (strcmp(var.value, "auto") == 0) Config.PsxAuto = 1; else if (strcmp(var.value, "NTSC") == 0) Config.PsxType = 0; @@ -976,31 +1803,87 @@ static void update_variables(bool in_flight) Config.PsxType = 1; } + update_multitap(); + var.value = NULL; - var.key = "pcsx_rearmed_pad1type"; + var.key = "pcsx_rearmed_negcon_deadzone"; + negcon_deadzone = 0; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + negcon_deadzone = (int)(atoi(var.value) * 0.01f * NEGCON_RANGE); + } - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + var.value = NULL; + var.key = "pcsx_rearmed_negcon_response"; + negcon_linearity = 1; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - in_type1 = PSE_PAD_TYPE_STANDARD; - if (strcmp(var.value, "analog") == 0) - in_type1 = PSE_PAD_TYPE_ANALOGPAD; + if (strcmp(var.value, "quadratic") == 0) + { + negcon_linearity = 2; + } + else if (strcmp(var.value, "cubic") == 0) + { + negcon_linearity = 3; + } + } + + var.value = NULL; + var.key = "pcsx_rearmed_analog_axis_modifier"; + axis_bounds_modifier = true; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "square") == 0) + { + axis_bounds_modifier = true; + } + else if (strcmp(var.value, "circle") == 0) + { + axis_bounds_modifier = false; + } + } + + var.value = NULL; + var.key = 
"pcsx_rearmed_vibration"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "disabled") == 0) + in_enable_vibration = 0; + else if (strcmp(var.value, "enabled") == 0) + in_enable_vibration = 1; } var.value = NULL; - var.key = "pcsx_rearmed_pad2type"; + var.key = "pcsx_rearmed_dithering"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - in_type2 = PSE_PAD_TYPE_STANDARD; - if (strcmp(var.value, "analog") == 0) - in_type2 = PSE_PAD_TYPE_ANALOGPAD; + if (strcmp(var.value, "disabled") == 0) + { + pl_rearmed_cbs.gpu_peops.iUseDither = 0; + pl_rearmed_cbs.gpu_peopsgl.bDrawDither = 0; + pl_rearmed_cbs.gpu_unai.dithering = 0; +#ifdef GPU_NEON + pl_rearmed_cbs.gpu_neon.allow_dithering = 0; +#endif + } + else if (strcmp(var.value, "enabled") == 0) + { + pl_rearmed_cbs.gpu_peops.iUseDither = 1; + pl_rearmed_cbs.gpu_peopsgl.bDrawDither = 1; + pl_rearmed_cbs.gpu_unai.dithering = 1; +#ifdef GPU_NEON + pl_rearmed_cbs.gpu_neon.allow_dithering = 1; +#endif + } } -#ifdef __ARM_NEON__ - var.value = "NULL"; +#ifdef GPU_NEON + var.value = NULL; var.key = "pcsx_rearmed_neon_interlace_enable"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "disabled") == 0) pl_rearmed_cbs.gpu_neon.allow_interlace = 0; @@ -1011,7 +1894,7 @@ static void update_variables(bool in_flight) var.value = NULL; var.key = "pcsx_rearmed_neon_enhancement_enable"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "disabled") == 0) pl_rearmed_cbs.gpu_neon.enhancement_enable = 0; @@ -1022,7 +1905,7 @@ static void update_variables(bool in_flight) var.value = NULL; var.key = "pcsx_rearmed_neon_enhancement_no_main"; - if 
(environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "disabled") == 0) pl_rearmed_cbs.gpu_neon.enhancement_no_main = 0; @@ -1031,55 +1914,155 @@ static void update_variables(bool in_flight) } #endif - var.value = "NULL"; + var.value = NULL; var.key = "pcsx_rearmed_duping_enable"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - if (strcmp(var.value, "off") == 0) + if (strcmp(var.value, "disabled") == 0) duping_enable = false; - else if (strcmp(var.value, "on") == 0) + else if (strcmp(var.value, "enabled") == 0) duping_enable = true; } + var.value = NULL; + var.key = "pcsx_rearmed_display_internal_fps"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "disabled") == 0) + display_internal_fps = false; + else if (strcmp(var.value, "enabled") == 0) + display_internal_fps = true; + } + + // + // CPU emulation related config #ifndef DRC_DISABLE var.value = NULL; var.key = "pcsx_rearmed_drc"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + if (!environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var)) + var.value = "enabled"; + { R3000Acpu *prev_cpu = psxCpu; +#if defined(LIGHTREC) + bool can_use_dynarec = found_bios; +#else + bool can_use_dynarec = 1; +#endif - if (strcmp(var.value, "disabled") == 0) +#ifdef _3DS + if (!__ctr_svchax) + Config.Cpu = CPU_INTERPRETER; + else +#endif + if (strcmp(var.value, "disabled") == 0 || !can_use_dynarec) Config.Cpu = CPU_INTERPRETER; else if (strcmp(var.value, "enabled") == 0) Config.Cpu = CPU_DYNAREC; psxCpu = (Config.Cpu == CPU_INTERPRETER) ? &psxInt : &psxRec; - if (psxCpu != prev_cpu) { + if (psxCpu != prev_cpu) + { prev_cpu->Shutdown(); psxCpu->Init(); psxCpu->Reset(); // not really a reset.. 
} } -#endif +#endif /* !DRC_DISABLE */ + + var.value = NULL; + var.key = "pcsx_rearmed_psxclock"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + int psxclock = atoi(var.value); + Config.cycle_multiplier = 10000 / psxclock; + } + +#if !defined(DRC_DISABLE) && !defined(LIGHTREC) + var.value = NULL; + var.key = "pcsx_rearmed_nosmccheck"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + new_dynarec_hacks |= NDHACK_NO_SMC_CHECK; + else + new_dynarec_hacks &= ~NDHACK_NO_SMC_CHECK; + } + + var.value = NULL; + var.key = "pcsx_rearmed_gteregsunneeded"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + new_dynarec_hacks |= NDHACK_GTE_UNNEEDED; + else + new_dynarec_hacks &= ~NDHACK_GTE_UNNEEDED; + } + + var.value = NULL; + var.key = "pcsx_rearmed_nogteflags"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + new_dynarec_hacks |= NDHACK_GTE_NO_FLAGS; + else + new_dynarec_hacks &= ~NDHACK_GTE_NO_FLAGS; + } + + var.value = NULL; + var.key = "pcsx_rearmed_nocompathacks"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + new_dynarec_hacks |= NDHACK_NO_COMPAT_HACKS; + else + new_dynarec_hacks &= ~NDHACK_NO_COMPAT_HACKS; + } +#endif /* !DRC_DISABLE && !LIGHTREC */ + + var.value = NULL; + var.key = "pcsx_rearmed_nostalls"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + Config.DisableStalls = 1; + else + Config.DisableStalls = 0; + } + + var.value = NULL; + var.key = "pcsx_rearmed_icache_emulation"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "disabled") == 0) + Config.icache_emulation = 0; + else if (strcmp(var.value, "enabled") == 0) + Config.icache_emulation = 1; + } + 
psxCpu->ApplyConfig(); - var.value = "NULL"; + // end of CPU emu config + // + + var.value = NULL; var.key = "pcsx_rearmed_spu_reverb"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - if (strcmp(var.value, "off") == 0) + if (strcmp(var.value, "disabled") == 0) spu_config.iUseReverb = false; - else if (strcmp(var.value, "on") == 0) + else if (strcmp(var.value, "enabled") == 0) spu_config.iUseReverb = true; } - var.value = "NULL"; + var.value = NULL; var.key = "pcsx_rearmed_spu_interpolation"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "simple") == 0) spu_config.iUseInterpolation = 1; @@ -1091,103 +2074,739 @@ static void update_variables(bool in_flight) spu_config.iUseInterpolation = 0; } - if (in_flight) { +#ifndef _WIN32 + var.value = NULL; + var.key = "pcsx_rearmed_async_cd"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "async") == 0) + { + Config.AsyncCD = 1; + Config.CHD_Precache = 0; + } + else if (strcmp(var.value, "sync") == 0) + { + Config.AsyncCD = 0; + Config.CHD_Precache = 0; + } + else if (strcmp(var.value, "precache") == 0) + { + Config.AsyncCD = 0; + Config.CHD_Precache = 1; + } + } +#endif + + var.value = NULL; + var.key = "pcsx_rearmed_noxadecoding"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "disabled") == 0) + Config.Xa = 1; + else + Config.Xa = 0; + } + + var.value = NULL; + var.key = "pcsx_rearmed_nocdaudio"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "disabled") == 0) + Config.Cdda = 1; + else + Config.Cdda = 0; + } + +#ifdef THREAD_RENDERING + var.key = "pcsx_rearmed_gpu_thread_rendering"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && 
var.value) + { + if (strcmp(var.value, "disabled") == 0) + pl_rearmed_cbs.thread_rendering = THREAD_RENDERING_OFF; + else if (strcmp(var.value, "sync") == 0) + pl_rearmed_cbs.thread_rendering = THREAD_RENDERING_SYNC; + else if (strcmp(var.value, "async") == 0) + pl_rearmed_cbs.thread_rendering = THREAD_RENDERING_ASYNC; + } +#endif + +#ifdef GPU_PEOPS + var.value = NULL; + var.key = "pcsx_rearmed_gpu_peops_odd_even_bit"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + gpu_peops_fix |= GPU_PEOPS_ODD_EVEN_BIT; + } + + var.value = NULL; + var.key = "pcsx_rearmed_gpu_peops_expand_screen_width"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + gpu_peops_fix |= GPU_PEOPS_EXPAND_SCREEN_WIDTH; + } + + var.value = NULL; + var.key = "pcsx_rearmed_gpu_peops_ignore_brightness"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + gpu_peops_fix |= GPU_PEOPS_IGNORE_BRIGHTNESS; + } + + var.value = NULL; + var.key = "pcsx_rearmed_gpu_peops_disable_coord_check"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + gpu_peops_fix |= GPU_PEOPS_DISABLE_COORD_CHECK; + } + + var.value = NULL; + var.key = "pcsx_rearmed_gpu_peops_lazy_screen_update"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + gpu_peops_fix |= GPU_PEOPS_LAZY_SCREEN_UPDATE; + } + + var.value = NULL; + var.key = "pcsx_rearmed_gpu_peops_repeated_triangles"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + gpu_peops_fix |= GPU_PEOPS_REPEATED_TRIANGLES; + } + + var.value = NULL; + var.key = "pcsx_rearmed_gpu_peops_quads_with_triangles"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if 
(strcmp(var.value, "enabled") == 0) + gpu_peops_fix |= GPU_PEOPS_QUADS_WITH_TRIANGLES; + } + + var.value = NULL; + var.key = "pcsx_rearmed_gpu_peops_fake_busy_state"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + gpu_peops_fix |= GPU_PEOPS_FAKE_BUSY_STATE; + } + + if (pl_rearmed_cbs.gpu_peops.dwActFixes != gpu_peops_fix) + pl_rearmed_cbs.gpu_peops.dwActFixes = gpu_peops_fix; +#endif + +#ifdef GPU_UNAI + /* Note: This used to be an option, but it only works + * (correctly) when running high resolution games + * (480i, 512i) and has been obsoleted by + * pcsx_rearmed_gpu_unai_scale_hires */ + pl_rearmed_cbs.gpu_unai.ilace_force = 0; + /* Note: This used to be an option, but it has no + * discernable effect and has been obsoleted by + * pcsx_rearmed_gpu_unai_scale_hires */ + pl_rearmed_cbs.gpu_unai.pixel_skip = 0; + + var.key = "pcsx_rearmed_gpu_unai_lighting"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "disabled") == 0) + pl_rearmed_cbs.gpu_unai.lighting = 0; + else if (strcmp(var.value, "enabled") == 0) + pl_rearmed_cbs.gpu_unai.lighting = 1; + } + + var.key = "pcsx_rearmed_gpu_unai_fast_lighting"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "disabled") == 0) + pl_rearmed_cbs.gpu_unai.fast_lighting = 0; + else if (strcmp(var.value, "enabled") == 0) + pl_rearmed_cbs.gpu_unai.fast_lighting = 1; + } + + var.key = "pcsx_rearmed_gpu_unai_blending"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "disabled") == 0) + pl_rearmed_cbs.gpu_unai.blending = 0; + else if (strcmp(var.value, "enabled") == 0) + pl_rearmed_cbs.gpu_unai.blending = 1; + } + + var.key = "pcsx_rearmed_gpu_unai_scale_hires"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + 
{ + if (strcmp(var.value, "disabled") == 0) + pl_rearmed_cbs.gpu_unai.scale_hires = 0; + else if (strcmp(var.value, "enabled") == 0) + pl_rearmed_cbs.gpu_unai.scale_hires = 1; + } +#endif // GPU_UNAI + + //This adjustment process gives the user the ability to manually align the mouse up better + //with where the shots are in the emulator. + + var.value = NULL; + var.key = "pcsx_rearmed_gunconadjustx"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + GunconAdjustX = atoi(var.value); + } + + var.value = NULL; + var.key = "pcsx_rearmed_gunconadjusty"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + GunconAdjustY = atoi(var.value); + } + + var.value = NULL; + var.key = "pcsx_rearmed_gunconadjustratiox"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + GunconAdjustRatioX = atof(var.value); + } + + var.value = NULL; + var.key = "pcsx_rearmed_gunconadjustratioy"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + GunconAdjustRatioY = atof(var.value); + } + + var.value = NULL; + var.key = "pcsx_rearmed_input_sensitivity"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + mouse_sensitivity = atof(var.value); + } + + if (in_flight) + { // inform core things about possible config changes plugin_call_rearmed_cbs(); - if (GPU_open != NULL && GPU_close != NULL) { + if (GPU_open != NULL && GPU_close != NULL) + { GPU_close(); GPU_open(&gpuDisp, "PCSX", NULL); } - dfinput_activate(); + /* Reinitialise frameskipping, if required */ + if (((frameskip_type != prev_frameskip_type))) + retro_set_audio_buff_status_cb(); + + /* dfinput_activate(); */ } + else + { + //not yet running + + //bootlogo display hack + if (found_bios) + { + var.value = NULL; + var.key = "pcsx_rearmed_show_bios_bootlogo"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + Config.SlowBoot = 0; + rebootemu = 0; + if (strcmp(var.value, "enabled") == 0) + { + 
Config.SlowBoot = 1; + rebootemu = 1; + } + } + } + } + + update_option_visibility(); } -void retro_run(void) +// Taken from beetle-psx-libretro +static uint16_t get_analog_button(int16_t ret, retro_input_state_t input_state_cb, int player_index, int id) { - int i; + // NOTE: Analog buttons were added Nov 2017. Not all front-ends support this + // feature (or pre-date it) so we need to handle this in a graceful way. + + // First, try and get an analog value using the new libretro API constant + uint16_t button = input_state_cb(player_index, + RETRO_DEVICE_ANALOG, + RETRO_DEVICE_INDEX_ANALOG_BUTTON, + id); + button = MIN(button / 128, 255); - input_poll_cb(); + if (button == 0) + { + // If we got exactly zero, we're either not pressing the button, or the front-end + // is not reporting analog values. We need to do a second check using the classic + // digital API method, to at least get some response - better than nothing. - bool updated = false; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &updated) && updated) - update_variables(true); + // NOTE: If we're really just not holding the button, we're still going to get zero. - in_keystate = 0; - for (i = 0; i < RETRO_PSX_MAP_LEN; i++) - if (input_state_cb(1, RETRO_DEVICE_JOYPAD, 0, i)) - in_keystate |= retro_psx_map[i]; - in_keystate <<= 16; - for (i = 0; i < RETRO_PSX_MAP_LEN; i++) - if (input_state_cb(0, RETRO_DEVICE_JOYPAD, 0, i)) - in_keystate |= retro_psx_map[i]; + button = (ret & (1 << id)) ? 
255 : 0; + } - if (in_type1 == PSE_PAD_TYPE_ANALOGPAD) - { - in_a1[0] = (input_state_cb(0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X) / 256) + 128; - in_a1[1] = (input_state_cb(0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y) / 256) + 128; - in_a2[0] = (input_state_cb(0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_X) / 256) + 128; - in_a2[1] = (input_state_cb(0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_Y) / 256) + 128; - } + return button; +} - stop = 0; - psxCpu->Execute(); +unsigned char axis_range_modifier(int16_t axis_value, bool is_square) +{ + float modifier_axis_range = 0; + + if (is_square) + { + modifier_axis_range = round((axis_value >> 8) / 0.785) + 128; + if (modifier_axis_range < 0) + { + modifier_axis_range = 0; + } + else if (modifier_axis_range > 255) + { + modifier_axis_range = 255; + } + } + else + { + modifier_axis_range = MIN(((axis_value >> 8) + 128), 255); + } - video_cb((vout_fb_dirty || !vout_can_dupe || !duping_enable) ? 
vout_buf : NULL, - vout_width, vout_height, vout_width * 2); - vout_fb_dirty = 0; + return modifier_axis_range; } -static bool try_use_bios(const char *path) +static void update_input_guncon(int port, int ret) { - FILE *f; - long size; - const char *name; + //ToDo: + //Core option for cursors for both players + //Separate pointer and lightgun control types + + //Mouse range is -32767 -> 32767 + //1% is about 655 + //Use the left analog stick field to store the absolute coordinates + //Fix cursor to top-left when gun is detected as "offscreen" + if (input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_IS_OFFSCREEN) || input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_RELOAD)) + { + in_analog_left[port][0] = -32767; + in_analog_left[port][1] = -32767; + } + else + { + int gunx = input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_X); + int guny = input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_Y); + + in_analog_left[port][0] = (gunx * GunconAdjustRatioX) + (GunconAdjustX * 655); + in_analog_left[port][1] = (guny * GunconAdjustRatioY) + (GunconAdjustY * 655); + } + + //GUNCON has 3 controls, Trigger,A,B which equal Circle,Start,Cross - f = fopen(path, "rb"); - if (f == NULL) - return false; + // Trigger + if (input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_TRIGGER) || input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_RELOAD)) + in_keystate[port] |= (1 << DKEY_CIRCLE); - fseek(f, 0, SEEK_END); - size = ftell(f); - fclose(f); + // A + if (input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_AUX_A)) + in_keystate[port] |= (1 << DKEY_START); - if (size != 512 * 1024) - return false; + // B + if (input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_AUX_B)) + in_keystate[port] |= (1 << DKEY_CROSS); + +} - name = strrchr(path, SLASH); - if (name++ == NULL) - name = path; - snprintf(Config.Bios, 
sizeof(Config.Bios), "%s", name); - return true; +static void update_input_negcon(int port, int ret) +{ + int lsx; + int rsy; + int negcon_i_rs; + int negcon_ii_rs; + float negcon_twist_amplitude; + + // Query digital inputs + // + // > Pad-Up + if (ret & (1 << RETRO_DEVICE_ID_JOYPAD_UP)) + in_keystate[port] |= (1 << DKEY_UP); + // > Pad-Right + if (ret & (1 << RETRO_DEVICE_ID_JOYPAD_RIGHT)) + in_keystate[port] |= (1 << DKEY_RIGHT); + // > Pad-Down + if (ret & (1 << RETRO_DEVICE_ID_JOYPAD_DOWN)) + in_keystate[port] |= (1 << DKEY_DOWN); + // > Pad-Left + if (ret & (1 << RETRO_DEVICE_ID_JOYPAD_LEFT)) + in_keystate[port] |= (1 << DKEY_LEFT); + // > Start + if (ret & (1 << RETRO_DEVICE_ID_JOYPAD_START)) + in_keystate[port] |= (1 << DKEY_START); + // > neGcon A + if (ret & (1 << RETRO_DEVICE_ID_JOYPAD_A)) + in_keystate[port] |= (1 << DKEY_CIRCLE); + // > neGcon B + if (ret & (1 << RETRO_DEVICE_ID_JOYPAD_X)) + in_keystate[port] |= (1 << DKEY_TRIANGLE); + // > neGcon R shoulder (digital) + if (ret & (1 << RETRO_DEVICE_ID_JOYPAD_R)) + in_keystate[port] |= (1 << DKEY_R1); + // Query analog inputs + // + // From studying 'libpcsxcore/plugins.c' and 'frontend/plugin.c': + // >> pad->leftJoyX == in_analog_left[port][0] == NeGcon II + // >> pad->leftJoyY == in_analog_left[port][1] == NeGcon L + // >> pad->rightJoyX == in_analog_right[port][0] == NeGcon twist + // >> pad->rightJoyY == in_analog_right[port][1] == NeGcon I + // So we just have to map in_analog_left/right to more + // appropriate inputs... 
+ // + // > NeGcon twist + // >> Get raw analog stick value and account for deadzone + lsx = input_state_cb(port, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X); + if (lsx > negcon_deadzone) + lsx = lsx - negcon_deadzone; + else if (lsx < -negcon_deadzone) + lsx = lsx + negcon_deadzone; + else + lsx = 0; + // >> Convert to an 'amplitude' [-1.0,1.0] and adjust response + negcon_twist_amplitude = (float)lsx / (float)(NEGCON_RANGE - negcon_deadzone); + if (negcon_linearity == 2) + { + if (negcon_twist_amplitude < 0.0) + negcon_twist_amplitude = -(negcon_twist_amplitude * negcon_twist_amplitude); + else + negcon_twist_amplitude = negcon_twist_amplitude * negcon_twist_amplitude; + } + else if (negcon_linearity == 3) + negcon_twist_amplitude = negcon_twist_amplitude * negcon_twist_amplitude * negcon_twist_amplitude; + // >> Convert to final 'in_analog' integer value [0,255] + in_analog_right[port][0] = MAX(MIN((int)(negcon_twist_amplitude * 128.0f) + 128, 255), 0); + // > NeGcon I + II + // >> Handle right analog stick vertical axis mapping... 
+ // - Up (-Y) == accelerate == neGcon I + // - Down (+Y) == brake == neGcon II + negcon_i_rs = 0; + negcon_ii_rs = 0; + rsy = input_state_cb(port, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_Y); + if (rsy >= 0) + { + // Account for deadzone + // (Note: have never encountered a gamepad with significant differences + // in deadzone between left/right analog sticks, so use the regular 'twist' + // deadzone here) + if (rsy > negcon_deadzone) + rsy = rsy - negcon_deadzone; + else + rsy = 0; + // Convert to 'in_analog' integer value [0,255] + negcon_ii_rs = MIN((int)(((float)rsy / (float)(NEGCON_RANGE - negcon_deadzone)) * 255.0f), 255); + } + else + { + if (rsy < -negcon_deadzone) + rsy = -1 * (rsy + negcon_deadzone); + else + rsy = 0; + negcon_i_rs = MIN((int)(((float)rsy / (float)(NEGCON_RANGE - negcon_deadzone)) * 255.0f), 255); + } + // >> NeGcon I + in_analog_right[port][1] = MAX( + MAX( + get_analog_button(ret, input_state_cb, port, RETRO_DEVICE_ID_JOYPAD_R2), + get_analog_button(ret, input_state_cb, port, RETRO_DEVICE_ID_JOYPAD_B)), + negcon_i_rs); + // >> NeGcon II + in_analog_left[port][0] = MAX( + MAX( + get_analog_button(ret, input_state_cb, port, RETRO_DEVICE_ID_JOYPAD_L2), + get_analog_button(ret, input_state_cb, port, RETRO_DEVICE_ID_JOYPAD_Y)), + negcon_ii_rs); + // > NeGcon L + in_analog_left[port][1] = get_analog_button(ret, input_state_cb, port, RETRO_DEVICE_ID_JOYPAD_L); +} + +static void update_input_mouse(int port, int ret) +{ + float raw_x = input_state_cb(port, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_X); + float raw_y = input_state_cb(port, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_Y); + + int x = (int)roundf(raw_x * mouse_sensitivity); + int y = (int)roundf(raw_y * mouse_sensitivity); + + if (x > 127) x = 127; + else if (x < -128) x = -128; + + if (y > 127) y = 127; + else if (y < -128) y = -128; + + in_mouse[port][0] = x; /* -128..+128 left/right movement, 0 = no movement */ + in_mouse[port][1] = y; /* 
-128..+128 down/up movement, 0 = no movement */ + + /* left mouse button state */ + if (input_state_cb(port, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_LEFT)) + in_keystate[port] |= 1 << 11; + + /* right mouse button state */ + if (input_state_cb(port, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_RIGHT)) + in_keystate[port] |= 1 << 10; } -#if 1 +static void update_input(void) +{ + // reset all keystate, query libretro for keystate + int i; + int j; + + for (i = 0; i < PORTS_NUMBER; i++) + { + int16_t ret = 0; + int type = in_type[i]; + + in_keystate[i] = 0; + + if (type == PSE_PAD_TYPE_NONE) + continue; + + if (libretro_supports_bitmasks) + ret = input_state_cb(i, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_MASK); + else + { + for (j = 0; j < (RETRO_DEVICE_ID_JOYPAD_R3 + 1); j++) + { + if (input_state_cb(i, RETRO_DEVICE_JOYPAD, 0, j)) + ret |= (1 << j); + } + } + + switch (type) + { + case PSE_PAD_TYPE_GUNCON: + update_input_guncon(i, ret); + break; + case PSE_PAD_TYPE_NEGCON: + update_input_negcon(i, ret); + break; + case PSE_PAD_TYPE_MOUSE: + update_input_mouse(i, ret); + break; + default: + // Query digital inputs + for (j = 0; j < RETRO_PSX_MAP_LEN; j++) + if (ret & (1 << j)) + in_keystate[i] |= retro_psx_map[j]; + + // Query analog inputs + if (type == PSE_PAD_TYPE_ANALOGJOY || type == PSE_PAD_TYPE_ANALOGPAD) + { + in_analog_left[i][0] = axis_range_modifier(input_state_cb(i, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X), axis_bounds_modifier); + in_analog_left[i][1] = axis_range_modifier(input_state_cb(i, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y), axis_bounds_modifier); + in_analog_right[i][0] = axis_range_modifier(input_state_cb(i, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_X), axis_bounds_modifier); + in_analog_right[i][1] = axis_range_modifier(input_state_cb(i, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_Y), 
axis_bounds_modifier); + } + } + } +} + +static void print_internal_fps(void) +{ + if (display_internal_fps) + { + frame_count++; + + if (frame_count % INTERNAL_FPS_SAMPLE_PERIOD == 0) + { + unsigned internal_fps = pl_rearmed_cbs.flip_cnt * (is_pal_mode ? 50 : 60) / INTERNAL_FPS_SAMPLE_PERIOD; + char str[64]; + const char *strc = (const char *)str; + + str[0] = '\0'; + + snprintf(str, sizeof(str), "Internal FPS: %2d", internal_fps); + + pl_rearmed_cbs.flip_cnt = 0; + + if (msg_interface_version >= 1) + { + struct retro_message_ext msg = { + strc, + 3000, + 1, + RETRO_LOG_INFO, + RETRO_MESSAGE_TARGET_OSD, + RETRO_MESSAGE_TYPE_STATUS, + -1 + }; + environ_cb(RETRO_ENVIRONMENT_SET_MESSAGE_EXT, &msg); + } + else + { + struct retro_message msg = { + strc, + 180 + }; + environ_cb(RETRO_ENVIRONMENT_SET_MESSAGE, &msg); + } + } + } + else + frame_count = 0; +} + +void retro_run(void) +{ + //SysReset must be run while core is running,Not in menu (Locks up Retroarch) + if (rebootemu != 0) + { + rebootemu = 0; + SysReset(); + if (!Config.HLE && !Config.SlowBoot) + { + // skip BIOS logos + psxRegs.pc = psxRegs.GPR.n.ra; + } + return; + } + + print_internal_fps(); + + /* Check whether current frame should + * be skipped */ + pl_rearmed_cbs.fskip_force = 0; + pl_rearmed_cbs.fskip_dirty = 0; + + if (frameskip_type != FRAMESKIP_NONE) + { + bool skip_frame = false; + + switch (frameskip_type) + { + case FRAMESKIP_AUTO: + skip_frame = retro_audio_buff_active && retro_audio_buff_underrun; + break; + case FRAMESKIP_AUTO_THRESHOLD: + skip_frame = retro_audio_buff_active && (retro_audio_buff_occupancy < frameskip_threshold); + break; + case FRAMESKIP_FIXED_INTERVAL: + skip_frame = true; + break; + default: + break; + } + + if (skip_frame && frameskip_counter < frameskip_interval) + pl_rearmed_cbs.fskip_force = 1; + } + + /* If frameskip/timing settings have changed, + * update frontend audio latency + * > Can do this before or after the frameskip + * check, but doing it after means we at 
least + * retain the current frame's audio output */ + if (update_audio_latency) + { + environ_cb(RETRO_ENVIRONMENT_SET_MINIMUM_AUDIO_LATENCY, + &retro_audio_latency); + update_audio_latency = false; + } + + input_poll_cb(); + + update_input(); + + bool updated = false; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &updated) && updated) + update_variables(true); + + stop = 0; + psxCpu->Execute(); + + if (pl_rearmed_cbs.fskip_dirty == 1) { + if (frameskip_counter < frameskip_interval) + frameskip_counter++; + else if (frameskip_counter >= frameskip_interval || !pl_rearmed_cbs.fskip_force) + frameskip_counter = 0; + } + + video_cb((vout_fb_dirty || !vout_can_dupe || !duping_enable) ? vout_buf_ptr : NULL, + vout_width, vout_height, vout_width * 2); + vout_fb_dirty = 0; + + set_vout_fb(); +} + +static bool try_use_bios(const char *path) +{ + long size; + const char *name; + FILE *fp = fopen(path, "rb"); + if (fp == NULL) + return false; + + fseek(fp, 0, SEEK_END); + size = ftell(fp); + fclose(fp); + + if (size != 512 * 1024) + return false; + + name = strrchr(path, SLASH); + if (name++ == NULL) + name = path; + snprintf(Config.Bios, sizeof(Config.Bios), "%s", name); + return true; +} + +#ifndef VITA #include #include static bool find_any_bios(const char *dirpath, char *path, size_t path_size) { - DIR *dir; - struct dirent *ent; - bool ret = false; + DIR *dir; + struct dirent *ent; + bool ret = false; - dir = opendir(dirpath); - if (dir == NULL) - return false; + dir = opendir(dirpath); + if (dir == NULL) + return false; - while ((ent = readdir(dir))) { - if (strncasecmp(ent->d_name, "scph", 4) != 0) - continue; + while ((ent = readdir(dir))) + { + if ((strncasecmp(ent->d_name, "scph", 4) != 0) && (strncasecmp(ent->d_name, "psx", 3) != 0)) + continue; - snprintf(path, path_size, "%s/%s", dirpath, ent->d_name); - ret = try_use_bios(path); - if (ret) - break; - } - closedir(dir); - return ret; + snprintf(path, path_size, "%s%c%s", dirpath, SLASH, ent->d_name); + 
ret = try_use_bios(path); + if (ret) + break; + } + closedir(dir); + return ret; } #else #define find_any_bios(...) false @@ -1199,91 +2818,303 @@ static void check_system_specs(void) environ_cb(RETRO_ENVIRONMENT_SET_PERFORMANCE_LEVEL, &level); } +static int init_memcards(void) +{ + int ret = 0; + const char *dir; + struct retro_variable var = { .key = "pcsx_rearmed_memcard2", .value = NULL }; + static const char CARD2_FILE[] = "pcsx-card2.mcd"; + + // Memcard2 will be handled and is re-enabled if needed using core + // operations. + // Memcard1 is handled by libretro, doing this will set core to + // skip file io operations for memcard1 like SaveMcd + snprintf(Config.Mcd1, sizeof(Config.Mcd1), "none"); + snprintf(Config.Mcd2, sizeof(Config.Mcd2), "none"); + init_memcard(Mcd1Data); + // Memcard 2 is managed by the emulator on the filesystem, + // There is no need to initialize Mcd2Data like Mcd1Data. + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + SysPrintf("Memcard 2: %s\n", var.value); + if (memcmp(var.value, "enabled", 7) == 0) + { + if (environ_cb(RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY, &dir) && dir) + { + if (strlen(dir) + strlen(CARD2_FILE) + 2 > sizeof(Config.Mcd2)) + { + SysPrintf("Path '%s' is too long. Cannot use memcard 2. Use a shorter path.\n", dir); + ret = -1; + } + else + { + McdDisable[1] = 0; + snprintf(Config.Mcd2, sizeof(Config.Mcd2), "%s/%s", dir, CARD2_FILE); + SysPrintf("Use memcard 2: %s\n", Config.Mcd2); + } + } + else + { + SysPrintf("Could not get save directory! 
Could not create memcard 2."); + ret = -1; + } + } + } + return ret; +} + +static void loadPSXBios(void) +{ + const char *dir; + char path[PATH_MAX]; + unsigned useHLE = 0; + + const char *bios[] = { + "PSXONPSP660", "psxonpsp660", + "SCPH101", "scph101", + "SCPH5501", "scph5501", + "SCPH7001", "scph7001", + "SCPH1001", "scph1001" + }; + + struct retro_variable var = { + .key = "pcsx_rearmed_bios", + .value = NULL + }; + + found_bios = 0; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (!strcmp(var.value, "HLE")) + useHLE = 1; + } + + if (!useHLE) + { + if (environ_cb(RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY, &dir) && dir) + { + unsigned i; + snprintf(Config.BiosDir, sizeof(Config.BiosDir), "%s", dir); + + for (i = 0; i < sizeof(bios) / sizeof(bios[0]); i++) + { + snprintf(path, sizeof(path), "%s%c%s.bin", dir, SLASH, bios[i]); + found_bios = try_use_bios(path); + if (found_bios) + break; + } + + if (!found_bios) + found_bios = find_any_bios(dir, path, sizeof(path)); + } + if (found_bios) + { + SysPrintf("found BIOS file: %s\n", Config.Bios); + } + } + + if (!found_bios) + { + const char *msg_str; + if (useHLE) + { + msg_str = "BIOS set to \'hle\' in core options - real BIOS will be ignored"; + SysPrintf("Using HLE BIOS.\n"); + } + else + { + msg_str = "No PlayStation BIOS file found - add for better compatibility"; + SysPrintf("No BIOS files found.\n"); + } + + if (msg_interface_version >= 1) + { + struct retro_message_ext msg = { + msg_str, + 3000, + 3, + RETRO_LOG_WARN, + RETRO_MESSAGE_TARGET_ALL, + RETRO_MESSAGE_TYPE_NOTIFICATION, + -1 + }; + environ_cb(RETRO_ENVIRONMENT_SET_MESSAGE_EXT, &msg); + } + else + { + struct retro_message msg = { + msg_str, + 180 + }; + environ_cb(RETRO_ENVIRONMENT_SET_MESSAGE, &msg); + } + } +} + void retro_init(void) { - const char *bios[] = { "scph1001", "scph5501", "scph7001" }; - const char *dir; - char path[256]; - int i, ret; - bool found_bios = false; + unsigned dci_version = 0; + struct 
retro_rumble_interface rumble; + int ret; -#ifdef __MACH__ - // magic sauce to make the dynarec work on iOS - syscall(SYS_ptrace, 0 /*PTRACE_TRACEME*/, 0, 0, 0); + msg_interface_version = 0; + environ_cb(RETRO_ENVIRONMENT_GET_MESSAGE_INTERFACE_VERSION, &msg_interface_version); + +#if defined(__MACH__) && !defined(TVOS) + // magic sauce to make the dynarec work on iOS + syscall(SYS_ptrace, 0 /*PTRACE_TRACEME*/, 0, 0, 0); #endif - ret = emu_core_preinit(); - ret |= emu_core_init(); - if (ret != 0) { - SysPrintf("PCSX init failed.\n"); - exit(1); - } +#ifdef _3DS + psxMapHook = pl_3ds_mmap; + psxUnmapHook = pl_3ds_munmap; +#endif +#ifdef VITA + if (init_vita_mmap() < 0) + abort(); + psxMapHook = pl_vita_mmap; + psxUnmapHook = pl_vita_munmap; +#endif + ret = emu_core_preinit(); +#ifdef _3DS + /* emu_core_preinit sets the cpu to dynarec */ + if (!__ctr_svchax) + Config.Cpu = CPU_INTERPRETER; +#endif + ret |= init_memcards(); -#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) - posix_memalign(&vout_buf, 16, VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2); + ret |= emu_core_init(); + if (ret != 0) + { + SysPrintf("PCSX init failed.\n"); + exit(1); + } + +#ifdef _3DS + vout_buf = linearMemAlign(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2, 0x80); +#elif defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) && !defined(VITA) && !defined(__SWITCH__) + if (posix_memalign(&vout_buf, 16, VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2) != 0) + vout_buf = (void *) 0; #else - vout_buf = malloc(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2); + vout_buf = malloc(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2); #endif - if (environ_cb(RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY, &dir) && dir) - { - snprintf(Config.BiosDir, sizeof(Config.BiosDir), "%s/", dir); - - for (i = 0; i < sizeof(bios) / sizeof(bios[0]); i++) { - snprintf(path, sizeof(path), "%s/%s.bin", dir, bios[i]); - found_bios = try_use_bios(path); - if (found_bios) - break; - } - - if (!found_bios) - found_bios = find_any_bios(dir, path, sizeof(path)); - } - 
if (found_bios) { - SysPrintf("found BIOS file: %s\n", Config.Bios); - } - else - { - SysPrintf("no BIOS files found.\n"); - struct retro_message msg = - { - "no BIOS found, expect bugs!", - 180 - }; - environ_cb(RETRO_ENVIRONMENT_SET_MESSAGE, (void*)&msg); - } - - environ_cb(RETRO_ENVIRONMENT_GET_CAN_DUPE, &vout_can_dupe); - environ_cb(RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE, &disk_control); - environ_cb(RETRO_ENVIRONMENT_GET_RUMBLE_INTERFACE, &rumble); - - /* Set how much slower PSX CPU runs * 100 (so that 200 is 2 times) - * we have to do this because cache misses and some IO penalties - * are not emulated. Warning: changing this may break compatibility. */ - cycle_multiplier = 175; -#ifdef HAVE_PRE_ARMV7 - cycle_multiplier = 200; + vout_buf_ptr = vout_buf; + + loadPSXBios(); + + environ_cb(RETRO_ENVIRONMENT_GET_CAN_DUPE, &vout_can_dupe); + + disk_initial_index = 0; + disk_initial_path[0] = '\0'; + if (environ_cb(RETRO_ENVIRONMENT_GET_DISK_CONTROL_INTERFACE_VERSION, &dci_version) && (dci_version >= 1)) + environ_cb(RETRO_ENVIRONMENT_SET_DISK_CONTROL_EXT_INTERFACE, &disk_control_ext); + else + environ_cb(RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE, &disk_control); + + rumble_cb = NULL; + if (environ_cb(RETRO_ENVIRONMENT_GET_RUMBLE_INTERFACE, &rumble)) + rumble_cb = rumble.set_rumble_state; + + /* Set how much slower PSX CPU runs * 100 (so that 200 is 2 times) + * we have to do this because cache misses and some IO penalties + * are not emulated. Warning: changing this may break compatibility. 
*/ + Config.cycle_multiplier = CYCLE_MULT_DEFAULT; +#if defined(HAVE_PRE_ARMV7) && !defined(_3DS) + Config.cycle_multiplier = 200; #endif - pl_rearmed_cbs.gpu_peops.iUseDither = 1; - spu_config.iUseFixedUpdates = 1; + pl_rearmed_cbs.gpu_peops.iUseDither = 1; + pl_rearmed_cbs.gpu_peops.dwActFixes = GPU_PEOPS_OLD_FRAME_SKIP; + spu_config.iUseFixedUpdates = 1; - McdDisable[0] = 0; - McdDisable[1] = 1; - init_memcard(Mcd1Data); + SaveFuncs.open = save_open; + SaveFuncs.read = save_read; + SaveFuncs.write = save_write; + SaveFuncs.seek = save_seek; + SaveFuncs.close = save_close; - SaveFuncs.open = save_open; - SaveFuncs.read = save_read; - SaveFuncs.write = save_write; - SaveFuncs.seek = save_seek; - SaveFuncs.close = save_close; + if (environ_cb(RETRO_ENVIRONMENT_GET_INPUT_BITMASKS, NULL)) + libretro_supports_bitmasks = true; - update_variables(false); - check_system_specs(); + check_system_specs(); } void retro_deinit(void) { - SysClose(); - free(vout_buf); - vout_buf = NULL; + if (plugins_opened) + { + ClosePlugins(); + plugins_opened = 0; + } + SysClose(); +#ifdef _3DS + linearFree(vout_buf); +#else + free(vout_buf); +#endif + vout_buf = NULL; + +#ifdef VITA + deinit_vita_mmap(); +#endif + libretro_supports_bitmasks = false; + libretro_supports_option_categories = false; + + show_input_settings = true; +#ifdef GPU_PEOPS + show_advanced_gpu_peops_settings = true; +#endif +#ifdef GPU_UNAI + show_advanced_gpu_unai_settings = true; +#endif + + /* Have to reset disks struct, otherwise + * fnames/flabels will leak memory */ + disk_init(); + frameskip_type = FRAMESKIP_NONE; + frameskip_threshold = 0; + frameskip_interval = 0; + frameskip_counter = 0; + retro_audio_buff_active = false; + retro_audio_buff_occupancy = 0; + retro_audio_buff_underrun = false; + retro_audio_latency = 0; + update_audio_latency = false; +} + +#ifdef VITA +#include +int usleep(unsigned long us) +{ + sceKernelDelayThread(us); +} +#endif + +void SysPrintf(const char *fmt, ...) 
+{ + va_list list; + char msg[512]; + + va_start(list, fmt); + vsprintf(msg, fmt, list); + va_end(list); + + if (log_cb) + log_cb(RETRO_LOG_INFO, "%s", msg); +} + +/* Prints debug-level logs */ +void SysDLog(const char *fmt, ...) +{ + va_list list; + char msg[512]; + + va_start(list, fmt); + vsprintf(msg, fmt, list); + va_end(list); + + if (log_cb) + log_cb(RETRO_LOG_DEBUG, "%s", msg); } diff --git a/frontend/libretro.h b/frontend/libretro.h deleted file mode 100755 index 16c274a1a..000000000 --- a/frontend/libretro.h +++ /dev/null @@ -1,1926 +0,0 @@ -/* Copyright (C) 2010-2014 The RetroArch team - * - * --------------------------------------------------------------------------------------- - * The following license statement only applies to this libretro API header (libretro.h). - * --------------------------------------------------------------------------------------- - * - * Permission is hereby granted, free of charge, - * to any person obtaining a copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef LIBRETRO_H__ -#define LIBRETRO_H__ - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef __cplusplus -#if defined(_MSC_VER) && !defined(SN_TARGET_PS3) -/* Hack applied for MSVC when compiling in C89 mode - * as it isn't C99-compliant. */ -#define bool unsigned char -#define true 1 -#define false 0 -#else -#include -#endif -#endif - -/* Used for checking API/ABI mismatches that can break libretro - * implementations. - * It is not incremented for compatible changes to the API. - */ -#define RETRO_API_VERSION 1 - -/* - * Libretro's fundamental device abstractions. - * - * Libretro's input system consists of some standardized device types, - * such as a joypad (with/without analog), mouse, keyboard, lightgun - * and a pointer. - * - * The functionality of these devices are fixed, and individual cores - * map their own concept of a controller to libretro's abstractions. - * This makes it possible for frontends to map the abstract types to a - * real input device, and not having to worry about binding input - * correctly to arbitrary controller layouts. - */ - -#define RETRO_DEVICE_TYPE_SHIFT 8 -#define RETRO_DEVICE_MASK ((1 << RETRO_DEVICE_TYPE_SHIFT) - 1) -#define RETRO_DEVICE_SUBCLASS(base, id) (((id + 1) << RETRO_DEVICE_TYPE_SHIFT) | base) - -/* Input disabled. */ -#define RETRO_DEVICE_NONE 0 - -/* The JOYPAD is called RetroPad. It is essentially a Super Nintendo - * controller, but with additional L2/R2/L3/R3 buttons, similar to a - * PS1 DualShock. */ -#define RETRO_DEVICE_JOYPAD 1 - -/* The mouse is a simple mouse, similar to Super Nintendo's mouse. - * X and Y coordinates are reported relatively to last poll (poll callback). - * It is up to the libretro implementation to keep track of where the mouse - * pointer is supposed to be on the screen. - * The frontend must make sure not to interfere with its own hardware - * mouse pointer. 
- */ -#define RETRO_DEVICE_MOUSE 2 - -/* KEYBOARD device lets one poll for raw key pressed. - * It is poll based, so input callback will return with the current - * pressed state. - * For event/text based keyboard input, see - * RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK. - */ -#define RETRO_DEVICE_KEYBOARD 3 - -/* Lightgun X/Y coordinates are reported relatively to last poll, - * similar to mouse. */ -#define RETRO_DEVICE_LIGHTGUN 4 - -/* The ANALOG device is an extension to JOYPAD (RetroPad). - * Similar to DualShock it adds two analog sticks. - * This is treated as a separate device type as it returns values in the - * full analog range of [-0x8000, 0x7fff]. Positive X axis is right. - * Positive Y axis is down. - * Only use ANALOG type when polling for analog values of the axes. - */ -#define RETRO_DEVICE_ANALOG 5 - -/* Abstracts the concept of a pointing mechanism, e.g. touch. - * This allows libretro to query in absolute coordinates where on the - * screen a mouse (or something similar) is being placed. - * For a touch centric device, coordinates reported are the coordinates - * of the press. - * - * Coordinates in X and Y are reported as: - * [-0x7fff, 0x7fff]: -0x7fff corresponds to the far left/top of the screen, - * and 0x7fff corresponds to the far right/bottom of the screen. - * The "screen" is here defined as area that is passed to the frontend and - * later displayed on the monitor. - * - * The frontend is free to scale/resize this screen as it sees fit, however, - * (X, Y) = (-0x7fff, -0x7fff) will correspond to the top-left pixel of the - * game image, etc. - * - * To check if the pointer coordinates are valid (e.g. a touch display - * actually being touched), PRESSED returns 1 or 0. - * - * If using a mouse on a desktop, PRESSED will usually correspond to the - * left mouse button, but this is a frontend decision. - * PRESSED will only return 1 if the pointer is inside the game screen. 
- * - * For multi-touch, the index variable can be used to successively query - * more presses. - * If index = 0 returns true for _PRESSED, coordinates can be extracted - * with _X, _Y for index = 0. One can then query _PRESSED, _X, _Y with - * index = 1, and so on. - * Eventually _PRESSED will return false for an index. No further presses - * are registered at this point. */ -#define RETRO_DEVICE_POINTER 6 - -/* Buttons for the RetroPad (JOYPAD). - * The placement of these is equivalent to placements on the - * Super Nintendo controller. - * L2/R2/L3/R3 buttons correspond to the PS1 DualShock. */ -#define RETRO_DEVICE_ID_JOYPAD_B 0 -#define RETRO_DEVICE_ID_JOYPAD_Y 1 -#define RETRO_DEVICE_ID_JOYPAD_SELECT 2 -#define RETRO_DEVICE_ID_JOYPAD_START 3 -#define RETRO_DEVICE_ID_JOYPAD_UP 4 -#define RETRO_DEVICE_ID_JOYPAD_DOWN 5 -#define RETRO_DEVICE_ID_JOYPAD_LEFT 6 -#define RETRO_DEVICE_ID_JOYPAD_RIGHT 7 -#define RETRO_DEVICE_ID_JOYPAD_A 8 -#define RETRO_DEVICE_ID_JOYPAD_X 9 -#define RETRO_DEVICE_ID_JOYPAD_L 10 -#define RETRO_DEVICE_ID_JOYPAD_R 11 -#define RETRO_DEVICE_ID_JOYPAD_L2 12 -#define RETRO_DEVICE_ID_JOYPAD_R2 13 -#define RETRO_DEVICE_ID_JOYPAD_L3 14 -#define RETRO_DEVICE_ID_JOYPAD_R3 15 - -/* Index / Id values for ANALOG device. */ -#define RETRO_DEVICE_INDEX_ANALOG_LEFT 0 -#define RETRO_DEVICE_INDEX_ANALOG_RIGHT 1 -#define RETRO_DEVICE_ID_ANALOG_X 0 -#define RETRO_DEVICE_ID_ANALOG_Y 1 - -/* Id values for MOUSE. */ -#define RETRO_DEVICE_ID_MOUSE_X 0 -#define RETRO_DEVICE_ID_MOUSE_Y 1 -#define RETRO_DEVICE_ID_MOUSE_LEFT 2 -#define RETRO_DEVICE_ID_MOUSE_RIGHT 3 -#define RETRO_DEVICE_ID_MOUSE_WHEELUP 4 -#define RETRO_DEVICE_ID_MOUSE_WHEELDOWN 5 -#define RETRO_DEVICE_ID_MOUSE_MIDDLE 6 - -/* Id values for LIGHTGUN types. 
*/ -#define RETRO_DEVICE_ID_LIGHTGUN_X 0 -#define RETRO_DEVICE_ID_LIGHTGUN_Y 1 -#define RETRO_DEVICE_ID_LIGHTGUN_TRIGGER 2 -#define RETRO_DEVICE_ID_LIGHTGUN_CURSOR 3 -#define RETRO_DEVICE_ID_LIGHTGUN_TURBO 4 -#define RETRO_DEVICE_ID_LIGHTGUN_PAUSE 5 -#define RETRO_DEVICE_ID_LIGHTGUN_START 6 - -/* Id values for POINTER. */ -#define RETRO_DEVICE_ID_POINTER_X 0 -#define RETRO_DEVICE_ID_POINTER_Y 1 -#define RETRO_DEVICE_ID_POINTER_PRESSED 2 - -/* Returned from retro_get_region(). */ -#define RETRO_REGION_NTSC 0 -#define RETRO_REGION_PAL 1 - -/* Id values for LANGUAGE */ -enum retro_language -{ - RETRO_LANGUAGE_ENGLISH = 0, - RETRO_LANGUAGE_JAPANESE = 1, - RETRO_LANGUAGE_FRENCH = 2, - RETRO_LANGUAGE_SPANISH = 3, - RETRO_LANGUAGE_GERMAN = 4, - RETRO_LANGUAGE_ITALIAN = 5, - RETRO_LANGUAGE_DUTCH = 6, - RETRO_LANGUAGE_PORTUGUESE = 7, - RETRO_LANGUAGE_RUSSIAN = 8, - RETRO_LANGUAGE_KOREAN = 9, - RETRO_LANGUAGE_CHINESE_TRADITIONAL = 10, - RETRO_LANGUAGE_CHINESE_SIMPLIFIED = 11, - RETRO_LANGUAGE_LAST, - - /* Ensure sizeof(enum) == sizeof(int) */ - RETRO_LANGUAGE_DUMMY = INT_MAX -}; - -/* Passed to retro_get_memory_data/size(). - * If the memory type doesn't apply to the - * implementation NULL/0 can be returned. - */ -#define RETRO_MEMORY_MASK 0xff - -/* Regular save RAM. This RAM is usually found on a game cartridge, - * backed up by a battery. - * If save game data is too complex for a single memory buffer, - * the SAVE_DIRECTORY (preferably) or SYSTEM_DIRECTORY environment - * callback can be used. */ -#define RETRO_MEMORY_SAVE_RAM 0 - -/* Some games have a built-in clock to keep track of time. - * This memory is usually just a couple of bytes to keep track of time. - */ -#define RETRO_MEMORY_RTC 1 - -/* System ram lets a frontend peek into a game systems main RAM. */ -#define RETRO_MEMORY_SYSTEM_RAM 2 - -/* Video ram lets a frontend peek into a game systems video RAM (VRAM). 
*/ -#define RETRO_MEMORY_VIDEO_RAM 3 - -/* Keysyms used for ID in input state callback when polling RETRO_KEYBOARD. */ -enum retro_key -{ - RETROK_UNKNOWN = 0, - RETROK_FIRST = 0, - RETROK_BACKSPACE = 8, - RETROK_TAB = 9, - RETROK_CLEAR = 12, - RETROK_RETURN = 13, - RETROK_PAUSE = 19, - RETROK_ESCAPE = 27, - RETROK_SPACE = 32, - RETROK_EXCLAIM = 33, - RETROK_QUOTEDBL = 34, - RETROK_HASH = 35, - RETROK_DOLLAR = 36, - RETROK_AMPERSAND = 38, - RETROK_QUOTE = 39, - RETROK_LEFTPAREN = 40, - RETROK_RIGHTPAREN = 41, - RETROK_ASTERISK = 42, - RETROK_PLUS = 43, - RETROK_COMMA = 44, - RETROK_MINUS = 45, - RETROK_PERIOD = 46, - RETROK_SLASH = 47, - RETROK_0 = 48, - RETROK_1 = 49, - RETROK_2 = 50, - RETROK_3 = 51, - RETROK_4 = 52, - RETROK_5 = 53, - RETROK_6 = 54, - RETROK_7 = 55, - RETROK_8 = 56, - RETROK_9 = 57, - RETROK_COLON = 58, - RETROK_SEMICOLON = 59, - RETROK_LESS = 60, - RETROK_EQUALS = 61, - RETROK_GREATER = 62, - RETROK_QUESTION = 63, - RETROK_AT = 64, - RETROK_LEFTBRACKET = 91, - RETROK_BACKSLASH = 92, - RETROK_RIGHTBRACKET = 93, - RETROK_CARET = 94, - RETROK_UNDERSCORE = 95, - RETROK_BACKQUOTE = 96, - RETROK_a = 97, - RETROK_b = 98, - RETROK_c = 99, - RETROK_d = 100, - RETROK_e = 101, - RETROK_f = 102, - RETROK_g = 103, - RETROK_h = 104, - RETROK_i = 105, - RETROK_j = 106, - RETROK_k = 107, - RETROK_l = 108, - RETROK_m = 109, - RETROK_n = 110, - RETROK_o = 111, - RETROK_p = 112, - RETROK_q = 113, - RETROK_r = 114, - RETROK_s = 115, - RETROK_t = 116, - RETROK_u = 117, - RETROK_v = 118, - RETROK_w = 119, - RETROK_x = 120, - RETROK_y = 121, - RETROK_z = 122, - RETROK_DELETE = 127, - - RETROK_KP0 = 256, - RETROK_KP1 = 257, - RETROK_KP2 = 258, - RETROK_KP3 = 259, - RETROK_KP4 = 260, - RETROK_KP5 = 261, - RETROK_KP6 = 262, - RETROK_KP7 = 263, - RETROK_KP8 = 264, - RETROK_KP9 = 265, - RETROK_KP_PERIOD = 266, - RETROK_KP_DIVIDE = 267, - RETROK_KP_MULTIPLY = 268, - RETROK_KP_MINUS = 269, - RETROK_KP_PLUS = 270, - RETROK_KP_ENTER = 271, - RETROK_KP_EQUALS = 272, - - 
RETROK_UP = 273, - RETROK_DOWN = 274, - RETROK_RIGHT = 275, - RETROK_LEFT = 276, - RETROK_INSERT = 277, - RETROK_HOME = 278, - RETROK_END = 279, - RETROK_PAGEUP = 280, - RETROK_PAGEDOWN = 281, - - RETROK_F1 = 282, - RETROK_F2 = 283, - RETROK_F3 = 284, - RETROK_F4 = 285, - RETROK_F5 = 286, - RETROK_F6 = 287, - RETROK_F7 = 288, - RETROK_F8 = 289, - RETROK_F9 = 290, - RETROK_F10 = 291, - RETROK_F11 = 292, - RETROK_F12 = 293, - RETROK_F13 = 294, - RETROK_F14 = 295, - RETROK_F15 = 296, - - RETROK_NUMLOCK = 300, - RETROK_CAPSLOCK = 301, - RETROK_SCROLLOCK = 302, - RETROK_RSHIFT = 303, - RETROK_LSHIFT = 304, - RETROK_RCTRL = 305, - RETROK_LCTRL = 306, - RETROK_RALT = 307, - RETROK_LALT = 308, - RETROK_RMETA = 309, - RETROK_LMETA = 310, - RETROK_LSUPER = 311, - RETROK_RSUPER = 312, - RETROK_MODE = 313, - RETROK_COMPOSE = 314, - - RETROK_HELP = 315, - RETROK_PRINT = 316, - RETROK_SYSREQ = 317, - RETROK_BREAK = 318, - RETROK_MENU = 319, - RETROK_POWER = 320, - RETROK_EURO = 321, - RETROK_UNDO = 322, - - RETROK_LAST, - - RETROK_DUMMY = INT_MAX /* Ensure sizeof(enum) == sizeof(int) */ -}; - -enum retro_mod -{ - RETROKMOD_NONE = 0x0000, - - RETROKMOD_SHIFT = 0x01, - RETROKMOD_CTRL = 0x02, - RETROKMOD_ALT = 0x04, - RETROKMOD_META = 0x08, - - RETROKMOD_NUMLOCK = 0x10, - RETROKMOD_CAPSLOCK = 0x20, - RETROKMOD_SCROLLOCK = 0x40, - - RETROKMOD_DUMMY = INT_MAX /* Ensure sizeof(enum) == sizeof(int) */ -}; - -/* If set, this call is not part of the public libretro API yet. It can - * change or be removed at any time. */ -#define RETRO_ENVIRONMENT_EXPERIMENTAL 0x10000 -/* Environment callback to be used internally in frontend. */ -#define RETRO_ENVIRONMENT_PRIVATE 0x20000 - -/* Environment commands. */ -#define RETRO_ENVIRONMENT_SET_ROTATION 1 /* const unsigned * -- - * Sets screen rotation of graphics. - * Is only implemented if rotation can be accelerated by hardware. - * Valid values are 0, 1, 2, 3, which rotates screen by 0, 90, 180, - * 270 degrees counter-clockwise respectively. 
- */ -#define RETRO_ENVIRONMENT_GET_OVERSCAN 2 /* bool * -- - * Boolean value whether or not the implementation should use overscan, - * or crop away overscan. - */ -#define RETRO_ENVIRONMENT_GET_CAN_DUPE 3 /* bool * -- - * Boolean value whether or not frontend supports frame duping, - * passing NULL to video frame callback. - */ - - /* Environ 4, 5 are no longer supported (GET_VARIABLE / SET_VARIABLES), - * and reserved to avoid possible ABI clash. - */ - -#define RETRO_ENVIRONMENT_SET_MESSAGE 6 /* const struct retro_message * -- - * Sets a message to be displayed in implementation-specific manner - * for a certain amount of 'frames'. - * Should not be used for trivial messages, which should simply be - * logged via RETRO_ENVIRONMENT_GET_LOG_INTERFACE (or as a - * fallback, stderr). - */ -#define RETRO_ENVIRONMENT_SHUTDOWN 7 /* N/A (NULL) -- - * Requests the frontend to shutdown. - * Should only be used if game has a specific - * way to shutdown the game from a menu item or similar. - */ -#define RETRO_ENVIRONMENT_SET_PERFORMANCE_LEVEL 8 - /* const unsigned * -- - * Gives a hint to the frontend how demanding this implementation - * is on a system. E.g. reporting a level of 2 means - * this implementation should run decently on all frontends - * of level 2 and up. - * - * It can be used by the frontend to potentially warn - * about too demanding implementations. - * - * The levels are "floating". - * - * This function can be called on a per-game basis, - * as certain games an implementation can play might be - * particularly demanding. - * If called, it should be called in retro_load_game(). - */ -#define RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY 9 - /* const char ** -- - * Returns the "system" directory of the frontend. - * This directory can be used to store system specific - * content such as BIOSes, configuration data, etc. - * The returned value can be NULL. 
- * If so, no such directory is defined, - * and it's up to the implementation to find a suitable directory. - * - * NOTE: Some cores used this folder also for "save" data such as - * memory cards, etc, for lack of a better place to put it. - * This is now discouraged, and if possible, cores should try to - * use the new GET_SAVE_DIRECTORY. - */ -#define RETRO_ENVIRONMENT_SET_PIXEL_FORMAT 10 - /* const enum retro_pixel_format * -- - * Sets the internal pixel format used by the implementation. - * The default pixel format is RETRO_PIXEL_FORMAT_0RGB1555. - * This pixel format however, is deprecated (see enum retro_pixel_format). - * If the call returns false, the frontend does not support this pixel - * format. - * - * This function should be called inside retro_load_game() or - * retro_get_system_av_info(). - */ -#define RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS 11 - /* const struct retro_input_descriptor * -- - * Sets an array of retro_input_descriptors. - * It is up to the frontend to present this in a usable way. - * The array is terminated by retro_input_descriptor::description - * being set to NULL. - * This function can be called at any time, but it is recommended - * to call it as early as possible. - */ -#define RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK 12 - /* const struct retro_keyboard_callback * -- - * Sets a callback function used to notify core about keyboard events. - */ -#define RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE 13 - /* const struct retro_disk_control_callback * -- - * Sets an interface which frontend can use to eject and insert - * disk images. - * This is used for games which consist of multiple images and - * must be manually swapped out by the user (e.g. PSX). - */ -#define RETRO_ENVIRONMENT_SET_HW_RENDER 14 - /* struct retro_hw_render_callback * -- - * Sets an interface to let a libretro core render with - * hardware acceleration. - * Should be called in retro_load_game(). 
- * If successful, libretro cores will be able to render to a - * frontend-provided framebuffer. - * The size of this framebuffer will be at least as large as - * max_width/max_height provided in get_av_info(). - * If HW rendering is used, pass only RETRO_HW_FRAME_BUFFER_VALID or - * NULL to retro_video_refresh_t. - */ -#define RETRO_ENVIRONMENT_GET_VARIABLE 15 - /* struct retro_variable * -- - * Interface to acquire user-defined information from environment - * that cannot feasibly be supported in a multi-system way. - * 'key' should be set to a key which has already been set by - * SET_VARIABLES. - * 'data' will be set to a value or NULL. - */ -#define RETRO_ENVIRONMENT_SET_VARIABLES 16 - /* const struct retro_variable * -- - * Allows an implementation to signal the environment - * which variables it might want to check for later using - * GET_VARIABLE. - * This allows the frontend to present these variables to - * a user dynamically. - * This should be called as early as possible (ideally in - * retro_set_environment). - * - * 'data' points to an array of retro_variable structs - * terminated by a { NULL, NULL } element. - * retro_variable::key should be namespaced to not collide - * with other implementations' keys. E.g. A core called - * 'foo' should use keys named as 'foo_option'. - * retro_variable::value should contain a human readable - * description of the key as well as a '|' delimited list - * of expected values. - * - * The number of possible options should be very limited, - * i.e. it should be feasible to cycle through options - * without a keyboard. - * - * First entry should be treated as a default. - * - * Example entry: - * { "foo_option", "Speed hack coprocessor X; false|true" } - * - * Text before first ';' is description. This ';' must be - * followed by a space, and followed by a list of possible - * values split up with '|'. - * - * Only strings are operated on. 
The possible values will - * generally be displayed and stored as-is by the frontend. - */ -#define RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE 17 - /* bool * -- - * Result is set to true if some variables are updated by - * frontend since last call to RETRO_ENVIRONMENT_GET_VARIABLE. - * Variables should be queried with GET_VARIABLE. - */ -#define RETRO_ENVIRONMENT_SET_SUPPORT_NO_GAME 18 - /* const bool * -- - * If true, the libretro implementation supports calls to - * retro_load_game() with NULL as argument. - * Used by cores which can run without particular game data. - * This should be called within retro_set_environment() only. - */ -#define RETRO_ENVIRONMENT_GET_LIBRETRO_PATH 19 - /* const char ** -- - * Retrieves the absolute path from where this libretro - * implementation was loaded. - * NULL is returned if the libretro was loaded statically - * (i.e. linked statically to frontend), or if the path cannot be - * determined. - * Mostly useful in cooperation with SET_SUPPORT_NO_GAME as assets can - * be loaded without ugly hacks. - */ - - /* Environment 20 was an obsolete version of SET_AUDIO_CALLBACK. - * It was not used by any known core at the time, - * and was removed from the API. */ -#define RETRO_ENVIRONMENT_SET_AUDIO_CALLBACK 22 - /* const struct retro_audio_callback * -- - * Sets an interface which is used to notify a libretro core about audio - * being available for writing. - * The callback can be called from any thread, so a core using this must - * have a thread safe audio implementation. - * It is intended for games where audio and video are completely - * asynchronous and audio can be generated on the fly. - * This interface is not recommended for use with emulators which have - * highly synchronous audio. - * - * The callback only notifies about writability; the libretro core still - * has to call the normal audio callbacks - * to write audio. The audio callbacks must be called from within the - * notification callback. 
- * The amount of audio data to write is up to the implementation. - * Generally, the audio callback will be called continously in a loop. - * - * Due to thread safety guarantees and lack of sync between audio and - * video, a frontend can selectively disallow this interface based on - * internal configuration. A core using this interface must also - * implement the "normal" audio interface. - * - * A libretro core using SET_AUDIO_CALLBACK should also make use of - * SET_FRAME_TIME_CALLBACK. - */ -#define RETRO_ENVIRONMENT_SET_FRAME_TIME_CALLBACK 21 - /* const struct retro_frame_time_callback * -- - * Lets the core know how much time has passed since last - * invocation of retro_run(). - * The frontend can tamper with the timing to fake fast-forward, - * slow-motion, frame stepping, etc. - * In this case the delta time will use the reference value - * in frame_time_callback.. - */ -#define RETRO_ENVIRONMENT_GET_RUMBLE_INTERFACE 23 - /* struct retro_rumble_interface * -- - * Gets an interface which is used by a libretro core to set - * state of rumble motors in controllers. - * A strong and weak motor is supported, and they can be - * controlled indepedently. - */ -#define RETRO_ENVIRONMENT_GET_INPUT_DEVICE_CAPABILITIES 24 - /* uint64_t * -- - * Gets a bitmask telling which device type are expected to be - * handled properly in a call to retro_input_state_t. - * Devices which are not handled or recognized always return - * 0 in retro_input_state_t. - * Example bitmask: caps = (1 << RETRO_DEVICE_JOYPAD) | (1 << RETRO_DEVICE_ANALOG). - * Should only be called in retro_run(). - */ -#define RETRO_ENVIRONMENT_GET_SENSOR_INTERFACE (25 | RETRO_ENVIRONMENT_EXPERIMENTAL) - /* struct retro_sensor_interface * -- - * Gets access to the sensor interface. - * The purpose of this interface is to allow - * setting state related to sensors such as polling rate, - * enabling/disable it entirely, etc. - * Reading sensor state is done via the normal - * input_state_callback API. 
- */ -#define RETRO_ENVIRONMENT_GET_CAMERA_INTERFACE (26 | RETRO_ENVIRONMENT_EXPERIMENTAL) - /* struct retro_camera_callback * -- - * Gets an interface to a video camera driver. - * A libretro core can use this interface to get access to a - * video camera. - * New video frames are delivered in a callback in same - * thread as retro_run(). - * - * GET_CAMERA_INTERFACE should be called in retro_load_game(). - * - * Depending on the camera implementation used, camera frames - * will be delivered as a raw framebuffer, - * or as an OpenGL texture directly. - * - * The core has to tell the frontend here which types of - * buffers can be handled properly. - * An OpenGL texture can only be handled when using a - * libretro GL core (SET_HW_RENDER). - * It is recommended to use a libretro GL core when - * using camera interface. - * - * The camera is not started automatically. The retrieved start/stop - * functions must be used to explicitly - * start and stop the camera driver. - */ -#define RETRO_ENVIRONMENT_GET_LOG_INTERFACE 27 - /* struct retro_log_callback * -- - * Gets an interface for logging. This is useful for - * logging in a cross-platform way - * as certain platforms cannot use use stderr for logging. - * It also allows the frontend to - * show logging information in a more suitable way. - * If this interface is not used, libretro cores should - * log to stderr as desired. - */ -#define RETRO_ENVIRONMENT_GET_PERF_INTERFACE 28 - /* struct retro_perf_callback * -- - * Gets an interface for performance counters. This is useful - * for performance logging in a cross-platform way and for detecting - * architecture-specific features, such as SIMD support. - */ -#define RETRO_ENVIRONMENT_GET_LOCATION_INTERFACE 29 - /* struct retro_location_callback * -- - * Gets access to the location interface. - * The purpose of this interface is to be able to retrieve - * location-based information from the host device, - * such as current latitude / longitude. 
- */ -#define RETRO_ENVIRONMENT_GET_CONTENT_DIRECTORY 30 - /* const char ** -- - * Returns the "content" directory of the frontend. - * This directory can be used to store specific assets that the - * core relies upon, such as art assets, - * input data, etc etc. - * The returned value can be NULL. - * If so, no such directory is defined, - * and it's up to the implementation to find a suitable directory. - */ -#define RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY 31 - /* const char ** -- - * Returns the "save" directory of the frontend. - * This directory can be used to store SRAM, memory cards, - * high scores, etc, if the libretro core - * cannot use the regular memory interface (retro_get_memory_data()). - * - * NOTE: libretro cores used to check GET_SYSTEM_DIRECTORY for - * similar things before. - * They should still check GET_SYSTEM_DIRECTORY if they want to - * be backwards compatible. - * The path here can be NULL. It should only be non-NULL if the - * frontend user has set a specific save path. - */ -#define RETRO_ENVIRONMENT_SET_SYSTEM_AV_INFO 32 - /* const struct retro_system_av_info * -- - * Sets a new av_info structure. This can only be called from - * within retro_run(). - * This should *only* be used if the core is completely altering the - * internal resolutions, aspect ratios, timings, sampling rate, etc. - * Calling this can require a full reinitialization of video/audio - * drivers in the frontend, - * - * so it is important to call it very sparingly, and usually only with - * the users explicit consent. - * An eventual driver reinitialize will happen so that video and - * audio callbacks - * happening after this call within the same retro_run() call will - * target the newly initialized driver. - * - * This callback makes it possible to support configurable resolutions - * in games, which can be useful to - * avoid setting the "worst case" in max_width/max_height. 
- * - * ***HIGHLY RECOMMENDED*** Do not call this callback every time - * resolution changes in an emulator core if it's - * expected to be a temporary change, for the reasons of possible - * driver reinitialization. - * This call is not a free pass for not trying to provide - * correct values in retro_get_system_av_info(). If you need to change - * things like aspect ratio or nominal width/height, - * use RETRO_ENVIRONMENT_SET_GEOMETRY, which is a softer variant - * of SET_SYSTEM_AV_INFO. - * - * If this returns false, the frontend does not acknowledge a - * changed av_info struct. - */ -#define RETRO_ENVIRONMENT_SET_PROC_ADDRESS_CALLBACK 33 - /* const struct retro_get_proc_address_interface * -- - * Allows a libretro core to announce support for the - * get_proc_address() interface. - * This interface allows for a standard way to extend libretro where - * use of environment calls are too indirect, - * e.g. for cases where the frontend wants to call directly into the core. - * - * If a core wants to expose this interface, SET_PROC_ADDRESS_CALLBACK - * **MUST** be called from within retro_set_environment(). - */ -#define RETRO_ENVIRONMENT_SET_SUBSYSTEM_INFO 34 - /* const struct retro_subsystem_info * -- - * This environment call introduces the concept of libretro "subsystems". - * A subsystem is a variant of a libretro core which supports - * different kinds of games. - * The purpose of this is to support e.g. emulators which might - * have special needs, e.g. Super Nintendo's Super GameBoy, Sufami Turbo. - * It can also be used to pick among subsystems in an explicit way - * if the libretro implementation is a multi-system emulator itself. - * - * Loading a game via a subsystem is done with retro_load_game_special(), - * and this environment call allows a libretro core to expose which - * subsystems are supported for use with retro_load_game_special(). 
- * A core passes an array of retro_game_special_info which is terminated - * with a zeroed out retro_game_special_info struct. - * - * If a core wants to use this functionality, SET_SUBSYSTEM_INFO - * **MUST** be called from within retro_set_environment(). - */ -#define RETRO_ENVIRONMENT_SET_CONTROLLER_INFO 35 - /* const struct retro_controller_info * -- - * This environment call lets a libretro core tell the frontend - * which controller types are recognized in calls to - * retro_set_controller_port_device(). - * - * Some emulators such as Super Nintendo - * support multiple lightgun types which must be specifically - * selected from. - * It is therefore sometimes necessary for a frontend to be able - * to tell the core about a special kind of input device which is - * not covered by the libretro input API. - * - * In order for a frontend to understand the workings of an input device, - * it must be a specialized type - * of the generic device types already defined in the libretro API. - * - * Which devices are supported can vary per input port. - * The core must pass an array of const struct retro_controller_info which - * is terminated with a blanked out struct. Each element of the struct - * corresponds to an ascending port index to - * retro_set_controller_port_device(). - * Even if special device types are set in the libretro core, - * libretro should only poll input based on the base input device types. - */ -#define RETRO_ENVIRONMENT_SET_MEMORY_MAPS (36 | RETRO_ENVIRONMENT_EXPERIMENTAL) - /* const struct retro_memory_map * -- - * This environment call lets a libretro core tell the frontend - * about the memory maps this core emulates. - * This can be used to implement, for example, cheats in a core-agnostic way. - * - * Should only be used by emulators; it doesn't make much sense for - * anything else. - * It is recommended to expose all relevant pointers through - * retro_get_memory_* as well. - * - * Can be called from retro_init and retro_load_game. 
- */ -#define RETRO_ENVIRONMENT_SET_GEOMETRY 37 - /* const struct retro_game_geometry * -- - * This environment call is similar to SET_SYSTEM_AV_INFO for changing - * video parameters, but provides a guarantee that drivers will not be - * reinitialized. - * This can only be called from within retro_run(). - * - * The purpose of this call is to allow a core to alter nominal - * width/heights as well as aspect ratios on-the-fly, which can be - * useful for some emulators to change in run-time. - * - * max_width/max_height arguments are ignored and cannot be changed - * with this call as this could potentially require a reinitialization or a - * non-constant time operation. - * If max_width/max_height are to be changed, SET_SYSTEM_AV_INFO is required. - * - * A frontend must guarantee that this environment call completes in - * constant time. - */ -#define RETRO_ENVIRONMENT_GET_USERNAME 38 - /* const char ** - * Returns the specified username of the frontend, if specified by the user. - * This username can be used as a nickname for a core that has online facilities - * or any other mode where personalization of the user is desirable. - * The returned value can be NULL. - * If this environ callback is used by a core that requires a valid username, - * a default username should be specified by the core. - */ -#define RETRO_ENVIRONMENT_GET_LANGUAGE 39 - /* unsigned * -- - * Returns the specified language of the frontend, if specified by the user. - * It can be used by the core for localization purposes. - */ - -#define RETRO_MEMDESC_CONST (1 << 0) /* The frontend will never change this memory area once retro_load_game has returned. */ -#define RETRO_MEMDESC_BIGENDIAN (1 << 1) /* The memory area contains big endian data. Default is little endian. */ -#define RETRO_MEMDESC_ALIGN_2 (1 << 16) /* All memory access in this area is aligned to their own size, or 2, whichever is smaller. 
*/ -#define RETRO_MEMDESC_ALIGN_4 (2 << 16) -#define RETRO_MEMDESC_ALIGN_8 (3 << 16) -#define RETRO_MEMDESC_MINSIZE_2 (1 << 24) /* All memory in this region is accessed at least 2 bytes at the time. */ -#define RETRO_MEMDESC_MINSIZE_4 (2 << 24) -#define RETRO_MEMDESC_MINSIZE_8 (3 << 24) -struct retro_memory_descriptor -{ - uint64_t flags; - - /* Pointer to the start of the relevant ROM or RAM chip. - * It's strongly recommended to use 'offset' if possible, rather than - * doing math on the pointer. - * - * If the same byte is mapped my multiple descriptors, their descriptors - * must have the same pointer. - * If 'start' does not point to the first byte in the pointer, put the - * difference in 'offset' instead. - * - * May be NULL if there's nothing usable here (e.g. hardware registers and - * open bus). No flags should be set if the pointer is NULL. - * It's recommended to minimize the number of descriptors if possible, - * but not mandatory. */ - void *ptr; - size_t offset; - - /* This is the location in the emulated address space - * where the mapping starts. */ - size_t start; - - /* Which bits must be same as in 'start' for this mapping to apply. - * The first memory descriptor to claim a certain byte is the one - * that applies. - * A bit which is set in 'start' must also be set in this. - * Can be zero, in which case each byte is assumed mapped exactly once. - * In this case, 'len' must be a power of two. */ - size_t select; - - /* If this is nonzero, the set bits are assumed not connected to the - * memory chip's address pins. */ - size_t disconnect; - - /* This one tells the size of the current memory area. - * If, after start+disconnect are applied, the address is higher than - * this, the highest bit of the address is cleared. - * - * If the address is still too high, the next highest bit is cleared. - * Can be zero, in which case it's assumed to be infinite (as limited - * by 'select' and 'disconnect'). 
*/ - size_t len; - - /* To go from emulated address to physical address, the following - * order applies: - * Subtract 'start', pick off 'disconnect', apply 'len', add 'offset'. - * - * The address space name must consist of only a-zA-Z0-9_-, - * should be as short as feasible (maximum length is 8 plus the NUL), - * and may not be any other address space plus one or more 0-9A-F - * at the end. - * However, multiple memory descriptors for the same address space is - * allowed, and the address space name can be empty. NULL is treated - * as empty. - * - * Address space names are case sensitive, but avoid lowercase if possible. - * The same pointer may exist in multiple address spaces. - * - * Examples: - * blank+blank - valid (multiple things may be mapped in the same namespace) - * 'Sp'+'Sp' - valid (multiple things may be mapped in the same namespace) - * 'A'+'B' - valid (neither is a prefix of each other) - * 'S'+blank - valid ('S' is not in 0-9A-F) - * 'a'+blank - valid ('a' is not in 0-9A-F) - * 'a'+'A' - valid (neither is a prefix of each other) - * 'AR'+blank - valid ('R' is not in 0-9A-F) - * 'ARB'+blank - valid (the B can't be part of the address either, because - * there is no namespace 'AR') - * blank+'B' - not valid, because it's ambigous which address space B1234 - * would refer to. - * The length can't be used for that purpose; the frontend may want - * to append arbitrary data to an address, without a separator. */ - const char *addrspace; -}; - -/* The frontend may use the largest value of 'start'+'select' in a - * certain namespace to infer the size of the address space. - * - * If the address space is larger than that, a mapping with .ptr=NULL - * should be at the end of the array, with .select set to all ones for - * as long as the address space is big. 
- * - * Sample descriptors (minus .ptr, and RETRO_MEMFLAG_ on the flags): - * SNES WRAM: - * .start=0x7E0000, .len=0x20000 - * (Note that this must be mapped before the ROM in most cases; some of the - * ROM mappers - * try to claim $7E0000, or at least $7E8000.) - * SNES SPC700 RAM: - * .addrspace="S", .len=0x10000 - * SNES WRAM mirrors: - * .flags=MIRROR, .start=0x000000, .select=0xC0E000, .len=0x2000 - * .flags=MIRROR, .start=0x800000, .select=0xC0E000, .len=0x2000 - * SNES WRAM mirrors, alternate equivalent descriptor: - * .flags=MIRROR, .select=0x40E000, .disconnect=~0x1FFF - * (Various similar constructions can be created by combining parts of - * the above two.) - * SNES LoROM (512KB, mirrored a couple of times): - * .flags=CONST, .start=0x008000, .select=0x408000, .disconnect=0x8000, .len=512*1024 - * .flags=CONST, .start=0x400000, .select=0x400000, .disconnect=0x8000, .len=512*1024 - * SNES HiROM (4MB): - * .flags=CONST, .start=0x400000, .select=0x400000, .len=4*1024*1024 - * .flags=CONST, .offset=0x8000, .start=0x008000, .select=0x408000, .len=4*1024*1024 - * SNES ExHiROM (8MB): - * .flags=CONST, .offset=0, .start=0xC00000, .select=0xC00000, .len=4*1024*1024 - * .flags=CONST, .offset=4*1024*1024, .start=0x400000, .select=0xC00000, .len=4*1024*1024 - * .flags=CONST, .offset=0x8000, .start=0x808000, .select=0xC08000, .len=4*1024*1024 - * .flags=CONST, .offset=4*1024*1024+0x8000, .start=0x008000, .select=0xC08000, .len=4*1024*1024 - * Clarify the size of the address space: - * .ptr=NULL, .select=0xFFFFFF - * .len can be implied by .select in many of them, but was included for clarity. - */ - -struct retro_memory_map -{ - const struct retro_memory_descriptor *descriptors; - unsigned num_descriptors; -}; - -struct retro_controller_description -{ - /* Human-readable description of the controller. Even if using a generic - * input device type, this can be set to the particular device type the - * core uses. 
*/ - const char *desc; - - /* Device type passed to retro_set_controller_port_device(). If the device - * type is a sub-class of a generic input device type, use the - * RETRO_DEVICE_SUBCLASS macro to create an ID. - * - * E.g. RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_JOYPAD, 1). */ - unsigned id; -}; - -struct retro_controller_info -{ - const struct retro_controller_description *types; - unsigned num_types; -}; - -struct retro_subsystem_memory_info -{ - /* The extension associated with a memory type, e.g. "psram". */ - const char *extension; - - /* The memory type for retro_get_memory(). This should be at - * least 0x100 to avoid conflict with standardized - * libretro memory types. */ - unsigned type; -}; - -struct retro_subsystem_rom_info -{ - /* Describes what the content is (SGB BIOS, GB ROM, etc). */ - const char *desc; - - /* Same definition as retro_get_system_info(). */ - const char *valid_extensions; - - /* Same definition as retro_get_system_info(). */ - bool need_fullpath; - - /* Same definition as retro_get_system_info(). */ - bool block_extract; - - /* This is set if the content is required to load a game. - * If this is set to false, a zeroed-out retro_game_info can be passed. */ - bool required; - - /* Content can have multiple associated persistent - * memory types (retro_get_memory()). */ - const struct retro_subsystem_memory_info *memory; - unsigned num_memory; -}; - -struct retro_subsystem_info -{ - /* Human-readable string of the subsystem type, e.g. "Super GameBoy" */ - const char *desc; - - /* A computer friendly short string identifier for the subsystem type. - * This name must be [a-z]. - * E.g. if desc is "Super GameBoy", this can be "sgb". - * This identifier can be used for command-line interfaces, etc. - */ - const char *ident; - - /* Infos for each content file. The first entry is assumed to be the - * "most significant" content for frontend purposes. - * E.g. 
with Super GameBoy, the first content should be the GameBoy ROM, - * as it is the most "significant" content to a user. - * If a frontend creates new file paths based on the content used - * (e.g. savestates), it should use the path for the first ROM to do so. */ - const struct retro_subsystem_rom_info *roms; - - /* Number of content files associated with a subsystem. */ - unsigned num_roms; - - /* The type passed to retro_load_game_special(). */ - unsigned id; -}; - -typedef void (*retro_proc_address_t)(void); - -/* libretro API extension functions: - * (None here so far). - * - * Get a symbol from a libretro core. - * Cores should only return symbols which are actual - * extensions to the libretro API. - * - * Frontends should not use this to obtain symbols to standard - * libretro entry points (static linking or dlsym). - * - * The symbol name must be equal to the function name, - * e.g. if void retro_foo(void); exists, the symbol must be called "retro_foo". - * The returned function pointer must be cast to the corresponding type. - */ -typedef retro_proc_address_t (*retro_get_proc_address_t)(const char *sym); - -struct retro_get_proc_address_interface -{ - retro_get_proc_address_t get_proc_address; -}; - -enum retro_log_level -{ - RETRO_LOG_DEBUG = 0, - RETRO_LOG_INFO, - RETRO_LOG_WARN, - RETRO_LOG_ERROR, - - RETRO_LOG_DUMMY = INT_MAX -}; - -/* Logging function. Takes log level argument as well. 
*/ -typedef void (*retro_log_printf_t)(enum retro_log_level level, - const char *fmt, ...); - -struct retro_log_callback -{ - retro_log_printf_t log; -}; - -/* Performance related functions */ - -/* ID values for SIMD CPU features */ -#define RETRO_SIMD_SSE (1 << 0) -#define RETRO_SIMD_SSE2 (1 << 1) -#define RETRO_SIMD_VMX (1 << 2) -#define RETRO_SIMD_VMX128 (1 << 3) -#define RETRO_SIMD_AVX (1 << 4) -#define RETRO_SIMD_NEON (1 << 5) -#define RETRO_SIMD_SSE3 (1 << 6) -#define RETRO_SIMD_SSSE3 (1 << 7) -#define RETRO_SIMD_MMX (1 << 8) -#define RETRO_SIMD_MMXEXT (1 << 9) -#define RETRO_SIMD_SSE4 (1 << 10) -#define RETRO_SIMD_SSE42 (1 << 11) -#define RETRO_SIMD_AVX2 (1 << 12) -#define RETRO_SIMD_VFPU (1 << 13) -#define RETRO_SIMD_PS (1 << 14) -#define RETRO_SIMD_AES (1 << 15) - -typedef uint64_t retro_perf_tick_t; -typedef int64_t retro_time_t; - -struct retro_perf_counter -{ - const char *ident; - retro_perf_tick_t start; - retro_perf_tick_t total; - retro_perf_tick_t call_cnt; - - bool registered; -}; - -/* Returns current time in microseconds. - * Tries to use the most accurate timer available. - */ -typedef retro_time_t (*retro_perf_get_time_usec_t)(void); - -/* A simple counter. Usually nanoseconds, but can also be CPU cycles. - * Can be used directly if desired (when creating a more sophisticated - * performance counter system). - * */ -typedef retro_perf_tick_t (*retro_perf_get_counter_t)(void); - -/* Returns a bit-mask of detected CPU features (RETRO_SIMD_*). */ -typedef uint64_t (*retro_get_cpu_features_t)(void); - -/* Asks frontend to log and/or display the state of performance counters. - * Performance counters can always be poked into manually as well. - */ -typedef void (*retro_perf_log_t)(void); - -/* Register a performance counter. - * ident field must be set with a discrete value and other values in - * retro_perf_counter must be 0. - * Registering can be called multiple times. 
To avoid calling to - * frontend redundantly, you can check registered field first. */ -typedef void (*retro_perf_register_t)(struct retro_perf_counter *counter); - -/* Starts a registered counter. */ -typedef void (*retro_perf_start_t)(struct retro_perf_counter *counter); - -/* Stops a registered counter. */ -typedef void (*retro_perf_stop_t)(struct retro_perf_counter *counter); - -/* For convenience it can be useful to wrap register, start and stop in macros. - * E.g.: - * #ifdef LOG_PERFORMANCE - * #define RETRO_PERFORMANCE_INIT(perf_cb, name) static struct retro_perf_counter name = {#name}; if (!name.registered) perf_cb.perf_register(&(name)) - * #define RETRO_PERFORMANCE_START(perf_cb, name) perf_cb.perf_start(&(name)) - * #define RETRO_PERFORMANCE_STOP(perf_cb, name) perf_cb.perf_stop(&(name)) - * #else - * ... Blank macros ... - * #endif - * - * These can then be used mid-functions around code snippets. - * - * extern struct retro_perf_callback perf_cb; * Somewhere in the core. - * - * void do_some_heavy_work(void) - * { - * RETRO_PERFORMANCE_INIT(cb, work_1; - * RETRO_PERFORMANCE_START(cb, work_1); - * heavy_work_1(); - * RETRO_PERFORMANCE_STOP(cb, work_1); - * - * RETRO_PERFORMANCE_INIT(cb, work_2); - * RETRO_PERFORMANCE_START(cb, work_2); - * heavy_work_2(); - * RETRO_PERFORMANCE_STOP(cb, work_2); - * } - * - * void retro_deinit(void) - * { - * perf_cb.perf_log(); * Log all perf counters here for example. - * } - */ - -struct retro_perf_callback -{ - retro_perf_get_time_usec_t get_time_usec; - retro_get_cpu_features_t get_cpu_features; - - retro_perf_get_counter_t get_perf_counter; - retro_perf_register_t perf_register; - retro_perf_start_t perf_start; - retro_perf_stop_t perf_stop; - retro_perf_log_t perf_log; -}; - -/* FIXME: Document the sensor API and work out behavior. - * It will be marked as experimental until then. 
- */ -enum retro_sensor_action -{ - RETRO_SENSOR_ACCELEROMETER_ENABLE = 0, - RETRO_SENSOR_ACCELEROMETER_DISABLE, - - RETRO_SENSOR_DUMMY = INT_MAX -}; - -/* Id values for SENSOR types. */ -#define RETRO_SENSOR_ACCELEROMETER_X 0 -#define RETRO_SENSOR_ACCELEROMETER_Y 1 -#define RETRO_SENSOR_ACCELEROMETER_Z 2 - -typedef bool (*retro_set_sensor_state_t)(unsigned port, - enum retro_sensor_action action, unsigned rate); - -typedef float (*retro_sensor_get_input_t)(unsigned port, unsigned id); - -struct retro_sensor_interface -{ - retro_set_sensor_state_t set_sensor_state; - retro_sensor_get_input_t get_sensor_input; -}; - -enum retro_camera_buffer -{ - RETRO_CAMERA_BUFFER_OPENGL_TEXTURE = 0, - RETRO_CAMERA_BUFFER_RAW_FRAMEBUFFER, - - RETRO_CAMERA_BUFFER_DUMMY = INT_MAX -}; - -/* Starts the camera driver. Can only be called in retro_run(). */ -typedef bool (*retro_camera_start_t)(void); - -/* Stops the camera driver. Can only be called in retro_run(). */ -typedef void (*retro_camera_stop_t)(void); - -/* Callback which signals when the camera driver is initialized - * and/or deinitialized. - * retro_camera_start_t can be called in initialized callback. - */ -typedef void (*retro_camera_lifetime_status_t)(void); - -/* A callback for raw framebuffer data. buffer points to an XRGB8888 buffer. - * Width, height and pitch are similar to retro_video_refresh_t. - * First pixel is top-left origin. - */ -typedef void (*retro_camera_frame_raw_framebuffer_t)(const uint32_t *buffer, - unsigned width, unsigned height, size_t pitch); - -/* A callback for when OpenGL textures are used. - * - * texture_id is a texture owned by camera driver. - * Its state or content should be considered immutable, except for things like - * texture filtering and clamping. - * - * texture_target is the texture target for the GL texture. - * These can include e.g. GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE, and possibly - * more depending on extensions. 
- * - * affine points to a packed 3x3 column-major matrix used to apply an affine - * transform to texture coordinates. (affine_matrix * vec3(coord_x, coord_y, 1.0)) - * After transform, normalized texture coord (0, 0) should be bottom-left - * and (1, 1) should be top-right (or (width, height) for RECTANGLE). - * - * GL-specific typedefs are avoided here to avoid relying on gl.h in - * the API definition. - */ -typedef void (*retro_camera_frame_opengl_texture_t)(unsigned texture_id, - unsigned texture_target, const float *affine); - -struct retro_camera_callback -{ - /* Set by libretro core. - * Example bitmask: caps = (1 << RETRO_CAMERA_BUFFER_OPENGL_TEXTURE) | (1 << RETRO_CAMERA_BUFFER_RAW_FRAMEBUFFER). - */ - uint64_t caps; - - unsigned width; /* Desired resolution for camera. Is only used as a hint. */ - unsigned height; - retro_camera_start_t start; /* Set by frontend. */ - retro_camera_stop_t stop; /* Set by frontend. */ - - /* Set by libretro core if raw framebuffer callbacks will be used. */ - retro_camera_frame_raw_framebuffer_t frame_raw_framebuffer; - /* Set by libretro core if OpenGL texture callbacks will be used. */ - retro_camera_frame_opengl_texture_t frame_opengl_texture; - - /* Set by libretro core. Called after camera driver is initialized and - * ready to be started. - * Can be NULL, in which this callback is not called. - */ - retro_camera_lifetime_status_t initialized; - - /* Set by libretro core. Called right before camera driver is - * deinitialized. - * Can be NULL, in which this callback is not called. - */ - retro_camera_lifetime_status_t deinitialized; -}; - -/* Sets the interval of time and/or distance at which to update/poll - * location-based data. - * - * To ensure compatibility with all location-based implementations, - * values for both interval_ms and interval_distance should be provided. - * - * interval_ms is the interval expressed in milliseconds. - * interval_distance is the distance interval expressed in meters. 
- */ -typedef void (*retro_location_set_interval_t)(unsigned interval_ms, - unsigned interval_distance); - -/* Start location services. The device will start listening for changes to the - * current location at regular intervals (which are defined with - * retro_location_set_interval_t). */ -typedef bool (*retro_location_start_t)(void); - -/* Stop location services. The device will stop listening for changes - * to the current location. */ -typedef void (*retro_location_stop_t)(void); - -/* Get the position of the current location. Will set parameters to - * 0 if no new location update has happened since the last time. */ -typedef bool (*retro_location_get_position_t)(double *lat, double *lon, - double *horiz_accuracy, double *vert_accuracy); - -/* Callback which signals when the location driver is initialized - * and/or deinitialized. - * retro_location_start_t can be called in initialized callback. - */ -typedef void (*retro_location_lifetime_status_t)(void); - -struct retro_location_callback -{ - retro_location_start_t start; - retro_location_stop_t stop; - retro_location_get_position_t get_position; - retro_location_set_interval_t set_interval; - - retro_location_lifetime_status_t initialized; - retro_location_lifetime_status_t deinitialized; -}; - -enum retro_rumble_effect -{ - RETRO_RUMBLE_STRONG = 0, - RETRO_RUMBLE_WEAK = 1, - - RETRO_RUMBLE_DUMMY = INT_MAX -}; - -/* Sets rumble state for joypad plugged in port 'port'. - * Rumble effects are controlled independently, - * and setting e.g. strong rumble does not override weak rumble. - * Strength has a range of [0, 0xffff]. - * - * Returns true if rumble state request was honored. - * Calling this before first retro_run() is likely to return false. */ -typedef bool (*retro_set_rumble_state_t)(unsigned port, - enum retro_rumble_effect effect, uint16_t strength); - -struct retro_rumble_interface -{ - retro_set_rumble_state_t set_rumble_state; -}; - -/* Notifies libretro that audio data should be written. 
*/ -typedef void (*retro_audio_callback_t)(void); - -/* True: Audio driver in frontend is active, and callback is - * expected to be called regularily. - * False: Audio driver in frontend is paused or inactive. - * Audio callback will not be called until set_state has been - * called with true. - * Initial state is false (inactive). - */ -typedef void (*retro_audio_set_state_callback_t)(bool enabled); - -struct retro_audio_callback -{ - retro_audio_callback_t callback; - retro_audio_set_state_callback_t set_state; -}; - -/* Notifies a libretro core of time spent since last invocation - * of retro_run() in microseconds. - * - * It will be called right before retro_run() every frame. - * The frontend can tamper with timing to support cases like - * fast-forward, slow-motion and framestepping. - * - * In those scenarios the reference frame time value will be used. */ -typedef int64_t retro_usec_t; -typedef void (*retro_frame_time_callback_t)(retro_usec_t usec); -struct retro_frame_time_callback -{ - retro_frame_time_callback_t callback; - /* Represents the time of one frame. It is computed as - * 1000000 / fps, but the implementation will resolve the - * rounding to ensure that framestepping, etc is exact. */ - retro_usec_t reference; -}; - -/* Pass this to retro_video_refresh_t if rendering to hardware. - * Passing NULL to retro_video_refresh_t is still a frame dupe as normal. - * */ -#define RETRO_HW_FRAME_BUFFER_VALID ((void*)-1) - -/* Invalidates the current HW context. - * Any GL state is lost, and must not be deinitialized explicitly. - * If explicit deinitialization is desired by the libretro core, - * it should implement context_destroy callback. - * If called, all GPU resources must be reinitialized. - * Usually called when frontend reinits video driver. - * Also called first time video driver is initialized, - * allowing libretro core to initialize resources. 
- */ -typedef void (*retro_hw_context_reset_t)(void); - -/* Gets current framebuffer which is to be rendered to. - * Could change every frame potentially. - */ -typedef uintptr_t (*retro_hw_get_current_framebuffer_t)(void); - -/* Get a symbol from HW context. */ -typedef retro_proc_address_t (*retro_hw_get_proc_address_t)(const char *sym); - -enum retro_hw_context_type -{ - RETRO_HW_CONTEXT_NONE = 0, - /* OpenGL 2.x. Driver can choose to use latest compatibility context. */ - RETRO_HW_CONTEXT_OPENGL = 1, - /* OpenGL ES 2.0. */ - RETRO_HW_CONTEXT_OPENGLES2 = 2, - /* Modern desktop core GL context. Use version_major/ - * version_minor fields to set GL version. */ - RETRO_HW_CONTEXT_OPENGL_CORE = 3, - /* OpenGL ES 3.0 */ - RETRO_HW_CONTEXT_OPENGLES3 = 4, - /* OpenGL ES 3.1+. Set version_major/version_minor. For GLES2 and GLES3, - * use the corresponding enums directly. */ - RETRO_HW_CONTEXT_OPENGLES_VERSION = 5, - - RETRO_HW_CONTEXT_DUMMY = INT_MAX -}; - -struct retro_hw_render_callback -{ - /* Which API to use. Set by libretro core. */ - enum retro_hw_context_type context_type; - - /* Called when a context has been created or when it has been reset. - * An OpenGL context is only valid after context_reset() has been called. - * - * When context_reset is called, OpenGL resources in the libretro - * implementation are guaranteed to be invalid. - * - * It is possible that context_reset is called multiple times during an - * application lifecycle. - * If context_reset is called without any notification (context_destroy), - * the OpenGL context was lost and resources should just be recreated - * without any attempt to "free" old resources. - */ - retro_hw_context_reset_t context_reset; - - /* Set by frontend. */ - retro_hw_get_current_framebuffer_t get_current_framebuffer; - - /* Set by frontend. */ - retro_hw_get_proc_address_t get_proc_address; - - /* Set if render buffers should have depth component attached. 
*/ - bool depth; - - /* Set if stencil buffers should be attached. */ - bool stencil; - - /* If depth and stencil are true, a packed 24/8 buffer will be added. - * Only attaching stencil is invalid and will be ignored. */ - - /* Use conventional bottom-left origin convention. If false, - * standard libretro top-left origin semantics are used. */ - bool bottom_left_origin; - - /* Major version number for core GL context or GLES 3.1+. */ - unsigned version_major; - - /* Minor version number for core GL context or GLES 3.1+. */ - unsigned version_minor; - - /* If this is true, the frontend will go very far to avoid - * resetting context in scenarios like toggling fullscreen, etc. - */ - bool cache_context; - - /* The reset callback might still be called in extreme situations - * such as if the context is lost beyond recovery. - * - * For optimal stability, set this to false, and allow context to be - * reset at any time. - */ - - /* A callback to be called before the context is destroyed in a - * controlled way by the frontend. */ - retro_hw_context_reset_t context_destroy; - - /* OpenGL resources can be deinitialized cleanly at this step. - * context_destroy can be set to NULL, in which resources will - * just be destroyed without any notification. - * - * Even when context_destroy is non-NULL, it is possible that - * context_reset is called without any destroy notification. - * This happens if context is lost by external factors (such as - * notified by GL_ARB_robustness). - * - * In this case, the context is assumed to be already dead, - * and the libretro implementation must not try to free any OpenGL - * resources in the subsequent context_reset. - */ - - /* Creates a debug context. */ - bool debug_context; -}; - -/* Callback type passed in RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK. - * Called by the frontend in response to keyboard events. - * down is set if the key is being pressed, or false if it is being released. - * keycode is the RETROK value of the char. 
- * character is the text character of the pressed key. (UTF-32). - * key_modifiers is a set of RETROKMOD values or'ed together. - * - * The pressed/keycode state can be indepedent of the character. - * It is also possible that multiple characters are generated from a - * single keypress. - * Keycode events should be treated separately from character events. - * However, when possible, the frontend should try to synchronize these. - * If only a character is posted, keycode should be RETROK_UNKNOWN. - * - * Similarily if only a keycode event is generated with no corresponding - * character, character should be 0. - */ -typedef void (*retro_keyboard_event_t)(bool down, unsigned keycode, - uint32_t character, uint16_t key_modifiers); - -struct retro_keyboard_callback -{ - retro_keyboard_event_t callback; -}; - -/* Callbacks for RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE. - * Should be set for implementations which can swap out multiple disk - * images in runtime. - * - * If the implementation can do this automatically, it should strive to do so. - * However, there are cases where the user must manually do so. - * - * Overview: To swap a disk image, eject the disk image with - * set_eject_state(true). - * Set the disk index with set_image_index(index). Insert the disk again - * with set_eject_state(false). - */ - -/* If ejected is true, "ejects" the virtual disk tray. - * When ejected, the disk image index can be set. - */ -typedef bool (*retro_set_eject_state_t)(bool ejected); - -/* Gets current eject state. The initial state is 'not ejected'. */ -typedef bool (*retro_get_eject_state_t)(void); - -/* Gets current disk index. First disk is index 0. - * If return value is >= get_num_images(), no disk is currently inserted. - */ -typedef unsigned (*retro_get_image_index_t)(void); - -/* Sets image index. Can only be called when disk is ejected. - * The implementation supports setting "no disk" by using an - * index >= get_num_images(). 
- */ -typedef bool (*retro_set_image_index_t)(unsigned index); - -/* Gets total number of images which are available to use. */ -typedef unsigned (*retro_get_num_images_t)(void); - -struct retro_game_info; - -/* Replaces the disk image associated with index. - * Arguments to pass in info have same requirements as retro_load_game(). - * Virtual disk tray must be ejected when calling this. - * - * Replacing a disk image with info = NULL will remove the disk image - * from the internal list. - * As a result, calls to get_image_index() can change. - * - * E.g. replace_image_index(1, NULL), and previous get_image_index() - * returned 4 before. - * Index 1 will be removed, and the new index is 3. - */ -typedef bool (*retro_replace_image_index_t)(unsigned index, - const struct retro_game_info *info); - -/* Adds a new valid index (get_num_images()) to the internal disk list. - * This will increment subsequent return values from get_num_images() by 1. - * This image index cannot be used until a disk image has been set - * with replace_image_index. */ -typedef bool (*retro_add_image_index_t)(void); - -struct retro_disk_control_callback -{ - retro_set_eject_state_t set_eject_state; - retro_get_eject_state_t get_eject_state; - - retro_get_image_index_t get_image_index; - retro_set_image_index_t set_image_index; - retro_get_num_images_t get_num_images; - - retro_replace_image_index_t replace_image_index; - retro_add_image_index_t add_image_index; -}; - -enum retro_pixel_format -{ - /* 0RGB1555, native endian. - * 0 bit must be set to 0. - * This pixel format is default for compatibility concerns only. - * If a 15/16-bit pixel format is desired, consider using RGB565. */ - RETRO_PIXEL_FORMAT_0RGB1555 = 0, - - /* XRGB8888, native endian. - * X bits are ignored. */ - RETRO_PIXEL_FORMAT_XRGB8888 = 1, - - /* RGB565, native endian. 
- * This pixel format is the recommended format to use if a 15/16-bit - * format is desired as it is the pixel format that is typically - * available on a wide range of low-power devices. - * - * It is also natively supported in APIs like OpenGL ES. */ - RETRO_PIXEL_FORMAT_RGB565 = 2, - - /* Ensure sizeof() == sizeof(int). */ - RETRO_PIXEL_FORMAT_UNKNOWN = INT_MAX -}; - -struct retro_message -{ - const char *msg; /* Message to be displayed. */ - unsigned frames; /* Duration in frames of message. */ -}; - -/* Describes how the libretro implementation maps a libretro input bind - * to its internal input system through a human readable string. - * This string can be used to better let a user configure input. */ -struct retro_input_descriptor -{ - /* Associates given parameters with a description. */ - unsigned port; - unsigned device; - unsigned index; - unsigned id; - - /* Human readable description for parameters. - * The pointer must remain valid until - * retro_unload_game() is called. */ - const char *description; -}; - -struct retro_system_info -{ - /* All pointers are owned by libretro implementation, and pointers must - * remain valid until retro_deinit() is called. */ - - const char *library_name; /* Descriptive name of library. Should not - * contain any version numbers, etc. */ - const char *library_version; /* Descriptive version of core. */ - - const char *valid_extensions; /* A string listing probably content - * extensions the core will be able to - * load, separated with pipe. - * I.e. "bin|rom|iso". - * Typically used for a GUI to filter - * out extensions. */ - - /* If true, retro_load_game() is guaranteed to provide a valid pathname - * in retro_game_info::path. - * ::data and ::size are both invalid. - * - * If false, ::data and ::size are guaranteed to be valid, but ::path - * might not be valid. - * - * This is typically set to true for libretro implementations that must - * load from file. 
- * Implementations should strive for setting this to false, as it allows - * the frontend to perform patching, etc. */ - bool need_fullpath; - - /* If true, the frontend is not allowed to extract any archives before - * loading the real content. - * Necessary for certain libretro implementations that load games - * from zipped archives. */ - bool block_extract; -}; - -struct retro_game_geometry -{ - unsigned base_width; /* Nominal video width of game. */ - unsigned base_height; /* Nominal video height of game. */ - unsigned max_width; /* Maximum possible width of game. */ - unsigned max_height; /* Maximum possible height of game. */ - - float aspect_ratio; /* Nominal aspect ratio of game. If - * aspect_ratio is <= 0.0, an aspect ratio - * of base_width / base_height is assumed. - * A frontend could override this setting, - * if desired. */ -}; - -struct retro_system_timing -{ - double fps; /* FPS of video content. */ - double sample_rate; /* Sampling rate of audio. */ -}; - -struct retro_system_av_info -{ - struct retro_game_geometry geometry; - struct retro_system_timing timing; -}; - -struct retro_variable -{ - /* Variable to query in RETRO_ENVIRONMENT_GET_VARIABLE. - * If NULL, obtains the complete environment string if more - * complex parsing is necessary. - * The environment string is formatted as key-value pairs - * delimited by semicolons as so: - * "key1=value1;key2=value2;..." - */ - const char *key; - - /* Value to be obtained. If key does not exist, it is set to NULL. */ - const char *value; -}; - -struct retro_game_info -{ - const char *path; /* Path to game, UTF-8 encoded. - * Usually used as a reference. - * May be NULL if rom was loaded from stdin - * or similar. - * retro_system_info::need_fullpath guaranteed - * that this path is valid. */ - const void *data; /* Memory buffer of loaded game. Will be NULL - * if need_fullpath was set. */ - size_t size; /* Size of memory buffer. */ - const char *meta; /* String of implementation specific meta-data. 
*/ -}; - -/* Callbacks */ - -/* Environment callback. Gives implementations a way of performing - * uncommon tasks. Extensible. */ -typedef bool (*retro_environment_t)(unsigned cmd, void *data); - -/* Render a frame. Pixel format is 15-bit 0RGB1555 native endian - * unless changed (see RETRO_ENVIRONMENT_SET_PIXEL_FORMAT). - * - * Width and height specify dimensions of buffer. - * Pitch specifices length in bytes between two lines in buffer. - * - * For performance reasons, it is highly recommended to have a frame - * that is packed in memory, i.e. pitch == width * byte_per_pixel. - * Certain graphic APIs, such as OpenGL ES, do not like textures - * that are not packed in memory. - */ -typedef void (*retro_video_refresh_t)(const void *data, unsigned width, - unsigned height, size_t pitch); - -/* Renders a single audio frame. Should only be used if implementation - * generates a single sample at a time. - * Format is signed 16-bit native endian. - */ -typedef void (*retro_audio_sample_t)(int16_t left, int16_t right); - -/* Renders multiple audio frames in one go. - * - * One frame is defined as a sample of left and right channels, interleaved. - * I.e. int16_t buf[4] = { l, r, l, r }; would be 2 frames. - * Only one of the audio callbacks must ever be used. - */ -typedef size_t (*retro_audio_sample_batch_t)(const int16_t *data, - size_t frames); - -/* Polls input. */ -typedef void (*retro_input_poll_t)(void); - -/* Queries for input for player 'port'. device will be masked with - * RETRO_DEVICE_MASK. - * - * Specialization of devices such as RETRO_DEVICE_JOYPAD_MULTITAP that - * have been set with retro_set_controller_port_device() - * will still use the higher level RETRO_DEVICE_JOYPAD to request input. - */ -typedef int16_t (*retro_input_state_t)(unsigned port, unsigned device, - unsigned index, unsigned id); - -/* Sets callbacks. retro_set_environment() is guaranteed to be called - * before retro_init(). 
- * - * The rest of the set_* functions are guaranteed to have been called - * before the first call to retro_run() is made. */ -void retro_set_environment(retro_environment_t); -void retro_set_video_refresh(retro_video_refresh_t); -void retro_set_audio_sample(retro_audio_sample_t); -void retro_set_audio_sample_batch(retro_audio_sample_batch_t); -void retro_set_input_poll(retro_input_poll_t); -void retro_set_input_state(retro_input_state_t); - -/* Library global initialization/deinitialization. */ -void retro_init(void); -void retro_deinit(void); - -/* Must return RETRO_API_VERSION. Used to validate ABI compatibility - * when the API is revised. */ -unsigned retro_api_version(void); - -/* Gets statically known system info. Pointers provided in *info - * must be statically allocated. - * Can be called at any time, even before retro_init(). */ -void retro_get_system_info(struct retro_system_info *info); - -/* Gets information about system audio/video timings and geometry. - * Can be called only after retro_load_game() has successfully completed. - * NOTE: The implementation of this function might not initialize every - * variable if needed. - * E.g. geom.aspect_ratio might not be initialized if core doesn't - * desire a particular aspect ratio. */ -void retro_get_system_av_info(struct retro_system_av_info *info); - -/* Sets device to be used for player 'port'. - * By default, RETRO_DEVICE_JOYPAD is assumed to be plugged into all - * available ports. - * Setting a particular device type is not a guarantee that libretro cores - * will only poll input based on that particular device type. It is only a - * hint to the libretro core when a core cannot automatically detect the - * appropriate input device type on its own. It is also relevant when a - * core can change its behavior depending on device type. */ -void retro_set_controller_port_device(unsigned port, unsigned device); - -/* Resets the current game. 
*/ -void retro_reset(void); - -/* Runs the game for one video frame. - * During retro_run(), input_poll callback must be called at least once. - * - * If a frame is not rendered for reasons where a game "dropped" a frame, - * this still counts as a frame, and retro_run() should explicitly dupe - * a frame if GET_CAN_DUPE returns true. - * In this case, the video callback can take a NULL argument for data. - */ -void retro_run(void); - -/* Returns the amount of data the implementation requires to serialize - * internal state (save states). - * Between calls to retro_load_game() and retro_unload_game(), the - * returned size is never allowed to be larger than a previous returned - * value, to ensure that the frontend can allocate a save state buffer once. - */ -size_t retro_serialize_size(void); - -/* Serializes internal state. If failed, or size is lower than - * retro_serialize_size(), it should return false, true otherwise. */ -bool retro_serialize(void *data, size_t size); -bool retro_unserialize(const void *data, size_t size); - -void retro_cheat_reset(void); -void retro_cheat_set(unsigned index, bool enabled, const char *code); - -/* Loads a game. */ -bool retro_load_game(const struct retro_game_info *game); - -/* Loads a "special" kind of game. Should not be used, - * except in extreme cases. */ -bool retro_load_game_special( - unsigned game_type, - const struct retro_game_info *info, size_t num_info -); - -/* Unloads a currently loaded game. */ -void retro_unload_game(void); - -/* Gets region of game. */ -unsigned retro_get_region(void); - -/* Gets region of memory. 
*/ -void *retro_get_memory_data(unsigned id); -size_t retro_get_memory_size(unsigned id); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h new file mode 100644 index 000000000..39bc32b6c --- /dev/null +++ b/frontend/libretro_core_options.h @@ -0,0 +1,1557 @@ +#ifndef LIBRETRO_CORE_OPTIONS_H__ +#define LIBRETRO_CORE_OPTIONS_H__ + +#include <stdlib.h> +#include <string.h> + +#include <libretro.h> +#include <retro_inline.h> + +#ifndef HAVE_NO_LANGEXTRA +#include "libretro_core_options_intl.h" +#endif + +/* + ******************************** + * VERSION: 2.0 + ******************************** + * + * - 2.0: Add support for core options v2 interface + * - 1.3: Move translations to libretro_core_options_intl.h + * - libretro_core_options_intl.h includes BOM and utf-8 + * fix for MSVC 2010-2013 + * - Added HAVE_NO_LANGEXTRA flag to disable translations + * on platforms/compilers without BOM support + * - 1.2: Use core options v1 interface when + * RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION is >= 1 + * (previously required RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION == 1) + * - 1.1: Support generation of core options v0 retro_core_option_value + * arrays containing options with a single value + * - 1.0: First commit +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + ******************************** + * Core Option Definitions + ******************************** +*/ + +/* RETRO_LANGUAGE_ENGLISH */ + +/* Default language: + * - All other languages must include the same keys and values + * - Will be used as a fallback in the event that frontend language + * is not available + * - Will be used as a fallback for any missing entries in + * frontend language definition + */ + +struct retro_core_option_v2_category option_cats_us[] = { + { + "system", + "System", + "Configure base hardware parameters: region, BIOS selection, memory cards, etc." + }, + { + "video", + "Video", + "Configure base display parameters." 
+ }, +#ifdef GPU_NEON + { + "gpu_neon", + "GPU Plugin", + "Configure low-level settings of the NEON GPU plugin." + }, +#endif +#ifdef GPU_PEOPS + { + "gpu_peops", + "GPU Plugin (Advanced)", + "Configure low-level settings of the P.E.Op.S. GPU plugin." + }, +#endif +#ifdef GPU_UNAI + { + "gpu_unai", + "GPU Plugin (Advanced)", + "Configure low-level settings of the UNAI GPU plugin." + }, +#endif + { + "audio", + "Audio", + "Configure sound emulation: reverb, interpolation, CD audio decoding." + }, + { + "input", + "Input", + "Configure input devices: analog response, haptic feedback, Multitaps, light guns, etc." + }, + { + "compat_hack", + "Compatibility Fixes", + "Configure settings/workarounds required for correct operation of specific games." + }, +#if !defined(DRC_DISABLE) && !defined(LIGHTREC) + { + "speed_hack", + "Speed Hacks (Advanced)", + "Configure hacks that may improve performance at the expense of decreased accuracy/stability." + }, +#endif + { NULL, NULL, NULL }, +}; + +struct retro_core_option_v2_definition option_defs_us[] = { + { + "pcsx_rearmed_region", + "Region", + NULL, + "Specify which region the system is from. 'NTSC' is 60 Hz while 'PAL' is 50 Hz. 'Auto' will detect the region of the currently loaded content. Games may run faster or slower than normal if the incorrect region is selected.", + NULL, + "system", + { + { "auto", "Auto" }, + { "NTSC", NULL }, + { "PAL", NULL }, + { NULL, NULL }, + }, + "auto", + }, + { + "pcsx_rearmed_bios", + "BIOS Selection", + NULL, + "Specify which BIOS to use. 'Auto' will attempt to load a real bios file from the frontend 'system' directory, falling back to high level emulation if unavailable. 'HLE' forces high level BIOS emulation. 
It is recommended to use an official bios file for better compatibility.", + NULL, + "system", + { + { "auto", "Auto" }, + { "HLE", NULL }, + { NULL, NULL }, + }, + "auto", + }, + { + "pcsx_rearmed_show_bios_bootlogo", + "Show BIOS Boot Logo", + NULL, + "When using an official BIOS file, specify whether to show the PlayStation logo upon starting or resetting content. Warning: Enabling the boot logo may reduce game compatibility.", + NULL, + "system", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_memcard2", + "Enable Second Memory Card (Shared)", + NULL, + "Emulate a second memory card in slot 2. This will be shared by all games.", + NULL, + "system", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, +#ifndef _WIN32 + { + "pcsx_rearmed_async_cd", + "CD Access Method (Restart)", + NULL, + "Select method used to read data from content disk images. 'Synchronous' mimics original hardware. 'Asynchronous' can reduce stuttering on devices with slow storage. 'Pre-Cache (CHD)' loads disk image into memory for faster access (CHD files only).", + NULL, + "system", + { + { "sync", "Synchronous" }, + { "async", "Asynchronous" }, + { "precache", "Pre-Cache (CHD)" }, + { NULL, NULL}, + }, + "sync", + }, +#endif +#ifndef DRC_DISABLE + { + "pcsx_rearmed_drc", + "Dynamic Recompiler", + NULL, + "Dynamically recompile PSX CPU instructions to native instructions. Much faster than using an interpreter, but may be less accurate on some platforms.", + NULL, + "system", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "enabled", + }, +#endif + { + "pcsx_rearmed_psxclock", + "PSX CPU Clock Speed", + NULL, + "Overclock or under-clock the PSX CPU. Try adjusting this if the game is too slow, too fast or hangs." +#if defined(LIGHTREC) + " Currently doesn't work with Lightrec dynarec." +#endif +#if defined(HAVE_PRE_ARMV7) && !defined(_3DS) + " Default is 50." 
+#else + " Default is 57." +#endif + , + NULL, + "system", + { + { "30", NULL }, + { "31", NULL }, + { "32", NULL }, + { "33", NULL }, + { "34", NULL }, + { "35", NULL }, + { "36", NULL }, + { "37", NULL }, + { "38", NULL }, + { "39", NULL }, + { "40", NULL }, + { "41", NULL }, + { "42", NULL }, + { "43", NULL }, + { "44", NULL }, + { "45", NULL }, + { "46", NULL }, + { "47", NULL }, + { "48", NULL }, + { "49", NULL }, + { "50", NULL }, + { "51", NULL }, + { "52", NULL }, + { "53", NULL }, + { "54", NULL }, + { "55", NULL }, + { "56", NULL }, + { "57", NULL }, + { "58", NULL }, + { "59", NULL }, + { "60", NULL }, + { "61", NULL }, + { "62", NULL }, + { "63", NULL }, + { "64", NULL }, + { "65", NULL }, + { "66", NULL }, + { "67", NULL }, + { "68", NULL }, + { "69", NULL }, + { "70", NULL }, + { "71", NULL }, + { "72", NULL }, + { "73", NULL }, + { "74", NULL }, + { "75", NULL }, + { "76", NULL }, + { "77", NULL }, + { "78", NULL }, + { "79", NULL }, + { "80", NULL }, + { "81", NULL }, + { "82", NULL }, + { "83", NULL }, + { "84", NULL }, + { "85", NULL }, + { "86", NULL }, + { "87", NULL }, + { "88", NULL }, + { "89", NULL }, + { "90", NULL }, + { "91", NULL }, + { "92", NULL }, + { "93", NULL }, + { "94", NULL }, + { "95", NULL }, + { "96", NULL }, + { "97", NULL }, + { "98", NULL }, + { "99", NULL }, + { "100", NULL }, + { NULL, NULL }, + }, +#if defined(HAVE_PRE_ARMV7) && !defined(_3DS) + "50", +#else + "57", +#endif + }, + { + "pcsx_rearmed_dithering", + "Dithering Pattern", + NULL, + "Enable emulation of the dithering technique used by the PSX to smooth out color banding artifacts. 
Increases performance requirements.", + NULL, + "video", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, +#if defined HAVE_LIBNX || defined _3DS + "disabled", +#else + "enabled", +#endif + }, + { + "pcsx_rearmed_duping_enable", + "Frame Duping (Speedup)", + NULL, + "When enabled and supported by the libretro frontend, provides a small performance increase by directing the frontend to repeat the previous frame if the core has nothing new to display.", + NULL, + "video", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "enabled", + }, +#ifdef THREAD_RENDERING + { + "pcsx_rearmed_gpu_thread_rendering", + "Threaded Rendering", + NULL, + "When enabled, runs GPU commands in a secondary thread. 'Synchronous' improves performance while maintaining proper frame pacing. 'Asynchronous' improves performance even further, but may cause dropped frames and increased latency. Produces best results with games that run natively at less than 60 frames per second.", + NULL, + "video", + { + { "disabled", NULL }, + { "sync", "Synchronous" }, + { "async", "Asynchronous" }, + { NULL, NULL}, + }, + "disabled", + }, +#endif + { + "pcsx_rearmed_frameskip_type", + "Frameskip", + NULL, + "Skip frames to avoid audio buffer under-run (crackling). Improves performance at the expense of visual smoothness. 'Auto' skips frames when advised by the frontend. 'Auto (Threshold)' utilises the 'Frameskip Threshold (%)' setting. 'Fixed Interval' utilises the 'Frameskip Interval' setting.", + NULL, + "video", + { + { "disabled", NULL }, + { "auto", "Auto" }, + { "auto_threshold", "Auto (Threshold)" }, + { "fixed_interval", "Fixed Interval" }, + { NULL, NULL }, + }, + "disabled" + }, + { + "pcsx_rearmed_frameskip_threshold", + "Frameskip Threshold (%)", + NULL, + "When 'Frameskip' is set to 'Auto (Threshold)', specifies the audio buffer occupancy threshold (percentage) below which frames will be skipped. 
Higher values reduce the risk of crackling by causing frames to be dropped more frequently.", + NULL, + "video", + { + { "15", NULL }, + { "18", NULL }, + { "21", NULL }, + { "24", NULL }, + { "27", NULL }, + { "30", NULL }, + { "33", NULL }, + { "36", NULL }, + { "39", NULL }, + { "42", NULL }, + { "45", NULL }, + { "48", NULL }, + { "51", NULL }, + { "54", NULL }, + { "57", NULL }, + { "60", NULL }, + { NULL, NULL }, + }, + "33" + }, + { + "pcsx_rearmed_frameskip_interval", + "Frameskip Interval", + NULL, + "Specify the maximum number of frames that can be skipped before a new frame is rendered.", + NULL, + "video", + { + { "1", NULL }, + { "2", NULL }, + { "3", NULL }, + { "4", NULL }, + { "5", NULL }, + { "6", NULL }, + { "7", NULL }, + { "8", NULL }, + { "9", NULL }, + { "10", NULL }, + { NULL, NULL }, + }, + "3" + }, + { + "pcsx_rearmed_display_internal_fps", + "Display Internal FPS", + NULL, + "Show the internal frame rate at which the emulated PlayStation system is rendering content. Note: Requires on-screen notifications to be enabled in the libretro frontend.", + NULL, + "video", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, +#ifdef GPU_NEON + { + "pcsx_rearmed_neon_interlace_enable", + "(GPU) Show Interlaced Video", + "Show Interlaced Video", + "When enabled, games that run in high resolution video modes (480i, 512i) will produce interlaced video output. While this displays correctly on CRT televisions, it will produce artifacts on modern displays. When disabled, all video is output in progressive format.", + NULL, + "gpu_neon", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_neon_enhancement_enable", + "(GPU) Enhanced Resolution (Slow)", + "Enhanced Resolution (Slow)", + "Render games that do not already run in high resolution video modes (480i, 512i) at twice the native internal resolution. 
Improves the fidelity of 3D models at the expense of increased performance requirements. 2D elements are generally unaffected by this setting.", + NULL, + "gpu_neon", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_neon_enhancement_no_main", + "(GPU) Enhanced Resolution Speed Hack", + "Enhanced Resolution Speed Hack", + "Improves performance when 'Enhanced Resolution (Slow)' is enabled, but reduces compatibility and may cause rendering errors.", + NULL, + "gpu_neon", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, +#endif /* GPU_NEON */ +#ifdef GPU_PEOPS + { + "pcsx_rearmed_show_gpu_peops_settings", + "Show Advanced P.E.Op.S. GPU Settings", + NULL, + "Show low-level configuration options for the P.E.Op.S. GPU plugin. Quick Menu may need to be toggled for this setting to take effect.", + NULL, + NULL, + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_gpu_peops_odd_even_bit", + "(GPU) Odd/Even Bit Hack", + "Odd/Even Bit Hack", + "A hack fix used to correct lock-ups that may occur in games such as Chrono Cross. Disable unless required.", + NULL, + "gpu_peops", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_gpu_peops_expand_screen_width", + "(GPU) Expand Screen Width", + "Expand Screen Width", + "Intended for use only with Capcom 2D fighting games. Enlarges the display area at the right side of the screen to show all background elements without cut-off. May cause rendering errors.", + NULL, + "gpu_peops", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_gpu_peops_ignore_brightness", + "(GPU) Ignore Brightness Color", + "Ignore Brightness Color", + "A hack fix used to repair black screens in Lunar Silver Star Story Complete when entering a house or a menu. 
Disable unless required.", + NULL, + "gpu_peops", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_gpu_peops_disable_coord_check", + "(GPU) Disable Coordinate Check", + "Disable Coordinate Check", + "Legacy compatibility mode. May improve games that fail to run correctly on newer GPU hardware. Disable unless required.", + NULL, + "gpu_peops", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_gpu_peops_lazy_screen_update", + "(GPU) Lazy Screen Update", + "Lazy Screen Update", + "A partial fix to prevent text box flickering in Dragon Warrior VII. May also improve Pandemonium 2. Disable unless required.", + NULL, + "gpu_peops", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_gpu_peops_repeated_triangles", + "(GPU) Repeat Flat Tex Triangles", + "Repeat Flat Tex Triangles", + "A hack fix used to correct rendering errors in Star Wars: Dark Forces. Disable unless required.", + NULL, + "gpu_peops", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_gpu_peops_quads_with_triangles", + "(GPU) Draw Tex-Quads as Triangles", + "Draw Tex-Quads as Triangles", + "Corrects graphical distortions that may occur when games utilize Gouraud Shading, at the expense of reduced texture quality. Disable unless required.", + NULL, + "gpu_peops", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_gpu_peops_fake_busy_state", + "(GPU) Fake 'GPU Busy' States", + "Fake 'GPU Busy' States", + "Emulate the 'GPU is busy' (drawing primitives) status flag of the original hardware instead of assuming the GPU is always ready for commands. May improve compatibility at the expense of reduced performance. 
Disable unless required.", + NULL, + "gpu_peops", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, +#endif /* GPU_PEOPS */ +#ifdef GPU_UNAI + { + "pcsx_rearmed_show_gpu_unai_settings", + "Show Advanced UNAI GPU Settings", + NULL, + "Show low-level configuration options for the UNAI GPU plugin. Quick Menu may need to be toggled for this setting to take effect.", + NULL, + NULL, + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL}, + }, + "disabled", + }, + { + "pcsx_rearmed_gpu_unai_blending", + "(GPU) Texture Blending", + "Texture Blending", + "Enable alpha-based (and additive) texture blending. Required for various rendering effects, including transparency (e.g. water, shadows). Can be disabled to improve performance at the expense of severe display errors/inaccuracies.", + NULL, + "gpu_unai", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL}, + }, + "enabled", + }, + { + "pcsx_rearmed_gpu_unai_lighting", + "(GPU) Lighting Effects", + "Lighting Effects", + "Enable simulated lighting effects (via vertex coloring combined with texture mapping). Required by almost all 3D games. Can be disabled to improve performance at the expense of severe display errors/inaccuracies (missing shadows, flat textures, etc.).", + NULL, + "gpu_unai", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL}, + }, + "enabled", + }, + { + "pcsx_rearmed_gpu_unai_fast_lighting", + "(GPU) Fast Lighting", + "Fast Lighting", + "Improves performance when 'Lighting Effects' are enabled, but may cause moderate/severe rendering errors.", + NULL, + "gpu_unai", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL}, + }, + "disabled", + }, + { + "pcsx_rearmed_gpu_unai_scale_hires", + "(GPU) Hi-Res Downscaling", + "Hi-Res Downscaling", + "When enabled, games that run in high resolution video modes (480i, 512i) will be downscaled to 320x240. 
Can improve performance, and is recommended on devices with native 240p display resolutions.", + NULL, + "gpu_unai", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL}, + }, +#ifdef _MIYOO + "enabled", +#else + "disabled", +#endif + }, +#endif /* GPU_UNAI */ + { + "pcsx_rearmed_spu_reverb", + "Audio Reverb Effects", + "Reverb Effects", + "Enable emulation of the reverb feature provided by the PSX SPU. Can be disabled to improve performance at the expense of reduced audio quality/authenticity.", + NULL, + "audio", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, +#ifdef HAVE_PRE_ARMV7 + "disabled", +#else + "enabled", +#endif + }, + { + "pcsx_rearmed_spu_interpolation", + "Sound Interpolation", + NULL, + "Enable emulation of the in-built audio interpolation provided by the PSX SPU. 'Gaussian' sounds closest to original hardware. 'Simple' improves performance but reduces quality. 'Cubic' has the highest performance requirements but produces increased clarity. Can be disabled entirely for maximum performance, at the expense of greatly reduced audio quality.", + NULL, + "audio", + { + { "simple", "Simple" }, + { "gaussian", "Gaussian" }, + { "cubic", "Cubic" }, + { "off", "disabled" }, + { NULL, NULL }, + }, +#ifdef HAVE_PRE_ARMV7 + "off", +#else + "simple", +#endif + }, + { + "pcsx_rearmed_nocdaudio", + "CD Audio", + NULL, + "Enable playback of CD (CD-DA) audio tracks. Can be disabled to improve performance in games that include CD audio, at the expense of missing music.", + NULL, + "audio", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "enabled", + }, + { + "pcsx_rearmed_noxadecoding", + "XA Decoding", + NULL, + "Enable playback of XA (eXtended Architecture ADPCM) audio tracks. 
Can be disabled to improve performance in games that include XA audio, at the expense of missing music.", + NULL, + "audio", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "enabled", + }, + { + "pcsx_rearmed_show_input_settings", + "Show Input Settings", + NULL, + "Show configuration options for all input devices: analog response, Multitaps, light guns, etc. Quick Menu may need to be toggled for this setting to take effect.", + NULL, + NULL, + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_analog_axis_modifier", + "Analog Axis Bounds", + NULL, + "Specify range limits for the left and right analog sticks when input device is set to 'analog' or 'dualshock'. 'Square' bounds improve input response when using controllers with highly circular ranges that are unable to fully saturate the X and Y axes at 45 degree deflections.", + NULL, + "input", + { + { "circle", "Circle" }, + { "square", "Square" }, + { NULL, NULL }, + }, + "circle", + }, + { + "pcsx_rearmed_vibration", + "Rumble Effects", + NULL, + "Enable haptic feedback when using a rumble-equipped gamepad with input device set to 'dualshock'.", + NULL, + "input", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "enabled", + }, + { + "pcsx_rearmed_multitap", + "Multitap Mode (Restart)", + NULL, + "Connect a virtual PSX Multitap peripheral to either controller 'Port 1' or controller 'Port 2' for 5 player simultaneous input, or to both 'Ports 1 and 2' for 8 player input. Multitap usage requires compatible games. 
To avoid input defects, option should be disabled when running games that have no support for Multitap features.", + NULL, + "input", + { + { "disabled", NULL }, + { "port 1", "Port 1" }, + { "port 2", "Port 2" }, + { "ports 1 and 2", "Ports 1 and 2" }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_negcon_deadzone", + "NegCon Twist Deadzone", + NULL, + "Set the deadzone of the RetroPad left analog stick when simulating the 'twist' action of emulated neGcon Controllers. Used to eliminate drift/unwanted input.", + NULL, + "input", + { + { "0", "0%" }, + { "3", "3%" }, + { "5", "5%" }, + { "7", "7%" }, + { "10", "10%" }, + { "13", "13%" }, + { "15", "15%" }, + { "17", "17%" }, + { "20", "20%" }, + { "23", "23%" }, + { "25", "25%" }, + { "27", "27%" }, + { "30", "30%" }, + { NULL, NULL }, + }, + "0", + }, + { + "pcsx_rearmed_negcon_response", + "NegCon Twist Response", + NULL, + "Specify the analog response when using a RetroPad left analog stick to simulate the 'twist' action of emulated neGcon Controllers.", + NULL, + "input", + { + { "linear", "Linear" }, + { "quadratic", "Quadratic" }, + { "cubic", "Cubic" }, + { NULL, NULL }, + }, + "linear", + }, + { + "pcsx_rearmed_input_sensitivity", + "Mouse Sensitivity", + NULL, + "Adjust responsiveness of emulated 'mouse' input devices.", + NULL, + "input", + { + { "0.05", NULL }, + { "0.10", NULL }, + { "0.15", NULL }, + { "0.20", NULL }, + { "0.25", NULL }, + { "0.30", NULL }, + { "0.35", NULL }, + { "0.40", NULL }, + { "0.45", NULL }, + { "0.50", NULL }, + { "0.55", NULL }, + { "0.60", NULL }, + { "0.65", NULL }, + { "0.70", NULL }, + { "0.75", NULL }, + { "0.80", NULL }, + { "0.85", NULL }, + { "0.90", NULL }, + { "0.95", NULL }, + { "1.00", NULL }, + { "1.05", NULL }, + { "1.10", NULL }, + { "1.15", NULL }, + { "1.20", NULL }, + { "1.25", NULL }, + { "1.30", NULL }, + { "1.35", NULL }, + { "1.40", NULL }, + { "1.45", NULL }, + { "1.50", NULL }, + { "1.55", NULL }, + { "1.60", NULL }, + { "1.65", NULL }, 
+ { "1.70", NULL }, + { "1.75", NULL }, + { "1.80", NULL }, + { "1.85", NULL }, + { "1.90", NULL }, + { "1.95", NULL }, + { "2.00", NULL }, + }, + "1.00", + }, + { + "pcsx_rearmed_gunconadjustx", + "Guncon X Axis Offset", + NULL, + "Apply an X axis offset to light gun input when emulating a Guncon device. Can be used to correct aiming misalignments.", + NULL, + "input", + { + { "-25", NULL }, + { "-24", NULL }, + { "-23", NULL }, + { "-22", NULL }, + { "-21", NULL }, + { "-20", NULL }, + { "-19", NULL }, + { "-18", NULL }, + { "-17", NULL }, + { "-16", NULL }, + { "-15", NULL }, + { "-14", NULL }, + { "-13", NULL }, + { "-12", NULL }, + { "-11", NULL }, + { "-10", NULL }, + { "-9", NULL }, + { "-8", NULL }, + { "-7", NULL }, + { "-6", NULL }, + { "-5", NULL }, + { "-4", NULL }, + { "-3", NULL }, + { "-2", NULL }, + { "-1", NULL }, + { "0", NULL }, + { "1", NULL }, + { "2", NULL }, + { "3", NULL }, + { "4", NULL }, + { "5", NULL }, + { "6", NULL }, + { "7", NULL }, + { "8", NULL }, + { "9", NULL }, + { "10", NULL }, + { "11", NULL }, + { "12", NULL }, + { "13", NULL }, + { "14", NULL }, + { "15", NULL }, + { "16", NULL }, + { "17", NULL }, + { "18", NULL }, + { "19", NULL }, + { "20", NULL }, + { "21", NULL }, + { "22", NULL }, + { "23", NULL }, + { "24", NULL }, + { "25", NULL }, + { NULL, NULL }, + }, + "0", + }, + { + "pcsx_rearmed_gunconadjusty", + "Guncon Y Axis Offset", + NULL, + "Apply a Y axis offset to light gun input when emulating a Guncon device. 
Can be used to correct aiming misalignments.", + NULL, + "input", + { + { "-25", NULL }, + { "-24", NULL }, + { "-23", NULL }, + { "-22", NULL }, + { "-21", NULL }, + { "-20", NULL }, + { "-19", NULL }, + { "-18", NULL }, + { "-17", NULL }, + { "-16", NULL }, + { "-15", NULL }, + { "-14", NULL }, + { "-13", NULL }, + { "-12", NULL }, + { "-11", NULL }, + { "-10", NULL }, + { "-9", NULL }, + { "-8", NULL }, + { "-7", NULL }, + { "-6", NULL }, + { "-5", NULL }, + { "-4", NULL }, + { "-3", NULL }, + { "-2", NULL }, + { "-1", NULL }, + { "0", NULL }, + { "1", NULL }, + { "2", NULL }, + { "3", NULL }, + { "4", NULL }, + { "5", NULL }, + { "6", NULL }, + { "7", NULL }, + { "8", NULL }, + { "9", NULL }, + { "10", NULL }, + { "11", NULL }, + { "12", NULL }, + { "13", NULL }, + { "14", NULL }, + { "15", NULL }, + { "16", NULL }, + { "17", NULL }, + { "18", NULL }, + { "19", NULL }, + { "20", NULL }, + { "21", NULL }, + { "22", NULL }, + { "23", NULL }, + { "24", NULL }, + { "25", NULL }, + { NULL, NULL }, + }, + "0", + }, + { + "pcsx_rearmed_gunconadjustratiox", + "Guncon X Axis Response", + NULL, + "Adjust relative magnitude of horizontal light gun motion when emulating a Guncon device. 
Can be used to correct aiming misalignments.", + NULL, + "input", + { + { "0.75", NULL }, + { "0.76", NULL }, + { "0.77", NULL }, + { "0.78", NULL }, + { "0.79", NULL }, + { "0.80", NULL }, + { "0.81", NULL }, + { "0.82", NULL }, + { "0.83", NULL }, + { "0.84", NULL }, + { "0.85", NULL }, + { "0.86", NULL }, + { "0.87", NULL }, + { "0.88", NULL }, + { "0.89", NULL }, + { "0.90", NULL }, + { "0.91", NULL }, + { "0.92", NULL }, + { "0.93", NULL }, + { "0.94", NULL }, + { "0.95", NULL }, + { "0.96", NULL }, + { "0.97", NULL }, + { "0.98", NULL }, + { "0.99", NULL }, + { "1.00", NULL }, + { "1.01", NULL }, + { "1.02", NULL }, + { "1.03", NULL }, + { "1.04", NULL }, + { "1.05", NULL }, + { "1.06", NULL }, + { "1.07", NULL }, + { "1.08", NULL }, + { "1.09", NULL }, + { "1.10", NULL }, + { "1.11", NULL }, + { "1.12", NULL }, + { "1.13", NULL }, + { "1.14", NULL }, + { "1.15", NULL }, + { "1.16", NULL }, + { "1.17", NULL }, + { "1.18", NULL }, + { "1.19", NULL }, + { "1.20", NULL }, + { "1.21", NULL }, + { "1.22", NULL }, + { "1.23", NULL }, + { "1.24", NULL }, + { "1.25", NULL }, + { NULL, NULL }, + }, + "1.00", + }, + { + "pcsx_rearmed_gunconadjustratioy", + "Guncon Y Axis Response", + NULL, + "Adjust relative magnitude of vertical light gun motion when emulating a Guncon device. 
Can be used to correct aiming misalignments.", + NULL, + "input", + { + { "0.75", NULL }, + { "0.76", NULL }, + { "0.77", NULL }, + { "0.78", NULL }, + { "0.79", NULL }, + { "0.80", NULL }, + { "0.81", NULL }, + { "0.82", NULL }, + { "0.83", NULL }, + { "0.84", NULL }, + { "0.85", NULL }, + { "0.86", NULL }, + { "0.87", NULL }, + { "0.88", NULL }, + { "0.89", NULL }, + { "0.90", NULL }, + { "0.91", NULL }, + { "0.92", NULL }, + { "0.93", NULL }, + { "0.94", NULL }, + { "0.95", NULL }, + { "0.96", NULL }, + { "0.97", NULL }, + { "0.98", NULL }, + { "0.99", NULL }, + { "1.00", NULL }, + { "1.01", NULL }, + { "1.02", NULL }, + { "1.03", NULL }, + { "1.04", NULL }, + { "1.05", NULL }, + { "1.06", NULL }, + { "1.07", NULL }, + { "1.08", NULL }, + { "1.09", NULL }, + { "1.10", NULL }, + { "1.11", NULL }, + { "1.12", NULL }, + { "1.13", NULL }, + { "1.14", NULL }, + { "1.15", NULL }, + { "1.16", NULL }, + { "1.17", NULL }, + { "1.18", NULL }, + { "1.19", NULL }, + { "1.20", NULL }, + { "1.21", NULL }, + { "1.22", NULL }, + { "1.23", NULL }, + { "1.24", NULL }, + { "1.25", NULL }, + { NULL, NULL }, + }, + "1.00", + }, + { + "pcsx_rearmed_icache_emulation", + "Instruction Cache Emulation", + NULL, + "Enable emulation of the PSX CPU instruction cache. Improves accuracy at the expense of increased performance overheads. Required for Formula One 2001, Formula One Arcade and Formula One 99. [Interpreter only and partial on lightrec, unsupported when using ARMv7 backend]", + NULL, + "compat_hack", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, +#if !defined(DRC_DISABLE) && !defined(LIGHTREC) + { + "pcsx_rearmed_nocompathacks", + "Disable Automatic Compatibility Hacks", + NULL, + "By default, PCSX-ReARMed will apply auxiliary compatibility hacks automatically, based on the currently loaded content. 
This behaviour is required for correct operation, but may be disabled if desired.", + NULL, + "compat_hack", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_nosmccheck", + "(Speed Hack) Disable SMC Checks", + "Disable SMC Checks", + "Will cause crashes when loading, and lead to memory card failure.", + NULL, + "speed_hack", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_gteregsunneeded", + "(Speed Hack) Assume GTE Registers Unneeded", + "Assume GTE Registers Unneeded", + "May cause rendering errors.", + NULL, + "speed_hack", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_nogteflags", + "(Speed Hack) Disable GTE Flags", + "Disable GTE Flags", + "Will cause rendering errors.", + NULL, + "speed_hack", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_nostalls", + "(Speed Hack) Disable CPU/GTE Stalls", + "Disable CPU/GTE Stalls", + "Will cause some games to run too quickly.", + NULL, + "speed_hack", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, +#endif /* !DRC_DISABLE && !LIGHTREC */ + { NULL, NULL, NULL, NULL, NULL, NULL, {{0}}, NULL }, +}; + +struct retro_core_options_v2 options_us = { + option_cats_us, + option_defs_us +}; + +/* + ******************************** + * Language Mapping + ******************************** +*/ + +#ifndef HAVE_NO_LANGEXTRA +struct retro_core_options_v2 *options_intl[RETRO_LANGUAGE_LAST] = { + &options_us, /* RETRO_LANGUAGE_ENGLISH */ + NULL, /* RETRO_LANGUAGE_JAPANESE */ + NULL, /* RETRO_LANGUAGE_FRENCH */ + NULL, /* RETRO_LANGUAGE_SPANISH */ + NULL, /* RETRO_LANGUAGE_GERMAN */ + NULL, /* RETRO_LANGUAGE_ITALIAN */ + NULL, /* RETRO_LANGUAGE_DUTCH */ + NULL, /* RETRO_LANGUAGE_PORTUGUESE_BRAZIL */ + NULL, /* 
RETRO_LANGUAGE_PORTUGUESE_PORTUGAL */ + NULL, /* RETRO_LANGUAGE_RUSSIAN */ + NULL, /* RETRO_LANGUAGE_KOREAN */ + NULL, /* RETRO_LANGUAGE_CHINESE_TRADITIONAL */ + NULL, /* RETRO_LANGUAGE_CHINESE_SIMPLIFIED */ + NULL, /* RETRO_LANGUAGE_ESPERANTO */ + NULL, /* RETRO_LANGUAGE_POLISH */ + NULL, /* RETRO_LANGUAGE_VIETNAMESE */ + NULL, /* RETRO_LANGUAGE_ARABIC */ + NULL, /* RETRO_LANGUAGE_GREEK */ + &options_tr, /* RETRO_LANGUAGE_TURKISH */ +}; +#endif + +/* + ******************************** + * Functions + ******************************** +*/ + +/* Handles configuration/setting of core options. + * Should be called as early as possible - ideally inside + * retro_set_environment(), and no later than retro_load_game() + * > We place the function body in the header to avoid the + * necessity of adding more .c files (i.e. want this to + * be as painless as possible for core devs) + */ + +static INLINE void libretro_set_core_options(retro_environment_t environ_cb, + bool *categories_supported) +{ + unsigned version = 0; +#ifndef HAVE_NO_LANGEXTRA + unsigned language = 0; +#endif + + if (!environ_cb || !categories_supported) + return; + + *categories_supported = false; + + if (!environ_cb(RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION, &version)) + version = 0; + + if (version >= 2) + { +#ifndef HAVE_NO_LANGEXTRA + struct retro_core_options_v2_intl core_options_intl; + + core_options_intl.us = &options_us; + core_options_intl.local = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_LANGUAGE, &language) && + (language < RETRO_LANGUAGE_LAST) && (language != RETRO_LANGUAGE_ENGLISH)) + core_options_intl.local = options_intl[language]; + + *categories_supported = environ_cb(RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2_INTL, + &core_options_intl); +#else + *categories_supported = environ_cb(RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2, + &options_us); +#endif + } + else + { + size_t i, j; + size_t option_index = 0; + size_t num_options = 0; + struct retro_core_option_definition + *option_v1_defs_us = 
NULL; +#ifndef HAVE_NO_LANGEXTRA + size_t num_options_intl = 0; + struct retro_core_option_v2_definition + *option_defs_intl = NULL; + struct retro_core_option_definition + *option_v1_defs_intl = NULL; + struct retro_core_options_intl + core_options_v1_intl; +#endif + struct retro_variable *variables = NULL; + char **values_buf = NULL; + + /* Determine total number of options */ + while (true) + { + if (option_defs_us[num_options].key) + num_options++; + else + break; + } + + if (version >= 1) + { + /* Allocate US array */ + option_v1_defs_us = (struct retro_core_option_definition *) + calloc(num_options + 1, sizeof(struct retro_core_option_definition)); + + /* Copy parameters from option_defs_us array */ + for (i = 0; i < num_options; i++) + { + struct retro_core_option_v2_definition *option_def_us = &option_defs_us[i]; + struct retro_core_option_value *option_values = option_def_us->values; + struct retro_core_option_definition *option_v1_def_us = &option_v1_defs_us[i]; + struct retro_core_option_value *option_v1_values = option_v1_def_us->values; + + option_v1_def_us->key = option_def_us->key; + option_v1_def_us->desc = option_def_us->desc; + option_v1_def_us->info = option_def_us->info; + option_v1_def_us->default_value = option_def_us->default_value; + + /* Values must be copied individually... 
*/ + while (option_values->value) + { + option_v1_values->value = option_values->value; + option_v1_values->label = option_values->label; + + option_values++; + option_v1_values++; + } + } + +#ifndef HAVE_NO_LANGEXTRA + if (environ_cb(RETRO_ENVIRONMENT_GET_LANGUAGE, &language) && + (language < RETRO_LANGUAGE_LAST) && (language != RETRO_LANGUAGE_ENGLISH) && + options_intl[language]) + option_defs_intl = options_intl[language]->definitions; + + if (option_defs_intl) + { + /* Determine number of intl options */ + while (true) + { + if (option_defs_intl[num_options_intl].key) + num_options_intl++; + else + break; + } + + /* Allocate intl array */ + option_v1_defs_intl = (struct retro_core_option_definition *) + calloc(num_options_intl + 1, sizeof(struct retro_core_option_definition)); + + /* Copy parameters from option_defs_intl array */ + for (i = 0; i < num_options_intl; i++) + { + struct retro_core_option_v2_definition *option_def_intl = &option_defs_intl[i]; + struct retro_core_option_value *option_values = option_def_intl->values; + struct retro_core_option_definition *option_v1_def_intl = &option_v1_defs_intl[i]; + struct retro_core_option_value *option_v1_values = option_v1_def_intl->values; + + option_v1_def_intl->key = option_def_intl->key; + option_v1_def_intl->desc = option_def_intl->desc; + option_v1_def_intl->info = option_def_intl->info; + option_v1_def_intl->default_value = option_def_intl->default_value; + + /* Values must be copied individually... 
*/ + while (option_values->value) + { + option_v1_values->value = option_values->value; + option_v1_values->label = option_values->label; + + option_values++; + option_v1_values++; + } + } + } + + core_options_v1_intl.us = option_v1_defs_us; + core_options_v1_intl.local = option_v1_defs_intl; + + environ_cb(RETRO_ENVIRONMENT_SET_CORE_OPTIONS_INTL, &core_options_v1_intl); +#else + environ_cb(RETRO_ENVIRONMENT_SET_CORE_OPTIONS, option_v1_defs_us); +#endif + } + else + { + /* Allocate arrays */ + variables = (struct retro_variable *)calloc(num_options + 1, + sizeof(struct retro_variable)); + values_buf = (char **)calloc(num_options, sizeof(char *)); + + if (!variables || !values_buf) + goto error; + + /* Copy parameters from option_defs_us array */ + for (i = 0; i < num_options; i++) + { + const char *key = option_defs_us[i].key; + const char *desc = option_defs_us[i].desc; + const char *default_value = option_defs_us[i].default_value; + struct retro_core_option_value *values = option_defs_us[i].values; + size_t buf_len = 3; + size_t default_index = 0; + + values_buf[i] = NULL; + + /* Skip options that are irrelevant when using the + * old style core options interface */ + if ((strcmp(key, "pcsx_rearmed_show_input_settings") == 0) || + (strcmp(key, "pcsx_rearmed_show_gpu_peops_settings") == 0) || + (strcmp(key, "pcsx_rearmed_show_gpu_unai_settings") == 0)) + continue; + + if (desc) + { + size_t num_values = 0; + + /* Determine number of values */ + while (true) + { + if (values[num_values].value) + { + /* Check if this is the default value */ + if (default_value) + if (strcmp(values[num_values].value, default_value) == 0) + default_index = num_values; + + buf_len += strlen(values[num_values].value); + num_values++; + } + else + break; + } + + /* Build values string */ + if (num_values > 0) + { + buf_len += num_values - 1; + buf_len += strlen(desc); + + values_buf[i] = (char *)calloc(buf_len, sizeof(char)); + if (!values_buf[i]) + goto error; + + strcpy(values_buf[i], 
desc); + strcat(values_buf[i], "; "); + + /* Default value goes first */ + strcat(values_buf[i], values[default_index].value); + + /* Add remaining values */ + for (j = 0; j < num_values; j++) + { + if (j != default_index) + { + strcat(values_buf[i], "|"); + strcat(values_buf[i], values[j].value); + } + } + } + } + + variables[option_index].key = key; + variables[option_index].value = values_buf[i]; + option_index++; + } + + /* Set variables */ + environ_cb(RETRO_ENVIRONMENT_SET_VARIABLES, variables); + } + +error: + /* Clean up */ + + if (option_v1_defs_us) + { + free(option_v1_defs_us); + option_v1_defs_us = NULL; + } + +#ifndef HAVE_NO_LANGEXTRA + if (option_v1_defs_intl) + { + free(option_v1_defs_intl); + option_v1_defs_intl = NULL; + } +#endif + + if (values_buf) + { + for (i = 0; i < num_options; i++) + { + if (values_buf[i]) + { + free(values_buf[i]); + values_buf[i] = NULL; + } + } + + free(values_buf); + values_buf = NULL; + } + + if (variables) + { + free(variables); + variables = NULL; + } + } +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/frontend/libretro_core_options_intl.h b/frontend/libretro_core_options_intl.h new file mode 100644 index 000000000..5b04d601a --- /dev/null +++ b/frontend/libretro_core_options_intl.h @@ -0,0 +1,552 @@ +#ifndef LIBRETRO_CORE_OPTIONS_INTL_H__ +#define LIBRETRO_CORE_OPTIONS_INTL_H__ + +#if defined(_MSC_VER) && (_MSC_VER >= 1500 && _MSC_VER < 1900) +/* https://support.microsoft.com/en-us/kb/980263 */ +#pragma execution_character_set("utf-8") +#pragma warning(disable:4566) +#endif + +#include <libretro.h> + +/* + ******************************** + * VERSION: 2.0 + ******************************** + * + * - 2.0: Add support for core options v2 interface + * - 1.3: Move translations to libretro_core_options_intl.h + * - libretro_core_options_intl.h includes BOM and utf-8 + * fix for MSVC 2010-2013 + * - Added HAVE_NO_LANGEXTRA flag to disable translations + * on platforms/compilers without BOM support + * - 1.2: Use core
options v1 interface when + * RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION is >= 1 + * (previously required RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION == 1) + * - 1.1: Support generation of core options v0 retro_core_option_value + * arrays containing options with a single value + * - 1.0: First commit +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + ******************************** + * Core Option Definitions + ******************************** +*/ + +/* RETRO_LANGUAGE_JAPANESE */ + +/* RETRO_LANGUAGE_FRENCH */ + +/* RETRO_LANGUAGE_SPANISH */ + +/* RETRO_LANGUAGE_GERMAN */ + +/* RETRO_LANGUAGE_ITALIAN */ + +/* RETRO_LANGUAGE_DUTCH */ + +/* RETRO_LANGUAGE_PORTUGUESE_BRAZIL */ + +/* RETRO_LANGUAGE_PORTUGUESE_PORTUGAL */ + +/* RETRO_LANGUAGE_RUSSIAN */ + +/* RETRO_LANGUAGE_KOREAN */ + +/* RETRO_LANGUAGE_CHINESE_TRADITIONAL */ + +/* RETRO_LANGUAGE_CHINESE_SIMPLIFIED */ + +/* RETRO_LANGUAGE_ESPERANTO */ + +/* RETRO_LANGUAGE_POLISH */ + +/* RETRO_LANGUAGE_VIETNAMESE */ + +/* RETRO_LANGUAGE_ARABIC */ + +/* RETRO_LANGUAGE_GREEK */ + +/* RETRO_LANGUAGE_TURKISH */ + +struct retro_core_option_v2_category option_cats_tr[] = { + { NULL, NULL, NULL }, +}; + +struct retro_core_option_v2_definition option_defs_tr[] = { + { + "pcsx_rearmed_frameskip", + "Kare Atlama", + NULL, + "Görsel pürüzsüzlük pahasına performansı artırmak için ne kadar karenin atlanması gerektiğini seçin.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_bios", + "BIOS Kullan", + NULL, + "Gerçek bios dosyasını (varsa) veya öykünmüş bios'u (HLE) kullanmanızı sağlar. Daha iyi uyumluluk için resmi bios dosyasını kullanmanız önerilir.", + NULL, + NULL, + { + { "auto", "otomatik" }, + { NULL, NULL }, + }, + "auto", + }, + { + "pcsx_rearmed_region", + "Bölge", + NULL, + "Sistemin hangi bölgeden olduğunu seçin. 
NTSC için 60 Hz, PAL için 50 Hz.", + NULL, + NULL, + { + { "auto", "otomatik" }, + { NULL, NULL }, + }, + "auto", + }, + { + "pcsx_rearmed_memcard2", + "İkinci Bellek Kartını Etkinleştir (Paylaşılan)", + NULL, + "2. Hafıza kartı yuvasını etkinleştirin. Bu hafıza kartı tüm oyunlar arasında paylaşılır.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_multitap1", + "Multitap 1", + NULL, + "Bağlantı noktası 1'deki multitap'ı etkinleştirir / devre dışı bırakır ve izin veren oyunlarda 5 oyuncuya kadar izin verir.", + NULL, + NULL, + { + { "auto", "otomatik" }, + { NULL, NULL }, + }, + "auto", + }, + { + "pcsx_rearmed_multitap2", + "Multitap 2", + NULL, + "Bağlantı noktası 2'deki multitap'ı etkinleştirir/devre dışı bırakır ve izin veren oyunlarda 8 oyuncuya kadar izin verir. Bunun çalışması için Multitap 1'in etkinleştirilmesi gerekir.", + NULL, + NULL, + { + { "auto", "otomatik" }, + { NULL, NULL }, + }, + "auto", + }, + { + "pcsx_rearmed_negcon_deadzone", + "NegCon Twist Deadzone (Yüzdelik)", + NULL, + "Öykünülmüş neGcon kontrolörünün 'büküm' eylemini simüle ederken RetroPad sol analog çubuğunun ölü bölgesini ayarlar. 
Sürüklenme/istenmeyen girişi ortadan kaldırmak için kullanılır.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_negcon_response", + "NegCon Twist Response", + NULL, + "Öykünülmüş neGcon kontrolörünün 'bükümünü' simule etmek için bir RetroPad sol analog çubuğu kullanırken analog cevabını belirtir.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_vibration", + "Titreşimi Etkinleştir", + NULL, + "Titreşim özelliklerini destekleyen kontrolörler için titreşim geri bildirimini etkinleştirir.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_dithering", + "Dithering Etkinleştir", + NULL, + "Kapalı ise, PSX'in renk bantlarıyla mücadele etmek için uyguladığı renk taklidi düzenini devre dışı bırakır.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + +#ifdef NEW_DYNAREC + { + "pcsx_rearmed_drc", + "Dinamik Yeniden Derleyici", + NULL, + "Çekirdeğin dinamik yeniden derleyici veya tercüman(daha yavaş) CPU talimatlarını kullanmasını sağlar.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_psxclock", + "PSX CPU Saat Hızı", + NULL, +#if defined(HAVE_PRE_ARMV7) && !defined(_3DS) + "Overclock or underclock the PSX clock. Default is 50", +#else + "Overclock or underclock the PSX clock. 
Default is 57", +#endif + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, +#endif /* NEW_DYNAREC */ + +#ifdef GPU_NEON + { + "pcsx_rearmed_neon_interlace_enable", + "Interlacing Mode'u etkinleştir", + NULL, + "Sahte tarama çizgileri efektini etkinleştirir.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_neon_enhancement_enable", + "Geliştirilmiş Çözünürlük (Yavaş)", + NULL, + "Düşük performans pahasına çift çözünürlükte işler.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_neon_enhancement_no_main", + "Geliştirilmiş Çözünürlük (Speed Hack)", + NULL, + "Geliştirilmiş çözünürlük seçeneği için hız aşırtma(bazı oyunlarda sorun çıkartabilir).", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, +#endif /* GPU_NEON */ + + { + "pcsx_rearmed_duping_enable", + "Frame Duping", + NULL, + "Yeni bir veri yoksa, bir hızlandırma, son kareyi yeniden çizer/yeniden kullanır.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_display_internal_fps", + "Dahili FPS'yi görüntüle", + NULL, + "Etkinleştirildiğinde ekranda saniye başına kareyi gösterir.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + + /* GPU PEOPS OPTIONS */ +#ifdef GPU_PEOPS + { + "pcsx_rearmed_show_gpu_peops_settings", + "Gelişmiş GPU Ayarlarını Göster", + NULL, + "Çeşitli GPU düzeltmelerini etkinleştirin veya devre dışı bırakın. Ayarların etkili olması için core'un yeniden başlatılması gerekebilir. 
NOT: Bu ayarın etkili olabilmesi için Hızlı Menü’nün değiştirilmesi gerekir.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_gpu_peops_odd_even_bit", + "(GPU) Odd/Even Bit Hack", + NULL, + "Chrono Cross için gerekli.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_gpu_peops_expand_screen_width", + "(GPU) Ekran Genişliğini Genişlet", + NULL, + "Capcom dövüş oyunları", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_gpu_peops_ignore_brightness", + "(GPU) Parlaklık Rengini Yoksay", + NULL, + "Lunar Silver Star Story oyunlarında siyah ekran", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_gpu_peops_disable_coord_check", + "(GPU) Koordinat Kontrolünü Devre Dışı Bırak", + NULL, + "Uyumluluk modu", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_gpu_peops_lazy_screen_update", + "(GPU) Tembel Ekran Güncellemesi", + NULL, + "Pandemonium 2", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_gpu_peops_old_frame_skip", + "(GPU) Eski Çerçeve Atlama", + NULL, + "Her ikinci kareyi atla", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_gpu_peops_repeated_triangles", + "(GPU) Tekrarlanan Düz Doku Üçgenleri", + NULL, + "Star Wars: Dark Forces için gerekli", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_gpu_peops_quads_with_triangles", + "(GPU) Üçgenler ile Dörtlü Çiz", + NULL, + "Daha iyi g renkler, daha kötü dokular", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_gpu_peops_fake_busy_state", + "(GPU) Sahte 'Gpu Meşgul' Konumları", + NULL, + "Çizimden sonra meşgul bayraklarını değiştir", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, +#endif /* GPU_PEOPS */ + + { + "pcsx_rearmed_show_bios_bootlogo", + "Bios Bootlogo'yu Göster", + NULL, + "Etkinleştirildiğinde, başlatırken veya sıfırlarken PlayStation logosunu gösterir. 
(Bazı oyunları bozabilir).", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_spu_reverb", + "Ses Yankısı", + NULL, + "Ses yankı efektini etkinleştirir veya devre dışı bırakır.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_spu_interpolation", + "Ses Enterpolasyonu", + NULL, + NULL, + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_icache_emulation", + "ICache Düzeltmleri", + NULL, + NULL, + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + + /* ADVANCED OPTIONS */ + { + "pcsx_rearmed_noxadecoding", + "XA Kod Çözme", + NULL, + NULL, + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_nocdaudio", + "CD Ses", + NULL, + NULL, + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + +#ifdef NEW_DYNAREC + { + "pcsx_rearmed_nosmccheck", + "(Speed Hack) SMC Kontrollerini Devre Dışı Bırak", + NULL, + "Yükleme sırasında çökmelere neden olabilir, hafıza kartını bozabilir.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_gteregsunneeded", + "(Speed Hack) GTE'nin Gereksiz Olduğunu Varsayın", + NULL, + "Grafiksel bozukluklara neden olabilir.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, + { + "pcsx_rearmed_nogteflags", + "(Speed Hack) GTE Bayraklarını Devredışı Bırakın", + NULL, + "Grafiksel bozukluklara neden olur.", + NULL, + NULL, + { + { NULL, NULL }, + }, + NULL + }, +#endif /* NEW_DYNAREC */ + + { NULL, NULL, NULL, NULL, NULL, NULL, {{0}}, NULL }, +}; + +struct retro_core_options_v2 options_tr = { + option_cats_tr, + option_defs_tr +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/frontend/link.T b/frontend/link.T new file mode 100644 index 000000000..b0c262db9 --- /dev/null +++ b/frontend/link.T @@ -0,0 +1,5 @@ +{ + global: retro_*; + local: *; +}; + diff --git a/frontend/main.c b/frontend/main.c index 3440e3886..dcac1d982 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -11,7 +11,7 @@ #include #include 
#include -#ifndef _WIN32 +#if !defined(_WIN32) && !defined(NO_DYLIB) #include #endif @@ -158,15 +158,17 @@ void emu_set_default_config(void) spu_config.iVolume = 768; spu_config.iTempo = 0; spu_config.iUseThread = 1; // no effect if only 1 core is detected -#ifdef HAVE_PRE_ARMV7 /* XXX GPH hack */ +#if defined(HAVE_PRE_ARMV7) && !defined(_3DS) /* XXX GPH hack */ spu_config.iUseReverb = 0; spu_config.iUseInterpolation = 0; +#ifndef HAVE_LIBRETRO spu_config.iTempo = 1; +#endif #endif new_dynarec_hacks = 0; - in_type1 = PSE_PAD_TYPE_STANDARD; - in_type2 = PSE_PAD_TYPE_STANDARD; + in_type[0] = PSE_PAD_TYPE_STANDARD; + in_type[1] = PSE_PAD_TYPE_STANDARD; } void do_emu_action(void) @@ -313,7 +315,7 @@ static int cdidcmp(const char *id1, const char *id2) static void parse_cwcheat(void) { - char line[256], buf[64], name[64], *p; + char line[256], buf[256], name[256], *p; int newcheat = 1; u32 a, v; FILE *f; @@ -755,10 +757,10 @@ void SysReset() { // reset can run code, timing must be set pl_timing_prepare(Config.PsxType); - EmuReset(); - // hmh core forgets this CDR_stop(); + + EmuReset(); GPU_updateLace = real_lace; g_emu_resetting = 0; @@ -806,7 +808,7 @@ int emu_save_state(int slot) return ret; ret = SaveState(fname); -#ifdef HAVE_PRE_ARMV7 /* XXX GPH hack */ +#if defined(HAVE_PRE_ARMV7) && !defined(_3DS) && !defined(__SWITCH__) /* XXX GPH hack */ sync(); #endif SysPrintf("* %s \"%s\" [%d]\n", @@ -828,6 +830,7 @@ int emu_load_state(int slot) return LoadState(fname); } +#ifndef HAVE_LIBRETRO #ifndef ANDROID void SysPrintf(const char *fmt, ...) { @@ -852,6 +855,7 @@ void SysPrintf(const char *fmt, ...) { } #endif +#endif /* HAVE_LIBRETRO */ void SysMessage(const char *fmt, ...) 
{ va_list list; @@ -901,14 +905,15 @@ static int _OpenPlugins(void) { if (Config.UseNet && !NetOpened) { netInfo info; - char path[MAXPATHLEN]; + char path[MAXPATHLEN * 2]; char dotdir[MAXPATHLEN]; MAKE_PATH(dotdir, "/.pcsx/plugins/", NULL); strcpy(info.EmuName, "PCSX"); - strncpy(info.CdromID, CdromId, 9); - strncpy(info.CdromLabel, CdromLabel, 9); + memcpy(info.CdromID, CdromId, 9); /* no \0 trailing character? */ + memcpy(info.CdromLabel, CdromLabel, 9); + info.CdromLabel[9] = '\0'; info.psxMem = psxM; info.GPU_showScreenPic = GPU_showScreenPic; info.GPU_displayText = GPU_displayText; @@ -1020,7 +1025,7 @@ void *SysLoadLibrary(const char *lib) { return (void *)(uintptr_t)(PLUGIN_DL_BASE + builtin_plugin_ids[i]); } -#ifndef _WIN32 +#if !defined(_WIN32) && !defined(NO_DYLIB) ret = dlopen(lib, RTLD_NOW); if (ret == NULL) SysMessage("dlopen: %s", dlerror()); @@ -1037,7 +1042,7 @@ void *SysLoadSym(void *lib, const char *sym) { if (PLUGIN_DL_BASE <= plugid && plugid < PLUGIN_DL_BASE + ARRAY_SIZE(builtin_plugins)) return plugin_link(plugid - PLUGIN_DL_BASE, sym); -#ifndef _WIN32 +#if !defined(_WIN32) && !defined(NO_DYLIB) return dlsym(lib, sym); #else return NULL; @@ -1045,7 +1050,9 @@ void *SysLoadSym(void *lib, const char *sym) { } const char *SysLibError() { -#ifndef _WIN32 +#if defined(NO_DYLIB) + return NULL; +#elif !defined(_WIN32) return dlerror(); #else return "not supported"; @@ -1058,8 +1065,7 @@ void SysCloseLibrary(void *lib) { if (PLUGIN_DL_BASE <= plugid && plugid < PLUGIN_DL_BASE + ARRAY_SIZE(builtin_plugins)) return; -#ifndef _WIN32 +#if !defined(_WIN32) && !defined(NO_DYLIB) dlclose(lib); #endif } - diff --git a/frontend/menu.c b/frontend/menu.c index 2e4091c38..d1e0413a2 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -309,14 +309,14 @@ static void menu_sync_config(void) Config.cycle_multiplier = 10000 / psx_clock; switch (in_type_sel1) { - case 1: in_type1 = PSE_PAD_TYPE_ANALOGPAD; break; - case 2: in_type1 = PSE_PAD_TYPE_GUNCON; break; - 
default: in_type1 = PSE_PAD_TYPE_STANDARD; + case 1: in_type[0] = PSE_PAD_TYPE_ANALOGPAD; break; + case 2: in_type[0] = PSE_PAD_TYPE_NEGCON; break; + default: in_type[0] = PSE_PAD_TYPE_STANDARD; } switch (in_type_sel2) { - case 1: in_type2 = PSE_PAD_TYPE_ANALOGPAD; break; - case 2: in_type2 = PSE_PAD_TYPE_GUNCON; break; - default: in_type2 = PSE_PAD_TYPE_STANDARD; + case 1: in_type[1] = PSE_PAD_TYPE_ANALOGPAD; break; + case 2: in_type[1] = PSE_PAD_TYPE_NEGCON; break; + default: in_type[1] = PSE_PAD_TYPE_STANDARD; } if (in_evdev_allow_abs_only != allow_abs_only_old) { in_probe(); diff --git a/frontend/menu.h b/frontend/menu.h index 81cd1baf5..8f5acda7a 100644 --- a/frontend/menu.h +++ b/frontend/menu.h @@ -1,3 +1,6 @@ +#ifndef __MENU_H__ +#define __MENU_H__ + void menu_init(void); void menu_prepare_emu(void); void menu_loop(void); @@ -35,3 +38,5 @@ extern int soft_filter; extern int g_menuscreen_w; extern int g_menuscreen_h; + +#endif /* __MENU_H__ */ diff --git a/frontend/nopic.h b/frontend/nopic.h index d664f8052..1815983f8 100644 --- a/frontend/nopic.h +++ b/frontend/nopic.h @@ -1,3 +1,6 @@ +#ifndef __NOPIC_H__ +#define __NOPIC_H__ + /* these are just deps, to be removed */ static const struct { @@ -54,4 +57,4 @@ void DrawNumBorPic(unsigned char *pMem, int lSelectedSlot) } } - +#endif /* __NOPIC_H__ */ diff --git a/frontend/pl_gun_ts.h b/frontend/pl_gun_ts.h index 4e3d195f8..8a3362712 100644 --- a/frontend/pl_gun_ts.h +++ b/frontend/pl_gun_ts.h @@ -1,3 +1,6 @@ +#ifndef __PL_GUN_TS_H__ +#define __PL_GUN_TS_H__ + #ifdef HAVE_TSLIB struct tsdev; @@ -16,3 +19,5 @@ int pl_gun_ts_get_fd(struct tsdev *ts); #define pl_set_gun_rect(...) 
do {} while (0) #endif + +#endif /* __PL_GUN_TS_H__ */ diff --git a/frontend/plat.h b/frontend/plat.h index 6b0cd65b2..8a296ea49 100644 --- a/frontend/plat.h +++ b/frontend/plat.h @@ -1,3 +1,6 @@ +#ifndef __PLAT_H__ +#define __PLAT_H__ + void plat_init(void); void plat_finish(void); void plat_minimize(void); @@ -8,3 +11,5 @@ void plat_gvideo_open(int is_pal); void *plat_gvideo_set_mode(int *w, int *h, int *bpp); void *plat_gvideo_flip(void); void plat_gvideo_close(void); + +#endif /* __PLAT_H__ */ diff --git a/frontend/plat_omap.h b/frontend/plat_omap.h index e47410a39..151f09cdc 100644 --- a/frontend/plat_omap.h +++ b/frontend/plat_omap.h @@ -1,5 +1,8 @@ +#ifndef __PLAT_OMAP_H__ +#define __PLAT_OMAP_H__ void plat_omap_init(void); void plat_omap_finish(void); void plat_omap_gvideo_open(void); +#endif /* __PLAT_OMAP_H__ */ diff --git a/frontend/plugin.c b/frontend/plugin.c index 5f9c5ffb0..7f3b8a463 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -48,21 +48,21 @@ extern int CALLBACK SPUplayCDDAchannel(short *, int, unsigned int, int); /* PAD */ static long CALLBACK PADreadPort1(PadDataS *pad) { - pad->controllerType = in_type1; - pad->buttonStatus = ~in_keystate; - if (in_type1 == PSE_PAD_TYPE_ANALOGPAD) { - pad->leftJoyX = in_a1[0]; - pad->leftJoyY = in_a1[1]; - pad->rightJoyX = in_a2[0]; - pad->rightJoyY = in_a2[1]; + pad->controllerType = in_type[0]; + pad->buttonStatus = ~in_keystate[0]; + if (in_type[0] == PSE_PAD_TYPE_ANALOGPAD) { + pad->leftJoyX = in_analog_left[0][0]; + pad->leftJoyY = in_analog_left[0][1]; + pad->rightJoyX = in_analog_right[0][0]; + pad->rightJoyY = in_analog_right[0][1]; } return 0; } static long CALLBACK PADreadPort2(PadDataS *pad) { - pad->controllerType = in_type2; - pad->buttonStatus = ~in_keystate >> 16; + pad->controllerType = in_type[1]; + pad->buttonStatus = ~in_keystate[0] >> 16; return 0; } diff --git a/frontend/plugin.h b/frontend/plugin.h index e7a564534..5e12f9024 100644 --- a/frontend/plugin.h +++ 
b/frontend/plugin.h @@ -1,3 +1,6 @@ +#ifndef __PLUGIN_H__ +#define __PLUGIN_H__ + #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #define PLUGIN_DL_BASE 0xfbad0000 @@ -12,3 +15,5 @@ enum builtint_plugins_e { void *plugin_link(enum builtint_plugins_e id, const char *sym); void plugin_call_rearmed_cbs(void); + +#endif /* __PLUGIN_H__ */ diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 588f13370..171296d25 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -36,11 +36,16 @@ #define HUD_HEIGHT 10 -int in_type1, in_type2; -int in_a1[2] = { 127, 127 }, in_a2[2] = { 127, 127 }; +int in_type[8]; +int multitap1; +int multitap2; +int in_analog_left[8][2] = {{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 }}; +int in_analog_right[8][2] = {{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 }}; int in_adev[2] = { -1, -1 }, in_adev_axis[2][2] = {{ 0, 1 }, { 0, 1 }}; int in_adev_is_nublike[2]; -int in_keystate, in_state_gun; +unsigned short in_keystate[8]; +int in_mouse[8][2]; +int in_state_gun; int in_enable_vibration; void *tsdev; void *pl_vout_buf; @@ -567,7 +572,7 @@ static void update_analog_nub_adjust(int *x_, int *y_) static void update_analogs(void) { - int *nubp[2] = { in_a1, in_a2 }; + int *nubp[2] = { in_analog_left[0], in_analog_right[0] }; int vals[2]; int i, a, v, ret; @@ -595,7 +600,6 @@ static void update_analogs(void) } } - //printf("%4d %4d %4d %4d\n", in_a1[0], in_a1[1], in_a2[0], in_a2[1]); } static void update_input(void) @@ -604,7 +608,7 @@ static void update_input(void) unsigned int emu_act; in_update(actions); - if (in_type1 == PSE_PAD_TYPE_ANALOGPAD) + if (in_type[0] == PSE_PAD_TYPE_ANALOGJOY || in_type[0] == PSE_PAD_TYPE_ANALOGPAD) update_analogs(); emu_act = actions[IN_BINDTYPE_EMU]; in_state_gun = (emu_act & SACTION_GUN_MASK) >> SACTION_GUN_TRIGGER; @@ -618,7 +622,7 @@ static void update_input(void) } 
emu_set_action(emu_act); - in_keystate = actions[IN_BINDTYPE_PLAYER12]; + in_keystate[0] = actions[IN_BINDTYPE_PLAYER12]; } #else /* MAEMO */ extern void update_input(void); diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index f55eb449b..3f8b5c413 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -1,3 +1,9 @@ +#ifndef __PLUGIN_LIB_H__ +#define __PLUGIN_LIB_H__ + +#define THREAD_RENDERING_OFF 0 +#define THREAD_RENDERING_SYNC 1 +#define THREAD_RENDERING_ASYNC 2 enum { DKEY_SELECT = 0, @@ -17,8 +23,15 @@ enum { DKEY_CROSS, DKEY_SQUARE, }; -extern int in_type1, in_type2; -extern int in_keystate, in_state_gun, in_a1[2], in_a2[2]; +extern int in_state_gun; +extern int in_type[8]; +extern int multitap1; +extern int multitap2; +extern int in_analog_left[8][2]; +extern int in_analog_right[8][2]; +extern unsigned short in_keystate[8]; +extern int in_mouse[8][2]; + extern int in_adev[2], in_adev_axis[2][2]; extern int in_adev_is_nublike[2]; extern int in_enable_vibration; @@ -112,3 +125,5 @@ extern void (*pl_plat_hud_print)(int x, int y, const char *str, int bpp); #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #endif + +#endif /* __PLUGIN_LIB_H__ */ diff --git a/include/config.h b/include/config.h index ce2f3ea18..340cc2f9d 100644 --- a/include/config.h +++ b/include/config.h @@ -1,2 +1,7 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + #define MAXPATHLEN 256 -#define PACKAGE_VERSION "1.9" +#define PCSX_VERSION "1.9" + +#endif /* __CONFIG_H__ */ diff --git a/include/pcnt.h b/include/pcnt.h index 9ddd50032..c956b411e 100644 --- a/include/pcnt.h +++ b/include/pcnt.h @@ -1,3 +1,5 @@ +#ifndef __PCNT_H__ +#define __PCNT_H__ enum pcounters { PCNT_ALL, @@ -130,3 +132,5 @@ void pcnt_gte_end(int op); #define pcnt_print(fps) #endif + +#endif /* __PCNT_H__ */ diff --git a/include/psemu_plugin_defs.h b/include/psemu_plugin_defs.h index fa626272a..b855eacc2 100644 --- a/include/psemu_plugin_defs.h +++ b/include/psemu_plugin_defs.h @@ -157,6 
+157,8 @@ typedef struct +// No controller +#define PSE_PAD_TYPE_NONE 0 // MOUSE SCPH-1030 #define PSE_PAD_TYPE_MOUSE 1 // NEGCON - 16 button analog controller SLPH-00001 @@ -195,9 +197,15 @@ typedef struct typedef struct { - // controler type - fill it withe predefined values above + // controller type - fill it with the predefined values above unsigned char controllerType; + //0 : no multitap between psx and pad + //1 : multitap between psx and pad on port 1 + //2 : multitap between psx and pad on port 2 + int portMultitap; + int requestPadIndex; + // status of buttons - every controller fills this field unsigned short buttonStatus; @@ -211,8 +219,13 @@ typedef struct unsigned char Vib[2]; unsigned char VibF[2]; - + + //configuration mode Request 0x43 + int configMode; unsigned char reserved[87]; + + //Lightgun values + int absoluteX,absoluteY; } PadDataS; diff --git a/jni/Android.mk b/jni/Android.mk index da000f3ea..501b0671a 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -4,96 +4,221 @@ $(shell cd "$(LOCAL_PATH)" && ((git describe --always || echo) | sed -e 's/.*/#d $(shell cd "$(LOCAL_PATH)" && (diff -q ../frontend/revision.h_ ../frontend/revision.h > /dev/null 2>&1 || cp ../frontend/revision.h_ ../frontend/revision.h)) $(shell cd "$(LOCAL_PATH)" && (rm ../frontend/revision.h_)) -include $(CLEAR_VARS) - -APP_DIR := ../../src - -ifneq ($(TARGET_ARCH_ABI),armeabi-v7a) - NO_NEON_BUILD := 1 -else - NO_NEON_BUILD := $(NO_NEON) -endif - -ifeq ($(NO_NEON_BUILD)$(TARGET_ARCH_ABI),1armeabi-v7a) - LOCAL_MODULE := retro-noneon -else - LOCAL_MODULE := retro -endif +HAVE_CHD ?= 1 +USE_LIBRETRO_VFS ?= 0 + +ROOT_DIR := $(LOCAL_PATH)/..
+CORE_DIR := $(ROOT_DIR)/libpcsxcore +SPU_DIR := $(ROOT_DIR)/plugins/dfsound +GPU_DIR := $(ROOT_DIR)/plugins/gpulib +CDR_DIR := $(ROOT_DIR)/plugins/cdrcimg +INPUT_DIR := $(ROOT_DIR)/plugins/dfinput +FRONTEND_DIR := $(ROOT_DIR)/frontend +NEON_DIR := $(ROOT_DIR)/plugins/gpu_neon +UNAI_DIR := $(ROOT_DIR)/plugins/gpu_unai +PEOPS_DIR := $(ROOT_DIR)/plugins/dfxvideo +DYNAREC_DIR := $(ROOT_DIR)/libpcsxcore/new_dynarec +DEPS_DIR := $(ROOT_DIR)/deps +LIBRETRO_COMMON := $(ROOT_DIR)/libretro-common +EXTRA_INCLUDES := + +# core +SOURCES_C := $(CORE_DIR)/cdriso.c \ + $(CORE_DIR)/cdrom.c \ + $(CORE_DIR)/cheat.c \ + $(CORE_DIR)/database.c \ + $(CORE_DIR)/decode_xa.c \ + $(CORE_DIR)/mdec.c \ + $(CORE_DIR)/misc.c \ + $(CORE_DIR)/plugins.c \ + $(CORE_DIR)/ppf.c \ + $(CORE_DIR)/psxbios.c \ + $(CORE_DIR)/psxcommon.c \ + $(CORE_DIR)/psxcounters.c \ + $(CORE_DIR)/psxdma.c \ + $(CORE_DIR)/psxhle.c \ + $(CORE_DIR)/psxhw.c \ + $(CORE_DIR)/psxinterpreter.c \ + $(CORE_DIR)/psxmem.c \ + $(CORE_DIR)/r3000a.c \ + $(CORE_DIR)/sio.c \ + $(CORE_DIR)/spu.c \ + $(CORE_DIR)/gte.c \ + $(CORE_DIR)/gte_nf.c \ + $(CORE_DIR)/gte_divider.c -ifeq ($(TARGET_ARCH),arm) - LOCAL_ARM_MODE := arm - - LOCAL_CFLAGS += -DANDROID_ARM - - LOCAL_SRC_FILES += ../libpcsxcore/gte_arm.S - - # dynarec - LOCAL_SRC_FILES += ../libpcsxcore/new_dynarec/new_dynarec.c ../libpcsxcore/new_dynarec/linkage_arm.S ../libpcsxcore/new_dynarec/emu_if.c ../libpcsxcore/new_dynarec/pcsxmem.c - - # spu - LOCAL_SRC_FILES += ../plugins/dfsound/arm_utils.S +# spu +SOURCES_C += $(SPU_DIR)/dma.c \ + $(SPU_DIR)/freeze.c \ + $(SPU_DIR)/registers.c \ + $(SPU_DIR)/spu.c \ + $(SPU_DIR)/out.c \ + $(SPU_DIR)/nullsnd.c - # misc +# gpu +SOURCES_C += $(GPU_DIR)/gpu.c \ + $(GPU_DIR)/vout_pl.c - ifeq ($(NO_NEON_BUILD),1) - # gpu - LOCAL_CFLAGS += -DREARMED - LOCAL_SRC_FILES += ../plugins/gpu_unai/gpulib_if.cpp ../plugins/gpu_unai/gpu_arm.s - LOCAL_SRC_FILES += ../frontend/cspace_arm.S - else - LOCAL_ARM_NEON := true - LOCAL_CFLAGS += -DNEON_BUILD 
-DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP - LOCAL_SRC_FILES += ../libpcsxcore/gte_neon.S ../frontend/cspace_neon.S +# cdrcimg +SOURCES_C += $(CDR_DIR)/cdrcimg.c - # gpu - LOCAL_SRC_FILES += ../plugins/gpu_neon/psx_gpu_if.c ../plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S - endif +# dfinput +SOURCES_C += $(INPUT_DIR)/main.c \ + $(INPUT_DIR)/pad.c \ + $(INPUT_DIR)/guncon.c + +# frontend +SOURCES_C += $(FRONTEND_DIR)/main.c \ + $(FRONTEND_DIR)/plugin.c \ + $(FRONTEND_DIR)/cspace.c \ + $(FRONTEND_DIR)/libretro.c + +# libchdr +SOURCES_C += \ + $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/Alloc.c \ + $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/Bra86.c \ + $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/BraIA64.c \ + $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/CpuArch.c \ + $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/Delta.c \ + $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/LzFind.c \ + $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/Lzma86Dec.c \ + $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/LzmaDec.c \ + $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/LzmaEnc.c \ + $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/Sort.c \ + $(DEPS_DIR)/libchdr/src/libchdr_bitstream.c \ + $(DEPS_DIR)/libchdr/src/libchdr_cdrom.c \ + $(DEPS_DIR)/libchdr/src/libchdr_chd.c \ + $(DEPS_DIR)/libchdr/src/libchdr_flac.c \ + $(DEPS_DIR)/libchdr/src/libchdr_huffman.c +SOURCES_ASM := + +COREFLAGS := -ffast-math -funroll-loops -DHAVE_LIBRETRO -DNO_FRONTEND -DFRONTEND_SUPPORTS_RGB565 -DANDROID -DREARMED +COREFLAGS += -DHAVE_CHD -D_7ZIP_ST + +ifeq ($(USE_LIBRETRO_VFS),1) +SOURCES_C += \ + $(LIBRETRO_COMMON)/compat/compat_posix_string.c \ + $(LIBRETRO_COMMON)/compat/fopen_utf8.c \ + $(LIBRETRO_COMMON)/encodings/compat_strl.c \ + $(LIBRETRO_COMMON)/encodings/encoding_utf.c \ + $(LIBRETRO_COMMON)/file/file_path.c \ + $(LIBRETRO_COMMON)/streams/file_stream.c \ + $(LIBRETRO_COMMON)/streams/file_stream_transforms.c \ + $(LIBRETRO_COMMON)/string/stdstring.c \ + $(LIBRETRO_COMMON)/time/rtime.c \ + $(LIBRETRO_COMMON)/vfs/vfs_implementation.c +COREFLAGS += 
-DUSE_LIBRETRO_VFS endif -ifeq ($(TARGET_ARCH),x86) - LOCAL_CFLAGS += -DANDROID_X86 +HAVE_ARI64=0 +HAVE_LIGHTREC=0 +LIGHTREC_CUSTOM_MAP=0 +HAVE_GPU_NEON=0 +ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) + HAVE_ARI64=1 + HAVE_GPU_NEON=1 +else ifeq ($(TARGET_ARCH_ABI),armeabi) + HAVE_ARI64=1 +else ifeq ($(TARGET_ARCH_ABI),arm64-v8a) + HAVE_ARI64=1 + HAVE_GPU_NEON=1 +else ifeq ($(TARGET_ARCH_ABI),x86_64) + HAVE_LIGHTREC=1 + HAVE_GPU_NEON=1 +else ifeq ($(TARGET_ARCH_ABI),x86) + HAVE_LIGHTREC=1 + HAVE_GPU_NEON=1 +else + COREFLAGS += -DDRC_DISABLE endif - -ifeq ($(TARGET_ARCH),mips) - LOCAL_CFLAGS += -DANDROID_MIPS -D__mips__ -D__MIPSEL__ + COREFLAGS += -DLIGHTREC_CUSTOM_MAP=$(LIGHTREC_CUSTOM_MAP) + +ifeq ($(HAVE_ARI64),1) + SOURCES_C += $(DYNAREC_DIR)/new_dynarec.c \ + $(DYNAREC_DIR)/pcsxmem.c + ifeq ($(TARGET_ARCH_ABI),arm64-v8a) + SOURCES_ASM += $(DYNAREC_DIR)/linkage_arm64.S + else + SOURCES_ASM += $(CORE_DIR)/gte_arm.S \ + $(SPU_DIR)/arm_utils.S \ + $(DYNAREC_DIR)/linkage_arm.S + endif +endif + SOURCES_C += $(DYNAREC_DIR)/emu_if.c + +ifeq ($(HAVE_LIGHTREC),1) + COREFLAGS += -DLIGHTREC -DLIGHTREC_STATIC + EXTRA_INCLUDES += $(DEPS_DIR)/lightning/include \ + $(DEPS_DIR)/lightrec \ + $(DEPS_DIR)/lightrec/tlsf \ + $(ROOT_DIR)/include/lightning \ + $(ROOT_DIR)/include/lightrec + SOURCES_C += $(DEPS_DIR)/lightrec/blockcache.c \ + $(DEPS_DIR)/lightrec/disassembler.c \ + $(DEPS_DIR)/lightrec/emitter.c \ + $(DEPS_DIR)/lightrec/interpreter.c \ + $(DEPS_DIR)/lightrec/lightrec.c \ + $(DEPS_DIR)/lightrec/memmanager.c \ + $(DEPS_DIR)/lightrec/optimizer.c \ + $(DEPS_DIR)/lightrec/regcache.c \ + $(DEPS_DIR)/lightrec/recompiler.c \ + $(DEPS_DIR)/lightrec/reaper.c + SOURCES_C += $(DEPS_DIR)/lightning/lib/jit_disasm.c \ + $(DEPS_DIR)/lightning/lib/jit_memory.c \ + $(DEPS_DIR)/lightning/lib/jit_names.c \ + $(DEPS_DIR)/lightning/lib/jit_note.c \ + $(DEPS_DIR)/lightning/lib/jit_print.c \ + $(DEPS_DIR)/lightning/lib/jit_size.c \ + $(DEPS_DIR)/lightning/lib/lightning.c + SOURCES_C += 
$(CORE_DIR)/lightrec/plugin.c $(DEPS_DIR)/lightrec/tlsf/tlsf.c +ifeq ($(LIGHTREC_CUSTOM_MAP),1) + SOURCES_C += $(CORE_DIR)/lightrec/mem.c endif - -ifneq ($(TARGET_ARCH),arm) - # gpu - LOCAL_CFLAGS += -DREARMED - LOCAL_SRC_FILES += ../plugins/gpu_unai/gpulib_if.cpp endif -LOCAL_SRC_FILES += ../libpcsxcore/cdriso.c ../libpcsxcore/cdrom.c ../libpcsxcore/cheat.c ../libpcsxcore/debug.c \ - ../libpcsxcore/decode_xa.c ../libpcsxcore/disr3000a.c ../libpcsxcore/mdec.c \ - ../libpcsxcore/misc.c ../libpcsxcore/plugins.c ../libpcsxcore/ppf.c ../libpcsxcore/psxbios.c \ - ../libpcsxcore/psxcommon.c ../libpcsxcore/psxcounters.c ../libpcsxcore/psxdma.c ../libpcsxcore/psxhle.c \ - ../libpcsxcore/psxhw.c ../libpcsxcore/psxinterpreter.c ../libpcsxcore/psxmem.c ../libpcsxcore/r3000a.c \ - ../libpcsxcore/sio.c ../libpcsxcore/socket.c ../libpcsxcore/spu.c -LOCAL_SRC_FILES += ../libpcsxcore/gte.c ../libpcsxcore/gte_nf.c ../libpcsxcore/gte_divider.c - -# spu -LOCAL_SRC_FILES += ../plugins/dfsound/dma.c ../plugins/dfsound/freeze.c \ - ../plugins/dfsound/registers.c ../plugins/dfsound/spu.c \ - ../plugins/dfsound/out.c ../plugins/dfsound/nullsnd.c - -# builtin gpu -LOCAL_SRC_FILES += ../plugins/gpulib/gpu.c ../plugins/gpulib/vout_pl.c - -# cdrcimg -LOCAL_SRC_FILES += ../plugins/cdrcimg/cdrcimg.c - -# dfinput -LOCAL_SRC_FILES += ../plugins/dfinput/main.c ../plugins/dfinput/pad.c ../plugins/dfinput/guncon.c -# misc -LOCAL_SRC_FILES += ../frontend/main.c ../frontend/plugin.c ../frontend/cspace.c +ifeq ($(HAVE_GPU_NEON),1) + COREFLAGS += -DNEON_BUILD -DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP -DGPU_NEON + ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) + COREFLAGS += -DHAVE_bgr555_to_rgb565 -DHAVE_bgr888_to_x + SOURCES_ASM += $(CORE_DIR)/gte_neon.S \ + $(NEON_DIR)/psx_gpu/psx_gpu_arm_neon.S \ + $(FRONTEND_DIR)/cspace_neon.S + else + COREFLAGS += -DSIMD_BUILD + SOURCES_C += $(NEON_DIR)/psx_gpu/psx_gpu_simd.c + endif + SOURCES_C += $(NEON_DIR)/psx_gpu_if.c +else ifeq ($(TARGET_ARCH_ABI),armeabi) + COREFLAGS 
+= -DUSE_GPULIB=1 -DGPU_UNAI + COREFLAGS += -DHAVE_bgr555_to_rgb565 + SOURCES_ASM += $(UNAI_DIR)/gpu_arm.S \ + $(FRONTEND_DIR)/cspace_arm.S + SOURCES_C += $(UNAI_DIR)/gpulib_if.cpp +else + COREFLAGS += -fno-strict-aliasing -DGPU_PEOPS + SOURCES_C += $(PEOPS_DIR)/gpulib_if.c +endif -# libretro -LOCAL_SRC_FILES += ../frontend/libretro.c +GIT_VERSION := " $(shell git rev-parse --short HEAD || echo unknown)" +ifneq ($(GIT_VERSION)," unknown") + COREFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" +endif -LOCAL_CFLAGS += -O3 -ffast-math -funroll-loops -DNDEBUG -D_FILE_OFFSET_BITS=64 -DHAVE_LIBRETRO -DNO_FRONTEND -DFRONTEND_SUPPORTS_RGB565 -LOCAL_C_INCLUDES += $(LOCAL_PATH)/../include -LOCAL_LDLIBS := -lz -llog +include $(CLEAR_VARS) +LOCAL_MODULE := retro +LOCAL_SRC_FILES := $(SOURCES_C) $(SOURCES_ASM) +LOCAL_CFLAGS := $(COREFLAGS) +LOCAL_C_INCLUDES := $(ROOT_DIR)/include +LOCAL_C_INCLUDES += $(DEPS_DIR)/crypto $(DEPS_DIR)/libchdr/deps/lzma-19.00/include $(DEPS_DIR)/libchdr/include $(DEPS_DIR)/libchdr/include/libchdr +LOCAL_C_INCLUDES += $(LIBRETRO_COMMON)/include +LOCAL_C_INCLUDES += $(EXTRA_INCLUDES) +LOCAL_LDFLAGS := -Wl,-version-script=$(FRONTEND_DIR)/link.T +LOCAL_LDLIBS := -lz -llog +LOCAL_ARM_MODE := arm + +ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) + LOCAL_ARM_NEON := true +endif include $(BUILD_SHARED_LIBRARY) diff --git a/jni/Application.mk b/jni/Application.mk index f05229c8a..a252a72d7 100644 --- a/jni/Application.mk +++ b/jni/Application.mk @@ -1 +1 @@ -APP_ABI := armeabi armeabi-v7a +APP_ABI := all diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 7715a2b7f..b3e238b66 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1518,7 +1518,7 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { switch (chcr & 0x71000000) { case 0x11000000: ptr = (u8 *)PSXM(madr); - if (ptr == NULL) { + if (ptr == INVALID_PTR) { CDR_LOG_I("psxDma3() Log: *** DMA 3 *** NULL Pointer!\n"); break; } diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 
561aedeed..1ea8d43bf 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -30,7 +30,7 @@ cycle_multiplier_overrides[] = * changing memcard settings is enough to break/unbreak it */ { "SLPS02528", 190 }, { "SLPS02636", 190 }, -#ifdef DRC_DISABLE /* new_dynarec has a hack for this game */ +#if defined(DRC_DISABLE) || defined(LIGHTREC) /* new_dynarec has a hack for this game */ /* Parasite Eve II - internal timer checks */ { "SLUS01042", 125 }, { "SLUS01055", 125 }, diff --git a/libpcsxcore/debug.c b/libpcsxcore/debug.c index d7b2d21ec..7fac2e421 100644 --- a/libpcsxcore/debug.c +++ b/libpcsxcore/debug.c @@ -450,7 +450,7 @@ static void ProcessCommands() { sprintf(reply, "200 %s\r\n", arguments == NULL ? "OK" : arguments); break; case 0x101: - sprintf(reply, "201 %s\r\n", PACKAGE_VERSION); + sprintf(reply, "201 %s\r\n", PCSX_VERSION); break; case 0x102: sprintf(reply, "202 1.0\r\n"); diff --git a/libpcsxcore/disr3000a.c b/libpcsxcore/disr3000a.c index a350da1a1..5a79442df 100644 --- a/libpcsxcore/disr3000a.c +++ b/libpcsxcore/disr3000a.c @@ -78,17 +78,17 @@ typedef char* (*TdisR3000AF)(u32 code, u32 pc); #define _Branch_ (pc + 4 + ((short)_Im_ * 4)) #define _OfB_ _Im_, _nRs_ -#define dName(i) sprintf(ostr, "%s %-7s,", ostr, i) -#define dGPR(i) sprintf(ostr, "%s %8.8x (%s),", ostr, psxRegs.GPR.r[i], disRNameGPR[i]) -#define dCP0(i) sprintf(ostr, "%s %8.8x (%s),", ostr, psxRegs.CP0.r[i], disRNameCP0[i]) -#define dHI() sprintf(ostr, "%s %8.8x (%s),", ostr, psxRegs.GPR.n.hi, "hi") -#define dLO() sprintf(ostr, "%s %8.8x (%s),", ostr, psxRegs.GPR.n.lo, "lo") -#define dImm() sprintf(ostr, "%s %4.4x (%d),", ostr, _Im_, _Im_) -#define dTarget() sprintf(ostr, "%s %8.8x,", ostr, _Target_) -#define dSa() sprintf(ostr, "%s %2.2x (%d),", ostr, _Sa_, _Sa_) -#define dOfB() sprintf(ostr, "%s %4.4x (%8.8x (%s)),", ostr, _Im_, psxRegs.GPR.r[_Rs_], disRNameGPR[_Rs_]) -#define dOffset() sprintf(ostr, "%s %8.8x,", ostr, _Branch_) -#define dCode() sprintf(ostr, "%s %8.8x,", 
ostr, (code >> 6) & 0xffffff) +#define dName(i) snprintf(ostr, sizeof(ostr), "%s %-7s,", ostr, i) +#define dGPR(i) snprintf(ostr, sizeof(ostr), "%s %8.8x (%s),", ostr, psxRegs.GPR.r[i], disRNameGPR[i]) +#define dCP0(i) snprintf(ostr, sizeof(ostr), "%s %8.8x (%s),", ostr, psxRegs.CP0.r[i], disRNameCP0[i]) +#define dHI() snprintf(ostr, sizeof(ostr), "%s %8.8x (%s),", ostr, psxRegs.GPR.n.hi, "hi") +#define dLO() snprintf(ostr, sizeof(ostr), "%s %8.8x (%s),", ostr, psxRegs.GPR.n.lo, "lo") +#define dImm() snprintf(ostr, sizeof(ostr), "%s %4.4x (%d),", ostr, _Im_, _Im_) +#define dTarget() snprintf(ostr, sizeof(ostr), "%s %8.8x,", ostr, _Target_) +#define dSa() snprintf(ostr, sizeof(ostr), "%s %2.2x (%d),", ostr, _Sa_, _Sa_) +#define dOfB() snprintf(ostr, sizeof(ostr), "%s %4.4x (%8.8x (%s)),", ostr, _Im_, psxRegs.GPR.r[_Rs_], disRNameGPR[_Rs_]) +#define dOffset() snprintf(ostr, sizeof(ostr), "%s %8.8x,", ostr, _Branch_) +#define dCode() snprintf(ostr, sizeof(ostr), "%s %8.8x,", ostr, (code >> 6) & 0xffffff) /********************************************************* * Arithmetic with immediate operand * diff --git a/libpcsxcore/gpu.h b/libpcsxcore/gpu.h index 9dfe63475..c924b67b2 100644 --- a/libpcsxcore/gpu.h +++ b/libpcsxcore/gpu.h @@ -21,6 +21,9 @@ * that GPU plugin doesn't. */ +#ifndef __GPU_H__ +#define __GPU_H__ + #define PSXGPU_LCF (1<<31) #define PSXGPU_nBUSY (1<<26) #define PSXGPU_ILACE (1<<22) @@ -38,3 +41,5 @@ HW_GPU_STATUS &= SWAP32(PSXGPU_TIMING_BITS); \ HW_GPU_STATUS |= SWAP32(GPU_readStatus() & ~PSXGPU_TIMING_BITS); \ } + +#endif /* __GPU_H__ */ diff --git a/libpcsxcore/gte_arm.h b/libpcsxcore/gte_arm.h index 6b240db73..0288944ad 100644 --- a/libpcsxcore/gte_arm.h +++ b/libpcsxcore/gte_arm.h @@ -15,6 +15,9 @@ * along with this program; if not, see . 
*/ +#ifndef __GTE_ARM_H__ +#define __GTE_ARM_H__ + void gteRTPS_nf_arm(void *cp2_regs, int opcode); void gteRTPT_nf_arm(void *cp2_regs, int opcode); void gteNCLIP_arm(void *cp2_regs, int opcode); @@ -28,3 +31,5 @@ void gteMACtoIR_lm0(void *cp2_regs); void gteMACtoIR_lm1(void *cp2_regs); void gteMACtoIR_lm0_nf(void *cp2_regs); void gteMACtoIR_lm1_nf(void *cp2_regs); + +#endif /* __GTE_ARM_H__ */ diff --git a/libpcsxcore/gte_divider.h b/libpcsxcore/gte_divider.h index 99b01eb3e..765b971d2 100644 --- a/libpcsxcore/gte_divider.h +++ b/libpcsxcore/gte_divider.h @@ -15,4 +15,9 @@ * along with this program; if not, see . */ +#ifndef __GTE_DIVIDER_H__ +#define __GTE_DIVIDER_H__ + u32 DIVIDE(u16 n, u16 d); + +#endif /* __GTE_DIVIDER_H__ */ diff --git a/libpcsxcore/gte_neon.h b/libpcsxcore/gte_neon.h index 2fd9e4d7f..f371640fc 100644 --- a/libpcsxcore/gte_neon.h +++ b/libpcsxcore/gte_neon.h @@ -15,6 +15,9 @@ * along with this program; if not, see . */ +#ifndef __GTE_NEON_H__ +#define __GTE_NEON_H__ + void gteRTPS_neon(void *cp2_regs, int opcode); void gteRTPT_neon(void *cp2_regs, int opcode); @@ -23,3 +26,5 @@ void gteMVMVA_part_neon(void *cp2_regs, int opcode); // after NEON call only, does not do gteIR void gteMACtoIR_flags_neon(void *cp2_regs, int lm); + +#endif /* __GTE_NEON_H__ */ diff --git a/libpcsxcore/lightrec/mem.h b/libpcsxcore/lightrec/mem.h new file mode 100644 index 000000000..98dbbdeb6 --- /dev/null +++ b/libpcsxcore/lightrec/mem.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2022 Paul Cercueil + */ + +#ifndef __LIGHTREC_MEM_H__ +#define __LIGHTREC_MEM_H__ + +#ifdef LIGHTREC + +#define CODE_BUFFER_SIZE (8 * 1024 * 1024) + +extern void *code_buffer; + +int lightrec_init_mmap(void); +void lightrec_free_mmap(void); + +#else /* if !LIGHTREC */ + +#define lightrec_init_mmap() -1 /* should not be called */ +#define lightrec_free_mmap() + +#undef LIGHTREC_CUSTOM_MAP +#define LIGHTREC_CUSTOM_MAP 0 + +#endif + +#endif /* 
__LIGHTREC_MEM_H__ */ diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index d83ee194c..8010d7a5b 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -69,7 +69,7 @@ static void mmssdd( char *b, char *p ) m = ((m / 10) << 4) | m % 10; s = ((s / 10) << 4) | s % 10; - d = ((d / 10) << 4) | d % 10; + d = ((d / 10) << 4) | d % 10; p[0] = m; p[1] = s; @@ -176,7 +176,7 @@ int LoadCdrom() { // is just below, do it here fake_bios_gpu_setup(); - if (!Config.HLE) { + if (!Config.HLE && !Config.SlowBoot) { // skip BIOS logos psxRegs.pc = psxRegs.GPR.n.ra; return 0; @@ -187,7 +187,7 @@ int LoadCdrom() { READTRACK(); // skip head and sub, and go to the root directory record - dir = (struct iso_directory_record*) &buf[12+156]; + dir = (struct iso_directory_record*) &buf[12+156]; mmssdd(dir->extent, (char*)time); @@ -232,7 +232,7 @@ int LoadCdrom() { psxRegs.pc = SWAP32(tmpHead.pc0); psxRegs.GPR.n.gp = SWAP32(tmpHead.gp0); - psxRegs.GPR.n.sp = SWAP32(tmpHead.s_addr); + psxRegs.GPR.n.sp = SWAP32(tmpHead.s_addr); if (psxRegs.GPR.n.sp == 0) psxRegs.GPR.n.sp = 0x801fff00; tmpHead.t_size = SWAP32(tmpHead.t_size); @@ -248,7 +248,7 @@ int LoadCdrom() { incTime(); READTRACK(); - if (ptr != NULL) memcpy(ptr, buf+12, 2048); + if (ptr != INVALID_PTR) memcpy(ptr, buf+12, 2048); tmpHead.t_size -= 2048; tmpHead.t_addr += 2048; @@ -272,7 +272,7 @@ int LoadCdromFile(const char *filename, EXE_HEADER *head) { READTRACK(); // skip head and sub, and go to the root directory record - dir = (struct iso_directory_record *)&buf[12 + 156]; + dir = (struct iso_directory_record *)&buf[12 + 156]; mmssdd(dir->extent, (char*)time); @@ -294,7 +294,7 @@ int LoadCdromFile(const char *filename, EXE_HEADER *head) { READTRACK(); mem = PSXM(addr); - if (mem) + if (mem != INVALID_PTR) memcpy(mem, buf + 12, 2048); size -= 2048; @@ -327,7 +327,7 @@ int CheckCdrom() { strncpy(CdromLabel, buf + 52, 32); // skip head and sub, and go to the root directory record - dir = (struct iso_directory_record *)&buf[12 + 
156]; + dir = (struct iso_directory_record *)&buf[12 + 156]; mmssdd(dir->extent, (char *)time); @@ -355,6 +355,14 @@ int CheckCdrom() { return -1; } } + /* Workaround for Wild Arms EU/US which has non-standard string causing incorrect region detection */ + if (exename[0] == 'E' && exename[1] == 'X' && exename[2] == 'E' && exename[3] == '\\') { + size_t offset = 4; + size_t i, len = strlen(exename) - offset; + for (i = 0; i < len; i++) + exename[i] = exename[i + offset]; + exename[i] = '\0'; + } } else if (GetCdromFile(mdir, time, "PSX.EXE;1") != -1) { strcpy(exename, "PSX.EXE;1"); strcpy(CdromId, "SLUS99999"); @@ -409,7 +417,9 @@ static int PSXGetFileType(FILE *f) { current = ftell(f); fseek(f, 0L, SEEK_SET); - fread(mybuf, 2048, 1, f); + if (fread(&mybuf, 1, sizeof(mybuf), f) != sizeof(mybuf)) + goto io_fail; + fseek(f, current, SEEK_SET); exe_hdr = (EXE_HEADER *)mybuf; @@ -424,6 +434,12 @@ static int PSXGetFileType(FILE *f) { return COFF_EXE; return INVALID_EXE; + +io_fail: +#ifndef NDEBUG + SysPrintf(_("File IO error in <%s:%s>.\n"), __FILE__, __func__); +#endif + return INVALID_EXE; } // temporary pandora workaround.. 
@@ -432,7 +448,7 @@ size_t fread_to_ram(void *ptr, size_t size, size_t nmemb, FILE *stream) { void *tmp; size_t ret = 0; - + tmp = malloc(size * nmemb); if (tmp) { ret = fread(tmp, size, nmemb, stream); @@ -451,8 +467,8 @@ int Load(const char *ExePath) { u32 section_address, section_size; void *mem; - strncpy(CdromId, "SLUS99999", 9); - strncpy(CdromLabel, "SLUS_999.99", 11); + strcpy(CdromId, "SLUS99999"); + strcpy(CdromLabel, "SLUS_999.99"); tmpFile = fopen(ExePath, "rb"); if (tmpFile == NULL) { @@ -462,19 +478,19 @@ int Load(const char *ExePath) { type = PSXGetFileType(tmpFile); switch (type) { case PSX_EXE: - fread(&tmpHead,sizeof(EXE_HEADER),1,tmpFile); + if (fread(&tmpHead, 1, sizeof(EXE_HEADER), tmpFile) != sizeof(EXE_HEADER)) + goto fail_io; section_address = SWAP32(tmpHead.t_addr); section_size = SWAP32(tmpHead.t_size); mem = PSXM(section_address); - if (mem != NULL) { - fseek(tmpFile, 0x800, SEEK_SET); + if (mem != INVALID_PTR) { + fseek(tmpFile, 0x800, SEEK_SET); fread_to_ram(mem, section_size, 1, tmpFile); psxCpu->Clear(section_address, section_size / 4); } - fclose(tmpFile); psxRegs.pc = SWAP32(tmpHead.pc0); psxRegs.GPR.n.gp = SWAP32(tmpHead.gp0); - psxRegs.GPR.n.sp = SWAP32(tmpHead.s_addr); + psxRegs.GPR.n.sp = SWAP32(tmpHead.s_addr); if (psxRegs.GPR.n.sp == 0) psxRegs.GPR.n.sp = 0x801fff00; retval = 0; @@ -482,25 +498,29 @@ int Load(const char *ExePath) { case CPE_EXE: fseek(tmpFile, 6, SEEK_SET); /* Something tells me we should go to 4 and read the "08 00" here... 
*/ do { - fread(&opcode, 1, 1, tmpFile); + if (fread(&opcode, 1, sizeof(opcode), tmpFile) != sizeof(opcode)) + goto fail_io; switch (opcode) { case 1: /* Section loading */ - fread(&section_address, 4, 1, tmpFile); - fread(&section_size, 4, 1, tmpFile); + if (fread(&section_address, 1, sizeof(section_address), tmpFile) != sizeof(section_address)) + goto fail_io; + if (fread(&section_size, 1, sizeof(section_size), tmpFile) != sizeof(section_size)) + goto fail_io; section_address = SWAPu32(section_address); section_size = SWAPu32(section_size); #ifdef EMU_LOG EMU_LOG("Loading %08X bytes from %08X to %08X\n", section_size, ftell(tmpFile), section_address); #endif mem = PSXM(section_address); - if (mem != NULL) { + if (mem != INVALID_PTR) { fread_to_ram(mem, section_size, 1, tmpFile); psxCpu->Clear(section_address, section_size / 4); } break; case 3: /* register loading (PC only?) */ fseek(tmpFile, 2, SEEK_CUR); /* unknown field */ - fread(&psxRegs.pc, 4, 1, tmpFile); + if (fread(&psxRegs.pc, 1, sizeof(psxRegs.pc), tmpFile) != sizeof(psxRegs.pc)) + goto fail_io; psxRegs.pc = SWAPu32(psxRegs.pc); break; case 0: /* End of file */ @@ -529,7 +549,16 @@ int Load(const char *ExePath) { CdromLabel[0] = '\0'; } + if (tmpFile) + fclose(tmpFile); return retval; + +fail_io: +#ifndef NDEBUG + SysPrintf(_("File IO error in <%s:%s>.\n"), __FILE__, __func__); +#endif + fclose(tmpFile); + return -1; } // STATES @@ -563,7 +592,7 @@ struct PcsxSaveFuncs SaveFuncs = { zlib_open, zlib_read, zlib_write, zlib_seek, zlib_close }; -static const char PcsxHeader[32] = "STv4 PCSX v" PACKAGE_VERSION; +static const char PcsxHeader[32] = "STv4 PCSX v" PCSX_VERSION; // Savestate Versioning! // If you make changes to the savestate version, please increment the value below.
diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index c09e9eca6..dc17f2d45 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -190,7 +190,7 @@ void new_dyna_freeze(void *f, int mode) //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded"); } -#ifndef DRC_DISABLE +#if !defined(DRC_DISABLE) && !defined(LIGHTREC) /* GTE stuff */ void *gte_handlers[64]; diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index 0423310bc..c70ed67fc 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -702,7 +702,7 @@ void CALLBACK clearDynarec(void) { int LoadPlugins() { int ret; - char Plugin[MAXPATHLEN]; + char Plugin[MAXPATHLEN * 2]; ReleasePlugins(); SysLibError(); @@ -807,7 +807,7 @@ int ReloadCdromPlugin() if (UsingIso()) { LoadCDRplugin(NULL); } else { - char Plugin[MAXPATHLEN]; + char Plugin[MAXPATHLEN * 2]; sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Cdr); if (LoadCDRplugin(Plugin) == -1) return -1; } @@ -820,7 +820,7 @@ void SetIsoFile(const char *filename) { IsoFile[0] = '\0'; return; } - strncpy(IsoFile, filename, MAXPATHLEN); + strncpy(IsoFile, filename, MAXPATHLEN - 1); } const char *GetIsoFile(void) { diff --git a/libpcsxcore/ppf.c b/libpcsxcore/ppf.c index edebdd037..18c5413ed 100644 --- a/libpcsxcore/ppf.c +++ b/libpcsxcore/ppf.c @@ -183,7 +183,7 @@ void BuildPPFCache() { char method, undo = 0, blockcheck = 0; int dizlen, dizyn; unsigned char ppfmem[512]; - char szPPF[MAXPATHLEN]; + char szPPF[MAXPATHLEN * 2]; int count, seekpos, pos; u32 anz; // use 32-bit to avoid stupid overflows s32 ladr, off, anx; @@ -212,7 +212,8 @@ void BuildPPFCache() { if (ppffile == NULL) return; memset(buffer, 0, 5); - fread(buffer, 3, 1, ppffile); + if (fread(buffer, 1, 3, ppffile) != 3) + goto fail_io; if (strcmp(buffer, "PPF") != 0) { SysPrintf(_("Invalid PPF patch: %s.\n"), szPPF); @@ -235,12 +236,14 @@ void BuildPPFCache() { fseek(ppffile, -8, SEEK_END); memset(buffer, 
0, 5); - fread(buffer, 4, 1, ppffile); + if (fread(buffer, 1, 4, ppffile) != 4) + goto fail_io; if (strcmp(".DIZ", buffer) != 0) { dizyn = 0; } else { - fread(&dizlen, 4, 1, ppffile); + if (fread(&dizlen, 1, 4, ppffile) != 4) + goto fail_io; dizlen = SWAP32(dizlen); dizyn = 1; } @@ -266,12 +269,15 @@ void BuildPPFCache() { fseek(ppffile, -6, SEEK_END); memset(buffer, 0, 5); - fread(buffer, 4, 1, ppffile); + if (fread(buffer, 1, 4, ppffile) != 4) + goto fail_io; dizlen = 0; if (strcmp(".DIZ", buffer) == 0) { fseek(ppffile, -2, SEEK_END); - fread(&dizlen, 2, 1, ppffile); + // TODO: Endian/size unsafe? + if (fread(&dizlen, 1, 2, ppffile) != 2) + goto fail_io; dizlen = SWAP32(dizlen); dizlen += 36; } @@ -298,13 +304,19 @@ void BuildPPFCache() { // now do the data reading do { fseek(ppffile, seekpos, SEEK_SET); - fread(&pos, 4, 1, ppffile); + if (fread(&pos, 1, sizeof(pos), ppffile) != sizeof(pos)) + goto fail_io; pos = SWAP32(pos); - if (method == 2) fread(buffer, 4, 1, ppffile); // skip 4 bytes on ppf3 (no int64 support here) + if (method == 2) { + // skip 4 bytes on ppf3 (no int64 support here) + if (fread(buffer, 1, 4, ppffile) != 4) + goto fail_io; + } anz = fgetc(ppffile); - fread(ppfmem, anz, 1, ppffile); + if (fread(ppfmem, 1, anz, ppffile) != anz) + goto fail_io; ladr = pos / CD_FRAMESIZE_RAW; off = pos % CD_FRAMESIZE_RAW; @@ -331,6 +343,12 @@ void BuildPPFCache() { FillPPFCache(); // build address array SysPrintf(_("Loaded PPF %d.0 patch: %s.\n"), method + 1, szPPF); + +fail_io: +#ifndef NDEBUG + SysPrintf(_("File IO error in <%s:%s>.\n"), __FILE__, __func__); +#endif + fclose(ppffile); } // redump.org SBI files, slightly different handling from PCSX-Reloaded @@ -353,12 +371,15 @@ int LoadSBI(const char *fname, int sector_count) { } // 4-byte SBI header - fread(buffer, 1, 4, sbihandle); + if (fread(buffer, 1, 4, sbihandle) != 4) + goto fail_io; + while (1) { s = fread(sbitime, 1, 3, sbihandle); if (s != 3) - break; - fread(&t, 1, 1, sbihandle); + goto fail_io; 
+ if (fread(&t, 1, sizeof(t), sbihandle) != sizeof(t)) + goto fail_io; switch (t) { default: case 1: @@ -379,8 +400,14 @@ int LoadSBI(const char *fname, int sector_count) { } fclose(sbihandle); - return 0; + +fail_io: +#ifndef NDEBUG + SysPrintf(_("File IO error in <%s:%s>.\n"), __FILE__, __func__); +#endif + fclose(sbihandle); + return -1; } void UnloadSBI(void) { diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 8e993c3f4..203f85d15 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -18,7 +18,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * ***************************************************************************/ -/* Gameblabla 2018-2019 : +/* Gameblabla 2018-2019 : * Numerous changes to bios calls as well as improvements in order to conform to nocash's findings * for the PSX bios calls. Thanks senquack for helping out with some of the changes * and helping to spot issues and refine my patches. @@ -33,6 +33,7 @@ #include "psxbios.h" #include "psxhw.h" #include "gpu.h" +#include "sio.h" #include #if (defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__) @@ -221,7 +222,7 @@ typedef struct { u32 func; } TCB; -typedef struct { +typedef struct { u32 _pc0; u32 gp0; u32 t_addr; @@ -259,7 +260,7 @@ static int pad_buf1len, pad_buf2len; static int pad_stopped = 0; static u32 regs[35]; -static EvCB *Event; +static EvCB *EventCB; static EvCB *HwEV; // 0xf0 static EvCB *EvEV; // 0xf1 static EvCB *RcEV; // 0xf2 @@ -271,10 +272,10 @@ static u32 *heap_addr = NULL; static u32 *heap_end = NULL; static u32 SysIntRP[8]; static int CardState = -1; -static TCB Thread[8]; +static TCB ThreadCB[8]; static int CurThread = 0; static FileDesc FDesc[32]; -static u32 card_active_chan; +static u32 card_active_chan = 0; boolean hleSoftCall = FALSE; @@ -303,12 +304,12 @@ static inline void softCall2(u32 pc) { } static inline void DeliverEvent(u32 ev, u32 spec) { - if (Event[ev][spec].status != EvStACTIVE) return; + if 
(EventCB[ev][spec].status != EvStACTIVE) return; -// Event[ev][spec].status = EvStALREADY; - if (Event[ev][spec].mode == EvMdINTR) { - softCall2(Event[ev][spec].fhandler); - } else Event[ev][spec].status = EvStALREADY; +// EventCB[ev][spec].status = EvStALREADY; + if (EventCB[ev][spec].mode == EvMdINTR) { + softCall2(EventCB[ev][spec].fhandler); + } else EventCB[ev][spec].status = EvStALREADY; } static unsigned interrupt_r26=0x8004E8B0; @@ -372,7 +373,7 @@ void psxBios_getc(void) // 0x03, 0x35 #endif v0 = -1; - if (pa1) { + if (pa1 != INVALID_PTR) { switch (a0) { case 2: buread(pa1, 1, 1); break; case 3: buread(pa1, 2, 1); break; @@ -391,7 +392,7 @@ void psxBios_putc(void) // 0x09, 0x3B PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x09]); #endif v0 = -1; - if (!pa1) { + if (pa1 == INVALID_PTR) { pc0 = ra; return; } @@ -700,7 +701,7 @@ void psxBios_index() { // 0x1c pc0 = ra; return; } - + do { if (*p == a1) { v0 = a0 + (p - (char *)Ra0); @@ -929,7 +930,7 @@ void psxBios_memcmp() { // 0x2d void psxBios_memchr() { // 0x2e char *p = (char *)Ra0; - + if (a0 == 0 || a2 > 0x7FFFFFFF) { pc0 = ra; @@ -1260,7 +1261,7 @@ void psxBios_printf() { // 0x3f void *psp; psp = PSXM(sp); - if (psp) { + if (psp != INVALID_PTR) { memcpy(save, psp, 4 * 4); psxMu32ref(sp) = SWAP32((u32)a0); psxMu32ref(sp + 4) = SWAP32((u32)a1); @@ -1429,7 +1430,7 @@ void psxBios_GPU_dw() { // 0x46 } pc0 = ra; -} +} void psxBios_mem2vram() { // 0x47 int size; @@ -1457,8 +1458,8 @@ void psxBios_SendGPU() { // 0x48 void psxBios_GPU_cw() { // 0x49 gpuSyncPluginSR(); GPU_writeData(a0); - pc0 = ra; v0 = HW_GPU_STATUS; + pc0 = ra; } void psxBios_GPU_cwb() { // 0x4a @@ -1509,7 +1510,7 @@ void psxBios_LoadExec() { // 51 #endif s_addr = a1; s_size = a2; - a1 = 0xf000; + a1 = 0xf000; psxBios_Load(); header->S_addr = s_addr; @@ -1564,7 +1565,7 @@ void psxBios_SetMem() { // 9f psxHu32ref(0x1060) = SWAP32(new | 0x300); psxMu32ref(0x060) = a0; SysPrintf("Change effective memory : %d MBytes\n",a0); - + default: 
SysPrintf("Effective memory must be 2/8 MBytes\n"); break; @@ -1584,16 +1585,16 @@ void psxBios__card_info() { // ab #ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s: %x\n", biosA0n[0xab], a0); #endif - u32 ret; + u32 ret, port; card_active_chan = a0; - - switch (card_active_chan) - { - case 0x00: case 0x01: case 0x02: case 0x03: - ret = Config.Mcd1[0] ? 0x2 : 0x8; - break; - case 0x10: case 0x11: case 0x12: case 0x13: - ret = Config.Mcd2[0] ? 0x2 : 0x8; + port = card_active_chan >> 4; + + switch (port) { + case 0x0: + case 0x1: + ret = 0x2; + if (McdDisable[port & 1]) + ret = 0x8; break; default: #ifdef PSXBIOS_LOG @@ -1602,8 +1603,12 @@ void psxBios__card_info() { // ab ret = 0x11; break; } - - DeliverEvent(0x11, 0x2); // 0xf4000001, 0x0004 + + if (McdDisable[0] && McdDisable[1]) + ret = 0x8; + + DeliverEvent(0x11, 0x2); // 0xf0000011, 0x0004 +// DeliverEvent(0x81, 0x2); // 0xf4000001, 0x0004 DeliverEvent(0x81, ret); // 0xf4000001, 0x0004 v0 = 1; pc0 = ra; } @@ -1692,14 +1697,14 @@ void psxBios_ResetRCnt() { // 06 } -/* gets ev for use with Event */ +/* gets ev for use with EventCB */ #define GetEv() \ ev = (a0 >> 24) & 0xf; \ if (ev == 0xf) ev = 0x5; \ ev*= 32; \ ev+= a0&0x1f; -/* gets spec for use with Event */ +/* gets spec for use with EventCB */ #define GetSpec() \ spec = 0; \ switch (a1) { \ @@ -1737,9 +1742,9 @@ void psxBios_OpenEvent() { // 08 PSXBIOS_LOG("psxBios_%s %x,%x (class:%x, spec:%x, mode:%x, func:%x)\n", biosB0n[0x08], ev, spec, a0, a1, a2, a3); #endif - Event[ev][spec].status = EvStWAIT; - Event[ev][spec].mode = a2; - Event[ev][spec].fhandler = a3; + EventCB[ev][spec].status = EvStWAIT; + EventCB[ev][spec].mode = a2; + EventCB[ev][spec].fhandler = a3; v0 = ev | (spec << 8); pc0 = ra; @@ -1755,7 +1760,7 @@ void psxBios_CloseEvent() { // 09 PSXBIOS_LOG("psxBios_%s %x,%x\n", biosB0n[0x09], ev, spec); #endif - Event[ev][spec].status = EvStUNUSED; + EventCB[ev][spec].status = EvStUNUSED; v0 = 1; pc0 = ra; } @@ -1768,17 +1773,17 @@ void psxBios_WaitEvent() 
{ // 0a #ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s %x,%x\n", biosB0n[0x0a], ev, spec); #endif - if (Event[ev][spec].status == EvStUNUSED) + if (EventCB[ev][spec].status == EvStUNUSED) { v0 = 0; - pc0 = ra; + pc0 = ra; return; } - if (Event[ev][spec].status == EvStALREADY) + if (EventCB[ev][spec].status == EvStALREADY) { /* Callback events (mode=EvMdINTR) do never set the ready flag (and thus WaitEvent would hang forever). */ - if (!(Event[ev][spec].mode == EvMdINTR)) Event[ev][spec].status = EvStACTIVE; + if (!(EventCB[ev][spec].mode == EvMdINTR)) EventCB[ev][spec].status = EvStACTIVE; v0 = 1; pc0 = ra; return; @@ -1794,12 +1799,12 @@ void psxBios_TestEvent() { // 0b ev = a0 & 0xff; spec = (a0 >> 8) & 0xff; - if (Event[ev][spec].status == EvStALREADY) + if (EventCB[ev][spec].status == EvStALREADY) { - if (!(Event[ev][spec].mode == EvMdINTR)) Event[ev][spec].status = EvStACTIVE; + if (!(EventCB[ev][spec].mode == EvMdINTR)) EventCB[ev][spec].status = EvStACTIVE; v0 = 1; - } - else + } + else { v0 = 0; } @@ -1821,7 +1826,7 @@ void psxBios_EnableEvent() { // 0c PSXBIOS_LOG("psxBios_%s %x,%x\n", biosB0n[0x0c], ev, spec); #endif - Event[ev][spec].status = EvStACTIVE; + EventCB[ev][spec].status = EvStACTIVE; v0 = 1; pc0 = ra; } @@ -1836,7 +1841,7 @@ void psxBios_DisableEvent() { // 0d PSXBIOS_LOG("psxBios_%s %x,%x\n", biosB0n[0x0d], ev, spec); #endif - Event[ev][spec].status = EvStWAIT; + EventCB[ev][spec].status = EvStWAIT; v0 = 1; pc0 = ra; } @@ -1850,7 +1855,7 @@ void psxBios_OpenTh() { // 0e for (th=1; th<8; th++) { - if (Thread[th].status == 0) break; + if (ThreadCB[th].status == 0) break; } if (th == 8) { @@ -1867,10 +1872,10 @@ void psxBios_OpenTh() { // 0e PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x0e], th); #endif - Thread[th].status = 1; - Thread[th].func = a0; - Thread[th].reg[29] = a1; - Thread[th].reg[28] = a2; + ThreadCB[th].status = 1; + ThreadCB[th].func = a0; + ThreadCB[th].reg[29] = a1; + ThreadCB[th].reg[28] = a2; v0 = th; pc0 = ra; } @@ -1887,8 +1892,8 
@@ void psxBios_CloseTh() { // 0f #endif /* The return value is always 1 (even if the handle was already closed). */ v0 = 1; - if (Thread[th].status != 0) { - Thread[th].status = 0; + if (ThreadCB[th].status != 0) { + ThreadCB[th].status = 0; } pc0 = ra; @@ -1906,18 +1911,18 @@ void psxBios_ChangeTh() { // 10 #endif /* The return value is always 1. */ v0 = 1; - if (Thread[th].status == 0 || CurThread == th) { + if (ThreadCB[th].status == 0 || CurThread == th) { pc0 = ra; } else { - if (Thread[CurThread].status == 2) { - Thread[CurThread].status = 1; - Thread[CurThread].func = ra; - memcpy(Thread[CurThread].reg, psxRegs.GPR.r, 32*4); + if (ThreadCB[CurThread].status == 2) { + ThreadCB[CurThread].status = 1; + ThreadCB[CurThread].func = ra; + memcpy(ThreadCB[CurThread].reg, psxRegs.GPR.r, 32*4); } - memcpy(psxRegs.GPR.r, Thread[th].reg, 32*4); - pc0 = Thread[th].func; - Thread[th].status = 2; + memcpy(psxRegs.GPR.r, ThreadCB[th].reg, 32*4); + pc0 = ThreadCB[th].func; + ThreadCB[th].status = 2; CurThread = th; } } @@ -2021,26 +2026,27 @@ void psxBios_UnDeliverEvent() { // 0x20 PSXBIOS_LOG("psxBios_%s %x,%x\n", biosB0n[0x20], ev, spec); #endif - if (Event[ev][spec].status == EvStALREADY && - Event[ev][spec].mode == EvMdNOINTR) - Event[ev][spec].status = EvStACTIVE; + if (EventCB[ev][spec].status == EvStALREADY && + EventCB[ev][spec].mode == EvMdNOINTR) + EventCB[ev][spec].status = EvStACTIVE; pc0 = ra; } char ffile[64], *pfile; int nfile; + static void buopen(int mcd, char *ptr, char *cfg) { int i; - char *fptr = ptr; + char *mcd_data = ptr; strcpy(FDesc[1 + mcd].name, Ra0+5); FDesc[1 + mcd].offset = 0; FDesc[1 + mcd].mode = a1; for (i=1; i<16; i++) { - fptr += 128; + const char *fptr = mcd_data + 128 * i; if ((*fptr & 0xF0) != 0x50) continue; if (strcmp(FDesc[1 + mcd].name, fptr+0xa)) continue; FDesc[1 + mcd].mcfile = i; @@ -2049,12 +2055,11 @@ static void buopen(int mcd, char *ptr, char *cfg) break; } if (a1 & 0x200 && v0 == -1) { /* FCREAT */ - fptr = ptr; for (i=1; 
i<16; i++) { int j, xor, nblk = a1 >> 16; - u8 *pptr, *fptr2; + char *pptr, *fptr2; + char *fptr = mcd_data + 128 * i; - fptr += 128; if ((*fptr & 0xF0) != 0xa0) continue; FDesc[1 + mcd].mcfile = i; @@ -2064,12 +2069,12 @@ static void buopen(int mcd, char *ptr, char *cfg) fptr[6] = 0x00; fptr[7] = 0x00; strcpy(fptr+0xa, FDesc[1 + mcd].name); - pptr = fptr2 = (u8 *)fptr; + pptr = fptr2 = fptr; for(j=2; j<=nblk; j++) { int k; for(i++; i<16; i++) { fptr2 += 128; - + memset(fptr2, 0, 128); fptr2[0] = j < nblk ? 0x52 : 0x53; pptr[8] = i - 1; @@ -2107,7 +2112,7 @@ void psxBios_open() { // 0x32 v0 = -1; - if (pa0) { + if (pa0 != INVALID_PTR) { if (!strncmp(pa0, "bu00", 4)) { buopen(1, Mcd1Data, Config.Mcd1); } @@ -2161,13 +2166,13 @@ void psxBios_read() { // 0x34 v0 = -1; - if (pa1) { + if (pa1 != INVALID_PTR) { switch (a0) { case 2: buread(pa1, 1, a2); break; case 3: buread(pa1, 2, a2); break; } } - + pc0 = ra; } @@ -2184,7 +2189,7 @@ void psxBios_write() { // 0x35/0x03 #endif v0 = -1; - if (!pa1) { + if (pa1 == INVALID_PTR) { pc0 = ra; return; } @@ -2233,7 +2238,7 @@ void psxBios_puts() { // 3e/3f /* To avoid any issues with different behaviour when using the libc's own strlen instead. * We want to mimic the PSX's behaviour in this case for bufile. 
*/ -static size_t strlen_internal(char* p) +static size_t strlen_internal(char* p) { size_t size_of_array = 0; while (*p++) size_of_array++; @@ -2274,7 +2279,7 @@ static size_t strlen_internal(char* p) /* * struct DIRENTRY* firstfile(char *name,struct DIRENTRY *dir); */ - + void psxBios_firstfile() { // 42 struct DIRENTRY *dir = (struct DIRENTRY *)Ra1; void *pa0 = Ra0; @@ -2288,7 +2293,7 @@ void psxBios_firstfile() { // 42 v0 = 0; - if (pa0) { + if (pa0 != INVALID_PTR) { strcpy(ffile, pa0); pfile = ffile+5; nfile = 0; @@ -2366,7 +2371,7 @@ void psxBios_rename() { // 44 v0 = 0; - if (pa0 && pa1) { + if (pa0 != INVALID_PTR && pa1 != INVALID_PTR) { if (!strncmp(pa0, "bu00", 4) && !strncmp(pa1, "bu00", 4)) { burename(1); } @@ -2408,7 +2413,7 @@ void psxBios_delete() { // 45 v0 = 0; - if (pa0) { + if (pa0 != INVALID_PTR) { if (!strncmp(pa0, "bu00", 4)) { budelete(1); } @@ -2471,7 +2476,7 @@ void psxBios__card_write() { // 0x4e card_active_chan = a0; port = a0 >> 4; - if (pa2) { + if (pa2 != INVALID_PTR) { if (port == 0) { memcpy(Mcd1Data + a1 * 128, pa2, 128); SaveMcd(Config.Mcd1, Mcd1Data, a1 * 128, 128); @@ -2507,7 +2512,7 @@ void psxBios__card_read() { // 0x4f card_active_chan = a0; port = a0 >> 4; - if (pa2) { + if (pa2 != INVALID_PTR) { if (port == 0) { memcpy(pa2, Mcd1Data + a1 * 128, 128); } else { @@ -2531,7 +2536,7 @@ void psxBios__new_card() { // 0x50 /* According to a user, this allows Final Fantasy Tactics to save/load properly */ void psxBios__get_error(void) // 55 -{ +{ v0 = 0; pc0 = ra; } @@ -2607,7 +2612,7 @@ void psxBios__card_chan() { // 0x58 void psxBios_ChangeClearPad() { // 5b #ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x5b], a0); -#endif +#endif pc0 = ra; } @@ -2675,11 +2680,11 @@ void psxBios_ChangeClearRCnt() { // 0a pc0 = ra; } -void psxBios_dummy() { +void psxBios_dummy() { #ifdef PSXBIOS_LOG PSXBIOS_LOG("unk %x call: %x\n", pc0 & 0x1fffff, t1); #endif - pc0 = ra; + pc0 = ra; } void (*biosA0[256])(); @@ -2690,7 +2695,7 @@ void 
(*biosC0[256])(); void psxBiosInit() { u32 base, size; - u32 *ptr; + u32 *ptr; int i; uLongf len; @@ -2773,7 +2778,7 @@ void psxBiosInit() { biosA0[0x39] = psxBios_InitHeap; //biosA0[0x3a] = psxBios__exit; biosA0[0x3b] = psxBios_getchar; - biosA0[0x3c] = psxBios_putchar; + biosA0[0x3c] = psxBios_putchar; //biosA0[0x3d] = psxBios_gets; //biosA0[0x40] = psxBios_sys_a0_40; //biosA0[0x41] = psxBios_LoadTest; @@ -2789,7 +2794,7 @@ void psxBiosInit() { biosA0[0x4b] = psxBios_GPU_SendPackets; biosA0[0x4c] = psxBios_sys_a0_4c; biosA0[0x4d] = psxBios_GPU_GetGPUStatus; - //biosA0[0x4e] = psxBios_GPU_sync; + //biosA0[0x4e] = psxBios_GPU_sync; //biosA0[0x4f] = psxBios_sys_a0_4f; //biosA0[0x50] = psxBios_sys_a0_50; biosA0[0x51] = psxBios_LoadExec; @@ -2841,10 +2846,10 @@ void psxBiosInit() { //biosA0[0x7f] = psxBios_sys_a0_7f; //biosA0[0x80] = psxBios_sys_a0_80; //biosA0[0x81] = psxBios_sys_a0_81; - //biosA0[0x82] = psxBios_sys_a0_82; + //biosA0[0x82] = psxBios_sys_a0_82; //biosA0[0x83] = psxBios_sys_a0_83; //biosA0[0x84] = psxBios_sys_a0_84; - //biosA0[0x85] = psxBios__96_CdStop; + //biosA0[0x85] = psxBios__96_CdStop; //biosA0[0x86] = psxBios_sys_a0_86; //biosA0[0x87] = psxBios_sys_a0_87; //biosA0[0x88] = psxBios_sys_a0_88; @@ -2996,7 +3001,7 @@ void psxBiosInit() { //biosC0[0x07] = psxBios_InstallExeptionHandler; //biosC0[0x08] = psxBios_SysInitMemory; //biosC0[0x09] = psxBios_SysInitKMem; - biosC0[0x0a] = psxBios_ChangeClearRCnt; + biosC0[0x0a] = psxBios_ChangeClearRCnt; //biosC0[0x0b] = psxBios_SystemError; //biosC0[0x0c] = psxBios_InitDefInt; //biosC0[0x0d] = psxBios_sys_c0_0d; @@ -3019,14 +3024,14 @@ void psxBiosInit() { /**/ base = 0x1000; size = sizeof(EvCB) * 32; - Event = (void *)&psxR[base]; base += size * 6; - memset(Event, 0, size * 6); - HwEV = Event; - EvEV = Event + 32; - RcEV = Event + 32 * 2; - UeEV = Event + 32 * 3; - SwEV = Event + 32 * 4; - ThEV = Event + 32 * 5; + EventCB = (void *)&psxR[base]; base += size * 6; + memset(EventCB, 0, size * 6); + HwEV = 
EventCB; + EvEV = EventCB + 32; + RcEV = EventCB + 32 * 2; + UeEV = EventCB + 32 * 3; + SwEV = EventCB + 32 * 4; + ThEV = EventCB + 32 * 5; ptr = (u32 *)&psxM[0x0874]; // b0 table ptr[0] = SWAPu32(0x4c54 - 0x884); @@ -3035,8 +3040,8 @@ void psxBiosInit() { ptr[6] = SWAPu32(0xc80); memset(SysIntRP, 0, sizeof(SysIntRP)); - memset(Thread, 0, sizeof(Thread)); - Thread[0].status = 2; // main thread + memset(ThreadCB, 0, sizeof(ThreadCB)); + ThreadCB[0].status = 2; // main thread pad_stopped = 1; jmp_int = NULL; @@ -3064,7 +3069,7 @@ void psxBiosInit() { */ // opcode HLE psxRu32ref(0x0000) = SWAPu32((0x3b << 26) | 4); - /* Whatever this does, it actually breaks CTR, even without the uninitiliazed memory patch. + /* Whatever this does, it actually breaks CTR, even without the uninitiliazed memory patch. Normally games shouldn't read from address 0 yet they do. See explanation below in details. */ //psxMu32ref(0x0000) = SWAPu32((0x3b << 26) | 0); psxMu32ref(0x00a0) = SWAPu32((0x3b << 26) | 1); @@ -3092,14 +3097,14 @@ void psxBiosInit() { psxHu32ref(0x1060) = SWAPu32(0x00000b88); hleSoftCall = FALSE; - + /* Some games like R-Types, CTR, Fade to Black read from adress 0x00000000 due to uninitialized pointers. See Garbage Area at Address 00000000h in Nocash PSX Specfications for more information. Here are some examples of games not working with this fix in place : R-type won't get past the Irem logo if not implemented. Crash Team Racing will softlock after the Sony logo. */ - + psxMu32ref(0x0000) = SWAPu32(0x00000003); /* But overwritten by 00000003h after soon. @@ -3280,7 +3285,7 @@ void psxBiosException() { break; case 2: // ExitCritical - enable irq's - psxRegs.CP0.n.Status |= 0x404; + psxRegs.CP0.n.Status |= 0x404; break; /* Normally this should cover SYS(00h, SYS(04h but they don't do anything relevant so... 
*/ default: @@ -3339,7 +3344,7 @@ void psxBiosFreeze(int Mode) { bfreezes(regs); bfreezes(SysIntRP); bfreezel(&CardState); - bfreezes(Thread); + bfreezes(ThreadCB); bfreezel(&CurThread); bfreezes(FDesc); bfreezel(&card_active_chan); diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index 382d91949..522abbc8a 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -51,7 +51,9 @@ extern "C" { #include #include #include +#ifndef __SWITCH__ #include +#endif #include // Define types @@ -130,8 +132,10 @@ typedef struct { boolean Mdec; boolean PsxAuto; boolean Cdda; + boolean AsyncCD; boolean CHD_Precache; /* loads disk image into memory, works with CHD only. */ boolean HLE; + boolean SlowBoot; boolean Debug; boolean PsxOut; boolean UseNet; diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index 32a18475e..e19b78195 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -376,7 +376,7 @@ void psxRcntUpdate() psxRcntSet(); -#ifndef NDEBUG +#if 0 //ndef NDEBUG DebugVSync(); #endif } diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index 70e12f6e1..d7c4caece 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -45,7 +45,7 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU PSXDMA_LOG("*** DMA4 SPU - mem2spu *** %x addr = %x size = %x\n", chcr, madr, bcr); #endif ptr = (u16 *)PSXM(madr); - if (ptr == NULL) { + if (ptr == INVALID_PTR) { #ifdef CPU_LOG CPU_LOG("*** DMA4 SPU - mem2spu *** NULL Pointer!!!\n"); #endif @@ -62,7 +62,7 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU PSXDMA_LOG("*** DMA4 SPU - spu2mem *** %x addr = %x size = %x\n", chcr, madr, bcr); #endif ptr = (u16 *)PSXM(madr); - if (ptr == NULL) { + if (ptr == INVALID_PTR) { #ifdef CPU_LOG CPU_LOG("*** DMA4 SPU - spu2mem *** NULL Pointer!!!\n"); #endif @@ -137,7 +137,7 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU PSXDMA_LOG("*** DMA2 GPU - vram2mem *** %lx addr = %lx size = %lx\n", chcr, madr, bcr); #endif ptr = (u32 
*)PSXM(madr); - if (ptr == NULL) { + if (ptr == INVALID_PTR) { #ifdef CPU_LOG CPU_LOG("*** DMA2 GPU - vram2mem *** NULL Pointer!!!\n"); #endif @@ -159,7 +159,7 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU PSXDMA_LOG("*** DMA 2 - GPU mem2vram *** %lx addr = %lx size = %lx\n", chcr, madr, bcr); #endif ptr = (u32 *)PSXM(madr); - if (ptr == NULL) { + if (ptr == INVALID_PTR) { #ifdef CPU_LOG CPU_LOG("*** DMA2 GPU - mem2vram *** NULL Pointer!!!\n"); #endif @@ -225,7 +225,7 @@ void psxDma6(u32 madr, u32 bcr, u32 chcr) { #endif if (chcr == 0x11000002) { - if (mem == NULL) { + if (mem == INVALID_PTR) { #ifdef CPU_LOG CPU_LOG("*** DMA6 OT *** NULL Pointer!!!\n"); #endif diff --git a/libpcsxcore/psxhle.c b/libpcsxcore/psxhle.c index 064d40115..7ca81b477 100644 --- a/libpcsxcore/psxhle.c +++ b/libpcsxcore/psxhle.c @@ -23,6 +23,12 @@ #include "psxhle.h" +#if 0 +#define PSXHLE_LOG SysPrintf +#else +#define PSXHLE_LOG(...) +#endif + static void hleDummy() { psxRegs.pc = psxRegs.GPR.n.ra; @@ -54,10 +60,10 @@ static void hleC0() { } static void hleBootstrap() { // 0xbfc00000 - SysPrintf("hleBootstrap\n"); + PSXHLE_LOG("hleBootstrap\n"); CheckCdrom(); LoadCdrom(); - SysPrintf("CdromLabel: \"%s\": PC = %8.8lx (SP = %8.8lx)\n", CdromLabel, psxRegs.pc, psxRegs.GPR.n.sp); + PSXHLE_LOG("CdromLabel: \"%s\": PC = %8.8lx (SP = %8.8lx)\n", CdromLabel, psxRegs.pc, psxRegs.GPR.n.sp); } typedef struct { @@ -77,7 +83,7 @@ typedef struct { static void hleExecRet() { EXEC *header = (EXEC*)PSXM(psxRegs.GPR.n.s0); - SysPrintf("ExecRet %x: %x\n", psxRegs.GPR.n.s0, header->ret); + PSXHLE_LOG("ExecRet %x: %x\n", psxRegs.GPR.n.s0, header->ret); psxRegs.GPR.n.ra = header->ret; psxRegs.GPR.n.sp = header->_sp; @@ -89,7 +95,7 @@ static void hleExecRet() { psxRegs.pc = psxRegs.GPR.n.ra; } -void (* const psxHLEt[])() = { +void (* const psxHLEt[8])() = { hleDummy, hleA0, hleB0, hleC0, hleBootstrap, hleExecRet, hleDummy, hleDummy diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c 
index 4ae9417a8..3d0836459 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -1101,7 +1101,7 @@ static void intExecute() { execI_(memRLUT, regs_); } -static void intExecuteBlock() { +void intExecuteBlock() { psxRegisters *regs_ = &psxRegs; u8 **memRLUT = psxMemRLUT; diff --git a/libpcsxcore/psxinterpreter.h b/libpcsxcore/psxinterpreter.h index f8581b825..b3652c0e1 100644 --- a/libpcsxcore/psxinterpreter.h +++ b/libpcsxcore/psxinterpreter.h @@ -8,4 +8,7 @@ void MTC0(psxRegisters *regs_, int reg, u32 val); void gteNULL(struct psxCP2Regs *regs); extern void (*psxCP2[64])(struct psxCP2Regs *regs); +// called by lightrec +void intExecuteBlock(); + #endif // __PSXINTERPRETER_H__ diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 2a9633348..37a0efd0d 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -30,8 +30,13 @@ //#include "debug.h" #define DebugCheckBP(...) +#include "lightrec/mem.h" #include "memmap.h" +#ifdef USE_LIBRETRO_VFS +#include +#endif + #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif @@ -52,7 +57,7 @@ void *psxMap(unsigned long addr, size_t size, int is_fixed, if (psxMapHook != NULL) { ret = psxMapHook(addr, size, 0, tag); if (ret == NULL) - return NULL; + return MAP_FAILED; } else { /* avoid MAP_FIXED, it overrides existing mappings.. 
*/ @@ -62,7 +67,7 @@ void *psxMap(unsigned long addr, size_t size, int is_fixed, req = (void *)(uintptr_t)addr; ret = mmap(req, size, PROT_READ | PROT_WRITE, flags, -1, 0); if (ret == MAP_FAILED) - return NULL; + return ret; } if (addr != 0 && ret != (void *)(uintptr_t)addr) { @@ -71,7 +76,7 @@ void *psxMap(unsigned long addr, size_t size, int is_fixed, if (is_fixed) { psxUnmap(ret, size, tag); - return NULL; + return MAP_FAILED; } if (((addr ^ (unsigned long)(uintptr_t)ret) & ~0xff000000l) && try_ < 2) @@ -128,33 +133,71 @@ u8 **psxMemRLUT = NULL; 0xbfc0_0000-0xbfc7_ffff BIOS Mirror (512K) Uncached */ -int psxMemInit() { - int i; - - psxMemRLUT = (u8 **)malloc(0x10000 * sizeof(void *)); - psxMemWLUT = (u8 **)malloc(0x10000 * sizeof(void *)); - memset(psxMemRLUT, 0, 0x10000 * sizeof(void *)); - memset(psxMemWLUT, 0, 0x10000 * sizeof(void *)); - +static int psxMemInitMap(void) +{ psxM = psxMap(0x80000000, 0x00210000, 1, MAP_TAG_RAM); - if (psxM == NULL) + if (psxM == MAP_FAILED) psxM = psxMap(0x77000000, 0x00210000, 0, MAP_TAG_RAM); - if (psxM == NULL) { + if (psxM == MAP_FAILED) { SysMessage(_("mapping main RAM failed")); + psxM = NULL; return -1; } - psxP = &psxM[0x200000]; + psxH = psxMap(0x1f800000, 0x10000, 0, MAP_TAG_OTHER); + if (psxH == MAP_FAILED) { + SysMessage(_("Error allocating memory!")); + psxMemShutdown(); + return -1; + } + psxR = psxMap(0x1fc00000, 0x80000, 0, MAP_TAG_OTHER); + if (psxR == MAP_FAILED) { + SysMessage(_("Error allocating memory!")); + psxMemShutdown(); + return -1; + } - if (psxMemRLUT == NULL || psxMemWLUT == NULL || - psxR == NULL || psxP == NULL || psxH == NULL) { + return 0; +} + +static void psxMemFreeMap(void) +{ + if (psxM) psxUnmap(psxM, 0x00210000, MAP_TAG_RAM); + if (psxH) psxUnmap(psxH, 0x10000, MAP_TAG_OTHER); + if (psxR) psxUnmap(psxR, 0x80000, MAP_TAG_OTHER); + psxM = psxH = psxR = NULL; + psxP = NULL; +} + +int psxMemInit(void) +{ + unsigned int i; + int ret; + + if (LIGHTREC_CUSTOM_MAP) + ret = lightrec_init_mmap(); + 
else + ret = psxMemInitMap(); + if (ret) { + SysMessage(_("Error allocating memory!")); + psxMemShutdown(); + return -1; + } + + psxMemRLUT = (u8 **)malloc(0x10000 * sizeof(void *)); + psxMemWLUT = (u8 **)malloc(0x10000 * sizeof(void *)); + + if (psxMemRLUT == NULL || psxMemWLUT == NULL) { SysMessage(_("Error allocating memory!")); psxMemShutdown(); return -1; } + memset(psxMemRLUT, (uintptr_t)INVALID_PTR, 0x10000 * sizeof(void *)); + memset(psxMemWLUT, (uintptr_t)INVALID_PTR, 0x10000 * sizeof(void *)); + // MemR for (i = 0; i < 0x80; i++) psxMemRLUT[i + 0x0000] = (u8 *)&psxM[(i & 0x1f) << 16]; @@ -179,7 +222,7 @@ int psxMemInit() { // NOTE: Not sure if this is needed to fix any games but seems wise, // seeing as some games do read from PIO as part of copy-protection // check. (See fix in psxMemReset() regarding psxP region reads). - psxMemWLUT[0x1f00] = NULL; + psxMemWLUT[0x1f00] = INVALID_PTR; psxMemWLUT[0x1f80] = (u8 *)psxH; return 0; @@ -192,6 +235,8 @@ void psxMemReset() { memset(psxM, 0, 0x00200000); memset(psxP, 0xff, 0x00010000); + Config.HLE = TRUE; + if (strcmp(Config.Bios, "HLE") != 0) { sprintf(bios, "%s/%s", Config.BiosDir, Config.Bios); f = fopen(bios, "rb"); @@ -199,19 +244,22 @@ void psxMemReset() { if (f == NULL) { SysMessage(_("Could not open BIOS:\"%s\". Enabling HLE Bios!\n"), bios); memset(psxR, 0, 0x80000); - Config.HLE = TRUE; } else { - fread(psxR, 1, 0x80000, f); + if (fread(psxR, 1, 0x80000, f) == 0x80000) { + Config.HLE = FALSE; + } else { + SysMessage(_("The selected BIOS:\"%s\" is of wrong size. 
Enabling HLE Bios!\n"), bios); + } fclose(f); - Config.HLE = FALSE; } - } else Config.HLE = TRUE; + } } void psxMemShutdown() { - psxUnmap(psxM, 0x00210000, MAP_TAG_RAM); psxM = NULL; - psxUnmap(psxH, 0x10000, MAP_TAG_OTHER); psxH = NULL; - psxUnmap(psxR, 0x80000, MAP_TAG_OTHER); psxR = NULL; + if (LIGHTREC_CUSTOM_MAP) + lightrec_free_mmap(); + else + psxMemFreeMap(); free(psxMemRLUT); psxMemRLUT = NULL; free(psxMemWLUT); psxMemWLUT = NULL; @@ -231,7 +279,7 @@ u8 psxMemRead8(u32 mem) { return psxHwRead8(mem); } else { p = (char *)(psxMemRLUT[t]); - if (p != NULL) { + if (p != INVALID_PTR) { if (Config.Debug) DebugCheckBP((mem & 0xffffff) | 0x80000000, R1); return *(u8 *)(p + (mem & 0xffff)); @@ -256,7 +304,7 @@ u16 psxMemRead16(u32 mem) { return psxHwRead16(mem); } else { p = (char *)(psxMemRLUT[t]); - if (p != NULL) { + if (p != INVALID_PTR) { if (Config.Debug) DebugCheckBP((mem & 0xffffff) | 0x80000000, R2); return SWAPu16(*(u16 *)(p + (mem & 0xffff))); @@ -281,7 +329,7 @@ u32 psxMemRead32(u32 mem) { return psxHwRead32(mem); } else { p = (char *)(psxMemRLUT[t]); - if (p != NULL) { + if (p != INVALID_PTR) { if (Config.Debug) DebugCheckBP((mem & 0xffffff) | 0x80000000, R4); return SWAPu32(*(u32 *)(p + (mem & 0xffff))); @@ -306,7 +354,7 @@ void psxMemWrite8(u32 mem, u8 value) { psxHwWrite8(mem, value); } else { p = (char *)(psxMemWLUT[t]); - if (p != NULL) { + if (p != INVALID_PTR) { if (Config.Debug) DebugCheckBP((mem & 0xffffff) | 0x80000000, W1); *(u8 *)(p + (mem & 0xffff)) = value; @@ -333,7 +381,7 @@ void psxMemWrite16(u32 mem, u16 value) { psxHwWrite16(mem, value); } else { p = (char *)(psxMemWLUT[t]); - if (p != NULL) { + if (p != INVALID_PTR) { if (Config.Debug) DebugCheckBP((mem & 0xffffff) | 0x80000000, W2); *(u16 *)(p + (mem & 0xffff)) = SWAPu16(value); @@ -361,7 +409,7 @@ void psxMemWrite32(u32 mem, u32 value) { psxHwWrite32(mem, value); } else { p = (char *)(psxMemWLUT[t]); - if (p != NULL) { + if (p != INVALID_PTR) { if (Config.Debug) DebugCheckBP((mem 
& 0xffffff) | 0x80000000, W4); *(u32 *)(p + (mem & 0xffff)) = SWAPu32(value); @@ -385,9 +433,9 @@ void psxMemWrite32(u32 mem, u32 value) { case 0x800: case 0x804: if (writeok == 0) break; writeok = 0; - memset(psxMemWLUT + 0x0000, 0, 0x80 * sizeof(void *)); - memset(psxMemWLUT + 0x8000, 0, 0x80 * sizeof(void *)); - memset(psxMemWLUT + 0xa000, 0, 0x80 * sizeof(void *)); + memset(psxMemWLUT + 0x0000, (uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); + memset(psxMemWLUT + 0x8000, (uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); + memset(psxMemWLUT + 0xa000, (uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); /* Required for icache interpreter otherwise Armored Core won't boot on icache interpreter */ psxCpu->Notify(R3000ACPU_NOTIFY_CACHE_ISOLATED, NULL); break; @@ -423,7 +471,7 @@ void *psxMemPointer(u32 mem) { return NULL; } else { p = (char *)(psxMemWLUT[t]); - if (p != NULL) { + if (p != INVALID_PTR) { return (void *)(p + (mem & 0xffff)); } return NULL; diff --git a/libpcsxcore/psxmem.h b/libpcsxcore/psxmem.h index ec4b970a6..14ff0033b 100644 --- a/libpcsxcore/psxmem.h +++ b/libpcsxcore/psxmem.h @@ -46,6 +46,12 @@ extern "C" { #endif +#ifdef LIGHTREC +#define INVALID_PTR ((void *)-1) +#else +#define INVALID_PTR NULL +#endif + extern s8 *psxM; #define psxMs8(mem) psxM[(mem) & 0x1fffff] #define psxMs16(mem) (SWAP16(*(s16 *)&psxM[(mem) & 0x1fffff])) @@ -109,7 +115,7 @@ extern s8 *psxH; extern u8 **psxMemWLUT; extern u8 **psxMemRLUT; -#define PSXM(mem) (psxMemRLUT[(mem) >> 16] == 0 ? NULL : (u8*)(psxMemRLUT[(mem) >> 16] + ((mem) & 0xffff))) +#define PSXM(mem) (psxMemRLUT[(mem) >> 16] == INVALID_PTR ? 
INVALID_PTR : (u8*)(psxMemRLUT[(mem) >> 16] + ((mem) & 0xffff))) #define PSXMs8(mem) (*(s8 *)PSXM(mem)) #define PSXMs16(mem) (SWAP16(*(s16 *)PSXM(mem))) #define PSXMs32(mem) (SWAP32(*(s32 *)PSXM(mem))) diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index d8268e2a6..ddf838866 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -33,7 +33,7 @@ psxRegisters psxRegs; #endif int psxInit() { - SysPrintf(_("Running PCSX Version %s (%s).\n"), PACKAGE_VERSION, __DATE__); + SysPrintf(_("Running PCSX Version %s (%s).\n"), PCSX_VERSION, __DATE__); #ifndef DRC_DISABLE if (Config.Cpu == CPU_INTERPRETER) { @@ -77,10 +77,11 @@ void psxReset() { } void psxShutdown() { - psxMemShutdown(); psxBiosShutdown(); psxCpu->Shutdown(); + + psxMemShutdown(); } void psxException(u32 code, u32 bd) { diff --git a/libpcsxcore/sio.c b/libpcsxcore/sio.c index 329bc36ba..6478338de 100644 --- a/libpcsxcore/sio.c +++ b/libpcsxcore/sio.c @@ -24,6 +24,10 @@ #include "sio.h" #include +#ifdef USE_LIBRETRO_VFS +#include +#endif + // Status Flags #define TX_RDY 0x0001 #define RX_RDY 0x0002 @@ -407,6 +411,12 @@ void LoadMcd(int mcd, char *str) { } McdDisable[mcd - 1] = 0; +#ifdef HAVE_LIBRETRO + // memcard1 is handled by libretro + if (mcd == 1) + return; +#endif + if (str == NULL || strcmp(str, "none") == 0) { McdDisable[mcd - 1] = 1; return; @@ -428,7 +438,12 @@ void LoadMcd(int mcd, char *str) { else if(buf.st_size == MCD_SIZE + 3904) fseek(f, 3904, SEEK_SET); } - fread(data, 1, MCD_SIZE, f); + if (fread(data, 1, MCD_SIZE, f) != MCD_SIZE) { +#ifndef NDEBUG + SysPrintf(_("File IO error in <%s:%s>.\n"), __FILE__, __func__); +#endif + memset(data, 0x00, MCD_SIZE); + } fclose(f); } else @@ -443,7 +458,12 @@ void LoadMcd(int mcd, char *str) { else if(buf.st_size == MCD_SIZE + 3904) fseek(f, 3904, SEEK_SET); } - fread(data, 1, MCD_SIZE, f); + if (fread(data, 1, MCD_SIZE, f) != MCD_SIZE) { +#ifndef NDEBUG + SysPrintf(_("File IO error in <%s:%s>.\n"), __FILE__, __func__); +#endif + 
memset(data, 0x00, MCD_SIZE); + } fclose(f); } } diff --git a/libpcsxcore/sjisfont.h b/libpcsxcore/sjisfont.h index 105d6243d..0692648bc 100644 --- a/libpcsxcore/sjisfont.h +++ b/libpcsxcore/sjisfont.h @@ -21,6 +21,9 @@ // Converted to binary format by Wei Mingzhi . // +#ifndef __SJISFONT_H__ +#define __SJISFONT_H__ + const unsigned char font_8140[] = { 0x78, 0xda, 0xad, 0x3b, 0x3b, 0x90, 0x1b, 0xc9, 0x75, 0x3d, 0x9f, 0x05, 0x1a, 0xcb, 0xe1, 0x4e, @@ -6954,3 +6957,5 @@ const unsigned char font_889f[] = { 0xeb, 0xe7, 0xa8, 0x89, 0x0a, 0x11, 0xbc, 0xbc, 0x33, 0xf9, 0xff, 0xe8, 0xc4, 0x21, 0xbf }; + +#endif /* __SJISFONT_H__ */ diff --git a/libpcsxcore/socket.c b/libpcsxcore/socket.c index 31f82e251..df768e688 100644 --- a/libpcsxcore/socket.c +++ b/libpcsxcore/socket.c @@ -15,6 +15,22 @@ * along with this program; if not, see . */ +#ifdef NO_SOCKET + +int StartServer() { return 0;} +void StopServer() {} +void GetClient() {} +void CloseClient() {} +int HasClient() { return 0;} +int ReadSocket(char * buffer, int len) { return 0;} +int RawReadSocket(char * buffer, int len) { return 0;} +void WriteSocket(char * buffer, int len) {} + +void SetsBlock() {} +void SetsNonblock() {} + +#else // NO_SOCKET + #ifdef _WIN32 #include #endif @@ -119,7 +135,7 @@ void GetClient() { } #endif - sprintf(hello, "000 PCSX Version %s - Debug console\r\n", PACKAGE_VERSION); + sprintf(hello, "000 PCSX Version %s - Debug console\r\n", PCSX_VERSION); WriteSocket(hello, strlen(hello)); ptr = 0; } @@ -252,3 +268,4 @@ void SetsNonblock() { fcntl(server_socket, F_SETFL, flags | O_NONBLOCK); #endif } +#endif // NO_SOCKET diff --git a/plugins/cdrcimg/cdrcimg.c b/plugins/cdrcimg/cdrcimg.c index 225451628..b3bee2718 100644 --- a/plugins/cdrcimg/cdrcimg.c +++ b/plugins/cdrcimg/cdrcimg.c @@ -284,7 +284,7 @@ static long CDRinit(void) return -1; } } -#ifndef _WIN32 +#if !defined(_WIN32) && !defined(NO_DYLIB) if (pBZ2_bzBuffToBuffDecompress == NULL) { void *h = dlopen("/usr/lib/libbz2.so.1", RTLD_LAZY); if 
(h == NULL) diff --git a/plugins/cdrcimg/cdrcimg.h b/plugins/cdrcimg/cdrcimg.h index efeaaf9d5..0c6d001c2 100644 --- a/plugins/cdrcimg/cdrcimg.h +++ b/plugins/cdrcimg/cdrcimg.h @@ -1,3 +1,7 @@ +#ifndef __P_CDRCIMG_H__ +#define __P_CDRCIMG_H__ void cdrcimg_set_fname(const char *fname); void *cdrcimg_get_sym(const char *sym); + +#endif /* __P_CDRCIMG_H__ */ diff --git a/plugins/dfinput/externals.h b/plugins/dfinput/externals.h index a446956eb..2e216fdc1 100644 --- a/plugins/dfinput/externals.h +++ b/plugins/dfinput/externals.h @@ -1,3 +1,5 @@ +#ifndef __P_EXTERNALS_H__ +#define __P_EXTERNALS_H__ void dfinput_activate(void); @@ -12,3 +14,5 @@ extern void pl_update_gun(int *xn, int *yn, int *xres, int *yres, int *in); /* vibration trigger to frontend */ extern int in_enable_vibration; extern void plat_trigger_vibrate(int pad, int low, int high); + +#endif /* __P_EXTERNALS_H__ */ diff --git a/plugins/dfinput/main.c b/plugins/dfinput/main.c index 937f78823..4f1d03f6c 100644 --- a/plugins/dfinput/main.c +++ b/plugins/dfinput/main.c @@ -20,6 +20,8 @@ extern unsigned char CALLBACK PAD2__startPoll(int pad); extern unsigned char CALLBACK PAD1__poll(unsigned char value); extern unsigned char CALLBACK PAD2__poll(unsigned char value); +#ifndef HAVE_LIBRETRO + static int old_controller_type1 = -1, old_controller_type2 = -1; #define select_pad(n) \ @@ -37,6 +39,7 @@ static int old_controller_type1 = -1, old_controller_type2 = -1; PAD##n##_poll = PADpoll_guncon; \ guncon_init(); \ break; \ + case PSE_PAD_TYPE_NEGCON: \ case PSE_PAD_TYPE_GUN: \ default: \ PAD##n##_startPoll = PAD##n##__startPoll; \ @@ -49,9 +52,20 @@ void dfinput_activate(void) { PadDataS pad; + pad.portMultitap = -1; + pad.requestPadIndex = 0; PAD1_readPort1(&pad); select_pad(1); + pad.requestPadIndex = 1; PAD2_readPort2(&pad); select_pad(2); } + +#else // use libretro's libpcsxcore/plugins.c code + +void dfinput_activate(void) +{ +} + +#endif diff --git a/plugins/dfinput/main.h b/plugins/dfinput/main.h index 
e83306a62..96cebfac4 100644 --- a/plugins/dfinput/main.h +++ b/plugins/dfinput/main.h @@ -1,3 +1,6 @@ +#ifndef __P_MAIN_H__ +#define __P_MAIN_H__ + #include "psemu_plugin_defs.h" #include "externals.h" @@ -16,3 +19,5 @@ void guncon_init(void); /* get button state and pad type from main emu */ extern long (*PAD1_readPort1)(PadDataS *pad); extern long (*PAD2_readPort2)(PadDataS *pad); + +#endif /* __P_MAIN_H__ */ diff --git a/plugins/dfinput/pad.c b/plugins/dfinput/pad.c index 7e00a1131..3e333660c 100644 --- a/plugins/dfinput/pad.c +++ b/plugins/dfinput/pad.c @@ -42,6 +42,7 @@ enum { CMD_VIBRATION_TOGGLE = 0x4D, }; +#ifndef HAVE_LIBRETRO static struct { uint8_t PadMode; uint8_t PadID; @@ -242,6 +243,7 @@ static void do_vibration(unsigned char value) break; } } +#endif #if 0 #include @@ -254,6 +256,7 @@ unsigned char PADpoll(unsigned char value) { #define PADpoll PADpoll_ #endif +#ifndef HAVE_LIBRETRO unsigned char PADpoll_pad(unsigned char value) { if (CurByte == 0) { CurCmd = value; @@ -302,3 +305,4 @@ void pad_init(void) padstate[i].PadMode = padstate[i].pad.controllerType == PSE_PAD_TYPE_ANALOGPAD; } } +#endif diff --git a/plugins/dfsound/dma.h b/plugins/dfsound/dma.h index 440536feb..4982432b9 100644 --- a/plugins/dfsound/dma.h +++ b/plugins/dfsound/dma.h @@ -24,8 +24,12 @@ // //*************************************************************************// +#ifndef __P_DMA_H__ +#define __P_DMA_H__ unsigned short CALLBACK SPUreadDMA(void); void CALLBACK SPUreadDMAMem(unsigned short * pusPSXMem,int iSize); void CALLBACK SPUwriteDMA(unsigned short val); void CALLBACK SPUwriteDMAMem(unsigned short * pusPSXMem,int iSize); + +#endif /* __P_DMA_H__ */ diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index 1cfef6614..515741218 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -15,6 +15,9 @@ * * ***************************************************************************/ +#ifndef __P_SOUND_EXTERNALS_H__ +#define 
__P_SOUND_EXTERNALS_H__ + #include ///////////////////////////////////////////////////////// @@ -259,3 +262,4 @@ void schedule_next_irq(void); #endif +#endif /* __P_SOUND_EXTERNALS_H__ */ diff --git a/plugins/dfsound/out.h b/plugins/dfsound/out.h index 460709976..e4878a83f 100644 --- a/plugins/dfsound/out.h +++ b/plugins/dfsound/out.h @@ -1,3 +1,5 @@ +#ifndef __P_OUT_H__ +#define __P_OUT_H__ struct out_driver { const char *name; @@ -10,3 +12,5 @@ struct out_driver { extern struct out_driver *out_current; void SetupSound(void); + +#endif /* __P_OUT_H__ */ diff --git a/plugins/dfsound/registers.h b/plugins/dfsound/registers.h index 3bca5180b..28641b81e 100644 --- a/plugins/dfsound/registers.h +++ b/plugins/dfsound/registers.h @@ -15,6 +15,9 @@ * * ***************************************************************************/ +#ifndef __P_REGISTERS_H__ +#define __P_REGISTERS_H__ + #define H_SPUReverbAddr 0x0da2 #define H_SPUirqAddr 0x0da4 #define H_SPUaddr 0x0da6 @@ -154,3 +157,4 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, unsigned int cycles); +#endif /* __P_REGISTERS_H__ */ diff --git a/plugins/dfsound/spu.h b/plugins/dfsound/spu.h index 8a0f2d2e4..0cef6520d 100644 --- a/plugins/dfsound/spu.h +++ b/plugins/dfsound/spu.h @@ -15,6 +15,11 @@ * * ***************************************************************************/ +#ifndef __P_SPU_H__ +#define __P_SPU_H__ + void ClearWorkingState(void); void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycle, int is_start); int CALLBACK SPUplayCDDAchannel(short *pcm, int bytes, unsigned int cycle, int is_start); + +#endif /* __P_SPU_H__ */ diff --git a/plugins/dfsound/spu_c64x.h b/plugins/dfsound/spu_c64x.h index 7c4d5659b..56ede38da 100644 --- a/plugins/dfsound/spu_c64x.h +++ b/plugins/dfsound/spu_c64x.h @@ -1,3 +1,6 @@ +#ifndef __P_SPU_C64X_H__ +#define __P_SPU_C64X_H__ + #define COMPONENT_NAME "pcsxr_spu" enum { @@ -26,3 +29,5 @@ struct region_mem { }; #define ACTIVE_CNT 3 + +#endif 
/* __P_SPU_C64X_H__ */ diff --git a/plugins/dfsound/spu_config.h b/plugins/dfsound/spu_config.h index 3e88a2c22..95c894867 100644 --- a/plugins/dfsound/spu_config.h +++ b/plugins/dfsound/spu_config.h @@ -1,3 +1,6 @@ +#ifndef __P_SPU_CONFIG_H__ +#define __P_SPU_CONFIG_H__ + // user settings typedef struct @@ -15,3 +18,5 @@ typedef struct } SPUConfig; extern SPUConfig spu_config; + +#endif /* __P_SPU_CONFIG_H__ */ diff --git a/plugins/dfsound/stdafx.h b/plugins/dfsound/stdafx.h index 82b0d7e58..96335e38a 100644 --- a/plugins/dfsound/stdafx.h +++ b/plugins/dfsound/stdafx.h @@ -15,6 +15,9 @@ * * ***************************************************************************/ +#ifndef __P_STDAFX_H__ +#define __P_STDAFX_H__ + #include #include #include @@ -30,3 +33,5 @@ #endif #include "psemuxa.h" + +#endif /* __P_STDAFX_H__ */ diff --git a/plugins/dfsound/xa.h b/plugins/dfsound/xa.h index cbf2843fd..137fe4362 100644 --- a/plugins/dfsound/xa.h +++ b/plugins/dfsound/xa.h @@ -15,6 +15,11 @@ * * ***************************************************************************/ +#ifndef __P_XA_H__ +#define __P_XA_H__ + INLINE void MixXA(void); INLINE void FeedXA(xa_decode_t *xap); -INLINE int FeedCDDA(unsigned char *pcm, int nBytes); +INLINE int FeedCDDA(unsigned char *pcm, int nBytes); + +#endif /* __P_XA_H__ */ diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index 245d2274b..3a41cd7cc 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -301,11 +301,11 @@ void renderer_notify_res_change(void) extern const unsigned char cmd_lengths[256]; -int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) +int do_cmd_list(uint32_t *list, int list_len, int *last_cmd) { unsigned int cmd = 0, len; - unsigned int *list_start = list; - unsigned int *list_end = list + list_len; + uint32_t *list_start = list; + uint32_t *list_end = list + list_len; for (; list < list_end; list += 1 + len) { diff --git a/plugins/dfxvideo/soft.c 
b/plugins/dfxvideo/soft.c index a9d9e0426..5c71fd333 100644 --- a/plugins/dfxvideo/soft.c +++ b/plugins/dfxvideo/soft.c @@ -6317,6 +6317,7 @@ static void DrawSoftwareSpriteMirror(unsigned char * baseAddr,int32_t w,int32_t sprtYa=(sprtY<<10); clutP=(clutY0<<10)+clutX0; for (sprCY=0;sprCY>4)&0xf)])); GetTextureTransColG_SPR(&psxVuw[sprA+1],GETLE16(&psxVuw[clutP+(tC&0xf)])); } + } return; case 1: clutP>>=1; for(sprCY=0;sprCY #ifdef __cplusplus @@ -135,3 +138,5 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs_); #ifdef __cplusplus } #endif + +#endif /* __GPULIB_GPU_H__ */ diff --git a/plugins/gpulib/vout_sdl.c b/plugins/gpulib/vout_sdl.c index b8c4eae2b..56ab811c0 100644 --- a/plugins/gpulib/vout_sdl.c +++ b/plugins/gpulib/vout_sdl.c @@ -54,7 +54,7 @@ void vout_update(void) int i; SDL_LockSurface(screen); - if (gpu.status.rgb24) + if (gpu.status & PSX_GPU_STATUS_RGB24) { uint8_t *s; int y; diff --git a/plugins/spunull/register.h b/plugins/spunull/register.h index 52128b76a..2e0b8923f 100644 --- a/plugins/spunull/register.h +++ b/plugins/spunull/register.h @@ -1,3 +1,6 @@ +#ifndef __SPUNULL_REGISTER_H__ +#define __SPUNULL_REGISTER_H__ + #define H_SPUirqAddr 0x0da4 #define H_SPUaddr 0x0da6 #define H_SPUdata 0x0da8 @@ -119,3 +122,4 @@ #define H_SPU_ADSRLevel22 0x0d68 #define H_SPU_ADSRLevel23 0x0d78 +#endif /* __SPUNULL_REGISTER_H__ */ From d04b892423f6753c369fb1a8df641d39d04952bf Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 15 Oct 2022 00:37:43 +0300 Subject: [PATCH 228/597] gpulib: update gpuinfo according to nocash --- plugins/gpulib/gpu.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index bef297fb5..32a797d58 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -126,17 +126,16 @@ static noinline void get_gpu_info(uint32_t data) case 0x02: case 0x03: case 0x04: - case 0x05: gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff; break; - case 0x06: - gpu.gp0 = gpu.ex_regs[5] & 0xfffff; + 
case 0x05: + gpu.gp0 = gpu.ex_regs[5] & 0x3fffff; break; case 0x07: gpu.gp0 = 2; break; default: - gpu.gp0 = 0; + // gpu.gp0 unchanged break; } } From a94ccc7fd9e3f11522dadda27423096735717c04 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 22 Oct 2022 19:16:46 +0300 Subject: [PATCH 229/597] psxbios: use noninvasive print for -psxout --- libpcsxcore/psxbios.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 203f85d15..f57f5129c 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -1252,7 +1252,7 @@ void psxBios_getchar() { //0x3b v0 = getchar(); pc0 = ra; } -void psxBios_printf() { // 0x3f +static void psxBios_printf_psxout() { // 0x3f char tmp[1024]; char tmp2[1024]; u32 save[4]; @@ -1317,11 +1317,14 @@ void psxBios_printf() { // 0x3f } *ptmp = 0; - if (psp) + if (psp != INVALID_PTR) memcpy(psp, save, 4 * 4); SysPrintf("%s", tmp); +} +void psxBios_printf() { // 0x3f + psxBios_printf_psxout(); pc0 = ra; } @@ -2212,6 +2215,25 @@ void psxBios_write() { // 0x35/0x03 pc0 = ra; } +static void psxBios_write_psxout() { + if (a0 == 1) { // stdout + const char *ptr = Ra1; + int len = a2; + + if (ptr != INVALID_PTR) + while (len-- > 0) + SysPrintf("%c", *ptr++); + } +} + +static void psxBios_putchar_psxout() { // 3d + SysPrintf("%c", (char)a0); +} + +static void psxBios_puts_psxout() { // 3e/3f + SysPrintf("%s", Ra0); +} + /* * int close(int fd); */ @@ -2704,11 +2726,10 @@ void psxBiosInit() { biosB0[i] = NULL; biosC0[i] = NULL; } - biosA0[0x3e] = psxBios_puts; - biosA0[0x3f] = psxBios_printf; - - biosB0[0x3d] = psxBios_putchar; - biosB0[0x3f] = psxBios_puts; + biosA0[0x03] = biosB0[0x35] = psxBios_write_psxout; + biosA0[0x3c] = biosB0[0x3d] = psxBios_putchar_psxout; + biosA0[0x3e] = biosB0[0x3f] = psxBios_puts_psxout; + biosA0[0x3f] = psxBios_printf_psxout; if (!Config.HLE) return; From e0f25b64cf98406b7059b373b04a18b45e2a0aa7 Mon Sep 17 00:00:00 2001 From: 
notaz Date: Sat, 22 Oct 2022 19:20:26 +0300 Subject: [PATCH 230/597] cdrom: report read errors correctly --- libpcsxcore/cdriso.c | 4 ++-- libpcsxcore/cdrom.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index 246b6a062..2a31950f3 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -1238,7 +1238,7 @@ static int cdread_2048(FILE *f, unsigned int base, void *dest, int sector) sec2msf(sector + 2 * 75, (char *)&cdbuffer[12]); cdbuffer[12 + 3] = 1; - return ret; + return 12*2 + ret; } static unsigned char * CALLBACK ISOgetBuffer_compr(void) { @@ -1540,7 +1540,7 @@ static boolean CALLBACK ISOreadTrack(unsigned char *time) { } ret = cdimg_read_func(cdHandle, 0, cdbuffer, sector); - if (ret < 0) + if (ret < 12*2 + 2048) return 0; if (subHandle != NULL) { diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index b3e238b66..41dc15136 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -463,7 +463,8 @@ static int ReadTrack(const u8 *time) { CDR_LOG("ReadTrack *** %02x:%02x:%02x\n", tmp[0], tmp[1], tmp[2]); read_ok = CDR_readTrack(tmp); - memcpy(cdr.Prev, tmp, 3); + if (read_ok) + memcpy(cdr.Prev, tmp, 3); if (CheckSBI(time)) return read_ok; @@ -1253,7 +1254,6 @@ static void cdrReadInterrupt(void) if (!read_ok) { CDR_LOG_I("cdrReadInterrupt() Log: err\n"); - memset(cdr.Transfer, 0, DATA_SIZE); cdrReadInterruptSetResult(cdr.StatP | STATUS_ERROR); return; } From 4134c627c94686ef483a400e7664decf10c64e1e Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 25 Oct 2022 22:08:12 +0300 Subject: [PATCH 231/597] cdrom: add a timing hack notaz/pcsx_rearmed#267 --- libpcsxcore/cdrom.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 41dc15136..12945d60c 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -578,6 +578,27 @@ static int cdrSeekTime(unsigned char *target) return seekTime; } +static 
u32 cdrAlignTimingHack(u32 cycles) +{ + /* + * timing hack for T'ai Fu - Wrath of the Tiger: + * The game has a bug where it issues some cdc commands from a low priority + * vint handler, however there is a higher priority default bios handler + * that acks the vint irq and returns, so game's handler is not reached + * (see bios irq handler chains at e004 and the game's irq handling func + * at 80036810). For the game to work, vint has to arrive after the bios + * vint handler rejects some other irq (of which only cd and rcnt2 are + * active), but before the game's handler loop reads I_STAT. The time + * window for this is quite small (~1k cycles of so). Apparently this + * somehow happens naturally on the real hardware. + */ + u32 vint_rel = rcnts[3].cycleStart + 63000 - psxRegs.cycle; + vint_rel += PSXCLK / 60; + while ((s32)(vint_rel - cycles) < 0) + vint_rel += PSXCLK / 60; + return vint_rel; +} + static void cdrUpdateTransferBuf(const u8 *buf); static void cdrReadInterrupt(void); static void cdrPrepCdda(s16 *buf, int samples); @@ -637,7 +658,7 @@ void cdrPlayReadInterrupt(void) void cdrInterrupt(void) { int start_rotating = 0; int error = 0; - unsigned int seekTime = 0; + u32 cycles, seekTime = 0; u32 second_resp_time = 0; const void *buf; u8 ParamC; @@ -1116,7 +1137,10 @@ void cdrInterrupt(void) { ReadTrack(cdr.SetSectorPlay); cdr.LocL[0] = LOCL_INVALID; - CDRPLAYREAD_INT(((cdr.Mode & 0x80) ? (cdReadTime) : cdReadTime * 2) + seekTime, 1); + cycles = (cdr.Mode & 0x80) ? 
cdReadTime : cdReadTime * 2; + cycles += seekTime; + cycles = cdrAlignTimingHack(cycles); + CDRPLAYREAD_INT(cycles, 1); SetPlaySeekRead(cdr.StatP, STATUS_SEEK); start_rotating = 1; From afaac9354c80862f1bb153144a811f12d6836eec Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 2 Nov 2022 01:20:50 +0200 Subject: [PATCH 232/597] cdrom: handle fifo overreads based on nocash doc notaz/pcsx_rearmed#269 --- libpcsxcore/cdrom.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 12945d60c..e1065739b 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1430,7 +1430,7 @@ void cdrWrite1(unsigned char rt) { } unsigned char cdrRead2(void) { - unsigned char ret = 0; + unsigned char ret = cdr.Transfer[0x920]; if (cdr.FifoOffset < cdr.FifoSize) ret = cdr.Transfer[cdr.FifoOffset++]; @@ -1562,10 +1562,12 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { { memcpy(ptr, cdr.Transfer + cdr.FifoOffset, size); cdr.FifoOffset += size; - psxCpu->Clear(madr, size / 4); } - if (size < cdsize) + if (size < cdsize) { CDR_LOG_I("cdrom: dma3 %d/%d\n", size, cdsize); + memset(ptr + size, cdr.Transfer[0x920], cdsize - size); + } + psxCpu->Clear(madr, cdsize / 4); CDRDMA_INT((cdsize/4) * 24); @@ -1655,7 +1657,7 @@ int cdrFreeze(void *f, int Mode) { if (Mode == 0) { getCdInfo(); - cdr.FifoOffset = tmp; + cdr.FifoOffset = tmp < DATA_SIZE ? tmp : DATA_SIZE; cdr.FifoSize = (cdr.Mode & 0x20) ? 2340 : 2048 + 12; // read right sub data From 752c1c850b35ff0239abc4d55be091542f52bae4 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 2 Nov 2022 22:24:20 +0200 Subject: [PATCH 233/597] cdrom: change GetlocP handling maybe better? 
notaz/pcsx_rearmed#268 --- libpcsxcore/cdriso.c | 128 +++++++++++++++++++++++++++--------------- libpcsxcore/cdrom.c | 84 +++++++++++++++------------ libpcsxcore/plugins.h | 2 +- 3 files changed, 134 insertions(+), 80 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index 2a31950f3..a755a23b4 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -54,7 +54,6 @@ static FILE *subHandle = NULL; static boolean subChanMixed = FALSE; static boolean subChanRaw = FALSE; -static boolean subChanMissing = FALSE; static boolean multifile = FALSE; @@ -93,7 +92,8 @@ static struct { } *chd_img; #endif -int (*cdimg_read_func)(FILE *f, unsigned int base, void *dest, int sector); +static int (*cdimg_read_func)(FILE *f, unsigned int base, void *dest, int sector); +static int (*cdimg_read_sub_func)(FILE *f, int sector); char* CALLBACK CDR__getDriveLetter(void); long CALLBACK CDR__configure(void); @@ -1070,29 +1070,48 @@ static int opensbifile(const char *isoname) { static int cdread_normal(FILE *f, unsigned int base, void *dest, int sector) { - fseek(f, base + sector * CD_FRAMESIZE_RAW, SEEK_SET); - return fread(dest, 1, CD_FRAMESIZE_RAW, f); + int ret; + if (fseek(f, base + sector * CD_FRAMESIZE_RAW, SEEK_SET)) + goto fail_io; + ret = fread(dest, 1, CD_FRAMESIZE_RAW, f); + if (ret <= 0) + goto fail_io; + return ret; + +fail_io: + // often happens in cdda gaps of a split cue/bin, so not logged + //SysPrintf("File IO error %d, base %u, sector %u\n", errno, base, sector); + return -1; } static int cdread_sub_mixed(FILE *f, unsigned int base, void *dest, int sector) { int ret; - fseek(f, base + sector * (CD_FRAMESIZE_RAW + SUB_FRAMESIZE), SEEK_SET); + if (fseek(f, base + sector * (CD_FRAMESIZE_RAW + SUB_FRAMESIZE), SEEK_SET)) + goto fail_io; ret = fread(dest, 1, CD_FRAMESIZE_RAW, f); + if (ret <= 0) + goto fail_io; + return ret; + +fail_io: + //SysPrintf("File IO error %d, base %u, sector %u\n", errno, base, sector); + return -1; +} + +static int 
cdread_sub_sub_mixed(FILE *f, int sector) +{ + if (fseek(f, sector * (CD_FRAMESIZE_RAW + SUB_FRAMESIZE) + CD_FRAMESIZE_RAW, SEEK_SET)) + goto fail_io; if (fread(subbuffer, 1, SUB_FRAMESIZE, f) != SUB_FRAMESIZE) goto fail_io; - if (subChanRaw) DecodeRawSubData(); - goto done; + return SUB_FRAMESIZE; fail_io: -#ifndef NDEBUG - SysPrintf(_("File IO error in <%s:%s>.\n"), __FILE__, __func__); -#endif - -done: - return ret; + SysPrintf("subchannel: file IO error %d, sector %u\n", errno, sector); + return -1; } static int uncompress2_pcsx(void *out, unsigned long *out_size, void *in, unsigned long in_size) @@ -1201,8 +1220,7 @@ static int cdread_chd(FILE *f, unsigned int base, void *dest, int sector) { int hunk; - if (base) - sector += base; + sector += base; hunk = sector / chd_img->sectors_per_hunk; chd_img->sector_in_hunk = sector % chd_img->sectors_per_hunk; @@ -1216,14 +1234,28 @@ static int cdread_chd(FILE *f, unsigned int base, void *dest, int sector) if (dest != cdbuffer) // copy avoid HACK memcpy(dest, chd_img->buffer[chd_img->sector_in_hunk], CD_FRAMESIZE_RAW); - if (subChanMixed) { - memcpy(subbuffer, chd_img->buffer[chd_img->sector_in_hunk] + CD_FRAMESIZE_RAW, - SUB_FRAMESIZE); - if (subChanRaw) - DecodeRawSubData(); - } return CD_FRAMESIZE_RAW; } + +static int cdread_sub_chd(FILE *f, int sector) +{ + int hunk; + + if (!subChanMixed) + return -1; + + hunk = sector / chd_img->sectors_per_hunk; + chd_img->sector_in_hunk = sector % chd_img->sectors_per_hunk; + + if (hunk != chd_img->current_hunk) + { + chd_read(chd_img->chd, hunk, chd_img->buffer); + chd_img->current_hunk = hunk; + } + + memcpy(subbuffer, chd_img->buffer[chd_img->sector_in_hunk] + CD_FRAMESIZE_RAW, SUB_FRAMESIZE); + return SUB_FRAMESIZE; +} #endif static int cdread_2048(FILE *f, unsigned int base, void *dest, int sector) @@ -1298,6 +1330,7 @@ static long CALLBACK ISOopen(void) { CDR_getBuffer = ISOgetBuffer; cdimg_read_func = cdread_normal; + cdimg_read_sub_func = NULL; if 
(parsetoc(GetIsoFile()) == 0) { strcat(image_str, "[+toc]"); @@ -1326,6 +1359,7 @@ static long CALLBACK ISOopen(void) { strcat(image_str, "[+chd]"); CDR_getBuffer = ISOgetBuffer_chd; cdimg_read_func = cdread_chd; + cdimg_read_sub_func = cdread_sub_chd; is_chd = 1; } #endif @@ -1387,10 +1421,14 @@ static long CALLBACK ISOopen(void) { PrintTracks(); - if (subChanMixed && !is_chd) + if (subChanMixed && !is_chd) { cdimg_read_func = cdread_sub_mixed; - else if (isMode1ISO) + cdimg_read_sub_func = cdread_sub_sub_mixed; + } + else if (isMode1ISO) { cdimg_read_func = cdread_2048; + cdimg_read_sub_func = NULL; + } // make sure we have another handle open for cdda if (numtracks > 1 && ti[1].handle == NULL) { @@ -1530,28 +1568,13 @@ static boolean CALLBACK ISOreadTrack(unsigned char *time) { return 0; } - if (pregapOffset) { - subChanMissing = FALSE; - if (sector >= pregapOffset) { - sector -= 2 * 75; - if (sector < pregapOffset) - subChanMissing = TRUE; - } - } + if (pregapOffset && sector >= pregapOffset) + sector -= 2 * 75; ret = cdimg_read_func(cdHandle, 0, cdbuffer, sector); if (ret < 12*2 + 2048) return 0; - if (subHandle != NULL) { - fseek(subHandle, sector * SUB_FRAMESIZE, SEEK_SET); - if (fread(subbuffer, 1, SUB_FRAMESIZE, subHandle) != SUB_FRAMESIZE) - /* Faulty subchannel data shouldn't cause a read failure */ - return 1; - - if (subChanRaw) DecodeRawSubData(); - } - return 1; } @@ -1570,12 +1593,29 @@ static long CALLBACK ISOstop(void) { } // gets subchannel data -static unsigned char* CALLBACK ISOgetBufferSub(void) { - if ((subHandle != NULL || subChanMixed) && !subChanMissing) { - return subbuffer; +static unsigned char* CALLBACK ISOgetBufferSub(int sector) { + if (pregapOffset && sector >= pregapOffset) { + sector -= 2 * 75; + if (sector < pregapOffset) // ? 
+ return NULL; } - return NULL; + if (cdimg_read_sub_func != NULL) { + if (cdimg_read_sub_func(cdHandle, sector) != SUB_FRAMESIZE) + return NULL; + } + else if (subHandle != NULL) { + if (fseek(subHandle, sector * SUB_FRAMESIZE, SEEK_SET)) + return NULL; + if (fread(subbuffer, 1, SUB_FRAMESIZE, subHandle) != SUB_FRAMESIZE) + return NULL; + } + else { + return NULL; + } + + if (subChanRaw) DecodeRawSubData(); + return subbuffer; } static long CALLBACK ISOgetStatus(struct CdrStat *stat) { diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index e1065739b..145ca32eb 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -21,6 +21,7 @@ * Handles all CD-ROM registers and functions. */ +#include #include "cdrom.h" #include "ppf.h" #include "psxdma.h" @@ -76,7 +77,7 @@ static struct { unsigned char ResultP; unsigned char ResultReady; unsigned char Cmd; - unsigned char unused4; + unsigned char SubqForwardSectors; unsigned char SetlocPending; u32 Reading; @@ -208,6 +209,7 @@ unsigned char Test23[] = { 0x43, 0x58, 0x44, 0x32, 0x39 ,0x34, 0x30, 0x51 }; #define cdReadTime (PSXCLK / 75) #define LOCL_INVALID 0xff +#define SUBQ_FORWARD_SECTORS 2u enum drive_state { DRIVESTATE_STANDBY = 0, // pause, play, read @@ -447,11 +449,10 @@ static void generate_subq(const u8 *time) cdr.subq.Absolute[2] = itob(time[2]); } -static int ReadTrack(const u8 *time) { +static int ReadTrack(const u8 *time) +{ unsigned char tmp[3]; - struct SubQ *subq; int read_ok; - u16 crc; tmp[0] = itob(time[0]); tmp[1] = itob(time[1]); @@ -465,11 +466,18 @@ static int ReadTrack(const u8 *time) { read_ok = CDR_readTrack(tmp); if (read_ok) memcpy(cdr.Prev, tmp, 3); + return read_ok; +} + +static void UpdateSubq(const u8 *time) +{ + const struct SubQ *subq; + u16 crc; if (CheckSBI(time)) - return read_ok; + return; - subq = (struct SubQ *)CDR_getBufferSub(); + subq = (struct SubQ *)CDR_getBufferSub(MSF2SECT(time[0], time[1], time[2])); if (subq != NULL && cdr.CurTrack == 1) { crc = calcCrc((u8 *)subq + 
12, 10); if (crc == (((u16)subq->CRC[0] << 8) | subq->CRC[1])) { @@ -479,8 +487,8 @@ static int ReadTrack(const u8 *time) { memcpy(cdr.subq.Absolute, subq->AbsoluteAddress, 3); } else { - CDR_LOG_I("subq bad crc @%02x:%02x:%02x\n", - tmp[0], tmp[1], tmp[2]); + CDR_LOG_I("subq bad crc @%02d:%02d:%02d\n", + time[0], time[1], time[2]); } } else { @@ -491,8 +499,6 @@ static int ReadTrack(const u8 *time) { cdr.subq.Track, cdr.subq.Index, cdr.subq.Relative[0], cdr.subq.Relative[1], cdr.subq.Relative[2], cdr.subq.Absolute[0], cdr.subq.Absolute[1], cdr.subq.Absolute[2]); - - return read_ok; } static void cdrPlayInterrupt_Autopause() @@ -604,6 +610,20 @@ static void cdrReadInterrupt(void); static void cdrPrepCdda(s16 *buf, int samples); static void cdrAttenuate(s16 *buf, int samples, int stereo); +static void msfiAdd(u8 *msfi, u32 count) +{ + assert(count < 75); + msfi[2] += count; + if (msfi[2] >= 75) { + msfi[2] -= 75; + msfi[1]++; + if (msfi[1] == 60) { + msfi[1] = 0; + msfi[0]++; + } + } +} + void cdrPlayReadInterrupt(void) { if (cdr.Reading) { @@ -636,15 +656,7 @@ void cdrPlayReadInterrupt(void) cdr.FirstSector = 0; } - cdr.SetSectorPlay[2]++; - if (cdr.SetSectorPlay[2] == 75) { - cdr.SetSectorPlay[2] = 0; - cdr.SetSectorPlay[1]++; - if (cdr.SetSectorPlay[1] == 60) { - cdr.SetSectorPlay[1] = 0; - cdr.SetSectorPlay[0]++; - } - } + msfiAdd(cdr.SetSectorPlay, 1); // update for CdlGetlocP/autopause generate_subq(cdr.SetSectorPlay); @@ -794,8 +806,9 @@ void cdrInterrupt(void) { - plays tracks without retry play */ Find_CurTrack(cdr.SetSectorPlay); - ReadTrack(cdr.SetSectorPlay); + generate_subq(cdr.SetSectorPlay); cdr.LocL[0] = LOCL_INVALID; + cdr.SubqForwardSectors = 1; cdr.TrackChanged = FALSE; cdr.FirstSector = 1; @@ -1035,6 +1048,7 @@ void cdrInterrupt(void) { read_ok = ReadTrack(cdr.SetSectorPlay); if (read_ok && (buf = CDR_getBuffer())) memcpy(cdr.LocL, buf, 8); + UpdateSubq(cdr.SetSectorPlay); cdr.TrackChanged = FALSE; break; @@ -1134,8 +1148,9 @@ void 
cdrInterrupt(void) { // Fighting Force 2 - update subq time immediately // - fixes new game - ReadTrack(cdr.SetSectorPlay); + UpdateSubq(cdr.SetSectorPlay); cdr.LocL[0] = LOCL_INVALID; + cdr.SubqForwardSectors = 1; cycles = (cdr.Mode & 0x80) ? cdReadTime : cdReadTime * 2; cycles += seekTime; @@ -1266,10 +1281,20 @@ static void cdrUpdateTransferBuf(const u8 *buf) static void cdrReadInterrupt(void) { u8 *buf = NULL, *hdr; + u8 subqPos[3]; int read_ok; SetPlaySeekRead(cdr.StatP, STATUS_READ | STATUS_ROTATING); + memcpy(subqPos, cdr.SetSectorPlay, sizeof(subqPos)); + msfiAdd(subqPos, cdr.SubqForwardSectors); + UpdateSubq(subqPos); + if (cdr.SubqForwardSectors < SUBQ_FORWARD_SECTORS) { + cdr.SubqForwardSectors++; + CDRPLAYREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime, 0); + return; + } + read_ok = ReadTrack(cdr.SetSectorPlay); if (read_ok) buf = CDR_getBuffer(); @@ -1319,20 +1344,7 @@ static void cdrReadInterrupt(void) if (!(cdr.Mode & MODE_STRSND) || !(buf[4+2] & 0x4)) cdrReadInterruptSetResult(cdr.StatP); - cdr.SetSectorPlay[2]++; - if (cdr.SetSectorPlay[2] == 75) { - cdr.SetSectorPlay[2] = 0; - cdr.SetSectorPlay[1]++; - if (cdr.SetSectorPlay[1] == 60) { - cdr.SetSectorPlay[1] = 0; - cdr.SetSectorPlay[0]++; - } - } - - if (!cdr.Irq1Pending) { - // update for CdlGetlocP - ReadTrack(cdr.SetSectorPlay); - } + msfiAdd(cdr.SetSectorPlay, 1); CDRPLAYREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime, 0); } @@ -1659,6 +1671,8 @@ int cdrFreeze(void *f, int Mode) { cdr.FifoOffset = tmp < DATA_SIZE ? tmp : DATA_SIZE; cdr.FifoSize = (cdr.Mode & 0x20) ? 
2340 : 2048 + 12; + if (cdr.SubqForwardSectors > SUBQ_FORWARD_SECTORS) + cdr.SubqForwardSectors = SUBQ_FORWARD_SECTORS; // read right sub data tmpp[0] = btoi(cdr.Prev[0]); diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index 38c41ca73..178f83a81 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -112,7 +112,7 @@ typedef long (CALLBACK* CDRgetTN)(unsigned char *); typedef long (CALLBACK* CDRgetTD)(unsigned char, unsigned char *); typedef boolean (CALLBACK* CDRreadTrack)(unsigned char *); typedef unsigned char* (CALLBACK* CDRgetBuffer)(void); -typedef unsigned char* (CALLBACK* CDRgetBufferSub)(void); +typedef unsigned char* (CALLBACK* CDRgetBufferSub)(int sector); typedef long (CALLBACK* CDRconfigure)(void); typedef long (CALLBACK* CDRtest)(void); typedef void (CALLBACK* CDRabout)(void); From 9afc3b574dbe79c7a4c615986beb503cc5435864 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 3 Nov 2022 02:13:39 +0200 Subject: [PATCH 234/597] cdriso: fix up chd for separated subq reads --- libpcsxcore/cdriso.c | 52 +++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index a755a23b4..318957949 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -83,11 +83,12 @@ static struct { #ifdef HAVE_CHD static struct { - unsigned char (*buffer)[CD_FRAMESIZE_RAW + SUB_FRAMESIZE]; + unsigned char *buffer; chd_file* chd; const chd_header* header; unsigned int sectors_per_hunk; - unsigned int current_hunk; + unsigned int current_hunk[2]; + unsigned int current_buffer; unsigned int sector_in_hunk; } *chd_img; #endif @@ -952,12 +953,13 @@ static int handlechd(const char *isofile) { chd_img->header = chd_get_header(chd_img->chd); - chd_img->buffer = malloc(chd_img->header->hunkbytes); + chd_img->buffer = malloc(chd_img->header->hunkbytes * 2); if (chd_img->buffer == NULL) goto fail_io; chd_img->sectors_per_hunk = chd_img->header->hunkbytes / (CD_FRAMESIZE_RAW + 
SUB_FRAMESIZE); - chd_img->current_hunk = (unsigned int)-1; + chd_img->current_hunk[0] = (unsigned int)-1; + chd_img->current_hunk[1] = (unsigned int)-1; cddaBigEndian = TRUE; @@ -1216,44 +1218,64 @@ static int cdread_compressed(FILE *f, unsigned int base, void *dest, int sector) } #ifdef HAVE_CHD +static unsigned char *chd_get_sector(unsigned int current_buffer, unsigned int sector_in_hunk) +{ + return chd_img->buffer + + current_buffer * chd_img->header->hunkbytes + + sector_in_hunk * (CD_FRAMESIZE_RAW + SUB_FRAMESIZE); +} + static int cdread_chd(FILE *f, unsigned int base, void *dest, int sector) { int hunk; - sector += base; + assert(base == 0); hunk = sector / chd_img->sectors_per_hunk; chd_img->sector_in_hunk = sector % chd_img->sectors_per_hunk; - if (hunk != chd_img->current_hunk) + if (hunk == chd_img->current_hunk[0]) + chd_img->current_buffer = 0; + else if (hunk == chd_img->current_hunk[1]) + chd_img->current_buffer = 1; + else { - chd_read(chd_img->chd, hunk, chd_img->buffer); - chd_img->current_hunk = hunk; + chd_read(chd_img->chd, hunk, chd_img->buffer + + chd_img->current_buffer * chd_img->header->hunkbytes); + chd_img->current_hunk[chd_img->current_buffer] = hunk; } if (dest != cdbuffer) // copy avoid HACK - memcpy(dest, chd_img->buffer[chd_img->sector_in_hunk], + memcpy(dest, chd_get_sector(chd_img->current_buffer, chd_img->sector_in_hunk), CD_FRAMESIZE_RAW); return CD_FRAMESIZE_RAW; } static int cdread_sub_chd(FILE *f, int sector) { + unsigned int sector_in_hunk; + unsigned int buffer; int hunk; if (!subChanMixed) return -1; hunk = sector / chd_img->sectors_per_hunk; - chd_img->sector_in_hunk = sector % chd_img->sectors_per_hunk; + sector_in_hunk = sector % chd_img->sectors_per_hunk; - if (hunk != chd_img->current_hunk) + if (hunk == chd_img->current_hunk[0]) + buffer = 0; + else if (hunk == chd_img->current_hunk[1]) + buffer = 1; + else { - chd_read(chd_img->chd, hunk, chd_img->buffer); - chd_img->current_hunk = hunk; + buffer = 
chd_img->current_buffer ^ 1; + chd_read(chd_img->chd, hunk, chd_img->buffer + + buffer * chd_img->header->hunkbytes); + chd_img->current_hunk[buffer] = hunk; } - memcpy(subbuffer, chd_img->buffer[chd_img->sector_in_hunk] + CD_FRAMESIZE_RAW, SUB_FRAMESIZE); + memcpy(subbuffer, chd_get_sector(buffer, sector_in_hunk) + CD_FRAMESIZE_RAW, SUB_FRAMESIZE); return SUB_FRAMESIZE; } #endif @@ -1279,7 +1301,7 @@ static unsigned char * CALLBACK ISOgetBuffer_compr(void) { #ifdef HAVE_CHD static unsigned char * CALLBACK ISOgetBuffer_chd(void) { - return chd_img->buffer[chd_img->sector_in_hunk] + 12; + return chd_get_sector(chd_img->current_buffer, chd_img->sector_in_hunk) + 12; } #endif From 1d5d35bc9d0d3d84873cd1d31870d09248ddc05b Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 3 Nov 2022 22:49:08 +0200 Subject: [PATCH 235/597] spu: fix a wrong assumption from 5aa94fa080e1b0a661b23aa912022dd464d41110 libretro/pcsx_rearmed#704 --- plugins/dfsound/registers.c | 6 ++---- plugins/dfsound/spu.c | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index e75f70861..e00939e6a 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -505,10 +505,8 @@ static void SetPitch(int ch,unsigned short val) // SET PITCH spu.s_chan[ch].sinc = NP << 4; spu.s_chan[ch].sinc_inv = 0; spu.SB[ch * SB_SIZE + 32] = 1; // -> freq change in simple interpolation mode: set flag - if (val) - spu.dwChannelsAudible |= 1u << ch; - else - spu.dwChannelsAudible &= ~(1u << ch); + + // don't mess spu.dwChannelsAudible as adsr runs independently } //////////////////////////////////////////////////////////////////////// diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 3b4c051df..1127cd711 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -251,8 +251,7 @@ static void StartSoundMain(int ch) spu.dwNewChannel&=~(1<iRawPitch) - spu.dwChannelsAudible|=1<ADSRX, d); if (d < ns_to) { 
spu.dwChannelsAudible &= ~(1 << ch); + s_chan->ADSRX.State = ADSR_RELEASE; s_chan->ADSRX.EnvelopeVol = 0; } } From 7e6d030e6da39c49b51cbdb2e9cf3feb0a2b9a56 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 5 Nov 2022 22:51:37 +0200 Subject: [PATCH 236/597] cdrom: don't report read too early libretro/pcsx_rearmed#706 --- libpcsxcore/cdrom.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 145ca32eb..0f1479e28 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1284,8 +1284,6 @@ static void cdrReadInterrupt(void) u8 subqPos[3]; int read_ok; - SetPlaySeekRead(cdr.StatP, STATUS_READ | STATUS_ROTATING); - memcpy(subqPos, cdr.SetSectorPlay, sizeof(subqPos)); msfiAdd(subqPos, cdr.SubqForwardSectors); UpdateSubq(subqPos); @@ -1295,6 +1293,9 @@ static void cdrReadInterrupt(void) return; } + // note: CdlGetlocL should work as soon as STATUS_READ is indicated + SetPlaySeekRead(cdr.StatP, STATUS_READ | STATUS_ROTATING); + read_ok = ReadTrack(cdr.SetSectorPlay); if (read_ok) buf = CDR_getBuffer(); From cbd88286c7e8473e1062fbe740ec59a302031531 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 12 Nov 2022 17:57:31 +0200 Subject: [PATCH 237/597] cdriso: unbreak cdda for chd notaz/pcsx_rearmed#272 --- libpcsxcore/cdriso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index 318957949..f47fcfddc 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -110,7 +110,7 @@ struct trackinfo { char start[3]; // MSF-format char length[3]; // MSF-format FILE *handle; // for multi-track images CDDA - unsigned int start_offset; // byte offset from start of above file + unsigned int start_offset; // byte offset from start of above file (chd: sector offset) }; #define MAXTRACKS 100 /* How many tracks can a CD hold? 
*/ @@ -1229,7 +1229,7 @@ static int cdread_chd(FILE *f, unsigned int base, void *dest, int sector) { int hunk; - assert(base == 0); + sector += base; hunk = sector / chd_img->sectors_per_hunk; chd_img->sector_in_hunk = sector % chd_img->sectors_per_hunk; From 688bdb9526d42181368e64ceaa6828727a10188c Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 12 Nov 2022 23:05:52 +0200 Subject: [PATCH 238/597] cdrom: make the timing hack conditional libretro/pcsx_rearmed#707 --- libpcsxcore/cdrom.c | 11 +++++++-- libpcsxcore/database.c | 53 ++++++++++++++++++++++++++++++++--------- libpcsxcore/psxcommon.h | 3 +++ 3 files changed, 54 insertions(+), 13 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 0f1479e28..18d6bf9eb 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -597,8 +597,14 @@ static u32 cdrAlignTimingHack(u32 cycles) * active), but before the game's handler loop reads I_STAT. The time * window for this is quite small (~1k cycles of so). Apparently this * somehow happens naturally on the real hardware. + * + * Note: always enforcing this breaks other games like Crash PAL version + * (inputs get dropped because bios handler doesn't see interrupts). */ - u32 vint_rel = rcnts[3].cycleStart + 63000 - psxRegs.cycle; + u32 vint_rel; + if (psxRegs.cycle - rcnts[3].cycleStart > 250000) + return cycles; + vint_rel = rcnts[3].cycleStart + 63000 - psxRegs.cycle; vint_rel += PSXCLK / 60; while ((s32)(vint_rel - cycles) < 0) vint_rel += PSXCLK / 60; @@ -1154,7 +1160,8 @@ void cdrInterrupt(void) { cycles = (cdr.Mode & 0x80) ? 
cdReadTime : cdReadTime * 2; cycles += seekTime; - cycles = cdrAlignTimingHack(cycles); + if (Config.hacks.cdr_read_timing) + cycles = cdrAlignTimingHack(cycles); CDRPLAYREAD_INT(cycles, 1); SetPlaySeekRead(cdr.StatP, STATUS_SEEK); diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 1ea8d43bf..f947b068d 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -5,16 +5,32 @@ /* It's duplicated from emu_if.c */ #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) -static const char MemorycardHack_db[8][10] = +static const char * const MemorycardHack_db[] = { /* Lifeforce Tenka, also known as Codename Tenka */ - {"SLES00613"}, - {"SLED00690"}, - {"SLES00614"}, - {"SLES00615"}, - {"SLES00616"}, - {"SLES00617"}, - {"SCUS94409"} + "SLES00613", "SLED00690", "SLES00614", "SLES00615", + "SLES00616", "SLES00617", "SCUS94409" +}; + +static const char * const cdr_read_hack_db[] = +{ + /* T'ai Fu - Wrath of the Tiger */ + "SLUS00787", +}; + +#define HACK_ENTRY(var, list) \ + { #var, &Config.hacks.var, list, ARRAY_SIZE(list) } + +static const struct +{ + const char *name; + boolean *var; + const char * const * id_list; + size_t id_list_len; +} +hack_db[] = +{ + HACK_ENTRY(cdr_read_timing, cdr_read_hack_db), }; static const struct @@ -42,10 +58,24 @@ cycle_multiplier_overrides[] = /* Function for automatic patching according to GameID. */ void Apply_Hacks_Cdrom() { - uint32_t i; - + size_t i, j; + + memset(&Config.hacks, 0, sizeof(Config.hacks)); + + for (i = 0; i < ARRAY_SIZE(hack_db); i++) + { + for (j = 0; j < hack_db[i].id_list_len; j++) + { + if (strncmp(CdromId, hack_db[i].id_list[j], 9)) + continue; + *hack_db[i].var = 1; + SysPrintf("using hack: %s\n", hack_db[i].name); + break; + } + } + /* Apply Memory card hack for Codename Tenka. 
(The game needs one of the memory card slots to be empty) */ - for(i=0;i Date: Sun, 13 Nov 2022 23:33:49 +0200 Subject: [PATCH 239/597] dma: add optional slow linked list walking libretro/pcsx_rearmed#478 libretro/pcsx_rearmed#264 libretro/pcsx_rearmed#132 notaz/pcsx_rearmed#95 --- frontend/libretro.c | 12 ++++++++++++ frontend/libretro_core_options.h | 15 +++++++++++++++ frontend/main.c | 1 + frontend/menu.c | 13 ++++++++++++- libpcsxcore/database.c | 13 +++++++++++++ libpcsxcore/mdec.c | 6 +++++- libpcsxcore/plugins.h | 2 +- libpcsxcore/psxcommon.h | 2 ++ libpcsxcore/psxdma.c | 28 ++++++++++++++++++++-------- libpcsxcore/psxhw.c | 2 ++ plugins/gpulib/gpu.c | 6 +++++- plugins/gpulib/gpu.h | 2 +- 12 files changed, 89 insertions(+), 13 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 42f6151fe..3e74b2302 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2117,6 +2117,18 @@ static void update_variables(bool in_flight) Config.Cdda = 0; } + var.value = NULL; + var.key = "pcsx_rearmed_gpu_slow_llists"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "disabled") == 0) + Config.GpuListWalking = 0; + else if (strcmp(var.value, "enabled") == 0) + Config.GpuListWalking = 1; + else // auto + Config.GpuListWalking = -1; + } + #ifdef THREAD_RENDERING var.key = "pcsx_rearmed_gpu_thread_rendering"; var.value = NULL; diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 39bc32b6c..935c6f975 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -423,6 +423,21 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled", }, + { + "pcsx_rearmed_gpu_slow_llists", + "(GPU) Slow linked list processing", + NULL, + "Slower but more accurate GPU linked list processing. Needed by only a few games like Vampire Hunter D. 
Should be autodetected in most cases.", + NULL, + "video", + { + { "auto", NULL }, + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "auto", + }, #ifdef GPU_NEON { "pcsx_rearmed_neon_interlace_enable", diff --git a/frontend/main.c b/frontend/main.c index dcac1d982..2773f7a1b 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -133,6 +133,7 @@ void emu_set_default_config(void) Config.icache_emulation = 0; Config.PsxAuto = 1; Config.cycle_multiplier = CYCLE_MULT_DEFAULT; + Config.GpuListWalking = -1; pl_rearmed_cbs.gpu_neon.allow_interlace = 2; // auto pl_rearmed_cbs.gpu_neon.enhancement_enable = diff --git a/frontend/menu.c b/frontend/menu.c index d1e0413a2..3a772d0e8 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -397,6 +397,7 @@ static const struct { CE_CONFIG_VAL(icache_emulation), CE_CONFIG_VAL(DisableStalls), CE_CONFIG_VAL(Cpu), + CE_CONFIG_VAL(GpuListWalking), CE_INTVAL(region), CE_INTVAL_V(g_scaler, 3), CE_INTVAL(g_gamma), @@ -1578,6 +1579,8 @@ static int menu_loop_speed_hacks(int id, int keys) return 0; } +static const char *men_gpul[] = { "Auto", "Off", "On", NULL }; + static const char h_cfg_cpul[] = "Shows CPU usage in %"; static const char h_cfg_spu[] = "Shows active SPU channels\n" "(green: normal, red: fmod, blue: noise)"; @@ -1591,10 +1594,12 @@ static const char h_cfg_nodrc[] = "Disable dynamic recompiler and use interpret #endif static const char h_cfg_shacks[] = "Breaks games but may give better performance"; static const char h_cfg_icache[] = "Support F1 games (only when dynarec is off)"; +static const char h_cfg_gpul[] = "Try enabling this if the game is missing some graphics\n" + "causes a performance hit"; static const char h_cfg_psxclk[] = "Over/under-clock the PSX, default is " DEFAULT_PSX_CLOCK_S "\n" "(adjust this if the game is too slow/too fast/hangs)"; -enum { AMO_XA, AMO_CDDA, AMO_IC, AMO_CPU }; +enum { AMO_XA, AMO_CDDA, AMO_IC, AMO_CPU, AMO_GPUL }; static menu_entry e_menu_adv_options[] = { @@ -1604,6 
+1609,7 @@ static menu_entry e_menu_adv_options[] = mee_onoff_h ("Disable XA Decoding", 0, menu_iopts[AMO_XA], 1, h_cfg_xa), mee_onoff_h ("Disable CD Audio", 0, menu_iopts[AMO_CDDA], 1, h_cfg_cdda), mee_onoff_h ("ICache emulation", 0, menu_iopts[AMO_IC], 1, h_cfg_icache), + mee_enum_h ("GPU l-list slow walking",0, menu_iopts[AMO_GPUL], men_gpul, h_cfg_gpul), #if !defined(DRC_DISABLE) || defined(LIGHTREC) mee_onoff_h ("Disable dynarec (slow!)",0, menu_iopts[AMO_CPU], 1, h_cfg_nodrc), #endif @@ -1627,9 +1633,14 @@ static int menu_loop_adv_options(int id, int keys) int i; for (i = 0; i < ARRAY_SIZE(opts); i++) *opts[i].mopt = *opts[i].opt; + menu_iopts[AMO_GPUL] = Config.GpuListWalking + 1; + me_loop(e_menu_adv_options, &sel); + for (i = 0; i < ARRAY_SIZE(opts); i++) *opts[i].opt = *opts[i].mopt; + Config.GpuListWalking = menu_iopts[AMO_GPUL] - 1; + return 0; } diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index f947b068d..ae6fe3680 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -18,6 +18,18 @@ static const char * const cdr_read_hack_db[] = "SLUS00787", }; +static const char * const gpu_slow_llist_db[] = +{ + /* Crash Bash */ + "SCES02834", "SCUS94570", "SCUS94616", "SCUS94654", + /* Final Fantasy IV */ + "SCES03840", "SLPM86028", "SLUS01360", + /* Spot Goes to Hollywood */ + "SLES00330", "SLPS00394", "SLUS00014", + /* Vampire Hunter D */ + "SLES02731", "SLPS02477", "SLPS03198", "SLUS01138", +}; + #define HACK_ENTRY(var, list) \ { #var, &Config.hacks.var, list, ARRAY_SIZE(list) } @@ -31,6 +43,7 @@ static const struct hack_db[] = { HACK_ENTRY(cdr_read_timing, cdr_read_hack_db), + HACK_ENTRY(gpu_slow_list_walking, gpu_slow_llist_db), }; static const struct diff --git a/libpcsxcore/mdec.c b/libpcsxcore/mdec.c index ca4245856..167a1cff1 100644 --- a/libpcsxcore/mdec.c +++ b/libpcsxcore/mdec.c @@ -476,6 +476,7 @@ void psxDma0(u32 adr, u32 bcr, u32 chcr) { int size; if (chcr != 0x01000201) { + log_unhandled("mdec0: invalid dma %08x\n", 
chcr); return; } @@ -545,7 +546,10 @@ void psxDma1(u32 adr, u32 bcr, u32 chcr) { int size; u32 words; - if (chcr != 0x01000200) return; + if (chcr != 0x01000200) { + log_unhandled("mdec1: invalid dma %08x\n", chcr); + return; + } words = (bcr >> 16) * (bcr & 0xffff); /* size in byte */ diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index 178f83a81..c997c611c 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -58,7 +58,7 @@ typedef void (CALLBACK* GPUwriteDataMem)(uint32_t *, int); typedef uint32_t (CALLBACK* GPUreadStatus)(void); typedef uint32_t (CALLBACK* GPUreadData)(void); typedef void (CALLBACK* GPUreadDataMem)(uint32_t *, int); -typedef long (CALLBACK* GPUdmaChain)(uint32_t *,uint32_t); +typedef long (CALLBACK* GPUdmaChain)(uint32_t *,uint32_t, uint32_t *); typedef void (CALLBACK* GPUupdateLace)(void); typedef long (CALLBACK* GPUconfigure)(void); typedef long (CALLBACK* GPUtest)(void); diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index e0876cf5b..b621326cd 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -141,12 +141,14 @@ typedef struct { boolean UseNet; boolean icache_emulation; boolean DisableStalls; + int GpuListWalking; int cycle_multiplier; // 100 for 1.0 int cycle_multiplier_override; u8 Cpu; // CPU_DYNAREC or CPU_INTERPRETER u8 PsxType; // PSX_TYPE_NTSC or PSX_TYPE_PAL struct { boolean cdr_read_timing; + boolean gpu_slow_list_walking; } hacks; #ifdef _WIN32 char Lang[256]; diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index d7c4caece..30aa9bdb9 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -127,7 +127,8 @@ static u32 gpuDmaChainSize(u32 addr) { } void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU - u32 *ptr; + u32 *ptr, madr_next, *madr_next_p; + int do_walking; u32 words; u32 size; @@ -179,22 +180,25 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU #ifdef PSXDMA_LOG PSXDMA_LOG("*** DMA 2 - GPU dma chain *** %lx addr = %lx size = %lx\n", chcr, madr, 
bcr); #endif + // when not emulating walking progress, end immediately + madr_next = 0xffffff; - size = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff); + do_walking = Config.GpuListWalking; + if (do_walking < 0) + do_walking = Config.hacks.gpu_slow_list_walking; + madr_next_p = do_walking ? &madr_next : NULL; + + size = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, madr_next_p); if ((int)size <= 0) size = gpuDmaChainSize(madr); - HW_GPU_STATUS &= SWAP32(~PSXGPU_nBUSY); - // we don't emulate progress, just busy flag and end irq, - // so pretend we're already at the last block - HW_DMA2_MADR = SWAPu32(0xffffff); + HW_GPU_STATUS &= SWAP32(~PSXGPU_nBUSY); + HW_DMA2_MADR = SWAPu32(madr_next); // Tekken 3 = use 1.0 only (not 1.5x) // Einhander = parse linked list in pieces (todo) - // Final Fantasy 4 = internal vram time (todo) // Rebel Assault 2 = parse linked list in pieces (todo) - // Vampire Hunter D = allow edits to linked list (todo) GPUDMA_INT(size); return; @@ -208,6 +212,14 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU } void gpuInterrupt() { + if (HW_DMA2_CHCR == SWAP32(0x01000401) && !(HW_DMA2_MADR & SWAP32(0x800000))) + { + u32 size, madr_next = 0xffffff; + size = GPU_dmaChain((u32 *)psxM, HW_DMA2_MADR & 0x1fffff, &madr_next); + HW_DMA2_MADR = SWAPu32(madr_next); + GPUDMA_INT(size); + return; + } if (HW_DMA2_CHCR & SWAP32(0x01000000)) { HW_DMA2_CHCR &= SWAP32(~0x01000000); diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index 483f4962d..27ddfeab5 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -519,6 +519,8 @@ void psxHwWrite16(u32 add, u16 value) { } #define DmaExec(n) { \ + if (value & SWAPu32(HW_DMA##n##_CHCR) & 0x01000000) \ + log_unhandled("dma" #n " %08x -> %08x\n", HW_DMA##n##_CHCR, value); \ HW_DMA##n##_CHCR = SWAPu32(value); \ \ if (SWAPu32(HW_DMA##n##_CHCR) & 0x01000000 && SWAPu32(HW_DMA_PCR) & (8 << (n * 4))) { \ diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 32a797d58..f468cf8b9 100644 --- 
a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -527,7 +527,7 @@ void GPUwriteData(uint32_t data) flush_cmd_buffer(); } -long GPUdmaChain(uint32_t *rambase, uint32_t start_addr) +long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr) { uint32_t addr, *list, ld_addr = 0; int len, left, count; @@ -559,6 +559,10 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr) log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len); } + if (progress_addr) { + *progress_addr = addr; + break; + } #define LD_THRESHOLD (8*1024) if (count >= LD_THRESHOLD) { if (count == LD_THRESHOLD) { diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 42b281763..717d2fe22 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -122,7 +122,7 @@ struct GPUFreeze; long GPUinit(void); long GPUshutdown(void); void GPUwriteDataMem(uint32_t *mem, int count); -long GPUdmaChain(uint32_t *rambase, uint32_t addr); +long GPUdmaChain(uint32_t *rambase, uint32_t addr, uint32_t *progress_addr); void GPUwriteData(uint32_t data); void GPUreadDataMem(uint32_t *mem, int count); uint32_t GPUreadData(void); From 4d311cb718868cf9d78238fcbc5643bc2551ae34 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 14 Nov 2022 01:17:58 +0200 Subject: [PATCH 240/597] cdrom: forget old sector on cd change libretro/pcsx_rearmed#689 --- libpcsxcore/cdrom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 18d6bf9eb..10fc61562 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -325,6 +325,7 @@ void cdrLidSeekInterrupt(void) if (stat.Status & STATUS_SHELLOPEN) { + memset(cdr.Prev, 0xff, sizeof(cdr.Prev)); cdr.DriveState = DRIVESTATE_LID_OPEN; CDRLID_INT(0x800); } From ae36bb287237de0a199f52de86dbfc6115d4cb70 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 15 Nov 2022 00:57:47 +0200 Subject: [PATCH 241/597] gpulib: allow commands to span list entries libretro/pcsx_rearmed#271 --- plugins/gpulib/gpu.c | 16 +++++++++++++--- 
1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index f468cf8b9..e9714e4c9 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -551,12 +551,22 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr if (len > 0) cpu_cycles += 5 + len; - log_io(".chain %08x #%d\n", (list - rambase) * 4, len); + log_io(".chain %08lx #%d+%d\n", + (long)(list - rambase) * 4, len, gpu.cmd_len); + if (unlikely(gpu.cmd_len > 0)) { + memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4); + gpu.cmd_len += len; + flush_cmd_buffer(); + continue; + } if (len) { left = do_cmd_buffer(list + 1, len); - if (left) - log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len); + if (left) { + memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4); + gpu.cmd_len = left; + log_anomaly("GPUdmaChain: %d/%d words left\n", left, len); + } } if (progress_addr) { From 399f666e98e02f2d451b0df140c57d1a308a1b86 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 11 Dec 2022 22:28:42 +0200 Subject: [PATCH 242/597] cdrom: adjust a timing hack notaz/pcsx_rearmed#276 --- libpcsxcore/cdrom.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 10fc61562..098b77d0b 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1503,14 +1503,22 @@ void cdrWrite3(unsigned char rt) { break; // transfer case 1: if (cdr.Stat & rt) { + u32 nextCycle = psxRegs.intCycle[PSXINT_CDR].sCycle + + psxRegs.intCycle[PSXINT_CDR].cycle; #ifdef CDR_LOG_CMD_IRQ - SysPrintf("%u cdrom: ack %02x (w %02x)\n", - psxRegs.cycle, cdr.Stat & rt, rt); + SysPrintf("%u cdrom: ack %02x (w=%02x p=%d,%d)\n", + psxRegs.cycle, cdr.Stat & rt, rt, + !!(psxRegs.interrupt & (1 << PSXINT_CDR)), + nextCycle - psxRegs.cycle); #endif - // note: Croc vs Discworld Noir + // note: Croc, Shadow Tower (more) vs Discworld Noir (<993) if (!(psxRegs.interrupt & (1 << PSXINT_CDR)) && 
(cdr.CmdInProgress || cdr.Irq1Pending)) - CDR_INT(850); // 711-993 + { + s32 c = 2048 - (psxRegs.cycle - nextCycle); + c = MAX_VALUE(c, 512); + CDR_INT(c); + } } cdr.Stat &= ~rt; From 3499746e173b925f317b6e1509ea933fb595f4fc Mon Sep 17 00:00:00 2001 From: Jools Wills Date: Mon, 12 Dec 2022 15:29:05 +0000 Subject: [PATCH 243/597] Fix building gpu_unai on armv6 f23b103c8248c10855949bfb2185b6b10d4f0457 was missing changes to gpu_unai/gpulib_if.cpp --- plugins/gpu_unai/gpulib_if.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 2dedbf83d..45eac41d2 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -173,7 +173,7 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) linesInterlace = force_interlace; #ifdef HAVE_PRE_ARMV7 /* XXX */ - linesInterlace |= gpu.status.interlace; + linesInterlace |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE); #endif for (; list < list_end; list += 1 + len) From a4621d435f84acf094a6601c3a444cc550f82929 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 8 Mar 2023 11:30:00 +0000 Subject: [PATCH 244/597] dfsound: Fix issues on big-endian systems Without this fix, the BIOS' music is missing on big-endian systems. The XA and reverb code are also fixed, which fixes games like Vib-Ribbon. 
Signed-off-by: Paul Cercueil --- plugins/dfsound/registers.c | 5 +++-- plugins/dfsound/reverb.c | 7 ++++--- plugins/dfsound/spu.h | 8 ++++++++ plugins/dfsound/xa.c | 9 +++++---- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index e00939e6a..badd0af06 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -22,6 +22,7 @@ #include "externals.h" #include "registers.h" #include "spu_config.h" +#include "spu.h" static void SoundOn(int start,int end,unsigned short val); static void SoundOff(int start,int end,unsigned short val); @@ -127,7 +128,7 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, break; //-------------------------------------------------// case H_SPUdata: - *(unsigned short *)(spu.spuMemC + spu.spuAddr) = val; + *(unsigned short *)(spu.spuMemC + spu.spuAddr) = HTOLE16(val); spu.spuAddr += 2; spu.spuAddr &= 0x7fffe; break; @@ -334,7 +335,7 @@ unsigned short CALLBACK SPUreadRegister(unsigned long reg) case H_SPUdata: { - unsigned short s = *(unsigned short *)(spu.spuMemC + spu.spuAddr); + unsigned short s = LE16TOH(*(unsigned short *)(spu.spuMemC + spu.spuAddr)); spu.spuAddr += 2; spu.spuAddr &= 0x7fffe; return s; diff --git a/plugins/dfsound/reverb.c b/plugins/dfsound/reverb.c index ec570fb3e..de9b804af 100644 --- a/plugins/dfsound/reverb.c +++ b/plugins/dfsound/reverb.c @@ -20,6 +20,7 @@ ***************************************************************************/ #include "stdafx.h" +#include "spu.h" #define _IN_REVERB @@ -50,16 +51,16 @@ INLINE int rvb2ram_offs(int curr, int space, int iOff) // get_buffer content helper: takes care about wraps #define g_buffer(var) \ - ((int)(signed short)spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->var)]) + ((int)(signed short)LE16TOH(spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->var)])) // saturate iVal and store it as var #define s_buffer(var, iVal) \ ssat32_to_16(iVal); \ - 
spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->var)] = iVal + spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->var)] = HTOLE16(iVal) #define s_buffer1(var, iVal) \ ssat32_to_16(iVal); \ - spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->var + 1)] = iVal + spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->var + 1)] = HTOLE16(iVal) //////////////////////////////////////////////////////////////////////// diff --git a/plugins/dfsound/spu.h b/plugins/dfsound/spu.h index 0cef6520d..334c68099 100644 --- a/plugins/dfsound/spu.h +++ b/plugins/dfsound/spu.h @@ -18,6 +18,14 @@ #ifndef __P_SPU_H__ #define __P_SPU_H__ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define HTOLE16(x) __builtin_bswap16(x) +#define LE16TOH(x) __builtin_bswap16(x) +#else +#define HTOLE16(x) (x) +#define LE16TOH(x) (x) +#endif + void ClearWorkingState(void); void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycle, int is_start); int CALLBACK SPUplayCDDAchannel(short *pcm, int bytes, unsigned int cycle, int is_start); diff --git a/plugins/dfsound/xa.c b/plugins/dfsound/xa.c index c7a84fd01..397ed592f 100644 --- a/plugins/dfsound/xa.c +++ b/plugins/dfsound/xa.c @@ -16,6 +16,7 @@ ***************************************************************************/ #include "stdafx.h" +#include "spu.h" #define _IN_XA #include @@ -60,8 +61,8 @@ INLINE void MixXA(int *SSumLR, int ns_to, int decode_pos) SSumLR[ns++] += l; SSumLR[ns++] += r; - spu.spuMem[cursor] = v; - spu.spuMem[cursor + 0x400/2] = v >> 16; + spu.spuMem[cursor] = HTOLE16(v); + spu.spuMem[cursor + 0x400/2] = HTOLE16(v >> 16); cursor = (cursor + 1) & 0x1ff; } spu.XALastVal = v; @@ -80,8 +81,8 @@ INLINE void MixXA(int *SSumLR, int ns_to, int decode_pos) SSumLR[ns++] += l; SSumLR[ns++] += r; - spu.spuMem[cursor] = v; - spu.spuMem[cursor + 0x400/2] = v >> 16; + spu.spuMem[cursor] = HTOLE16(v); + spu.spuMem[cursor + 0x400/2] = HTOLE16(v >> 16); cursor = (cursor + 1) & 0x1ff; } spu.XALastVal = v; From a5ff8be2c598043b351f7eb2dc2d262fc61f63c5 Mon Sep 
17 00:00:00 2001 From: notaz Date: Wed, 5 Jul 2023 00:12:44 +0300 Subject: [PATCH 245/597] spu: irq adjustments according to MiSTer libretro/pcsx_rearmed#725 --- plugins/dfsound/dma.c | 61 ++++++++++++++++++++++--------------- plugins/dfsound/externals.h | 1 + plugins/dfsound/registers.c | 2 ++ plugins/dfsound/spu.c | 14 ++++++++- 4 files changed, 52 insertions(+), 26 deletions(-) diff --git a/plugins/dfsound/dma.c b/plugins/dfsound/dma.c index 43019f1d9..2f2d69a18 100644 --- a/plugins/dfsound/dma.c +++ b/plugins/dfsound/dma.c @@ -20,6 +20,7 @@ #define _IN_DMA #include "externals.h" +#include "registers.h" //////////////////////////////////////////////////////////////////////// // READ DMA (one value) @@ -28,6 +29,7 @@ unsigned short CALLBACK SPUreadDMA(void) { unsigned short s = *(unsigned short *)(spu.spuMemC + spu.spuAddr); + check_irq_io(spu.spuAddr); spu.spuAddr += 2; spu.spuAddr &= 0x7fffe; @@ -41,26 +43,27 @@ unsigned short CALLBACK SPUreadDMA(void) void CALLBACK SPUreadDMAMem(unsigned short *pusPSXMem, int iSize, unsigned int cycles) { - int i; + unsigned int addr = spu.spuAddr, irq_addr = regAreaGet(H_SPUirqAddr) << 3; + int i, irq; do_samples_if_needed(cycles, 1); - - for(i=0;idirty)) REVERBPrep(); From 88b0dc1df1a0e7b6ec0a505bcb0c19061f8dab6e Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 5 Jul 2023 00:23:18 +0300 Subject: [PATCH 246/597] spu: fix typo --- plugins/dfsound/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/dfsound/dma.c b/plugins/dfsound/dma.c index 2f2d69a18..eb85a7316 100644 --- a/plugins/dfsound/dma.c +++ b/plugins/dfsound/dma.c @@ -56,7 +56,7 @@ void CALLBACK SPUreadDMAMem(unsigned short *pusPSXMem, int iSize, addr &= 0x7fffe; } if (irq && (spu.spuCtrl & CTRL_IRQ)) - log_unhandled("wdma spu irq: %x/%x+%x\n", irq_addr, spu.spuAddr, iSize * 2); + log_unhandled("rdma spu irq: %x/%x+%x\n", irq_addr, spu.spuAddr, iSize * 2); spu.spuAddr = addr; } From 53d4b74d2fa40b17d4c692b9f3ed74fe40428926 Mon Sep 17 
00:00:00 2001 From: notaz Date: Thu, 6 Jul 2023 18:04:55 +0300 Subject: [PATCH 247/597] spu: fix some threading issues but others remain, so disabling in next commit --- plugins/dfsound/externals.h | 1 + plugins/dfsound/registers.c | 2 +- plugins/dfsound/spu.c | 41 ++++++++++++++++++++++--------------- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index f42ceff12..b63ac3c4e 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -117,6 +117,7 @@ typedef struct unsigned int bFMod:2; // freq mod (0=off, 1=sound channel, 2=freq channel) unsigned int prevflags:3; // flags from previous block unsigned int bIgnoreLoop:1; // Ignore loop + unsigned int bNewPitch:1; // pitch changed int iLeftVolume; // left volume int iRightVolume; // right volume ADSRInfoEx ADSRX; diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index a8fb59ed2..2f5774944 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -506,7 +506,7 @@ static void SetPitch(int ch,unsigned short val) // SET PITCH spu.s_chan[ch].iRawPitch = NP; spu.s_chan[ch].sinc = NP << 4; spu.s_chan[ch].sinc_inv = 0; - spu.SB[ch * SB_SIZE + 32] = 1; // -> freq change in simple interpolation mode: set flag + spu.s_chan[ch].bNewPitch = 1; // don't mess spu.dwChannelsAudible as adsr runs independently } diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 7f8ab1df9..97f07aaec 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -286,9 +286,8 @@ INLINE int FModChangeFrequency(int *SB, int pitch, int ns) if(NP<0x1) NP=0x1; sinc=NP<<4; // calc frequency - if(spu_config.iUseInterpolation==1) // freq change in simple interpolation mode - SB[32]=1; iFMod[ns]=0; + SB[32]=1; // reset interpolation return sinc; } @@ -780,6 +779,9 @@ static void do_channels(int ns_to) s_chan = &spu.s_chan[ch]; SB = spu.SB + ch * SB_SIZE; sinc = s_chan->sinc; + if (spu.s_chan[ch].bNewPitch) + 
SB[32] = 1; // reset interpolation + spu.s_chan[ch].bNewPitch = 0; if (s_chan->bNoise) d = do_samples_noise(ch, ns_to); @@ -868,11 +870,14 @@ static struct spu_worker { int sinc; int start; int loop; - int ns_to; short vol_l; short vol_r; + unsigned short ns_to; + unsigned short bNoise:1; + unsigned short bFMod:2; + unsigned short bRVBActive:1; + unsigned short bNewPitch:1; ADSRInfoEx adsr; - // might also want to add fmod flags.. } ch[24]; int SSumLR[NSSIZE * 2]; } i[4]; @@ -950,6 +955,10 @@ static void queue_channel_work(int ns_to, unsigned int silentch) work->ch[ch].vol_r = s_chan->iRightVolume; work->ch[ch].start = s_chan->pCurr - spu.spuMemC; work->ch[ch].loop = s_chan->pLoop - spu.spuMemC; + work->ch[ch].bNoise = s_chan->bNoise; + work->ch[ch].bFMod = s_chan->bFMod; + work->ch[ch].bRVBActive = s_chan->bRVBActive; + work->ch[ch].bNewPitch = s_chan->bNewPitch; if (s_chan->prevflags & 1) work->ch[ch].start = work->ch[ch].loop; @@ -963,6 +972,7 @@ static void queue_channel_work(int ns_to, unsigned int silentch) s_chan->ADSRX.State = ADSR_RELEASE; s_chan->ADSRX.EnvelopeVol = 0; } + s_chan->bNewPitch = 0; } work->rvb_addr = 0; @@ -982,8 +992,6 @@ static void queue_channel_work(int ns_to, unsigned int silentch) static void do_channel_work(struct work_item *work) { unsigned int mask; - unsigned int decode_dirty_ch = 0; - const SPUCHAN *s_chan; int *SB, sinc, spos, sbpos; int d, ch, ns_to; @@ -1008,15 +1016,16 @@ static void do_channel_work(struct work_item *work) sbpos = work->ch[ch].sbpos; sinc = work->ch[ch].sinc; - s_chan = &spu.s_chan[ch]; SB = spu.SB + ch * SB_SIZE; + if (work->ch[ch].bNewPitch) + SB[32] = 1; // reset interpolation - if (s_chan->bNoise) + if (work->ch[ch].bNoise) do_lsfr_samples(d, work->ctrl, &spu.dwNoiseCount, &spu.dwNoiseVal); - else if (s_chan->bFMod == 2 - || (s_chan->bFMod == 0 && spu_config.iUseInterpolation == 0)) + else if (work->ch[ch].bFMod == 2 + || (work->ch[ch].bFMod == 0 && spu_config.iUseInterpolation == 0)) 
do_samples_noint(decode_block_work, work, ch, d, SB, sinc, &spos, &sbpos); - else if (s_chan->bFMod == 0 && spu_config.iUseInterpolation == 1) + else if (work->ch[ch].bFMod == 0 && spu_config.iUseInterpolation == 1) do_samples_simple(decode_block_work, work, ch, d, SB, sinc, &spos, &sbpos); else do_samples_default(decode_block_work, work, ch, d, SB, sinc, &spos, &sbpos); @@ -1028,14 +1037,11 @@ static void do_channel_work(struct work_item *work) } if (ch == 1 || ch == 3) - { - do_decode_bufs(spu.spuMem, ch/2, ns_to, work->decode_pos); - decode_dirty_ch |= 1 << ch; - } + do_decode_bufs(spu.spuMem, ch/2, ns_to, work->decode_pos); - if (s_chan->bFMod == 2) // fmod freq channel + if (work->ch[ch].bFMod == 2) // fmod freq channel memcpy(iFMod, &ChanBuf, ns_to * sizeof(iFMod[0])); - if (s_chan->bRVBActive && work->rvb_addr) + if (work->ch[ch].bRVBActive && work->rvb_addr) mix_chan_rvb(work->SSumLR, ns_to, work->ch[ch].vol_l, work->ch[ch].vol_r, RVB); else @@ -1158,6 +1164,7 @@ void do_samples(unsigned int cycles_to, int do_direct) } else { queue_channel_work(ns_to, silentch); + //sync_worker_thread(1); // uncomment for debug } // advance "stopped" channels that can cause irqs From 16809ab9f432e902b4b049b4f1f01a0f42e92914 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 6 Jul 2023 18:47:31 +0300 Subject: [PATCH 248/597] libretro: disable spu thread by default, with option to reenable it may still cause problems, especially with reverb, fixing it is too much work for too little benefit --- frontend/libretro.c | 10 ++++++++++ frontend/libretro_core_options.h | 16 ++++++++++++++++ frontend/main.c | 3 ++- plugins/dfsound/spu.c | 3 +-- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 3e74b2302..f4b23cdfb 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2074,6 +2074,16 @@ static void update_variables(bool in_flight) spu_config.iUseInterpolation = 0; } + var.value = NULL; + var.key = 
"pcsx_rearmed_spu_thread"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + spu_config.iUseThread = 1; + else + spu_config.iUseThread = 0; + } + #ifndef _WIN32 var.value = NULL; var.key = "pcsx_rearmed_async_cd"; diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 935c6f975..7d070f9aa 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -752,6 +752,22 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "enabled", }, +#if !defined(THREAD_ENABLED) && !defined(_WIN32) && !defined(NO_OS) + { + "pcsx_rearmed_spu_thread", + "Threaded SPU", + NULL, + "Emulates the PSX SPU on another CPU thread. May cause audio glitches in some games.", + NULL, + "audio", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, +#endif // THREAD_ENABLED { "pcsx_rearmed_show_input_settings", "Show Input Settings", diff --git a/frontend/main.c b/frontend/main.c index 2773f7a1b..e0635ef4b 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -158,7 +158,8 @@ void emu_set_default_config(void) spu_config.iXAPitch = 0; spu_config.iVolume = 768; spu_config.iTempo = 0; - spu_config.iUseThread = 1; // no effect if only 1 core is detected + // may cause issues, no effect if only 1 core is detected + spu_config.iUseThread = 0; #if defined(HAVE_PRE_ARMV7) && !defined(_3DS) /* XXX GPH hack */ spu_config.iUseReverb = 0; spu_config.iUseInterpolation = 0; diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 97f07aaec..ebebd2a2c 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -18,8 +18,7 @@ * * ***************************************************************************/ -#if !defined(_WIN32) && !defined(NO_OS) -#include // gettimeofday in xa.c +#if !defined(THREAD_ENABLED) && !defined(_WIN32) && !defined(NO_OS) #define THREAD_ENABLED 1 #endif #include "stdafx.h" From 
4904809d2ed121844ea23cf0ea580785d3da1f0b Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 6 Jul 2023 23:51:41 +0300 Subject: [PATCH 249/597] sbi: fix parser thinking it failed it still worked though as it kept sbi_sectors around --- libpcsxcore/ppf.c | 52 ++++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/libpcsxcore/ppf.c b/libpcsxcore/ppf.c index 18c5413ed..454290d0d 100644 --- a/libpcsxcore/ppf.c +++ b/libpcsxcore/ppf.c @@ -355,6 +355,8 @@ void BuildPPFCache() { unsigned char *sbi_sectors; int LoadSBI(const char *fname, int sector_count) { + int good_sectors = 0; + int clean_eof = 0; char buffer[16]; FILE *sbihandle; u8 sbitime[3], t; @@ -365,21 +367,33 @@ int LoadSBI(const char *fname, int sector_count) { return -1; sbi_sectors = calloc(1, sector_count / 8); - if (sbi_sectors == NULL) { - fclose(sbihandle); - return -1; - } + if (sbi_sectors == NULL) + goto end; // 4-byte SBI header if (fread(buffer, 1, 4, sbihandle) != 4) - goto fail_io; + goto end; while (1) { s = fread(sbitime, 1, 3, sbihandle); if (s != 3) - goto fail_io; + { + if (s == 0) + clean_eof = 1; + break; + } + s = MSF2SECT(btoi(sbitime[0]), btoi(sbitime[1]), btoi(sbitime[2])); + if (s < sector_count) { + sbi_sectors[s >> 3] |= 1 << (s&7); + good_sectors++; + } + else + SysPrintf(_("SBI sector %d >= %d?\n"), s, sector_count); + + // skip to the next record if (fread(&t, 1, sizeof(t), sbihandle) != sizeof(t)) - goto fail_io; + break; + s = -1; switch (t) { default: case 1: @@ -390,24 +404,24 @@ int LoadSBI(const char *fname, int sector_count) { s = 3; break; } - fseek(sbihandle, s, SEEK_CUR); - - s = MSF2SECT(btoi(sbitime[0]), btoi(sbitime[1]), btoi(sbitime[2])); - if (s < sector_count) - sbi_sectors[s >> 3] |= 1 << (s&7); - else - SysPrintf(_("SBI sector %d >= %d?\n"), s, sector_count); + if (s < 0) + break; + if (fseek(sbihandle, s, SEEK_CUR)) + break; } fclose(sbihandle); return 0; -fail_io: -#ifndef NDEBUG - SysPrintf(_("File IO error 
in <%s:%s>.\n"), __FILE__, __func__); -#endif +end: + if (!clean_eof) + SysPrintf(_("SBI: parse failure at 0x%lx\n"), ftell(sbihandle)); + if (!good_sectors) { + free(sbi_sectors); + sbi_sectors = NULL; + } fclose(sbihandle); - return -1; + return sbi_sectors ? 0 : -1; } void UnloadSBI(void) { From 8cb4b1b67091f9c5e8c448e3db15eb36712cdb94 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 6 Jul 2023 23:55:43 +0300 Subject: [PATCH 250/597] fix missed double resolution change --- plugins/gpulib/vout_pl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index 1c98b55af..46af2590b 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -44,10 +44,10 @@ static void check_mode_change(int force) } // width|rgb24 change? - if (force || (gpu.status ^ old_status) & ((7<<16)|(1<<21)) || h != old_h) + if (force || (gpu.status ^ old_status) & ((7<<16)|(1<<21)) || h_out != old_h) { old_status = gpu.status; - old_h = h; + old_h = h_out; cbs->pl_vout_set_mode(w_out, h_out, w, h, (gpu.status & PSX_GPU_STATUS_RGB24) ? 
24 : 16); From 8127758678756717e7732ce6e4ee807150d9739f Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 7 Jul 2023 00:44:11 +0300 Subject: [PATCH 251/597] fix another missed double resolution change --- plugins/gpulib/gpu.h | 1 + plugins/gpulib/vout_pl.c | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 717d2fe22..4637a71c0 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -76,6 +76,7 @@ struct psx_gpu { uint32_t hcnt; } last_list; uint32_t last_vram_read_frame; + uint32_t w_out_old, h_out_old, status_vo_old; } state; struct { int32_t set:3; /* -1 auto, 0 off, 1-3 fixed */ diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index 46af2590b..a6a3f63c8 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -27,8 +27,6 @@ int vout_finish(void) static void check_mode_change(int force) { - static uint32_t old_status; - static int old_h; int w = gpu.screen.hres; int h = gpu.screen.h; int w_out = w; @@ -44,10 +42,12 @@ static void check_mode_change(int force) } // width|rgb24 change? - if (force || (gpu.status ^ old_status) & ((7<<16)|(1<<21)) || h_out != old_h) + if (force || (gpu.status ^ gpu.state.status_vo_old) & ((7<<16)|(1<<21)) + || w_out != gpu.state.w_out_old || h_out != gpu.state.h_out_old) { - old_status = gpu.status; - old_h = h_out; + gpu.state.status_vo_old = gpu.status; + gpu.state.w_out_old = w_out; + gpu.state.h_out_old = h_out; cbs->pl_vout_set_mode(w_out, h_out, w, h, (gpu.status & PSX_GPU_STATUS_RGB24) ? 
24 : 16); From 203769fe909f0f98af8c007629e7d49ef265ba07 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 7 Jul 2023 21:27:15 +0300 Subject: [PATCH 252/597] make: try a different way to detect 32bit neon --- include/arm_features.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/arm_features.h b/include/arm_features.h index 7c82ff38f..4f216a3c2 100644 --- a/include/arm_features.h +++ b/include/arm_features.h @@ -47,7 +47,10 @@ #endif -/* no need for HAVE_NEON - GCC defines __ARM_NEON__ consistently */ +/* gcc defines __ARM_NEON__ consistently for 32bit, but apple clang defines it for 64bit also... */ +#if defined(HAVE_ARMV7) && defined(__ARM_NEON__) +#define HAVE_NEON32 +#endif /* global function/external symbol */ #ifndef __MACH__ From f189413eb7a73f42e27184ebf3609d77cf9d13a7 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 8 Jul 2023 00:18:08 +0300 Subject: [PATCH 253/597] cspace: fix more alignment issues libretro/pcsx_rearmed#719 --- frontend/blit320.s | 3 +++ frontend/cspace.c | 5 +++-- frontend/cspace_arm.S | 6 ++++++ frontend/cspace_neon.S | 8 +++++--- frontend/plugin_lib.c | 2 -- 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/frontend/blit320.s b/frontend/blit320.s index 201fdea60..2d50dfd78 100644 --- a/frontend/blit320.s +++ b/frontend/blit320.s @@ -25,6 +25,7 @@ blit320_640: stmfd sp!, {r4-r8,lr} mov r12, #40 + bic r1, r1, #3 0: ldmia r1!, {r2-r8,lr} lhw_str r2, r3 @@ -40,6 +41,7 @@ blit320_640: blit320_512: stmfd sp!, {r4-r8,lr} mov r12, #32 + bic r1, r1, #3 0: ldmia r1!, {r2-r8,lr} lsl r2, #16 @@ -73,6 +75,7 @@ blit320_512: blit320_368: stmfd sp!, {r4-r8,lr} mov r12, #23 + bic r1, r1, #3 0: ldmia r1!, {r2-r8,lr} unaligned_str r2, r3 @ 1,2 diff --git a/frontend/cspace.c b/frontend/cspace.c index 2b528a5a1..785b3d137 100644 --- a/frontend/cspace.c +++ b/frontend/cspace.c @@ -8,6 +8,7 @@ * See the COPYING file in the top-level directory. 
*/ +#include #include "cspace.h" /* @@ -30,7 +31,6 @@ || (defined(__GNUC__) && __GNUC__ >= 5)) \ && __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ -#include #include #if defined(__ARM_NEON) || defined(__ARM_NEON__) @@ -93,7 +93,8 @@ void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_, void bgr555_to_rgb565(void *dst_, const void *src_, int bytes) { - const unsigned int *src = src_; + // source can be misaligned, but it's very rare, so just force + const unsigned int *src = (const void *)((intptr_t)src_ & ~3); unsigned int *dst = dst_; unsigned int x, p, r, g, b; diff --git a/frontend/cspace_arm.S b/frontend/cspace_arm.S index 67778da58..177b08583 100644 --- a/frontend/cspace_arm.S +++ b/frontend/cspace_arm.S @@ -34,6 +34,12 @@ FUNCTION(bgr555_to_rgb565): @ void *dst, const void *src, int bytes orr lr, lr, lsl #16 blt 1f + @ src can be unaligned, but that's very rare, so just force it. + @ The manual says unaligned ldm should fault, and it does on + @ cortex-a78's 32bit mode, but curiously on cortex-a8 it just + @ works and loads the data correctly. + bic r1, r1, #3 + 0: ldmia r1!, {r3-r10} subs r2, #4*8 diff --git a/frontend/cspace_neon.S b/frontend/cspace_neon.S index 4928b44ab..3a89fdb90 100644 --- a/frontend/cspace_neon.S +++ b/frontend/cspace_neon.S @@ -40,7 +40,8 @@ FUNCTION(bgr555_to_rgb565): @ dst, src, bytes pld [r1, #64*2] @ Pulls 15-bit BGR color values (which are actually 16 bits) into q0-q3. @ example: q0 = 0111 1110 0101 0011 - vldmia r1!, {q0-q3} + vld1.16 {d0-d3}, [r1]! + vld1.16 {d4-d7}, [r1]! @ Shift BGR color 1 bit to the left, discarding MSB and preparing for vbit. @ MSB is used for transparency (not needed here, and can mess with green). @ example: q0 = 1111 1100 1010 0110 @@ -113,7 +114,8 @@ FUNCTION(bgr555_to_rgb565_b): @ dst, src, bytes, int brightness2k // 0-0x0800 vdup.16 q14, r3 0: pld [r1, #64*2] - vldmia r1!, {q0-q3} + vld1.16 {d0-d3}, [r1]! + vld1.16 {d4-d7}, [r1]! 
vand.u16 q8, q0, q14 vand.u16 q9, q1, q14 vand.u16 q10, q2, q14 @@ -270,4 +272,4 @@ FUNCTION(rgb888_to_rgb565): @ dst, src, bytes bx lr -@ vim:filetype=armasm +@ vim:filetype=armasm:expandtab diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 171296d25..d5cec766c 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -402,8 +402,6 @@ static void pl_vout_flip(const void *vram, int stride, int bgr24, int w, int h) #endif else { - src = (void *)((uintptr_t)src & ~3); // align for the blitter - for (; h1-- > 0; dest += dstride * 2, src += stride) { bgr555_to_rgb565(dest, src, w * 2); From e3973c6949814717f22e3db91129829a62941232 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 8 Jul 2023 17:53:56 +0300 Subject: [PATCH 254/597] drc: fix some table math libretro/pcsx_rearmed#713 --- libpcsxcore/new_dynarec/pcsxmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index 69a4c99df..a32b41830 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -47,7 +47,8 @@ void map_item(uintptr_t *out, const void *h, uintptr_t flag) // size must be power of 2, at least 4k #define map_l1_mem(tab, i, addr, size, base) \ - map_item(&tab[((addr)>>12) + i], (u8 *)(base) - (u32)(addr) - ((i << 12) & ~(size - 1)), 0) + map_item(&tab[((addr)>>12) + i], \ + (u8 *)(base) - (u32)((addr) + ((i << 12) & ~(size - 1))), 0) #define IOMEM32(a) (((a) & 0xfff) / 4) #define IOMEM16(a) (0x1000/4 + (((a) & 0xfff) / 2)) From d75460620efa21df695967c72a92265dac04001f Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 9 Jul 2023 00:07:06 +0300 Subject: [PATCH 255/597] drc: minor cleanup --- libpcsxcore/new_dynarec/assem_arm.c | 4 ++-- libpcsxcore/new_dynarec/assem_arm64.c | 2 +- libpcsxcore/new_dynarec/emu_if.c | 4 ++-- libpcsxcore/new_dynarec/emu_if.h | 5 ----- libpcsxcore/new_dynarec/new_dynarec.c | 16 ++++++++-------- libpcsxcore/r3000a.h | 1 - 6 files changed, 13 
insertions(+), 19 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 95007dfaf..6af93e22e 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -430,7 +430,7 @@ static void emit_loadreg(int r, int hr) //case HIREG: addr = &hi; break; //case LOREG: addr = &lo; break; case CCREG: addr = &cycle_count; break; - case CSREG: addr = &Status; break; + case CSREG: addr = &psxRegs.CP0.n.Status; break; case INVCP: addr = &invc_ptr; break; case ROREG: addr = &ram_offset; break; default: @@ -1752,7 +1752,7 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, emit_readword(&last_count,3); emit_addimm(cc<0?2:cc,adj,2); emit_add(2,3,2); - emit_writeword(2,&Count); + emit_writeword(2,&psxRegs.cycle); } emit_far_call(handler); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 271bee580..67ce02ada 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -463,7 +463,7 @@ static void emit_loadreg(u_int r, u_int hr) //case HIREG: addr = &hi; break; //case LOREG: addr = &lo; break; case CCREG: addr = &cycle_count; break; - case CSREG: addr = &Status; break; + case CSREG: addr = &psxRegs.CP0.n.Status; break; case INVCP: addr = &invc_ptr; is64 = 1; break; case ROREG: addr = &ram_offset; is64 = 1; break; default: diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index dc17f2d45..33319ba6a 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -84,7 +84,7 @@ static void irq_test(void) } } - if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) { + if ((psxHu32(0x1070) & psxHu32(0x1074)) && (psxRegs.CP0.n.Status & 0x401) == 0x401) { psxException(0x400, 0); pending_exception = 1; } @@ -110,7 +110,7 @@ void pcsx_mtc0(u32 reg, u32 val) evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); 
MTC0(&psxRegs, reg, val); gen_interupt(); - if (Cause & Status & 0x0300) // possible sw irq + if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.Status & 0x0300) // possible sw irq pending_exception = 1; } diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index 7fa0a171a..b49b84bad 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -14,11 +14,6 @@ extern int hi, lo; /* same as psxRegs.CP0.n.* */ extern int reg_cop0[]; -#define Status psxRegs.CP0.n.Status -#define Cause psxRegs.CP0.n.Cause -#define EPC psxRegs.CP0.n.EPC -#define BadVAddr psxRegs.CP0.n.BadVAddr -#define Count psxRegs.cycle // psxRegs.CP0.n.Count /* COP2/GTE */ enum gte_opcodes { diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 276ef8afd..2673a6ea9 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -783,10 +783,10 @@ static void noinline *get_addr(u_int vaddr, int can_compile) return ndrc_get_addr_ht(vaddr); // generate an address error - Status|=2; - Cause=(vaddr<<31)|(4<<2); - EPC=(vaddr&1)?vaddr-5:vaddr; - BadVAddr=(vaddr&~1); + psxRegs.CP0.n.Status |= 2; + psxRegs.CP0.n.Cause = (vaddr<<31) | (4<<2); + psxRegs.CP0.n.EPC = (vaddr&1) ? vaddr-5 : vaddr; + psxRegs.CP0.n.BadVAddr = vaddr & ~1; return ndrc_get_addr_ht(0x80000080); } @@ -3500,7 +3500,7 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); emit_addimm(HOST_CCREG,ccadj_,HOST_CCREG); - emit_writeword(HOST_CCREG,&Count); + emit_writeword(HOST_CCREG,&psxRegs.cycle); } // What a mess. The status register (12) can enable interrupts, // so needs a special case to handle a pending interrupt. 
@@ -3532,7 +3532,7 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) emit_movimm(copr,0); emit_far_call(pcsx_mtc0); if(copr==9||copr==11||copr==12||copr==13) { - emit_readword(&Count,HOST_CCREG); + emit_readword(&psxRegs.cycle,HOST_CCREG); emit_readword(&next_interupt,HOST_TEMPREG); emit_addimm(HOST_CCREG,-ccadj_,HOST_CCREG); emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); @@ -3558,11 +3558,11 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) assert(dops[i].opcode2==0x10); //if((source[i]&0x3f)==0x10) // RFE { - emit_readword(&Status,0); + emit_readword(&psxRegs.CP0.n.Status,0); emit_andimm(0,0x3c,1); emit_andimm(0,~0xf,0); emit_orrshr_imm(1,2,0); - emit_writeword(0,&Status); + emit_writeword(0,&psxRegs.CP0.n.Status); } } } diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 2339d5957..a052a59a3 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -32,7 +32,6 @@ extern "C" { enum { R3000ACPU_NOTIFY_CACHE_ISOLATED = 0, R3000ACPU_NOTIFY_CACHE_UNISOLATED = 1, - R3000ACPU_NOTIFY_DMA3_EXE_LOAD = 2 }; typedef struct { From 679d5ee3c46235923a99391922de1df0239e9ff3 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 9 Jul 2023 00:09:24 +0300 Subject: [PATCH 256/597] clean up cache isolation handling also ari64 drc now handles unmapped io same as interpreter --- libpcsxcore/new_dynarec/emu_if.c | 19 ++-- libpcsxcore/new_dynarec/linkage_offsets.h | 6 +- libpcsxcore/new_dynarec/pcsxmem.c | 100 +++++++++++++--------- libpcsxcore/new_dynarec/pcsxmem.h | 3 +- libpcsxcore/psxinterpreter.c | 6 +- libpcsxcore/psxmem.c | 66 ++++++-------- libpcsxcore/psxmem.h | 1 + libpcsxcore/r3000a.h | 2 + 8 files changed, 105 insertions(+), 98 deletions(-) diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 33319ba6a..aac9f78c0 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -370,19 +370,15 @@ static void ari64_clear(u32 addr, u32 size) } static void 
ari64_notify(int note, void *data) { - /* - Should be fixed when ARM dynarec has proper icache emulation. switch (note) { - case R3000ACPU_NOTIFY_CACHE_UNISOLATED: - break; - case R3000ACPU_NOTIFY_CACHE_ISOLATED: - Sent from psxDma3(). - case R3000ACPU_NOTIFY_DMA3_EXE_LOAD: - default: - break; + case R3000ACPU_NOTIFY_CACHE_UNISOLATED: + case R3000ACPU_NOTIFY_CACHE_ISOLATED: + new_dyna_pcsx_mem_isolate(note == R3000ACPU_NOTIFY_CACHE_ISOLATED); + break; + default: + break; } - */ } static void ari64_apply_config() @@ -430,7 +426,7 @@ int new_dynarec_hacks_old; int new_dynarec_hacks; void *psxH_ptr; void *zeromem_ptr; -u8 zero_mem[0x1000]; +u32 zero_mem[0x1000/4]; void *mem_rtab; void *scratch_buf_ptr; void new_dynarec_init() {} @@ -442,6 +438,7 @@ void new_dynarec_invalidate_range(unsigned int start, unsigned int end) {} void new_dyna_pcsx_mem_init(void) {} void new_dyna_pcsx_mem_reset(void) {} void new_dyna_pcsx_mem_load_state(void) {} +void new_dyna_pcsx_mem_isolate(int enable) {} void new_dyna_pcsx_mem_shutdown(void) {} int new_dynarec_save_blocks(void *save, int size) { return 0; } void new_dynarec_load_blocks(const void *save, int size) {} diff --git a/libpcsxcore/new_dynarec/linkage_offsets.h b/libpcsxcore/new_dynarec/linkage_offsets.h index 0c189d78f..23935b875 100644 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ b/libpcsxcore/new_dynarec/linkage_offsets.h @@ -24,8 +24,10 @@ #define LO_intCycle (LO_interrupt + 4) #define LO_gteBusyCycle (LO_intCycle + 256) #define LO_muldivBusyCycle (LO_gteBusyCycle + 4) -#define LO_psxRegs_reserved (LO_muldivBusyCycle + 4) -#define LO_psxRegs_end (LO_psxRegs_reserved + 4*2) +#define LO_psxRegs_subCycle (LO_muldivBusyCycle + 4) +#define LO_psxRegs_biuReg (LO_psxRegs_subCycle + 4*2) +#define LO_psxRegs_reserved (LO_psxRegs_biuReg + 4) +#define LO_psxRegs_end (LO_psxRegs_reserved + 4*3) #define LO_rcnts (LO_psxRegs_end) #define LO_rcnts_end (LO_rcnts + 7*4*4) #define LO_inv_code_start (LO_rcnts_end) diff --git 
a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index a32b41830..8057b7ed5 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -26,6 +26,7 @@ static uintptr_t *mem_readtab; static uintptr_t *mem_writetab; static uintptr_t mem_iortab[(1+2+4) * 0x1000 / 4]; static uintptr_t mem_iowtab[(1+2+4) * 0x1000 / 4]; +static uintptr_t mem_ffrtab[(1+2+4) * 0x1000 / 4]; static uintptr_t mem_ffwtab[(1+2+4) * 0x1000 / 4]; //static uintptr_t mem_unmrtab[(1+2+4) * 0x1000 / 4]; static uintptr_t mem_unmwtab[(1+2+4) * 0x1000 / 4]; @@ -47,23 +48,28 @@ void map_item(uintptr_t *out, const void *h, uintptr_t flag) // size must be power of 2, at least 4k #define map_l1_mem(tab, i, addr, size, base) \ - map_item(&tab[((addr)>>12) + i], \ + map_item(&tab[((u32)(addr) >> 12) + i], \ (u8 *)(base) - (u32)((addr) + ((i << 12) & ~(size - 1))), 0) #define IOMEM32(a) (((a) & 0xfff) / 4) #define IOMEM16(a) (0x1000/4 + (((a) & 0xfff) / 2)) #define IOMEM8(a) (0x1000/4 + 0x1000/2 + ((a) & 0xfff)) -u8 zero_mem[0x1000]; +u32 zero_mem[0x1000/4]; +static u32 ffff_mem[0x1000/4]; -u32 read_mem_dummy() +static u32 read_mem_dummy(u32 addr) { - return 0; + // use 'addr' and not 'address', yes the api is weird... 
+ memprintf("unmapped r %08x @%08x %u\n", addr, psxRegs.pc, psxRegs.cycle); + return 0xffffffff; } static void write_mem_dummy(u32 data) { - memprintf("unmapped w %08x, %08x @%08x %u\n", address, data, psxRegs.pc, psxRegs.cycle); + if (!(psxRegs.CP0.n.Status & (1 << 16))) + memprintf("unmapped w %08x, %08x @%08x %u\n", + address, data, psxRegs.pc, psxRegs.cycle); } /* IO handlers */ @@ -240,46 +246,51 @@ static void io_gpu_write_status(u32 value) gpuSyncPluginSR(); } -static void map_ram_write(void) +void new_dyna_pcsx_mem_isolate(int enable) { int i; - for (i = 0; i < (0x800000 >> 12); i++) { - map_l1_mem(mem_writetab, i, 0x80000000, 0x200000, psxM); - map_l1_mem(mem_writetab, i, 0x00000000, 0x200000, psxM); - map_l1_mem(mem_writetab, i, 0xa0000000, 0x200000, psxM); + // note: apparently 0xa0000000 uncached access still works, + // at least read does for sure, so assume write does too + memprintf("mem isolate %d\n", enable); + if (enable) { + for (i = 0; i < (0x800000 >> 12); i++) { + map_item(&mem_writetab[0x80000|i], mem_unmwtab, 1); + map_item(&mem_writetab[0x00000|i], mem_unmwtab, 1); + //map_item(&mem_writetab[0xa0000|i], mem_unmwtab, 1); + } + } + else { + for (i = 0; i < (0x800000 >> 12); i++) { + map_l1_mem(mem_writetab, i, 0x80000000, 0x200000, psxM); + map_l1_mem(mem_writetab, i, 0x00000000, 0x200000, psxM); + map_l1_mem(mem_writetab, i, 0xa0000000, 0x200000, psxM); + } } } -static void unmap_ram_write(void) +static u32 read_biu(u32 addr) { - int i; - - for (i = 0; i < (0x800000 >> 12); i++) { - map_item(&mem_writetab[0x80000|i], mem_unmwtab, 1); - map_item(&mem_writetab[0x00000|i], mem_unmwtab, 1); - map_item(&mem_writetab[0xa0000|i], mem_unmwtab, 1); - } + if (addr != 0xfffe0130) + return read_mem_dummy(addr); + + FILE *f = fopen("/tmp/psxbiu.bin", "wb"); + fwrite(psxM, 1, 0x200000, f); + fclose(f); + memprintf("read_biu %08x @%08x %u\n", + psxRegs.biuReg, psxRegs.pc, psxRegs.cycle); + return psxRegs.biuReg; } static void write_biu(u32 value) { - 
memprintf("write_biu %08x, %08x @%08x %u\n", address, value, psxRegs.pc, psxRegs.cycle); - - if (address != 0xfffe0130) + if (address != 0xfffe0130) { + write_mem_dummy(value); return; - - switch (value) { - case 0x800: case 0x804: - unmap_ram_write(); - break; - case 0: case 0x1e988: - map_ram_write(); - break; - default: - printf("write_biu: unexpected val: %08x\n", value); - break; } + + memprintf("write_biu %08x @%08x %u\n", value, psxRegs.pc, psxRegs.cycle); + psxRegs.biuReg = value; } void new_dyna_pcsx_mem_load_state(void) @@ -302,6 +313,8 @@ void new_dyna_pcsx_mem_init(void) { int i; + memset(ffff_mem, 0xff, sizeof(ffff_mem)); + // have to map these further to keep tcache close to .text mem_readtab = psxMap(0x08000000, 0x200000 * sizeof(mem_readtab[0]), 0, MAP_TAG_LUTS); if (mem_readtab == NULL) { @@ -320,7 +333,7 @@ void new_dyna_pcsx_mem_init(void) // default/unmapped memhandlers for (i = 0; i < 0x100000; i++) { //map_item(&mem_readtab[i], mem_unmrtab, 1); - map_l1_mem(mem_readtab, i, 0, 0x1000, zero_mem); + map_l1_mem(mem_readtab, i, 0, 0x1000, ffff_mem); map_item(&mem_writetab[i], mem_unmwtab, 1); } @@ -330,7 +343,7 @@ void new_dyna_pcsx_mem_init(void) map_l1_mem(mem_readtab, i, 0x00000000, 0x200000, psxM); map_l1_mem(mem_readtab, i, 0xa0000000, 0x200000, psxM); } - map_ram_write(); + new_dyna_pcsx_mem_isolate(0); // BIOS and it's mirrors for (i = 0; i < (0x80000 >> 12); i++) { @@ -345,12 +358,12 @@ void new_dyna_pcsx_mem_init(void) map_l1_mem(mem_writetab, 0, 0x9f800000, 0x1000, psxH); // I/O - map_item(&mem_readtab[0x1f801000 >> 12], mem_iortab, 1); - map_item(&mem_readtab[0x9f801000 >> 12], mem_iortab, 1); - map_item(&mem_readtab[0xbf801000 >> 12], mem_iortab, 1); - map_item(&mem_writetab[0x1f801000 >> 12], mem_iowtab, 1); - map_item(&mem_writetab[0x9f801000 >> 12], mem_iowtab, 1); - map_item(&mem_writetab[0xbf801000 >> 12], mem_iowtab, 1); + map_item(&mem_readtab[0x1f801000u >> 12], mem_iortab, 1); + map_item(&mem_readtab[0x9f801000u >> 12], 
mem_iortab, 1); + map_item(&mem_readtab[0xbf801000u >> 12], mem_iortab, 1); + map_item(&mem_writetab[0x1f801000u >> 12], mem_iowtab, 1); + map_item(&mem_writetab[0x9f801000u >> 12], mem_iowtab, 1); + map_item(&mem_writetab[0xbf801000u >> 12], mem_iowtab, 1); // L2 // unmapped tables @@ -461,9 +474,12 @@ void new_dyna_pcsx_mem_init(void) } // misc - map_item(&mem_writetab[0xfffe0130 >> 12], mem_ffwtab, 1); - for (i = 0; i < 0x1000/4 + 0x1000/2 + 0x1000; i++) + map_item(&mem_readtab[0xfffe0130u >> 12], mem_ffrtab, 1); + map_item(&mem_writetab[0xfffe0130u >> 12], mem_ffwtab, 1); + for (i = 0; i < 0x1000/4 + 0x1000/2 + 0x1000; i++) { + map_item(&mem_ffrtab[i], read_biu, 1); map_item(&mem_ffwtab[i], write_biu, 1); + } mem_rtab = mem_readtab; mem_wtab = mem_writetab; diff --git a/libpcsxcore/new_dynarec/pcsxmem.h b/libpcsxcore/new_dynarec/pcsxmem.h index 72892a8e3..7f8283d1b 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.h +++ b/libpcsxcore/new_dynarec/pcsxmem.h @@ -1,9 +1,10 @@ -extern u8 zero_mem[0x1000]; +extern u32 zero_mem[0x1000/4]; void new_dyna_pcsx_mem_init(void); void new_dyna_pcsx_mem_reset(void); void new_dyna_pcsx_mem_load_state(void); +void new_dyna_pcsx_mem_isolate(int enable); void new_dyna_pcsx_mem_shutdown(void); int pcsxmem_is_handler_dynamic(unsigned int addr); diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index 3d0836459..9ece259cf 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -911,7 +911,9 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) { // SysPrintf("MTC0 %d: %x\n", reg, val); switch (reg) { case 12: // Status - regs_->CP0.r[12] = val; + if ((regs_->CP0.n.Status ^ val) & (1 << 16)) + psxMemOnIsolate((val >> 16) & 1); + regs_->CP0.n.Status = val; psxTestSWInts(regs_); break; @@ -1114,7 +1116,7 @@ static void intClear(u32 Addr, u32 Size) { } void intNotify (int note, void *data) { - /* Gameblabla - Only clear the icache if it's isolated */ + /* Armored Core won't boot without this */ if 
(note == R3000ACPU_NOTIFY_CACHE_ISOLATED) { memset(&ICache, 0xff, sizeof(ICache)); diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 37a0efd0d..fb48f77bf 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -195,8 +195,8 @@ int psxMemInit(void) return -1; } - memset(psxMemRLUT, (uintptr_t)INVALID_PTR, 0x10000 * sizeof(void *)); - memset(psxMemWLUT, (uintptr_t)INVALID_PTR, 0x10000 * sizeof(void *)); + memset(psxMemRLUT, (int)(uintptr_t)INVALID_PTR, 0x10000 * sizeof(void *)); + memset(psxMemWLUT, (int)(uintptr_t)INVALID_PTR, 0x10000 * sizeof(void *)); // MemR for (i = 0; i < 0x80; i++) psxMemRLUT[i + 0x0000] = (u8 *)&psxM[(i & 0x1f) << 16]; @@ -265,7 +265,22 @@ void psxMemShutdown() { free(psxMemWLUT); psxMemWLUT = NULL; } -static int writeok = 1; +void psxMemOnIsolate(int enable) +{ + if (enable) { + memset(psxMemWLUT + 0x0000, (int)(uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); + memset(psxMemWLUT + 0x8000, (int)(uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); + //memset(psxMemWLUT + 0xa000, (int)(uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); + } else { + int i; + for (i = 0; i < 0x80; i++) + psxMemWLUT[i + 0x0000] = (void *)&psxM[(i & 0x1f) << 16]; + memcpy(psxMemWLUT + 0x8000, psxMemWLUT, 0x80 * sizeof(void *)); + memcpy(psxMemWLUT + 0xa000, psxMemWLUT, 0x80 * sizeof(void *)); + } + psxCpu->Notify(enable ? 
R3000ACPU_NOTIFY_CACHE_ISOLATED + : R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL); +} u8 psxMemRead8(u32 mem) { char *p; @@ -334,8 +349,10 @@ u32 psxMemRead32(u32 mem) { DebugCheckBP((mem & 0xffffff) | 0x80000000, R4); return SWAPu32(*(u32 *)(p + (mem & 0xffff))); } else { + if (mem == 0xfffe0130) + return psxRegs.biuReg; #ifdef PSXMEM_LOG - if (writeok) { PSXMEM_LOG("err lw %8.8lx\n", mem); } + PSXMEM_LOG("err lw %8.8lx\n", mem); #endif return 0xFFFFFFFF; } @@ -417,44 +434,13 @@ void psxMemWrite32(u32 mem, u32 value) { psxCpu->Clear(mem, 1); #endif } else { - if (mem != 0xfffe0130) { -#ifndef DRC_DISABLE - if (!writeok) - psxCpu->Clear(mem, 1); -#endif - -#ifdef PSXMEM_LOG - if (writeok) { PSXMEM_LOG("err sw %8.8lx\n", mem); } -#endif - } else { - int i; - - switch (value) { - case 0x800: case 0x804: - if (writeok == 0) break; - writeok = 0; - memset(psxMemWLUT + 0x0000, (uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); - memset(psxMemWLUT + 0x8000, (uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); - memset(psxMemWLUT + 0xa000, (uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); - /* Required for icache interpreter otherwise Armored Core won't boot on icache interpreter */ - psxCpu->Notify(R3000ACPU_NOTIFY_CACHE_ISOLATED, NULL); - break; - case 0x00: case 0x1e988: - if (writeok == 1) break; - writeok = 1; - for (i = 0; i < 0x80; i++) psxMemWLUT[i + 0x0000] = (void *)&psxM[(i & 0x1f) << 16]; - memcpy(psxMemWLUT + 0x8000, psxMemWLUT, 0x80 * sizeof(void *)); - memcpy(psxMemWLUT + 0xa000, psxMemWLUT, 0x80 * sizeof(void *)); - /* Dynarecs might take this opportunity to flush their code cache */ - psxCpu->Notify(R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL); - break; - default: + if (mem == 0xfffe0130) { + psxRegs.biuReg = value; + return; + } #ifdef PSXMEM_LOG - PSXMEM_LOG("unk %8.8lx = %x\n", mem, value); + PSXMEM_LOG("err sw %8.8lx\n", mem); #endif - break; - } - } } } } diff --git a/libpcsxcore/psxmem.h b/libpcsxcore/psxmem.h index 14ff0033b..129973cf9 100644 --- 
a/libpcsxcore/psxmem.h +++ b/libpcsxcore/psxmem.h @@ -127,6 +127,7 @@ extern u8 **psxMemRLUT; int psxMemInit(); void psxMemReset(); +void psxMemOnIsolate(int enable); void psxMemShutdown(); u8 psxMemRead8 (u32 mem); diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index a052a59a3..6973afe8b 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -192,6 +192,8 @@ typedef struct { u32 muldivBusyCycle; u32 subCycle; /* interpreter cycle counting */ u32 subCycleStep; + u32 biuReg; + u32 reserved[3]; // warning: changing anything in psxRegisters requires update of all // asm in libpcsxcore/new_dynarec/ } psxRegisters; From 5fa6cb1729bd2bc6c1d9d0d04c4acd611675518f Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 9 Jul 2023 00:13:36 +0300 Subject: [PATCH 257/597] gpu-gles: fix wrong long usage No idea if there is any point touching this code but the warnings were annoying. --- plugins/gpu-gles/gpuDraw.c | 6 +- plugins/gpu-gles/gpuDraw.h | 2 +- plugins/gpu-gles/gpuExternals.h | 80 +++++------ plugins/gpu-gles/gpuPrim.c | 122 ++++++++-------- plugins/gpu-gles/gpuPrim.h | 4 +- plugins/gpu-gles/gpuTexture.c | 248 ++++++++++++++++---------------- plugins/gpu-gles/gpuTexture.h | 42 +++--- plugins/gpu-gles/gpulib_if.c | 21 +-- 8 files changed, 263 insertions(+), 262 deletions(-) diff --git a/plugins/gpu-gles/gpuDraw.c b/plugins/gpu-gles/gpuDraw.c index c49eac5ff..d6670ee25 100644 --- a/plugins/gpu-gles/gpuDraw.c +++ b/plugins/gpu-gles/gpuDraw.c @@ -111,7 +111,7 @@ BOOL bCheckMask=FALSE; int iUseMask=0; int iSetMask=0; unsigned short sSetMask=0; -unsigned long lSetMask=0; +unsigned int lSetMask=0; // drawing/coord vars @@ -921,14 +921,14 @@ void offsetST(void) ///////////////////////////////////////////////////////// -void offsetScreenUpload(long Position) +void offsetScreenUpload(int Position) { if(bDisplayNotSet) SetOGLDisplaySettings(1); if(Position==-1) { - long lmdx,lmdy; + int lmdx,lmdy; lmdx=xrUploadArea.x0; lmdy=xrUploadArea.y0; diff --git 
a/plugins/gpu-gles/gpuDraw.h b/plugins/gpu-gles/gpuDraw.h index a45bf4626..153e1e509 100644 --- a/plugins/gpu-gles/gpuDraw.h +++ b/plugins/gpu-gles/gpuDraw.h @@ -64,7 +64,7 @@ unsigned short offsetline(void); #endif void offsetST(void); void offsetBlk(void); -void offsetScreenUpload(long Position); +void offsetScreenUpload(int Position); void assignTexture3(void); void assignTexture4(void); void assignTextureSprite(void); diff --git a/plugins/gpu-gles/gpuExternals.h b/plugins/gpu-gles/gpuExternals.h index 0a8acf594..a6dbceb20 100644 --- a/plugins/gpu-gles/gpuExternals.h +++ b/plugins/gpu-gles/gpuExternals.h @@ -143,10 +143,10 @@ extern void ( APIENTRY * glPixelStorei )(GLenum pname, GLint param); #define bool unsigned short #endif #define LOWORD(l) ((unsigned short)(l)) -#define HIWORD(l) ((unsigned short)(((unsigned long)(l) >> 16) & 0xFFFF)) +#define HIWORD(l) ((unsigned short)(((unsigned int)(l) >> 16) & 0xFFFF)) #define max(a,b) (((a) > (b)) ? (a) : (b)) #define min(a,b) (((a) < (b)) ? 
(a) : (b)) -#define DWORD unsigned long +#define DWORD unsigned int typedef struct RECTTAG { @@ -173,8 +173,8 @@ typedef struct VRAMLOADTAG typedef struct PSXPOINTTAG { - long x; - long y; + int x; + int y; } PSXPoint_t; typedef struct PSXSPOINTTAG @@ -211,19 +211,19 @@ typedef struct PSXDISPLAYTAG PSXPoint_t DisplayPosition; PSXPoint_t DisplayEnd; - long Double; - long Height; - long PAL; - long InterlacedNew; - long Interlaced; - long InterlacedTest; - long RGB24New; - long RGB24; + int Double; + int Height; + int PAL; + int InterlacedNew; + int Interlaced; + int InterlacedTest; + int RGB24New; + int RGB24; PSXSPoint_t DrawOffset; PSXRect_t DrawArea; PSXPoint_t GDrawOffset; PSXPoint_t CumulOffset; - long Disabled; + int Disabled; PSXRect_t Range; } PSXDisplay_t; @@ -242,7 +242,7 @@ typedef struct OGLVertexTag COLTAG { unsigned char col[4]; - unsigned long lcol; + unsigned int lcol; } c; } OGLVertex; @@ -256,7 +256,7 @@ typedef union EXShortTag typedef union EXLongTag { unsigned char c[4]; - unsigned long l; + unsigned int l; EXShort s[2]; } EXLong; @@ -299,7 +299,7 @@ extern int iSetMask; extern int iDepthFunc; extern BOOL bCheckMask; extern unsigned short sSetMask; -extern unsigned long lSetMask; +extern unsigned int lSetMask; extern BOOL bSetClip; extern GLuint gTexScanName; @@ -309,8 +309,8 @@ extern GLuint gTexScanName; #ifndef _IN_SOFT -extern long GlobalTextAddrX,GlobalTextAddrY,GlobalTextTP; -extern long GlobalTextREST,GlobalTextABR,GlobalTextPAGE; +extern int GlobalTextAddrX,GlobalTextAddrY,GlobalTextTP; +extern int GlobalTextREST,GlobalTextABR,GlobalTextPAGE; extern short ly0,lx0,ly1,lx1,ly2,lx2,ly3,lx3; extern short g_m1; extern short g_m2; @@ -347,22 +347,22 @@ extern GLubyte ubGloAlpha; extern short sSprite_ux2; extern short sSprite_vy2; extern BOOL bRenderFrontBuffer; -extern unsigned long ulOLDCOL; -extern unsigned long ulClutID; +extern unsigned int ulOLDCOL; +extern unsigned int ulClutID; extern void (*primTableJ[256])(unsigned char *); extern 
void (*primTableSkip[256])(unsigned char *); extern unsigned short usMirror; -extern unsigned long dwCfgFixes; -extern unsigned long dwActFixes; -extern unsigned long dwEmuFixes; +extern unsigned int dwCfgFixes; +extern unsigned int dwActFixes; +extern unsigned int dwEmuFixes; extern BOOL bUseFixes; extern int iSpriteTex; extern int iDrawnSomething; -extern long drawX; -extern long drawY; -extern long drawW; -extern long drawH; +extern int drawX; +extern int drawY; +extern int drawW; +extern int drawH; extern short sxmin; extern short sxmax; extern short symin; @@ -383,10 +383,10 @@ extern GLint giWantedRGBA; extern GLint giWantedFMT; extern GLint giWantedTYPE; extern void (*LoadSubTexFn) (int,int,short,short); -extern long GlobalTexturePage; -extern unsigned long (*TCF[]) (unsigned long); +extern int GlobalTexturePage; +extern unsigned int (*TCF[]) (unsigned int ); extern unsigned short (*PTCF[]) (unsigned short); -extern unsigned long (*PalTexturedColourFn) (unsigned long); +extern unsigned int (*PalTexturedColourFn) (unsigned int); extern BOOL bUseFastMdec; extern BOOL bUse15bitMdec; extern int iFrameTexType; @@ -420,32 +420,32 @@ extern char szDispBuf[]; extern char szGPUKeys[]; extern PSXDisplay_t PSXDisplay; extern PSXDisplay_t PreviousPSXDisplay; -//extern unsigned long ulKeybits; +//extern unsigned int ulKeybits; extern TWin_t TWin; extern BOOL bDisplayNotSet; -extern long lGPUstatusRet; +extern int lGPUstatusRet; extern short imageX0,imageX1; extern short imageY0,imageY1; -extern long lClearOnSwap,lClearOnSwapColor; +extern int lClearOnSwap,lClearOnSwapColor; extern unsigned char * psxVub; extern char * psxVsb; extern unsigned short * psxVuw; extern signed short * psxVsw; -extern unsigned long * psxVul; -extern signed long * psxVsl; +extern unsigned int * psxVul; +extern signed int * psxVsl; extern GLfloat gl_z; extern BOOL bNeedRGB24Update; extern BOOL bChangeWinMode; extern GLuint uiScanLine; extern int iUseScanLines; -extern long lSelectedSlot; +extern 
int lSelectedSlot; extern int iScanBlend; extern BOOL bInitCap; extern int iBlurBuffer; extern int iLastRGB24; extern int iRenderFVR; extern int iNoScreenSaver; -extern unsigned long ulGPUInfoVals[]; +extern unsigned int ulGPUInfoVals[]; extern BOOL bNeedInterlaceUpdate; extern BOOL bNeedWriteUpload; extern BOOL bSkipNextFrame; @@ -461,7 +461,7 @@ extern int bFullScreen; #ifndef _IN_MENU -//extern unsigned long dwCoreFlags; +//extern unsigned int dwCoreFlags; extern GLuint gTexPicName; //extern PSXPoint_t ptCursorPoint[]; //extern unsigned short usCursorActive; @@ -539,7 +539,7 @@ typedef struct { #ifndef _IN_KEY -//extern unsigned long ulKeybits; +//extern unsigned int ulKeybits; #endif @@ -547,7 +547,7 @@ typedef struct { #ifndef _IN_ZN -extern unsigned long dwGPUVersion; +extern unsigned int dwGPUVersion; extern int iGPUHeight; extern int iGPUHeightMask; extern int GlobalTextIL; diff --git a/plugins/gpu-gles/gpuPrim.c b/plugins/gpu-gles/gpuPrim.c index 218ff66d3..ed16e4a9c 100644 --- a/plugins/gpu-gles/gpuPrim.c +++ b/plugins/gpu-gles/gpuPrim.c @@ -77,15 +77,15 @@ BOOL bUsingMovie=FALSE; // movie active flag PSXRect_t xrMovieArea; // rect for movie upload short sSprite_ux2; // needed for sprire adjust short sSprite_vy2; // -unsigned long ulOLDCOL=0; // active color -unsigned long ulClutID; // clut +unsigned int ulOLDCOL=0; // active color +unsigned int ulClutID; // clut -unsigned long dwCfgFixes; // game fixes -unsigned long dwActFixes=0; -unsigned long dwEmuFixes=0; +unsigned int dwCfgFixes; // game fixes +unsigned int dwActFixes=0; +unsigned int dwEmuFixes=0; BOOL bUseFixes; -long drawX,drawY,drawW,drawH; // offscreen drawing checkers +int drawX,drawY,drawW,drawH; // offscreen drawing checkers short sxmin,sxmax,symin,symax; unsigned int CSVERTEX=0,CSCOLOR=0,CSTEXTURE=0; @@ -147,9 +147,9 @@ void UpdateGlobalTP(unsigned short gdata) //////////////////////////////////////////////////////////////////////// -unsigned long DoubleBGR2RGB (unsigned long BGR) +unsigned 
int DoubleBGR2RGB (unsigned int BGR) { - unsigned long ebx,eax,edx; + unsigned int ebx,eax,edx; ebx=(BGR&0x000000ff)<<1; if(ebx&0x00000100) ebx=0x000000ff; @@ -163,7 +163,7 @@ unsigned long DoubleBGR2RGB (unsigned long BGR) return (ebx|eax|edx); } -unsigned short BGR24to16 (unsigned long BGR) +unsigned short BGR24to16 (unsigned int BGR) { return ((BGR>>3)&0x1f)|((BGR&0xf80000)>>9)|((BGR&0xf800)>>6); } @@ -947,7 +947,7 @@ void SetZMask3NT(void) //////////////////////////////////////////////////////////////////////// - void SetRenderState(unsigned long DrawAttributes) + void SetRenderState(unsigned int DrawAttributes) { bDrawNonShaded = (SHADETEXBIT(DrawAttributes)) ? TRUE : FALSE; DrawSemiTrans = (SEMITRANSBIT(DrawAttributes)) ? TRUE : FALSE; @@ -955,7 +955,7 @@ void SetZMask3NT(void) //////////////////////////////////////////////////////////////////////// - void SetRenderColor(unsigned long DrawAttributes) + void SetRenderColor(unsigned int DrawAttributes) { if(bDrawNonShaded) {g_m1=g_m2=g_m3=128;} else @@ -968,7 +968,7 @@ void SetZMask3NT(void) //////////////////////////////////////////////////////////////////////// -void SetRenderMode(unsigned long DrawAttributes,BOOL bSCol) +void SetRenderMode(unsigned int DrawAttributes,BOOL bSCol) { if((bUseMultiPass) && (bDrawTextured) && !(bDrawNonShaded)) {bDrawMultiPass = TRUE; SetSemiTransMulti(0);} @@ -1024,7 +1024,7 @@ void SetRenderMode(unsigned long DrawAttributes,BOOL bSCol) // Set Opaque multipass color //////////////////////////////////////////////////////////////////////// -void SetOpaqueColor(unsigned long DrawAttributes) +void SetOpaqueColor(unsigned int DrawAttributes) { if(bDrawNonShaded) return; // no shading? 
bye @@ -1399,7 +1399,7 @@ BOOL CheckAgainstFrontScreen(short imageX0,short imageY0,short imageX1,short ima //////////////////////////////////////////////////////////////////////// -void PrepareFullScreenUpload (long Position) +void PrepareFullScreenUpload (int Position) { if (Position==-1) // rgb24 { @@ -1481,7 +1481,7 @@ void PrepareFullScreenUpload (long Position) unsigned char * LoadDirectMovieFast(void); -void UploadScreenEx(long Position) +void UploadScreenEx(int Position) { short ya,yb,xa,xb,x, y, YStep, XStep, U, UStep,ux[4],vy[4]; @@ -1565,7 +1565,7 @@ void UploadScreenEx(long Position) //////////////////////////////////////////////////////////////////////// -void UploadScreen(long Position) +void UploadScreen(int Position) { short x, y, YStep, XStep, U, s, UStep,ux[4],vy[4]; short xa,xb,ya,yb; @@ -1646,8 +1646,8 @@ void UploadScreen(long Position) gl_vy[2] = gl_vy[3] = s; gl_ux[0] = gl_ux[3] = gl_vy[0] = gl_vy[1] = 0; - SetRenderState((unsigned long)0x01000000); - SetRenderMode((unsigned long)0x01000000, FALSE); // upload texture data + SetRenderState((unsigned int)0x01000000); + SetRenderMode((unsigned int)0x01000000, FALSE); // upload texture data offsetScreenUpload(Position); assignTextureVRAMWrite(); @@ -1712,7 +1712,7 @@ BOOL IsInsideNextScreen(short x, short y, short xoff, short yoff) void cmdSTP(unsigned char * baseAddr) { - unsigned long gdata = ((unsigned long*)baseAddr)[0]; + unsigned int gdata = ((unsigned int*)baseAddr)[0]; STATUSREG&=~0x1800; // clear the necessary bits STATUSREG|=((gdata & 0x03) << 11); // set the current bits @@ -1745,7 +1745,7 @@ void cmdSTP(unsigned char * baseAddr) void cmdTexturePage(unsigned char * baseAddr) { - unsigned long gdata = ((unsigned long*)baseAddr)[0]; + unsigned int gdata = ((unsigned int*)baseAddr)[0]; UpdateGlobalTP((unsigned short)gdata); GlobalTextREST = (gdata&0x00ffffff)>>9; } @@ -1756,9 +1756,9 @@ void cmdTexturePage(unsigned char * baseAddr) void cmdTextureWindow(unsigned char *baseAddr) { - 
unsigned long gdata = ((unsigned long*)baseAddr)[0]; + unsigned int gdata = ((unsigned int*)baseAddr)[0]; - unsigned long YAlign,XAlign; + unsigned int YAlign,XAlign; ulGPUInfoVals[INFO_TW]=gdata&0xFFFFF; @@ -1792,8 +1792,8 @@ void cmdTextureWindow(unsigned char *baseAddr) // Re-calculate the bit field, because we can't trust what is passed in the data - YAlign = (unsigned long)(32 - (TWin.Position.y1 >> 3)); - XAlign = (unsigned long)(32 - (TWin.Position.x1 >> 3)); + YAlign = (unsigned int)(32 - (TWin.Position.y1 >> 3)); + XAlign = (unsigned int)(32 - (TWin.Position.x1 >> 3)); // Absolute position of the start of the texture window @@ -1965,7 +1965,7 @@ void ClampToPSXScreenOffset(short *x0, short *y0, short *x1, short *y1) void cmdDrawAreaStart(unsigned char * baseAddr) { - unsigned long gdata = ((unsigned long*)baseAddr)[0]; + unsigned int gdata = ((unsigned int*)baseAddr)[0]; drawX = gdata & 0x3ff; // for soft drawing if(drawX>=1024) drawX=1023; @@ -1996,7 +1996,7 @@ void cmdDrawAreaStart(unsigned char * baseAddr) void cmdDrawAreaEnd(unsigned char * baseAddr) { - unsigned long gdata = ((unsigned long*)baseAddr)[0]; + unsigned int gdata = ((unsigned int*)baseAddr)[0]; drawW = gdata & 0x3ff; // for soft drawing if(drawW>=1024) drawW=1023; @@ -2031,7 +2031,7 @@ void cmdDrawAreaEnd(unsigned char * baseAddr) void cmdDrawOffset(unsigned char * baseAddr) { - unsigned long gdata = ((unsigned long*)baseAddr)[0]; + unsigned int gdata = ((unsigned int*)baseAddr)[0]; PreviousPSXDisplay.DrawOffset.x = PSXDisplay.DrawOffset.x = (short)(gdata & 0x7ff); @@ -2241,7 +2241,7 @@ void primStoreImage(unsigned char * baseAddr) void primBlkFill(unsigned char * baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short *sgpuData = ((short *) baseAddr); iDrawnSomething=1; @@ -2292,8 +2292,8 @@ void primBlkFill(unsigned char * baseAddr) { bDrawTextured = FALSE; bDrawSmoothShaded = FALSE; - SetRenderState((unsigned 
long)0x01000000); - SetRenderMode((unsigned long)0x01000000, FALSE); + SetRenderState((unsigned int)0x01000000); + SetRenderMode((unsigned int)0x01000000, FALSE); vertex[0].c.lcol=0xff000000; SETCOL(vertex[0]); if(ly0>pd->DisplayPosition.y) @@ -2320,8 +2320,8 @@ void primBlkFill(unsigned char * baseAddr) { bDrawTextured = FALSE; bDrawSmoothShaded = FALSE; - SetRenderState((unsigned long)0x01000000); - SetRenderMode((unsigned long)0x01000000, FALSE); + SetRenderState((unsigned int)0x01000000); + SetRenderMode((unsigned int)0x01000000, FALSE); vertex[0].c.lcol=gpuData[0]|0xff000000; SETCOL(vertex[0]); //glDisable(GL_SCISSOR_TEST); glError(); @@ -2491,12 +2491,12 @@ void primMoveImage(unsigned char * baseAddr) } else { - unsigned long *SRCPtr, *DSTPtr; + unsigned int *SRCPtr, *DSTPtr; unsigned short LineOffset; int dx=imageSX>>1; - SRCPtr = (unsigned long *)(psxVuw + (1024*imageY0) + imageX0); - DSTPtr = (unsigned long *)(psxVuw + (1024*imageY1) + imageX1); + SRCPtr = (unsigned int *)(psxVuw + (1024*imageY0) + imageX0); + DSTPtr = (unsigned int *)(psxVuw + (1024*imageY1) + imageX1); LineOffset = 512 - dx; @@ -2597,7 +2597,7 @@ void primMoveImage(unsigned char * baseAddr) void primTileS(unsigned char * baseAddr) { - unsigned long *gpuData = ((unsigned long*)baseAddr); + unsigned int *gpuData = ((unsigned int*)baseAddr); short *sgpuData = ((short *) baseAddr); sprtX = sgpuData[2]; @@ -2662,7 +2662,7 @@ void primTileS(unsigned char * baseAddr) void primTile1(unsigned char * baseAddr) { - unsigned long *gpuData = ((unsigned long*)baseAddr); + unsigned int *gpuData = ((unsigned int*)baseAddr); short *sgpuData = ((short *) baseAddr); sprtX = sgpuData[2]; @@ -2709,7 +2709,7 @@ void primTile1(unsigned char * baseAddr) void primTile8(unsigned char * baseAddr) { - unsigned long *gpuData = ((unsigned long*)baseAddr); + unsigned int *gpuData = ((unsigned int*)baseAddr); short *sgpuData = ((short *) baseAddr); sprtX = sgpuData[2]; @@ -2756,7 +2756,7 @@ void primTile8(unsigned char 
* baseAddr) void primTile16(unsigned char * baseAddr) { - unsigned long *gpuData = ((unsigned long*)baseAddr); + unsigned int *gpuData = ((unsigned int*)baseAddr); short *sgpuData = ((short *) baseAddr); sprtX = sgpuData[2]; @@ -2803,7 +2803,7 @@ void primTile16(unsigned char * baseAddr) /*void DrawMultiBlur(void) { - long lABR,lDST;float fx,fy; + int lABR,lDST;float fx,fy; lABR=GlobalTextABR; lDST=DrawSemiTrans; @@ -2836,7 +2836,7 @@ void primTile16(unsigned char * baseAddr) void DrawMultiFilterSprite(void) { - long lABR,lDST; + int lABR,lDST; if(bUseMultiPass || DrawSemiTrans || ubOpaqueDraw) { @@ -2867,7 +2867,7 @@ void DrawMultiFilterSprite(void) void primSprt8(unsigned char * baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short *sgpuData = ((short *) baseAddr); short s; @@ -2988,7 +2988,7 @@ void primSprt8(unsigned char * baseAddr) void primSprt16(unsigned char * baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short *sgpuData = ((short *) baseAddr); short s; @@ -3108,7 +3108,7 @@ void primSprt16(unsigned char * baseAddr) void primSprtSRest(unsigned char * baseAddr,unsigned short type) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short *sgpuData = ((short *) baseAddr); short s;unsigned short sTypeRest=0; @@ -3279,7 +3279,7 @@ void primSprtSRest(unsigned char * baseAddr,unsigned short type) void primSprtS(unsigned char * baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short *sgpuData = ((short *) baseAddr); short s;unsigned short sTypeRest=0; @@ -3418,7 +3418,7 @@ void primSprtS(unsigned char * baseAddr) void primPolyF4(unsigned char *baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short 
*sgpuData = ((short *) baseAddr); lx0 = sgpuData[2]; @@ -3513,7 +3513,7 @@ BOOL bCheckFF9G4(unsigned char * baseAddr) if(iFF9Fix==2) { - long labr=GlobalTextABR; + int labr=GlobalTextABR; GlobalTextABR=1; primPolyG4(pFF9G4Cache); GlobalTextABR=labr; @@ -3527,7 +3527,7 @@ BOOL bCheckFF9G4(unsigned char * baseAddr) void primPolyG4(unsigned char * baseAddr) { - unsigned long *gpuData = (unsigned long *)baseAddr; + unsigned int *gpuData = (unsigned int *)baseAddr; short *sgpuData = ((short *) baseAddr); lx0 = sgpuData[2]; @@ -3578,7 +3578,7 @@ void primPolyG4(unsigned char * baseAddr) // cmd: flat shaded Texture3 //////////////////////////////////////////////////////////////////////// -BOOL DoLineCheck(unsigned long * gpuData) +BOOL DoLineCheck(unsigned int * gpuData) { BOOL bQuad=FALSE;short dx,dy; @@ -3745,7 +3745,7 @@ BOOL DoLineCheck(unsigned long * gpuData) void primPolyFT3(unsigned char * baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short *sgpuData = ((short *) baseAddr); lx0 = sgpuData[2]; @@ -4181,7 +4181,7 @@ void RectTexAlign(void) void primPolyFT4(unsigned char * baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short *sgpuData = ((short *) baseAddr); lx0 = sgpuData[2]; @@ -4269,7 +4269,7 @@ void primPolyFT4(unsigned char * baseAddr) void primPolyGT3(unsigned char *baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short *sgpuData = ((short *) baseAddr); lx0 = sgpuData[2]; @@ -4378,7 +4378,7 @@ void primPolyGT3(unsigned char *baseAddr) void primPolyG3(unsigned char *baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short *sgpuData = ((short *) baseAddr); lx0 = sgpuData[2]; @@ -4423,7 +4423,7 @@ void primPolyG3(unsigned char *baseAddr) void primPolyGT4(unsigned 
char *baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short *sgpuData = ((short *) baseAddr); lx0 = sgpuData[2]; @@ -4543,7 +4543,7 @@ void primPolyGT4(unsigned char *baseAddr) void primPolyF3(unsigned char *baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short *sgpuData = ((short *) baseAddr); lx0 = sgpuData[2]; @@ -4587,7 +4587,7 @@ void primPolyF3(unsigned char *baseAddr) void primLineGSkip(unsigned char *baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short *sgpuData = ((short *) baseAddr); int iMax=255; int i=2; @@ -4612,7 +4612,7 @@ void primLineGSkip(unsigned char *baseAddr) void primLineGEx(unsigned char *baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); int iMax=255; short cx0,cx1,cy0,cy1;int i;BOOL bDraw=TRUE; @@ -4676,7 +4676,7 @@ void primLineGEx(unsigned char *baseAddr) void primLineG2(unsigned char *baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short *sgpuData = ((short *) baseAddr); lx0 = sgpuData[2]; @@ -4721,7 +4721,7 @@ void primLineG2(unsigned char *baseAddr) void primLineFSkip(unsigned char *baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); int i=2,iMax=255; ly1 = (short)((gpuData[1]>>16) & 0xffff); @@ -4741,7 +4741,7 @@ void primLineFSkip(unsigned char *baseAddr) void primLineFEx(unsigned char *baseAddr) { - unsigned long *gpuData = ((unsigned long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); int iMax; short cx0,cx1,cy0,cy1;int i; @@ -4799,7 +4799,7 @@ void primLineFEx(unsigned char *baseAddr) void primLineF2(unsigned char *baseAddr) { - unsigned long *gpuData = ((unsigned 
long *) baseAddr); + unsigned int *gpuData = ((unsigned int *) baseAddr); short *sgpuData = ((short *) baseAddr); lx0 = sgpuData[2]; diff --git a/plugins/gpu-gles/gpuPrim.h b/plugins/gpu-gles/gpuPrim.h index dc4976717..c533acea7 100644 --- a/plugins/gpu-gles/gpuPrim.h +++ b/plugins/gpu-gles/gpuPrim.h @@ -39,8 +39,8 @@ extern EGLSurface surface; extern EGLDisplay display; #endif -void UploadScreen (long Position); -void PrepareFullScreenUpload (long Position); +void UploadScreen (int Position); +void PrepareFullScreenUpload (int Position); BOOL CheckAgainstScreen(short imageX0,short imageY0,short imageX1,short imageY1); BOOL CheckAgainstFrontScreen(short imageX0,short imageY0,short imageX1,short imageY1); BOOL FastCheckAgainstScreen(short imageX0,short imageY0,short imageX1,short imageY1); diff --git a/plugins/gpu-gles/gpuTexture.c b/plugins/gpu-gles/gpuTexture.c index 2f96482ae..5136682be 100644 --- a/plugins/gpu-gles/gpuTexture.c +++ b/plugins/gpu-gles/gpuTexture.c @@ -88,12 +88,12 @@ GLuint gTexMovieName=0; GLuint gTexBlurName=0; GLuint gTexFrameName=0; int iTexGarbageCollection=1; -unsigned long dwTexPageComp=0; +unsigned int dwTexPageComp=0; int iVRamSize=0; int iClampType=GL_CLAMP_TO_EDGE; int iFilter = GL_LINEAR; void (*LoadSubTexFn) (int,int,short,short); -unsigned long (*PalTexturedColourFn) (unsigned long); +unsigned int (*PalTexturedColourFn) (unsigned int); //////////////////////////////////////////////////////////////////////// // defines @@ -127,7 +127,7 @@ unsigned long (*PalTexturedColourFn) (unsigned long); //////////////////////////////////////////////////////////////////////// -unsigned char * CheckTextureInSubSCache(long TextureMode,unsigned long GivenClutId,unsigned short * pCache); +unsigned char * CheckTextureInSubSCache(int TextureMode,unsigned int GivenClutId,unsigned short * pCache); void LoadSubTexturePageSort(int pageid, int mode, short cx, short cy); void LoadPackedSubTexturePageSort(int pageid, int mode, short cx, short cy); void 
DefineSubTextureSort(void); @@ -136,7 +136,7 @@ void DefineSubTextureSort(void); // some globals //////////////////////////////////////////////////////////////////////// -long GlobalTexturePage; +int GlobalTexturePage; GLint XTexS; GLint YTexS; GLint DXTexS; @@ -147,7 +147,7 @@ BOOL bUse15bitMdec=FALSE; int iFrameTexType=0; int iFrameReadType=0; -unsigned long (*TCF[2]) (unsigned long); +unsigned int (*TCF[2]) (unsigned int); unsigned short (*PTCF[2]) (unsigned short); //////////////////////////////////////////////////////////////////////// @@ -158,7 +158,7 @@ unsigned short (*PTCF[2]) (unsigned short); typedef struct textureWndCacheEntryTag { - unsigned long ClutID; + unsigned int ClutID; short pageid; short textureMode; short Opaque; @@ -171,7 +171,7 @@ typedef struct textureWndCacheEntryTag typedef struct textureSubCacheEntryTagS { - unsigned long ClutID; + unsigned int ClutID; EXLong pos; unsigned char posTX; unsigned char posTY; @@ -200,7 +200,7 @@ int iTexWndLimit=MAXWNDTEXCACHE/2; GLubyte * texturepart=NULL; GLubyte * texturebuffer=NULL; -unsigned long g_x1,g_y1,g_x2,g_y2; +unsigned int g_x1,g_y1,g_x2,g_y2; unsigned char ubOpaqueDraw=0; unsigned short MAXTPAGES = 32; @@ -213,7 +213,7 @@ unsigned short MAXSORTTEX = 196; // porting... 
and honestly: nowadays the speed gain would be pointless //////////////////////////////////////////////////////////////////////// -unsigned long XP8RGBA(unsigned long BGR) +unsigned int XP8RGBA(unsigned int BGR) { if(!(BGR&0xffff)) return 0x50000000; if(DrawSemiTrans && !(BGR&0x8000)) @@ -221,7 +221,7 @@ unsigned long XP8RGBA(unsigned long BGR) return ((((BGR<<3)&0xf8)|((BGR<<6)&0xf800)|((BGR<<9)&0xf80000))&0xffffff)|0xff000000; } -unsigned long XP8RGBAEx(unsigned long BGR) +unsigned int XP8RGBAEx(unsigned int BGR) { if(!(BGR&0xffff)) return 0x03000000; if(DrawSemiTrans && !(BGR&0x8000)) @@ -229,9 +229,9 @@ unsigned long XP8RGBAEx(unsigned long BGR) return ((((BGR<<3)&0xf8)|((BGR<<6)&0xf800)|((BGR<<9)&0xf80000))&0xffffff)|0xff000000; } -unsigned long CP8RGBA(unsigned long BGR) +unsigned int CP8RGBA(unsigned int BGR) { - unsigned long l; + unsigned int l; if(!(BGR&0xffff)) return 0x50000000; if(DrawSemiTrans && !(BGR&0x8000)) {ubOpaqueDraw=1;return ((((BGR<<3)&0xf8)|((BGR<<6)&0xf800)|((BGR<<9)&0xf80000))&0xffffff);} @@ -240,9 +240,9 @@ unsigned long CP8RGBA(unsigned long BGR) return l; } -unsigned long CP8RGBAEx(unsigned long BGR) +unsigned int CP8RGBAEx(unsigned int BGR) { - unsigned long l; + unsigned int l; if(!(BGR&0xffff)) return 0x03000000; if(DrawSemiTrans && !(BGR&0x8000)) {ubOpaqueDraw=1;return ((((BGR<<3)&0xf8)|((BGR<<6)&0xf800)|((BGR<<9)&0xf80000))&0xffffff);} @@ -251,33 +251,33 @@ unsigned long CP8RGBAEx(unsigned long BGR) return l; } -unsigned long XP8RGBA_0(unsigned long BGR) +unsigned int XP8RGBA_0(unsigned int BGR) { if(!(BGR&0xffff)) return 0x50000000; return ((((BGR<<3)&0xf8)|((BGR<<6)&0xf800)|((BGR<<9)&0xf80000))&0xffffff)|0xff000000; } -unsigned long XP8RGBAEx_0(unsigned long BGR) +unsigned int XP8RGBAEx_0(unsigned int BGR) { if(!(BGR&0xffff)) return 0x03000000; return ((((BGR<<3)&0xf8)|((BGR<<6)&0xf800)|((BGR<<9)&0xf80000))&0xffffff)|0xff000000; } -unsigned long XP8BGRA_0(unsigned long BGR) +unsigned int XP8BGRA_0(unsigned int BGR) { 
if(!(BGR&0xffff)) return 0x50000000; return ((((BGR>>7)&0xf8)|((BGR<<6)&0xf800)|((BGR<<19)&0xf80000))&0xffffff)|0xff000000; } -unsigned long XP8BGRAEx_0(unsigned long BGR) +unsigned int XP8BGRAEx_0(unsigned int BGR) { if(!(BGR&0xffff)) return 0x03000000; return ((((BGR>>7)&0xf8)|((BGR<<6)&0xf800)|((BGR<<19)&0xf80000))&0xffffff)|0xff000000; } -unsigned long CP8RGBA_0(unsigned long BGR) +unsigned int CP8RGBA_0(unsigned int BGR) { - unsigned long l; + unsigned int l; if(!(BGR&0xffff)) return 0x50000000; l=((((BGR<<3)&0xf8)|((BGR<<6)&0xf800)|((BGR<<9)&0xf80000))&0xffffff)|0xff000000; @@ -285,9 +285,9 @@ unsigned long CP8RGBA_0(unsigned long BGR) return l; } -unsigned long CP8RGBAEx_0(unsigned long BGR) +unsigned int CP8RGBAEx_0(unsigned int BGR) { - unsigned long l; + unsigned int l; if(!(BGR&0xffff)) return 0x03000000; l=((((BGR<<3)&0xf8)|((BGR<<6)&0xf800)|((BGR<<9)&0xf80000))&0xffffff)|0xff000000; @@ -295,9 +295,9 @@ unsigned long CP8RGBAEx_0(unsigned long BGR) return l; } -unsigned long CP8BGRA_0(unsigned long BGR) +unsigned int CP8BGRA_0(unsigned int BGR) { - unsigned long l; + unsigned int l; if(!(BGR&0xffff)) return 0x50000000; l=((((BGR>>7)&0xf8)|((BGR<<6)&0xf800)|((BGR<<19)&0xf80000))&0xffffff)|0xff000000; @@ -305,9 +305,9 @@ unsigned long CP8BGRA_0(unsigned long BGR) return l; } -unsigned long CP8BGRAEx_0(unsigned long BGR) +unsigned int CP8BGRAEx_0(unsigned int BGR) { - unsigned long l; + unsigned int l; if(!(BGR&0xffff)) return 0x03000000; l=((((BGR>>7)&0xf8)|((BGR<<6)&0xf800)|((BGR<<19)&0xf80000))&0xffffff)|0xff000000; @@ -315,41 +315,41 @@ unsigned long CP8BGRAEx_0(unsigned long BGR) return l; } -unsigned long XP8RGBA_1(unsigned long BGR) +unsigned int XP8RGBA_1(unsigned int BGR) { if(!(BGR&0xffff)) return 0x50000000; if(!(BGR&0x8000)) {ubOpaqueDraw=1;return ((((BGR<<3)&0xf8)|((BGR<<6)&0xf800)|((BGR<<9)&0xf80000))&0xffffff);} return ((((BGR<<3)&0xf8)|((BGR<<6)&0xf800)|((BGR<<9)&0xf80000))&0xffffff)|0xff000000; } -unsigned long XP8RGBAEx_1(unsigned long 
BGR) +unsigned int XP8RGBAEx_1(unsigned int BGR) { if(!(BGR&0xffff)) return 0x03000000; if(!(BGR&0x8000)) {ubOpaqueDraw=1;return ((((BGR<<3)&0xf8)|((BGR<<6)&0xf800)|((BGR<<9)&0xf80000))&0xffffff);} return ((((BGR<<3)&0xf8)|((BGR<<6)&0xf800)|((BGR<<9)&0xf80000))&0xffffff)|0xff000000; } -unsigned long XP8BGRA_1(unsigned long BGR) +unsigned int XP8BGRA_1(unsigned int BGR) { if(!(BGR&0xffff)) return 0x50000000; if(!(BGR&0x8000)) {ubOpaqueDraw=1;return ((((BGR>>7)&0xf8)|((BGR<<6)&0xf800)|((BGR<<19)&0xf80000))&0xffffff);} return ((((BGR>>7)&0xf8)|((BGR<<6)&0xf800)|((BGR<<19)&0xf80000))&0xffffff)|0xff000000; } -unsigned long XP8BGRAEx_1(unsigned long BGR) +unsigned int XP8BGRAEx_1(unsigned int BGR) { if(!(BGR&0xffff)) return 0x03000000; if(!(BGR&0x8000)) {ubOpaqueDraw=1;return ((((BGR>>7)&0xf8)|((BGR<<6)&0xf800)|((BGR<<19)&0xf80000))&0xffffff);} return ((((BGR>>7)&0xf8)|((BGR<<6)&0xf800)|((BGR<<19)&0xf80000))&0xffffff)|0xff000000; } -unsigned long P8RGBA(unsigned long BGR) +unsigned int P8RGBA(unsigned int BGR) { if(!(BGR&0xffff)) return 0; return ((((BGR<<3)&0xf8)|((BGR<<6)&0xf800)|((BGR<<9)&0xf80000))&0xffffff)|0xff000000; } -unsigned long P8BGRA(unsigned long BGR) +unsigned int P8BGRA(unsigned int BGR) { if(!(BGR&0xffff)) return 0; return ((((BGR>>7)&0xf8)|((BGR<<6)&0xf800)|((BGR<<19)&0xf80000))&0xffffff)|0xff000000; @@ -680,7 +680,7 @@ void ResetTextureArea(BOOL bDelTex) // Invalidate tex windows //////////////////////////////////////////////////////////////////////// -void InvalidateWndTextureArea(long X,long Y,long W, long H) +void InvalidateWndTextureArea(int X,int Y,int W, int H) { int i,px1,px2,py1,py2,iYM=1; textureWndCacheEntry * tsw=wcWndtexStore; @@ -771,11 +771,11 @@ void MarkFree(textureSubCacheEntryS * tsx) } } -void InvalidateSubSTextureArea(long X,long Y,long W, long H) +void InvalidateSubSTextureArea(int X,int Y,int W, int H) { int i,j,k,iMax,px,py,px1,px2,py1,py2,iYM=1; EXLong npos;textureSubCacheEntryS * tsb; - long x1,x2,y1,y2,xa,sw; + int 
x1,x2,y1,y2,xa,sw; W+=X-1; H+=Y-1; @@ -875,7 +875,7 @@ void InvalidateTextureAreaEx(void) //////////////////////////////////////////////////////////////////////// -void InvalidateTextureArea(long X,long Y,long W, long H) +void InvalidateTextureArea(int X,int Y,int W, int H) { if(W==0 && H==0) return; @@ -920,12 +920,12 @@ void DefineTextureWnd(void) void LoadStretchPackedWndTexturePage(int pageid, int mode, short cx, short cy) { - unsigned long start,row,column,j,sxh,sxm,ldx,ldy,ldxo; + unsigned int start,row,column,j,sxh,sxm,ldx,ldy,ldxo; unsigned int palstart; unsigned short *px,*pa,*ta; unsigned char *cSRCPtr,*cOSRCPtr; unsigned short *wSRCPtr,*wOSRCPtr; - unsigned long LineOffset;unsigned short s; + unsigned int LineOffset;unsigned short s; int pmult=pageid/16; unsigned short (*LPTCOL)(unsigned short); @@ -1112,22 +1112,22 @@ void LoadStretchPackedWndTexturePage(int pageid, int mode, short cx, short cy) void LoadStretchWndTexturePage(int pageid, int mode, short cx, short cy) { - unsigned long start,row,column,j,sxh,sxm,ldx,ldy,ldxo,s; + unsigned int start,row,column,j,sxh,sxm,ldx,ldy,ldxo,s; unsigned int palstart; - unsigned long *px,*pa,*ta; + unsigned int *px,*pa,*ta; unsigned char *cSRCPtr,*cOSRCPtr; unsigned short *wSRCPtr,*wOSRCPtr; - unsigned long LineOffset; + unsigned int LineOffset; int pmult=pageid/16; - unsigned long (*LTCOL)(unsigned long); + unsigned int (*LTCOL)(unsigned int); LTCOL=TCF[DrawSemiTrans]; ldxo=TWin.Position.x1-TWin.OPosition.x1; ldy =TWin.Position.y1-TWin.OPosition.y1; - pa=px=(unsigned long *)ubPaletteBuffer; - ta=(unsigned long *)texturepart; + pa=px=(unsigned int *)ubPaletteBuffer; + ta=(unsigned int *)texturepart; palstart=cx+(cy*1024); ubOpaqueDraw=0; @@ -1323,12 +1323,12 @@ void LoadStretchWndTexturePage(int pageid, int mode, short cx, short cy) void LoadPackedWndTexturePage(int pageid, int mode, short cx, short cy) { - unsigned long start,row,column,j,sxh,sxm; + unsigned int start,row,column,j,sxh,sxm; unsigned int palstart; 
unsigned short *px,*pa,*ta; unsigned char *cSRCPtr; unsigned short *wSRCPtr; - unsigned long LineOffset; + unsigned int LineOffset; int pmult=pageid/16; unsigned short (*LPTCOL)(unsigned short); @@ -1465,19 +1465,19 @@ void LoadPackedWndTexturePage(int pageid, int mode, short cx, short cy) void LoadWndTexturePage(int pageid, int mode, short cx, short cy) { - unsigned long start,row,column,j,sxh,sxm; + unsigned int start,row,column,j,sxh,sxm; unsigned int palstart; - unsigned long *px,*pa,*ta; + unsigned int *px,*pa,*ta; unsigned char *cSRCPtr; unsigned short *wSRCPtr; - unsigned long LineOffset; + unsigned int LineOffset; int pmult=pageid/16; - unsigned long (*LTCOL)(unsigned long); + unsigned int (*LTCOL)(unsigned int); LTCOL=TCF[DrawSemiTrans]; - pa=px=(unsigned long *)ubPaletteBuffer; - ta=(unsigned long *)texturepart; + pa=px=(unsigned int *)ubPaletteBuffer; + ta=(unsigned int *)texturepart; palstart=cx+(cy*1024); ubOpaqueDraw=0; @@ -1627,7 +1627,7 @@ void UploadTexWndPal(int mode,short cx,short cy) { unsigned int i,iSize; unsigned short * wSrcPtr; - unsigned long * ta=(unsigned long *)texturepart; + unsigned int * ta=(unsigned int *)texturepart; wSrcPtr=psxVuw+cx+(cy*1024); if(mode==0) i=4; else i=64; @@ -1677,10 +1677,10 @@ void DefinePalTextureWnd(void) void LoadPalWndTexturePage(int pageid, int mode, short cx, short cy) { - unsigned long start,row,column,j,sxh,sxm; + unsigned int start,row,column,j,sxh,sxm; unsigned char *ta; unsigned char *cSRCPtr; - unsigned long LineOffset; + unsigned int LineOffset; int pmult=pageid/16; ta=(unsigned char *)texturepart; @@ -1737,10 +1737,10 @@ void LoadPalWndTexturePage(int pageid, int mode, short cx, short cy) void LoadStretchPalWndTexturePage(int pageid, int mode, short cx, short cy) { - unsigned long start,row,column,j,sxh,sxm,ldx,ldy,ldxo; + unsigned int start,row,column,j,sxh,sxm,ldx,ldy,ldxo; unsigned char *ta,s; unsigned char *cSRCPtr,*cOSRCPtr; - unsigned long LineOffset; + unsigned int LineOffset; int 
pmult=pageid/16; ldxo=TWin.Position.x1-TWin.OPosition.x1; @@ -1815,7 +1815,7 @@ void LoadStretchPalWndTexturePage(int pageid, int mode, short cx, short cy) // tex window: main selecting, cache handler included //////////////////////////////////////////////////////////////////////// -GLuint LoadTextureWnd(long pageid,long TextureMode,unsigned long GivenClutId) +GLuint LoadTextureWnd(int pageid,int TextureMode,unsigned int GivenClutId) { textureWndCacheEntry * ts, * tsx=NULL; int i;short cx,cy; @@ -1837,8 +1837,8 @@ GLuint LoadTextureWnd(long pageid,long TextureMode,unsigned long GivenClutId) // palette check sum { - unsigned long l=0,row; - unsigned long * lSRCPtr=(unsigned long *)(psxVuw+cx+(cy*1024)); + unsigned int l=0,row; + unsigned int * lSRCPtr=(unsigned int *)(psxVuw+cx+(cy*1024)); if(TextureMode==1) for(row=1;row<129;row++) l+=((*lSRCPtr++)-1)*row; else for(row=1;row<9;row++) l+=((*lSRCPtr++)-1)<ClutID&(1<<30)) DrawSemiTrans=1; else DrawSemiTrans=0; - *((unsigned long *)&gl_ux[4])=r.l; + *((unsigned int *)&gl_ux[4])=r.l; gTexName=uiStexturePage[tsx->cTexID]; LoadSubTexFn(k,j,cx,cy); @@ -4117,7 +4117,7 @@ void CompressTextureSpace(void) if(dwTexPageComp==0xffffffff) dwTexPageComp=0; - *((unsigned long *)&gl_ux[4])=opos.l; + *((unsigned int *)&gl_ux[4])=opos.l; GlobalTexturePage=lOGTP; DrawSemiTrans=sOldDST; } @@ -4132,7 +4132,7 @@ void CompressTextureSpace(void) ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// -GLuint SelectSubTextureS(long TextureMode, unsigned long GivenClutId) +GLuint SelectSubTextureS(int TextureMode, unsigned int GivenClutId) { unsigned char * OPtr;unsigned short iCache;short cx,cy; @@ -4174,9 +4174,9 @@ GLuint SelectSubTextureS(long TextureMode, unsigned long GivenClutId) // palette check sum.. 
removed MMX asm, this easy func works as well { - unsigned long l=0,row; + unsigned int l=0,row; - unsigned long * lSRCPtr=(unsigned long *)(psxVuw+cx+(cy*1024)); + unsigned int * lSRCPtr=(unsigned int *)(psxVuw+cx+(cy*1024)); if(TextureMode==1) for(row=1;row<129;row++) l+=((*lSRCPtr++)-1)*row; else for(row=1;row<9;row++) l+=((*lSRCPtr++)-1)<>24)&0xff) { From 980f7a58b47fefd3424bf8d55f6345128dc3774c Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 9 Jul 2023 22:50:47 +0300 Subject: [PATCH 258/597] clean up switching between dynarec and interpreter alternative to libretro/pcsx_rearmed#727 --- frontend/libretro.c | 4 +++- frontend/main.c | 2 -- frontend/menu.c | 3 ++- frontend/plugin.c | 1 - libpcsxcore/misc.c | 9 ++++----- libpcsxcore/new_dynarec/emu_if.c | 20 +++++--------------- libpcsxcore/plugins.c | 7 ------- libpcsxcore/plugins.h | 4 ---- libpcsxcore/psxinterpreter.c | 13 ++++++++----- libpcsxcore/r3000a.h | 8 ++++---- 10 files changed, 26 insertions(+), 45 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index f4b23cdfb..84baeda83 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -1966,9 +1966,11 @@ static void update_variables(bool in_flight) psxCpu = (Config.Cpu == CPU_INTERPRETER) ? &psxInt : &psxRec; if (psxCpu != prev_cpu) { + prev_cpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); prev_cpu->Shutdown(); psxCpu->Init(); - psxCpu->Reset(); // not really a reset.. 
+ psxCpu->Reset(); + psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); } } #endif /* !DRC_DISABLE */ diff --git a/frontend/main.c b/frontend/main.c index e0635ef4b..7d140f823 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -889,8 +889,6 @@ static int _OpenPlugins(void) { signal(SIGPIPE, SignalExit); #endif - GPU_clearDynarec(clearDynarec); - ret = CDR_open(); if (ret < 0) { SysMessage(_("Error opening CD-ROM plugin!")); return -1; } ret = SPU_open(); diff --git a/frontend/menu.c b/frontend/menu.c index 3a772d0e8..9ca87c60b 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -2652,10 +2652,11 @@ void menu_prepare_emu(void) psxCpu = &psxInt; #endif if (psxCpu != prev_cpu) { + prev_cpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); prev_cpu->Shutdown(); psxCpu->Init(); - // note that this does not really reset, just clears drc caches psxCpu->Reset(); + psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); } menu_sync_config(); diff --git a/frontend/plugin.c b/frontend/plugin.c index 7f3b8a463..d2d3dbab2 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -187,7 +187,6 @@ static const struct { DIRECT_GPU(GPUgetScreenPic), DIRECT_GPU(GPUshowScreenPic), */ -// DIRECT_GPU(GPUclearDynarec), }; void *plugin_link(enum builtint_plugins_e id, const char *sym) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 8010d7a5b..d52a931c8 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -608,7 +608,7 @@ int SaveState(const char *file) { f = SaveFuncs.open(file, "wb"); if (f == NULL) return -1; - new_dyna_before_save(); + psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); SaveFuncs.write(f, (void *)PcsxHeader, 32); SaveFuncs.write(f, (void *)&SaveVersion, sizeof(u32)); @@ -655,8 +655,6 @@ int SaveState(const char *file) { SaveFuncs.close(f); - new_dyna_after_save(); - return 0; } @@ -685,15 +683,15 @@ int LoadState(const char *file) { if (Config.HLE) psxBiosInit(); - psxCpu->Reset(); SaveFuncs.seek(f, 128 * 96 * 3, SEEK_CUR); - SaveFuncs.read(f, psxM, 
0x00200000); SaveFuncs.read(f, psxR, 0x00080000); SaveFuncs.read(f, psxH, 0x00010000); SaveFuncs.read(f, &psxRegs, offsetof(psxRegisters, gteBusyCycle)); psxRegs.gteBusyCycle = psxRegs.cycle; + psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); + if (Config.HLE) psxBiosFreeze(0); @@ -795,6 +793,7 @@ int RecvPcsxInfo() { SysClose(); return -1; } psxCpu->Reset(); + psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); } return 0; diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index aac9f78c0..c1dab56dd 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -120,18 +120,6 @@ void pcsx_mtc0_ds(u32 reg, u32 val) MTC0(&psxRegs, reg, val); } -void new_dyna_before_save(void) -{ - psxRegs.interrupt &= ~(1 << PSXINT_RCNT); // old savestate compat - - // psxRegs.intCycle is always maintained, no need to convert -} - -void new_dyna_after_save(void) -{ - psxRegs.interrupt |= 1 << PSXINT_RCNT; -} - static void new_dyna_restore(void) { int i; @@ -330,7 +318,6 @@ static int ari64_init() static void ari64_reset() { - printf("ari64_reset\n"); new_dyna_pcsx_mem_reset(); new_dynarec_invalidate_all_pages(); new_dyna_restore(); @@ -369,14 +356,17 @@ static void ari64_clear(u32 addr, u32 size) new_dynarec_invalidate_range(addr, addr + size); } -static void ari64_notify(int note, void *data) { +static void ari64_notify(enum R3000Anote note, void *data) { switch (note) { case R3000ACPU_NOTIFY_CACHE_UNISOLATED: case R3000ACPU_NOTIFY_CACHE_ISOLATED: new_dyna_pcsx_mem_isolate(note == R3000ACPU_NOTIFY_CACHE_ISOLATED); break; - default: + case R3000ACPU_NOTIFY_BEFORE_SAVE: + break; + case R3000ACPU_NOTIFY_AFTER_LOAD: + ari64_reset(); break; } } diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index c70ed67fc..23474f1bf 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -48,7 +48,6 @@ GPUmakeSnapshot GPU_makeSnapshot; GPUfreeze GPU_freeze; GPUgetScreenPic GPU_getScreenPic; GPUshowScreenPic GPU_showScreenPic; 
-GPUclearDynarec GPU_clearDynarec; GPUvBlank GPU_vBlank; CDRinit CDR_init; @@ -200,7 +199,6 @@ void CALLBACK GPU__makeSnapshot(void) {} void CALLBACK GPU__keypressed(int key) {} long CALLBACK GPU__getScreenPic(unsigned char *pMem) { return -1; } long CALLBACK GPU__showScreenPic(unsigned char *pMem) { return -1; } -void CALLBACK GPU__clearDynarec(void (CALLBACK *callback)(void)) {} void CALLBACK GPU__vBlank(int val) {} #define LoadGpuSym1(dest, name) \ @@ -240,7 +238,6 @@ static int LoadGPUplugin(const char *GPUdll) { LoadGpuSym1(freeze, "GPUfreeze"); LoadGpuSym0(getScreenPic, "GPUgetScreenPic"); LoadGpuSym0(showScreenPic, "GPUshowScreenPic"); - LoadGpuSym0(clearDynarec, "GPUclearDynarec"); LoadGpuSym0(vBlank, "GPUvBlank"); LoadGpuSym0(configure, "GPUconfigure"); LoadGpuSym0(test, "GPUtest"); @@ -696,10 +693,6 @@ static int LoadSIO1plugin(const char *SIO1dll) { #endif -void CALLBACK clearDynarec(void) { - psxCpu->Reset(); -} - int LoadPlugins() { int ret; char Plugin[MAXPATHLEN * 2]; diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index c997c611c..ced14cff7 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -75,7 +75,6 @@ typedef struct { typedef long (CALLBACK* GPUfreeze)(uint32_t, GPUFreeze_t *); typedef long (CALLBACK* GPUgetScreenPic)(unsigned char *); typedef long (CALLBACK* GPUshowScreenPic)(unsigned char *); -typedef void (CALLBACK* GPUclearDynarec)(void (CALLBACK *callback)(void)); typedef void (CALLBACK* GPUvBlank)(int, int); // GPU function pointers @@ -100,7 +99,6 @@ extern GPUmakeSnapshot GPU_makeSnapshot; extern GPUfreeze GPU_freeze; extern GPUgetScreenPic GPU_getScreenPic; extern GPUshowScreenPic GPU_showScreenPic; -extern GPUclearDynarec GPU_clearDynarec; extern GPUvBlank GPU_vBlank; // CD-ROM Functions @@ -383,8 +381,6 @@ extern SIO1registerCallback SIO1_registerCallback; #endif -void CALLBACK clearDynarec(void); - void SetIsoFile(const char *filename); const char *GetIsoFile(void); boolean UsingIso(void); diff --git 
a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index 9ece259cf..036b062f6 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -1078,7 +1078,6 @@ static int intInit() { } static void intReset() { - memset(&ICache, 0xff, sizeof(ICache)); } static inline void execI_(u8 **memRLUT, psxRegisters *regs_) { @@ -1115,11 +1114,15 @@ void intExecuteBlock() { static void intClear(u32 Addr, u32 Size) { } -void intNotify (int note, void *data) { - /* Armored Core won't boot without this */ - if (note == R3000ACPU_NOTIFY_CACHE_ISOLATED) - { +static void intNotify(enum R3000Anote note, void *data) { + switch (note) { + case R3000ACPU_NOTIFY_CACHE_ISOLATED: // Armored Core? + case R3000ACPU_NOTIFY_AFTER_LOAD: memset(&ICache, 0xff, sizeof(ICache)); + break; + case R3000ACPU_NOTIFY_CACHE_UNISOLATED: + case R3000ACPU_NOTIFY_BEFORE_SAVE: + break; } } diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 6973afe8b..be0e33683 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -29,9 +29,11 @@ extern "C" { #include "psxcounters.h" #include "psxbios.h" -enum { +enum R3000Anote { R3000ACPU_NOTIFY_CACHE_ISOLATED = 0, R3000ACPU_NOTIFY_CACHE_UNISOLATED = 1, + R3000ACPU_NOTIFY_BEFORE_SAVE, + R3000ACPU_NOTIFY_AFTER_LOAD, }; typedef struct { @@ -40,7 +42,7 @@ typedef struct { void (*Execute)(); /* executes up to a break */ void (*ExecuteBlock)(); /* executes up to a jump */ void (*Clear)(u32 Addr, u32 Size); - void (*Notify)(int note, void *data); + void (*Notify)(enum R3000Anote note, void *data); void (*ApplyConfig)(); void (*Shutdown)(); } R3000Acpu; @@ -204,8 +206,6 @@ extern psxRegisters psxRegs; extern u32 event_cycles[PSXINT_COUNT]; extern u32 next_interupt; -void new_dyna_before_save(void); -void new_dyna_after_save(void); void new_dyna_freeze(void *f, int mode); #define new_dyna_set_event_abs(e, abs) { \ From 6d75addfea5544b926d3924b1bd1e1d8ce9ffdb4 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 10 Jul 2023 01:50:00 +0300 
Subject: [PATCH 259/597] drc: handle regs-not-in-psxRegs case better for lightrec --- Makefile | 2 +- libpcsxcore/misc.c | 34 +++++---- libpcsxcore/new_dynarec/emu_if.c | 86 +---------------------- libpcsxcore/new_dynarec/emu_if.h | 6 -- libpcsxcore/new_dynarec/events.c | 91 +++++++++++++++++++++++++ libpcsxcore/new_dynarec/events.h | 3 + libpcsxcore/new_dynarec/linkage_arm.S | 4 +- libpcsxcore/new_dynarec/linkage_arm64.S | 4 +- libpcsxcore/new_dynarec/new_dynarec.c | 7 +- libpcsxcore/plugins.h | 5 ++ libpcsxcore/psxinterpreter.c | 6 +- libpcsxcore/r3000a.c | 21 +++--- libpcsxcore/r3000a.h | 10 +-- 13 files changed, 153 insertions(+), 126 deletions(-) create mode 100644 libpcsxcore/new_dynarec/events.c create mode 100644 libpcsxcore/new_dynarec/events.h diff --git a/Makefile b/Makefile index 585480d1d..ded26893d 100644 --- a/Makefile +++ b/Makefile @@ -71,7 +71,7 @@ OBJS += libpcsxcore/new_dynarec/pcsxmem.o else CFLAGS += -DDRC_DISABLE endif -OBJS += libpcsxcore/new_dynarec/emu_if.o +OBJS += libpcsxcore/new_dynarec/emu_if.o libpcsxcore/new_dynarec/events.o libpcsxcore/new_dynarec/new_dynarec.o: libpcsxcore/new_dynarec/pcsxmem_inline.c ifdef DRC_DBG libpcsxcore/new_dynarec/emu_if.o: CFLAGS += -D_FILE_OFFSET_BITS=64 diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index d52a931c8..57d3959f6 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -165,6 +165,18 @@ static void fake_bios_gpu_setup(void) GPU_writeData(gpu_data_def[i]); } +static void SetBootRegs(u32 pc, u32 gp, u32 sp) +{ + //printf("%s %08x %08x %08x\n", __func__, pc, gp, sp); + psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); + + psxRegs.pc = pc; + psxRegs.GPR.n.gp = gp; + psxRegs.GPR.n.sp = sp ? 
sp : 0x801fff00; + + psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); +} + int LoadCdrom() { EXE_HEADER tmpHead; struct iso_directory_record *dir; @@ -178,6 +190,7 @@ int LoadCdrom() { if (!Config.HLE && !Config.SlowBoot) { // skip BIOS logos + psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); psxRegs.pc = psxRegs.GPR.n.ra; return 0; } @@ -230,10 +243,7 @@ int LoadCdrom() { memcpy(&tmpHead, buf + 12, sizeof(EXE_HEADER)); - psxRegs.pc = SWAP32(tmpHead.pc0); - psxRegs.GPR.n.gp = SWAP32(tmpHead.gp0); - psxRegs.GPR.n.sp = SWAP32(tmpHead.s_addr); - if (psxRegs.GPR.n.sp == 0) psxRegs.GPR.n.sp = 0x801fff00; + SetBootRegs(SWAP32(tmpHead.pc0), SWAP32(tmpHead.gp0), SWAP32(tmpHead.s_addr)); tmpHead.t_size = SWAP32(tmpHead.t_size); tmpHead.t_addr = SWAP32(tmpHead.t_addr); @@ -488,11 +498,8 @@ int Load(const char *ExePath) { fread_to_ram(mem, section_size, 1, tmpFile); psxCpu->Clear(section_address, section_size / 4); } - psxRegs.pc = SWAP32(tmpHead.pc0); - psxRegs.GPR.n.gp = SWAP32(tmpHead.gp0); - psxRegs.GPR.n.sp = SWAP32(tmpHead.s_addr); - if (psxRegs.GPR.n.sp == 0) - psxRegs.GPR.n.sp = 0x801fff00; + SetBootRegs(SWAP32(tmpHead.pc0), SWAP32(tmpHead.gp0), + SWAP32(tmpHead.s_addr)); retval = 0; break; case CPE_EXE: @@ -601,6 +608,7 @@ static const u32 SaveVersion = 0x8b410006; int SaveState(const char *file) { void *f; GPUFreeze_t *gpufP; + SPUFreezeHdr_t *spufH; SPUFreeze_t *spufP; int Size; unsigned char *pMem; @@ -637,10 +645,10 @@ int SaveState(const char *file) { free(gpufP); // spu - spufP = (SPUFreeze_t *) malloc(16); - SPU_freeze(2, spufP, psxRegs.cycle); - Size = spufP->Size; SaveFuncs.write(f, &Size, 4); - free(spufP); + spufH = malloc(sizeof(*spufH)); + SPU_freeze(2, (SPUFreeze_t *)spufH, psxRegs.cycle); + Size = spufH->Size; SaveFuncs.write(f, &Size, 4); + free(spufH); spufP = (SPUFreeze_t *) malloc(Size); SPU_freeze(1, spufP, psxRegs.cycle); SaveFuncs.write(f, spufP, Size); diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 
c1dab56dd..50819e4b9 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -9,12 +9,10 @@ #include "emu_if.h" #include "pcsxmem.h" +#include "events.h" #include "../psxhle.h" #include "../psxinterpreter.h" #include "../r3000a.h" -#include "../cdrom.h" -#include "../psxdma.h" -#include "../mdec.h" #include "../gte_arm.h" #include "../gte_neon.h" #define FLAGLESS @@ -25,91 +23,11 @@ //#define evprintf printf #define evprintf(...) -char invalid_code[0x100000]; -u32 event_cycles[PSXINT_COUNT]; - -static void schedule_timeslice(void) -{ - u32 i, c = psxRegs.cycle; - u32 irqs = psxRegs.interrupt; - s32 min, dif; - - min = PSXCLK; - for (i = 0; irqs != 0; i++, irqs >>= 1) { - if (!(irqs & 1)) - continue; - dif = event_cycles[i] - c; - //evprintf(" ev %d\n", dif); - if (0 < dif && dif < min) - min = dif; - } - next_interupt = c + min; -} - -static void unusedInterrupt() -{ -} - -typedef void (irq_func)(); - -static irq_func * const irq_funcs[] = { - [PSXINT_SIO] = sioInterrupt, - [PSXINT_CDR] = cdrInterrupt, - [PSXINT_CDREAD] = cdrPlayReadInterrupt, - [PSXINT_GPUDMA] = gpuInterrupt, - [PSXINT_MDECOUTDMA] = mdec1Interrupt, - [PSXINT_SPUDMA] = spuInterrupt, - [PSXINT_MDECINDMA] = mdec0Interrupt, - [PSXINT_GPUOTCDMA] = gpuotcInterrupt, - [PSXINT_CDRDMA] = cdrDmaInterrupt, - [PSXINT_CDRLID] = cdrLidSeekInterrupt, - [PSXINT_CDRPLAY_OLD] = unusedInterrupt, - [PSXINT_SPU_UPDATE] = spuUpdate, - [PSXINT_RCNT] = psxRcntUpdate, -}; - -/* local dupe of psxBranchTest, using event_cycles */ -static void irq_test(void) -{ - u32 cycle = psxRegs.cycle; - u32 irq, irq_bits; - - for (irq = 0, irq_bits = psxRegs.interrupt; irq_bits != 0; irq++, irq_bits >>= 1) { - if (!(irq_bits & 1)) - continue; - if ((s32)(cycle - event_cycles[irq]) >= 0) { - // note: irq_funcs() also modify psxRegs.interrupt - psxRegs.interrupt &= ~(1u << irq); - irq_funcs[irq](); - } - } - - if ((psxHu32(0x1070) & psxHu32(0x1074)) && (psxRegs.CP0.n.Status & 0x401) == 0x401) { - 
psxException(0x400, 0); - pending_exception = 1; - } -} - -void gen_interupt() -{ - evprintf(" +ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle, - next_interupt, next_interupt - psxRegs.cycle); - - irq_test(); - //psxBranchTest(); - //pending_exception = 1; - - schedule_timeslice(); - - evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle, - next_interupt, next_interupt - psxRegs.cycle); -} - void pcsx_mtc0(u32 reg, u32 val) { evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); MTC0(&psxRegs, reg, val); - gen_interupt(); + gen_interupt(&psxRegs.CP0); if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.Status & 0x0300) // possible sw irq pending_exception = 1; } diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index b49b84bad..3f4aba6fe 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -1,12 +1,6 @@ #include "new_dynarec.h" #include "../r3000a.h" -extern char invalid_code[0x100000]; - -/* weird stuff */ -#define EAX 0 -#define ECX 1 - extern int dynarec_local[]; /* same as psxRegs.GPR.n.* */ diff --git a/libpcsxcore/new_dynarec/events.c b/libpcsxcore/new_dynarec/events.c new file mode 100644 index 000000000..5d981f8d5 --- /dev/null +++ b/libpcsxcore/new_dynarec/events.c @@ -0,0 +1,91 @@ +#include +#include "../r3000a.h" +#include "../cdrom.h" +#include "../psxdma.h" +#include "../mdec.h" +#include "events.h" + +extern int pending_exception; + +//#define evprintf printf +#define evprintf(...) 
+ +u32 event_cycles[PSXINT_COUNT]; + +void schedule_timeslice(void) +{ + u32 i, c = psxRegs.cycle; + u32 irqs = psxRegs.interrupt; + s32 min, dif; + + min = PSXCLK; + for (i = 0; irqs != 0; i++, irqs >>= 1) { + if (!(irqs & 1)) + continue; + dif = event_cycles[i] - c; + //evprintf(" ev %d\n", dif); + if (0 < dif && dif < min) + min = dif; + } + next_interupt = c + min; +} + +static void unusedInterrupt() +{ +} + +typedef void (irq_func)(); + +static irq_func * const irq_funcs[] = { + [PSXINT_SIO] = sioInterrupt, + [PSXINT_CDR] = cdrInterrupt, + [PSXINT_CDREAD] = cdrPlayReadInterrupt, + [PSXINT_GPUDMA] = gpuInterrupt, + [PSXINT_MDECOUTDMA] = mdec1Interrupt, + [PSXINT_SPUDMA] = spuInterrupt, + [PSXINT_MDECINDMA] = mdec0Interrupt, + [PSXINT_GPUOTCDMA] = gpuotcInterrupt, + [PSXINT_CDRDMA] = cdrDmaInterrupt, + [PSXINT_CDRLID] = cdrLidSeekInterrupt, + [PSXINT_CDRPLAY_OLD] = unusedInterrupt, + [PSXINT_SPU_UPDATE] = spuUpdate, + [PSXINT_RCNT] = psxRcntUpdate, +}; + +/* local dupe of psxBranchTest, using event_cycles */ +static void irq_test(psxCP0Regs *cp0) +{ + u32 cycle = psxRegs.cycle; + u32 irq, irq_bits; + + for (irq = 0, irq_bits = psxRegs.interrupt; irq_bits != 0; irq++, irq_bits >>= 1) { + if (!(irq_bits & 1)) + continue; + if ((s32)(cycle - event_cycles[irq]) >= 0) { + // note: irq_funcs() also modify psxRegs.interrupt + psxRegs.interrupt &= ~(1u << irq); + irq_funcs[irq](); + } + } + + if ((psxHu32(0x1070) & psxHu32(0x1074)) && (cp0->n.Status & 0x401) == 0x401) { + psxException(0x400, 0, cp0); + pending_exception = 1; + } +} + +void gen_interupt(psxCP0Regs *cp0) +{ + evprintf(" +ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle, + next_interupt, next_interupt - psxRegs.cycle); + + irq_test(cp0); + //pending_exception = 1; + + schedule_timeslice(); + + evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle, + next_interupt, next_interupt - psxRegs.cycle); +} + + diff --git a/libpcsxcore/new_dynarec/events.h b/libpcsxcore/new_dynarec/events.h new file mode 
100644 index 000000000..919855cc1 --- /dev/null +++ b/libpcsxcore/new_dynarec/events.h @@ -0,0 +1,3 @@ +union psxCP0Regs_; +void schedule_timeslice(void); +void gen_interupt(union psxCP0Regs_ *cp0); diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 7e0db2d7a..f97b2d059 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -242,6 +242,7 @@ FUNCTION(cc_interrupt): @@ str r10, [fp, #LO_reg_cop0+36] /* Count - not on PSX */ mov r10, lr + add r0, fp, #(LO_psxRegs + 34*4) /* CP0 */ bl gen_interupt mov lr, r10 ldr r10, [fp, #LO_cycle] @@ -301,7 +302,8 @@ call_psxException: ldr r3, [fp, #LO_last_count] str r2, [fp, #LO_pcaddr] add r10, r3, r10 - str r10, [fp, #LO_cycle] /* PCSX cycles */ + str r10, [fp, #LO_cycle] /* PCSX cycles */ + add r2, fp, #(LO_psxRegs + 34*4) /* CP0 */ bl psxException /* note: psxException might do recursive recompiler call from it's HLE code, diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index d073ded4c..72d13f3d8 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -100,6 +100,7 @@ FUNCTION(cc_interrupt): # str rCC, [rFP, #LO_reg_cop0+36] /* Count */ mov x21, lr 1: + add x0, rFP, #(LO_psxRegs + 34*4) /* CP0 */ bl gen_interupt mov lr, x21 ldr rCC, [rFP, #LO_cycle] @@ -159,7 +160,8 @@ call_psxException: ldr w3, [rFP, #LO_last_count] str w2, [rFP, #LO_pcaddr] add rCC, w3, rCC - str rCC, [rFP, #LO_cycle] /* PCSX cycles */ + str rCC, [rFP, #LO_cycle] /* PCSX cycles */ + add x2, rFP, #(LO_psxRegs + 34*4) /* CP0 */ bl psxException /* note: psxException might do recursive recompiler call from it's HLE code, diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 2673a6ea9..17f7af95d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -242,6 +242,7 @@ static struct decoded_insn } 
dops[MAXBLOCK]; static u_char *out; + static char invalid_code[0x100000]; static struct ht_entry hash_table[65536]; static struct block_info *blocks[PAGE_COUNT]; static struct jump_info *jumps[PAGE_COUNT]; @@ -3605,7 +3606,7 @@ static void do_cop1stub(int n) //else {printf("fp exception in delay slot\n");} wb_dirtys(i_regs->regmap_entry,i_regs->wasdirty); if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_movimm(start+(i-ds)*4,EAX); // Get PC + emit_movimm(start+(i-ds)*4,0); // Get PC emit_addimm(HOST_CCREG,ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... emit_far_jump(ds?fp_exception_ds:fp_exception); } @@ -5071,8 +5072,8 @@ static void do_ccstub(int n) if(stubs[n].c!=-1) { // Save PC as return address - emit_movimm(stubs[n].c,EAX); - emit_writeword(EAX,&pcaddr); + emit_movimm(stubs[n].c,0); + emit_writeword(0,&pcaddr); } else { diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index ced14cff7..ac89d1ff4 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -176,6 +176,11 @@ typedef void (CALLBACK* SPUregisterScheduleCb)(void (CALLBACK *callback)(unsigne typedef long (CALLBACK* SPUconfigure)(void); typedef long (CALLBACK* SPUtest)(void); typedef void (CALLBACK* SPUabout)(void); +typedef struct { + unsigned char PluginName[8]; + uint32_t PluginVersion; + uint32_t Size; +} SPUFreezeHdr_t; typedef struct { unsigned char PluginName[8]; uint32_t PluginVersion; diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index 036b062f6..f59934ae3 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -750,19 +750,19 @@ OP(psxMTLO) { _rLo_ = _rRs_; } // Lo = Rs *********************************************************/ OP(psxBREAK) { regs_->pc -= 4; - psxException(0x24, branch); + psxException(0x24, branch, &regs_->CP0); } OP(psxSYSCALL) { regs_->pc -= 4; - psxException(0x20, branch); + psxException(0x20, branch, &regs_->CP0); } static inline void
psxTestSWInts(psxRegisters *regs_) { if (regs_->CP0.n.Cause & regs_->CP0.n.Status & 0x0300 && regs_->CP0.n.Status & 0x1) { regs_->CP0.n.Cause &= ~0x7c; - psxException(regs_->CP0.n.Cause, branch); + psxException(regs_->CP0.n.Cause, branch, &regs_->CP0); } } diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index ddf838866..8b7cfbf77 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -84,7 +84,8 @@ void psxShutdown() { psxMemShutdown(); } -void psxException(u32 code, u32 bd) { +// cp0 is passed separately for lightrec to be less messy +void psxException(u32 code, u32 bd, psxCP0Regs *cp0) { psxRegs.code = PSXMu32(psxRegs.pc); if (!Config.HLE && ((((psxRegs.code) >> 24) & 0xfe) == 0x4a)) { @@ -92,30 +93,30 @@ void psxException(u32 code, u32 bd) { // BIOS does not allow to return to GTE instructions // (just skips it, supposedly because it's scheduled already) // so we execute it here - psxCP2[psxRegs.code & 0x3f](&psxRegs.CP2); + psxCP2Regs *cp2 = (void *)(cp0 + 1); + psxCP2[psxRegs.code & 0x3f](cp2); } // Set the Cause - psxRegs.CP0.n.Cause = (psxRegs.CP0.n.Cause & 0x300) | code; + cp0->n.Cause = (cp0->n.Cause & 0x300) | code; // Set the EPC & PC if (bd) { #ifdef PSXCPU_LOG PSXCPU_LOG("bd set!!!\n"); #endif - psxRegs.CP0.n.Cause |= 0x80000000; - psxRegs.CP0.n.EPC = (psxRegs.pc - 4); + cp0->n.Cause |= 0x80000000; + cp0->n.EPC = (psxRegs.pc - 4); } else - psxRegs.CP0.n.EPC = (psxRegs.pc); + cp0->n.EPC = (psxRegs.pc); - if (psxRegs.CP0.n.Status & 0x400000) + if (cp0->n.Status & 0x400000) psxRegs.pc = 0xbfc00180; else psxRegs.pc = 0x80000080; // Set the Status - psxRegs.CP0.n.Status = (psxRegs.CP0.n.Status &~0x3f) | - ((psxRegs.CP0.n.Status & 0xf) << 2); + cp0->n.Status = (cp0->n.Status & ~0x3f) | ((cp0->n.Status & 0x0f) << 2); if (Config.HLE) psxBiosException(); } @@ -199,7 +200,7 @@ void psxBranchTest() { PSXCPU_LOG("Interrupt: %x %x\n", psxHu32(0x1070), psxHu32(0x1074)); #endif // SysPrintf("Interrupt (%x): %x %x\n", psxRegs.cycle, psxHu32(0x1070),
psxHu32(0x1074)); - psxException(0x400, 0); + psxException(0x400, 0, &psxRegs.CP0); } } } diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index be0e33683..229b14a14 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -76,7 +76,7 @@ typedef union { PAIR p[34]; } psxGPRRegs; -typedef union { +typedef union psxCP0Regs_ { struct { u32 Index, Random, EntryLo0, EntryLo1, Context, PageMask, Wired, Reserved0, @@ -176,6 +176,8 @@ typedef struct psxCP2Regs { } psxCP2Regs; typedef struct { + // note: some cores like lightrec don't keep their data here, + // so use R3000ACPU_NOTIFY_BEFORE_SAVE to sync psxGPRRegs GPR; /* General Purpose Registers */ psxCP0Regs CP0; /* Coprocessor0 Registers */ union { @@ -185,8 +187,8 @@ typedef struct { }; psxCP2Regs CP2; }; - u32 pc; /* Program counter */ - u32 code; /* The instruction */ + u32 pc; /* Program counter */ + u32 code; /* The instruction */ u32 cycle; u32 interrupt; struct { u32 sCycle, cycle; } intCycle[32]; @@ -224,7 +226,7 @@ void new_dyna_freeze(void *f, int mode); int psxInit(); void psxReset(); void psxShutdown(); -void psxException(u32 code, u32 bd); +void psxException(u32 code, u32 bd, psxCP0Regs *cp0); void psxBranchTest(); void psxExecuteBios(); void psxJumpTest(); From 7b75929b9415646b25d9211975556f5466024a94 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 11 Jul 2023 01:19:58 +0300 Subject: [PATCH 260/597] some openbios support slowboot isn't working yet, doesn't work at all with lightrec, openbios-fastboot doesn't work with ari64 --- frontend/libretro.c | 6 ------ frontend/menu.c | 7 ++++--- libpcsxcore/misc.c | 26 ++++++++++++++++---------- libpcsxcore/misc.h | 3 +++ libpcsxcore/psxcommon.c | 1 + libpcsxcore/psxcommon.h | 4 +--- libpcsxcore/r3000a.c | 15 +++++++++++++-- 7 files changed, 38 insertions(+), 24 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 84baeda83..5acaba8b8 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2708,12 +2708,6 @@ void 
retro_run(void) { rebootemu = 0; SysReset(); - if (!Config.HLE && !Config.SlowBoot) - { - // skip BIOS logos - psxRegs.pc = psxRegs.GPR.n.ra; - } - return; } print_internal_fps(); diff --git a/frontend/menu.c b/frontend/menu.c index 9ca87c60b..f78d3d26c 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -2011,9 +2011,6 @@ static int reset_game(void) ClosePlugins(); OpenPlugins(); SysReset(); - if (CheckCdrom() != -1) { - LoadCdrom(); - } return 0; } @@ -2042,13 +2039,17 @@ static int reload_plugins(const char *cdimg) static int run_bios(void) { + boolean origSlowBoot = Config.SlowBoot; + if (bios_sel == 0) return -1; ready_to_go = 0; if (reload_plugins(NULL) != 0) return -1; + Config.SlowBoot = 1; SysReset(); + Config.SlowBoot = origSlowBoot; ready_to_go = 1; return 0; diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 57d3959f6..022ad6d9a 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -22,6 +22,7 @@ */ #include +#include <assert.h> #include "misc.h" #include "cdrom.h" #include "mdec.h" @@ -154,7 +155,7 @@ static const unsigned int gpu_data_def[] = { 0x02000000, 0x00000000, 0x01ff03ff, }; -static void fake_bios_gpu_setup(void) +void BiosLikeGPUSetup() { int i; @@ -177,6 +178,14 @@ static void SetBootRegs(u32 pc, u32 gp, u32 sp) psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); } +void BiosBootBypass() { + assert(psxRegs.pc == 0x80030000); + + // skip BIOS logos and region check + psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); + psxRegs.pc = psxRegs.GPR.n.ra; +} + int LoadCdrom() { EXE_HEADER tmpHead; struct iso_directory_record *dir; @@ -184,15 +193,10 @@ int LoadCdrom() { u8 mdir[4096]; char exename[256]; - // not the best place to do it, but since BIOS boot logo killer - // is just below, do it here - fake_bios_gpu_setup(); - - if (!Config.HLE && !Config.SlowBoot) { - // skip BIOS logos - psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); - psxRegs.pc = psxRegs.GPR.n.ra; - return 0; + if (!Config.HLE) { + if (!BiosBooted) return 0; // custom
BIOS + if (psxRegs.pc != 0x80030000) return 0; // BiosBootBypass'ed + if (Config.SlowBoot) return 0; } time[0] = itob(0); time[1] = itob(2); time[2] = itob(0x10); @@ -210,6 +214,7 @@ int LoadCdrom() { if (GetCdromFile(mdir, time, "SYSTEM.CNF;1") == -1) { // if SYSTEM.CNF is missing, start an existing PSX.EXE if (GetCdromFile(mdir, time, "PSX.EXE;1") == -1) return -1; + strcpy(exename, "PSX.EXE;1"); READTRACK(); } @@ -243,6 +248,7 @@ int LoadCdrom() { memcpy(&tmpHead, buf + 12, sizeof(EXE_HEADER)); + SysPrintf("manual booting '%s'\n", exename); SetBootRegs(SWAP32(tmpHead.pc0), SWAP32(tmpHead.gp0), SWAP32(tmpHead.s_addr)); tmpHead.t_size = SWAP32(tmpHead.t_size); diff --git a/libpcsxcore/misc.h b/libpcsxcore/misc.h index ae3fc81f4..da99885a4 100644 --- a/libpcsxcore/misc.h +++ b/libpcsxcore/misc.h @@ -56,6 +56,9 @@ typedef struct { extern char CdromId[10]; extern char CdromLabel[33]; +void BiosLikeGPUSetup(); +void BiosBootBypass(); + int LoadCdrom(); int LoadCdromFile(const char *filename, EXE_HEADER *head); int CheckCdrom(); diff --git a/libpcsxcore/psxcommon.c b/libpcsxcore/psxcommon.c index 8313304c6..fcc3debf9 100644 --- a/libpcsxcore/psxcommon.c +++ b/libpcsxcore/psxcommon.c @@ -26,6 +26,7 @@ PcsxConfig Config; boolean NetOpened = FALSE; +boolean BiosBooted = FALSE; int Log = 0; FILE *emuLog = NULL; diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index b621326cd..67a0ae08e 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -150,13 +150,11 @@ typedef struct { boolean cdr_read_timing; boolean gpu_slow_list_walking; } hacks; -#ifdef _WIN32 - char Lang[256]; -#endif } PcsxConfig; extern PcsxConfig Config; extern boolean NetOpened; +extern boolean BiosBooted; struct PcsxSaveFuncs { void *(*open)(const char *name, const char *mode); diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 8b7cfbf77..53a5ebac5 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -67,8 +67,14 @@ void psxReset() { psxHwReset(); 
psxBiosInit(); - if (!Config.HLE) + BiosLikeGPUSetup(); // a bit of a hack but whatever + + BiosBooted = FALSE; + if (!Config.HLE) { psxExecuteBios(); + if (psxRegs.pc == 0x80030000 && !Config.SlowBoot) + BiosBootBypass(); + } #ifdef EMU_LOG EMU_LOG("*BIOS END*\n"); @@ -237,7 +243,12 @@ void psxJumpTest() { } void psxExecuteBios() { - while (psxRegs.pc != 0x80030000) + int i; + for (i = 0; psxRegs.pc != 0x80030000 && i < 5000000; i++) psxCpu->ExecuteBlock(); + if (psxRegs.pc == 0x80030000) + BiosBooted = TRUE; + else + SysPrintf("BIOS boot timeout - custom BIOS?\n"); } From 509ebde45113e74215286c592d0f33aefe82243f Mon Sep 17 00:00:00 2001 From: anzz1 Date: Wed, 12 Jul 2023 19:32:07 +0300 Subject: [PATCH 261/597] cdrom: rename mode constants to defineds --- libpcsxcore/cdrom.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 098b77d0b..921e3daa7 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -928,7 +928,7 @@ void cdrInterrupt(void) { SetPlaySeekRead(cdr.StatP, 0); cdr.LocL[0] = LOCL_INVALID; cdr.Muted = FALSE; - cdr.Mode = 0x20; /* This fixes This is Football 2, Pooh's Party lockups */ + cdr.Mode = MODE_SIZE_2340; /* This fixes This is Football 2, Pooh's Party lockups */ second_resp_time = not_ready ? 70000 : 4100000; start_rotating = 1; break; @@ -1159,7 +1159,7 @@ void cdrInterrupt(void) { cdr.LocL[0] = LOCL_INVALID; cdr.SubqForwardSectors = 1; - cycles = (cdr.Mode & 0x80) ? cdReadTime : cdReadTime * 2; + cycles = (cdr.Mode & MODE_SPEED) ? 
cdReadTime : cdReadTime * 2; cycles += seekTime; if (Config.hacks.cdr_read_timing) cycles = cdrAlignTimingHack(cycles); @@ -1543,7 +1543,7 @@ void cdrWrite3(unsigned char rt) { CDR_LOG("cdrom: FifoOffset(2) %d/%d\n", cdr.FifoOffset, cdr.FifoSize); } else if (rt & 0x80) { - switch (cdr.Mode & 0x30) { + switch (cdr.Mode & (MODE_SIZE_2328|MODE_SIZE_2340)) { case MODE_SIZE_2328: case 0x00: cdr.FifoOffset = 12; @@ -1687,7 +1687,7 @@ int cdrFreeze(void *f, int Mode) { getCdInfo(); cdr.FifoOffset = tmp < DATA_SIZE ? tmp : DATA_SIZE; - cdr.FifoSize = (cdr.Mode & 0x20) ? 2340 : 2048 + 12; + cdr.FifoSize = (cdr.Mode & MODE_SIZE_2340) ? 2340 : 2048 + 12; if (cdr.SubqForwardSectors > SUBQ_FORWARD_SECTORS) cdr.SubqForwardSectors = SUBQ_FORWARD_SECTORS; @@ -1706,7 +1706,7 @@ int cdrFreeze(void *f, int Mode) { if (!Config.Cdda) CDR_play(cdr.SetSectorPlay); if (psxRegs.interrupt & (1 << PSXINT_CDRPLAY_OLD)) - CDRPLAYREAD_INT((cdr.Mode & 0x80) ? (cdReadTime / 2) : cdReadTime, 1); + CDRPLAYREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime, 1); } if ((cdr.freeze_ver & 0xffffff00) != 0x63647200) { From 10b9bf1ee1e2e6b6fa7f31f34cd27e385365ea26 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 12 Jul 2023 02:21:05 +0300 Subject: [PATCH 262/597] cdrom: adjust logging --- libpcsxcore/cdrom.c | 55 +++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 921e3daa7..5f4ce60e5 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -36,7 +36,8 @@ #if 0 #define CDR_LOG_I SysPrintf #else -#define CDR_LOG_I log_unhandled +#define CDR_LOG_I(fmt, ...) 
\ + log_unhandled("%u cdrom: " fmt, psxRegs.cycle, ##__VA_ARGS__) #endif #if 0 #define CDR_LOG_IO SysPrintf @@ -298,8 +299,8 @@ static void setIrq(int log_cmd) if (cdr.Stat) { int i; - SysPrintf("%u cdrom: CDR IRQ=%d cmd %02x stat %02x: ", - psxRegs.cycle, !!(cdr.Stat & cdr.Reg2), log_cmd, cdr.Stat); + CDR_LOG_I("CDR IRQ=%d cmd %02x stat %02x: ", + !!(cdr.Stat & cdr.Reg2), log_cmd, cdr.Stat); for (i = 0; i < cdr.ResultC; i++) SysPrintf("%02x ", cdr.Result[i]); SysPrintf("\n"); @@ -311,7 +312,7 @@ static void setIrq(int log_cmd) // (yes it's slow, but you probably don't want to modify it) void cdrLidSeekInterrupt(void) { - CDR_LOG_I("%u %s cdr.DriveState=%d\n", psxRegs.cycle, __func__, cdr.DriveState); + CDR_LOG_I("%s cdr.DriveState=%d\n", __func__, cdr.DriveState); switch (cdr.DriveState) { default: @@ -688,15 +689,16 @@ void cdrInterrupt(void) { int i; if (cdr.Stat) { - CDR_LOG_I("%u cdrom: cmd %02x with irqstat %x\n", - psxRegs.cycle, cdr.CmdInProgress, cdr.Stat); + CDR_LOG_I("cmd %02x with irqstat %x\n", + cdr.CmdInProgress, cdr.Stat); return; } if (cdr.Irq1Pending) { // hand out the "newest" sector, according to nocash cdrUpdateTransferBuf(CDR_getBuffer()); - CDR_LOG_I("cdrom: %x:%02x:%02x loaded on ack\n", - cdr.Transfer[0], cdr.Transfer[1], cdr.Transfer[2]); + CDR_LOG_I("%x:%02x:%02x loaded on ack, cmd=%02x res=%02x\n", + cdr.Transfer[0], cdr.Transfer[1], cdr.Transfer[2], + cdr.CmdInProgress, cdr.Irq1Pending); SetResultSize(1); cdr.Result[0] = cdr.Irq1Pending; cdr.Stat = (cdr.Irq1Pending & STATUS_ERROR) ? DiskError : DataReady; @@ -1175,7 +1177,7 @@ void cdrInterrupt(void) { // FALLTHROUGH set_error: - CDR_LOG_I("cdrom: cmd %02x error %02x\n", Cmd, error); + CDR_LOG_I("cmd %02x error %02x\n", Cmd, error); SetResultSize(2); cdr.Result[0] = cdr.StatP | STATUS_ERROR; cdr.Result[1] = not_ready ? 
ERROR_NOTREADY : error; @@ -1194,8 +1196,7 @@ void cdrInterrupt(void) { } else if (cdr.Cmd && cdr.Cmd != (Cmd & 0xff)) { cdr.CmdInProgress = cdr.Cmd; - CDR_LOG_I("%u cdrom: cmd %02x came before %02x finished\n", - psxRegs.cycle, cdr.Cmd, Cmd); + CDR_LOG_I("cmd %02x came before %02x finished\n", cdr.Cmd, Cmd); } setIrq(Cmd); @@ -1263,7 +1264,7 @@ static void cdrAttenuate(s16 *buf, int samples, int stereo) static void cdrReadInterruptSetResult(unsigned char result) { if (cdr.Stat) { - CDR_LOG_I("cdrom: %d:%02d:%02d irq miss, cmd=%02x irqstat=%02x\n", + CDR_LOG_I("%d:%02d:%02d irq miss, cmd=%02x irqstat=%02x\n", cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2], cdr.CmdInProgress, cdr.Stat); cdr.Irq1Pending = result; @@ -1283,7 +1284,7 @@ static void cdrUpdateTransferBuf(const u8 *buf) CheckPPFCache(cdr.Transfer, cdr.Prev[0], cdr.Prev[1], cdr.Prev[2]); CDR_LOG("cdr.Transfer %x:%x:%x\n", cdr.Transfer[0], cdr.Transfer[1], cdr.Transfer[2]); if (cdr.FifoOffset < 2048 + 12) - CDR_LOG("cdrom: FifoOffset(1) %d/%d\n", cdr.FifoOffset, cdr.FifoSize); + CDR_LOG("FifoOffset(1) %d/%d\n", cdr.FifoOffset, cdr.FifoSize); } static void cdrReadInterrupt(void) @@ -1420,7 +1421,7 @@ void cdrWrite1(unsigned char rt) { } #ifdef CDR_LOG_CMD_IRQ - SysPrintf("%u cdrom: CD1 write: %x (%s)", psxRegs.cycle, rt, CmdName[rt]); + CDR_LOG_I("CD1 write: %x (%s)", rt, CmdName[rt]); if (cdr.ParamC) { int i; SysPrintf(" Param[%d] = {", cdr.ParamC); @@ -1441,8 +1442,8 @@ void cdrWrite1(unsigned char rt) { CDR_INT(5000); } else { - CDR_LOG_I("%u cdrom: cmd while busy: %02x, prev %02x, busy %02x\n", - psxRegs.cycle, rt, cdr.Cmd, cdr.CmdInProgress); + CDR_LOG_I("cmd while busy: %02x, prev %02x, busy %02x\n", + rt, cdr.Cmd, cdr.CmdInProgress); if (cdr.CmdInProgress < 0x100) // no pending 2nd response cdr.CmdInProgress = rt; } @@ -1456,7 +1457,7 @@ unsigned char cdrRead2(void) { if (cdr.FifoOffset < cdr.FifoSize) ret = cdr.Transfer[cdr.FifoOffset++]; else - CDR_LOG_I("cdrom: read empty fifo 
(%d)\n", cdr.FifoSize); + CDR_LOG_I("read empty fifo (%d)\n", cdr.FifoSize); CDR_LOG_IO("cdr r2.dat: %02x\n", ret); return ret; @@ -1505,15 +1506,14 @@ void cdrWrite3(unsigned char rt) { if (cdr.Stat & rt) { u32 nextCycle = psxRegs.intCycle[PSXINT_CDR].sCycle + psxRegs.intCycle[PSXINT_CDR].cycle; + int pending = psxRegs.interrupt & (1 << PSXINT_CDR); #ifdef CDR_LOG_CMD_IRQ - SysPrintf("%u cdrom: ack %02x (w=%02x p=%d,%d)\n", - psxRegs.cycle, cdr.Stat & rt, rt, - !!(psxRegs.interrupt & (1 << PSXINT_CDR)), - nextCycle - psxRegs.cycle); + CDR_LOG_I("ack %02x (w=%02x p=%d,%x,%x,%d)\n", cdr.Stat & rt, rt, + !!pending, cdr.CmdInProgress, + cdr.Irq1Pending, nextCycle - psxRegs.cycle); #endif // note: Croc, Shadow Tower (more) vs Discworld Noir (<993) - if (!(psxRegs.interrupt & (1 << PSXINT_CDR)) && - (cdr.CmdInProgress || cdr.Irq1Pending)) + if (!pending && (cdr.CmdInProgress || cdr.Irq1Pending)) { s32 c = 2048 - (psxRegs.cycle - nextCycle); c = MAX_VALUE(c, 512); @@ -1566,7 +1566,14 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { int size; u8 *ptr; - CDR_LOG("psxDma3() Log: *** DMA 3 *** %x addr = %x size = %x\n", chcr, madr, bcr); +#if 0 + CDR_LOG_I("psxDma3() Log: *** DMA 3 *** %x addr = %x size = %x", chcr, madr, bcr); + if (cdr.FifoOffset == 0) { + ptr = cdr.Transfer; + SysPrintf(" %02x:%02x:%02x", ptr[0], ptr[1], ptr[2]); + } + SysPrintf("\n"); +#endif switch (chcr & 0x71000000) { case 0x11000000: From 69dc4e5ffc0eed8fb3914b46fde5d781e2e40dd3 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 12 Jul 2023 02:21:26 +0300 Subject: [PATCH 263/597] cdrom: moar hacks libretro/pcsx_rearmed#714 --- libpcsxcore/cdrom.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 5f4ce60e5..c092f2ca6 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1318,7 +1318,7 @@ static void cdrReadInterrupt(void) } memcpy(cdr.LocL, buf, 8); - if (!cdr.Irq1Pending) + if (!cdr.Stat && !cdr.Irq1Pending) 
cdrUpdateTransferBuf(buf); if ((!cdr.Muted) && (cdr.Mode & MODE_STRSND) && (!Config.Xa) && (cdr.FirstSector != -1)) { // CD-XA @@ -1515,8 +1515,11 @@ void cdrWrite3(unsigned char rt) { // note: Croc, Shadow Tower (more) vs Discworld Noir (<993) if (!pending && (cdr.CmdInProgress || cdr.Irq1Pending)) { - s32 c = 2048 - (psxRegs.cycle - nextCycle); - c = MAX_VALUE(c, 512); + s32 c = 2048; + if (cdr.CmdInProgress) { + c = 2048 - (psxRegs.cycle - nextCycle); + c = MAX_VALUE(c, 512); + } CDR_INT(c); } } From 54c4acacc4ec243579c3a9272c2ae055f04275a7 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 13 Jul 2023 01:24:18 +0300 Subject: [PATCH 264/597] more timing hacks --- libpcsxcore/database.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index ae6fe3680..a2e1822df 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -59,6 +59,9 @@ cycle_multiplier_overrides[] = * changing memcard settings is enough to break/unbreak it */ { "SLPS02528", 190 }, { "SLPS02636", 190 }, + /* Brave Fencer Musashi - cd sectors arrive too fast */ + { "SLUS00726", 170 }, + { "SLPS01490", 170 }, #if defined(DRC_DISABLE) || defined(LIGHTREC) /* new_dynarec has a hack for this game */ /* Parasite Eve II - internal timer checks */ { "SLUS01042", 125 }, From 45a1e89f9b213953b102c6217c97a2f8828f1a57 Mon Sep 17 00:00:00 2001 From: Ash Logan Date: Sun, 29 May 2022 22:53:34 +1000 Subject: [PATCH 265/597] libpcsxcore: Use the same type for next_interrupt everywhere u32 is long unsigned int on some platforms --- libpcsxcore/new_dynarec/emu_if.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 50819e4b9..f71358729 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -326,7 +326,7 @@ R3000Acpu psxRec = { unsigned int address; int pending_exception, stop; -unsigned int next_interupt; +u32 next_interupt; int 
new_dynarec_did_compile; int cycle_multiplier_old; int new_dynarec_hacks_pergame; From 826ba56b077130624f0b4c00f57ed7362449e5aa Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sat, 11 Feb 2023 13:26:15 +0000 Subject: [PATCH 266/597] psxmem: Use POSIX mmap functions as default hooks Simplify the code by using the POSIX mmap functions as the default psxMapHook / psxUnmapHook functions. Signed-off-by: Paul Cercueil --- libpcsxcore/psxmem.c | 48 ++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index fb48f77bf..54219ae05 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -41,34 +41,36 @@ #define MAP_ANONYMOUS MAP_ANON #endif +static void * psxMapDefault(unsigned long addr, size_t size, + int is_fixed, enum psxMapTag tag) +{ + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + + return mmap((void *)(uintptr_t)addr, size, + PROT_READ | PROT_WRITE, flags, -1, 0); +} + +static void psxUnmapDefault(void *ptr, size_t size, enum psxMapTag tag) +{ + munmap(ptr, size); +} + void *(*psxMapHook)(unsigned long addr, size_t size, int is_fixed, - enum psxMapTag tag); -void (*psxUnmapHook)(void *ptr, size_t size, enum psxMapTag tag); + enum psxMapTag tag) = psxMapDefault; +void (*psxUnmapHook)(void *ptr, size_t size, + enum psxMapTag tag) = psxUnmapDefault; void *psxMap(unsigned long addr, size_t size, int is_fixed, enum psxMapTag tag) { - int flags = MAP_PRIVATE | MAP_ANONYMOUS; int try_ = 0; unsigned long mask; - void *req, *ret; + void *ret; retry: - if (psxMapHook != NULL) { - ret = psxMapHook(addr, size, 0, tag); - if (ret == NULL) - return MAP_FAILED; - } - else { - /* avoid MAP_FIXED, it overrides existing mappings.. 
*/ - /* if (is_fixed) - flags |= MAP_FIXED; */ - - req = (void *)(uintptr_t)addr; - ret = mmap(req, size, PROT_READ | PROT_WRITE, flags, -1, 0); - if (ret == MAP_FAILED) - return ret; - } + ret = psxMapHook(addr, size, 0, tag); + if (ret == NULL) + return MAP_FAILED; if (addr != 0 && ret != (void *)(uintptr_t)addr) { SysMessage("psxMap: warning: wanted to map @%08x, got %p\n", @@ -97,13 +99,7 @@ void *psxMap(unsigned long addr, size_t size, int is_fixed, void psxUnmap(void *ptr, size_t size, enum psxMapTag tag) { - if (psxUnmapHook != NULL) { - psxUnmapHook(ptr, size, tag); - return; - } - - if (ptr) - munmap(ptr, size); + psxUnmapHook(ptr, size, tag); } s8 *psxM = NULL; // Kernel & User Memory (2 Meg) From ed7afeef12576ad228f138bd7d7acd7f390dd25a Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 13 Jul 2023 00:03:03 +0300 Subject: [PATCH 267/597] drc: fix wrong unmap size --- libpcsxcore/new_dynarec/pcsxmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index 8057b7ed5..694b8d089 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -502,6 +502,6 @@ void new_dyna_pcsx_mem_reset(void) void new_dyna_pcsx_mem_shutdown(void) { - psxUnmap(mem_readtab, 0x200000 * 4, MAP_TAG_LUTS); + psxUnmap(mem_readtab, 0x200000 * sizeof(mem_readtab[0]), MAP_TAG_LUTS); mem_writetab = mem_readtab = NULL; } From b0d96051c9f087c22922966c651384c3ee84eee0 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 14 Jul 2023 01:15:00 +0300 Subject: [PATCH 268/597] gpu_neon: fix texels vs pixels confusion --- plugins/gpu_neon/psx_gpu/psx_gpu.h | 4 ++-- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu_simd.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 1500eea80..bdd9caec3 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ 
b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -69,7 +69,7 @@ typedef struct u16 y; } edge_data_struct; -// 64 bytes total +// 64 (72) bytes total typedef struct { // 16 bytes @@ -93,7 +93,7 @@ typedef struct vec_8x16u pixels; }; - // 8 bytes + // 8 (16) bytes u32 draw_mask_bits; u16 *fb_ptr; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 5ef5f20f5..6a88beb78 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -1643,4 +1643,4 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, #endif /* PCSX */ -// vim:shiftwidth=2:expandtab +// vim:ts=2:shiftwidth=2:expandtab diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c index 486897f71..00392549d 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c @@ -2822,7 +2822,7 @@ void shade_blocks_unshaded_untextured_direct(psx_gpu_struct *psx_gpu) u32 num_blocks = psx_gpu->num_blocks; vec_8x16u pixels; - gvld1q_u16(pixels, block->texels.e); + gvld1q_u16(pixels, block->pixels.e); shade_blocks_load_msb_mask_direct(); while(num_blocks) From db57cbb83e573e115ca60f3855b28c264cf2159a Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 14 Jul 2023 01:20:43 +0300 Subject: [PATCH 269/597] handle more GP0 status bits should help openbios --- libpcsxcore/gpu.h | 13 +++++++------ libpcsxcore/misc.c | 2 ++ libpcsxcore/psxcounters.c | 12 +++++++++--- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/libpcsxcore/gpu.h b/libpcsxcore/gpu.h index c924b67b2..21384e582 100644 --- a/libpcsxcore/gpu.h +++ b/libpcsxcore/gpu.h @@ -24,18 +24,19 @@ #ifndef __GPU_H__ #define __GPU_H__ -#define PSXGPU_LCF (1<<31) -#define PSXGPU_nBUSY (1<<26) -#define PSXGPU_ILACE (1<<22) -#define PSXGPU_DHEIGHT (1<<19) +#define PSXGPU_LCF (1u<<31) +#define PSXGPU_nBUSY (1u<<26) +#define PSXGPU_ILACE (1u<<22) +#define PSXGPU_DHEIGHT (1u<<19) +#define 
PSXGPU_FIELD (1u<<13) // both must be set for interlace to work -#define PSXGPU_ILACE_BITS (PSXGPU_ILACE | PSXGPU_DHEIGHT) +#define PSXGPU_ILACE_BITS (PSXGPU_ILACE | PSXGPU_DHEIGHT | PSXGPU_FIELD) #define HW_GPU_STATUS psxHu32ref(0x1814) // TODO: handle com too -#define PSXGPU_TIMING_BITS (PSXGPU_LCF | PSXGPU_nBUSY) +#define PSXGPU_TIMING_BITS (PSXGPU_LCF | PSXGPU_nBUSY | PSXGPU_FIELD) #define gpuSyncPluginSR() { \ HW_GPU_STATUS &= SWAP32(PSXGPU_TIMING_BITS); \ diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 022ad6d9a..678f2dbfa 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -164,6 +164,8 @@ void BiosLikeGPUSetup() for (i = 0; i < sizeof(gpu_data_def) / sizeof(gpu_data_def[0]); i++) GPU_writeData(gpu_data_def[i]); + + HW_GPU_STATUS |= SWAP32(PSXGPU_nBUSY); } static void SetBootRegs(u32 pc, u32 gp, u32 sp) diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index e19b78195..18bd6a4e3 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -361,14 +361,20 @@ void psxRcntUpdate() // Update lace. if( hSyncCount >= HSyncTotal[Config.PsxType] ) { + u32 status, field = 0; rcnts[3].cycleStart += Config.PsxType ? 
PSXCLK / 50 : PSXCLK / 60; hSyncCount = 0; frame_counter++; gpuSyncPluginSR(); - if ((HW_GPU_STATUS & SWAP32(PSXGPU_ILACE_BITS)) == SWAP32(PSXGPU_ILACE_BITS)) - HW_GPU_STATUS |= SWAP32(frame_counter << 31); - GPU_vBlank(0, SWAP32(HW_GPU_STATUS) >> 31); + status = SWAP32(HW_GPU_STATUS) | PSXGPU_FIELD; + if ((status & PSXGPU_ILACE_BITS) == PSXGPU_ILACE_BITS) { + field = frame_counter & 1; + status |= field << 31; + status ^= field << 13; + } + HW_GPU_STATUS = SWAP32(status); + GPU_vBlank(0, field); } scheduleRcntBase(); From 6777e331551ed0cc90f4aa809bbd0fee23fcf1a6 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 14 Jul 2023 01:41:37 +0300 Subject: [PATCH 270/597] remove "slow" text from Enhanced Resolution Perhaps it made sense back in 2011, but now there is plenty of hardware that can do it without any slowdowns. --- frontend/libretro_core_options.h | 6 +++--- frontend/menu.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 7d070f9aa..e7c10c1fe 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -455,8 +455,8 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, { "pcsx_rearmed_neon_enhancement_enable", - "(GPU) Enhanced Resolution (Slow)", - "Enhanced Resolution (Slow)", + "(GPU) Enhanced Resolution", + "Enhanced Resolution", "Render games that do not already run in high resolution video modes (480i, 512i) at twice the native internal resolution. Improves the fidelity of 3D models at the expense of increased performance requirements. 
2D elements are generally unaffected by this setting.", NULL, "gpu_neon", @@ -471,7 +471,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { "pcsx_rearmed_neon_enhancement_no_main", "(GPU) Enhanced Resolution Speed Hack", "Enhanced Resolution Speed Hack", - "Improves performance when 'Enhanced Resolution (Slow)' is enabled, but reduces compatibility and may cause rendering errors.", + "Improves performance when 'Enhanced Resolution' is enabled, but reduces compatibility and may cause rendering errors.", NULL, "gpu_neon", { diff --git a/frontend/menu.c b/frontend/menu.c index f78d3d26c..f1c786270 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1355,7 +1355,7 @@ static const char *men_gpu_interlace[] = { "Off", "On", "Auto", NULL }; static menu_entry e_menu_plugin_gpu_neon[] = { mee_enum ("Enable interlace mode", 0, pl_rearmed_cbs.gpu_neon.allow_interlace, men_gpu_interlace), - mee_onoff_h ("Enhanced resolution (slow)", 0, pl_rearmed_cbs.gpu_neon.enhancement_enable, 1, h_gpu_neon_enhanced), + mee_onoff_h ("Enhanced resolution", 0, pl_rearmed_cbs.gpu_neon.enhancement_enable, 1, h_gpu_neon_enhanced), mee_onoff_h ("Enhanced res. 
speed hack", 0, pl_rearmed_cbs.gpu_neon.enhancement_no_main, 1, h_gpu_neon_enhanced_hack), mee_end, }; From da65071fd7ceac663bb951b13da2563d7b16431d Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 14 Jul 2023 02:05:30 +0300 Subject: [PATCH 271/597] adjust bios handling again changed my mind about BiosBooted --- frontend/libretro.c | 2 +- frontend/main.c | 2 +- frontend/main.h | 2 +- libpcsxcore/misc.c | 7 ++++--- libpcsxcore/new_dynarec/emu_if.c | 13 ++++++++++++- libpcsxcore/psxbios.c | 4 ++-- libpcsxcore/psxcommon.c | 1 - libpcsxcore/psxcommon.h | 1 - libpcsxcore/psxinterpreter.c | 2 +- libpcsxcore/r3000a.c | 14 +++++++------- libpcsxcore/r3000a.h | 9 +++++++-- 11 files changed, 36 insertions(+), 21 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 5acaba8b8..4477183af 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -501,7 +501,7 @@ struct rearmed_cbs pl_rearmed_cbs = { void pl_frame_limit(void) { /* called once per frame, make psxCpu->Execute() above return */ - stop = 1; + stop++; } void pl_timing_prepare(int is_pal) diff --git a/frontend/main.c b/frontend/main.c index 7d140f823..11bc4ed47 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -487,7 +487,7 @@ int emu_core_init(void) void emu_core_ask_exit(void) { - stop = 1; + stop++; g_emu_want_quit = 1; } diff --git a/frontend/main.h b/frontend/main.h index 7ce9e5d63..22053bbc8 100644 --- a/frontend/main.h +++ b/frontend/main.h @@ -91,7 +91,7 @@ static inline void emu_set_action(enum sched_action action_) if (action_ == SACTION_NONE) emu_action_old = 0; else if (action_ != emu_action_old) - stop = 1; + stop++; emu_action = action_; } diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 678f2dbfa..223266bad 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -196,9 +196,10 @@ int LoadCdrom() { char exename[256]; if (!Config.HLE) { - if (!BiosBooted) return 0; // custom BIOS - if (psxRegs.pc != 0x80030000) return 0; // BiosBootBypass'ed - if (Config.SlowBoot) 
return 0; + if (psxRegs.pc != 0x80030000) // BiosBootBypass'ed or custom BIOS? + return 0; + if (Config.SlowBoot) + return 0; } time[0] = itob(0); time[1] = itob(2); time[2] = itob(0x10); diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index f71358729..e89b635fb 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -265,6 +265,17 @@ static void ari64_execute() } } +static void ari64_execute_block(enum blockExecCaller caller) +{ + if (caller == EXEC_CALLER_BOOT) + stop++; + + ari64_execute_until(); + + if (caller == EXEC_CALLER_BOOT) + stop--; +} + static void ari64_clear(u32 addr, u32 size) { size *= 4; /* PCSX uses DMA units (words) */ @@ -315,7 +326,7 @@ R3000Acpu psxRec = { ari64_init, ari64_reset, ari64_execute, - ari64_execute_until, + ari64_execute_block, ari64_clear, ari64_notify, ari64_apply_config, diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index f57f5129c..d31465cf2 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -285,7 +285,7 @@ static inline void softCall(u32 pc) { hleSoftCall = TRUE; - while (pc0 != 0x80001000) psxCpu->ExecuteBlock(); + while (pc0 != 0x80001000) psxCpu->ExecuteBlock(EXEC_CALLER_HLE); hleSoftCall = FALSE; } @@ -297,7 +297,7 @@ static inline void softCall2(u32 pc) { hleSoftCall = TRUE; - while (pc0 != 0x80001000) psxCpu->ExecuteBlock(); + while (pc0 != 0x80001000) psxCpu->ExecuteBlock(EXEC_CALLER_HLE); ra = sra; hleSoftCall = FALSE; diff --git a/libpcsxcore/psxcommon.c b/libpcsxcore/psxcommon.c index fcc3debf9..8313304c6 100644 --- a/libpcsxcore/psxcommon.c +++ b/libpcsxcore/psxcommon.c @@ -26,7 +26,6 @@ PcsxConfig Config; boolean NetOpened = FALSE; -boolean BiosBooted = FALSE; int Log = 0; FILE *emuLog = NULL; diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index 67a0ae08e..92e69eeef 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -154,7 +154,6 @@ typedef struct { extern PcsxConfig Config; extern 
boolean NetOpened; -extern boolean BiosBooted; struct PcsxSaveFuncs { void *(*open)(const char *name, const char *mode); diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index f59934ae3..8e0aafed7 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -1102,7 +1102,7 @@ static void intExecute() { execI_(memRLUT, regs_); } -void intExecuteBlock() { +void intExecuteBlock(enum blockExecCaller caller) { psxRegisters *regs_ = &psxRegs; u8 **memRLUT = psxMemRLUT; diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 53a5ebac5..fbccdea4f 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -69,7 +69,6 @@ void psxReset() { BiosLikeGPUSetup(); // a bit of a hack but whatever - BiosBooted = FALSE; if (!Config.HLE) { psxExecuteBios(); if (psxRegs.pc == 0x80030000 && !Config.SlowBoot) @@ -244,11 +243,12 @@ void psxJumpTest() { void psxExecuteBios() { int i; - for (i = 0; psxRegs.pc != 0x80030000 && i < 5000000; i++) - psxCpu->ExecuteBlock(); - if (psxRegs.pc == 0x80030000) - BiosBooted = TRUE; - else - SysPrintf("BIOS boot timeout - custom BIOS?\n"); + for (i = 0; i < 5000000; i++) { + psxCpu->ExecuteBlock(EXEC_CALLER_BOOT); + if ((psxRegs.pc & 0xff800000) == 0x80000000) + break; + } + if (psxRegs.pc != 0x80030000) + SysPrintf("non-standard BIOS detected (%d, %08x)\n", i, psxRegs.pc); } diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 229b14a14..8d53a181e 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -36,11 +36,16 @@ enum R3000Anote { R3000ACPU_NOTIFY_AFTER_LOAD, }; +enum blockExecCaller { + EXEC_CALLER_BOOT, + EXEC_CALLER_HLE, +}; + typedef struct { int (*Init)(); void (*Reset)(); - void (*Execute)(); /* executes up to a break */ - void (*ExecuteBlock)(); /* executes up to a jump */ + void (*Execute)(); + void (*ExecuteBlock)(enum blockExecCaller caller); /* executes up to a jump */ void (*Clear)(u32 Addr, u32 Size); void (*Notify)(enum R3000Anote note, void *data); void 
(*ApplyConfig)(); From 378428ac9a7248ce2a538f9798abf37549ed91c0 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 14 Jul 2023 22:50:13 +0300 Subject: [PATCH 272/597] libretro: look for openbios Lowest priority since it still has lower compatibility and lightrec has problems with it. libretro/pcsx_rearmed#708 --- frontend/libretro.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 4477183af..6b5256c00 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2805,23 +2805,36 @@ static bool try_use_bios(const char *path) static bool find_any_bios(const char *dirpath, char *path, size_t path_size) { + static const char *substrings[] = { "scph", "psx", "openbios" }; DIR *dir; struct dirent *ent; bool ret = false; + size_t i; dir = opendir(dirpath); if (dir == NULL) return false; - while ((ent = readdir(dir))) + for (i = 0; sizeof(substrings) / sizeof(substrings[0]); i++) { - if ((strncasecmp(ent->d_name, "scph", 4) != 0) && (strncasecmp(ent->d_name, "psx", 3) != 0)) - continue; - - snprintf(path, path_size, "%s%c%s", dirpath, SLASH, ent->d_name); - ret = try_use_bios(path); - if (ret) - break; + const char *substr = substrings[i]; + size_t len = strlen(substr); + rewinddir(dir); + while ((ent = readdir(dir))) + { + if ((strncasecmp(ent->d_name, substr, len) != 0)) + continue; + if (strstr(ent->d_name, "unirom")) + continue; + + snprintf(path, path_size, "%s%c%s", dirpath, SLASH, ent->d_name); + ret = try_use_bios(path); + if (ret) + { + closedir(dir); + return ret; + } + } } closedir(dir); return ret; @@ -3136,3 +3149,5 @@ void SysDLog(const char *fmt, ...) 
if (log_cb) log_cb(RETRO_LOG_DEBUG, "%s", msg); } + +// vim:sw=3:ts=3:expandtab From 0e8e5df915c9372abd55dc0b96894e1b5fce5f3e Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 14 Jul 2023 23:09:58 +0300 Subject: [PATCH 273/597] attempt to fix build --- libpcsxcore/psxinterpreter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/psxinterpreter.h b/libpcsxcore/psxinterpreter.h index b3652c0e1..fe289b061 100644 --- a/libpcsxcore/psxinterpreter.h +++ b/libpcsxcore/psxinterpreter.h @@ -9,6 +9,6 @@ void gteNULL(struct psxCP2Regs *regs); extern void (*psxCP2[64])(struct psxCP2Regs *regs); // called by lightrec -void intExecuteBlock(); +void intExecuteBlock(enum blockExecCaller caller); #endif // __PSXINTERPRETER_H__ From fdcde643ebe281e5cb71b2a4280a8db6103a4c12 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 15 Jul 2023 01:19:28 +0300 Subject: [PATCH 274/597] sync with libretro --- frontend/libretro.c | 4 +++- frontend/libretro_core_options.h | 10 ++++------ frontend/libretro_core_options_intl.h | 10 ++++++---- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 6b5256c00..7e5ac0cb6 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -1881,7 +1881,7 @@ static void update_variables(bool in_flight) #ifdef GPU_NEON var.value = NULL; - var.key = "pcsx_rearmed_neon_interlace_enable"; + var.key = "pcsx_rearmed_neon_interlace_enable_v2"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { @@ -1889,6 +1889,8 @@ static void update_variables(bool in_flight) pl_rearmed_cbs.gpu_neon.allow_interlace = 0; else if (strcmp(var.value, "enabled") == 0) pl_rearmed_cbs.gpu_neon.allow_interlace = 1; + else // auto + pl_rearmed_cbs.gpu_neon.allow_interlace = 2; } var.value = NULL; diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index e7c10c1fe..38169c45a 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -203,9 
+203,6 @@ struct retro_core_option_v2_definition option_defs_us[] = { "PSX CPU Clock Speed", NULL, "Overclock or under-clock the PSX CPU. Try adjusting this if the game is too slow, too fast or hangs." -#if defined(LIGHTREC) - " Currently doesn't work with Lightrec dynarec." -#endif #if defined(HAVE_PRE_ARMV7) && !defined(_3DS) " Default is 50." #else @@ -440,18 +437,19 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, #ifdef GPU_NEON { - "pcsx_rearmed_neon_interlace_enable", + "pcsx_rearmed_neon_interlace_enable_v2", "(GPU) Show Interlaced Video", "Show Interlaced Video", - "When enabled, games that run in high resolution video modes (480i, 512i) will produced interlaced video output. While this displays correctly on CRT televisions, it will produce artifacts on modern displays. When disabled, all video is output in progressive format.", + "When enabled, games that run in high resolution video modes (480i, 512i) will produced interlaced video output. While this displays correctly on CRT televisions, it will produce artifacts on modern displays. When disabled, all video is output in progressive format. Note: there are games that will glitch is this is off.", NULL, "gpu_neon", { + { "auto", NULL }, { "disabled", NULL }, { "enabled", NULL }, { NULL, NULL }, }, - "disabled", + "auto", }, { "pcsx_rearmed_neon_enhancement_enable", diff --git a/frontend/libretro_core_options_intl.h b/frontend/libretro_core_options_intl.h index 5b04d601a..d66582221 100644 --- a/frontend/libretro_core_options_intl.h +++ b/frontend/libretro_core_options_intl.h @@ -221,11 +221,13 @@ struct retro_core_option_v2_definition option_defs_tr[] = { "pcsx_rearmed_psxclock", "PSX CPU Saat Hızı", NULL, + "Overclock or under-clock the PSX CPU. Try adjusting this if the game is too slow, too fast or hangs." #if defined(HAVE_PRE_ARMV7) && !defined(_3DS) - "Overclock or underclock the PSX clock. Default is 50", + " Default is 50." #else - "Overclock or underclock the PSX clock. 
Default is 57", + " Default is 57." #endif + , NULL, NULL, { @@ -237,7 +239,7 @@ struct retro_core_option_v2_definition option_defs_tr[] = { #ifdef GPU_NEON { - "pcsx_rearmed_neon_interlace_enable", + "pcsx_rearmed_neon_interlace_enable_v2", "Interlacing Mode'u etkinleştir", NULL, "Sahte tarama çizgileri efektini etkinleştirir.", @@ -250,7 +252,7 @@ struct retro_core_option_v2_definition option_defs_tr[] = { }, { "pcsx_rearmed_neon_enhancement_enable", - "Geliştirilmiş Çözünürlük (Yavaş)", + "Geliştirilmiş Çözünürlük", NULL, "Düşük performans pahasına çift çözünürlükte işler.", NULL, From 4c20e9554b257c1bbd91b865d193ac8954638d3f Mon Sep 17 00:00:00 2001 From: Bobby Smith <33353403+bslenul@users.noreply.github.com> Date: Mon, 17 Jul 2023 18:28:25 +0200 Subject: [PATCH 275/597] Fix crash when BIOS isn't found --- frontend/libretro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 7e5ac0cb6..21189d6fa 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2817,7 +2817,7 @@ static bool find_any_bios(const char *dirpath, char *path, size_t path_size) if (dir == NULL) return false; - for (i = 0; sizeof(substrings) / sizeof(substrings[0]); i++) + for (i = 0; i < (sizeof(substrings) / sizeof(substrings[0])); i++) { const char *substr = substrings[i]; size_t len = strlen(substr); From 19fae7fc20691f11264aa18602eb1f6f483356fc Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 16 Jul 2023 00:11:04 +0300 Subject: [PATCH 276/597] psxinterpreter: assorted fixes --- libpcsxcore/psxinterpreter.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index 8e0aafed7..76a6a3d46 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -391,6 +391,7 @@ static void psxDelayTest(int reg, u32 bpc) { case 3: delayWrite(reg, bpc); return; } + // DS psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); 
branch = 0; @@ -537,7 +538,7 @@ static void doBranch(u32 tar) { addCycle(); // check for load delay - tmp = psxRegs.code >> 26; + tmp = code >> 26; switch (tmp) { case 0x10: // COP0 switch (_Rs_) { @@ -570,7 +571,7 @@ static void doBranch(u32 tar) { break; } - psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); + psxBSC[code >> 26](&psxRegs, code); branch = 0; psxRegs.pc = branchPC; @@ -686,8 +687,15 @@ OP(psxMULTU_stall) { * Register branch logic * * Format: OP rs, offset * *********************************************************/ -#define RepZBranchi32(op) if(_i32(_rRs_) op 0) doBranch(_BranchTarget_); -#define RepZBranchLinki32(op) { _SetLink(31); if(_i32(_rRs_) op 0) { doBranch(_BranchTarget_); } } +#define RepZBranchi32(op) \ + if(_i32(_rRs_) op 0) \ + doBranch(_BranchTarget_); +#define RepZBranchLinki32(op) { \ + s32 temp = _i32(_rRs_); \ + _SetLink(31); \ + if(temp op 0) \ + doBranch(_BranchTarget_); \ +} OP(psxBGEZ) { RepZBranchi32(>=) } // Branch if Rs >= 0 OP(psxBGEZAL) { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link @@ -1020,12 +1028,15 @@ static void psxBASIC(struct psxCP2Regs *cp2regs) { } OP(psxREGIMM) { - switch (_Rt_) { - case 0x00: psxBLTZ(regs_, code); break; - case 0x01: psxBGEZ(regs_, code); break; + u32 rt = _Rt_; + switch (rt) { case 0x10: psxBLTZAL(regs_, code); break; case 0x11: psxBGEZAL(regs_, code); break; - default: psxNULL_(); break; + default: + if (rt & 1) + psxBGEZ(regs_, code); + else + psxBLTZ(regs_, code); } } From 905b7c2512eebeedfcb1af03ffb30ad752e94928 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 17 Jul 2023 01:18:05 +0300 Subject: [PATCH 277/597] psxinterpreter: tons of new exceptions does anything need any of this though? 
--- include/compiler_features.h | 13 ++ libpcsxcore/psxinterpreter.c | 308 +++++++++++++++++++++++++---------- libpcsxcore/psxinterpreter.h | 3 + libpcsxcore/r3000a.c | 16 +- libpcsxcore/r3000a.h | 13 ++ 5 files changed, 257 insertions(+), 96 deletions(-) create mode 100644 include/compiler_features.h diff --git a/include/compiler_features.h b/include/compiler_features.h new file mode 100644 index 000000000..0c1119df4 --- /dev/null +++ b/include/compiler_features.h @@ -0,0 +1,13 @@ + +#ifdef __GNUC__ +# define likely(x) __builtin_expect((x),1) +# define unlikely(x) __builtin_expect((x),0) +#else +# define likely(x) (x) +# define unlikely(x) (x) +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index 76a6a3d46..bd732b9f1 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -28,21 +28,17 @@ #include "psxinterpreter.h" #include #include -//#include "debug.h" -#define ProcessDebug() +#include "../include/compiler_features.h" + +// these may cause issues: because of poor timing we may step +// on instructions that real hardware would never reach +#define DO_EXCEPTION_RESERVEDI +#define DO_EXCEPTION_ADDR_ERR static int branch = 0; static int branch2 = 0; static u32 branchPC; -// These macros are used to assemble the repassembler functions - -#ifdef PSXCPU_LOG -#define debugI() PSXCPU_LOG("%s\n", disR3000AF(psxRegs.code, psxRegs.pc)); -#else -#define debugI() -#endif - #ifdef __i386__ #define INT_ATTR __attribute__((regparm(2))) #else @@ -56,12 +52,29 @@ static u32 branchPC; static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code); static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code); -static u32 INT_ATTR fetchNoCache(u8 **memRLUT, u32 pc) +// get an opcode without triggering exceptions or affecting cache +u32 intFakeFetch(u32 pc) +{ + u8 *base = psxMemRLUT[pc >> 16]; + u32 *code; + if (unlikely(base == INVALID_PTR)) + return 0; // 
nop + code = (u32 *)(base + (pc & 0xfffc)); + return SWAP32(*code); + +} + +static u32 INT_ATTR fetchNoCache(psxRegisters *regs, u8 **memRLUT, u32 pc) { u8 *base = memRLUT[pc >> 16]; - if (base == INVALID_PTR) - return 0; - u32 *code = (u32 *)(base + (pc & 0xfffc)); + u32 *code; + if (unlikely(base == INVALID_PTR)) { + SysPrintf("game crash @%08x, ra=%08x\n", pc, regs->GPR.n.ra); + regs->pc = pc; + psxException(R3000E_IBE << 2, branch, ®s->CP0); + return 0; // execute as nop + } + code = (u32 *)(base + (pc & 0xfffc)); return SWAP32(*code); } @@ -74,7 +87,7 @@ static struct cache_entry { u32 data[4]; } ICache[256]; -static u32 INT_ATTR fetchICache(u8 **memRLUT, u32 pc) +static u32 INT_ATTR fetchICache(psxRegisters *regs, u8 **memRLUT, u32 pc) { // cached? if (pc < 0xa0000000) @@ -86,8 +99,12 @@ static u32 INT_ATTR fetchICache(u8 **memRLUT, u32 pc) { const u8 *base = memRLUT[pc >> 16]; const u32 *code; - if (base == INVALID_PTR) - return 0; + if (unlikely(base == INVALID_PTR)) { + SysPrintf("game crash @%08x, ra=%08x\n", pc, regs->GPR.n.ra); + regs->pc = pc; + psxException(R3000E_IBE << 2, branch, ®s->CP0); + return 0; // execute as nop + } code = (u32 *)(base + (pc & 0xfff0)); entry->tag = pc; @@ -103,10 +120,10 @@ static u32 INT_ATTR fetchICache(u8 **memRLUT, u32 pc) return entry->data[(pc & 0x0f) >> 2]; } - return fetchNoCache(memRLUT, pc); + return fetchNoCache(regs, memRLUT, pc); } -static u32 (INT_ATTR *fetch)(u8 **memRLUT, u32 pc) = fetchNoCache; +static u32 (INT_ATTR *fetch)(psxRegisters *regs_, u8 **memRLUT, u32 pc) = fetchNoCache; // Make the timing events trigger faster as we are currently assuming everything // takes one cycle, which is not the case on real hardware. @@ -269,17 +286,9 @@ static int psxTestLoadDelay(int reg, u32 tmp) { } break; - case 0x01: // REGIMM - switch (_tRt_) { - case 0x00: case 0x01: - case 0x10: case 0x11: // BLTZ/BGEZ... 
- // Xenogears - lbu v0 / beq v0 - // - no load delay (fixes battle loading) - break; - - if (_tRs_ == reg) return 2; - break; - } + case 0x01: // REGIMM - BLTZ/BGEZ... + // Xenogears - lbu v0 / beq v0 + // - no load delay (fixes battle loading) break; // J would be just a break; @@ -287,22 +296,12 @@ static int psxTestLoadDelay(int reg, u32 tmp) { if (31 == reg) return 3; break; - case 0x04: case 0x05: // BEQ/BNE - // Xenogears - lbu v0 / beq v0 - // - no load delay (fixes battle loading) - break; - - if (_tRs_ == reg || _tRt_ == reg) return 2; - break; - case 0x06: case 0x07: // BLEZ/BGTZ + case 0x04: case 0x05: // BEQ/BNE // Xenogears - lbu v0 / beq v0 // - no load delay (fixes battle loading) break; - if (_tRs_ == reg) return 2; - break; - case 0x08: case 0x09: case 0x0a: case 0x0b: case 0x0c: case 0x0d: case 0x0e: // ADDI/ADDIU... if (_tRt_ == reg && _tRs_ == reg) return 1; else @@ -380,7 +379,7 @@ static int psxTestLoadDelay(int reg, u32 tmp) { } static void psxDelayTest(int reg, u32 bpc) { - u32 tmp = fetch(psxMemRLUT, bpc); + u32 tmp = intFakeFetch(bpc); branch = 1; switch (psxTestLoadDelay(reg, tmp)) { @@ -403,7 +402,7 @@ static void psxDelayTest(int reg, u32 bpc) { static u32 psxBranchNoDelay(psxRegisters *regs_) { u32 temp, code; - regs_->code = code = fetch(psxMemRLUT, regs_->pc); + regs_->code = code = intFakeFetch(regs_->pc); switch (_Op_) { case 0x00: // SPECIAL switch (_Funct_) { @@ -482,8 +481,6 @@ static int psxDelayBranchTest(u32 tar1) { if (tar2 == (u32)-1) return 0; - debugI(); - /* * Branch in delay slot: * - execute 1 instruction at tar1 @@ -495,7 +492,6 @@ static int psxDelayBranchTest(u32 tar1) { if (tmp1 == (u32)-1) { return psxDelayBranchExec(tar2); } - debugI(); addCycle(); /* @@ -508,7 +504,6 @@ static int psxDelayBranchTest(u32 tar1) { if (tmp2 == (u32)-1) { return psxDelayBranchExec(tmp1); } - debugI(); addCycle(); /* @@ -521,7 +516,7 @@ static int psxDelayBranchTest(u32 tar1) { } static void doBranch(u32 tar) { - u32 tmp, code; + u32 
tmp, code, pc; branch2 = branch = 1; branchPC = tar; @@ -530,11 +525,10 @@ static void doBranch(u32 tar) { if (psxDelayBranchTest(tar)) return; - psxRegs.code = code = fetch(psxMemRLUT, psxRegs.pc); - - debugI(); - + pc = psxRegs.pc; psxRegs.pc += 4; + psxRegs.code = code = fetch(&psxRegs, psxMemRLUT, pc); + addCycle(); // check for load delay @@ -579,11 +573,55 @@ static void doBranch(u32 tar) { psxBranchTest(); } +static void doBranchReg(u32 tar) { +#ifdef DO_EXCEPTION_ADDR_ERR + if (unlikely(tar & 3)) { + psxRegs.pc = psxRegs.CP0.n.BadVAddr = tar; + psxException(R3000E_AdEL << 2, branch, &psxRegs.CP0); + return; + } +#else + tar &= ~3; +#endif + doBranch(tar); +} + +#if __has_builtin(__builtin_add_overflow) || (defined(__GNUC__) && __GNUC__ >= 5) +#define add_overflow(a, b, r) __builtin_add_overflow(a, b, &(r)) +#define sub_overflow(a, b, r) __builtin_sub_overflow(a, b, &(r)) +#else +#define add_overflow(a, b, r) ({r = (u32)a + (u32)b; (a ^ ~b) & (a ^ r) & (1u<<31);}) +#define sub_overflow(a, b, r) ({r = (u32)a - (u32)b; (a ^ b) & (a ^ r) & (1u<<31);}) +#endif + +static void addExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) { + s32 r; + if (add_overflow(a1, a2, r)) { + //printf("ov %08x + %08x = %08x\n", a1, a2, r); + regs->pc -= 4; + psxException(R3000E_Ov << 2, branch, ®s->CP0); + return; + } + if (rt) + regs->GPR.r[rt] = r; +} + +static void subExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) { + s32 r; + if (sub_overflow(a1, a2, r)) { + regs->pc -= 4; + psxException(R3000E_Ov << 2, branch, ®s->CP0); + return; + } + if (rt) + regs->GPR.r[rt] = r; +} + /********************************************************* * Arithmetic with immediate operand * * Format: OP rt, rs, immediate * *********************************************************/ -OP(psxADDI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im (Exception on Integer Overflow) +OP(psxADDI) { addExc(regs_, _Rt_, _i32(_rRs_), _Imm_); } // Rt = Rs + Im (Exception on Integer Overflow) 
OP(psxADDIU) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im OP(psxANDI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) & _ImmU_; } // Rt = Rs And Im OP(psxORI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) | _ImmU_; } // Rt = Rs Or Im @@ -595,9 +633,9 @@ OP(psxSLTIU) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) < ((u32)_Imm_); } // Rt = * Register arithmetic * * Format: OP rd, rs, rt * *********************************************************/ -OP(psxADD) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); } // Rd = Rs + Rt (Exception on Integer Overflow) +OP(psxADD) { addExc(regs_, _Rd_, _i32(_rRs_), _i32(_rRt_)); } // Rd = Rs + Rt (Exception on Integer Overflow) +OP(psxSUB) { subExc(regs_, _Rd_, _i32(_rRs_), _i32(_rRt_)); } // Rd = Rs - Rt (Exception on Integer Overflow) OP(psxADDU) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); } // Rd = Rs + Rt -OP(psxSUB) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); } // Rd = Rs - Rt (Exception on Integer Overflow) OP(psxSUBU) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); } // Rd = Rs - Rt OP(psxAND) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) & _u32(_rRt_); } // Rd = Rs And Rt OP(psxOR) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) | _u32(_rRt_); } // Rd = Rs Or Rt @@ -758,17 +796,21 @@ OP(psxMTLO) { _rLo_ = _rRs_; } // Lo = Rs *********************************************************/ OP(psxBREAK) { regs_->pc -= 4; - psxException(0x24, branch, ®s_->CP0); + psxException(R3000E_Bp << 2, branch, ®s_->CP0); } OP(psxSYSCALL) { regs_->pc -= 4; - psxException(0x20, branch, ®s_->CP0); + psxException(R3000E_Syscall << 2, branch, ®s_->CP0); } -static inline void psxTestSWInts(psxRegisters *regs_) { +static inline void execI_(u8 **memRLUT, psxRegisters *regs_); + +static inline void psxTestSWInts(psxRegisters *regs_, int step) { if (regs_->CP0.n.Cause & regs_->CP0.n.Status & 0x0300 && regs_->CP0.n.Status & 0x1) { + if (step) + execI_(psxMemRLUT, regs_); regs_->CP0.n.Cause &= ~0x7c; 
psxException(regs_->CP0.n.Cause, branch, ®s_->CP0); } @@ -778,7 +820,7 @@ OP(psxRFE) { // SysPrintf("psxRFE\n"); regs_->CP0.n.Status = (regs_->CP0.n.Status & 0xfffffff0) | ((regs_->CP0.n.Status & 0x3c) >> 2); - psxTestSWInts(regs_); + psxTestSWInts(regs_, 0); } /********************************************************* @@ -802,14 +844,14 @@ OP(psxJAL) { _SetLink(31); doBranch(_JumpTarget_); } * Format: OP rs, rd * *********************************************************/ OP(psxJR) { - doBranch(_rRs_ & ~3); + doBranchReg(_rRs_); psxJumpTest(); } OP(psxJALR) { u32 temp = _u32(_rRs_); if (_Rd_) { _SetLink(_Rd_); } - doBranch(temp & ~3); + doBranchReg(temp); } /********************************************************* @@ -912,23 +954,38 @@ OP(psxSWR) { * Moves between GPR and COPx * * Format: OP rt, fs * *********************************************************/ -OP(psxMFC0) { if (!_Rt_) return; _rRt_ = _rFs_; } +OP(psxMFC0) { + u32 r = _Rd_; +#ifdef DO_EXCEPTION_RESERVEDI + if (unlikely(r == 0)) { + regs_->pc -= 4; + psxException(R3000E_RI << 2, branch, ®s_->CP0); + } +#endif + if (_Rt_) + _rRt_ = regs_->CP0.r[r]; +} + OP(psxCFC0) { if (!_Rt_) return; _rRt_ = _rFs_; } +static void setupCop(u32 sr); + void MTC0(psxRegisters *regs_, int reg, u32 val) { // SysPrintf("MTC0 %d: %x\n", reg, val); switch (reg) { case 12: // Status - if ((regs_->CP0.n.Status ^ val) & (1 << 16)) + if (unlikely((regs_->CP0.n.Status ^ val) & (1 << 16))) psxMemOnIsolate((val >> 16) & 1); + if (unlikely((regs_->CP0.n.Status ^ val) & (7 << 29))) + setupCop(val); regs_->CP0.n.Status = val; - psxTestSWInts(regs_); + psxTestSWInts(regs_, 1); break; case 13: // Cause regs_->CP0.n.Cause &= ~0x0300; regs_->CP0.n.Cause |= val & 0x0300; - psxTestSWInts(regs_); + psxTestSWInts(regs_, 0); break; default: @@ -941,17 +998,24 @@ OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); } OP(psxCTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); } /********************************************************* -* Unknow instruction (would 
generate an exception) * +* Unknown instruction (would generate an exception) * * Format: ? * *********************************************************/ static inline void psxNULL_(void) { -#ifdef PSXCPU_LOG - PSXCPU_LOG("psx: Unimplemented op %x\n", psxRegs.code); + //printf("op %08x @%08x\n", psxRegs.code, psxRegs.pc); +} + +OP(psxNULL) { + psxNULL_(); +#ifdef DO_EXCEPTION_RESERVEDI + regs_->pc -= 4; + psxException(R3000E_RI << 2, branch, ®s_->CP0); #endif } -OP(psxNULL) { psxNULL_(); } -void gteNULL(struct psxCP2Regs *regs) { psxNULL_(); } +void gteNULL(struct psxCP2Regs *regs) { + psxNULL_(); +} OP(psxSPECIAL) { psxSPC[_Funct_](regs_, code); @@ -968,6 +1032,22 @@ OP(psxCOP0) { } } +OP(psxLWC0) { + // MTC0(regs_, _Rt_, psxMemRead32(_oB_)); // ? + log_unhandled("LWC0 %08x\n", code); +} + +OP(psxCOP1) { + // ??? what actually happens here? +} + +OP(psxCOP1d) { +#ifdef DO_EXCEPTION_RESERVEDI + regs_->pc -= 4; + psxException((1<<28) | (R3000E_RI << 2), branch, ®s_->CP0); +#endif +} + OP(psxCOP2) { psxCP2[_Funct_](®s_->CP2); } @@ -978,6 +1058,13 @@ OP(psxCOP2_stall) { psxCP2[f](®s_->CP2); } +OP(psxCOP2d) { +#ifdef DO_EXCEPTION_RESERVEDI + regs_->pc -= 4; + psxException((2<<28) | (R3000E_RI << 2), branch, ®s_->CP0); +#endif +} + OP(gteMFC2) { if (!_Rt_) return; regs_->GPR.r[_Rt_] = MFC2(®s_->CP2, _Rd_); @@ -1014,6 +1101,27 @@ OP(gteSWC2_stall) { gteSWC2(regs_, code); } +OP(psxCOP3) { + // ??? what actually happens here? +} + +OP(psxCOP3d) { +#ifdef DO_EXCEPTION_RESERVEDI + regs_->pc -= 4; + psxException((3<<28) | (R3000E_RI << 2), branch, ®s_->CP0); +#endif +} + +OP(psxLWCx) { + // does this read memory? + log_unhandled("LWCx %08x\n", code); +} + +OP(psxSWCx) { + // does this write something to memory? 
+ log_unhandled("SWCx %08x\n", code); +} + static void psxBASIC(struct psxCP2Regs *cp2regs) { psxRegisters *regs_ = (void *)((char *)cp2regs - offsetof(psxRegisters, CP2)); u32 code = regs_->code; @@ -1041,23 +1149,28 @@ OP(psxREGIMM) { } OP(psxHLE) { - uint32_t hleCode = code & 0x03ffffff; - if (hleCode >= (sizeof(psxHLEt) / sizeof(psxHLEt[0]))) { - psxNULL_(); - } else { - psxHLEt[hleCode](); - } + u32 hleCode; + if (unlikely(!Config.HLE)) { + psxSWCx(regs_, code); + return; + } + hleCode = code & 0x03ffffff; + if (hleCode >= (sizeof(psxHLEt) / sizeof(psxHLEt[0]))) { + psxSWCx(regs_, code); + return; + } + psxHLEt[hleCode](); } static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = { psxSPECIAL, psxREGIMM, psxJ , psxJAL , psxBEQ , psxBNE , psxBLEZ, psxBGTZ, psxADDI , psxADDIU , psxSLTI, psxSLTIU, psxANDI, psxORI , psxXORI, psxLUI , - psxCOP0 , psxNULL , psxCOP2, psxNULL , psxNULL, psxNULL, psxNULL, psxNULL, - psxNULL , psxNULL , psxNULL, psxNULL , psxNULL, psxNULL, psxNULL, psxNULL, - psxLB , psxLH , psxLWL , psxLW , psxLBU , psxLHU , psxLWR , psxNULL, - psxSB , psxSH , psxSWL , psxSW , psxNULL, psxNULL, psxSWR , psxNULL, - psxNULL , psxNULL , gteLWC2, psxNULL , psxNULL, psxNULL, psxNULL, psxNULL, - psxNULL , psxNULL , gteSWC2, psxHLE , psxNULL, psxNULL, psxNULL, psxNULL + psxCOP0 , psxCOP1d , psxCOP2, psxCOP3d, psxNULL, psxCOP1d,psxCOP2d,psxCOP3d, + psxNULL , psxCOP1d , psxCOP2d,psxCOP3d, psxNULL, psxCOP1d,psxCOP2d,psxCOP3d, + psxLB , psxLH , psxLWL , psxLW , psxLBU , psxLHU , psxLWR , psxCOP3d, + psxSB , psxSH , psxSWL , psxSW , psxNULL, psxCOP1d,psxSWR , psxCOP3d, + psxLWC0 , psxLWCx , gteLWC2, psxLWCx , psxNULL, psxCOP1d,psxCOP2d,psxCOP3d, + psxSWCx , psxSWCx , gteSWC2, psxHLE , psxNULL, psxCOP1d,psxCOP2d,psxCOP3d, }; static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code) = { @@ -1092,13 +1205,10 @@ static void intReset() { } static inline void execI_(u8 **memRLUT, psxRegisters *regs_) { - regs_->code = fetch(memRLUT, regs_->pc); - - 
debugI(); - - if (Config.Debug) ProcessDebug(); - + u32 pc = regs_->pc; regs_->pc += 4; + regs_->code = fetch(regs_, memRLUT, pc); + addCycle(); psxBSC[regs_->code >> 26](regs_, regs_->code); @@ -1127,8 +1237,10 @@ static void intClear(u32 Addr, u32 Size) { static void intNotify(enum R3000Anote note, void *data) { switch (note) { - case R3000ACPU_NOTIFY_CACHE_ISOLATED: // Armored Core? case R3000ACPU_NOTIFY_AFTER_LOAD: + setupCop(psxRegs.CP0.n.Status); + // fallthrough + case R3000ACPU_NOTIFY_CACHE_ISOLATED: // Armored Core? memset(&ICache, 0xff, sizeof(ICache)); break; case R3000ACPU_NOTIFY_CACHE_UNISOLATED: @@ -1137,10 +1249,25 @@ static void intNotify(enum R3000Anote note, void *data) { } } +static void setupCop(u32 sr) +{ + if (sr & (1u << 29)) + psxBSC[17] = psxCOP1; + else + psxBSC[17] = psxCOP1d; + if (sr & (1u << 30)) + psxBSC[18] = Config.DisableStalls ? psxCOP2 : psxCOP2_stall; + else + psxBSC[18] = psxCOP2d; + if (sr & (1u << 31)) + psxBSC[19] = psxCOP3; + else + psxBSC[19] = psxCOP3d; +} + void intApplyConfig() { int cycle_mult; - assert(psxBSC[18] == psxCOP2 || psxBSC[18] == psxCOP2_stall); assert(psxBSC[50] == gteLWC2 || psxBSC[50] == gteLWC2_stall); assert(psxBSC[58] == gteSWC2 || psxBSC[58] == gteSWC2_stall); assert(psxSPC[16] == psxMFHI || psxSPC[16] == psxMFHI_stall); @@ -1171,6 +1298,7 @@ void intApplyConfig() { psxSPC[26] = psxDIV_stall; psxSPC[27] = psxDIVU_stall; } + setupCop(psxRegs.CP0.n.Status); // dynarec may occasionally call the interpreter, in such a case the // cache won't work (cache only works right if all fetches go through it) diff --git a/libpcsxcore/psxinterpreter.h b/libpcsxcore/psxinterpreter.h index fe289b061..746c8fe86 100644 --- a/libpcsxcore/psxinterpreter.h +++ b/libpcsxcore/psxinterpreter.h @@ -1,6 +1,9 @@ #ifndef __PSXINTERPRETER_H__ #define __PSXINTERPRETER_H__ +// get an opcode without triggering exceptions or affecting cache +u32 intFakeFetch(u32 pc); + // called by "new_dynarec" void execI(); void intApplyConfig(); 
diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index fbccdea4f..5374f8661 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -26,6 +26,7 @@ #include "mdec.h" #include "gte.h" #include "psxinterpreter.h" +#include "../include/compiler_features.h" R3000Acpu *psxCpu = NULL; #ifdef DRC_DISABLE @@ -58,8 +59,10 @@ void psxReset() { psxRegs.pc = 0xbfc00000; // Start in bootstrap - psxRegs.CP0.r[12] = 0x10900000; // COP0 enabled | BEV = 1 | TS = 1 + psxRegs.CP0.r[12] = 0x10600000; // COP0 enabled | BEV = 1 | TS = 1 psxRegs.CP0.r[15] = 0x00000002; // PRevID = Revision ID, same as R3000A + if (Config.HLE) + psxRegs.CP0.n.Status |= 1u << 30; // COP2 enabled psxCpu->ApplyConfig(); psxCpu->Reset(); @@ -90,20 +93,21 @@ void psxShutdown() { } // cp0 is passed separately for lightrec to be less messy -void psxException(u32 code, u32 bd, psxCP0Regs *cp0) { - psxRegs.code = PSXMu32(psxRegs.pc); +void psxException(u32 cause, u32 bd, psxCP0Regs *cp0) { + u32 opcode = intFakeFetch(psxRegs.pc); - if (!Config.HLE && ((((psxRegs.code) >> 24) & 0xfe) == 0x4a)) { + if (unlikely(!Config.HLE && ((((opcode) >> 24) & 0xfe) == 0x4a))) { // "hokuto no ken" / "Crash Bandicot 2" ... 
// BIOS does not allow to return to GTE instructions // (just skips it, supposedly because it's scheduled already) // so we execute it here psxCP2Regs *cp2 = (void *)(cp0 + 1); - psxCP2[psxRegs.code & 0x3f](cp2); + psxRegs.code = opcode; + psxCP2[opcode & 0x3f](cp2); } // Set the Cause - cp0->n.Cause = (cp0->n.Cause & 0x300) | code; + cp0->n.Cause = (cp0->n.Cause & 0x300) | cause; // Set the EPC & PC if (bd) { diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 8d53a181e..bdb8d27cd 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -29,6 +29,19 @@ extern "C" { #include "psxcounters.h" #include "psxbios.h" +enum R3000Aexception { + R3000E_Int = 0, // Interrupt + R3000E_AdEL = 4, // Address error (on load/I-fetch) + R3000E_AdES = 5, // Address error (on store) + R3000E_IBE = 6, // Bus error (instruction fetch) + R3000E_DBE = 7, // Bus error (data load) + R3000E_Syscall = 8, // syscall instruction + R3000E_Bp = 9, // Breakpoint - a break instruction + R3000E_RI = 10, // reserved instruction + R3000E_CpU = 11, // Co-Processor unusable + R3000E_Ov = 12 // arithmetic overflow +}; + enum R3000Anote { R3000ACPU_NOTIFY_CACHE_ISOLATED = 0, R3000ACPU_NOTIFY_CACHE_UNISOLATED = 1, From b9698f9dadce9831f21f103c09b3569d2b2441a2 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 17 Jul 2023 20:54:19 +0300 Subject: [PATCH 278/597] psxinterpreter: rework branching in ds --- libpcsxcore/psxinterpreter.c | 183 +++++++++++++++++------------------ 1 file changed, 90 insertions(+), 93 deletions(-) diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index bd732b9f1..9719a1342 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -37,7 +37,6 @@ static int branch = 0; static int branch2 = 0; -static u32 branchPC; #ifdef __i386__ #define INT_ATTR __attribute__((regparm(2))) @@ -399,138 +398,133 @@ static void psxDelayTest(int reg, u32 bpc) { psxBranchTest(); } -static u32 psxBranchNoDelay(psxRegisters *regs_) { - u32 temp, 
code; +#define isBranch(c_) \ + ((1 <= ((c_) >> 26) && ((c_) >> 26) <= 7) || ((c_) & 0xfc00003e) == 8) +#define swap_(a_, b_) { u32 t_ = a_; a_ = b_; b_ = t_; } - regs_->code = code = intFakeFetch(regs_->pc); - switch (_Op_) { +// tar1 is main branch target, 'code' is opcode in DS +static u32 psxBranchNoDelay(psxRegisters *regs_, u32 tar1, u32 code, int *taken) { + u32 temp, rt; + + assert(isBranch(code)); + *taken = 1; + switch (code >> 26) { case 0x00: // SPECIAL switch (_Funct_) { case 0x08: // JR return _u32(_rRs_); case 0x09: // JALR temp = _u32(_rRs_); - if (_Rd_) { _SetLink(_Rd_); } + if (_Rd_) + regs_->GPR.r[_Rd_] = tar1 + 4; return temp; } break; case 0x01: // REGIMM - switch (_Rt_) { - case 0x00: // BLTZ + rt = _Rt_; + switch (rt) { + case 0x10: // BLTZAL + regs_->GPR.n.ra = tar1 + 4; if (_i32(_rRs_) < 0) - return _BranchTarget_; + return tar1 + (s16)_Im_ * 4; break; - case 0x01: // BGEZ + case 0x11: // BGEZAL + regs_->GPR.n.ra = tar1 + 4; if (_i32(_rRs_) >= 0) - return _BranchTarget_; + return tar1 + (s16)_Im_ * 4; break; - case 0x08: // BLTZAL - if (_i32(_rRs_) < 0) { - _SetLink(31); - return _BranchTarget_; + default: + if (rt & 1) { // BGEZ + if (_i32(_rRs_) >= 0) + return tar1 + (s16)_Im_ * 4; } - break; - case 0x09: // BGEZAL - if (_i32(_rRs_) >= 0) { - _SetLink(31); - return _BranchTarget_; + else { // BLTZ + if (_i32(_rRs_) < 0) + return tar1 + (s16)_Im_ * 4; } break; } break; case 0x02: // J - return _JumpTarget_; + return (tar1 & 0xf0000000u) + _Target_ * 4; case 0x03: // JAL - _SetLink(31); - return _JumpTarget_; + regs_->GPR.n.ra = tar1 + 4; + return (tar1 & 0xf0000000u) + _Target_ * 4; case 0x04: // BEQ if (_i32(_rRs_) == _i32(_rRt_)) - return _BranchTarget_; + return tar1 + (s16)_Im_ * 4; break; case 0x05: // BNE if (_i32(_rRs_) != _i32(_rRt_)) - return _BranchTarget_; + return tar1 + (s16)_Im_ * 4; break; case 0x06: // BLEZ if (_i32(_rRs_) <= 0) - return _BranchTarget_; + return tar1 + (s16)_Im_ * 4; break; case 0x07: // BGTZ if 
(_i32(_rRs_) > 0) - return _BranchTarget_; + return tar1 + (s16)_Im_ * 4; break; } - return (u32)-1; -} - -static int psxDelayBranchExec(u32 tar) { - execI(); - - branch = 0; - psxRegs.pc = tar; - addCycle(); - psxBranchTest(); - return 1; + *taken = 0; + return tar1; } -static int psxDelayBranchTest(u32 tar1) { - u32 tar2, tmp1, tmp2; +static void psxDoDelayBranch(psxRegisters *regs, u32 tar1, u32 code1) { + u32 tar2, code; + int taken, lim; - tar2 = psxBranchNoDelay(&psxRegs); - if (tar2 == (u32)-1) - return 0; + tar2 = psxBranchNoDelay(regs, tar1, code1, &taken); + regs->pc = tar1; + if (!taken) + return; /* - * Branch in delay slot: + * taken branch in delay slot: * - execute 1 instruction at tar1 * - jump to tar2 (target of branch in delay slot; this branch * has no normal delay slot, instruction at tar1 was fetched instead) */ - psxRegs.pc = tar1; - tmp1 = psxBranchNoDelay(&psxRegs); - if (tmp1 == (u32)-1) { - return psxDelayBranchExec(tar2); - } - addCycle(); - - /* - * Got a branch at tar1: - * - execute 1 instruction at tar2 - * - jump to target of that branch (tmp1) - */ - psxRegs.pc = tar2; - tmp2 = psxBranchNoDelay(&psxRegs); - if (tmp2 == (u32)-1) { - return psxDelayBranchExec(tmp1); + for (lim = 0; lim < 8; lim++) { + regs->code = code = fetch(regs, psxMemRLUT, tar1); + addCycle(); + if (likely(!isBranch(code))) { + psxBSC[code >> 26](regs, code); + regs->pc = tar2; + return; + } + tar1 = psxBranchNoDelay(regs, tar2, code, &taken); + regs->pc = tar2; + if (!taken) + return; + swap_(tar1, tar2); } - addCycle(); - - /* - * Got a branch at tar2: - * - execute 1 instruction at tmp1 - * - jump to target of that branch (tmp2) - */ - psxRegs.pc = tmp1; - return psxDelayBranchExec(tmp2); + SysPrintf("Evil chained DS branches @ %08x %08x %08x\n", regs->pc, tar1, tar2); } -static void doBranch(u32 tar) { +static void doBranch(psxRegisters *regs, u32 tar) { u32 tmp, code, pc; branch2 = branch = 1; - branchPC = tar; - - // check for branch in delay slot - if 
(psxDelayBranchTest(tar)) - return; - pc = psxRegs.pc; - psxRegs.pc += 4; - psxRegs.code = code = fetch(&psxRegs, psxMemRLUT, pc); + // fetch the delay slot + pc = regs->pc; + regs->pc = pc + 4; + regs->code = code = fetch(regs, psxMemRLUT, pc); addCycle(); + // check for branch in delay slot + if (unlikely(isBranch(code))) { + psxDoDelayBranch(regs, tar, code); + log_unhandled("branch in DS: %08x->%08x\n", pc, regs->pc); + branch = 0; + psxBranchTest(); + return; + } + // check for load delay tmp = code >> 26; switch (tmp) { @@ -538,7 +532,7 @@ static void doBranch(u32 tar) { switch (_Rs_) { case 0x00: // MFC0 case 0x02: // CFC0 - psxDelayTest(_Rt_, branchPC); + psxDelayTest(_Rt_, tar); return; } break; @@ -548,32 +542,32 @@ static void doBranch(u32 tar) { switch (_Rs_) { case 0x00: // MFC2 case 0x02: // CFC2 - psxDelayTest(_Rt_, branchPC); + psxDelayTest(_Rt_, tar); return; } break; } break; case 0x32: // LWC2 - psxDelayTest(_Rt_, branchPC); + psxDelayTest(_Rt_, tar); return; default: if (tmp >= 0x20 && tmp <= 0x26) { // LB/LH/LWL/LW/LBU/LHU/LWR - psxDelayTest(_Rt_, branchPC); + psxDelayTest(_Rt_, tar); return; } break; } - psxBSC[code >> 26](&psxRegs, code); + psxBSC[code >> 26](regs, code); branch = 0; - psxRegs.pc = branchPC; + regs->pc = tar; psxBranchTest(); } -static void doBranchReg(u32 tar) { +static void doBranchReg(psxRegisters *regs, u32 tar) { #ifdef DO_EXCEPTION_ADDR_ERR if (unlikely(tar & 3)) { psxRegs.pc = psxRegs.CP0.n.BadVAddr = tar; @@ -583,7 +577,7 @@ static void doBranchReg(u32 tar) { #else tar &= ~3; #endif - doBranch(tar); + doBranch(regs, tar); } #if __has_builtin(__builtin_add_overflow) || (defined(__GNUC__) && __GNUC__ >= 5) @@ -727,12 +721,12 @@ OP(psxMULTU_stall) { *********************************************************/ #define RepZBranchi32(op) \ if(_i32(_rRs_) op 0) \ - doBranch(_BranchTarget_); + doBranch(regs_, _BranchTarget_); #define RepZBranchLinki32(op) { \ s32 temp = _i32(_rRs_); \ _SetLink(31); \ if(temp op 0) \ - 
doBranch(_BranchTarget_); \ + doBranch(regs_, _BranchTarget_); \ } OP(psxBGEZ) { RepZBranchi32(>=) } // Branch if Rs >= 0 @@ -827,7 +821,10 @@ OP(psxRFE) { * Register branch logic * * Format: OP rs, rt, offset * *********************************************************/ -#define RepBranchi32(op) if(_i32(_rRs_) op _i32(_rRt_)) doBranch(_BranchTarget_); +#define RepBranchi32(op) { \ + if (_i32(_rRs_) op _i32(_rRt_)) \ + doBranch(regs_, _BranchTarget_); \ +} OP(psxBEQ) { RepBranchi32(==) } // Branch if Rs == Rt OP(psxBNE) { RepBranchi32(!=) } // Branch if Rs != Rt @@ -836,22 +833,22 @@ OP(psxBNE) { RepBranchi32(!=) } // Branch if Rs != Rt * Jump to target * * Format: OP target * *********************************************************/ -OP(psxJ) { doBranch(_JumpTarget_); } -OP(psxJAL) { _SetLink(31); doBranch(_JumpTarget_); } +OP(psxJ) { doBranch(regs_, _JumpTarget_); } +OP(psxJAL) { _SetLink(31); doBranch(regs_, _JumpTarget_); } /********************************************************* * Register jump * * Format: OP rs, rd * *********************************************************/ OP(psxJR) { - doBranchReg(_rRs_); + doBranchReg(regs_, _rRs_); psxJumpTest(); } OP(psxJALR) { u32 temp = _u32(_rRs_); if (_Rd_) { _SetLink(_Rd_); } - doBranchReg(temp); + doBranchReg(regs_, temp); } /********************************************************* From f9ae4f29a408556f4b1d0f843995b389e66608c5 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 18 Jul 2023 01:49:46 +0300 Subject: [PATCH 279/597] psxinterpreter: rework load delays --- libpcsxcore/psxinterpreter.c | 521 +++++++++++++---------------------- libpcsxcore/r3000a.h | 5 +- 2 files changed, 193 insertions(+), 333 deletions(-) diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index 9719a1342..b9e1dbc19 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -1,5 +1,6 @@ /*************************************************************************** * Copyright (C) 2007 Ryan Schultz, 
PCSX-df Team, PCSX team * + * Copyright (C) 2023 notaz * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -33,7 +34,9 @@ // these may cause issues: because of poor timing we may step // on instructions that real hardware would never reach #define DO_EXCEPTION_RESERVEDI -#define DO_EXCEPTION_ADDR_ERR +#define DO_EXCEPTION_ALIGNMENT_BRANCH +//#define DO_EXCEPTION_ALIGNMENT_DATA +#define HANDLE_LOAD_DELAY static int branch = 0; static int branch2 = 0; @@ -51,6 +54,69 @@ static int branch2 = 0; static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code); static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code); +// load delay +static void doLoad(psxRegisters *regs, u32 r, u32 val) +{ +#ifdef HANDLE_LOAD_DELAY + int sel = regs->dloadSel ^ 1; + assert(regs->dloadReg[sel] == 0); + regs->dloadReg[sel] = r; + regs->dloadVal[sel] = r ? val : 0; + if (regs->dloadReg[sel ^ 1] == r) + regs->dloadVal[sel ^ 1] = regs->dloadReg[sel ^ 1] = 0; +#else + regs->GPR.r[r] = r ? val : 0; +#endif +} + +static void dloadRt(psxRegisters *regs, u32 r, u32 val) +{ +#ifdef HANDLE_LOAD_DELAY + int sel = regs->dloadSel; + if (unlikely(regs->dloadReg[sel] == r)) + regs->dloadVal[sel] = regs->dloadReg[sel] = 0; +#endif + regs->GPR.r[r] = r ? 
val : 0; +} + +static void dloadStep(psxRegisters *regs) +{ +#ifdef HANDLE_LOAD_DELAY + int sel = regs->dloadSel; + regs->GPR.r[regs->dloadReg[sel]] = regs->dloadVal[sel]; + regs->dloadVal[sel] = regs->dloadReg[sel] = 0; + regs->dloadSel ^= 1; + assert(regs->GPR.r[0] == 0); +#endif +} + +static void dloadFlush(psxRegisters *regs) +{ +#ifdef HANDLE_LOAD_DELAY + regs->GPR.r[regs->dloadReg[0]] = regs->dloadVal[0]; + regs->GPR.r[regs->dloadReg[1]] = regs->dloadVal[1]; + regs->dloadVal[0] = regs->dloadVal[1] = 0; + regs->dloadReg[0] = regs->dloadReg[1] = 0; + assert(regs->GPR.r[0] == 0); +#endif +} + +static void dloadClear(psxRegisters *regs) +{ +#ifdef HANDLE_LOAD_DELAY + regs->dloadVal[0] = regs->dloadVal[1] = 0; + regs->dloadReg[0] = regs->dloadReg[1] = 0; + regs->dloadSel = 0; +#endif +} + +static void intException(psxRegisters *regs, u32 pc, u32 cause) +{ + dloadFlush(regs); + regs->pc = pc; + psxException(cause, branch, ®s->CP0); +} + // get an opcode without triggering exceptions or affecting cache u32 intFakeFetch(u32 pc) { @@ -69,8 +135,7 @@ static u32 INT_ATTR fetchNoCache(psxRegisters *regs, u8 **memRLUT, u32 pc) u32 *code; if (unlikely(base == INVALID_PTR)) { SysPrintf("game crash @%08x, ra=%08x\n", pc, regs->GPR.n.ra); - regs->pc = pc; - psxException(R3000E_IBE << 2, branch, ®s->CP0); + intException(regs, pc, R3000E_IBE << 2); return 0; // execute as nop } code = (u32 *)(base + (pc & 0xfffc)); @@ -100,8 +165,7 @@ static u32 INT_ATTR fetchICache(psxRegisters *regs, u8 **memRLUT, u32 pc) const u32 *code; if (unlikely(base == INVALID_PTR)) { SysPrintf("game crash @%08x, ra=%08x\n", pc, regs->GPR.n.ra); - regs->pc = pc; - psxException(R3000E_IBE << 2, branch, ®s->CP0); + intException(regs, pc, R3000E_IBE << 2); return 0; // execute as nop } code = (u32 *)(base + (pc & 0xfff0)); @@ -135,55 +199,6 @@ static inline void addCycle(void) psxRegs.subCycle &= 0xffff; } -static void delayRead(int reg, u32 bpc) { - u32 rold, rnew; - -// SysPrintf("delayRead at %x!\n", 
psxRegs.pc); - - rold = psxRegs.GPR.r[reg]; - psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); // branch delay load - rnew = psxRegs.GPR.r[reg]; - - psxRegs.pc = bpc; - - branch = 0; - - psxRegs.GPR.r[reg] = rold; - execI(); // first branch opcode - psxRegs.GPR.r[reg] = rnew; - - psxBranchTest(); -} - -static void delayWrite(int reg, u32 bpc) { - -/* SysPrintf("delayWrite at %x!\n", psxRegs.pc); - - SysPrintf("%s\n", disR3000AF(psxRegs.code, psxRegs.pc-4)); - SysPrintf("%s\n", disR3000AF(PSXMu32(bpc), bpc));*/ - - // no changes from normal behavior - - psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); - - branch = 0; - psxRegs.pc = bpc; - - psxBranchTest(); -} - -static void delayReadWrite(int reg, u32 bpc) { - -// SysPrintf("delayReadWrite at %x!\n", psxRegs.pc); - - // the branch delay load is skipped - - branch = 0; - psxRegs.pc = bpc; - - psxBranchTest(); -} - /**** R3000A Instruction Macros ****/ #define _PC_ regs_->pc // The next PC to be executed @@ -213,9 +228,7 @@ static void delayReadWrite(int reg, u32 bpc) { #define _rRs_ regs_->GPR.r[_Rs_] // Rs register #define _rRt_ regs_->GPR.r[_Rt_] // Rt register -#define _rRd_ regs_->GPR.r[_Rd_] // Rd register #define _rSa_ regs_->GPR.r[_Sa_] // Sa register -#define _rFs_ regs_->CP0.r[_Rd_] // Fs register #define _rHi_ regs_->GPR.n.hi // The HI register #define _rLo_ regs_->GPR.n.lo // The LO register @@ -223,7 +236,7 @@ static void delayReadWrite(int reg, u32 bpc) { #define _JumpTarget_ ((_Target_ * 4) + (_PC_ & 0xf0000000)) // Calculates the target during a jump instruction #define _BranchTarget_ ((s16)_Im_ * 4 + _PC_) // Calculates the target during a branch instruction -#define _SetLink(x) regs_->GPR.r[x] = _PC_ + 4; // Sets the return address in the link register +#define _SetLink(x) dloadRt(regs_, x, _PC_ + 4); // Sets the return address in the link register #define OP(name) \ static inline INT_ATTR void name(psxRegisters *regs_, u32 code) @@ -239,165 +252,6 @@ static void delayReadWrite(int reg, u32 
bpc) { #define _i32(x) (s32)(x) #define _u32(x) (u32)(x) -static int psxTestLoadDelay(int reg, u32 tmp) { - if (tmp == 0) return 0; // NOP - switch (tmp >> 26) { - case 0x00: // SPECIAL - switch (_tFunct_) { - case 0x00: // SLL - case 0x02: case 0x03: // SRL/SRA - if (_tRd_ == reg && _tRt_ == reg) return 1; else - if (_tRt_ == reg) return 2; else - if (_tRd_ == reg) return 3; - break; - - case 0x08: // JR - if (_tRs_ == reg) return 2; - break; - case 0x09: // JALR - if (_tRd_ == reg && _tRs_ == reg) return 1; else - if (_tRs_ == reg) return 2; else - if (_tRd_ == reg) return 3; - break; - - // SYSCALL/BREAK just a break; - - case 0x20: case 0x21: case 0x22: case 0x23: - case 0x24: case 0x25: case 0x26: case 0x27: - case 0x2a: case 0x2b: // ADD/ADDU... - case 0x04: case 0x06: case 0x07: // SLLV... - if (_tRd_ == reg && (_tRt_ == reg || _tRs_ == reg)) return 1; else - if (_tRt_ == reg || _tRs_ == reg) return 2; else - if (_tRd_ == reg) return 3; - break; - - case 0x10: case 0x12: // MFHI/MFLO - if (_tRd_ == reg) return 3; - break; - case 0x11: case 0x13: // MTHI/MTLO - if (_tRs_ == reg) return 2; - break; - - case 0x18: case 0x19: - case 0x1a: case 0x1b: // MULT/DIV... - if (_tRt_ == reg || _tRs_ == reg) return 2; - break; - } - break; - - case 0x01: // REGIMM - BLTZ/BGEZ... - // Xenogears - lbu v0 / beq v0 - // - no load delay (fixes battle loading) - break; - - // J would be just a break; - case 0x03: // JAL - if (31 == reg) return 3; - break; - - case 0x06: case 0x07: // BLEZ/BGTZ - case 0x04: case 0x05: // BEQ/BNE - // Xenogears - lbu v0 / beq v0 - // - no load delay (fixes battle loading) - break; - - case 0x08: case 0x09: case 0x0a: case 0x0b: - case 0x0c: case 0x0d: case 0x0e: // ADDI/ADDIU... 
- if (_tRt_ == reg && _tRs_ == reg) return 1; else - if (_tRs_ == reg) return 2; else - if (_tRt_ == reg) return 3; - break; - - case 0x0f: // LUI - if (_tRt_ == reg) return 3; - break; - - case 0x10: // COP0 - switch (_tFunct_) { - case 0x00: // MFC0 - if (_tRt_ == reg) return 3; - break; - case 0x02: // CFC0 - if (_tRt_ == reg) return 3; - break; - case 0x04: // MTC0 - if (_tRt_ == reg) return 2; - break; - case 0x06: // CTC0 - if (_tRt_ == reg) return 2; - break; - // RFE just a break; - } - break; - - case 0x12: // COP2 - switch (_tFunct_) { - case 0x00: - switch (_tRs_) { - case 0x00: // MFC2 - if (_tRt_ == reg) return 3; - break; - case 0x02: // CFC2 - if (_tRt_ == reg) return 3; - break; - case 0x04: // MTC2 - if (_tRt_ == reg) return 2; - break; - case 0x06: // CTC2 - if (_tRt_ == reg) return 2; - break; - } - break; - // RTPS... break; - } - break; - - case 0x22: case 0x26: // LWL/LWR - if (_tRt_ == reg) return 3; else - if (_tRs_ == reg) return 2; - break; - - case 0x20: case 0x21: case 0x23: - case 0x24: case 0x25: // LB/LH/LW/LBU/LHU - if (_tRt_ == reg && _tRs_ == reg) return 1; else - if (_tRs_ == reg) return 2; else - if (_tRt_ == reg) return 3; - break; - - case 0x28: case 0x29: case 0x2a: - case 0x2b: case 0x2e: // SB/SH/SWL/SW/SWR - if (_tRt_ == reg || _tRs_ == reg) return 2; - break; - - case 0x32: case 0x3a: // LWC2/SWC2 - if (_tRs_ == reg) return 2; - break; - } - - return 0; -} - -static void psxDelayTest(int reg, u32 bpc) { - u32 tmp = intFakeFetch(bpc); - branch = 1; - - switch (psxTestLoadDelay(reg, tmp)) { - case 1: - delayReadWrite(reg, bpc); return; - case 2: - delayRead(reg, bpc); return; - case 3: - delayWrite(reg, bpc); return; - } - // DS - psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); - - branch = 0; - psxRegs.pc = bpc; - - psxBranchTest(); -} - #define isBranch(c_) \ ((1 <= ((c_) >> 26) && ((c_) >> 26) <= 7) || ((c_) & 0xfc00003e) == 8) #define swap_(a_, b_) { u32 t_ = a_; a_ = b_; b_ = t_; } @@ -491,6 +345,7 @@ static void 
psxDoDelayBranch(psxRegisters *regs, u32 tar1, u32 code1) { regs->code = code = fetch(regs, psxMemRLUT, tar1); addCycle(); if (likely(!isBranch(code))) { + dloadStep(regs); psxBSC[code >> 26](regs, code); regs->pc = tar2; return; @@ -505,7 +360,7 @@ static void psxDoDelayBranch(psxRegisters *regs, u32 tar1, u32 code1) { } static void doBranch(psxRegisters *regs, u32 tar) { - u32 tmp, code, pc; + u32 code, pc; branch2 = branch = 1; @@ -525,40 +380,7 @@ static void doBranch(psxRegisters *regs, u32 tar) { return; } - // check for load delay - tmp = code >> 26; - switch (tmp) { - case 0x10: // COP0 - switch (_Rs_) { - case 0x00: // MFC0 - case 0x02: // CFC0 - psxDelayTest(_Rt_, tar); - return; - } - break; - case 0x12: // COP2 - switch (_Funct_) { - case 0x00: - switch (_Rs_) { - case 0x00: // MFC2 - case 0x02: // CFC2 - psxDelayTest(_Rt_, tar); - return; - } - break; - } - break; - case 0x32: // LWC2 - psxDelayTest(_Rt_, tar); - return; - default: - if (tmp >= 0x20 && tmp <= 0x26) { // LB/LH/LWL/LW/LBU/LHU/LWR - psxDelayTest(_Rt_, tar); - return; - } - break; - } - + dloadStep(regs); psxBSC[code >> 26](regs, code); branch = 0; @@ -568,10 +390,11 @@ static void doBranch(psxRegisters *regs, u32 tar) { } static void doBranchReg(psxRegisters *regs, u32 tar) { -#ifdef DO_EXCEPTION_ADDR_ERR +#ifdef DO_EXCEPTION_ALIGNMENT_BRANCH if (unlikely(tar & 3)) { - psxRegs.pc = psxRegs.CP0.n.BadVAddr = tar; - psxException(R3000E_AdEL << 2, branch, &psxRegs.CP0); + SysPrintf("game crash @%08x, ra=%08x\n", tar, regs->GPR.n.ra); + psxRegs.CP0.n.BadVAddr = tar; + intException(regs, tar, R3000E_AdEL << 2); return; } #else @@ -589,54 +412,50 @@ static void doBranchReg(psxRegisters *regs, u32 tar) { #endif static void addExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) { - s32 r; - if (add_overflow(a1, a2, r)) { - //printf("ov %08x + %08x = %08x\n", a1, a2, r); - regs->pc -= 4; - psxException(R3000E_Ov << 2, branch, ®s->CP0); + s32 val; + if (add_overflow(a1, a2, val)) { + //printf("ov %08x + 
%08x = %08x\n", a1, a2, val); + intException(regs, regs->pc - 4, R3000E_Ov << 2); return; } - if (rt) - regs->GPR.r[rt] = r; + dloadRt(regs, rt, val); } static void subExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) { - s32 r; - if (sub_overflow(a1, a2, r)) { - regs->pc -= 4; - psxException(R3000E_Ov << 2, branch, ®s->CP0); + s32 val; + if (sub_overflow(a1, a2, val)) { + intException(regs, regs->pc - 4, R3000E_Ov << 2); return; } - if (rt) - regs->GPR.r[rt] = r; + dloadRt(regs, rt, val); } /********************************************************* * Arithmetic with immediate operand * * Format: OP rt, rs, immediate * *********************************************************/ -OP(psxADDI) { addExc(regs_, _Rt_, _i32(_rRs_), _Imm_); } // Rt = Rs + Im (Exception on Integer Overflow) -OP(psxADDIU) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im -OP(psxANDI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) & _ImmU_; } // Rt = Rs And Im -OP(psxORI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) | _ImmU_; } // Rt = Rs Or Im -OP(psxXORI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) ^ _ImmU_; } // Rt = Rs Xor Im -OP(psxSLTI) { if (!_Rt_) return; _rRt_ = _i32(_rRs_) < _Imm_ ; } // Rt = Rs < Im (Signed) -OP(psxSLTIU) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) < ((u32)_Imm_); } // Rt = Rs < Im (Unsigned) +OP(psxADDI) { addExc (regs_, _Rt_, _i32(_rRs_), _Imm_); } // Rt = Rs + Im (Exception on Integer Overflow) +OP(psxADDIU) { dloadRt(regs_, _Rt_, _u32(_rRs_) + _Imm_ ); } // Rt = Rs + Im +OP(psxANDI) { dloadRt(regs_, _Rt_, _u32(_rRs_) & _ImmU_); } // Rt = Rs And Im +OP(psxORI) { dloadRt(regs_, _Rt_, _u32(_rRs_) | _ImmU_); } // Rt = Rs Or Im +OP(psxXORI) { dloadRt(regs_, _Rt_, _u32(_rRs_) ^ _ImmU_); } // Rt = Rs Xor Im +OP(psxSLTI) { dloadRt(regs_, _Rt_, _i32(_rRs_) < _Imm_ ); } // Rt = Rs < Im (Signed) +OP(psxSLTIU) { dloadRt(regs_, _Rt_, _u32(_rRs_) < ((u32)_Imm_)); } // Rt = Rs < Im (Unsigned) /********************************************************* * Register arithmetic * * 
Format: OP rd, rs, rt * *********************************************************/ -OP(psxADD) { addExc(regs_, _Rd_, _i32(_rRs_), _i32(_rRt_)); } // Rd = Rs + Rt (Exception on Integer Overflow) -OP(psxSUB) { subExc(regs_, _Rd_, _i32(_rRs_), _i32(_rRt_)); } // Rd = Rs - Rt (Exception on Integer Overflow) -OP(psxADDU) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); } // Rd = Rs + Rt -OP(psxSUBU) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); } // Rd = Rs - Rt -OP(psxAND) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) & _u32(_rRt_); } // Rd = Rs And Rt -OP(psxOR) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) | _u32(_rRt_); } // Rd = Rs Or Rt -OP(psxXOR) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) ^ _u32(_rRt_); } // Rd = Rs Xor Rt -OP(psxNOR) { if (!_Rd_) return; _rRd_ =~(_u32(_rRs_) | _u32(_rRt_)); }// Rd = Rs Nor Rt -OP(psxSLT) { if (!_Rd_) return; _rRd_ = _i32(_rRs_) < _i32(_rRt_); } // Rd = Rs < Rt (Signed) -OP(psxSLTU) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) < _u32(_rRt_); } // Rd = Rs < Rt (Unsigned) +OP(psxADD) { addExc (regs_, _Rd_, _i32(_rRs_), _i32(_rRt_)); } // Rd = Rs + Rt (Exception on Integer Overflow) +OP(psxSUB) { subExc (regs_, _Rd_, _i32(_rRs_), _i32(_rRt_)); } // Rd = Rs - Rt (Exception on Integer Overflow) +OP(psxADDU) { dloadRt(regs_, _Rd_, _u32(_rRs_) + _u32(_rRt_)); } // Rd = Rs + Rt +OP(psxSUBU) { dloadRt(regs_, _Rd_, _u32(_rRs_) - _u32(_rRt_)); } // Rd = Rs - Rt +OP(psxAND) { dloadRt(regs_, _Rd_, _u32(_rRs_) & _u32(_rRt_)); } // Rd = Rs And Rt +OP(psxOR) { dloadRt(regs_, _Rd_, _u32(_rRs_) | _u32(_rRt_)); } // Rd = Rs Or Rt +OP(psxXOR) { dloadRt(regs_, _Rd_, _u32(_rRs_) ^ _u32(_rRt_)); } // Rd = Rs Xor Rt +OP(psxNOR) { dloadRt(regs_, _Rd_, ~_u32(_rRs_ | _u32(_rRt_))); } // Rd = Rs Nor Rt +OP(psxSLT) { dloadRt(regs_, _Rd_, _i32(_rRs_) < _i32(_rRt_)); } // Rd = Rs < Rt (Signed) +OP(psxSLTU) { dloadRt(regs_, _Rd_, _u32(_rRs_) < _u32(_rRt_)); } // Rd = Rs < Rt (Unsigned) /********************************************************* * Register 
mult/div & Register trap logic * @@ -740,30 +559,30 @@ OP(psxBLTZAL) { RepZBranchLinki32(<) } // Branch if Rs < 0 and link * Shift arithmetic with constant shift * * Format: OP rd, rt, sa * *********************************************************/ -OP(psxSLL) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) << _Sa_; } // Rd = Rt << sa -OP(psxSRA) { if (!_Rd_) return; _rRd_ = _i32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (arithmetic) -OP(psxSRL) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (logical) +OP(psxSLL) { dloadRt(regs_, _Rd_, _u32(_rRt_) << _Sa_); } // Rd = Rt << sa +OP(psxSRA) { dloadRt(regs_, _Rd_, _i32(_rRt_) >> _Sa_); } // Rd = Rt >> sa (arithmetic) +OP(psxSRL) { dloadRt(regs_, _Rd_, _u32(_rRt_) >> _Sa_); } // Rd = Rt >> sa (logical) /********************************************************* * Shift arithmetic with variant register shift * * Format: OP rd, rt, rs * *********************************************************/ -OP(psxSLLV) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) << (_u32(_rRs_) & 0x1F); } // Rd = Rt << rs -OP(psxSRAV) { if (!_Rd_) return; _rRd_ = _i32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (arithmetic) -OP(psxSRLV) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (logical) +OP(psxSLLV) { dloadRt(regs_, _Rd_, _u32(_rRt_) << (_u32(_rRs_) & 0x1F)); } // Rd = Rt << rs +OP(psxSRAV) { dloadRt(regs_, _Rd_, _i32(_rRt_) >> (_u32(_rRs_) & 0x1F)); } // Rd = Rt >> rs (arithmetic) +OP(psxSRLV) { dloadRt(regs_, _Rd_, _u32(_rRt_) >> (_u32(_rRs_) & 0x1F)); } // Rd = Rt >> rs (logical) /********************************************************* * Load higher 16 bits of the first word in GPR with imm * * Format: OP rt, immediate * *********************************************************/ -OP(psxLUI) { if (!_Rt_) return; _rRt_ = code << 16; } // Upper halfword of Rt = Im +OP(psxLUI) { dloadRt(regs_, _Rt_, code << 16); } // Upper halfword of Rt = Im 
/********************************************************* * Move from HI/LO to GPR * * Format: OP rd * *********************************************************/ -OP(psxMFHI) { if (!_Rd_) return; _rRd_ = _rHi_; } // Rd = Hi -OP(psxMFLO) { if (!_Rd_) return; _rRd_ = _rLo_; } // Rd = Lo +OP(psxMFHI) { dloadRt(regs_, _Rd_, _rHi_); } // Rd = Hi +OP(psxMFLO) { dloadRt(regs_, _Rd_, _rLo_); } // Rd = Lo static void mflohiCheckStall(psxRegisters *regs_) { @@ -789,13 +608,11 @@ OP(psxMTLO) { _rLo_ = _rRs_; } // Lo = Rs * Format: OP * *********************************************************/ OP(psxBREAK) { - regs_->pc -= 4; - psxException(R3000E_Bp << 2, branch, &regs_->CP0); + intException(regs_, regs_->pc - 4, R3000E_Bp << 2); } OP(psxSYSCALL) { - regs_->pc -= 4; - psxException(R3000E_Syscall << 2, branch, &regs_->CP0); + intException(regs_, regs_->pc - 4, R3000E_Syscall << 2); } static inline void execI_(u8 **memRLUT, psxRegisters *regs_); @@ -806,7 +623,7 @@ static inline void psxTestSWInts(psxRegisters *regs_, int step) { if (step) execI_(psxMemRLUT, regs_); regs_->CP0.n.Cause &= ~0x7c; - psxException(regs_->CP0.n.Cause, branch, &regs_->CP0); + intException(regs_, regs_->pc, regs_->CP0.n.Cause); } } @@ -856,23 +673,59 @@ OP(psxJALR) { * Format: OP rt, offset(base) * *********************************************************/ +static int algnChkL(psxRegisters *regs, u32 addr, u32 m) { + if (unlikely(addr & m)) { + log_unhandled("unaligned load %08x @%08x\n", addr, regs->pc - 4); +#ifdef DO_EXCEPTION_ALIGNMENT_DATA + psxRegs.CP0.n.BadVAddr = addr; + intException(regs, regs->pc - 4, R3000E_AdEL << 2); + return 0; +#endif + } + return 1; +} + +static int algnChkS(psxRegisters *regs, u32 addr, u32 m) { + if (unlikely(addr & m)) { + log_unhandled("unaligned store %08x @%08x\n", addr, regs->pc - 4); +#ifdef DO_EXCEPTION_ALIGNMENT_DATA + psxRegs.CP0.n.BadVAddr = addr; + intException(regs, regs->pc - 4, R3000E_AdES << 2); + return 0; +#endif + } + return 1; +} + 
+/********************************************************* +* Load and store for GPR * +* Format: OP rt, offset(base) * +*********************************************************/ + #define _oB_ (regs_->GPR.r[_Rs_] + _Imm_) -OP(psxLB) { u32 v = (s8)psxMemRead8(_oB_); if (_Rt_) _rRt_ = v; } -OP(psxLBU) { u32 v = psxMemRead8(_oB_); if (_Rt_) _rRt_ = v; } -OP(psxLH) { u32 v = (s16)psxMemRead16(_oB_); if (_Rt_) _rRt_ = v; } -OP(psxLHU) { u32 v = psxMemRead16(_oB_); if (_Rt_) _rRt_ = v; } -OP(psxLW) { u32 v = psxMemRead32(_oB_); if (_Rt_) _rRt_ = v; } +OP(psxLB) { doLoad(regs_, _Rt_, (s8)psxMemRead8(_oB_)); } +OP(psxLBU) { doLoad(regs_, _Rt_, psxMemRead8(_oB_)); } +OP(psxLH) { if (algnChkL(regs_, _oB_, 1)) doLoad(regs_, _Rt_, (s16)psxMemRead16(_oB_)); } +OP(psxLHU) { if (algnChkL(regs_, _oB_, 1)) doLoad(regs_, _Rt_, psxMemRead16(_oB_)); } +OP(psxLW) { if (algnChkL(regs_, _oB_, 3)) doLoad(regs_, _Rt_, psxMemRead32(_oB_)); } OP(psxLWL) { static const u32 LWL_MASK[4] = { 0xffffff, 0xffff, 0xff, 0 }; static const u32 LWL_SHIFT[4] = { 24, 16, 8, 0 }; - u32 addr = _oB_; + u32 addr = _oB_, val; u32 shift = addr & 3; u32 mem = psxMemRead32(addr & ~3); + u32 rt = _Rt_; + u32 oldval = regs_->GPR.r[rt]; - if (!_Rt_) return; - _rRt_ = (_u32(_rRt_) & LWL_MASK[shift]) | (mem << LWL_SHIFT[shift]); +#ifdef HANDLE_LOAD_DELAY + int sel = regs_->dloadSel; + if (regs_->dloadReg[sel] == rt) + oldval = regs_->dloadVal[sel]; +#endif + val = (oldval & LWL_MASK[shift]) | (mem << LWL_SHIFT[shift]); + doLoad(regs_, rt, val); /* Mem = 1234. 
Reg = abcd @@ -887,12 +740,19 @@ OP(psxLWL) { OP(psxLWR) { static const u32 LWR_MASK[4] = { 0, 0xff000000, 0xffff0000, 0xffffff00 }; static const u32 LWR_SHIFT[4] = { 0, 8, 16, 24 }; - u32 addr = _oB_; + u32 addr = _oB_, val; u32 shift = addr & 3; u32 mem = psxMemRead32(addr & ~3); + u32 rt = _Rt_; + u32 oldval = regs_->GPR.r[rt]; - if (!_Rt_) return; - _rRt_ = (_u32(_rRt_) & LWR_MASK[shift]) | (mem >> LWR_SHIFT[shift]); +#ifdef HANDLE_LOAD_DELAY + int sel = regs_->dloadSel; + if (regs_->dloadReg[sel] == rt) + oldval = regs_->dloadVal[sel]; +#endif + val = (oldval & LWR_MASK[shift]) | (mem >> LWR_SHIFT[shift]); + doLoad(regs_, rt, val); /* Mem = 1234. Reg = abcd @@ -904,10 +764,11 @@ OP(psxLWR) { */ } -OP(psxSB) { psxMemWrite8 (_oB_, _rRt_ & 0xff); } -OP(psxSH) { psxMemWrite16(_oB_, _rRt_ & 0xffff); } -OP(psxSW) { psxMemWrite32(_oB_, _rRt_); } +OP(psxSB) { psxMemWrite8 (_oB_, _rRt_ & 0xff); } +OP(psxSH) { if (algnChkS(regs_, _oB_, 1)) psxMemWrite16(_oB_, _rRt_ & 0xffff); } +OP(psxSW) { if (algnChkS(regs_, _oB_, 3)) psxMemWrite32(_oB_, _rRt_); } +// FIXME: this rmw implementation is wrong and would break on io like fifos OP(psxSWL) { static const u32 SWL_MASK[4] = { 0xffffff00, 0xffff0000, 0xff000000, 0 }; static const u32 SWL_SHIFT[4] = { 24, 16, 8, 0 }; @@ -954,16 +815,13 @@ OP(psxSWR) { OP(psxMFC0) { u32 r = _Rd_; #ifdef DO_EXCEPTION_RESERVEDI - if (unlikely(r == 0)) { - regs_->pc -= 4; - psxException(R3000E_RI << 2, branch, &regs_->CP0); - } + if (unlikely(r == 0)) + intException(regs_, regs_->pc - 4, R3000E_RI << 2); #endif - if (_Rt_) - _rRt_ = regs_->CP0.r[r]; + doLoad(regs_, _Rt_, regs_->CP0.r[r]); } -OP(psxCFC0) { if (!_Rt_) return; _rRt_ = _rFs_; } +OP(psxCFC0) { doLoad(regs_, _Rt_, regs_->CP0.r[_Rd_]); } static void setupCop(u32 sr); @@ -1005,8 +863,7 @@ static inline void psxNULL_(void) { OP(psxNULL) { psxNULL_(); #ifdef DO_EXCEPTION_RESERVEDI - regs_->pc -= 4; - psxException(R3000E_RI << 2, branch, &regs_->CP0); + intException(regs_, regs_->pc - 4, R3000E_RI 
<< 2); #endif } @@ -1040,8 +897,7 @@ OP(psxCOP1) { OP(psxCOP1d) { #ifdef DO_EXCEPTION_RESERVEDI - regs_->pc -= 4; - psxException((1<<28) | (R3000E_RI << 2), branch, &regs_->CP0); + intException(regs_, regs_->pc - 4, (1<<28) | (R3000E_RI << 2)); #endif } @@ -1057,19 +913,16 @@ OP(psxCOP2_stall) { OP(psxCOP2d) { #ifdef DO_EXCEPTION_RESERVEDI - regs_->pc -= 4; - psxException((2<<28) | (R3000E_RI << 2), branch, &regs_->CP0); + intException(regs_, regs_->pc - 4, (2<<28) | (R3000E_RI << 2)); #endif } OP(gteMFC2) { - if (!_Rt_) return; - regs_->GPR.r[_Rt_] = MFC2(&regs_->CP2, _Rd_); + doLoad(regs_, _Rt_, MFC2(&regs_->CP2, _Rd_)); } OP(gteCFC2) { - if (!_Rt_) return; - regs_->GPR.r[_Rt_] = regs_->CP2C.r[_Rd_]; + doLoad(regs_, _Rt_, regs_->CP2C.r[_Rd_]); } OP(gteMTC2) { @@ -1104,8 +957,7 @@ OP(psxCOP3) { OP(psxCOP3d) { #ifdef DO_EXCEPTION_RESERVEDI - regs_->pc -= 4; - psxException((3<<28) | (R3000E_RI << 2), branch, &regs_->CP0); + intException(regs_, regs_->pc - 4, (3<<28) | (R3000E_RI << 2)); #endif } @@ -1199,6 +1051,7 @@ static int intInit() { } static void intReset() { + dloadClear(&psxRegs); } static inline void execI_(u8 **memRLUT, psxRegisters *regs_) { @@ -1208,6 +1061,7 @@ static inline void execI_(u8 **memRLUT, psxRegisters *regs_) { addCycle(); + dloadStep(regs_); psxBSC[regs_->code >> 26](regs_, regs_->code); } @@ -1234,14 +1088,17 @@ static void intClear(u32 Addr, u32 Size) { static void intNotify(enum R3000Anote note, void *data) { switch (note) { + case R3000ACPU_NOTIFY_BEFORE_SAVE: + dloadFlush(&psxRegs); + break; case R3000ACPU_NOTIFY_AFTER_LOAD: + dloadClear(&psxRegs); setupCop(psxRegs.CP0.n.Status); // fallthrough case R3000ACPU_NOTIFY_CACHE_ISOLATED: // Armored Core? 
memset(&ICache, 0xff, sizeof(ICache)); break; case R3000ACPU_NOTIFY_CACHE_UNISOLATED: - case R3000ACPU_NOTIFY_BEFORE_SAVE: break; } } diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index bdb8d27cd..778bd8d9f 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -215,7 +215,10 @@ typedef struct { u32 subCycle; /* interpreter cycle counting */ u32 subCycleStep; u32 biuReg; - u32 reserved[3]; + u8 reserved; + u8 dloadSel; + u8 dloadReg[2]; + u32 dloadVal[2]; // warning: changing anything in psxRegisters requires update of all // asm in libpcsxcore/new_dynarec/ } psxRegisters; From bc7c5acb6eb1ac9adc6b4381a2c2b2baffd5aebe Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 20 Jul 2023 01:51:46 +0300 Subject: [PATCH 280/597] psxinterpreter: yet more exceptions, new config option --- frontend/libretro.c | 10 + frontend/libretro_core_options.h | 16 +- frontend/menu.c | 9 +- include/compiler_features.h | 2 + libpcsxcore/new_dynarec/assem_arm.c | 7 +- libpcsxcore/new_dynarec/assem_arm64.c | 7 +- libpcsxcore/new_dynarec/emu_if.c | 2 +- libpcsxcore/new_dynarec/events.c | 2 +- libpcsxcore/new_dynarec/linkage_arm.S | 2 +- libpcsxcore/new_dynarec/linkage_arm64.S | 2 +- libpcsxcore/new_dynarec/new_dynarec.c | 14 +- libpcsxcore/new_dynarec/pcsxmem.c | 2 +- libpcsxcore/psxbios.c | 21 +- libpcsxcore/psxcommon.h | 1 + libpcsxcore/psxinterpreter.c | 536 ++++++++++++++++-------- libpcsxcore/psxinterpreter.h | 2 +- libpcsxcore/r3000a.c | 29 +- libpcsxcore/r3000a.h | 31 +- 18 files changed, 461 insertions(+), 234 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 21189d6fa..32d0bec36 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2047,6 +2047,16 @@ static void update_variables(bool in_flight) Config.icache_emulation = 1; } + var.value = NULL; + var.key = "pcsx_rearmed_exception_emulation"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + Config.PreciseExceptions = 1; 
+ else + Config.PreciseExceptions = 0; + } + psxCpu->ApplyConfig(); // end of CPU emu config diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 38169c45a..781c514b3 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -1171,7 +1171,21 @@ struct retro_core_option_v2_definition option_defs_us[] = { "pcsx_rearmed_icache_emulation", "Instruction Cache Emulation", NULL, - "Enable emulation of the PSX CPU instruction cache. Improves accuracy at the expense of increased performance overheads. Required for Formula One 2001, Formula One Arcade and Formula One 99. [Interpreter only and partial on lightrec, unsupported when using ARMv7 backend]", + "Enable emulation of the PSX CPU instruction cache. Improves accuracy at the expense of increased performance overheads. Required for Formula One 2001, Formula One Arcade and Formula One 99. [Interpreter only; partial on lightrec and ARM dynarecs]", + NULL, + "compat_hack", + { + { "enabled", NULL }, + { "disabled", NULL }, + { NULL, NULL }, + }, + "enabled", + }, + { + "pcsx_rearmed_exception_emulation", + "Exception and Breakpoint Emulation", + NULL, + "Enable emulation of some almost never used PSX's debug features. This causes a performance hit, is not useful for games and is intended for PSX homebrew and romhack developers only. Only enable if you know what you are doing. 
[Interpreter only]", NULL, "compat_hack", { diff --git a/frontend/menu.c b/frontend/menu.c index f1c786270..b48050626 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -398,6 +398,7 @@ static const struct { CE_CONFIG_VAL(DisableStalls), CE_CONFIG_VAL(Cpu), CE_CONFIG_VAL(GpuListWalking), + CE_CONFIG_VAL(PreciseExceptions), CE_INTVAL(region), CE_INTVAL_V(g_scaler, 3), CE_INTVAL(g_gamma), @@ -1594,12 +1595,14 @@ static const char h_cfg_nodrc[] = "Disable dynamic recompiler and use interpret #endif static const char h_cfg_shacks[] = "Breaks games but may give better performance"; static const char h_cfg_icache[] = "Support F1 games (only when dynarec is off)"; -static const char h_cfg_gpul[] = "Try enabling this if the game is missing some graphics\n" +static const char h_cfg_exc[] = "Emulate some PSX's debug hw like breakpoints\n" + "and exceptions (slow, interpreter only, keep off)"; +static const char h_cfg_gpul[] = "Try enabling this if the game misses some graphics\n" "causes a performance hit"; static const char h_cfg_psxclk[] = "Over/under-clock the PSX, default is " DEFAULT_PSX_CLOCK_S "\n" "(adjust this if the game is too slow/too fast/hangs)"; -enum { AMO_XA, AMO_CDDA, AMO_IC, AMO_CPU, AMO_GPUL }; +enum { AMO_XA, AMO_CDDA, AMO_IC, AMO_BP, AMO_CPU, AMO_GPUL }; static menu_entry e_menu_adv_options[] = { @@ -1609,6 +1612,7 @@ static menu_entry e_menu_adv_options[] = mee_onoff_h ("Disable XA Decoding", 0, menu_iopts[AMO_XA], 1, h_cfg_xa), mee_onoff_h ("Disable CD Audio", 0, menu_iopts[AMO_CDDA], 1, h_cfg_cdda), mee_onoff_h ("ICache emulation", 0, menu_iopts[AMO_IC], 1, h_cfg_icache), + mee_onoff_h ("BP exception emulation", 0, menu_iopts[AMO_BP], 1, h_cfg_exc), mee_enum_h ("GPU l-list slow walking",0, menu_iopts[AMO_GPUL], men_gpul, h_cfg_gpul), #if !defined(DRC_DISABLE) || defined(LIGHTREC) mee_onoff_h ("Disable dynarec (slow!)",0, menu_iopts[AMO_CPU], 1, h_cfg_nodrc), @@ -1628,6 +1632,7 @@ static int menu_loop_adv_options(int id, int keys) { &Config.Xa, 
&menu_iopts[AMO_XA] }, { &Config.Cdda, &menu_iopts[AMO_CDDA] }, { &Config.icache_emulation, &menu_iopts[AMO_IC] }, + { &Config.PreciseExceptions, &menu_iopts[AMO_BP] }, { &Config.Cpu, &menu_iopts[AMO_CPU] }, }; int i; diff --git a/include/compiler_features.h b/include/compiler_features.h index 0c1119df4..384186645 100644 --- a/include/compiler_features.h +++ b/include/compiler_features.h @@ -2,9 +2,11 @@ #ifdef __GNUC__ # define likely(x) __builtin_expect((x),1) # define unlikely(x) __builtin_expect((x),0) +# define noinline __attribute__((noinline)) #else # define likely(x) (x) # define unlikely(x) (x) +# define noinline #endif #ifndef __has_builtin diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 6af93e22e..2847e516d 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -430,7 +430,7 @@ static void emit_loadreg(int r, int hr) //case HIREG: addr = &hi; break; //case LOREG: addr = &lo; break; case CCREG: addr = &cycle_count; break; - case CSREG: addr = &psxRegs.CP0.n.Status; break; + case CSREG: addr = &psxRegs.CP0.n.SR; break; case INVCP: addr = &invc_ptr; break; case ROREG: addr = &ram_offset; break; default: @@ -572,6 +572,11 @@ static void emit_addimm(u_int rs,int imm,u_int rt) else if(rs!=rt) emit_mov(rs,rt); } +static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt) +{ + emit_addimm(rs, imm, rt); +} + static void emit_addimm_and_set_flags(int imm,int rt) { assert(imm>-65536&&imm<65536); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 67ce02ada..6f9c91d9c 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -463,7 +463,7 @@ static void emit_loadreg(u_int r, u_int hr) //case HIREG: addr = &hi; break; //case LOREG: addr = &lo; break; case CCREG: addr = &cycle_count; break; - case CSREG: addr = &psxRegs.CP0.n.Status; break; + case CSREG: addr = &psxRegs.CP0.n.SR; break; case INVCP: 
addr = &invc_ptr; is64 = 1; break; case ROREG: addr = &ram_offset; is64 = 1; break; default: @@ -629,6 +629,11 @@ static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt) emit_addimm_s(0, 1, rs, imm, rt); } +static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt) +{ + emit_addimm64(rs, imm, rt); +} + static void emit_addimm_and_set_flags(int imm, u_int rt) { emit_addimm_s(1, 0, rt, imm, rt); diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index e89b635fb..89716fa0f 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -28,7 +28,7 @@ void pcsx_mtc0(u32 reg, u32 val) evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); MTC0(&psxRegs, reg, val); gen_interupt(&psxRegs.CP0); - if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.Status & 0x0300) // possible sw irq + if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.SR & 0x0300) // possible sw irq pending_exception = 1; } diff --git a/libpcsxcore/new_dynarec/events.c b/libpcsxcore/new_dynarec/events.c index 5d981f8d5..71aed6b2c 100644 --- a/libpcsxcore/new_dynarec/events.c +++ b/libpcsxcore/new_dynarec/events.c @@ -68,7 +68,7 @@ static void irq_test(psxCP0Regs *cp0) } } - if ((psxHu32(0x1070) & psxHu32(0x1074)) && (cp0->n.Status & 0x401) == 0x401) { + if ((psxHu32(0x1070) & psxHu32(0x1074)) && (cp0->n.SR & 0x401) == 0x401) { psxException(0x400, 0, cp0); pending_exception = 1; } diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index f97b2d059..7a6d2edd8 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -292,7 +292,7 @@ FUNCTION(jump_break): b call_psxException FUNCTION(jump_syscall_ds): mov r0, #0x20 - mov r1, #1 + mov r1, #2 b call_psxException FUNCTION(jump_syscall): mov r0, #0x20 diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 72d13f3d8..bc5f1151e 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ 
b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -150,7 +150,7 @@ FUNCTION(jump_break): b call_psxException FUNCTION(jump_syscall_ds): mov w0, #0x20 - mov w1, #1 + mov w1, #2 b call_psxException FUNCTION(jump_syscall): mov w0, #0x20 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 17f7af95d..f59764628 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -784,7 +784,7 @@ static void noinline *get_addr(u_int vaddr, int can_compile) return ndrc_get_addr_ht(vaddr); // generate an address error - psxRegs.CP0.n.Status |= 2; + psxRegs.CP0.n.SR |= 2; psxRegs.CP0.n.Cause = (vaddr<<31) | (4<<2); psxRegs.CP0.n.EPC = (vaddr&1) ? vaddr-5 : vaddr; psxRegs.CP0.n.BadVAddr = vaddr & ~1; @@ -3559,11 +3559,11 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) assert(dops[i].opcode2==0x10); //if((source[i]&0x3f)==0x10) // RFE { - emit_readword(&psxRegs.CP0.n.Status,0); + emit_readword(&psxRegs.CP0.n.SR,0); emit_andimm(0,0x3c,1); emit_andimm(0,~0xf,0); emit_orrshr_imm(1,2,0); - emit_writeword(0,&psxRegs.CP0.n.Status); + emit_writeword(0,&psxRegs.CP0.n.SR); } } } @@ -4132,6 +4132,7 @@ static void call_c_cpu_handler(int i, const struct regstat *i_regs, int ccadj_, emit_addimm(HOST_CCREG,ccadj_,HOST_CCREG); emit_add(2,HOST_CCREG,2); emit_writeword(2,&psxRegs.cycle); + emit_addimm_ptr(FP,(u_char *)&psxRegs - (u_char *)&dynarec_local,0); emit_far_call(func); emit_far_jump(jump_to_new_pc); } @@ -4149,9 +4150,14 @@ static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_) emit_far_jump(func); } +static void hlecall_bad() +{ + SysPrintf("bad hlecall\n"); +} + static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_) { - void *hlefunc = gteNULL; + void *hlefunc = hlecall_bad; uint32_t hleCode = source[i] & 0x03ffffff; if (hleCode < ARRAY_SIZE(psxHLEt)) hlefunc = psxHLEt[hleCode]; diff --git a/libpcsxcore/new_dynarec/pcsxmem.c 
b/libpcsxcore/new_dynarec/pcsxmem.c index 694b8d089..190f8fc7b 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -67,7 +67,7 @@ static u32 read_mem_dummy(u32 addr) static void write_mem_dummy(u32 data) { - if (!(psxRegs.CP0.n.Status & (1 << 16))) + if (!(psxRegs.CP0.n.SR & (1 << 16))) memprintf("unmapped w %08x, %08x @%08x %u\n", address, data, psxRegs.pc, psxRegs.cycle); } diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index d31465cf2..13a7197f7 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -1949,7 +1949,7 @@ void psxBios_StartPAD() { // 13 #endif pad_stopped = 0; psxHwWrite16(0x1f801074, (unsigned short)(psxHwRead16(0x1f801074) | 0x1)); - psxRegs.CP0.n.Status |= 0x401; + psxRegs.CP0.n.SR |= 0x401; pc0 = ra; } @@ -1976,7 +1976,7 @@ void psxBios_PAD_init() { // 15 psxHwWrite16(0x1f801074, (u16)(psxHwRead16(0x1f801074) | 0x1)); pad_buf = (int *)Ra1; *pad_buf = -1; - psxRegs.CP0.n.Status |= 0x401; + psxRegs.CP0.n.SR |= 0x401; v0 = 2; pc0 = ra; } @@ -1996,8 +1996,7 @@ void psxBios_ReturnFromException() { // 17 k0 = interrupt_r26; if (psxRegs.CP0.n.Cause & 0x80000000) pc0 += 4; - psxRegs.CP0.n.Status = (psxRegs.CP0.n.Status & 0xfffffff0) | - ((psxRegs.CP0.n.Status & 0x3c) >> 2); + psxRegs.CP0.n.SR = (psxRegs.CP0.n.SR & ~0x0f) | ((psxRegs.CP0.n.SR & 0x3c) >> 2); } void psxBios_ResetEntryInt() { // 18 @@ -2698,7 +2697,7 @@ void psxBios_ChangeClearRCnt() { // 0a v0 = *ptr; *ptr = a1; -// psxRegs.CP0.n.Status|= 0x404; +// psxRegs.CP0.n.SR|= 0x404; pc0 = ra; } @@ -3301,12 +3300,12 @@ void psxBiosException() { switch (a0) { case 1: // EnterCritical - disable irq's /* Fixes Medievil 2 not loading up new game, Digimon World not booting up and possibly others */ - v0 = (psxRegs.CP0.n.Status & 0x404) == 0x404; - psxRegs.CP0.n.Status &= ~0x404; + v0 = (psxRegs.CP0.n.SR & 0x404) == 0x404; + psxRegs.CP0.n.SR &= ~0x404; break; case 2: // ExitCritical - enable irq's - psxRegs.CP0.n.Status |= 0x404; + 
psxRegs.CP0.n.SR |= 0x404; break; /* Normally this should cover SYS(00h, SYS(04h but they don't do anything relevant so... */ default: @@ -3314,8 +3313,7 @@ void psxBiosException() { } pc0 = psxRegs.CP0.n.EPC + 4; - psxRegs.CP0.n.Status = (psxRegs.CP0.n.Status & 0xfffffff0) | - ((psxRegs.CP0.n.Status & 0x3c) >> 2); + psxRegs.CP0.n.SR = (psxRegs.CP0.n.SR & ~0x0f) | ((psxRegs.CP0.n.SR & 0x3c) >> 2); return; default: @@ -3328,8 +3326,7 @@ void psxBiosException() { pc0 = psxRegs.CP0.n.EPC; if (psxRegs.CP0.n.Cause & 0x80000000) pc0+=4; - psxRegs.CP0.n.Status = (psxRegs.CP0.n.Status & 0xfffffff0) | - ((psxRegs.CP0.n.Status & 0x3c) >> 2); + psxRegs.CP0.n.SR = (psxRegs.CP0.n.SR & ~0x0f) | ((psxRegs.CP0.n.SR & 0x3c) >> 2); } #define bfreeze(ptr, size) { \ diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index 92e69eeef..4c78255e9 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -141,6 +141,7 @@ typedef struct { boolean UseNet; boolean icache_emulation; boolean DisableStalls; + boolean PreciseExceptions; int GpuListWalking; int cycle_multiplier; // 100 for 1.0 int cycle_multiplier_override; diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index b9e1dbc19..f3bf7b6ca 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -34,12 +34,9 @@ // these may cause issues: because of poor timing we may step // on instructions that real hardware would never reach #define DO_EXCEPTION_RESERVEDI -#define DO_EXCEPTION_ALIGNMENT_BRANCH -//#define DO_EXCEPTION_ALIGNMENT_DATA #define HANDLE_LOAD_DELAY -static int branch = 0; -static int branch2 = 0; +static int branchSeen = 0; #ifdef __i386__ #define INT_ATTR __attribute__((regparm(2))) @@ -112,9 +109,54 @@ static void dloadClear(psxRegisters *regs) static void intException(psxRegisters *regs, u32 pc, u32 cause) { + if (cause != 0x20) { + //FILE *f = fopen("/tmp/psx_ram.bin", "wb"); + //fwrite(psxM, 1, 0x200000, f); fclose(f); + log_unhandled("exception %08x 
@%08x\n", cause, pc); + } dloadFlush(regs); regs->pc = pc; - psxException(cause, branch, &regs->CP0); + psxException(cause, regs->branching, &regs->CP0); + regs->branching = R3000A_BRANCH_NONE_OR_EXCEPTION; +} + +// exception caused by current instruction (excluding unkasking) +static void intExceptionInsn(psxRegisters *regs, u32 cause) +{ + cause |= (regs->code & 0x0c000000) << 2; + intException(regs, regs->pc - 4, cause); +} + +// 29 Enable for 80000000-ffffffff +// 30 Enable for 00000000-7fffffff +// 31 Enable exception +#define DBR_ABIT(dc, a) ((dc) & (1u << (29+(((a)>>31)^1)))) +#define DBR_EN_EXEC(dc, a) (((dc) & 0x01800000) == 0x01800000 && DBR_ABIT(dc, a)) +#define DBR_EN_LD(dc, a) (((dc) & 0x06800000) == 0x06800000 && DBR_ABIT(dc, a)) +#define DBR_EN_ST(dc, a) (((dc) & 0x0a800000) == 0x0a800000 && DBR_ABIT(dc, a)) +static void intExceptionDebugBp(psxRegisters *regs, u32 pc) +{ + psxCP0Regs *cp0 = &regs->CP0; + dloadFlush(regs); + cp0->n.Cause &= 0x300; + cp0->n.Cause |= (regs->branching << 30) | (R3000E_Bp << 2); + cp0->n.SR = (cp0->n.SR & ~0x3f) | ((cp0->n.SR & 0x0f) << 2); + cp0->n.EPC = regs->branching ? pc - 4 : pc; + psxRegs.pc = 0x80000040; +} + +static int execBreakCheck(psxRegisters *regs, u32 pc) +{ + if (unlikely(DBR_EN_EXEC(regs->CP0.n.DCIC, pc) && + ((pc ^ regs->CP0.n.BPC) & regs->CP0.n.BPCM) == 0)) + { + regs->CP0.n.DCIC |= 0x03; + if (regs->CP0.n.DCIC & (1u << 31)) { + intExceptionDebugBp(regs, pc); + return 1; + } + } + return 0; } // get an opcode without triggering exceptions or affecting cache @@ -191,12 +233,12 @@ static u32 (INT_ATTR *fetch)(psxRegisters *regs_, u8 **memRLUT, u32 pc) = fetchN // Make the timing events trigger faster as we are currently assuming everything // takes one cycle, which is not the case on real hardware. 
// FIXME: count cache misses, memory latencies, stalls to get rid of this -static inline void addCycle(void) +static inline void addCycle(psxRegisters *regs) { - assert(psxRegs.subCycleStep >= 0x10000); - psxRegs.subCycle += psxRegs.subCycleStep; - psxRegs.cycle += psxRegs.subCycle >> 16; - psxRegs.subCycle &= 0xffff; + assert(regs->subCycleStep >= 0x10000); + regs->subCycle += regs->subCycleStep; + regs->cycle += regs->subCycle >> 16; + regs->subCycle &= 0xffff; } /**** R3000A Instruction Macros ****/ @@ -343,7 +385,7 @@ static void psxDoDelayBranch(psxRegisters *regs, u32 tar1, u32 code1) { */ for (lim = 0; lim < 8; lim++) { regs->code = code = fetch(regs, psxMemRLUT, tar1); - addCycle(); + addCycle(regs); if (likely(!isBranch(code))) { dloadStep(regs); psxBSC[code >> 26](regs, code); @@ -359,23 +401,26 @@ static void psxDoDelayBranch(psxRegisters *regs, u32 tar1, u32 code1) { SysPrintf("Evil chained DS branches @ %08x %08x %08x\n", regs->pc, tar1, tar2); } -static void doBranch(psxRegisters *regs, u32 tar) { - u32 code, pc; +static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) { + u32 code, pc, pc_final; - branch2 = branch = 1; + branchSeen = regs->branching = taken; + pc_final = taken == R3000A_BRANCH_TAKEN ? 
tar : regs->pc + 4; // fetch the delay slot pc = regs->pc; regs->pc = pc + 4; regs->code = code = fetch(regs, psxMemRLUT, pc); - addCycle(); + addCycle(regs); // check for branch in delay slot if (unlikely(isBranch(code))) { - psxDoDelayBranch(regs, tar, code); + regs->pc = pc; + if (taken == R3000A_BRANCH_TAKEN) + psxDoDelayBranch(regs, tar, code); log_unhandled("branch in DS: %08x->%08x\n", pc, regs->pc); - branch = 0; + regs->branching = 0; psxBranchTest(); return; } @@ -383,24 +428,30 @@ static void doBranch(psxRegisters *regs, u32 tar) { dloadStep(regs); psxBSC[code >> 26](regs, code); - branch = 0; - regs->pc = tar; + if (likely(regs->branching != R3000A_BRANCH_NONE_OR_EXCEPTION)) + regs->pc = pc_final; + else + regs->CP0.n.Target = pc_final; + regs->branching = 0; psxBranchTest(); } static void doBranchReg(psxRegisters *regs, u32 tar) { -#ifdef DO_EXCEPTION_ALIGNMENT_BRANCH + doBranch(regs, tar & ~3, R3000A_BRANCH_TAKEN); +} + +static void doBranchRegE(psxRegisters *regs, u32 tar) { + if (unlikely(DBR_EN_EXEC(regs->CP0.n.DCIC, tar) && + ((tar ^ regs->CP0.n.BPC) & regs->CP0.n.BPCM) == 0)) + regs->CP0.n.DCIC |= 0x03; if (unlikely(tar & 3)) { SysPrintf("game crash @%08x, ra=%08x\n", tar, regs->GPR.n.ra); - psxRegs.CP0.n.BadVAddr = tar; + regs->CP0.n.BadVAddr = tar; intException(regs, tar, R3000E_AdEL << 2); return; } -#else - tar &= ~3; -#endif - doBranch(regs, tar); + doBranch(regs, tar, R3000A_BRANCH_TAKEN); } #if __has_builtin(__builtin_add_overflow) || (defined(__GNUC__) && __GNUC__ >= 5) @@ -415,7 +466,7 @@ static void addExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) { s32 val; if (add_overflow(a1, a2, val)) { //printf("ov %08x + %08x = %08x\n", a1, a2, val); - intException(regs, regs->pc - 4, R3000E_Ov << 2); + intExceptionInsn(regs, R3000E_Ov << 2); return; } dloadRt(regs, rt, val); @@ -424,7 +475,7 @@ static void addExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) { static void subExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) { s32 val; if 
(sub_overflow(a1, a2, val)) { - intException(regs, regs->pc - 4, R3000E_Ov << 2); + intExceptionInsn(regs, R3000E_Ov << 2); return; } dloadRt(regs, rt, val); @@ -538,14 +589,14 @@ OP(psxMULTU_stall) { * Register branch logic * * Format: OP rs, offset * *********************************************************/ +#define BrCond(c) (c) ? R3000A_BRANCH_TAKEN : R3000A_BRANCH_NOT_TAKEN #define RepZBranchi32(op) \ - if(_i32(_rRs_) op 0) \ - doBranch(regs_, _BranchTarget_); + doBranch(regs_, _BranchTarget_, BrCond(_i32(_rRs_) op 0)); #define RepZBranchLinki32(op) { \ s32 temp = _i32(_rRs_); \ + dloadFlush(regs_); \ _SetLink(31); \ - if(temp op 0) \ - doBranch(regs_, _BranchTarget_); \ + doBranch(regs_, _BranchTarget_, BrCond(temp op 0)); \ } OP(psxBGEZ) { RepZBranchi32(>=) } // Branch if Rs >= 0 @@ -608,18 +659,18 @@ OP(psxMTLO) { _rLo_ = _rRs_; } // Lo = Rs * Format: OP * *********************************************************/ OP(psxBREAK) { - intException(regs_, regs_->pc - 4, R3000E_Bp << 2); + intExceptionInsn(regs_, R3000E_Bp << 2); } OP(psxSYSCALL) { - intException(regs_, regs_->pc - 4, R3000E_Syscall << 2); + intExceptionInsn(regs_, R3000E_Syscall << 2); } static inline void execI_(u8 **memRLUT, psxRegisters *regs_); static inline void psxTestSWInts(psxRegisters *regs_, int step) { - if (regs_->CP0.n.Cause & regs_->CP0.n.Status & 0x0300 && - regs_->CP0.n.Status & 0x1) { + if ((regs_->CP0.n.Cause & regs_->CP0.n.SR & 0x0300) && + (regs_->CP0.n.SR & 0x1)) { if (step) execI_(psxMemRLUT, regs_); regs_->CP0.n.Cause &= ~0x7c; @@ -628,9 +679,7 @@ static inline void psxTestSWInts(psxRegisters *regs_, int step) { } OP(psxRFE) { -// SysPrintf("psxRFE\n"); - regs_->CP0.n.Status = (regs_->CP0.n.Status & 0xfffffff0) | - ((regs_->CP0.n.Status & 0x3c) >> 2); + regs_->CP0.n.SR = (regs_->CP0.n.SR & ~0x0f) | ((regs_->CP0.n.SR & 0x3c) >> 2); psxTestSWInts(regs_, 0); } @@ -638,10 +687,8 @@ OP(psxRFE) { * Register branch logic * * Format: OP rs, rt, offset * 
*********************************************************/ -#define RepBranchi32(op) { \ - if (_i32(_rRs_) op _i32(_rRt_)) \ - doBranch(regs_, _BranchTarget_); \ -} +#define RepBranchi32(op) \ + doBranch(regs_, _BranchTarget_, BrCond(_i32(_rRs_) op _i32(_rRt_))); OP(psxBEQ) { RepBranchi32(==) } // Branch if Rs == Rt OP(psxBNE) { RepBranchi32(!=) } // Branch if Rs != Rt @@ -650,8 +697,12 @@ OP(psxBNE) { RepBranchi32(!=) } // Branch if Rs != Rt * Jump to target * * Format: OP target * *********************************************************/ -OP(psxJ) { doBranch(regs_, _JumpTarget_); } -OP(psxJAL) { _SetLink(31); doBranch(regs_, _JumpTarget_); } +OP(psxJ) { doBranch(regs_, _JumpTarget_, R3000A_BRANCH_TAKEN); } +OP(psxJAL) { + dloadFlush(regs_); + _SetLink(31); + doBranch(regs_, _JumpTarget_, R3000A_BRANCH_TAKEN); +} /********************************************************* * Register jump * @@ -662,37 +713,76 @@ OP(psxJR) { psxJumpTest(); } +OP(psxJRe) { + doBranchRegE(regs_, _rRs_); + psxJumpTest(); +} + OP(psxJALR) { u32 temp = _u32(_rRs_); + dloadFlush(regs_); if (_Rd_) { _SetLink(_Rd_); } doBranchReg(regs_, temp); } +OP(psxJALRe) { + u32 temp = _u32(_rRs_); + dloadFlush(regs_); + if (_Rd_) { _SetLink(_Rd_); } + doBranchRegE(regs_, temp); +} + /********************************************************* -* Load and store for GPR * -* Format: OP rt, offset(base) * *********************************************************/ -static int algnChkL(psxRegisters *regs, u32 addr, u32 m) { +// revisit: incomplete +#define BUS_LOCKED_ADDR(a) \ + ((0x1fc80000u <= (a) && (a) < 0x80000000u) || \ + (0xc0000000u <= (a) && (a) < 0xfffe0000u)) + +// exception checking order is important +static inline int checkLD(psxRegisters *regs, u32 addr, u32 m) { + int bpException = 0; + if (unlikely(DBR_EN_LD(regs->CP0.n.DCIC, addr) && + ((addr ^ regs->CP0.n.BDA) & regs->CP0.n.BDAM) == 0)) { + regs->CP0.n.DCIC |= 0x0d; + bpException = regs->CP0.n.DCIC >> 31; + } if (unlikely(addr & m)) { - 
log_unhandled("unaligned load %08x @%08x\n", addr, regs->pc - 4); -#ifdef DO_EXCEPTION_ALIGNMENT_DATA - psxRegs.CP0.n.BadVAddr = addr; - intException(regs, regs->pc - 4, R3000E_AdEL << 2); + regs->CP0.n.BadVAddr = addr; + intExceptionInsn(regs, R3000E_AdEL << 2); + return 0; + } + if (unlikely(bpException)) { + intExceptionDebugBp(regs, regs->pc - 4); + return 0; + } + if (unlikely(BUS_LOCKED_ADDR(addr))) { + intException(regs, regs->pc - 4, R3000E_DBE << 2); return 0; -#endif } return 1; } -static int algnChkS(psxRegisters *regs, u32 addr, u32 m) { +static inline int checkST(psxRegisters *regs, u32 addr, u32 m) { + int bpException = 0; + if (unlikely(DBR_EN_ST(regs->CP0.n.DCIC, addr) && + ((addr ^ regs->CP0.n.BDA) & regs->CP0.n.BDAM) == 0)) { + regs->CP0.n.DCIC |= 0x15; + bpException = regs->CP0.n.DCIC >> 31; + } if (unlikely(addr & m)) { - log_unhandled("unaligned store %08x @%08x\n", addr, regs->pc - 4); -#ifdef DO_EXCEPTION_ALIGNMENT_DATA - psxRegs.CP0.n.BadVAddr = addr; - intException(regs, regs->pc - 4, R3000E_AdES << 2); + regs->CP0.n.BadVAddr = addr; + intExceptionInsn(regs, R3000E_AdES << 2); + return 0; + } + if (unlikely(bpException)) { + intExceptionDebugBp(regs, regs->pc - 4); + return 0; + } + if (unlikely(BUS_LOCKED_ADDR(addr))) { + intException(regs, regs->pc - 4, R3000E_DBE << 2); return 0; -#endif } return 1; } @@ -702,30 +792,40 @@ static int algnChkS(psxRegisters *regs, u32 addr, u32 m) { * Format: OP rt, offset(base) * *********************************************************/ +/********************************************************* +* Load and store for GPR * +* Format: OP rt, offset(base) * +*********************************************************/ + #define _oB_ (regs_->GPR.r[_Rs_] + _Imm_) -OP(psxLB) { doLoad(regs_, _Rt_, (s8)psxMemRead8(_oB_)); } -OP(psxLBU) { doLoad(regs_, _Rt_, psxMemRead8(_oB_)); } -OP(psxLH) { if (algnChkL(regs_, _oB_, 1)) doLoad(regs_, _Rt_, (s16)psxMemRead16(_oB_)); } -OP(psxLHU) { if (algnChkL(regs_, _oB_, 1)) 
doLoad(regs_, _Rt_, psxMemRead16(_oB_)); } -OP(psxLW) { if (algnChkL(regs_, _oB_, 3)) doLoad(regs_, _Rt_, psxMemRead32(_oB_)); } +OP(psxLB) { doLoad(regs_, _Rt_, (s8)psxMemRead8(_oB_)); } +OP(psxLBU) { doLoad(regs_, _Rt_, psxMemRead8(_oB_)); } +OP(psxLH) { doLoad(regs_, _Rt_, (s16)psxMemRead16(_oB_ & ~1)); } +OP(psxLHU) { doLoad(regs_, _Rt_, psxMemRead16(_oB_ & ~1)); } +OP(psxLW) { doLoad(regs_, _Rt_, psxMemRead32(_oB_ & ~3)); } -OP(psxLWL) { +OP(psxLBe) { if (checkLD(regs_, _oB_, 0)) doLoad(regs_, _Rt_, (s8)psxMemRead8(_oB_)); } +OP(psxLBUe) { if (checkLD(regs_, _oB_, 0)) doLoad(regs_, _Rt_, psxMemRead8(_oB_)); } +OP(psxLHe) { if (checkLD(regs_, _oB_, 1)) doLoad(regs_, _Rt_, (s16)psxMemRead16(_oB_)); } +OP(psxLHUe) { if (checkLD(regs_, _oB_, 1)) doLoad(regs_, _Rt_, psxMemRead16(_oB_)); } +OP(psxLWe) { if (checkLD(regs_, _oB_, 3)) doLoad(regs_, _Rt_, psxMemRead32(_oB_)); } + +static void doLWL(psxRegisters *regs, u32 rt, u32 addr) { static const u32 LWL_MASK[4] = { 0xffffff, 0xffff, 0xff, 0 }; static const u32 LWL_SHIFT[4] = { 24, 16, 8, 0 }; - u32 addr = _oB_, val; u32 shift = addr & 3; - u32 mem = psxMemRead32(addr & ~3); - u32 rt = _Rt_; - u32 oldval = regs_->GPR.r[rt]; + u32 val, mem; + u32 oldval = regs->GPR.r[rt]; #ifdef HANDLE_LOAD_DELAY - int sel = regs_->dloadSel; - if (regs_->dloadReg[sel] == rt) - oldval = regs_->dloadVal[sel]; + int sel = regs->dloadSel; + if (regs->dloadReg[sel] == rt) + oldval = regs->dloadVal[sel]; #endif + mem = psxMemRead32(addr & ~3); val = (oldval & LWL_MASK[shift]) | (mem << LWL_SHIFT[shift]); - doLoad(regs_, rt, val); + doLoad(regs, rt, val); /* Mem = 1234. 
Reg = abcd @@ -737,22 +837,21 @@ OP(psxLWL) { */ } -OP(psxLWR) { +static void doLWR(psxRegisters *regs, u32 rt, u32 addr) { static const u32 LWR_MASK[4] = { 0, 0xff000000, 0xffff0000, 0xffffff00 }; static const u32 LWR_SHIFT[4] = { 0, 8, 16, 24 }; - u32 addr = _oB_, val; u32 shift = addr & 3; - u32 mem = psxMemRead32(addr & ~3); - u32 rt = _Rt_; - u32 oldval = regs_->GPR.r[rt]; + u32 val, mem; + u32 oldval = regs->GPR.r[rt]; #ifdef HANDLE_LOAD_DELAY - int sel = regs_->dloadSel; - if (regs_->dloadReg[sel] == rt) - oldval = regs_->dloadVal[sel]; + int sel = regs->dloadSel; + if (regs->dloadReg[sel] == rt) + oldval = regs->dloadVal[sel]; #endif + mem = psxMemRead32(addr & ~3); val = (oldval & LWR_MASK[shift]) | (mem >> LWR_SHIFT[shift]); - doLoad(regs_, rt, val); + doLoad(regs, rt, val); /* Mem = 1234. Reg = abcd @@ -764,20 +863,30 @@ OP(psxLWR) { */ } -OP(psxSB) { psxMemWrite8 (_oB_, _rRt_ & 0xff); } -OP(psxSH) { if (algnChkS(regs_, _oB_, 1)) psxMemWrite16(_oB_, _rRt_ & 0xffff); } -OP(psxSW) { if (algnChkS(regs_, _oB_, 3)) psxMemWrite32(_oB_, _rRt_); } +OP(psxLWL) { doLWL(regs_, _Rt_, _oB_); } +OP(psxLWR) { doLWR(regs_, _Rt_, _oB_); } -// FIXME: this rmw implementation is wrong and would break on io like fifos -OP(psxSWL) { - static const u32 SWL_MASK[4] = { 0xffffff00, 0xffff0000, 0xff000000, 0 }; - static const u32 SWL_SHIFT[4] = { 24, 16, 8, 0 }; - u32 addr = _oB_; - u32 shift = addr & 3; - u32 mem = psxMemRead32(addr & ~3); +OP(psxLWLe) { if (checkLD(regs_, _oB_ & ~3, 0)) doLWL(regs_, _Rt_, _oB_); } +OP(psxLWRe) { if (checkLD(regs_, _oB_ , 0)) doLWR(regs_, _Rt_, _oB_); } - psxMemWrite32(addr & ~3, (_u32(_rRt_) >> SWL_SHIFT[shift]) | - ( mem & SWL_MASK[shift]) ); +OP(psxSB) { psxMemWrite8 (_oB_, _rRt_ & 0xff); } +OP(psxSH) { psxMemWrite16(_oB_, _rRt_ & 0xffff); } +OP(psxSW) { psxMemWrite32(_oB_, _rRt_); } + +OP(psxSBe) { if (checkST(regs_, _oB_, 0)) psxMemWrite8 (_oB_, _rRt_ & 0xff); } +OP(psxSHe) { if (checkST(regs_, _oB_, 1)) psxMemWrite16(_oB_, _rRt_ & 0xffff); 
} +OP(psxSWe) { if (checkST(regs_, _oB_, 3)) psxMemWrite32(_oB_, _rRt_); } + +static void doSWL(psxRegisters *regs, u32 rt, u32 addr) { + u32 val = regs->GPR.r[rt]; + switch (addr & 3) { + case 0: psxMemWrite8( addr , val >> 24); break; + case 1: psxMemWrite16(addr & ~3, val >> 16); break; + case 2: // revisit: should be a single 24bit write + psxMemWrite16(addr & ~3, (val >> 8) & 0xffff); + psxMemWrite8( addr , val >> 24); break; + case 3: psxMemWrite32(addr & ~3, val); break; + } /* Mem = 1234. Reg = abcd @@ -788,15 +897,16 @@ OP(psxSWL) { */ } -OP(psxSWR) { - static const u32 SWR_MASK[4] = { 0, 0xff, 0xffff, 0xffffff }; - static const u32 SWR_SHIFT[4] = { 0, 8, 16, 24 }; - u32 addr = _oB_; - u32 shift = addr & 3; - u32 mem = psxMemRead32(addr & ~3); - - psxMemWrite32(addr & ~3, (_u32(_rRt_) << SWR_SHIFT[shift]) | - ( mem & SWR_MASK[shift]) ); +static void doSWR(psxRegisters *regs, u32 rt, u32 addr) { + u32 val = regs->GPR.r[rt]; + switch (addr & 3) { + case 0: psxMemWrite32(addr , val); break; + case 1: // revisit: should be a single 24bit write + psxMemWrite8 (addr , val & 0xff); + psxMemWrite16(addr + 1, (val >> 8) & 0xffff); break; + case 2: psxMemWrite16(addr , val & 0xffff); break; + case 3: psxMemWrite8 (addr , val & 0xff); break; + } /* Mem = 1234. 
Reg = abcd @@ -808,6 +918,12 @@ OP(psxSWR) { */ } +OP(psxSWL) { doSWL(regs_, _Rt_, _oB_); } +OP(psxSWR) { doSWR(regs_, _Rt_, _oB_); } + +OP(psxSWLe) { if (checkST(regs_, _oB_ & ~3, 0)) doSWL(regs_, _Rt_, _oB_); } +OP(psxSWRe) { if (checkST(regs_, _oB_ , 0)) doSWR(regs_, _Rt_, _oB_); } + /********************************************************* * Moves between GPR and COPx * * Format: OP rt, fs * @@ -815,25 +931,25 @@ OP(psxSWR) { OP(psxMFC0) { u32 r = _Rd_; #ifdef DO_EXCEPTION_RESERVEDI - if (unlikely(r == 0)) - intException(regs_, regs_->pc - 4, R3000E_RI << 2); + if (unlikely(0x00000417u & (1u << r))) + intExceptionInsn(regs_, R3000E_RI << 2); #endif doLoad(regs_, _Rt_, regs_->CP0.r[r]); } -OP(psxCFC0) { doLoad(regs_, _Rt_, regs_->CP0.r[_Rd_]); } - static void setupCop(u32 sr); +OP(psxCFC0) { doLoad(regs_, _Rt_, regs_->CP0.r[_Rd_]); } + void MTC0(psxRegisters *regs_, int reg, u32 val) { // SysPrintf("MTC0 %d: %x\n", reg, val); switch (reg) { - case 12: // Status - if (unlikely((regs_->CP0.n.Status ^ val) & (1 << 16))) + case 12: // SR + if (unlikely((regs_->CP0.n.SR ^ val) & (1 << 16))) psxMemOnIsolate((val >> 16) & 1); - if (unlikely((regs_->CP0.n.Status ^ val) & (7 << 29))) + if (unlikely((regs_->CP0.n.SR ^ val) & (7 << 29))) setupCop(val); - regs_->CP0.n.Status = val; + regs_->CP0.n.SR = val; psxTestSWInts(regs_, 1); break; @@ -843,6 +959,10 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) { psxTestSWInts(regs_, 0); break; + case 7: + if ((regs_->CP0.n.DCIC ^ val) & 0xff800000) + log_unhandled("DCIC: %08x->%08x\n", regs_->CP0.n.DCIC, val); + // fallthrough default: regs_->CP0.r[reg] = val; break; @@ -852,23 +972,26 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) { OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); } OP(psxCTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); } +// no exception +static inline void psxNULLne(psxRegisters *regs) { + log_unhandled("unhandled op %08x @%08x\n", regs->code, regs->pc - 4); +} + 
/********************************************************* * Unknown instruction (would generate an exception) * * Format: ? * *********************************************************/ -static inline void psxNULL_(void) { - //printf("op %08x @%08x\n", psxRegs.code, psxRegs.pc); -} OP(psxNULL) { - psxNULL_(); + psxNULLne(regs_); #ifdef DO_EXCEPTION_RESERVEDI - intException(regs_, regs_->pc - 4, R3000E_RI << 2); + intExceptionInsn(regs_, R3000E_RI << 2); #endif } void gteNULL(struct psxCP2Regs *regs) { - psxNULL_(); + psxRegisters *regs_ = (psxRegisters *)((u8 *)regs - offsetof(psxRegisters, CP2)); + psxNULLne(regs_); } OP(psxSPECIAL) { @@ -882,25 +1005,16 @@ OP(psxCOP0) { case 0x04: psxMTC0(regs_, code); break; case 0x06: psxCTC0(regs_, code); break; case 0x10: psxRFE(regs_, code); break; - default: psxNULL_(); break; + default: psxNULLne(regs_); break; } } -OP(psxLWC0) { - // MTC0(regs_, _Rt_, psxMemRead32(_oB_)); // ? - log_unhandled("LWC0 %08x\n", code); -} - OP(psxCOP1) { // ??? what actually happens here? 
+ log_unhandled("COP1 %08x @%08x\n", code, regs_->pc - 4); } -OP(psxCOP1d) { -#ifdef DO_EXCEPTION_RESERVEDI - intException(regs_, regs_->pc - 4, (1<<28) | (R3000E_RI << 2)); -#endif -} - +// TODO: wrong COP2 decoding OP(psxCOP2) { psxCP2[_Funct_](&regs_->CP2); } @@ -911,12 +1025,6 @@ OP(psxCOP2_stall) { psxCP2[f](&regs_->CP2); } -OP(psxCOP2d) { -#ifdef DO_EXCEPTION_RESERVEDI - intException(regs_, regs_->pc - 4, (2<<28) | (R3000E_RI << 2)); -#endif -} - OP(gteMFC2) { doLoad(regs_, _Rt_, MFC2(&regs_->CP2, _Rd_)); } @@ -937,50 +1045,67 @@ OP(gteLWC2) { MTC2(&regs_->CP2, psxMemRead32(_oB_), _Rt_); } -OP(gteSWC2) { - psxMemWrite32(_oB_, MFC2(&regs_->CP2, _Rt_)); -} - OP(gteLWC2_stall) { gteCheckStall(0); gteLWC2(regs_, code); } +OP(gteLWC2e_stall) { + gteCheckStall(0); + if (checkLD(regs_, _oB_, 3)) + MTC2(&regs_->CP2, psxMemRead32(_oB_), _Rt_); +} + +OP(gteSWC2) { + psxMemWrite32(_oB_, MFC2(&regs_->CP2, _Rt_)); +} + OP(gteSWC2_stall) { gteCheckStall(0); gteSWC2(regs_, code); } +OP(gteSWC2e_stall) { + gteCheckStall(0); + if (checkST(regs_, _oB_, 3)) + gteSWC2(regs_, code); +} + OP(psxCOP3) { // ??? what actually happens here? + log_unhandled("COP3 %08x @%08x\n", code, regs_->pc - 4); } -OP(psxCOP3d) { +OP(psxCOPd) { + log_unhandled("disabled cop%d @%08x\n", (code >> 26) & 3, regs_->pc - 4); #ifdef DO_EXCEPTION_RESERVEDI - intException(regs_, regs_->pc - 4, (3<<28) | (R3000E_RI << 2)); + intExceptionInsn(regs_, R3000E_CpU << 2); #endif } OP(psxLWCx) { - // does this read memory? - log_unhandled("LWCx %08x\n", code); + log_unhandled("LWCx %08x @%08x\n", code, regs_->pc - 4); + checkLD(regs_, _oB_, 3); } OP(psxSWCx) { // does this write something to memory?
- log_unhandled("SWCx %08x\n", code); + log_unhandled("SWCx %08x @%08x\n", code, regs_->pc - 4); + checkST(regs_, _oB_, 3); } static void psxBASIC(struct psxCP2Regs *cp2regs) { - psxRegisters *regs_ = (void *)((char *)cp2regs - offsetof(psxRegisters, CP2)); - u32 code = regs_->code; - assert(regs_ == &psxRegs); + psxRegisters *regs = (void *)((u8 *)cp2regs - offsetof(psxRegisters, CP2)); + u32 code = regs->code; + assert(regs == &psxRegs); switch (_Rs_) { - case 0x00: gteMFC2(regs_, code); break; - case 0x02: gteCFC2(regs_, code); break; - case 0x04: gteMTC2(regs_, code); break; - case 0x06: gteCTC2(regs_, code); break; - default: psxNULL_(); break; + case 0x00: gteMFC2(regs, code); break; + case 0x02: gteCFC2(regs, code); break; + case 0x04: gteMTC2(regs, code); break; + case 0x06: gteCTC2(regs, code); break; + case 0x08: + case 0x0c: log_unhandled("BC2 %08x @%08x\n", code, regs->pc - 4); + default: psxNULLne(regs); break; } } @@ -1014,12 +1139,12 @@ OP(psxHLE) { static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = { psxSPECIAL, psxREGIMM, psxJ , psxJAL , psxBEQ , psxBNE , psxBLEZ, psxBGTZ, psxADDI , psxADDIU , psxSLTI, psxSLTIU, psxANDI, psxORI , psxXORI, psxLUI , - psxCOP0 , psxCOP1d , psxCOP2, psxCOP3d, psxNULL, psxCOP1d,psxCOP2d,psxCOP3d, - psxNULL , psxCOP1d , psxCOP2d,psxCOP3d, psxNULL, psxCOP1d,psxCOP2d,psxCOP3d, - psxLB , psxLH , psxLWL , psxLW , psxLBU , psxLHU , psxLWR , psxCOP3d, - psxSB , psxSH , psxSWL , psxSW , psxNULL, psxCOP1d,psxSWR , psxCOP3d, - psxLWC0 , psxLWCx , gteLWC2, psxLWCx , psxNULL, psxCOP1d,psxCOP2d,psxCOP3d, - psxSWCx , psxSWCx , gteSWC2, psxHLE , psxNULL, psxCOP1d,psxCOP2d,psxCOP3d, + psxCOP0 , psxCOPd , psxCOP2, psxCOPd, psxNULL, psxNULL, psxNULL, psxNULL, + psxNULL , psxNULL , psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, + psxLB , psxLH , psxLWL , psxLW , psxLBU , psxLHU , psxLWR , psxNULL, + psxSB , psxSH , psxSWL , psxSW , psxNULL, psxNULL, psxSWR , psxNULL, + psxLWCx , psxLWCx , gteLWC2, psxLWCx , 
psxNULL, psxNULL, psxNULL, psxNULL, + psxSWCx , psxSWCx , gteSWC2, psxHLE , psxNULL, psxNULL, psxNULL, psxNULL, }; static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code) = { @@ -1054,15 +1179,29 @@ static void intReset() { dloadClear(&psxRegs); } -static inline void execI_(u8 **memRLUT, psxRegisters *regs_) { - u32 pc = regs_->pc; - regs_->pc += 4; - regs_->code = fetch(regs_, memRLUT, pc); +static inline void execI_(u8 **memRLUT, psxRegisters *regs) { + u32 pc = regs->pc; - addCycle(); + addCycle(regs); + dloadStep(regs); - dloadStep(regs_); - psxBSC[regs_->code >> 26](regs_, regs_->code); + regs->pc += 4; + regs->code = fetch(regs, memRLUT, pc); + psxBSC[regs->code >> 26](regs, regs->code); +} + +static inline void execIbp(u8 **memRLUT, psxRegisters *regs) { + u32 pc = regs->pc; + + addCycle(regs); + dloadStep(regs); + + if (execBreakCheck(regs, pc)) + return; + + regs->pc += 4; + regs->code = fetch(regs, memRLUT, pc); + psxBSC[regs->code >> 26](regs, regs->code); } static void intExecute() { @@ -1074,12 +1213,21 @@ static void intExecute() { execI_(memRLUT, regs_); } +static void intExecuteBp() { + psxRegisters *regs_ = &psxRegs; + u8 **memRLUT = psxMemRLUT; + extern int stop; + + while (!stop) + execIbp(memRLUT, regs_); +} + void intExecuteBlock(enum blockExecCaller caller) { psxRegisters *regs_ = &psxRegs; u8 **memRLUT = psxMemRLUT; - branch2 = 0; - while (!branch2) + branchSeen = 0; + while (!branchSeen) execI_(memRLUT, regs_); } @@ -1093,7 +1241,7 @@ static void intNotify(enum R3000Anote note, void *data) { break; case R3000ACPU_NOTIFY_AFTER_LOAD: dloadClear(&psxRegs); - setupCop(psxRegs.CP0.n.Status); + setupCop(psxRegs.CP0.n.SR); // fallthrough case R3000ACPU_NOTIFY_CACHE_ISOLATED: // Armored Core? memset(&ICache, 0xff, sizeof(ICache)); @@ -1108,22 +1256,20 @@ static void setupCop(u32 sr) if (sr & (1u << 29)) psxBSC[17] = psxCOP1; else - psxBSC[17] = psxCOP1d; + psxBSC[17] = psxCOPd; if (sr & (1u << 30)) psxBSC[18] = Config.DisableStalls ? 
psxCOP2 : psxCOP2_stall; else - psxBSC[18] = psxCOP2d; + psxBSC[18] = psxCOPd; if (sr & (1u << 31)) psxBSC[19] = psxCOP3; else - psxBSC[19] = psxCOP3d; + psxBSC[19] = psxCOPd; } void intApplyConfig() { int cycle_mult; - assert(psxBSC[50] == gteLWC2 || psxBSC[50] == gteLWC2_stall); - assert(psxBSC[58] == gteSWC2 || psxBSC[58] == gteSWC2_stall); assert(psxSPC[16] == psxMFHI || psxSPC[16] == psxMFHI_stall); assert(psxSPC[18] == psxMFLO || psxSPC[18] == psxMFLO_stall); assert(psxSPC[24] == psxMULT || psxSPC[24] == psxMULT_stall); @@ -1152,9 +1298,46 @@ void intApplyConfig() { psxSPC[26] = psxDIV_stall; psxSPC[27] = psxDIVU_stall; } - setupCop(psxRegs.CP0.n.Status); + setupCop(psxRegs.CP0.n.SR); + + if (Config.PreciseExceptions) { + psxBSC[0x20] = psxLBe; + psxBSC[0x21] = psxLHe; + psxBSC[0x22] = psxLWLe; + psxBSC[0x23] = psxLWe; + psxBSC[0x24] = psxLBUe; + psxBSC[0x25] = psxLHUe; + psxBSC[0x26] = psxLWRe; + psxBSC[0x28] = psxSBe; + psxBSC[0x29] = psxSHe; + psxBSC[0x2a] = psxSWLe; + psxBSC[0x2b] = psxSWe; + psxBSC[0x2e] = psxSWRe; + psxBSC[0x32] = gteLWC2e_stall; + psxBSC[0x3a] = gteSWC2e_stall; + psxSPC[0x08] = psxJRe; + psxSPC[0x09] = psxJALRe; + psxInt.Execute = intExecuteBp; + } else { + psxBSC[0x20] = psxLB; + psxBSC[0x21] = psxLH; + psxBSC[0x22] = psxLWL; + psxBSC[0x23] = psxLW; + psxBSC[0x24] = psxLBU; + psxBSC[0x25] = psxLHU; + psxBSC[0x26] = psxLWR; + psxBSC[0x28] = psxSB; + psxBSC[0x29] = psxSH; + psxBSC[0x2a] = psxSWL; + psxBSC[0x2b] = psxSW; + psxBSC[0x2e] = psxSWR; + // LWC2, SWC2 handled by Config.DisableStalls + psxSPC[0x08] = psxJR; + psxSPC[0x09] = psxJALR; + psxInt.Execute = intExecute; + } - // dynarec may occasionally call the interpreter, in such a case the + // the dynarec may occasionally call the interpreter, in such a case the // cache won't work (cache only works right if all fetches go through it) if (!Config.icache_emulation || psxCpu != &psxInt) fetch = fetchNoCache; @@ -1170,8 +1353,9 @@ static void intShutdown() { } // single step (may do 
several ops in case of a branch) -void execI() { - execI_(psxMemRLUT, &psxRegs); +void execI(psxRegisters *regs) { + execI_(psxMemRLUT, regs); + dloadFlush(regs); } R3000Acpu psxInt = { diff --git a/libpcsxcore/psxinterpreter.h b/libpcsxcore/psxinterpreter.h index 746c8fe86..2c3f3943b 100644 --- a/libpcsxcore/psxinterpreter.h +++ b/libpcsxcore/psxinterpreter.h @@ -5,7 +5,7 @@ u32 intFakeFetch(u32 pc); // called by "new_dynarec" -void execI(); +void execI(psxRegisters *regs); void intApplyConfig(); void MTC0(psxRegisters *regs_, int reg, u32 val); void gteNULL(struct psxCP2Regs *regs); diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 5374f8661..212735842 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -59,10 +59,10 @@ void psxReset() { psxRegs.pc = 0xbfc00000; // Start in bootstrap - psxRegs.CP0.r[12] = 0x10600000; // COP0 enabled | BEV = 1 | TS = 1 - psxRegs.CP0.r[15] = 0x00000002; // PRevID = Revision ID, same as R3000A + psxRegs.CP0.n.SR = 0x10600000; // COP0 enabled | BEV = 1 | TS = 1 + psxRegs.CP0.n.PRid = 0x00000002; // PRevID = Revision ID, same as R3000A if (Config.HLE) - psxRegs.CP0.n.Status |= 1u << 30; // COP2 enabled + psxRegs.CP0.n.SR |= 1u << 30; // COP2 enabled psxCpu->ApplyConfig(); psxCpu->Reset(); @@ -93,7 +93,7 @@ void psxShutdown() { } // cp0 is passed separately for lightrec to be less messy -void psxException(u32 cause, u32 bd, psxCP0Regs *cp0) { +void psxException(u32 cause, enum R3000Abdt bdt, psxCP0Regs *cp0) { u32 opcode = intFakeFetch(psxRegs.pc); if (unlikely(!Config.HLE && ((((opcode) >> 24) & 0xfe) == 0x4a))) { @@ -101,31 +101,24 @@ void psxException(u32 cause, u32 bd, psxCP0Regs *cp0) { // BIOS does not allow to return to GTE instructions // (just skips it, supposedly because it's scheduled already) // so we execute it here - psxCP2Regs *cp2 = (void *)(cp0 + 1); + psxCP2Regs *cp2 = (psxCP2Regs *)(cp0 + 1); psxRegs.code = opcode; psxCP2[opcode & 0x3f](cp2); } // Set the Cause - cp0->n.Cause = (cp0->n.Cause 
& 0x300) | cause; + cp0->n.Cause = (bdt << 30) | (cp0->n.Cause & 0x300) | cause; // Set the EPC & PC - if (bd) { -#ifdef PSXCPU_LOG - PSXCPU_LOG("bd set!!!\n"); -#endif - cp0->n.Cause |= 0x80000000; - cp0->n.EPC = (psxRegs.pc - 4); - } else - cp0->n.EPC = (psxRegs.pc); + cp0->n.EPC = bdt ? psxRegs.pc - 4 : psxRegs.pc; - if (cp0->n.Status & 0x400000) + if (cp0->n.SR & 0x400000) psxRegs.pc = 0xbfc00180; else psxRegs.pc = 0x80000080; - // Set the Status - cp0->n.Status = (cp0->n.Status & ~0x3f) | ((cp0->n.Status & 0x0f) << 2); + // Set the SR + cp0->n.SR = (cp0->n.SR & ~0x3f) | ((cp0->n.SR & 0x0f) << 2); if (Config.HLE) psxBiosException(); } @@ -204,7 +197,7 @@ void psxBranchTest() { } if (psxHu32(0x1070) & psxHu32(0x1074)) { - if ((psxRegs.CP0.n.Status & 0x401) == 0x401) { + if ((psxRegs.CP0.n.SR & 0x401) == 0x401) { #ifdef PSXCPU_LOG PSXCPU_LOG("Interrupt: %x %x\n", psxHu32(0x1070), psxHu32(0x1074)); #endif diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 778bd8d9f..668231477 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -34,7 +34,7 @@ enum R3000Aexception { R3000E_AdEL = 4, // Address error (on load/I-fetch) R3000E_AdES = 5, // Address error (on store) R3000E_IBE = 6, // Bus error (instruction fetch) - R3000E_DBE = 7, // Bus error (data load) + R3000E_DBE = 7, // Bus error (data load/store) R3000E_Syscall = 8, // syscall instruction R3000E_Bp = 9, // Breakpoint - a break instruction R3000E_RI = 10, // reserved instruction @@ -96,14 +96,11 @@ typedef union { typedef union psxCP0Regs_ { struct { - u32 Index, Random, EntryLo0, EntryLo1, - Context, PageMask, Wired, Reserved0, - BadVAddr, Count, EntryHi, Compare, - Status, Cause, EPC, PRid, - Config, LLAddr, WatchLO, WatchHI, - XContext, Reserved1, Reserved2, Reserved3, - Reserved4, Reserved5, ECC, CacheErr, - TagLo, TagHi, ErrorEPC, Reserved6; + u32 Reserved0, Reserved1, Reserved2, BPC, + Reserved4, BDA, Target, DCIC, + BadVAddr, BDAM, Reserved10, BPCM, + SR, Cause, EPC, PRid, + 
Reserved16[16]; } n; u32 r[32]; PAIR p[32]; @@ -188,6 +185,14 @@ enum { PSXINT_COUNT }; +enum R3000Abdt { + // corresponds to bits 31,30 of Cause reg + R3000A_BRANCH_TAKEN = 3, + R3000A_BRANCH_NOT_TAKEN = 2, + // none or tells that there was an exception in DS back to doBranch + R3000A_BRANCH_NONE_OR_EXCEPTION = 0, +}; + typedef struct psxCP2Regs { psxCP2Data CP2D; /* Cop2 data registers */ psxCP2Ctrl CP2C; /* Cop2 control registers */ @@ -212,11 +217,11 @@ typedef struct { struct { u32 sCycle, cycle; } intCycle[32]; u32 gteBusyCycle; u32 muldivBusyCycle; - u32 subCycle; /* interpreter cycle counting */ + u32 subCycle; /* interpreter cycle counting */ u32 subCycleStep; u32 biuReg; - u8 reserved; - u8 dloadSel; + u8 branching; /* interp. R3000A_BRANCH_TAKEN / not, 0 if not branch */ + u8 dloadSel; /* interp. delay load state */ u8 dloadReg[2]; u32 dloadVal[2]; // warning: changing anything in psxRegisters requires update of all @@ -247,7 +252,7 @@ void new_dyna_freeze(void *f, int mode); int psxInit(); void psxReset(); void psxShutdown(); -void psxException(u32 code, u32 bd, psxCP0Regs *cp0); +void psxException(u32 code, enum R3000Abdt bdt, psxCP0Regs *cp0); void psxBranchTest(); void psxExecuteBios(); void psxJumpTest(); From 3d1c03e76934037a5abab13f250bf5f27629d356 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 21 Jul 2023 00:04:03 +0300 Subject: [PATCH 281/597] psxinterpreter: adjust COP decoding --- libpcsxcore/psxinterpreter.c | 78 +++++++++++++++++------------------- libpcsxcore/r3000a.c | 2 +- 2 files changed, 37 insertions(+), 43 deletions(-) diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index f3bf7b6ca..be15f782f 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -939,8 +939,6 @@ OP(psxMFC0) { static void setupCop(u32 sr); -OP(psxCFC0) { doLoad(regs_, _Rt_, regs_->CP0.r[_Rd_]); } - void MTC0(psxRegisters *regs_, int reg, u32 val) { // SysPrintf("MTC0 %d: %x\n", reg, val); switch (reg) { @@ -970,7 
+968,6 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) { } OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); } -OP(psxCTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); } // no exception static inline void psxNULLne(psxRegisters *regs) { @@ -999,12 +996,26 @@ OP(psxSPECIAL) { } OP(psxCOP0) { - switch (_Rs_) { + u32 rs = _Rs_; + if (rs & 0x10) { + u32 op2 = code & 0x1f; + switch (op2) { + case 0x01: + case 0x02: + case 0x06: + case 0x08: psxNULL(regs_, code); break; + case 0x10: psxRFE(regs_, code); break; + default: psxNULLne(regs_); break; + } + return; + } + switch (rs) { case 0x00: psxMFC0(regs_, code); break; - case 0x02: psxCFC0(regs_, code); break; case 0x04: psxMTC0(regs_, code); break; - case 0x06: psxCTC0(regs_, code); break; - case 0x10: psxRFE(regs_, code); break; + case 0x02: // CFC + case 0x06: psxNULL(regs_, code); break; // CTC -> exception + case 0x08: + case 0x0c: log_unhandled("BC0 %08x @%08x\n", code, regs_->pc - 4); default: psxNULLne(regs_); break; } } @@ -1014,31 +1025,27 @@ OP(psxCOP1) { log_unhandled("COP1 %08x @%08x\n", code, regs_->pc - 4); } -// TODO: wrong COP2 decoding OP(psxCOP2) { - psxCP2[_Funct_](&regs_->CP2); + u32 rt = _Rt_, rd = _Rd_, rs = _Rs_; + if (rs & 0x10) { + psxCP2[_Funct_](&regs_->CP2); + return; + } + switch (rs) { + case 0x00: doLoad(regs_, rt, MFC2(&regs_->CP2, rd)); break; // MFC2 + case 0x02: doLoad(regs_, rt, regs_->CP2C.r[rd]); break; // CFC2 + case 0x04: MTC2(&regs_->CP2, regs_->GPR.r[rt], rd); break; // MTC2 + case 0x06: CTC2(&regs_->CP2, regs_->GPR.r[rt], rd); break; // CTC2 + case 0x08: + case 0x0c: log_unhandled("BC2 %08x @%08x\n", code, regs_->pc - 4); + default: psxNULLne(regs_); break; + } } OP(psxCOP2_stall) { u32 f = _Funct_; gteCheckStall(f); - psxCP2[f](&regs_->CP2); -} - -OP(gteMFC2) { - doLoad(regs_, _Rt_, MFC2(&regs_->CP2, _Rd_)); -} - -OP(gteCFC2) { - doLoad(regs_, _Rt_, regs_->CP2C.r[_Rd_]); -} - -OP(gteMTC2) { - MTC2(&regs_->CP2, regs_->GPR.r[_Rt_], _Rd_); -} - -OP(gteCTC2) { - CTC2(&regs_->CP2, regs_->GPR.r[_Rt_], _Rd_); +
psxCOP2(regs_, code); } OP(gteLWC2) { @@ -1094,21 +1101,6 @@ OP(psxSWCx) { checkST(regs_, _oB_, 3); } -static void psxBASIC(struct psxCP2Regs *cp2regs) { - psxRegisters *regs = (void *)((u8 *)cp2regs - offsetof(psxRegisters, CP2)); - u32 code = regs->code; - assert(regs == &psxRegs); - switch (_Rs_) { - case 0x00: gteMFC2(regs, code); break; - case 0x02: gteCFC2(regs, code); break; - case 0x04: gteMTC2(regs, code); break; - case 0x06: gteCTC2(regs, code); break; - case 0x08: - case 0x0c: log_unhandled("BC2 %08x @%08x\n", code, regs->pc - 4); - default: psxNULLne(regs); break; - } -} - OP(psxREGIMM) { u32 rt = _Rt_; switch (rt) { @@ -1159,7 +1151,7 @@ static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code) = { }; void (*psxCP2[64])(struct psxCP2Regs *regs) = { - psxBASIC, gteRTPS , gteNULL , gteNULL, gteNULL, gteNULL , gteNCLIP, gteNULL, // 00 + gteNULL , gteRTPS , gteNULL , gteNULL, gteNULL, gteNULL , gteNCLIP, gteNULL, // 00 gteNULL , gteNULL , gteNULL , gteNULL, gteOP , gteNULL , gteNULL , gteNULL, // 08 gteDPCS , gteINTPL, gteMVMVA, gteNCDS, gteCDP , gteNULL , gteNCDT , gteNULL, // 10 gteNULL , gteNULL , gteNULL , gteNCCS, gteCC , gteNULL , gteNCS , gteNULL, // 18 @@ -1177,6 +1169,7 @@ static int intInit() { static void intReset() { dloadClear(&psxRegs); + psxRegs.subCycle = 0; } static inline void execI_(u8 **memRLUT, psxRegisters *regs) { @@ -1241,6 +1234,7 @@ static void intNotify(enum R3000Anote note, void *data) { break; case R3000ACPU_NOTIFY_AFTER_LOAD: dloadClear(&psxRegs); + psxRegs.subCycle = 0; setupCop(psxRegs.CP0.n.SR); // fallthrough case R3000ACPU_NOTIFY_CACHE_ISOLATED: // Armored Core? 
diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 212735842..dffbf6e74 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -96,7 +96,7 @@ void psxShutdown() { void psxException(u32 cause, enum R3000Abdt bdt, psxCP0Regs *cp0) { u32 opcode = intFakeFetch(psxRegs.pc); - if (unlikely(!Config.HLE && ((((opcode) >> 24) & 0xfe) == 0x4a))) { + if (unlikely(!Config.HLE && (opcode >> 25) == 0x25)) { // "hokuto no ken" / "Crash Bandicot 2" ... // BIOS does not allow to return to GTE instructions // (just skips it, supposedly because it's scheduled already) From 5e282df80d579f9a19c77a655c2c0dda6dc2c7b4 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sat, 11 Feb 2023 14:17:13 +0000 Subject: [PATCH 282/597] Support compiling without mmap functions Add NO_MMAP option in Makefile.libretro that can be turned ON on platforms that don't support mmap(), and for which memory mapping hooks must be provided. Signed-off-by: Paul Cercueil --- Makefile | 1 + libpcsxcore/memmap.h | 5 +++++ libpcsxcore/psxmem.c | 11 +++++++++++ 3 files changed, 17 insertions(+) diff --git a/Makefile b/Makefile index ded26893d..7b7ce5cea 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,7 @@ CFLAGS += -Wall -ggdb -Iinclude -ffast-math ifndef DEBUG CFLAGS += -O2 -DNDEBUG endif +CFLAGS += -DHAVE_MMAP=$(if $(NO_MMAP),0,1) CXXFLAGS += $(CFLAGS) #DRC_DBG = 1 #PCNT = 1 diff --git a/libpcsxcore/memmap.h b/libpcsxcore/memmap.h index 262cd7c2b..da1d0e119 100644 --- a/libpcsxcore/memmap.h +++ b/libpcsxcore/memmap.h @@ -34,6 +34,9 @@ #include <_mingw.h> #endif +#endif //_WIN32 + +#if defined(_WIN32) || !HAVE_MMAP #include #ifdef __cplusplus @@ -60,12 +63,14 @@ extern "C" { #define MS_SYNC 2 #define MS_INVALIDATE 4 +#ifdef _WIN32 void* mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off); int munmap(void *addr, size_t len); int mprotect(void *addr, size_t len, int prot); int msync(void *addr, size_t len, int flags); int mlock(const void *addr, size_t len); int munlock(const 
void *addr, size_t len); +#endif #ifdef __cplusplus }; diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 54219ae05..14e7a9e94 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -44,15 +44,26 @@ static void * psxMapDefault(unsigned long addr, size_t size, int is_fixed, enum psxMapTag tag) { +#if !HAVE_MMAP + void *ptr; + + ptr = malloc(size); + return ptr ? ptr : MAP_FAILED; +#else int flags = MAP_PRIVATE | MAP_ANONYMOUS; return mmap((void *)(uintptr_t)addr, size, PROT_READ | PROT_WRITE, flags, -1, 0); +#endif } static void psxUnmapDefault(void *ptr, size_t size, enum psxMapTag tag) { +#if !HAVE_MMAP + free(ptr); +#else munmap(ptr, size); +#endif } void *(*psxMapHook)(unsigned long addr, size_t size, int is_fixed, From f28d12a72998b0a6018b41af8f86a24b10ab506f Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sat, 11 Feb 2023 14:22:21 +0000 Subject: [PATCH 283/597] Support compiling without pthreads Add NO_PTHREAD option in the Makefile, which can be used on platforms that don't support pthreads. Since dfsound/spu.c disables threading support on Win32 and platform defining NO_OS (Switch, 3DS, PSP Vita), mark them as not having pthreads. 
Signed-off-by: Paul Cercueil --- Makefile | 3 ++- frontend/libretro.c | 42 ++++++++++++++++---------------- frontend/libretro_core_options.h | 4 +-- libpcsxcore/cdriso.c | 3 +-- plugins/dfsound/spu.c | 11 +++------ 5 files changed, 30 insertions(+), 33 deletions(-) diff --git a/Makefile b/Makefile index 7b7ce5cea..6e9d10cab 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,8 @@ CFLAGS += -Wall -ggdb -Iinclude -ffast-math ifndef DEBUG CFLAGS += -O2 -DNDEBUG endif -CFLAGS += -DHAVE_MMAP=$(if $(NO_MMAP),0,1) +CFLAGS += -DHAVE_MMAP=$(if $(NO_MMAP),0,1) \ + -DHAVE_PTHREAD=$(if $(NO_PTHREAD),0,1) CXXFLAGS += $(CFLAGS) #DRC_DBG = 1 #PCNT = 1 diff --git a/frontend/libretro.c b/frontend/libretro.c index 32d0bec36..e5b21d529 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2098,28 +2098,28 @@ static void update_variables(bool in_flight) spu_config.iUseThread = 0; } -#ifndef _WIN32 - var.value = NULL; - var.key = "pcsx_rearmed_async_cd"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) - { - if (strcmp(var.value, "async") == 0) - { - Config.AsyncCD = 1; - Config.CHD_Precache = 0; - } - else if (strcmp(var.value, "sync") == 0) - { - Config.AsyncCD = 0; - Config.CHD_Precache = 0; - } - else if (strcmp(var.value, "precache") == 0) - { - Config.AsyncCD = 0; - Config.CHD_Precache = 1; - } + if (HAVE_PTHREAD) { + var.value = NULL; + var.key = "pcsx_rearmed_async_cd"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "async") == 0) + { + Config.AsyncCD = 1; + Config.CHD_Precache = 0; + } + else if (strcmp(var.value, "sync") == 0) + { + Config.AsyncCD = 0; + Config.CHD_Precache = 0; + } + else if (strcmp(var.value, "precache") == 0) + { + Config.AsyncCD = 0; + Config.CHD_Precache = 1; + } + } } -#endif var.value = NULL; var.key = "pcsx_rearmed_noxadecoding"; diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 781c514b3..b9eaf77b6 100644 --- 
a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -750,7 +750,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "enabled", }, -#if !defined(THREAD_ENABLED) && !defined(_WIN32) && !defined(NO_OS) +#if HAVE_PTHREAD { "pcsx_rearmed_spu_thread", "Threaded SPU", @@ -765,7 +765,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled", }, -#endif // THREAD_ENABLED +#endif // HAVE_PTHREAD { "pcsx_rearmed_show_input_settings", "Show Input Settings", diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index f47fcfddc..d0440e20a 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -30,8 +30,7 @@ #include #include #define strcasecmp _stricmp -#define usleep(x) Sleep((x) / 1000) -#else +#elif HAVE_PTHREAD #include #include #include diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index ebebd2a2c..f5e8de503 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -18,9 +18,6 @@ * * ***************************************************************************/ -#if !defined(THREAD_ENABLED) && !defined(_WIN32) && !defined(NO_OS) -#define THREAD_ENABLED 1 -#endif #include "stdafx.h" #define _IN_SPU @@ -832,7 +829,7 @@ static void do_samples_finish(int *SSumLR, int ns_to, // optional worker thread handling -#if defined(THREAD_ENABLED) || defined(WANT_THREAD_CODE) +#if HAVE_PTHREAD || defined(WANT_THREAD_CODE) // worker thread state static struct spu_worker { @@ -1087,7 +1084,7 @@ static void sync_worker_thread(int force) {} static const void * const worker = NULL; -#endif // THREAD_ENABLED +#endif // HAVE_PTHREAD || defined(WANT_THREAD_CODE) //////////////////////////////////////////////////////////////////////// // MAIN SPU FUNCTION @@ -1368,7 +1365,7 @@ static void RemoveStreams(void) /* special code for TI C64x DSP */ #include "spu_c64x.c" -#elif defined(THREAD_ENABLED) +#elif HAVE_PTHREAD #include #include @@ -1467,7 +1464,7 @@ static void exit_spu_thread(void) worker = NULL; } 
-#else // if !THREAD_ENABLED +#else // if !HAVE_PTHREAD static void init_spu_thread(void) { From cddc7ca96ddefe7acabac845b4ec99cf72b3e6fc Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sat, 11 Feb 2023 14:47:30 +0000 Subject: [PATCH 284/597] Support compiling without posix_memalign() function Add NO_POSIX_MEMALIGN option in Makefile.libretro to support platform that don't provide the posix_memalign() function. Signed-off-by: Paul Cercueil --- Makefile | 3 ++- frontend/libretro.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 6e9d10cab..3d33b8c89 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,8 @@ ifndef DEBUG CFLAGS += -O2 -DNDEBUG endif CFLAGS += -DHAVE_MMAP=$(if $(NO_MMAP),0,1) \ - -DHAVE_PTHREAD=$(if $(NO_PTHREAD),0,1) + -DHAVE_PTHREAD=$(if $(NO_PTHREAD),0,1) \ + -DHAVE_POSIX_MEMALIGN=$(if $(NO_POSIX_MEMALIGN),0,1) CXXFLAGS += $(CFLAGS) #DRC_DBG = 1 #PCNT = 1 diff --git a/frontend/libretro.c b/frontend/libretro.c index e5b21d529..64df70344 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -3037,7 +3037,7 @@ void retro_init(void) #ifdef _3DS vout_buf = linearMemAlign(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2, 0x80); -#elif defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) && !defined(VITA) && !defined(__SWITCH__) +#elif defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) && HAVE_POSIX_MEMALIGN if (posix_memalign(&vout_buf, 16, VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2) != 0) vout_buf = (void *) 0; #else From 9165d434d935746da54484381ebbee754e899680 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 24 Jul 2023 23:20:12 +0300 Subject: [PATCH 285/597] try to fix win32 build HAVE_MMAP etc clashing with libretro-common --- Makefile | 6 +- frontend/libretro.c | 4 +- frontend/libretro_core_options.h | 4 +- jni/Android.mk | 225 +------------------------------ libpcsxcore/cdriso.c | 2 +- libpcsxcore/memmap.h | 2 +- libpcsxcore/psxmem.c | 4 +- plugins/dfsound/spu.c | 8 +- 8 files changed, 16 insertions(+), 
239 deletions(-) diff --git a/Makefile b/Makefile index 3d33b8c89..8e5cb3e33 100644 --- a/Makefile +++ b/Makefile @@ -6,9 +6,9 @@ CFLAGS += -Wall -ggdb -Iinclude -ffast-math ifndef DEBUG CFLAGS += -O2 -DNDEBUG endif -CFLAGS += -DHAVE_MMAP=$(if $(NO_MMAP),0,1) \ - -DHAVE_PTHREAD=$(if $(NO_PTHREAD),0,1) \ - -DHAVE_POSIX_MEMALIGN=$(if $(NO_POSIX_MEMALIGN),0,1) +CFLAGS += -DP_HAVE_MMAP=$(if $(NO_MMAP),0,1) \ + -DP_HAVE_PTHREAD=$(if $(NO_PTHREAD),0,1) \ + -DP_HAVE_POSIX_MEMALIGN=$(if $(NO_POSIX_MEMALIGN),0,1) CXXFLAGS += $(CFLAGS) #DRC_DBG = 1 #PCNT = 1 diff --git a/frontend/libretro.c b/frontend/libretro.c index 64df70344..2f758a6c8 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2098,7 +2098,7 @@ static void update_variables(bool in_flight) spu_config.iUseThread = 0; } - if (HAVE_PTHREAD) { + if (P_HAVE_PTHREAD) { var.value = NULL; var.key = "pcsx_rearmed_async_cd"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) @@ -3037,7 +3037,7 @@ void retro_init(void) #ifdef _3DS vout_buf = linearMemAlign(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2, 0x80); -#elif defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) && HAVE_POSIX_MEMALIGN +#elif defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) && P_HAVE_POSIX_MEMALIGN if (posix_memalign(&vout_buf, 16, VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2) != 0) vout_buf = (void *) 0; #else diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index b9eaf77b6..e20503ec0 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -750,7 +750,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "enabled", }, -#if HAVE_PTHREAD +#if P_HAVE_PTHREAD { "pcsx_rearmed_spu_thread", "Threaded SPU", @@ -765,7 +765,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled", }, -#endif // HAVE_PTHREAD +#endif // P_HAVE_PTHREAD { "pcsx_rearmed_show_input_settings", "Show Input Settings", diff --git a/jni/Android.mk b/jni/Android.mk 
index 501b0671a..03ccff7ea 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -1,224 +1 @@ -LOCAL_PATH := $(call my-dir) - -$(shell cd "$(LOCAL_PATH)" && ((git describe --always || echo) | sed -e 's/.*/#define REV "\0"/' > ../frontend/revision.h_)) -$(shell cd "$(LOCAL_PATH)" && (diff -q ../frontend/revision.h_ ../frontend/revision.h > /dev/null 2>&1 || cp ../frontend/revision.h_ ../frontend/revision.h)) -$(shell cd "$(LOCAL_PATH)" && (rm ../frontend/revision.h_)) - -HAVE_CHD ?= 1 -USE_LIBRETRO_VFS ?= 0 - -ROOT_DIR := $(LOCAL_PATH)/.. -CORE_DIR := $(ROOT_DIR)/libpcsxcore -SPU_DIR := $(ROOT_DIR)/plugins/dfsound -GPU_DIR := $(ROOT_DIR)/plugins/gpulib -CDR_DIR := $(ROOT_DIR)/plugins/cdrcimg -INPUT_DIR := $(ROOT_DIR)/plugins/dfinput -FRONTEND_DIR := $(ROOT_DIR)/frontend -NEON_DIR := $(ROOT_DIR)/plugins/gpu_neon -UNAI_DIR := $(ROOT_DIR)/plugins/gpu_unai -PEOPS_DIR := $(ROOT_DIR)/plugins/dfxvideo -DYNAREC_DIR := $(ROOT_DIR)/libpcsxcore/new_dynarec -DEPS_DIR := $(ROOT_DIR)/deps -LIBRETRO_COMMON := $(ROOT_DIR)/libretro-common -EXTRA_INCLUDES := - -# core -SOURCES_C := $(CORE_DIR)/cdriso.c \ - $(CORE_DIR)/cdrom.c \ - $(CORE_DIR)/cheat.c \ - $(CORE_DIR)/database.c \ - $(CORE_DIR)/decode_xa.c \ - $(CORE_DIR)/mdec.c \ - $(CORE_DIR)/misc.c \ - $(CORE_DIR)/plugins.c \ - $(CORE_DIR)/ppf.c \ - $(CORE_DIR)/psxbios.c \ - $(CORE_DIR)/psxcommon.c \ - $(CORE_DIR)/psxcounters.c \ - $(CORE_DIR)/psxdma.c \ - $(CORE_DIR)/psxhle.c \ - $(CORE_DIR)/psxhw.c \ - $(CORE_DIR)/psxinterpreter.c \ - $(CORE_DIR)/psxmem.c \ - $(CORE_DIR)/r3000a.c \ - $(CORE_DIR)/sio.c \ - $(CORE_DIR)/spu.c \ - $(CORE_DIR)/gte.c \ - $(CORE_DIR)/gte_nf.c \ - $(CORE_DIR)/gte_divider.c - -# spu -SOURCES_C += $(SPU_DIR)/dma.c \ - $(SPU_DIR)/freeze.c \ - $(SPU_DIR)/registers.c \ - $(SPU_DIR)/spu.c \ - $(SPU_DIR)/out.c \ - $(SPU_DIR)/nullsnd.c - -# gpu -SOURCES_C += $(GPU_DIR)/gpu.c \ - $(GPU_DIR)/vout_pl.c - -# cdrcimg -SOURCES_C += $(CDR_DIR)/cdrcimg.c - -# dfinput -SOURCES_C += $(INPUT_DIR)/main.c \ - $(INPUT_DIR)/pad.c 
\ - $(INPUT_DIR)/guncon.c - -# frontend -SOURCES_C += $(FRONTEND_DIR)/main.c \ - $(FRONTEND_DIR)/plugin.c \ - $(FRONTEND_DIR)/cspace.c \ - $(FRONTEND_DIR)/libretro.c - -# libchdr -SOURCES_C += \ - $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/Alloc.c \ - $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/Bra86.c \ - $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/BraIA64.c \ - $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/CpuArch.c \ - $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/Delta.c \ - $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/LzFind.c \ - $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/Lzma86Dec.c \ - $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/LzmaDec.c \ - $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/LzmaEnc.c \ - $(DEPS_DIR)/libchdr/deps/lzma-19.00/src/Sort.c \ - $(DEPS_DIR)/libchdr/src/libchdr_bitstream.c \ - $(DEPS_DIR)/libchdr/src/libchdr_cdrom.c \ - $(DEPS_DIR)/libchdr/src/libchdr_chd.c \ - $(DEPS_DIR)/libchdr/src/libchdr_flac.c \ - $(DEPS_DIR)/libchdr/src/libchdr_huffman.c -SOURCES_ASM := - -COREFLAGS := -ffast-math -funroll-loops -DHAVE_LIBRETRO -DNO_FRONTEND -DFRONTEND_SUPPORTS_RGB565 -DANDROID -DREARMED -COREFLAGS += -DHAVE_CHD -D_7ZIP_ST - -ifeq ($(USE_LIBRETRO_VFS),1) -SOURCES_C += \ - $(LIBRETRO_COMMON)/compat/compat_posix_string.c \ - $(LIBRETRO_COMMON)/compat/fopen_utf8.c \ - $(LIBRETRO_COMMON)/encodings/compat_strl.c \ - $(LIBRETRO_COMMON)/encodings/encoding_utf.c \ - $(LIBRETRO_COMMON)/file/file_path.c \ - $(LIBRETRO_COMMON)/streams/file_stream.c \ - $(LIBRETRO_COMMON)/streams/file_stream_transforms.c \ - $(LIBRETRO_COMMON)/string/stdstring.c \ - $(LIBRETRO_COMMON)/time/rtime.c \ - $(LIBRETRO_COMMON)/vfs/vfs_implementation.c -COREFLAGS += -DUSE_LIBRETRO_VFS -endif - -HAVE_ARI64=0 -HAVE_LIGHTREC=0 -LIGHTREC_CUSTOM_MAP=0 -HAVE_GPU_NEON=0 -ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) - HAVE_ARI64=1 - HAVE_GPU_NEON=1 -else ifeq ($(TARGET_ARCH_ABI),armeabi) - HAVE_ARI64=1 -else ifeq ($(TARGET_ARCH_ABI),arm64-v8a) - HAVE_ARI64=1 - HAVE_GPU_NEON=1 -else ifeq ($(TARGET_ARCH_ABI),x86_64) - HAVE_LIGHTREC=1 - 
HAVE_GPU_NEON=1 -else ifeq ($(TARGET_ARCH_ABI),x86) - HAVE_LIGHTREC=1 - HAVE_GPU_NEON=1 -else - COREFLAGS += -DDRC_DISABLE -endif - COREFLAGS += -DLIGHTREC_CUSTOM_MAP=$(LIGHTREC_CUSTOM_MAP) - -ifeq ($(HAVE_ARI64),1) - SOURCES_C += $(DYNAREC_DIR)/new_dynarec.c \ - $(DYNAREC_DIR)/pcsxmem.c - ifeq ($(TARGET_ARCH_ABI),arm64-v8a) - SOURCES_ASM += $(DYNAREC_DIR)/linkage_arm64.S - else - SOURCES_ASM += $(CORE_DIR)/gte_arm.S \ - $(SPU_DIR)/arm_utils.S \ - $(DYNAREC_DIR)/linkage_arm.S - endif -endif - SOURCES_C += $(DYNAREC_DIR)/emu_if.c - -ifeq ($(HAVE_LIGHTREC),1) - COREFLAGS += -DLIGHTREC -DLIGHTREC_STATIC - EXTRA_INCLUDES += $(DEPS_DIR)/lightning/include \ - $(DEPS_DIR)/lightrec \ - $(DEPS_DIR)/lightrec/tlsf \ - $(ROOT_DIR)/include/lightning \ - $(ROOT_DIR)/include/lightrec - SOURCES_C += $(DEPS_DIR)/lightrec/blockcache.c \ - $(DEPS_DIR)/lightrec/disassembler.c \ - $(DEPS_DIR)/lightrec/emitter.c \ - $(DEPS_DIR)/lightrec/interpreter.c \ - $(DEPS_DIR)/lightrec/lightrec.c \ - $(DEPS_DIR)/lightrec/memmanager.c \ - $(DEPS_DIR)/lightrec/optimizer.c \ - $(DEPS_DIR)/lightrec/regcache.c \ - $(DEPS_DIR)/lightrec/recompiler.c \ - $(DEPS_DIR)/lightrec/reaper.c - SOURCES_C += $(DEPS_DIR)/lightning/lib/jit_disasm.c \ - $(DEPS_DIR)/lightning/lib/jit_memory.c \ - $(DEPS_DIR)/lightning/lib/jit_names.c \ - $(DEPS_DIR)/lightning/lib/jit_note.c \ - $(DEPS_DIR)/lightning/lib/jit_print.c \ - $(DEPS_DIR)/lightning/lib/jit_size.c \ - $(DEPS_DIR)/lightning/lib/lightning.c - SOURCES_C += $(CORE_DIR)/lightrec/plugin.c $(DEPS_DIR)/lightrec/tlsf/tlsf.c -ifeq ($(LIGHTREC_CUSTOM_MAP),1) - SOURCES_C += $(CORE_DIR)/lightrec/mem.c -endif -endif - - -ifeq ($(HAVE_GPU_NEON),1) - COREFLAGS += -DNEON_BUILD -DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP -DGPU_NEON - ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) - COREFLAGS += -DHAVE_bgr555_to_rgb565 -DHAVE_bgr888_to_x - SOURCES_ASM += $(CORE_DIR)/gte_neon.S \ - $(NEON_DIR)/psx_gpu/psx_gpu_arm_neon.S \ - $(FRONTEND_DIR)/cspace_neon.S - else - COREFLAGS += -DSIMD_BUILD - 
SOURCES_C += $(NEON_DIR)/psx_gpu/psx_gpu_simd.c - endif - SOURCES_C += $(NEON_DIR)/psx_gpu_if.c -else ifeq ($(TARGET_ARCH_ABI),armeabi) - COREFLAGS += -DUSE_GPULIB=1 -DGPU_UNAI - COREFLAGS += -DHAVE_bgr555_to_rgb565 - SOURCES_ASM += $(UNAI_DIR)/gpu_arm.S \ - $(FRONTEND_DIR)/cspace_arm.S - SOURCES_C += $(UNAI_DIR)/gpulib_if.cpp -else - COREFLAGS += -fno-strict-aliasing -DGPU_PEOPS - SOURCES_C += $(PEOPS_DIR)/gpulib_if.c -endif - -GIT_VERSION := " $(shell git rev-parse --short HEAD || echo unknown)" -ifneq ($(GIT_VERSION)," unknown") - COREFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" -endif - -include $(CLEAR_VARS) -LOCAL_MODULE := retro -LOCAL_SRC_FILES := $(SOURCES_C) $(SOURCES_ASM) -LOCAL_CFLAGS := $(COREFLAGS) -LOCAL_C_INCLUDES := $(ROOT_DIR)/include -LOCAL_C_INCLUDES += $(DEPS_DIR)/crypto $(DEPS_DIR)/libchdr/deps/lzma-19.00/include $(DEPS_DIR)/libchdr/include $(DEPS_DIR)/libchdr/include/libchdr -LOCAL_C_INCLUDES += $(LIBRETRO_COMMON)/include -LOCAL_C_INCLUDES += $(EXTRA_INCLUDES) -LOCAL_LDFLAGS := -Wl,-version-script=$(FRONTEND_DIR)/link.T -LOCAL_LDLIBS := -lz -llog -LOCAL_ARM_MODE := arm - -ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) - LOCAL_ARM_NEON := true -endif - -include $(BUILD_SHARED_LIBRARY) +$(error This file is unmaintained. 
Please use the libretro fork: https://github.com/libretro/pcsx_rearmed) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index d0440e20a..eeb2c351a 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -30,7 +30,7 @@ #include #include #define strcasecmp _stricmp -#elif HAVE_PTHREAD +#elif P_HAVE_PTHREAD #include #include #include diff --git a/libpcsxcore/memmap.h b/libpcsxcore/memmap.h index da1d0e119..d16dea0b9 100644 --- a/libpcsxcore/memmap.h +++ b/libpcsxcore/memmap.h @@ -36,7 +36,7 @@ #endif //_WIN32 -#if defined(_WIN32) || !HAVE_MMAP +#if defined(_WIN32) || !P_HAVE_MMAP #include #ifdef __cplusplus diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 14e7a9e94..42755e529 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -44,7 +44,7 @@ static void * psxMapDefault(unsigned long addr, size_t size, int is_fixed, enum psxMapTag tag) { -#if !HAVE_MMAP +#if !P_HAVE_MMAP void *ptr; ptr = malloc(size); @@ -59,7 +59,7 @@ static void * psxMapDefault(unsigned long addr, size_t size, static void psxUnmapDefault(void *ptr, size_t size, enum psxMapTag tag) { -#if !HAVE_MMAP +#if !P_HAVE_MMAP free(ptr); #else munmap(ptr, size); diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index f5e8de503..f6730d64a 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -829,7 +829,7 @@ static void do_samples_finish(int *SSumLR, int ns_to, // optional worker thread handling -#if HAVE_PTHREAD || defined(WANT_THREAD_CODE) +#if P_HAVE_PTHREAD || defined(WANT_THREAD_CODE) // worker thread state static struct spu_worker { @@ -1084,7 +1084,7 @@ static void sync_worker_thread(int force) {} static const void * const worker = NULL; -#endif // HAVE_PTHREAD || defined(WANT_THREAD_CODE) +#endif // P_HAVE_PTHREAD || defined(WANT_THREAD_CODE) //////////////////////////////////////////////////////////////////////// // MAIN SPU FUNCTION @@ -1365,7 +1365,7 @@ static void RemoveStreams(void) /* special code for TI C64x DSP */ #include 
"spu_c64x.c" -#elif HAVE_PTHREAD +#elif P_HAVE_PTHREAD #include #include @@ -1464,7 +1464,7 @@ static void exit_spu_thread(void) worker = NULL; } -#else // if !HAVE_PTHREAD +#else // if !P_HAVE_PTHREAD static void init_spu_thread(void) { From a5cd72d0e598f037fd9d9f23948af5b2fb06e2eb Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 21 Jul 2023 23:33:10 +0300 Subject: [PATCH 286/597] drc: update according to interpreter much larger commit than I'd like but it's too much work to split it --- include/compiler_features.h | 16 +- libpcsxcore/new_dynarec/assem_arm.c | 156 +++- libpcsxcore/new_dynarec/assem_arm64.c | 128 ++- libpcsxcore/new_dynarec/linkage_arm.S | 37 +- libpcsxcore/new_dynarec/linkage_arm64.S | 37 +- libpcsxcore/new_dynarec/new_dynarec.c | 870 ++++++++---------- libpcsxcore/new_dynarec/patches/trace_drc_chk | 67 +- libpcsxcore/new_dynarec/patches/trace_intr | 212 +++-- libpcsxcore/psxinterpreter.c | 16 +- 9 files changed, 793 insertions(+), 746 deletions(-) diff --git a/include/compiler_features.h b/include/compiler_features.h index 384186645..753706d7f 100644 --- a/include/compiler_features.h +++ b/include/compiler_features.h @@ -2,14 +2,28 @@ #ifdef __GNUC__ # define likely(x) __builtin_expect((x),1) # define unlikely(x) __builtin_expect((x),0) -# define noinline __attribute__((noinline)) +# ifdef __clang__ +# define noinline __attribute__((noinline)) +# else +# define noinline __attribute__((noinline,noclone)) +# endif +# define unused __attribute__((unused)) #else # define likely(x) (x) # define unlikely(x) (x) # define noinline +# define unused #endif #ifndef __has_builtin #define __has_builtin(x) 0 #endif +#if __has_builtin(__builtin_add_overflow) || (defined(__GNUC__) && __GNUC__ >= 5) +#define add_overflow(a, b, r) __builtin_add_overflow(a, b, &(r)) +#define sub_overflow(a, b, r) __builtin_sub_overflow(a, b, &(r)) +#else +#define add_overflow(a, b, r) ({r = (u32)a + (u32)b; (a ^ ~b) & (a ^ r) & (1u<<31);}) +#define sub_overflow(a, b, r) ({r = (u32)a 
- (u32)b; (a ^ b) & (a ^ r) & (1u<<31);}) +#endif + diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 2847e516d..88b2ff36c 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -351,12 +351,24 @@ static void emit_neg(int rs, int rt) output_w32(0xe2600000|rd_rn_rm(rt,rs,0)); } +static void emit_negs(int rs, int rt) +{ + assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]); + output_w32(0xe2700000|rd_rn_rm(rt,rs,0)); +} + static void emit_sub(int rs1,int rs2,int rt) { assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2)); } +static void emit_subs(int rs1,int rs2,int rt) +{ + assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2)); +} + static void emit_zeroreg(int rt) { assem_debug("mov %s,#0\n",regname[rt]); @@ -489,6 +501,12 @@ static void emit_not(int rs,int rt) output_w32(0xe1e00000|rd_rn_rm(rt,0,rs)); } +static void emit_mvneq(int rs,int rt) +{ + assem_debug("mvneq %s,%s\n",regname[rt],regname[rs]); + output_w32(0x01e00000|rd_rn_rm(rt,0,rs)); +} + static void emit_and(u_int rs1,u_int rs2,u_int rt) { assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); @@ -577,29 +595,37 @@ static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt) emit_addimm(rs, imm, rt); } -static void emit_addimm_and_set_flags(int imm,int rt) +static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt) { assert(imm>-65536&&imm<65536); u_int armval; - if(genimm(imm,&armval)) { - assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm); - output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval); - }else if(genimm(-imm,&armval)) { - assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm); - output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval); - }else if(imm<0) { - assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00); + if (genimm(imm, &armval)) { + 
assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2900000|rd_rn_rm(rt,rs,0)|armval); + } else if (genimm(-imm, &armval)) { + assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2500000|rd_rn_rm(rt,rs,0)|armval); + } else if (rs != rt) { + emit_movimm(imm, rt); + emit_adds(rs, rt, rt); + } else if (imm < 0) { + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00); assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF); - output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8)); + output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8)); output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0)); - }else{ - assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00); + } else { + assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00); assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF); - output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8)); + output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8)); output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); } } +static void emit_addimm_and_set_flags(int imm, u_int rt) +{ + emit_addimm_and_set_flags3(rt, imm, rt); +} + static void emit_addnop(u_int r) { assert(r<16); @@ -1012,6 +1038,14 @@ static void emit_jge(const void *a_) output_w32(0xaa000000|offset); } +static void emit_jo(const void *a_) +{ + int a = (int)a_; + assem_debug("bvs %x\n",a); + u_int offset=genjmp(a); + output_w32(0x6a000000|offset); +} + static void emit_jno(const void *a_) { int a = (int)a_; @@ -1218,7 +1252,7 @@ static void emit_readword(void *addr, int rt) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; assert(offset<4096); - assem_debug("ldr %s,fp+%d\n",regname[rt],offset); + assem_debug("ldr %s,fp+%#x%s\n", regname[rt], offset, fpofs_name(offset)); output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset); } #define emit_readptr emit_readword @@ -1278,7 +1312,7 @@ static void emit_writeword(int rt, void *addr) { uintptr_t 
offset = (u_char *)addr - (u_char *)&dynarec_local; assert(offset<4096); - assem_debug("str %s,fp+%d\n",regname[rt],offset); + assem_debug("str %s,fp+%#x%s\n", regname[rt], offset, fpofs_name(offset)); output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset); } @@ -1631,7 +1665,7 @@ static void do_readstub(int n) u_int reglist=stubs[n].e; const signed char *i_regmap=i_regs->regmap; int rt; - if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) { + if(dops[i].itype==C2LS||dops[i].itype==LOADLR) { rt=get_reg(i_regmap,FTEMP); }else{ rt=get_reg(i_regmap,dops[i].rt1); @@ -1658,7 +1692,7 @@ static void do_readstub(int n) emit_shrimm(rs,12,temp2); emit_readword_dualindexedx4(temp,temp2,temp2); emit_lsls_imm(temp2,1,temp2); - if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { + if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { switch(type) { case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break; case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break; @@ -1691,7 +1725,7 @@ static void do_readstub(int n) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); emit_far_call(handler); - if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { + if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { mov_loadtype_adj(type,0,rt); } if(restore_jump) @@ -1787,7 +1821,7 @@ static void do_writestub(int n) u_int reglist=stubs[n].e; const signed char *i_regmap=i_regs->regmap; int rt,r; - if(dops[i].itype==C1LS||dops[i].itype==C2LS) { + if(dops[i].itype==C2LS) { rt=get_reg(i_regmap,r=FTEMP); }else{ rt=get_reg(i_regmap,r=dops[i].rs2); @@ -2095,15 +2129,11 @@ static void multdiv_assemble_arm(int i, const struct regstat *i_regs) // case 0x19: MULTU // case 0x1A: DIV // case 0x1B: DIVU - // case 0x1C: DMULT - // case 0x1D: DMULTU - // case 0x1E: DDIV - // case 0x1F: DDIVU if(dops[i].rs1&&dops[i].rs2) { - if((dops[i].opcode2&4)==0) // 32-bit + switch (dops[i].opcode2) { - if(dops[i].opcode2==0x18) // MULT + case 0x18: // MULT { 
signed char m1=get_reg(i_regs->regmap,dops[i].rs1); signed char m2=get_reg(i_regs->regmap,dops[i].rs2); @@ -2115,7 +2145,8 @@ static void multdiv_assemble_arm(int i, const struct regstat *i_regs) assert(lo>=0); emit_smull(m1,m2,hi,lo); } - if(dops[i].opcode2==0x19) // MULTU + break; + case 0x19: // MULTU { signed char m1=get_reg(i_regs->regmap,dops[i].rs1); signed char m2=get_reg(i_regs->regmap,dops[i].rs2); @@ -2127,14 +2158,16 @@ static void multdiv_assemble_arm(int i, const struct regstat *i_regs) assert(lo>=0); emit_umull(m1,m2,hi,lo); } - if(dops[i].opcode2==0x1A) // DIV + break; + case 0x1A: // DIV { signed char d1=get_reg(i_regs->regmap,dops[i].rs1); signed char d2=get_reg(i_regs->regmap,dops[i].rs2); - assert(d1>=0); - assert(d2>=0); signed char quotient=get_reg(i_regs->regmap,LOREG); signed char remainder=get_reg(i_regs->regmap,HIREG); + void *jaddr_div0; + assert(d1>=0); + assert(d2>=0); assert(quotient>=0); assert(remainder>=0); emit_movs(d1,remainder); @@ -2142,11 +2175,12 @@ static void multdiv_assemble_arm(int i, const struct regstat *i_regs) emit_negmi(quotient,quotient); // .. quotient and .. emit_negmi(remainder,remainder); // .. 
remainder for div0 case (will be negated back after jump) emit_movs(d2,HOST_TEMPREG); - emit_jeq(out+52); // Division by zero + jaddr_div0 = out; + emit_jeq(0); // Division by zero emit_negsmi(HOST_TEMPREG,HOST_TEMPREG); #ifdef HAVE_ARMV5 emit_clz(HOST_TEMPREG,quotient); - emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG); + emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG); // shifted divisor #else emit_movimm(0,quotient); emit_addpl_imm(quotient,1,quotient); @@ -2162,23 +2196,27 @@ static void multdiv_assemble_arm(int i, const struct regstat *i_regs) emit_jcc(out-16); // -4 emit_teq(d1,d2); emit_negmi(quotient,quotient); + set_jump_target(jaddr_div0, out); emit_test(d1,d1); emit_negmi(remainder,remainder); } - if(dops[i].opcode2==0x1B) // DIVU + break; + case 0x1B: // DIVU { signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor - assert(d1>=0); - assert(d2>=0); signed char quotient=get_reg(i_regs->regmap,LOREG); signed char remainder=get_reg(i_regs->regmap,HIREG); + void *jaddr_div0; + assert(d1>=0); + assert(d2>=0); assert(quotient>=0); assert(remainder>=0); emit_mov(d1,remainder); emit_movimm(0xffffffff,quotient); // div0 case emit_test(d2,d2); - emit_jeq(out+40); // Division by zero + jaddr_div0 = out; + emit_jeq(0); // Division by zero #ifdef HAVE_ARMV5 emit_clz(d2,HOST_TEMPREG); emit_movimm(1<<31,quotient); @@ -2196,20 +2234,54 @@ static void multdiv_assemble_arm(int i, const struct regstat *i_regs) emit_adcs(quotient,quotient,quotient); emit_shrcc_imm(d2,1,d2); emit_jcc(out-16); // -4 + set_jump_target(jaddr_div0, out); } + break; } - else // 64-bit - assert(0); } else { - // Multiply by zero is zero. - // MIPS does not have a divide by zero exception. - // The result is undefined, we return zero. 
signed char hr=get_reg(i_regs->regmap,HIREG); signed char lr=get_reg(i_regs->regmap,LOREG); - if(hr>=0) emit_zeroreg(hr); - if(lr>=0) emit_zeroreg(lr); + if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0 + { + if (dops[i].rs1) { + signed char numerator = get_reg(i_regs->regmap, dops[i].rs1); + assert(numerator >= 0); + if (hr < 0) + hr = HOST_TEMPREG; + emit_movs(numerator, hr); + if (lr >= 0) { + if (dops[i].opcode2 == 0x1A) { // DIV + emit_movimm(0xffffffff, lr); + emit_negmi(lr, lr); + } + else + emit_movimm(~0, lr); + } + } + else { + if (hr >= 0) emit_zeroreg(hr); + if (lr >= 0) emit_movimm(~0,lr); + } + } + else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0) + { + signed char denominator = get_reg(i_regs->regmap, dops[i].rs2); + assert(denominator >= 0); + if (hr >= 0) emit_zeroreg(hr); + if (lr >= 0) { + emit_zeroreg(lr); + emit_test(denominator, denominator); + emit_mvneq(lr, lr); + } + } + else + { + // Multiply by zero is zero. 
+ if (hr >= 0) emit_zeroreg(hr); + if (lr >= 0) emit_zeroreg(lr); + } } } #define multdiv_assemble multdiv_assemble_arm diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 6f9c91d9c..d35ad451e 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -302,6 +302,12 @@ static void emit_add(u_int rs1, u_int rs2, u_int rt) output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt)); } +static void emit_adds(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("adds %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]); + output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt)); +} + static void emit_add64(u_int rs1, u_int rs2, u_int rt) { assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]); @@ -315,19 +321,37 @@ static void emit_adds64(u_int rs1, u_int rs2, u_int rt) } #define emit_adds_ptr emit_adds64 +static void emit_add_lsrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) +{ + assem_debug("add %s,%s,%s,lsr #%u\n",regname[rt],regname[rs1],regname[rs2],shift); + output_w32(0x0b400000 | rm_imm6_rn_rd(rs2, shift, rs1, rt)); +} + static void emit_neg(u_int rs, u_int rt) { assem_debug("neg %s,%s\n",regname[rt],regname[rs]); output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt)); } +static void emit_negs(u_int rs, u_int rt) +{ + assem_debug("negs %s,%s\n",regname[rt],regname[rs]); + output_w32(0x6b000000 | rm_rn_rd(rs, WZR, rt)); +} + static void emit_sub(u_int rs1, u_int rs2, u_int rt) { assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]); output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt)); } -static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) +static void emit_subs(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("subs %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]); + output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt)); +} + +static unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) { assem_debug("sub %s,%s,%s,asr 
#%u\n",regname[rt],regname[rs1],regname[rs2],shift); output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt)); @@ -422,7 +446,7 @@ static void emit_readword(void *addr, u_int rt) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; if (!(offset & 3) && offset <= 16380) { - assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset); + assem_debug("ldr %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset)); output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt)); } else @@ -433,7 +457,7 @@ static void emit_readdword(void *addr, u_int rt) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; if (!(offset & 7) && offset <= 32760) { - assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset); + assem_debug("ldr %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset)); output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt)); } else @@ -482,7 +506,7 @@ static void emit_writeword(u_int rt, void *addr) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; if (!(offset & 3) && offset <= 16380) { - assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset); + assem_debug("str %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset)); output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt)); } else @@ -493,7 +517,7 @@ static void emit_writedword(u_int rt, void *addr) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; if (!(offset & 7) && offset <= 32760) { - assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset); + assem_debug("str %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset)); output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt)); } else @@ -564,6 +588,12 @@ static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt)); } +static void emit_orn_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) +{ + assem_debug("orn %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift); + output_w32(0x2aa00000 | 
rm_imm6_rn_rd(rs2, shift, rs1, rt)); +} + static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt) { assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm); @@ -595,24 +625,35 @@ static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm); output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt)); } - else if (imm < 16777216) { - assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000); - output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt)); - if ((imm & 0xfff) || s) { - assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff); - output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt)); + else if (imm < 16777216 && (!(imm & 0xfff) || !s)) { + assem_debug("add%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], imm&0xfff000); + output_w32(0x11400000 | is64 | s | imm12_rn_rd(imm >> 12, rs, rt)); + if (imm & 0xfff) { + assem_debug("add %s,%s,#%#lx\n", regname[rt], regname[rt], imm&0xfff); + output_w32(0x11000000 | is64 | imm12_rn_rd(imm & 0xfff, rt, rt)); } } - else if (-imm < 16777216) { - assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000); - output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt)); - if ((imm & 0xfff) || s) { - assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff); - output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt)); + else if (-imm < 16777216 && (!(-imm & 0xfff) || !s)) { + assem_debug("sub%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], -imm&0xfff000); + output_w32(0x51400000 | is64 | s | imm12_rn_rd(-imm >> 12, rs, rt)); + if (-imm & 0xfff) { + assem_debug("sub %s,%s,#%#lx\n", regname[rt], regname[rt], -imm&0xfff); + output_w32(0x51000000 | is64 | imm12_rn_rd(-imm & 0xfff, rt, rt)); } } - else - abort(); + else { + u_int tmp = rt; + assert(!is64); + if (rs == rt) { + host_tempreg_acquire(); + tmp = HOST_TEMPREG; + } + 
emit_movimm(imm, tmp); + assem_debug("add%s %s,%s,%s\n", st, regname[rt], regname[rs], regname[tmp]); + output_w32(0x0b000000 | s | rm_rn_rd(rs, tmp, rt)); + if (tmp == HOST_TEMPREG) + host_tempreg_release(); + } } static void emit_addimm(u_int rs, uintptr_t imm, u_int rt) @@ -639,6 +680,11 @@ static void emit_addimm_and_set_flags(int imm, u_int rt) emit_addimm_s(1, 0, rt, imm, rt); } +static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt) +{ + emit_addimm_s(1, 0, rs, imm, rt); +} + static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt) { const char *names[] = { "and", "orr", "eor", "ands" }; @@ -844,6 +890,12 @@ static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt) output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt)); } +static void emit_csinvne_reg(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("csinv %s,%s,%s,ne\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x5a800000 | (COND_NE << 12) | rm_rn_rd(rs2, rs1, rt)); +} + static void emit_slti32(u_int rs,int imm,u_int rt) { if(rs!=rt) emit_zeroreg(rt); @@ -972,6 +1024,13 @@ static void emit_jge(const void *a) output_w32(0x54000000 | (offset << 5) | COND_GE); } +static void emit_jo(const void *a) +{ + assem_debug("bvs %p\n", a); + u_int offset = genjmpcc(a); + output_w32(0x54000000 | (offset << 5) | COND_VS); +} + static void emit_jno(const void *a) { assem_debug("bvc %p\n", a); @@ -1405,7 +1464,7 @@ static void do_readstub(int n) u_int reglist = stubs[n].e; const signed char *i_regmap = i_regs->regmap; int rt; - if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) { + if(dops[i].itype==C2LS||dops[i].itype==LOADLR) { rt=get_reg(i_regmap,FTEMP); }else{ rt=get_reg(i_regmap,dops[i].rt1); @@ -1435,7 +1494,7 @@ static void do_readstub(int n) emit_adds64(temp2,temp2,temp2); handler_jump=out; emit_jc(0); - if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { + if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { switch(type) { case 
LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break; case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break; @@ -1470,7 +1529,7 @@ static void do_readstub(int n) emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); emit_far_call(handler); // (no cycle reload after read) - if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { + if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { loadstore_extend(type,0,rt); } if(restore_jump) @@ -1561,7 +1620,7 @@ static void do_writestub(int n) u_int reglist=stubs[n].e; signed char *i_regmap=i_regs->regmap; int rt,r; - if(dops[i].itype==C1LS||dops[i].itype==C2LS) { + if(dops[i].itype==C2LS) { rt=get_reg(i_regmap,r=FTEMP); }else{ rt=get_reg(i_regmap,r=dops[i].rs2); @@ -1827,8 +1886,10 @@ static void multdiv_assemble_arm64(int i, const struct regstat *i_regs) // div 0 quotient (remainder is already correct) host_tempreg_acquire(); - if (dops[i].opcode2 == 0x1A) // DIV - emit_sub_asrimm(0,numerator,31,HOST_TEMPREG); + if (dops[i].opcode2 == 0x1A) { // DIV + emit_add_lsrimm(WZR,numerator,31,HOST_TEMPREG); + emit_orn_asrimm(HOST_TEMPREG,numerator,31,HOST_TEMPREG); + } else emit_movimm(~0,HOST_TEMPREG); emit_test(denominator,denominator); @@ -1852,8 +1913,10 @@ static void multdiv_assemble_arm64(int i, const struct regstat *i_regs) if (hr >= 0) emit_mov(numerator,hr); if (lr >= 0) { - if (dops[i].opcode2 == 0x1A) // DIV - emit_sub_asrimm(0,numerator,31,lr); + if (dops[i].opcode2 == 0x1A) { // DIV + emit_add_lsrimm(WZR,numerator,31,lr); + emit_orn_asrimm(lr,numerator,31,lr); + } else emit_movimm(~0,lr); } @@ -1863,6 +1926,17 @@ static void multdiv_assemble_arm64(int i, const struct regstat *i_regs) if (lr >= 0) emit_movimm(~0,lr); } } + else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0) + { + signed char denominator = get_reg(i_regs->regmap, dops[i].rs2); + assert(denominator >= 0); + if (hr >= 0) emit_zeroreg(hr); + if (lr >= 0) { + emit_zeroreg(lr); + emit_test(denominator, denominator); + 
emit_csinvne_reg(lr, lr, lr); + } + } else { // Multiply by zero is zero. diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 7a6d2edd8..f859817ad 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -261,41 +261,28 @@ FUNCTION(cc_interrupt): .size cc_interrupt, .-cc_interrupt .align 2 -FUNCTION(fp_exception): - mov r2, #0x10000000 -.E7: - ldr r1, [fp, #LO_reg_cop0+48] /* Status */ - mov r3, #0x80000000 - str r0, [fp, #LO_reg_cop0+56] /* EPC */ - orr r1, #2 - add r2, r2, #0x2c - str r1, [fp, #LO_reg_cop0+48] /* Status */ - str r2, [fp, #LO_reg_cop0+52] /* Cause */ - add r0, r3, #0x80 - bl ndrc_get_addr_ht - mov pc, r0 - .size fp_exception, .-fp_exception - .align 2 -FUNCTION(fp_exception_ds): - mov r2, #0x90000000 /* Set high bit if delay slot */ - b .E7 - .size fp_exception_ds, .-fp_exception_ds - - .align 2 +FUNCTION(jump_overflow_ds): + mov r0, #(12<<2) /* R3000E_Ov */ + mov r1, #1 + b call_psxException +FUNCTION(jump_overflow): + mov r0, #(12<<2) + mov r1, #0 + b call_psxException FUNCTION(jump_break_ds): - mov r0, #0x24 + mov r0, #(9<<2) /* R3000E_Bp */ mov r1, #1 b call_psxException FUNCTION(jump_break): - mov r0, #0x24 + mov r0, #(9<<2) mov r1, #0 b call_psxException FUNCTION(jump_syscall_ds): - mov r0, #0x20 + mov r0, #(8<<2) /* R3000E_Syscall */ mov r1, #2 b call_psxException FUNCTION(jump_syscall): - mov r0, #0x20 + mov r0, #(8<<2) mov r1, #0 call_psxException: diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index bc5f1151e..38c78dc36 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -119,41 +119,28 @@ FUNCTION(cc_interrupt): .size cc_interrupt, .-cc_interrupt .align 2 -FUNCTION(fp_exception): - mov w2, #0x10000000 -0: - ldr w1, [rFP, #LO_reg_cop0+48] /* Status */ - mov w3, #0x80000000 - str w0, [rFP, #LO_reg_cop0+56] /* EPC */ - orr w1, w1, #2 - add w2, w2, #0x2c - str 
w1, [rFP, #LO_reg_cop0+48] /* Status */ - str w2, [rFP, #LO_reg_cop0+52] /* Cause */ - add w0, w3, #0x80 - bl ndrc_get_addr_ht - br x0 - .size fp_exception, .-fp_exception - .align 2 -FUNCTION(fp_exception_ds): - mov w2, #0x90000000 /* Set high bit if delay slot */ - b 0b - .size fp_exception_ds, .-fp_exception_ds - - .align 2 +FUNCTION(jump_overflow_ds): + mov w0, #(12<<2) /* R3000E_Ov */ + mov w1, #1 + b call_psxException +FUNCTION(jump_overflow): + mov w0, #(12<<2) + mov w1, #0 + b call_psxException FUNCTION(jump_break_ds): - mov w0, #0x24 + mov w0, #(9<<2) /* R3000E_Bp */ mov w1, #1 b call_psxException FUNCTION(jump_break): - mov w0, #0x24 + mov w0, #(9<<2) mov w1, #0 b call_psxException FUNCTION(jump_syscall_ds): - mov w0, #0x20 + mov w0, #(8<<2) /* R3000E_Syscall */ mov w1, #2 b call_psxException FUNCTION(jump_syscall): - mov w0, #0x20 + mov w0, #(8<<2) mov w1, #0 call_psxException: diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index f59764628..067decb7d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -39,14 +39,10 @@ static Jit g_jit; #include "../psxinterpreter.h" #include "../gte.h" #include "emu_if.h" // emulator interface +#include "linkage_offsets.h" +#include "compiler_features.h" #include "arm_features.h" -#define unused __attribute__((unused)) -#ifdef __clang__ -#define noinline __attribute__((noinline)) -#else -#define noinline __attribute__((noinline,noclone)) -#endif #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #endif @@ -59,6 +55,7 @@ static Jit g_jit; //#define DISASM //#define ASSEM_PRINT +//#define REGMAP_PRINT // with DISASM only //#define INV_DEBUG_W //#define STAT_PRINT @@ -135,19 +132,20 @@ static long ndrc_write_ofs; // stubs enum stub_type { CC_STUB = 1, - FP_STUB = 2, + //FP_STUB = 2, LOADB_STUB = 3, LOADH_STUB = 4, LOADW_STUB = 5, - LOADD_STUB = 6, + //LOADD_STUB = 6, LOADBU_STUB = 7, LOADHU_STUB = 8, STOREB_STUB = 9, 
STOREH_STUB = 10, STOREW_STUB = 11, - STORED_STUB = 12, + //STORED_STUB = 12, STORELR_STUB = 13, INVCODE_STUB = 14, + OVERFLOW_STUB = 15, }; // regmap_pre[i] - regs before [i] insn starts; dirty things here that @@ -163,7 +161,7 @@ struct regstat uint64_t dirty; uint64_t u; u_int wasconst; // before; for example 'lw r2, (r2)' wasconst is true - u_int isconst; // ... but isconst is false when r2 is known + u_int isconst; // ... but isconst is false when r2 is known (hr) u_int loadedconst; // host regs that have constants loaded //u_int waswritten; // MIPS regs that were used as store base before }; @@ -225,8 +223,8 @@ struct jump_info static struct decoded_insn { u_char itype; - u_char opcode; - u_char opcode2; + u_char opcode; // bits 31-26 + u_char opcode2; // (depends on opcode) u_char rs1; u_char rs2; u_char rt1; @@ -239,6 +237,9 @@ static struct decoded_insn u_char is_ujump:1; u_char is_load:1; u_char is_store:1; + u_char is_delay_load:1; // is_load + MFC/CFC + u_char is_exception:1; // unconditional, also interp. 
fallback + u_char may_except:1; // might generate an exception } dops[MAXBLOCK]; static u_char *out; @@ -350,7 +351,7 @@ static struct decoded_insn #define STORE 2 // Store #define LOADLR 3 // Unaligned load #define STORELR 4 // Unaligned store -#define MOV 5 // Move +#define MOV 5 // Move (hi/lo only) #define ALU 6 // Arithmetic/logic #define MULTDIV 7 // Multiply/divide #define SHIFT 8 // Shift by register @@ -361,16 +362,9 @@ static struct decoded_insn #define CJUMP 13 // Conditional branch (BEQ/BNE/BGTZ/BLEZ) #define SJUMP 14 // Conditional branch (regimm format) #define COP0 15 // Coprocessor 0 -#define COP1 16 // Coprocessor 1 -#define C1LS 17 // Coprocessor 1 load/store -//#define FJUMP 18 // Conditional branch (floating point) -//#define FLOAT 19 // Floating point unit -//#define FCONV 20 // Convert integer to float -//#define FCOMP 21 // Floating point compare (sets FSREG) +#define RFE 16 #define SYSCALL 22// SYSCALL,BREAK -#define OTHER 23 // Other -//#define SPAN 24 // Branch/delay slot spans 2 pages -#define NI 25 // Not implemented +#define OTHER 23 // Other/unknown - do nothing #define HLECALL 26// PCSX fake opcodes for HLE #define COP2 27 // Coprocessor 2 move #define C2LS 28 // Coprocessor 2 load/store @@ -388,12 +382,12 @@ static struct decoded_insn // asm linkage void dyna_linker(); void cc_interrupt(); -void fp_exception(); -void fp_exception_ds(); void jump_syscall (u_int u0, u_int u1, u_int pc); void jump_syscall_ds(u_int u0, u_int u1, u_int pc); void jump_break (u_int u0, u_int u1, u_int pc); void jump_break_ds(u_int u0, u_int u1, u_int pc); +void jump_overflow (u_int u0, u_int u1, u_int pc); +void jump_overflow_ds(u_int u0, u_int u1, u_int pc); void jump_to_new_pc(); void call_gteStall(); void new_dyna_leave(); @@ -406,6 +400,7 @@ static void ndrc_write_invalidate_many(u_int addr, u_int end); static int new_recompile_block(u_int addr); static void invalidate_block(struct block_info *block); +static void exception_assemble(int i, const struct 
regstat *i_regs, int ccadj_); // Needed by assembler static void wb_register(signed char r, const signed char regmap[], uint64_t dirty); @@ -784,10 +779,10 @@ static void noinline *get_addr(u_int vaddr, int can_compile) return ndrc_get_addr_ht(vaddr); // generate an address error - psxRegs.CP0.n.SR |= 2; - psxRegs.CP0.n.Cause = (vaddr<<31) | (4<<2); - psxRegs.CP0.n.EPC = (vaddr&1) ? vaddr-5 : vaddr; - psxRegs.CP0.n.BadVAddr = vaddr & ~1; + psxRegs.CP0.n.Cause &= 0x300; + psxRegs.CP0.n.Cause |= R3000E_AdEL << 2; + psxRegs.CP0.n.EPC = vaddr; + psxRegs.pc = 0x80000080; return ndrc_get_addr_ht(0x80000080); } @@ -834,6 +829,12 @@ static signed char get_reg(const signed char regmap[], signed char r) #endif +// get reg suitable for writing +static signed char get_reg_w(const signed char regmap[], signed char r) +{ + return r == 0 ? -1 : get_reg(regmap, r); +} + // get reg as mask bit (1 << hr) static u_int get_regm(const signed char regmap[], signed char r) { @@ -1069,7 +1070,7 @@ static int needed_again(int r, int i) j++; break; } - if(dops[i+j].itype==SYSCALL||dops[i+j].itype==HLECALL||dops[i+j].itype==INTCALL||((source[i+j]&0xfc00003f)==0x0d)) + if (dops[i+j].is_exception) { break; } @@ -1197,6 +1198,8 @@ static const struct { FUNCNAME(jump_break_ds), FUNCNAME(jump_syscall), FUNCNAME(jump_syscall_ds), + FUNCNAME(jump_overflow), + FUNCNAME(jump_overflow_ds), FUNCNAME(call_gteStall), FUNCNAME(new_dyna_leave), FUNCNAME(pcsx_mtc0), @@ -1214,8 +1217,43 @@ static const char *func_name(const void *a) return function_names[i].name; return ""; } + +static const char *fpofs_name(u_int ofs) +{ + u_int *p = (u_int *)&dynarec_local + ofs/sizeof(u_int); + static char buf[64]; + switch (ofs) { + #define ofscase(x) case LO_##x: return " ; " #x + ofscase(next_interupt); + ofscase(last_count); + ofscase(pending_exception); + ofscase(stop); + ofscase(address); + ofscase(lo); + ofscase(hi); + ofscase(PC); + ofscase(cycle); + ofscase(mem_rtab); + ofscase(mem_wtab); + ofscase(psxH_ptr); + 
ofscase(invc_ptr); + ofscase(ram_offset); + #undef ofscase + } + buf[0] = 0; + if (psxRegs.GPR.r <= p && p < &psxRegs.GPR.r[32]) + snprintf(buf, sizeof(buf), " ; r%d", (int)(p - psxRegs.GPR.r)); + else if (psxRegs.CP0.r <= p && p < &psxRegs.CP0.r[32]) + snprintf(buf, sizeof(buf), " ; cp0 $%d", (int)(p - psxRegs.CP0.r)); + else if (psxRegs.CP2D.r <= p && p < &psxRegs.CP2D.r[32]) + snprintf(buf, sizeof(buf), " ; cp2d $%d", (int)(p - psxRegs.CP2D.r)); + else if (psxRegs.CP2C.r <= p && p < &psxRegs.CP2C.r[32]) + snprintf(buf, sizeof(buf), " ; cp2c $%d", (int)(p - psxRegs.CP2C.r)); + return buf; +} #else #define func_name(x) "" +#define fpofs_name(x) "" #endif #ifdef __i386__ @@ -1933,6 +1971,10 @@ static void alu_alloc(struct regstat *current,int i) } alloc_reg(current,i,dops[i].rt1); } + if (!(dops[i].opcode2 & 1)) { + alloc_cc(current,i); // for exceptions + dirty_reg(current,CCREG); + } } if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU if(dops[i].rt1) { @@ -1955,9 +1997,6 @@ static void alu_alloc(struct regstat *current,int i) alloc_reg(current,i,dops[i].rt1); } } - if(dops[i].opcode2>=0x2c&&dops[i].opcode2<=0x2f) { // DADD/DADDU/DSUB/DSUBU - assert(0); - } clear_const(current,dops[i].rs1); clear_const(current,dops[i].rs2); clear_const(current,dops[i].rt1); @@ -1969,10 +2008,7 @@ static void imm16_alloc(struct regstat *current,int i) if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); else dops[i].use_lt1=!!dops[i].rs1; if(dops[i].rt1) alloc_reg(current,i,dops[i].rt1); - if(dops[i].opcode==0x18||dops[i].opcode==0x19) { // DADDI/DADDIU - assert(0); - } - else if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU + if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU clear_const(current,dops[i].rs1); clear_const(current,dops[i].rt1); } @@ -1991,6 +2027,14 @@ static void imm16_alloc(struct regstat *current,int i) set_const(current,dops[i].rt1,v+imm[i]); } else clear_const(current,dops[i].rt1); + if 
(dops[i].opcode == 0x08) { + alloc_cc(current,i); // for exceptions + dirty_reg(current,CCREG); + if (dops[i].rt1 == 0) { + alloc_reg_temp(current,i,-1); + minimum_free_regs[i]=1; + } + } } else { set_const(current,dops[i].rt1,imm[i]<<16); // LUI @@ -2009,15 +2053,7 @@ static void load_alloc(struct regstat *current,int i) alloc_reg(current, i, ROREG); if(dops[i].rt1&&!((current->u>>dops[i].rt1)&1)) { alloc_reg(current,i,dops[i].rt1); - assert(get_reg(current->regmap,dops[i].rt1)>=0); - if(dops[i].opcode==0x27||dops[i].opcode==0x37) // LWU/LD - { - assert(0); - } - else if(dops[i].opcode==0x1A||dops[i].opcode==0x1B) // LDL/LDR - { - assert(0); - } + assert(get_reg_w(current->regmap, dops[i].rt1)>=0); dirty_reg(current,dops[i].rt1); // LWL/LWR need a temporary register for the old value if(dops[i].opcode==0x22||dops[i].opcode==0x26) @@ -2037,10 +2073,6 @@ static void load_alloc(struct regstat *current,int i) } alloc_reg_temp(current,i,-1); minimum_free_regs[i]=1; - if(dops[i].opcode==0x1A||dops[i].opcode==0x1B) // LDL/LDR - { - assert(0); - } } } @@ -2067,12 +2099,6 @@ static void store_alloc(struct regstat *current,int i) minimum_free_regs[i]=1; } -static void c1ls_alloc(struct regstat *current,int i) -{ - clear_const(current,dops[i].rt1); - alloc_reg(current,i,CSREG); // Status -} - static void c2ls_alloc(struct regstat *current,int i) { clear_const(current,dops[i].rt1); @@ -2141,7 +2167,6 @@ static void cop0_alloc(struct regstat *current,int i) { if(dops[i].rt1) { clear_const(current,dops[i].rt1); - alloc_all(current,i); alloc_reg(current,i,dops[i].rt1); dirty_reg(current,dops[i].rt1); } @@ -2158,14 +2183,14 @@ static void cop0_alloc(struct regstat *current,int i) current->u&=~1LL; alloc_reg(current,i,0); } + minimum_free_regs[i] = HOST_REGS; } - else - { - // RFE - assert(dops[i].opcode2==0x10); - alloc_all(current,i); - } - minimum_free_regs[i]=HOST_REGS; +} + +static void rfe_alloc(struct regstat *current, int i) +{ + alloc_all(current, i); + 
minimum_free_regs[i] = HOST_REGS; } static void cop2_alloc(struct regstat *current,int i) @@ -2249,14 +2274,12 @@ static void delayslot_alloc(struct regstat *current,int i) case COP0: cop0_alloc(current,i); break; - case COP1: + case RFE: + rfe_alloc(current,i); break; case COP2: cop2_alloc(current,i); break; - case C1LS: - c1ls_alloc(current,i); - break; case C2LS: c2ls_alloc(current,i); break; @@ -2333,47 +2356,77 @@ static void pass_args(int a0, int a1) } } -static void alu_assemble(int i, const struct regstat *i_regs) +static void alu_assemble(int i, const struct regstat *i_regs, int ccadj_) { if(dops[i].opcode2>=0x20&&dops[i].opcode2<=0x23) { // ADD/ADDU/SUB/SUBU - if(dops[i].rt1) { - signed char s1,s2,t; - t=get_reg(i_regs->regmap,dops[i].rt1); - if(t>=0) { - s1=get_reg(i_regs->regmap,dops[i].rs1); - s2=get_reg(i_regs->regmap,dops[i].rs2); - if(dops[i].rs1&&dops[i].rs2) { + int do_oflow = dops[i].may_except; // ADD/SUB with exceptions enabled + if (dops[i].rt1 || do_oflow) { + int do_exception_check = 0; + signed char s1, s2, t, tmp; + t = get_reg_w(i_regs->regmap, dops[i].rt1); + tmp = get_reg_temp(i_regs->regmap); + if (t < 0 && do_oflow) + t = tmp; + if (t >= 0) { + s1 = get_reg(i_regs->regmap, dops[i].rs1); + s2 = get_reg(i_regs->regmap, dops[i].rs2); + if (dops[i].rs1 && dops[i].rs2) { assert(s1>=0); assert(s2>=0); - if(dops[i].opcode2&2) emit_sub(s1,s2,t); - else emit_add(s1,s2,t); + if (dops[i].opcode2 & 2) { + if (do_oflow) { + emit_subs(s1, s2, tmp); + do_exception_check = 1; + } + else + emit_sub(s1,s2,t); + } + else { + if (do_oflow) { + emit_adds(s1, s2, tmp); + do_exception_check = 1; + } + else + emit_add(s1,s2,t); + } } else if(dops[i].rs1) { if(s1>=0) emit_mov(s1,t); else emit_loadreg(dops[i].rs1,t); } else if(dops[i].rs2) { - if(s2>=0) { - if(dops[i].opcode2&2) emit_neg(s2,t); - else emit_mov(s2,t); + if (s2 < 0) { + emit_loadreg(dops[i].rs2, t); + s2 = t; } - else { - emit_loadreg(dops[i].rs2,t); - if(dops[i].opcode2&2) emit_neg(t,t); + if 
(dops[i].opcode2 & 2) { + if (do_oflow) { + emit_negs(s2, tmp); + do_exception_check = 1; + } + else + emit_neg(s2, t); } + else if (s2 != t) + emit_mov(s2, t); } - else emit_zeroreg(t); + else + emit_zeroreg(t); + } + if (do_exception_check) { + void *jaddr = out; + emit_jo(0); + if (t >= 0 && tmp != t) + emit_mov(tmp, t); + add_stub_r(OVERFLOW_STUB, jaddr, out, i, 0, i_regs, ccadj_, 0); } } } - if(dops[i].opcode2>=0x2c&&dops[i].opcode2<=0x2f) { // DADD/DADDU/DSUB/DSUBU - assert(0); - } - if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU + else if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU if(dops[i].rt1) { signed char s1l,s2l,t; { - t=get_reg(i_regs->regmap,dops[i].rt1); + t=get_reg_w(i_regs->regmap, dops[i].rt1); //assert(t>=0); if(t>=0) { s1l=get_reg(i_regs->regmap,dops[i].rs1); @@ -2406,10 +2459,10 @@ static void alu_assemble(int i, const struct regstat *i_regs) } } } - if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR + else if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR if(dops[i].rt1) { signed char s1l,s2l,tl; - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); { if(tl>=0) { s1l=get_reg(i_regs->regmap,dops[i].rs1); @@ -2473,12 +2526,12 @@ static void alu_assemble(int i, const struct regstat *i_regs) } } -static void imm16_assemble(int i, const struct regstat *i_regs) +static void imm16_assemble(int i, const struct regstat *i_regs, int ccadj_) { if (dops[i].opcode==0x0f) { // LUI if(dops[i].rt1) { signed char t; - t=get_reg(i_regs->regmap,dops[i].rt1); + t=get_reg_w(i_regs->regmap, dops[i].rt1); //assert(t>=0); if(t>=0) { if(!((i_regs->isconst>>t)&1)) @@ -2487,23 +2540,55 @@ static void imm16_assemble(int i, const struct regstat *i_regs) } } if(dops[i].opcode==0x08||dops[i].opcode==0x09) { // ADDI/ADDIU - if(dops[i].rt1) { - signed char s,t; - t=get_reg(i_regs->regmap,dops[i].rt1); + int is_addi = (dops[i].opcode == 0x08); + if (dops[i].rt1 || is_addi) { 
+ signed char s, t, tmp; + t=get_reg_w(i_regs->regmap, dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); if(dops[i].rs1) { - //assert(t>=0); - //assert(s>=0); + tmp = get_reg_temp(i_regs->regmap); + if (is_addi) { + assert(tmp >= 0); + if (t < 0) t = tmp; + } if(t>=0) { if(!((i_regs->isconst>>t)&1)) { - if(s<0) { + int sum, do_exception_check = 0; + if (s < 0) { if(i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t); - emit_addimm(t,imm[i],t); - }else{ - if(!((i_regs->wasconst>>s)&1)) - emit_addimm(s,imm[i],t); + if (is_addi) { + emit_addimm_and_set_flags3(t, imm[i], tmp); + do_exception_check = 1; + } else - emit_movimm(constmap[i][s]+imm[i],t); + emit_addimm(t, imm[i], t); + } else { + if (!((i_regs->wasconst >> s) & 1)) { + if (is_addi) { + emit_addimm_and_set_flags3(s, imm[i], tmp); + do_exception_check = 1; + } + else + emit_addimm(s, imm[i], t); + } + else { + int oflow = add_overflow(constmap[i][s], imm[i], sum); + if (is_addi && oflow) + do_exception_check = 2; + else + emit_movimm(sum, t); + } + } + if (do_exception_check) { + void *jaddr = out; + if (do_exception_check == 2) + emit_jmp(0); + else { + emit_jo(0); + if (tmp != t) + emit_mov(tmp, t); + } + add_stub_r(OVERFLOW_STUB, jaddr, out, i, 0, i_regs, ccadj_, 0); } } } @@ -2515,26 +2600,11 @@ static void imm16_assemble(int i, const struct regstat *i_regs) } } } - if(dops[i].opcode==0x18||dops[i].opcode==0x19) { // DADDI/DADDIU - if(dops[i].rt1) { - signed char sl,tl; - tl=get_reg(i_regs->regmap,dops[i].rt1); - sl=get_reg(i_regs->regmap,dops[i].rs1); - if(tl>=0) { - if(dops[i].rs1) { - assert(sl>=0); - emit_addimm(sl,imm[i],tl); - } else { - emit_movimm(imm[i],tl); - } - } - } - } else if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU if(dops[i].rt1) { //assert(dops[i].rs1!=0); // r0 might be valid, but it's probably a bug signed char sl,t; - t=get_reg(i_regs->regmap,dops[i].rt1); + t=get_reg_w(i_regs->regmap, dops[i].rt1); sl=get_reg(i_regs->regmap,dops[i].rs1); 
//assert(t>=0); if(t>=0) { @@ -2573,7 +2643,7 @@ static void imm16_assemble(int i, const struct regstat *i_regs) else if(dops[i].opcode>=0x0c&&dops[i].opcode<=0x0e) { // ANDI/ORI/XORI if(dops[i].rt1) { signed char sl,tl; - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); sl=get_reg(i_regs->regmap,dops[i].rs1); if(tl>=0 && !((i_regs->isconst>>tl)&1)) { if(dops[i].opcode==0x0c) //ANDI @@ -2634,7 +2704,7 @@ static void shiftimm_assemble(int i, const struct regstat *i_regs) { if(dops[i].rt1) { signed char s,t; - t=get_reg(i_regs->regmap,dops[i].rt1); + t=get_reg_w(i_regs->regmap, dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); //assert(t>=0); if(t>=0&&!((i_regs->isconst>>t)&1)){ @@ -2940,7 +3010,7 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) int offset_reg = -1; int fastio_reg_override = -1; u_int reglist=get_host_reglist(i_regs->regmap); - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); offset=imm[i]; if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<>16)==0x1f80)) - ||dops[i].rt1==0) { + if(tl<0 && ((!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80) || dops[i].rt1==0)) { // could be FIFO, must perform the read // ||dummy read assem_debug("(forced read)\n"); @@ -2982,7 +3050,7 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) else if (ram_offset && memtarget) { offset_reg = get_ro_reg(i_regs, 0); } - int dummy=(dops[i].rt1==0)||(tl!=get_reg(i_regs->regmap,dops[i].rt1)); // ignore loads to r0 and unneeded reg + int dummy=(dops[i].rt1==0)||(tl!=get_reg_w(i_regs->regmap, dops[i].rt1)); // ignore loads to r0 and unneeded reg switch (dops[i].opcode) { case 0x20: // LB if(!c||memtarget) { @@ -3077,7 +3145,7 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) default: assert(0); } - } + } // tl >= 0 if (fastio_reg_override == HOST_TEMPREG || offset_reg == HOST_TEMPREG) 
host_tempreg_release(); } @@ -3092,7 +3160,7 @@ static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) int offset_reg = -1; int fastio_reg_override = -1; u_int reglist=get_host_reglist(i_regs->regmap); - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); temp=get_reg_temp(i_regs->regmap); temp2=get_reg(i_regs->regmap,FTEMP); @@ -3483,9 +3551,8 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) { if(dops[i].opcode2==0) // MFC0 { - signed char t=get_reg(i_regs->regmap,dops[i].rt1); + signed char t=get_reg_w(i_regs->regmap, dops[i].rt1); u_int copr=(source[i]>>11)&0x1f; - //assert(t>=0); // Why does this happen? OOT is weird if(t>=0&&dops[i].rt1!=0) { emit_readword(®_cop0[copr],t); } @@ -3554,61 +3621,15 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) } emit_loadreg(dops[i].rs1,s); } - else - { - assert(dops[i].opcode2==0x10); - //if((source[i]&0x3f)==0x10) // RFE - { - emit_readword(&psxRegs.CP0.n.SR,0); - emit_andimm(0,0x3c,1); - emit_andimm(0,~0xf,0); - emit_orrshr_imm(1,2,0); - emit_writeword(0,&psxRegs.CP0.n.SR); - } - } -} - -static void cop1_unusable(int i, const struct regstat *i_regs) -{ - // XXX: should just just do the exception instead - //if(!cop1_usable) - { - void *jaddr=out; - emit_jmp(0); - add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0); - } } -static void cop1_assemble(int i, const struct regstat *i_regs) +static void rfe_assemble(int i, const struct regstat *i_regs, int ccadj_) { - cop1_unusable(i, i_regs); -} - -static void c1ls_assemble(int i, const struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); -} - -// FP_STUB -static void do_cop1stub(int n) -{ - literal_pool(256); - assem_debug("do_cop1stub %x\n",start+stubs[n].a*4); - set_jump_target(stubs[n].addr, out); - int i=stubs[n].a; -// int rs=stubs[n].b; - struct regstat *i_regs=(struct regstat *)stubs[n].c; - int ds=stubs[n].d; - 
if(!ds) { - load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i); - //if(i_regs!=®s[i]) printf("oops: regs[i]=%x i_regs=%x",(int)®s[i],(int)i_regs); - } - //else {printf("fp exception in delay slot\n");} - wb_dirtys(i_regs->regmap_entry,i_regs->wasdirty); - if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_movimm(start+(i-ds)*4,0); // Get PC - emit_addimm(HOST_CCREG,ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... - emit_far_jump(ds?fp_exception_ds:fp_exception); + emit_readword(&psxRegs.CP0.n.SR, 0); + emit_andimm(0, 0x3c, 1); + emit_andimm(0, ~0xf, 0); + emit_orrshr_imm(1, 2, 0); + emit_writeword(0, &psxRegs.CP0.n.SR); } static int cop2_is_stalling_op(int i, int *cycles) @@ -4018,7 +4039,7 @@ static void cop2_assemble(int i, const struct regstat *i_regs) cop2_do_stall_check(0, i, i_regs, reglist); } if (dops[i].opcode2==0) { // MFC2 - signed char tl=get_reg(i_regs->regmap,dops[i].rt1); + signed char tl=get_reg_w(i_regs->regmap, dops[i].rt1); if(tl>=0&&dops[i].rt1!=0) cop2_get_dreg(copr,tl,temp); } @@ -4028,7 +4049,7 @@ static void cop2_assemble(int i, const struct regstat *i_regs) } else if (dops[i].opcode2==2) // CFC2 { - signed char tl=get_reg(i_regs->regmap,dops[i].rt1); + signed char tl=get_reg_w(i_regs->regmap, dops[i].rt1); if(tl>=0&&dops[i].rt1!=0) emit_readword(®_cop2c[copr],tl); } @@ -4092,6 +4113,18 @@ static void do_unalignedwritestub(int n) emit_jmp(stubs[n].retaddr); // return address } +static void do_overflowstub(int n) +{ + assem_debug("do_overflowstub %x\n", start + (u_int)stubs[n].a * 4); + literal_pool(24); + int i = stubs[n].a; + struct regstat *i_regs = (struct regstat *)stubs[n].c; + int ccadj = stubs[n].d; + set_jump_target(stubs[n].addr, out); + wb_dirtys(regs[i].regmap, regs[i].dirty); + exception_assemble(i, i_regs, ccadj); +} + #ifndef multdiv_assemble void multdiv_assemble(int i,struct regstat *i_regs) { @@ -4106,7 +4139,7 @@ static void mov_assemble(int i, 
const struct regstat *i_regs) //if(dops[i].opcode2==0x11||dops[i].opcode2==0x13) { // MTHI/MTLO if(dops[i].rt1) { signed char sl,tl; - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); //assert(tl>=0); if(tl>=0) { sl=get_reg(i_regs->regmap,dops[i].rs1); @@ -4137,13 +4170,17 @@ static void call_c_cpu_handler(int i, const struct regstat *i_regs, int ccadj_, emit_far_jump(jump_to_new_pc); } -static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_) +static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_) { // 'break' tends to be littered around to catch things like // division by 0 and is almost never executed, so don't emit much code here - void *func = (dops[i].opcode2 == 0x0C) - ? (is_delayslot ? jump_syscall_ds : jump_syscall) - : (is_delayslot ? jump_break_ds : jump_break); + void *func; + if (dops[i].itype == ALU || dops[i].itype == IMM16) + func = is_delayslot ? jump_overflow_ds : jump_overflow; + else if (dops[i].opcode2 == 0x0C) + func = is_delayslot ? jump_syscall_ds : jump_syscall; + else + func = is_delayslot ? 
jump_break_ds : jump_break; assert(get_reg(i_regs->regmap, CCREG) == HOST_CCREG); emit_movimm(start + i*4, 2); // pc emit_addimm(HOST_CCREG, ccadj_ + CLOCK_ADJUST(1), HOST_CCREG); @@ -4152,7 +4189,7 @@ static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_) static void hlecall_bad() { - SysPrintf("bad hlecall\n"); + assert(0); } static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_) @@ -4214,7 +4251,7 @@ static void speculate_register_values(int i) // fallthrough case IMM16: if(dops[i].rt1&&is_const(®s[i],dops[i].rt1)) { - int value,hr=get_reg(regs[i].regmap,dops[i].rt1); + int value,hr=get_reg_w(regs[i].regmap, dops[i].rt1); if(hr>=0) { if(get_final_value(hr,i,&value)) smrv[dops[i].rt1]=value; @@ -4272,10 +4309,10 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) int ds = 0; switch (dops[i].itype) { case ALU: - alu_assemble(i, i_regs); + alu_assemble(i, i_regs, ccadj_); break; case IMM16: - imm16_assemble(i, i_regs); + imm16_assemble(i, i_regs, ccadj_); break; case SHIFT: shift_assemble(i, i_regs); @@ -4298,11 +4335,8 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) case COP0: cop0_assemble(i, i_regs, ccadj_); break; - case COP1: - cop1_assemble(i, i_regs); - break; - case C1LS: - c1ls_assemble(i, i_regs); + case RFE: + rfe_assemble(i, i_regs, ccadj_); break; case COP2: cop2_assemble(i, i_regs); @@ -4321,7 +4355,7 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) mov_assemble(i, i_regs); break; case SYSCALL: - syscall_assemble(i, i_regs, ccadj_); + exception_assemble(i, i_regs, ccadj_); break; case HLECALL: hlecall_assemble(i, i_regs, ccadj_); @@ -4347,7 +4381,6 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) break; case NOP: case OTHER: - case NI: // not handled, just skip break; default: @@ -4460,7 +4493,7 @@ static void address_generation(int i, const struct regstat *i_regs, signed char int ra=-1; int agr=AGEN1+(i&1); 
if(dops[i].itype==LOAD) { - ra=get_reg(i_regs->regmap,dops[i].rt1); + ra=get_reg_w(i_regs->regmap, dops[i].rt1); if(ra<0) ra=get_reg_temp(i_regs->regmap); assert(ra>=0); } @@ -5364,7 +5397,7 @@ static void rjump_assemble_write_ra(int i) int rt,return_address; assert(dops[i+1].rt1!=dops[i].rt1); assert(dops[i+1].rt2!=dops[i].rt1); - rt=get_reg(branch_regs[i].regmap,dops[i].rt1); + rt=get_reg_w(branch_regs[i].regmap, dops[i].rt1); assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(rt>=0); return_address=start+i*4+8; @@ -5457,7 +5490,7 @@ static void rjump_assemble(int i, const struct regstat *i_regs) //assert(adj==0); emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), HOST_CCREG); add_stub(CC_STUB,out,NULL,0,i,-1,TAKEN,rs); - if(dops[i+1].itype==COP0 && dops[i+1].opcode2==0x10) + if (dops[i+1].itype == RFE) // special case for RFE emit_jmp(0); else @@ -6131,13 +6164,6 @@ void disassemble_inst(int i) printf (" %x: %s r%d,cpr0[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC0 else printf (" %x: %s\n",start+i*4,insn[i]); break; - case COP1: - if(dops[i].opcode2<3) - printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],dops[i].rt1,(source[i]>>11)&0x1f); // MFC1 - else if(dops[i].opcode2>3) - printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC1 - else printf (" %x: %s\n",start+i*4,insn[i]); - break; case COP2: if(dops[i].opcode2<3) printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],dops[i].rt1,(source[i]>>11)&0x1f); // MFC2 @@ -6145,9 +6171,6 @@ void disassemble_inst(int i) printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC2 else printf (" %x: %s\n",start+i*4,insn[i]); break; - case C1LS: - printf (" %x: %s 
cpr1[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,dops[i].rs1,imm[i]); - break; case C2LS: printf (" %x: %s cpr2[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,dops[i].rs1,imm[i]); break; @@ -6158,9 +6181,12 @@ void disassemble_inst(int i) //printf (" %s %8x\n",insn[i],source[i]); printf (" %x: %s\n",start+i*4,insn[i]); } + #ifndef REGMAP_PRINT return; - printf("D: %"PRIu64" WD: %"PRIu64" U: %"PRIu64"\n", - regs[i].dirty, regs[i].wasdirty, unneeded_reg[i]); + #endif + printf("D: %"PRIx64" WD: %"PRIx64" U: %"PRIx64" hC: %x hWC: %x hLC: %x\n", + regs[i].dirty, regs[i].wasdirty, unneeded_reg[i], + regs[i].isconst, regs[i].wasconst, regs[i].loadedconst); print_regmap("pre: ", regmap_pre[i]); print_regmap("entry: ", regs[i].regmap_entry); print_regmap("map: ", regs[i].regmap); @@ -6551,17 +6577,20 @@ static int apply_hacks(void) static noinline void pass1_disassemble(u_int pagelimit) { int i, j, done = 0, ni_count = 0; - unsigned int type,op,op2; + unsigned int type,op,op2,op3; for (i = 0; !done; i++) { + int force_prev_to_interpreter = 0; memset(&dops[i], 0, sizeof(dops[i])); - op2=0; - minimum_free_regs[i]=0; - dops[i].opcode=op=source[i]>>26; + op2 = 0; + minimum_free_regs[i] = 0; + dops[i].opcode = op = source[i] >> 26; + type = INTCALL; + set_mnemonic(i, "???"); switch(op) { - case 0x00: set_mnemonic(i, "special"); type=NI; + case 0x00: set_mnemonic(i, "special"); op2=source[i]&0x3f; switch(op2) { @@ -6594,51 +6623,20 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x27: set_mnemonic(i, "NOR"); type=ALU; break; case 0x2A: set_mnemonic(i, "SLT"); type=ALU; break; case 0x2B: set_mnemonic(i, "SLTU"); type=ALU; break; - case 0x30: set_mnemonic(i, "TGE"); type=NI; break; - case 0x31: set_mnemonic(i, "TGEU"); type=NI; break; - case 0x32: set_mnemonic(i, "TLT"); type=NI; break; - case 0x33: set_mnemonic(i, "TLTU"); type=NI; break; - case 0x34: set_mnemonic(i, "TEQ"); type=NI; break; - case 0x36: set_mnemonic(i, "TNE"); type=NI; break; -#if 0 
- case 0x14: set_mnemonic(i, "DSLLV"); type=SHIFT; break; - case 0x16: set_mnemonic(i, "DSRLV"); type=SHIFT; break; - case 0x17: set_mnemonic(i, "DSRAV"); type=SHIFT; break; - case 0x1C: set_mnemonic(i, "DMULT"); type=MULTDIV; break; - case 0x1D: set_mnemonic(i, "DMULTU"); type=MULTDIV; break; - case 0x1E: set_mnemonic(i, "DDIV"); type=MULTDIV; break; - case 0x1F: set_mnemonic(i, "DDIVU"); type=MULTDIV; break; - case 0x2C: set_mnemonic(i, "DADD"); type=ALU; break; - case 0x2D: set_mnemonic(i, "DADDU"); type=ALU; break; - case 0x2E: set_mnemonic(i, "DSUB"); type=ALU; break; - case 0x2F: set_mnemonic(i, "DSUBU"); type=ALU; break; - case 0x38: set_mnemonic(i, "DSLL"); type=SHIFTIMM; break; - case 0x3A: set_mnemonic(i, "DSRL"); type=SHIFTIMM; break; - case 0x3B: set_mnemonic(i, "DSRA"); type=SHIFTIMM; break; - case 0x3C: set_mnemonic(i, "DSLL32"); type=SHIFTIMM; break; - case 0x3E: set_mnemonic(i, "DSRL32"); type=SHIFTIMM; break; - case 0x3F: set_mnemonic(i, "DSRA32"); type=SHIFTIMM; break; -#endif } break; - case 0x01: set_mnemonic(i, "regimm"); type=NI; - op2=(source[i]>>16)&0x1f; + case 0x01: set_mnemonic(i, "regimm"); + type = SJUMP; + op2 = (source[i] >> 16) & 0x1f; switch(op2) { - case 0x00: set_mnemonic(i, "BLTZ"); type=SJUMP; break; - case 0x01: set_mnemonic(i, "BGEZ"); type=SJUMP; break; - //case 0x02: set_mnemonic(i, "BLTZL"); type=SJUMP; break; - //case 0x03: set_mnemonic(i, "BGEZL"); type=SJUMP; break; - //case 0x08: set_mnemonic(i, "TGEI"); type=NI; break; - //case 0x09: set_mnemonic(i, "TGEIU"); type=NI; break; - //case 0x0A: set_mnemonic(i, "TLTI"); type=NI; break; - //case 0x0B: set_mnemonic(i, "TLTIU"); type=NI; break; - //case 0x0C: set_mnemonic(i, "TEQI"); type=NI; break; - //case 0x0E: set_mnemonic(i, "TNEI"); type=NI; break; - case 0x10: set_mnemonic(i, "BLTZAL"); type=SJUMP; break; - case 0x11: set_mnemonic(i, "BGEZAL"); type=SJUMP; break; - //case 0x12: set_mnemonic(i, "BLTZALL"); type=SJUMP; break; - //case 0x13: set_mnemonic(i, "BGEZALL"); 
type=SJUMP; break; + case 0x10: set_mnemonic(i, "BLTZAL"); break; + case 0x11: set_mnemonic(i, "BGEZAL"); break; + default: + if (op2 & 1) + set_mnemonic(i, "BGEZ"); + else + set_mnemonic(i, "BLTZ"); } break; case 0x02: set_mnemonic(i, "J"); type=UJUMP; break; @@ -6655,68 +6653,40 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x0D: set_mnemonic(i, "ORI"); type=IMM16; break; case 0x0E: set_mnemonic(i, "XORI"); type=IMM16; break; case 0x0F: set_mnemonic(i, "LUI"); type=IMM16; break; - case 0x10: set_mnemonic(i, "cop0"); type=NI; - op2=(source[i]>>21)&0x1f; + case 0x10: set_mnemonic(i, "COP0"); + op2 = (source[i]>>21) & 0x1f; + if (op2 & 0x10) { + op3 = source[i] & 0x1f; + switch (op3) + { + case 0x01: case 0x02: case 0x06: case 0x08: type = INTCALL; break; + case 0x10: set_mnemonic(i, "RFE"); type=RFE; break; + default: type = OTHER; break; + } + break; + } switch(op2) { - case 0x00: set_mnemonic(i, "MFC0"); type=COP0; break; - case 0x02: set_mnemonic(i, "CFC0"); type=COP0; break; + u32 rd; + case 0x00: + set_mnemonic(i, "MFC0"); + rd = (source[i] >> 11) & 0x1F; + if (!(0x00000417u & (1u << rd))) + type = COP0; + break; case 0x04: set_mnemonic(i, "MTC0"); type=COP0; break; - case 0x06: set_mnemonic(i, "CTC0"); type=COP0; break; - case 0x10: set_mnemonic(i, "RFE"); type=COP0; break; + case 0x02: + case 0x06: type = INTCALL; break; + default: type = OTHER; break; } break; - case 0x11: set_mnemonic(i, "cop1"); type=COP1; + case 0x11: set_mnemonic(i, "COP1"); op2=(source[i]>>21)&0x1f; break; -#if 0 - case 0x14: set_mnemonic(i, "BEQL"); type=CJUMP; break; - case 0x15: set_mnemonic(i, "BNEL"); type=CJUMP; break; - case 0x16: set_mnemonic(i, "BLEZL"); type=CJUMP; break; - case 0x17: set_mnemonic(i, "BGTZL"); type=CJUMP; break; - case 0x18: set_mnemonic(i, "DADDI"); type=IMM16; break; - case 0x19: set_mnemonic(i, "DADDIU"); type=IMM16; break; - case 0x1A: set_mnemonic(i, "LDL"); type=LOADLR; break; - case 0x1B: set_mnemonic(i, "LDR"); type=LOADLR; break; 
-#endif - case 0x20: set_mnemonic(i, "LB"); type=LOAD; break; - case 0x21: set_mnemonic(i, "LH"); type=LOAD; break; - case 0x22: set_mnemonic(i, "LWL"); type=LOADLR; break; - case 0x23: set_mnemonic(i, "LW"); type=LOAD; break; - case 0x24: set_mnemonic(i, "LBU"); type=LOAD; break; - case 0x25: set_mnemonic(i, "LHU"); type=LOAD; break; - case 0x26: set_mnemonic(i, "LWR"); type=LOADLR; break; -#if 0 - case 0x27: set_mnemonic(i, "LWU"); type=LOAD; break; -#endif - case 0x28: set_mnemonic(i, "SB"); type=STORE; break; - case 0x29: set_mnemonic(i, "SH"); type=STORE; break; - case 0x2A: set_mnemonic(i, "SWL"); type=STORELR; break; - case 0x2B: set_mnemonic(i, "SW"); type=STORE; break; -#if 0 - case 0x2C: set_mnemonic(i, "SDL"); type=STORELR; break; - case 0x2D: set_mnemonic(i, "SDR"); type=STORELR; break; -#endif - case 0x2E: set_mnemonic(i, "SWR"); type=STORELR; break; - case 0x2F: set_mnemonic(i, "CACHE"); type=NOP; break; - case 0x30: set_mnemonic(i, "LL"); type=NI; break; - case 0x31: set_mnemonic(i, "LWC1"); type=C1LS; break; -#if 0 - case 0x34: set_mnemonic(i, "LLD"); type=NI; break; - case 0x35: set_mnemonic(i, "LDC1"); type=C1LS; break; - case 0x37: set_mnemonic(i, "LD"); type=LOAD; break; -#endif - case 0x38: set_mnemonic(i, "SC"); type=NI; break; - case 0x39: set_mnemonic(i, "SWC1"); type=C1LS; break; -#if 0 - case 0x3C: set_mnemonic(i, "SCD"); type=NI; break; - case 0x3D: set_mnemonic(i, "SDC1"); type=C1LS; break; - case 0x3F: set_mnemonic(i, "SD"); type=STORE; break; -#endif - case 0x12: set_mnemonic(i, "COP2"); type=NI; + case 0x12: set_mnemonic(i, "COP2"); op2=(source[i]>>21)&0x1f; - //if (op2 & 0x10) - if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns + if (op2 & 0x10) { + type = OTHER; if (gte_handlers[source[i]&0x3f]!=NULL) { #ifdef DISASM if (gte_regnames[source[i]&0x3f]!=NULL) @@ -6724,7 +6694,7 @@ static noinline void pass1_disassemble(u_int pagelimit) else snprintf(insn[i], sizeof(insn[i]), "COP2 %x", 
source[i]&0x3f); #endif - type=C2OP; + type = C2OP; } } else switch(op2) @@ -6735,32 +6705,53 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x06: set_mnemonic(i, "CTC2"); type=COP2; break; } break; + case 0x13: set_mnemonic(i, "COP3"); + op2=(source[i]>>21)&0x1f; + break; + case 0x20: set_mnemonic(i, "LB"); type=LOAD; break; + case 0x21: set_mnemonic(i, "LH"); type=LOAD; break; + case 0x22: set_mnemonic(i, "LWL"); type=LOADLR; break; + case 0x23: set_mnemonic(i, "LW"); type=LOAD; break; + case 0x24: set_mnemonic(i, "LBU"); type=LOAD; break; + case 0x25: set_mnemonic(i, "LHU"); type=LOAD; break; + case 0x26: set_mnemonic(i, "LWR"); type=LOADLR; break; + case 0x28: set_mnemonic(i, "SB"); type=STORE; break; + case 0x29: set_mnemonic(i, "SH"); type=STORE; break; + case 0x2A: set_mnemonic(i, "SWL"); type=STORELR; break; + case 0x2B: set_mnemonic(i, "SW"); type=STORE; break; + case 0x2E: set_mnemonic(i, "SWR"); type=STORELR; break; case 0x32: set_mnemonic(i, "LWC2"); type=C2LS; break; case 0x3A: set_mnemonic(i, "SWC2"); type=C2LS; break; - case 0x3B: set_mnemonic(i, "HLECALL"); type=HLECALL; break; - default: set_mnemonic(i, "???"); type=NI; - SysPrintf("NI %08x @%08x (%08x)\n", source[i], start + i*4, start); + case 0x3B: + if (Config.HLE && (source[i] & 0x03ffffff) < ARRAY_SIZE(psxHLEt)) { + set_mnemonic(i, "HLECALL"); + type = HLECALL; + } + break; + default: break; } + if (type == INTCALL) + SysPrintf("NI %08x @%08x (%08x)\n", source[i], start + i*4, start); dops[i].itype=type; dops[i].opcode2=op2; /* Get registers/immediates */ dops[i].use_lt1=0; gte_rs[i]=gte_rt[i]=0; + dops[i].rs1 = 0; + dops[i].rs2 = 0; + dops[i].rt1 = 0; + dops[i].rt2 = 0; switch(type) { case LOAD: dops[i].rs1=(source[i]>>21)&0x1f; - dops[i].rs2=0; dops[i].rt1=(source[i]>>16)&0x1f; - dops[i].rt2=0; imm[i]=(short)source[i]; break; case STORE: case STORELR: dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=(source[i]>>16)&0x1f; - dops[i].rt1=0; - dops[i].rt2=0; 
imm[i]=(short)source[i]; break; case LOADLR: @@ -6769,7 +6760,6 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=(source[i]>>16)&0x1f; dops[i].rt1=(source[i]>>16)&0x1f; - dops[i].rt2=0; imm[i]=(short)source[i]; break; case IMM16: @@ -6777,7 +6767,6 @@ static noinline void pass1_disassemble(u_int pagelimit) else dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=0; dops[i].rt1=(source[i]>>16)&0x1f; - dops[i].rt2=0; if(op>=0x0c&&op<=0x0e) { // ANDI/ORI/XORI imm[i]=(unsigned short)source[i]; }else{ @@ -6785,10 +6774,6 @@ static noinline void pass1_disassemble(u_int pagelimit) } break; case UJUMP: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; // The JAL instruction writes to r31. if (op&1) { dops[i].rt1=31; @@ -6797,9 +6782,6 @@ static noinline void pass1_disassemble(u_int pagelimit) break; case RJUMP: dops[i].rs1=(source[i]>>21)&0x1f; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; // The JALR instruction writes to rd. 
if (op2&1) { dops[i].rt1=(source[i]>>11)&0x1f; @@ -6809,8 +6791,6 @@ static noinline void pass1_disassemble(u_int pagelimit) case CJUMP: dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=(source[i]>>16)&0x1f; - dops[i].rt1=0; - dops[i].rt2=0; if(op&2) { // BGTZ/BLEZ dops[i].rs2=0; } @@ -6818,10 +6798,8 @@ static noinline void pass1_disassemble(u_int pagelimit) case SJUMP: dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=CCREG; - dops[i].rt1=0; - dops[i].rt2=0; - if(op2&0x10) { // BxxAL - dops[i].rt1=31; + if (op2 == 0x10 || op2 == 0x11) { // BxxAL + dops[i].rt1 = 31; // NOTE: If the branch is not taken, r31 is still overwritten } break; @@ -6829,7 +6807,6 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rs1=(source[i]>>21)&0x1f; // source dops[i].rs2=(source[i]>>16)&0x1f; // subtract amount dops[i].rt1=(source[i]>>11)&0x1f; // destination - dops[i].rt2=0; break; case MULTDIV: dops[i].rs1=(source[i]>>21)&0x1f; // source @@ -6838,10 +6815,6 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rt2=LOREG; break; case MOV: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; if(op2==0x10) dops[i].rs1=HIREG; // MFHI if(op2==0x11) dops[i].rt1=HIREG; // MTHI if(op2==0x12) dops[i].rs1=LOREG; // MFLO @@ -6853,41 +6826,19 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rs1=(source[i]>>16)&0x1f; // target of shift dops[i].rs2=(source[i]>>21)&0x1f; // shift amount dops[i].rt1=(source[i]>>11)&0x1f; // destination - dops[i].rt2=0; break; case SHIFTIMM: dops[i].rs1=(source[i]>>16)&0x1f; dops[i].rs2=0; dops[i].rt1=(source[i]>>11)&0x1f; - dops[i].rt2=0; imm[i]=(source[i]>>6)&0x1f; - // DSxx32 instructions - if(op2>=0x3c) imm[i]|=0x20; break; case COP0: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; - if(op2==0||op2==2) dops[i].rt1=(source[i]>>16)&0x1F; // MFC0/CFC0 - if(op2==4||op2==6) dops[i].rs1=(source[i]>>16)&0x1F; // MTC0/CTC0 + if(op2==0) dops[i].rt1=(source[i]>>16)&0x1F; // MFC0 + if(op2==4) 
dops[i].rs1=(source[i]>>16)&0x1F; // MTC0 if(op2==4&&((source[i]>>11)&0x1f)==12) dops[i].rt2=CSREG; // Status - if(op2==16) if((source[i]&0x3f)==0x18) dops[i].rs2=CCREG; // ERET - break; - case COP1: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; - if(op2<3) dops[i].rt1=(source[i]>>16)&0x1F; // MFC1/DMFC1/CFC1 - if(op2>3) dops[i].rs1=(source[i]>>16)&0x1F; // MTC1/DMTC1/CTC1 - dops[i].rs2=CSREG; break; case COP2: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; if(op2<3) dops[i].rt1=(source[i]>>16)&0x1F; // MFC2/CFC2 if(op2>3) dops[i].rs1=(source[i]>>16)&0x1F; // MTC2/CTC2 dops[i].rs2=CSREG; @@ -6900,27 +6851,13 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x06: gte_rt[i]=1ll<<(gr+32); break; // CTC2 } break; - case C1LS: - dops[i].rs1=(source[i]>>21)&0x1F; - dops[i].rs2=CSREG; - dops[i].rt1=0; - dops[i].rt2=0; - imm[i]=(short)source[i]; - break; case C2LS: dops[i].rs1=(source[i]>>21)&0x1F; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; imm[i]=(short)source[i]; if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2 else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2 break; case C2OP: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; gte_rs[i]=gte_reg_reads[source[i]&0x3f]; gte_rt[i]=gte_reg_writes[source[i]&0x3f]; gte_rt[i]|=1ll<<63; // every op changes flags @@ -6935,15 +6872,9 @@ static noinline void pass1_disassemble(u_int pagelimit) case HLECALL: case INTCALL: dops[i].rs1=CCREG; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; break; default: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; + break; } /* Calculate branch target addresses */ if(type==UJUMP) @@ -6970,41 +6901,48 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].is_jump = (dops[i].itype == RJUMP || dops[i].itype == UJUMP || dops[i].itype == CJUMP || dops[i].itype == SJUMP); dops[i].is_ujump = (dops[i].itype == RJUMP || dops[i].itype == UJUMP); // || (source[i] >> 16) == 0x1000 
// beq r0,r0 dops[i].is_load = (dops[i].itype == LOAD || dops[i].itype == LOADLR || op == 0x32); // LWC2 + dops[i].is_delay_load = (dops[i].is_load || (source[i] & 0xf3d00000) == 0x40000000); // MFC/CFC dops[i].is_store = (dops[i].itype == STORE || dops[i].itype == STORELR || op == 0x3a); // SWC2 + dops[i].is_exception = (dops[i].itype == SYSCALL || dops[i].itype == HLECALL || dops[i].itype == INTCALL); + dops[i].may_except = dops[i].is_exception || (dops[i].itype == ALU && (op2 == 0x20 || op2 == 0x22)) || op == 8; - /* messy cases to just pass over to the interpreter */ + /* rare messy cases to just pass over to the interpreter */ if (i > 0 && dops[i-1].is_jump) { - int do_in_intrp=0; // branch in delay slot? if (dops[i].is_jump) { // don't handle first branch and call interpreter if it's hit - SysPrintf("branch in delay slot @%08x (%08x)\n", start + i*4, start); - do_in_intrp=1; + SysPrintf("branch in DS @%08x (%08x)\n", start + i*4, start); + force_prev_to_interpreter = 1; } - // basic load delay detection - else if((type==LOAD||type==LOADLR||type==COP0||type==COP2||type==C2LS)&&dops[i].rt1!=0) { + // basic load delay detection through a branch + else if (dops[i].is_delay_load && dops[i].rt1 != 0) { int t=(ba[i-1]-start)/4; if(0 <= t && t < i &&(dops[i].rt1==dops[t].rs1||dops[i].rt1==dops[t].rs2)&&dops[t].itype!=CJUMP&&dops[t].itype!=SJUMP) { // jump target wants DS result - potential load delay effect - SysPrintf("load delay @%08x (%08x)\n", start + i*4, start); - do_in_intrp=1; + SysPrintf("load delay in DS @%08x (%08x)\n", start + i*4, start); + force_prev_to_interpreter = 1; dops[t+1].bt=1; // expected return from interpreter } else if(i>=2&&dops[i-2].rt1==2&&dops[i].rt1==2&&dops[i].rs1!=2&&dops[i].rs2!=2&&dops[i-1].rs1!=2&&dops[i-1].rs2!=2&& !(i>=3&&dops[i-3].is_jump)) { // v0 overwrite like this is a sign of trouble, bail out SysPrintf("v0 overwrite @%08x (%08x)\n", start + i*4, start); - do_in_intrp=1; + force_prev_to_interpreter = 1; } } - if 
(do_in_intrp) { - memset(&dops[i-1], 0, sizeof(dops[i-1])); - dops[i-1].itype = INTCALL; - dops[i-1].rs1 = CCREG; - ba[i-1] = -1; - done = 2; - i--; // don't compile the DS - } + } + else if (i > 0 && dops[i-1].is_delay_load && dops[i-1].rt1 != 0 + && (dops[i].rs1 == dops[i-1].rt1 || dops[i].rs2 == dops[i-1].rt1)) { + SysPrintf("load delay @%08x (%08x)\n", start + i*4, start); + force_prev_to_interpreter = 1; + } + if (force_prev_to_interpreter) { + memset(&dops[i-1], 0, sizeof(dops[i-1])); + dops[i-1].itype = INTCALL; + dops[i-1].rs1 = CCREG; + ba[i-1] = -1; + done = 2; + i--; // don't compile the DS/problematic load/etc } /* Is this the end of the block? */ @@ -7038,7 +6976,11 @@ static noinline void pass1_disassemble(u_int pagelimit) // Don't get too close to the limit if(i>MAXBLOCK/2) done=1; } - if (dops[i].itype == SYSCALL || dops[i].itype == HLECALL || dops[i].itype == INTCALL) + if (dops[i].itype == HLECALL) + stop = 1; + else if (dops[i].itype == INTCALL) + stop = 2; + else if (dops[i].is_exception) done = stop_after_jal ? 1 : 2; if (done == 2) { // Does the block continue due to a branch? 
@@ -7054,7 +6996,7 @@ static noinline void pass1_disassemble(u_int pagelimit) assert(start+i*4 8 || dops[i].opcode == 0x11)) { + if (dops[i].itype == INTCALL && (++ni_count > 8 || dops[i].opcode == 0x11)) { done=stop_after_jal=1; SysPrintf("Disabled speculative precompilation\n"); } @@ -7177,14 +7119,13 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) } } } - else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL) + else if(dops[i].may_except) { - // SYSCALL instruction (software interrupt) + // SYSCALL instruction, etc or conditional exception u=1; } - else if(dops[i].itype==COP0 && dops[i].opcode2==0x10) + else if (dops[i].itype == RFE) { - // RFE u=1; } //u=1; // DEBUG @@ -7356,7 +7297,6 @@ static noinline void pass3_register_alloc(u_int addr) delayslot_alloc(¤t,i+1); //current.isconst=0; // DEBUG ds=1; - //printf("i=%d, isconst=%x\n",i,current.isconst); break; case RJUMP: //current.isconst=0; @@ -7472,13 +7412,8 @@ static noinline void pass3_register_alloc(u_int addr) //current.isconst=0; break; case SJUMP: - //current.isconst=0; - //current.wasconst=0; - //regs[i].wasconst=0; clear_const(¤t,dops[i].rs1); clear_const(¤t,dops[i].rt1); - //if((dops[i].opcode2&0x1E)==0x0) // BLTZ/BGEZ - if((dops[i].opcode2&0x0E)==0x0) // BLTZ/BGEZ { alloc_cc(¤t,i); dirty_reg(¤t,CCREG); @@ -7486,9 +7421,6 @@ static noinline void pass3_register_alloc(u_int addr) if (dops[i].rt1==31) { // BLTZAL/BGEZAL alloc_reg(¤t,i,31); dirty_reg(¤t,31); - //#ifdef REG_PREFETCH - //alloc_reg(¤t,i,PTEMP); - //#endif } if((dops[i].rs1&&(dops[i].rs1==dops[i+1].rt1||dops[i].rs1==dops[i+1].rt2)) // The delay slot overwrites the branch condition. 
||(dops[i].rt1==31&&(dops[i+1].rs1==31||dops[i+1].rs2==31||dops[i+1].rt1==31||dops[i+1].rt2==31))) { // DS touches $ra @@ -7504,17 +7436,6 @@ static noinline void pass3_register_alloc(u_int addr) delayslot_alloc(¤t,i+1); } } - else - // Don't alloc the delay slot yet because we might not execute it - if((dops[i].opcode2&0x1E)==0x2) // BLTZL/BGEZL - { - current.isconst=0; - current.wasconst=0; - regs[i].wasconst=0; - alloc_cc(¤t,i); - dirty_reg(¤t,CCREG); - alloc_reg(¤t,i,dops[i].rs1); - } ds=1; //current.isconst=0; break; @@ -7547,14 +7468,12 @@ static noinline void pass3_register_alloc(u_int addr) case COP0: cop0_alloc(¤t,i); break; - case COP1: + case RFE: + rfe_alloc(¤t,i); break; case COP2: cop2_alloc(¤t,i); break; - case C1LS: - c1ls_alloc(¤t,i); - break; case C2LS: c2ls_alloc(¤t,i); break; @@ -7746,8 +7665,6 @@ static noinline void pass3_register_alloc(u_int addr) } break; case SJUMP: - //if((dops[i-1].opcode2&0x1E)==0) // BLTZ/BGEZ - if((dops[i-1].opcode2&0x0E)==0) // BLTZ/BGEZ { alloc_cc(¤t,i-1); dirty_reg(¤t,CCREG); @@ -7771,22 +7688,8 @@ static noinline void pass3_register_alloc(u_int addr) memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); } - else - // Alloc the delay slot in case the branch is taken - if((dops[i-1].opcode2&0x1E)==2) // BLTZL/BGEZL - { - memcpy(&branch_regs[i-1],¤t,sizeof(current)); - branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL< 0 && (dops[i-1].is_jump || dops[i].itype == SYSCALL || dops[i].itype == HLECALL)) + if (i > 0 && (dops[i-1].is_jump || dops[i].is_exception)) { cc=0; } @@ -7940,14 +7843,9 @@ static noinline void pass4_cull_unused_regs(void) nr |= get_regm(regs[i].regmap_entry, INVCP); } } - else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL) + else if (dops[i].may_except) { - // SYSCALL instruction (software interrupt) - nr=0; - } - else if(dops[i].itype==COP0 && (source[i]&0x3f)==0x18) - { - // ERET instruction 
(return from interrupt) + // SYSCALL instruction, etc or conditional exception nr=0; } else // Non-branch @@ -8110,8 +8008,8 @@ static noinline void pass5a_preallocate1(void) if(ba[i]>=start && ba[i]<(start+i*4)) if(dops[i+1].itype==NOP||dops[i+1].itype==MOV||dops[i+1].itype==ALU ||dops[i+1].itype==SHIFTIMM||dops[i+1].itype==IMM16||dops[i+1].itype==LOAD - ||dops[i+1].itype==STORE||dops[i+1].itype==STORELR||dops[i+1].itype==C1LS - ||dops[i+1].itype==SHIFT||dops[i+1].itype==COP1 + ||dops[i+1].itype==STORE||dops[i+1].itype==STORELR + ||dops[i+1].itype==SHIFT ||dops[i+1].itype==COP2||dops[i+1].itype==C2LS||dops[i+1].itype==C2OP) { int t=(ba[i]-start)>>2; @@ -8379,9 +8277,9 @@ static noinline void pass5a_preallocate1(void) } } } - if(dops[i].itype!=STORE&&dops[i].itype!=STORELR&&dops[i].itype!=C1LS&&dops[i].itype!=SHIFT&& + if(dops[i].itype!=STORE&&dops[i].itype!=STORELR&&dops[i].itype!=SHIFT&& dops[i].itype!=NOP&&dops[i].itype!=MOV&&dops[i].itype!=ALU&&dops[i].itype!=SHIFTIMM&& - dops[i].itype!=IMM16&&dops[i].itype!=LOAD&&dops[i].itype!=COP1) + dops[i].itype!=IMM16&&dops[i].itype!=LOAD) { memcpy(f_regmap,regs[i].regmap,sizeof(f_regmap)); } @@ -8401,7 +8299,7 @@ static noinline void pass5b_preallocate2(void) if(!dops[i+1].bt) { if(dops[i].itype==ALU||dops[i].itype==MOV||dops[i].itype==LOAD||dops[i].itype==SHIFTIMM||dops[i].itype==IMM16 - ||((dops[i].itype==COP1||dops[i].itype==COP2)&&dops[i].opcode2<3)) + ||(dops[i].itype==COP2&&dops[i].opcode2<3)) { if(dops[i+1].rs1) { if((hr=get_reg(regs[i+1].regmap,dops[i+1].rs1))>=0) @@ -8437,7 +8335,7 @@ static noinline void pass5b_preallocate2(void) } // Preload target address for load instruction (non-constant) if(dops[i+1].itype==LOAD&&dops[i+1].rs1&&get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { - if((hr=get_reg(regs[i+1].regmap,dops[i+1].rt1))>=0) + if((hr=get_reg_w(regs[i+1].regmap, dops[i+1].rt1))>=0) { if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { @@ -8454,7 +8352,7 @@ static noinline void pass5b_preallocate2(void) } 
// Load source into target register if(dops[i+1].use_lt1&&get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { - if((hr=get_reg(regs[i+1].regmap,dops[i+1].rt1))>=0) + if((hr=get_reg_w(regs[i+1].regmap, dops[i+1].rt1))>=0) { if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { @@ -8528,10 +8426,10 @@ static noinline void pass5b_preallocate2(void) } } } - if(dops[i+1].itype==LOAD||dops[i+1].itype==LOADLR||dops[i+1].itype==STORE||dops[i+1].itype==STORELR/*||dops[i+1].itype==C1LS||||dops[i+1].itype==C2LS*/) { + if(dops[i+1].itype==LOAD||dops[i+1].itype==LOADLR||dops[i+1].itype==STORE||dops[i+1].itype==STORELR/*||dops[i+1].itype==C2LS*/) { hr = -1; if(dops[i+1].itype==LOAD) - hr=get_reg(regs[i+1].regmap,dops[i+1].rt1); + hr=get_reg_w(regs[i+1].regmap, dops[i+1].rt1); if(dops[i+1].itype==LOADLR||(dops[i+1].opcode&0x3b)==0x31||(dops[i+1].opcode&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2 hr=get_reg(regs[i+1].regmap,FTEMP); if(dops[i+1].itype==STORE||dops[i+1].itype==STORELR||(dops[i+1].opcode&0x3b)==0x39||(dops[i+1].opcode&0x3b)==0x3a) { // SWC1/SDC1/SWC2/SDC2 @@ -8808,15 +8706,9 @@ static noinline void pass6_clean_registers(int istart, int iend, int wr) } } } - else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL) - { - // SYSCALL instruction (software interrupt) - will_dirty_i=0; - wont_dirty_i=0; - } - else if(dops[i].itype==COP0 && (source[i]&0x3f)==0x18) + else if (dops[i].may_except) { - // ERET instruction (return from interrupt) + // SYSCALL instruction, etc or conditional exception will_dirty_i=0; wont_dirty_i=0; } @@ -8987,14 +8879,21 @@ static int new_recompile_block(u_int addr) assem_debug("NOTCOMPILED: addr = %x -> %p\n", addr, out); + if (addr & 3) { + if (addr != hack_addr) { + SysPrintf("game crash @%08x, ra=%08x\n", addr, psxRegs.GPR.n.ra); + hack_addr = addr; + } + return -1; + } + // this is just for speculation for (i = 1; i < 32; i++) { if ((psxRegs.GPR.r[i] & 0xffff0000) == 0x1f800000) state_rflags |= 1 << i; } - assert(!(addr & 3)); - 
start = addr & ~3; + start = addr; new_dynarec_did_compile=1; if (Config.HLE && start == 0x80001000) // hlecall { @@ -9248,32 +9147,31 @@ static int new_recompile_block(u_int addr) emit_jmp(0); } - // TODO: delay slot stubs? // Stubs - for(i=0;i 0 extern int last_count; // last absolute target, often = next_interupt -@@ -532,6 +532,7 @@ static int cycle_multiplier_active; +@@ -593,6 +593,7 @@ static int cycle_multiplier_active; static int CLOCK_ADJUST(int x) { @@ -19,7 +19,7 @@ index b160a4a..0d91999 100644 int m = cycle_multiplier_active; int s = (x >> 31) | 1; return (x * m + s * 50) / 100; -@@ -662,6 +663,9 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) +@@ -745,6 +746,9 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) // This is called from the recompiled JR/JALR instructions static void noinline *get_addr(u_int vaddr, int can_compile) { @@ -29,7 +29,7 @@ index b160a4a..0d91999 100644 u_int start_page = get_page_prev(vaddr); u_int i, page, end_page = get_page(vaddr); void *found_clean = NULL; -@@ -7046,7 +7050,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) +@@ -7143,7 +7147,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) // R0 is always unneeded u|=1; // Save it @@ -38,7 +38,7 @@ index b160a4a..0d91999 100644 gte_unneeded[i]=gte_u; /* printf("ur (%d,%d) %x: ",istart,iend,start+i*4); -@@ -8236,6 +8240,7 @@ static noinline void pass5a_preallocate1(void) +@@ -8292,6 +8296,7 @@ static noinline void pass5a_preallocate1(void) static noinline void pass5b_preallocate2(void) { int i, hr; @@ -46,7 +46,7 @@ index b160a4a..0d91999 100644 for(i=0;isubCycleStep >= 0x10000); + regs->subCycle += regs->subCycleStep; +- regs->cycle += regs->subCycle >> 16; ++ regs->cycle += 2; //regs->subCycle >> 16; + regs->subCycle &= 0xffff; + } + +@@ -1341,8 +1341,14 @@ static void intShutdown() { + + // single step (may do several ops in case of a branch or load delay) + void 
execI(psxRegisters *regs) { ++ extern int last_count; ++ void do_insn_cmp(void); ++ printf("execI %08x c %u, ni %u\n", regs->pc, regs->cycle, next_interupt); ++ last_count = 0; + do { + execIbp(psxMemRLUT, regs); ++ if (regs->dloadReg[0] || regs->dloadReg[1]) ++ do_insn_cmp(); + } while (regs->dloadReg[0] || regs->dloadReg[1]); + } + diff --git a/libpcsxcore/new_dynarec/patches/trace_intr b/libpcsxcore/new_dynarec/patches/trace_intr index c3f4cf135..40b3edb72 100644 --- a/libpcsxcore/new_dynarec/patches/trace_intr +++ b/libpcsxcore/new_dynarec/patches/trace_intr @@ -1,12 +1,13 @@ diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c -index 10d99ba..1e097ae 100644 +index 89716fa0..02a8d7c5 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c -@@ -405,13 +407,17 @@ static void ari64_shutdown() +@@ -320,13 +320,18 @@ static void ari64_shutdown() { new_dynarec_cleanup(); new_dyna_pcsx_mem_shutdown(); + (void)ari64_execute; ++ (void)ari64_execute_block; } +extern void intExecuteT(); @@ -16,13 +17,13 @@ index 10d99ba..1e097ae 100644 ari64_init, ari64_reset, - ari64_execute, -- ari64_execute_until, +- ari64_execute_block, + intExecuteT, + intExecuteBlockT, ari64_clear, ari64_notify, ari64_apply_config, -@@ -481,7 +487,7 @@ static u32 memcheck_read(u32 a) +@@ -395,7 +400,7 @@ static u32 memcheck_read(u32 a) return *(u32 *)(psxM + (a & 0x1ffffc)); } @@ -32,23 +33,23 @@ index 10d99ba..1e097ae 100644 { static psxRegisters oldregs; diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c -index bb471b6..8f68a3b 100644 +index 190f8fc7..5feb7a02 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c -@@ -272,6 +272,8 @@ static void write_biu(u32 value) - if (address != 0xfffe0130) +@@ -289,6 +289,8 @@ static void write_biu(u32 value) return; + } +extern u32 handler_cycle; +handler_cycle = psxRegs.cycle; - switch (value) { - case 0x800: case 0x804: - 
unmap_ram_write(); + memprintf("write_biu %08x @%08x %u\n", value, psxRegs.pc, psxRegs.cycle); + psxRegs.biuReg = value; + } diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c -index ff0efbc..4459644 100644 +index 18bd6a4e..bc2eb3f6 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c -@@ -379,9 +379,12 @@ void psxRcntUpdate() +@@ -389,9 +389,12 @@ void psxRcntUpdate() /******************************************************************************/ @@ -61,7 +62,7 @@ index ff0efbc..4459644 100644 _psxRcntWcount( index, value ); psxRcntSet(); -@@ -390,6 +393,7 @@ void psxRcntWcount( u32 index, u32 value ) +@@ -400,6 +403,7 @@ void psxRcntWcount( u32 index, u32 value ) void psxRcntWmode( u32 index, u32 value ) { verboseLog( 1, "[RCNT %i] wmode: %x\n", index, value ); @@ -69,7 +70,7 @@ index ff0efbc..4459644 100644 _psxRcntWmode( index, value ); _psxRcntWcount( index, 0 ); -@@ -401,6 +405,7 @@ void psxRcntWmode( u32 index, u32 value ) +@@ -411,6 +415,7 @@ void psxRcntWmode( u32 index, u32 value ) void psxRcntWtarget( u32 index, u32 value ) { verboseLog( 1, "[RCNT %i] wtarget: %x\n", index, value ); @@ -77,7 +78,7 @@ index ff0efbc..4459644 100644 rcnts[index].target = value; -@@ -413,6 +418,7 @@ void psxRcntWtarget( u32 index, u32 value ) +@@ -423,6 +428,7 @@ void psxRcntWtarget( u32 index, u32 value ) u32 psxRcntRcount( u32 index ) { u32 count; @@ -86,10 +87,10 @@ index ff0efbc..4459644 100644 count = _psxRcntRcount( index ); diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c -index dbcb989..0716f5e 100644 +index 27ddfeab..d7c6ff05 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c -@@ -373,13 +373,14 @@ void psxHwWrite8(u32 add, u8 value) { +@@ -377,13 +377,14 @@ void psxHwWrite8(u32 add, u8 value) { case 0x1f801803: cdrWrite3(value); break; default: @@ -105,7 +106,7 @@ index dbcb989..0716f5e 100644 #ifdef PSXHW_LOG PSXHW_LOG("*Known 8bit write at address %x value %x\n", add, value); #endif -@@ -504,6 +505,7 @@ void 
psxHwWrite16(u32 add, u16 value) { +@@ -506,6 +507,7 @@ void psxHwWrite16(u32 add, u16 value) { return; } @@ -113,7 +114,7 @@ index dbcb989..0716f5e 100644 psxHu16ref(add) = SWAPu16(value); #ifdef PSXHW_LOG PSXHW_LOG("*Unknown 16bit write at address %x value %x\n", add, value); -@@ -699,9 +701,9 @@ void psxHwWrite32(u32 add, u32 value) { +@@ -701,9 +703,9 @@ void psxHwWrite32(u32 add, u32 value) { return; case 0x1f801820: @@ -125,7 +126,7 @@ index dbcb989..0716f5e 100644 case 0x1f801100: #ifdef PSXHW_LOG -@@ -759,6 +761,7 @@ void psxHwWrite32(u32 add, u32 value) { +@@ -761,6 +763,7 @@ void psxHwWrite32(u32 add, u32 value) { return; } @@ -134,94 +135,99 @@ index dbcb989..0716f5e 100644 #ifdef PSXHW_LOG PSXHW_LOG("*Unknown 32bit write at address %x value %x\n", add, value); diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c -index e7e3269..8f4004d 100644 +index be15f782..6f07478f 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c -@@ -467,6 +467,8 @@ static void doBranch(u32 tar) { - psxRegs.pc += 4; - psxRegs.cycle += BIAS; - -+ (void)tmp; -+#if 0 - // check for load delay - tmp = psxRegs.code >> 26; - switch (tmp) { -@@ -500,13 +502,15 @@ static void doBranch(u32 tar) { - } - break; - } -- -+#endif - psxBSC[psxRegs.code >> 26](); +@@ -237,7 +237,7 @@ static inline void addCycle(psxRegisters *regs) + { + assert(regs->subCycleStep >= 0x10000); + regs->subCycle += regs->subCycleStep; +- regs->cycle += regs->subCycle >> 16; ++ regs->cycle += 2; //regs->subCycle >> 16; + regs->subCycle &= 0xffff; + } - branch = 0; - psxRegs.pc = branchPC; +@@ -434,7 +434,9 @@ static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) { + regs->CP0.n.Target = pc_final; + regs->branching = 0; -+ psxRegs.cycle += BIAS; ++ psxRegs.cycle += 2; psxBranchTest(); -+ psxRegs.cycle -= BIAS; ++ psxRegs.cycle -= 2; } - /********************************************************* -@@ -616,12 +620,13 @@ void psxMULTU_stall() { - psxMULTU(); + 
static void doBranchReg(psxRegisters *regs, u32 tar) { +@@ -967,7 +969,7 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) { + } } -+#define doBranchNotTaken() do { psxRegs.cycle += BIAS; execI(); psxBranchTest(); psxRegs.cycle -= BIAS; } while(0) - /********************************************************* - * Register branch logic * - * Format: OP rs, offset * - *********************************************************/ --#define RepZBranchi32(op) if(_i32(_rRs_) op 0) doBranch(_BranchTarget_); --#define RepZBranchLinki32(op) { _SetLink(31); if(_i32(_rRs_) op 0) { doBranch(_BranchTarget_); } } -+#define RepZBranchi32(op) if(_i32(_rRs_) op 0) doBranch(_BranchTarget_); else doBranchNotTaken(); -+#define RepZBranchLinki32(op) { _SetLink(31); if(_i32(_rRs_) op 0) { doBranch(_BranchTarget_); } else doBranchNotTaken(); } - - void psxBGEZ() { RepZBranchi32(>=) } // Branch if Rs >= 0 - void psxBGEZAL() { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link -@@ -703,7 +708,7 @@ void psxRFE() { - * Register branch logic * - * Format: OP rs, rt, offset * - *********************************************************/ --#define RepBranchi32(op) if(_i32(_rRs_) op _i32(_rRt_)) doBranch(_BranchTarget_); -+#define RepBranchi32(op) if(_i32(_rRs_) op _i32(_rRt_)) doBranch(_BranchTarget_); else doBranchNotTaken(); - - void psxBEQ() { RepBranchi32(==) } // Branch if Rs == Rt - void psxBNE() { RepBranchi32(!=) } // Branch if Rs != Rt -@@ -901,7 +907,7 @@ void MTC0(int reg, u32 val) { - } +-OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); } ++OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); psxBranchTest(); } + + // no exception + static inline void psxNULLne(psxRegisters *regs) { +@@ -1175,18 +1177,19 @@ static void intReset() { + static inline void execI_(u8 **memRLUT, psxRegisters *regs) { + u32 pc = regs->pc; + +- addCycle(regs); ++ //addCycle(regs); + dloadStep(regs); + + regs->pc += 4; + regs->code = fetch(regs, memRLUT, pc); + psxBSC[regs->code >> 26](regs, regs->code); ++ 
psxRegs.cycle += 2; } --void psxMTC0() { MTC0(_Rd_, _u32(_rRt_)); } -+void psxMTC0() { MTC0(_Rd_, _u32(_rRt_)); psxBranchTest(); } - void psxCTC0() { MTC0(_Rd_, _u32(_rRt_)); } + static inline void execIbp(u8 **memRLUT, psxRegisters *regs) { + u32 pc = regs->pc; + +- addCycle(regs); ++ //addCycle(regs); + dloadStep(regs); + + if (execBreakCheck(regs, pc)) +@@ -1195,6 +1198,7 @@ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) { + regs->pc += 4; + regs->code = fetch(regs, memRLUT, pc); + psxBSC[regs->code >> 26](regs, regs->code); ++ psxRegs.cycle += 2; + } - /********************************************************* -@@ -1028,6 +1034,23 @@ void intExecuteBlock() { - while (!branch2) execI(); + static void intExecute() { +@@ -1224,6 +1228,30 @@ void intExecuteBlock(enum blockExecCaller caller) { + execI_(memRLUT, regs_); } +extern void do_insn_trace(void); + +void intExecuteT() { -+ for (;;) { ++ psxRegisters *regs_ = &psxRegs; ++ u8 **memRLUT = psxMemRLUT; ++ extern int stop; ++ ++ while (!stop) { + do_insn_trace(); -+ execI(); ++ execIbp(memRLUT, regs_); + } +} + +void intExecuteBlockT() { -+ branch2 = 0; -+ while (!branch2) { ++ psxRegisters *regs_ = &psxRegs; ++ u8 **memRLUT = psxMemRLUT; ++ ++ branchSeen = 0; ++ while (!branchSeen) { + do_insn_trace(); -+ execI(); ++ execIbp(memRLUT, regs_); + } +} + static void intClear(u32 Addr, u32 Size) { } -@@ -1050,7 +1073,7 @@ void intApplyConfig() { +@@ -1271,7 +1299,7 @@ void intApplyConfig() { assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall); assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall); @@ -230,28 +236,16 @@ index e7e3269..8f4004d 100644 psxBSC[18] = psxCOP2; psxBSC[50] = gteLWC2; psxBSC[58] = gteSWC2; -@@ -1092,9 +1115,10 @@ void execI() { - if (Config.Debug) ProcessDebug(); - - psxRegs.pc += 4; -- psxRegs.cycle += BIAS; - - psxBSC[psxRegs.code >> 26](); -+ -+ psxRegs.cycle += BIAS; - } - - R3000Acpu psxInt = { diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c -index 
46cee0c..c814587 100644 +index 54219ae0..41168ced 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c -@@ -218,11 +218,13 @@ void psxMemShutdown() { +@@ -278,10 +278,13 @@ void psxMemOnIsolate(int enable) + : R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL); } - static int writeok = 1; +extern u32 last_io_addr; - ++ u8 psxMemRead8(u32 mem) { char *p; u32 t; @@ -260,7 +254,7 @@ index 46cee0c..c814587 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -248,6 +250,7 @@ u16 psxMemRead16(u32 mem) { +@@ -307,6 +310,7 @@ u16 psxMemRead16(u32 mem) { char *p; u32 t; @@ -268,7 +262,7 @@ index 46cee0c..c814587 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -273,6 +276,7 @@ u32 psxMemRead32(u32 mem) { +@@ -332,6 +336,7 @@ u32 psxMemRead32(u32 mem) { char *p; u32 t; @@ -276,7 +270,7 @@ index 46cee0c..c814587 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -298,6 +302,7 @@ void psxMemWrite8(u32 mem, u8 value) { +@@ -359,6 +364,7 @@ void psxMemWrite8(u32 mem, u8 value) { char *p; u32 t; @@ -284,7 +278,7 @@ index 46cee0c..c814587 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -325,6 +330,7 @@ void psxMemWrite16(u32 mem, u16 value) { +@@ -386,6 +392,7 @@ void psxMemWrite16(u32 mem, u16 value) { char *p; u32 t; @@ -292,7 +286,7 @@ index 46cee0c..c814587 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -352,6 +358,7 @@ void psxMemWrite32(u32 mem, u32 value) { +@@ -413,6 +420,7 @@ void psxMemWrite32(u32 mem, u32 value) { char *p; u32 t; @@ -300,20 +294,20 @@ index 46cee0c..c814587 100644 // if ((mem&0x1fffff) == 0x71E18 || value == 0x48088800) SysPrintf("t2fix!!\n"); t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { -@@ -381,6 +388,8 @@ void psxMemWrite32(u32 mem, u32 value) { - } else { - int i; - +@@ 
-431,6 +439,8 @@ void psxMemWrite32(u32 mem, u32 value) { + #endif + } else { + if (mem == 0xfffe0130) { +extern u32 handler_cycle; +handler_cycle = psxRegs.cycle; - switch (value) { - case 0x800: case 0x804: - if (writeok == 0) break; + psxRegs.biuReg = value; + return; + } diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c -index 7e6f16b..0114947 100644 +index dffbf6e7..0a3bdb65 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c -@@ -120,6 +120,8 @@ void psxException(u32 code, u32 bd) { +@@ -124,6 +124,8 @@ void psxException(u32 cause, enum R3000Abdt bdt, psxCP0Regs *cp0) { } void psxBranchTest() { diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index be15f782f..e212d8a99 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -454,14 +454,6 @@ static void doBranchRegE(psxRegisters *regs, u32 tar) { doBranch(regs, tar, R3000A_BRANCH_TAKEN); } -#if __has_builtin(__builtin_add_overflow) || (defined(__GNUC__) && __GNUC__ >= 5) -#define add_overflow(a, b, r) __builtin_add_overflow(a, b, &(r)) -#define sub_overflow(a, b, r) __builtin_sub_overflow(a, b, &(r)) -#else -#define add_overflow(a, b, r) ({r = (u32)a + (u32)b; (a ^ ~b) & (a ^ r) & (1u<<31);}) -#define sub_overflow(a, b, r) ({r = (u32)a - (u32)b; (a ^ b) & (a ^ r) & (1u<<31);}) -#endif - static void addExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) { s32 val; if (add_overflow(a1, a2, val)) { @@ -1344,12 +1336,14 @@ void intApplyConfig() { } static void intShutdown() { + dloadClear(&psxRegs); } -// single step (may do several ops in case of a branch) +// single step (may do several ops in case of a branch or load delay) void execI(psxRegisters *regs) { - execI_(psxMemRLUT, regs); - dloadFlush(regs); + do { + execIbp(psxMemRLUT, regs); + } while (regs->dloadReg[0] || regs->dloadReg[1]); } R3000Acpu psxInt = { From 277718fa66c96f64360b2c97a5dfa3ef3e6f1711 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 24 Jul 2023 01:28:07 +0300 Subject: [PATCH 
287/597] drc: optional address error exception support --- libpcsxcore/new_dynarec/assem_arm.c | 41 +- libpcsxcore/new_dynarec/assem_arm64.c | 42 +- libpcsxcore/new_dynarec/assem_arm64.h | 3 + libpcsxcore/new_dynarec/linkage_arm.S | 8 + libpcsxcore/new_dynarec/linkage_arm64.S | 8 + libpcsxcore/new_dynarec/new_dynarec.c | 899 ++++++++++-------- libpcsxcore/new_dynarec/patches/trace_drc_chk | 23 +- 7 files changed, 557 insertions(+), 467 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 88b2ff36c..2850d4e3a 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -1737,28 +1737,27 @@ static void do_readstub(int n) static void inline_readstub(enum stub_type type, int i, u_int addr, const signed char regmap[], int target, int adj, u_int reglist) { - int rs=get_reg(regmap,target); - int rt=get_reg(regmap,target); - if(rs<0) rs=get_reg_temp(regmap); - assert(rs>=0); + int ra = cinfo[i].addr; + int rt = get_reg(regmap,target); + assert(ra >= 0); u_int is_dynamic; uintptr_t host_addr = 0; void *handler; int cc=get_reg(regmap,CCREG); - if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt)) + if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt)) return; handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr); if (handler == NULL) { if(rt<0||dops[i].rt1==0) return; if(addr!=host_addr) - emit_movimm_from(addr,rs,host_addr,rs); + emit_movimm_from(addr,ra,host_addr,ra); switch(type) { - case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break; - case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break; - case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break; - case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break; - case LOADW_STUB: emit_readword_indexed(0,rs,rt); break; + case LOADB_STUB: emit_movsbl_indexed(0,ra,rt); break; + case LOADBU_STUB: emit_movzbl_indexed(0,ra,rt); break; + case LOADH_STUB: emit_movswl_indexed(0,ra,rt); break; + case LOADHU_STUB: 
emit_movzwl_indexed(0,ra,rt); break; + case LOADW_STUB: emit_readword_indexed(0,ra,rt); break; default: assert(0); } return; @@ -1779,8 +1778,8 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, save_regs(reglist); if(target==0) emit_movimm(addr,0); - else if(rs!=0) - emit_mov(rs,0); + else if(ra!=0) + emit_mov(ra,0); if(cc<0) emit_loadreg(CCREG,2); if(is_dynamic) { @@ -1893,19 +1892,19 @@ static void do_writestub(int n) static void inline_writestub(enum stub_type type, int i, u_int addr, const signed char regmap[], int target, int adj, u_int reglist) { - int rs=get_reg_temp(regmap); - int rt=get_reg(regmap,target); - assert(rs>=0); + int ra = cinfo[i].addr; + int rt = get_reg(regmap, target); + assert(ra>=0); assert(rt>=0); uintptr_t host_addr = 0; void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr); if (handler == NULL) { if(addr!=host_addr) - emit_movimm_from(addr,rs,host_addr,rs); + emit_movimm_from(addr,ra,host_addr,ra); switch(type) { - case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break; - case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break; - case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break; + case STOREB_STUB: emit_writebyte_indexed(rt,0,ra); break; + case STOREH_STUB: emit_writehword_indexed(rt,0,ra); break; + case STOREW_STUB: emit_writeword_indexed(rt,0,ra); break; default: assert(0); } return; @@ -1913,7 +1912,7 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, // call a memhandler save_regs(reglist); - pass_args(rs,rt); + pass_args(ra,rt); int cc=get_reg(regmap,CCREG); if(cc<0) emit_loadreg(CCREG,2); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index d35ad451e..670f3799e 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -23,9 +23,6 @@ #include "pcnt.h" #include "arm_features.h" -void do_memhandler_pre(); -void do_memhandler_post(); - /* Linker */ static void set_jump_target(void 
*addr, void *target) { @@ -1541,28 +1538,27 @@ static void do_readstub(int n) static void inline_readstub(enum stub_type type, int i, u_int addr, const signed char regmap[], int target, int adj, u_int reglist) { - int rs=get_reg(regmap,target); - int rt=get_reg(regmap,target); - if(rs<0) rs=get_reg_temp(regmap); - assert(rs>=0); + int ra = cinfo[i].addr; + int rt = get_reg(regmap, target); + assert(ra >= 0); u_int is_dynamic=0; uintptr_t host_addr = 0; void *handler; int cc=get_reg(regmap,CCREG); - //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt)) + //if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt)) // return; handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr); if (handler == NULL) { if(rt<0||dops[i].rt1==0) return; if (addr != host_addr) - emit_movimm_from64(addr, rs, host_addr, rs); + emit_movimm_from64(addr, ra, host_addr, ra); switch(type) { - case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break; - case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break; - case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break; - case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break; - case LOADW_STUB: emit_readword_indexed(0,rs,rt); break; + case LOADB_STUB: emit_movsbl_indexed(0,ra,rt); break; + case LOADBU_STUB: emit_movzbl_indexed(0,ra,rt); break; + case LOADH_STUB: emit_movswl_indexed(0,ra,rt); break; + case LOADHU_STUB: emit_movzwl_indexed(0,ra,rt); break; + case LOADW_STUB: emit_readword_indexed(0,ra,rt); break; default: assert(0); } return; @@ -1583,8 +1579,8 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, save_regs(reglist); if(target==0) emit_movimm(addr,0); - else if(rs!=0) - emit_mov(rs,0); + else if(ra!=0) + emit_mov(ra,0); if(cc<0) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,adj,2); @@ -1702,19 +1698,19 @@ static void do_writestub(int n) static void inline_writestub(enum stub_type type, int i, u_int addr, const signed char regmap[], int target, int adj, u_int reglist) { - int rs = get_reg_temp(regmap); + int ra = 
cinfo[i].addr; int rt = get_reg(regmap,target); - assert(rs >= 0); + assert(ra >= 0); assert(rt >= 0); uintptr_t host_addr = 0; void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr); if (handler == NULL) { if (addr != host_addr) - emit_movimm_from64(addr, rs, host_addr, rs); + emit_movimm_from64(addr, ra, host_addr, ra); switch (type) { - case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break; - case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break; - case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break; + case STOREB_STUB: emit_writebyte_indexed(rt, 0, ra); break; + case STOREH_STUB: emit_writehword_indexed(rt, 0, ra); break; + case STOREW_STUB: emit_writeword_indexed(rt, 0, ra); break; default: assert(0); } return; @@ -1722,7 +1718,7 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, // call a memhandler save_regs(reglist); - emit_writeword(rs, &address); // some handlers still need it + emit_writeword(ra, &address); // some handlers still need it loadstore_extend(type, rt, 0); int cc, cc_use; cc = cc_use = get_reg(regmap, CCREG); diff --git a/libpcsxcore/new_dynarec/assem_arm64.h b/libpcsxcore/new_dynarec/assem_arm64.h index c5fcadf39..6d1a17f0e 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.h +++ b/libpcsxcore/new_dynarec/assem_arm64.h @@ -46,4 +46,7 @@ struct tramp_insns static void clear_cache_arm64(char *start, char *end); +void do_memhandler_pre(); +void do_memhandler_post(); + #endif // !__ASSEMBLY__ diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index f859817ad..a7f4d796b 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -261,6 +261,14 @@ FUNCTION(cc_interrupt): .size cc_interrupt, .-cc_interrupt .align 2 +FUNCTION(jump_addrerror_ds): /* R3000E_AdEL / R3000E_AdES in r0 */ + str r1, [fp, #(LO_psxRegs + (34+8)*4)] /* BadVaddr */ + mov r1, #1 + b call_psxException +FUNCTION(jump_addrerror): + str r1, [fp, 
#(LO_psxRegs + (34+8)*4)] /* BadVaddr */ + mov r1, #0 + b call_psxException FUNCTION(jump_overflow_ds): mov r0, #(12<<2) /* R3000E_Ov */ mov r1, #1 diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 38c78dc36..8d11fcfa2 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -119,6 +119,14 @@ FUNCTION(cc_interrupt): .size cc_interrupt, .-cc_interrupt .align 2 +FUNCTION(jump_addrerror_ds): /* R3000E_AdEL / R3000E_AdES in w0 */ + str w1, [rFP, #(LO_psxRegs + (34+8)*4)] /* BadVaddr */ + mov w1, #1 + b call_psxException +FUNCTION(jump_addrerror): + str w1, [rFP, #(LO_psxRegs + (34+8)*4)] /* BadVaddr */ + mov w1, #0 + b call_psxException FUNCTION(jump_overflow_ds): mov w0, #(12<<2) /* R3000E_Ov */ mov w1, #1 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 067decb7d..c36021c23 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -146,6 +146,7 @@ enum stub_type { STORELR_STUB = 13, INVCODE_STUB = 14, OVERFLOW_STUB = 15, + ALIGNMENT_STUB = 16, }; // regmap_pre[i] - regs before [i] insn starts; dirty things here that @@ -242,6 +243,16 @@ static struct decoded_insn u_char may_except:1; // might generate an exception } dops[MAXBLOCK]; +static struct compile_info +{ + int imm; + u_int ba; + int ccadj; + signed char min_free_regs; + signed char addr; + signed char reserved[2]; +} cinfo[MAXBLOCK]; + static u_char *out; static char invalid_code[0x100000]; static struct ht_entry hash_table[65536]; @@ -257,8 +268,6 @@ static struct decoded_insn static u_int smrv_weak; // same, but somewhat less likely static u_int smrv_strong_next; // same, but after current insn executes static u_int smrv_weak_next; - static int imm[MAXBLOCK]; - static u_int ba[MAXBLOCK]; static uint64_t unneeded_reg[MAXBLOCK]; static uint64_t branch_unneeded_reg[MAXBLOCK]; // see 'struct regstat' for a description @@ -269,8 
+278,6 @@ static struct decoded_insn static uint32_t constmap[MAXBLOCK][HOST_REGS]; static struct regstat regs[MAXBLOCK]; static struct regstat branch_regs[MAXBLOCK]; - static signed char minimum_free_regs[MAXBLOCK]; - static int ccadj[MAXBLOCK]; static int slen; static void *instr_addr[MAXBLOCK]; static struct link_entry link_addr[MAXBLOCK]; @@ -339,10 +346,8 @@ static struct decoded_insn #define RHTBL 44 // Return address hash table address #define RTEMP 45 // JR/JALR address register #define MAXREG 45 -#define AGEN1 46 // Address generation temporary register +#define AGEN1 46 // Address generation temporary register (pass5b_preallocate2) //#define AGEN2 47 // Address generation temporary register -//#define MGEN1 48 // Maptable address generation temporary register -//#define MGEN2 49 // Maptable address generation temporary register #define BTREG 50 // Branch target temporary register /* instruction types */ @@ -388,6 +393,8 @@ void jump_break (u_int u0, u_int u1, u_int pc); void jump_break_ds(u_int u0, u_int u1, u_int pc); void jump_overflow (u_int u0, u_int u1, u_int pc); void jump_overflow_ds(u_int u0, u_int u1, u_int pc); +void jump_addrerror (u_int cause, u_int addr, u_int pc); +void jump_addrerror_ds(u_int cause, u_int addr, u_int pc); void jump_to_new_pc(); void call_gteStall(); void new_dyna_leave(); @@ -418,8 +425,6 @@ static void add_stub(enum stub_type type, void *addr, void *retaddr, static void add_stub_r(enum stub_type type, void *addr, void *retaddr, int i, int addr_reg, const struct regstat *i_regs, int ccadj, u_int reglist); static void add_to_linker(void *addr, u_int target, int ext); -static void *emit_fastpath_cmp_jump(int i, const struct regstat *i_regs, - int addr, int *offset_reg, int *addr_reg_override); static void *get_direct_memhandler(void *table, u_int addr, enum stub_type type, uintptr_t *addr_host); static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist); @@ -791,9 +796,10 @@ void 
*ndrc_get_addr_ht_param(u_int vaddr, int can_compile) { //check_for_block_changes(vaddr, vaddr + MAXBLOCK); const struct ht_entry *ht_bin = hash_table_get(vaddr); + u_int vaddr_a = vaddr & ~3; stat_inc(stat_ht_lookups); - if (ht_bin->vaddr[0] == vaddr) return ht_bin->tcaddr[0]; - if (ht_bin->vaddr[1] == vaddr) return ht_bin->tcaddr[1]; + if (ht_bin->vaddr[0] == vaddr_a) return ht_bin->tcaddr[0]; + if (ht_bin->vaddr[1] == vaddr_a) return ht_bin->tcaddr[1]; return get_addr(vaddr, can_compile); } @@ -1002,10 +1008,10 @@ static void lsn(u_char hsn[], int i, int *preferred_reg) } if(b>=0) { - if(ba[i+b]>=start && ba[i+b]<(start+slen*4)) + if(cinfo[i+b].ba>=start && cinfo[i+b].ba<(start+slen*4)) { // Follow first branch - int t=(ba[i+b]-start)>>2; + int t=(cinfo[i+b].ba-start)>>2; j=7-b;if(t+j>=slen) j=slen-t-1; for(;j>=0;j--) { @@ -1055,7 +1061,7 @@ static int needed_again(int r, int i) if (i > 0 && dops[i-1].is_ujump) { - if(ba[i-1]start+slen*4-4) + if(cinfo[i-1].bastart+slen*4-4) return 0; // Don't need any registers if exiting the block } for(j=0;j<9;j++) @@ -1119,9 +1125,9 @@ static int loop_reg(int i, int r, int hr) if((unneeded_reg[i+k]>>r)&1) return hr; if(i+k>=0&&(dops[i+k].itype==UJUMP||dops[i+k].itype==CJUMP||dops[i+k].itype==SJUMP)) { - if(ba[i+k]>=start && ba[i+k]<(start+i*4)) + if(cinfo[i+k].ba>=start && cinfo[i+k].ba<(start+i*4)) { - int t=(ba[i+k]-start)>>2; + int t=(cinfo[i+k].ba-start)>>2; int reg=get_reg(regs[t].regmap_entry,r); if(reg>=0) return reg; //reg=get_reg(regs[t+1].regmap_entry,r); @@ -1200,10 +1206,17 @@ static const struct { FUNCNAME(jump_syscall_ds), FUNCNAME(jump_overflow), FUNCNAME(jump_overflow_ds), + FUNCNAME(jump_addrerror), + FUNCNAME(jump_addrerror_ds), FUNCNAME(call_gteStall), FUNCNAME(new_dyna_leave), FUNCNAME(pcsx_mtc0), FUNCNAME(pcsx_mtc0_ds), + FUNCNAME(execI), +#ifdef __aarch64__ + FUNCNAME(do_memhandler_pre), + FUNCNAME(do_memhandler_post), +#endif #ifdef DRC_DBG FUNCNAME(do_insn_cmp), #endif @@ -1904,9 +1917,9 @@ static void 
shiftimm_alloc(struct regstat *current,int i) dirty_reg(current,dops[i].rt1); if(is_const(current,dops[i].rs1)) { int v=get_const(current,dops[i].rs1); - if(dops[i].opcode2==0x00) set_const(current,dops[i].rt1,v<>imm[i]); - if(dops[i].opcode2==0x03) set_const(current,dops[i].rt1,v>>imm[i]); + if(dops[i].opcode2==0x00) set_const(current,dops[i].rt1,v<>cinfo[i].imm); + if(dops[i].opcode2==0x03) set_const(current,dops[i].rt1,v>>cinfo[i].imm); } else clear_const(current,dops[i].rt1); } @@ -1945,7 +1958,7 @@ static void shift_alloc(struct regstat *current,int i) alloc_reg(current,i,dops[i].rt1); if(dops[i].rt1==dops[i].rs2) { alloc_reg_temp(current,i,-1); - minimum_free_regs[i]=1; + cinfo[i].min_free_regs=1; } } else { // DSLLV/DSRLV/DSRAV assert(0); @@ -1971,19 +1984,20 @@ static void alu_alloc(struct regstat *current,int i) } alloc_reg(current,i,dops[i].rt1); } - if (!(dops[i].opcode2 & 1)) { - alloc_cc(current,i); // for exceptions - dirty_reg(current,CCREG); + if (dops[i].may_except) { + alloc_cc(current, i); // for exceptions + alloc_reg_temp(current, i, -1); + cinfo[i].min_free_regs = 1; } } - if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU + else if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU if(dops[i].rt1) { alloc_reg(current,i,dops[i].rs1); alloc_reg(current,i,dops[i].rs2); alloc_reg(current,i,dops[i].rt1); } } - if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR + else if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR if(dops[i].rt1) { if(dops[i].rs1&&dops[i].rs2) { alloc_reg(current,i,dops[i].rs1); @@ -2015,35 +2029,33 @@ static void imm16_alloc(struct regstat *current,int i) else if(dops[i].opcode>=0x0c&&dops[i].opcode<=0x0e) { // ANDI/ORI/XORI if(is_const(current,dops[i].rs1)) { int v=get_const(current,dops[i].rs1); - if(dops[i].opcode==0x0c) set_const(current,dops[i].rt1,v&imm[i]); - if(dops[i].opcode==0x0d) set_const(current,dops[i].rt1,v|imm[i]); - if(dops[i].opcode==0x0e) 
set_const(current,dops[i].rt1,v^imm[i]); + if(dops[i].opcode==0x0c) set_const(current,dops[i].rt1,v&cinfo[i].imm); + if(dops[i].opcode==0x0d) set_const(current,dops[i].rt1,v|cinfo[i].imm); + if(dops[i].opcode==0x0e) set_const(current,dops[i].rt1,v^cinfo[i].imm); } else clear_const(current,dops[i].rt1); } else if(dops[i].opcode==0x08||dops[i].opcode==0x09) { // ADDI/ADDIU if(is_const(current,dops[i].rs1)) { int v=get_const(current,dops[i].rs1); - set_const(current,dops[i].rt1,v+imm[i]); + set_const(current,dops[i].rt1,v+cinfo[i].imm); } else clear_const(current,dops[i].rt1); - if (dops[i].opcode == 0x08) { - alloc_cc(current,i); // for exceptions - dirty_reg(current,CCREG); - if (dops[i].rt1 == 0) { - alloc_reg_temp(current,i,-1); - minimum_free_regs[i]=1; - } + if (dops[i].may_except) { + alloc_cc(current, i); // for exceptions + alloc_reg_temp(current, i, -1); + cinfo[i].min_free_regs = 1; } } else { - set_const(current,dops[i].rt1,imm[i]<<16); // LUI + set_const(current,dops[i].rt1,cinfo[i].imm<<16); // LUI } dirty_reg(current,dops[i].rt1); } static void load_alloc(struct regstat *current,int i) { + int need_temp = 0; clear_const(current,dops[i].rt1); //if(dops[i].rs1!=dops[i].rt1&&needed_again(dops[i].rs1,i)) clear_const(current,dops[i].rs1); // Does this help or hurt? 
if(!dops[i].rs1) current->u&=~1LL; // Allow allocating r0 if it's the source register @@ -2051,6 +2063,11 @@ static void load_alloc(struct regstat *current,int i) alloc_reg(current, i, dops[i].rs1); if (ram_offset) alloc_reg(current, i, ROREG); + if (dops[i].may_except) { + alloc_cc(current, i); // for exceptions + dirty_reg(current, CCREG); + need_temp = 1; + } if(dops[i].rt1&&!((current->u>>dops[i].rt1)&1)) { alloc_reg(current,i,dops[i].rt1); assert(get_reg_w(current->regmap, dops[i].rt1)>=0); @@ -2059,8 +2076,7 @@ static void load_alloc(struct regstat *current,int i) if(dops[i].opcode==0x22||dops[i].opcode==0x26) { alloc_reg(current,i,FTEMP); - alloc_reg_temp(current,i,-1); - minimum_free_regs[i]=1; + need_temp = 1; } } else @@ -2068,11 +2084,12 @@ static void load_alloc(struct regstat *current,int i) // Load to r0 or unneeded register (dummy load) // but we still need a register to calculate the address if(dops[i].opcode==0x22||dops[i].opcode==0x26) - { alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary - } - alloc_reg_temp(current,i,-1); - minimum_free_regs[i]=1; + need_temp = 1; + } + if (need_temp) { + alloc_reg_temp(current, i, -1); + cinfo[i].min_free_regs = 1; } } @@ -2082,21 +2099,22 @@ static void store_alloc(struct regstat *current,int i) if(!(dops[i].rs2)) current->u&=~1LL; // Allow allocating r0 if necessary if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); alloc_reg(current,i,dops[i].rs2); - if(dops[i].opcode==0x2c||dops[i].opcode==0x2d||dops[i].opcode==0x3f) { // 64-bit SDL/SDR/SD - assert(0); - } if (ram_offset) alloc_reg(current, i, ROREG); #if defined(HOST_IMM8) // On CPUs without 32-bit immediates we need a pointer to invalid_code alloc_reg(current, i, INVCP); #endif - if(dops[i].opcode==0x2a||dops[i].opcode==0x2e||dops[i].opcode==0x2c||dops[i].opcode==0x2d) { // SWL/SWL/SDL/SDR + if (dops[i].opcode == 0x2a || dops[i].opcode == 0x2e) { // SWL/SWL alloc_reg(current,i,FTEMP); } + if (dops[i].may_except) { + 
alloc_cc(current, i); // for exceptions + dirty_reg(current, CCREG); + } // We need a temporary register for address generation alloc_reg_temp(current,i,-1); - minimum_free_regs[i]=1; + cinfo[i].min_free_regs=1; } static void c2ls_alloc(struct regstat *current,int i) @@ -2111,9 +2129,13 @@ static void c2ls_alloc(struct regstat *current,int i) if (dops[i].opcode == 0x3a) // SWC2 alloc_reg(current,i,INVCP); #endif + if (dops[i].may_except) { + alloc_cc(current, i); // for exceptions + dirty_reg(current, CCREG); + } // We need a temporary register for address generation alloc_reg_temp(current,i,-1); - minimum_free_regs[i]=1; + cinfo[i].min_free_regs=1; } #ifndef multdiv_alloc @@ -2183,14 +2205,14 @@ static void cop0_alloc(struct regstat *current,int i) current->u&=~1LL; alloc_reg(current,i,0); } - minimum_free_regs[i] = HOST_REGS; + cinfo[i].min_free_regs = HOST_REGS; } } static void rfe_alloc(struct regstat *current, int i) { alloc_all(current, i); - minimum_free_regs[i] = HOST_REGS; + cinfo[i].min_free_regs = HOST_REGS; } static void cop2_alloc(struct regstat *current,int i) @@ -2217,7 +2239,7 @@ static void cop2_alloc(struct regstat *current,int i) } } alloc_reg_temp(current,i,-1); - minimum_free_regs[i]=1; + cinfo[i].min_free_regs=1; } static void c2op_alloc(struct regstat *current,int i) @@ -2232,7 +2254,7 @@ static void syscall_alloc(struct regstat *current,int i) alloc_cc(current,i); dirty_reg(current,CCREG); alloc_all(current,i); - minimum_free_regs[i]=HOST_REGS; + cinfo[i].min_free_regs=HOST_REGS; current->isconst=0; } @@ -2365,8 +2387,10 @@ static void alu_assemble(int i, const struct regstat *i_regs, int ccadj_) signed char s1, s2, t, tmp; t = get_reg_w(i_regs->regmap, dops[i].rt1); tmp = get_reg_temp(i_regs->regmap); - if (t < 0 && do_oflow) - t = tmp; + if (do_oflow) + assert(tmp >= 0); + //if (t < 0 && do_oflow) // broken s2 + // t = tmp; if (t >= 0) { s1 = get_reg(i_regs->regmap, dops[i].rs1); s2 = get_reg(i_regs->regmap, dops[i].rs2); @@ -2535,12 
+2559,12 @@ static void imm16_assemble(int i, const struct regstat *i_regs, int ccadj_) //assert(t>=0); if(t>=0) { if(!((i_regs->isconst>>t)&1)) - emit_movimm(imm[i]<<16,t); + emit_movimm(cinfo[i].imm<<16,t); } } } if(dops[i].opcode==0x08||dops[i].opcode==0x09) { // ADDI/ADDIU - int is_addi = (dops[i].opcode == 0x08); + int is_addi = dops[i].may_except; if (dops[i].rt1 || is_addi) { signed char s, t, tmp; t=get_reg_w(i_regs->regmap, dops[i].rt1); @@ -2557,22 +2581,22 @@ static void imm16_assemble(int i, const struct regstat *i_regs, int ccadj_) if (s < 0) { if(i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t); if (is_addi) { - emit_addimm_and_set_flags3(t, imm[i], tmp); + emit_addimm_and_set_flags3(t, cinfo[i].imm, tmp); do_exception_check = 1; } else - emit_addimm(t, imm[i], t); + emit_addimm(t, cinfo[i].imm, t); } else { if (!((i_regs->wasconst >> s) & 1)) { if (is_addi) { - emit_addimm_and_set_flags3(s, imm[i], tmp); + emit_addimm_and_set_flags3(s, cinfo[i].imm, tmp); do_exception_check = 1; } else - emit_addimm(s, imm[i], t); + emit_addimm(s, cinfo[i].imm, t); } else { - int oflow = add_overflow(constmap[i][s], imm[i], sum); + int oflow = add_overflow(constmap[i][s], cinfo[i].imm, sum); if (is_addi && oflow) do_exception_check = 2; else @@ -2595,7 +2619,7 @@ static void imm16_assemble(int i, const struct regstat *i_regs, int ccadj_) } else { if(t>=0) { if(!((i_regs->isconst>>t)&1)) - emit_movimm(imm[i],t); + emit_movimm(cinfo[i].imm,t); } } } @@ -2612,28 +2636,28 @@ static void imm16_assemble(int i, const struct regstat *i_regs, int ccadj_) if(dops[i].opcode==0x0a) { // SLTI if(sl<0) { if(i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t); - emit_slti32(t,imm[i],t); + emit_slti32(t,cinfo[i].imm,t); }else{ - emit_slti32(sl,imm[i],t); + emit_slti32(sl,cinfo[i].imm,t); } } else { // SLTIU if(sl<0) { if(i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t); - emit_sltiu32(t,imm[i],t); + emit_sltiu32(t,cinfo[i].imm,t); 
}else{ - emit_sltiu32(sl,imm[i],t); + emit_sltiu32(sl,cinfo[i].imm,t); } } }else{ // SLTI(U) with r0 is just stupid, // nonetheless examples can be found if(dops[i].opcode==0x0a) // SLTI - if(0regmap_entry[tl]!=dops[i].rs1) emit_loadreg(dops[i].rs1,tl); - emit_andimm(tl,imm[i],tl); + emit_andimm(tl,cinfo[i].imm,tl); }else{ if(!((i_regs->wasconst>>sl)&1)) - emit_andimm(sl,imm[i],tl); + emit_andimm(sl,cinfo[i].imm,tl); else - emit_movimm(constmap[i][sl]&imm[i],tl); + emit_movimm(constmap[i][sl]&cinfo[i].imm,tl); } } else @@ -2670,27 +2694,27 @@ static void imm16_assemble(int i, const struct regstat *i_regs, int ccadj_) } if(dops[i].opcode==0x0d) { // ORI if(sl<0) { - emit_orimm(tl,imm[i],tl); + emit_orimm(tl,cinfo[i].imm,tl); }else{ if(!((i_regs->wasconst>>sl)&1)) - emit_orimm(sl,imm[i],tl); + emit_orimm(sl,cinfo[i].imm,tl); else - emit_movimm(constmap[i][sl]|imm[i],tl); + emit_movimm(constmap[i][sl]|cinfo[i].imm,tl); } } if(dops[i].opcode==0x0e) { // XORI if(sl<0) { - emit_xorimm(tl,imm[i],tl); + emit_xorimm(tl,cinfo[i].imm,tl); }else{ if(!((i_regs->wasconst>>sl)&1)) - emit_xorimm(sl,imm[i],tl); + emit_xorimm(sl,cinfo[i].imm,tl); else - emit_movimm(constmap[i][sl]^imm[i],tl); + emit_movimm(constmap[i][sl]^cinfo[i].imm,tl); } } } else { - emit_movimm(imm[i],tl); + emit_movimm(cinfo[i].imm,tl); } } } @@ -2715,18 +2739,18 @@ static void shiftimm_assemble(int i, const struct regstat *i_regs) else { if(s<0&&i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t); - if(imm[i]) { + if(cinfo[i].imm) { if(dops[i].opcode2==0) // SLL { - emit_shlimm(s<0?t:s,imm[i],t); + emit_shlimm(s<0?t:s,cinfo[i].imm,t); } if(dops[i].opcode2==2) // SRL { - emit_shrimm(s<0?t:s,imm[i],t); + emit_shrimm(s<0?t:s,cinfo[i].imm,t); } if(dops[i].opcode2==3) // SRA { - emit_sarimm(s<0?t:s,imm[i],t); + emit_sarimm(s<0?t:s,cinfo[i].imm,t); } }else{ // Shift by zero @@ -2835,11 +2859,12 @@ static int get_ro_reg(const struct regstat *i_regs, int host_tempreg_free) } static void 
*emit_fastpath_cmp_jump(int i, const struct regstat *i_regs, - int addr, int *offset_reg, int *addr_reg_override) + int addr, int *offset_reg, int *addr_reg_override, int ccadj_) { void *jaddr = NULL; int type = 0; int mr = dops[i].rs1; + assert(addr >= 0); *offset_reg = -1; if(((smrv_strong|smrv_weak)>>mr)&1) { type=get_ptr_mem_type(smrv[mr]); @@ -2851,6 +2876,17 @@ static void *emit_fastpath_cmp_jump(int i, const struct regstat *i_regs, //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type); } + if (dops[i].may_except) { + // alignment check + u_int op = dops[i].opcode; + int mask = ((op & 0x37) == 0x21 || op == 0x25) ? 1 : 3; // LH/SH/LHU + void *jaddr; + emit_testimm(addr, mask); + jaddr = out; + emit_jne(0); + add_stub_r(ALIGNMENT_STUB, jaddr, out, i, addr, i_regs, ccadj_, 0); + } + if(type==MTYPE_8020) { // RAM 80200000+ mirror host_tempreg_acquire(); emit_andimm(addr,~0x00e00000,HOST_TEMPREG); @@ -3003,7 +3039,8 @@ static void do_store_byte(int a, int rt, int offset_reg) static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) { - int s,tl,addr; + int addr = cinfo[i].addr; + int s,tl; int offset; void *jaddr=0; int memtarget=0,c=0; @@ -3012,7 +3049,7 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) u_int reglist=get_host_reglist(i_regs->regmap); tl=get_reg_w(i_regs->regmap, dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); - offset=imm[i]; + offset=cinfo[i].imm; if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<=0) { c=(i_regs->wasconst>>s)&1; @@ -3026,16 +3063,13 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) // could be FIFO, must perform the read // ||dummy read assem_debug("(forced read)\n"); - tl=get_reg_temp(i_regs->regmap); + tl = get_reg_temp(i_regs->regmap); // may be == addr assert(tl>=0); } - if(offset||s<0||c) addr=tl; - else addr=s; - //if(tl<0) tl=get_reg_temp(i_regs->regmap); + assert(addr >= 0); if(tl>=0) { //printf("load_assemble: c=%d\n",c); //if(c) 
printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset); - assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O reglist&=~(1<= 0) a = fastio_reg_override; @@ -3074,8 +3107,7 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) case 0x21: // LH if(!c||memtarget) { if(!dummy) { - int a = tl; - if (!c) a = addr; + int a = addr; if (fastio_reg_override >= 0) a = fastio_reg_override; if (offset_reg >= 0) @@ -3106,8 +3138,7 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) case 0x24: // LBU if(!c||memtarget) { if(!dummy) { - int a = tl; - if (!c) a = addr; + int a = addr; if (fastio_reg_override >= 0) a = fastio_reg_override; @@ -3125,8 +3156,7 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) case 0x25: // LHU if(!c||memtarget) { if(!dummy) { - int a = tl; - if(!c) a = addr; + int a = addr; if (fastio_reg_override >= 0) a = fastio_reg_override; if (offset_reg >= 0) @@ -3140,8 +3170,6 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) else inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,dops[i].rt1,ccadj_,reglist); break; - case 0x27: // LWU - case 0x37: // LD default: assert(0); } @@ -3153,7 +3181,8 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) #ifndef loadlr_assemble static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) { - int s,tl,temp,temp2,addr; + int addr = cinfo[i].addr; + int s,tl,temp,temp2; int offset; void *jaddr=0; int memtarget=0,c=0; @@ -3164,12 +3193,9 @@ static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) s=get_reg(i_regs->regmap,dops[i].rs1); temp=get_reg_temp(i_regs->regmap); temp2=get_reg(i_regs->regmap,FTEMP); - addr=get_reg(i_regs->regmap,AGEN1+(i&1)); - assert(addr<0); - offset=imm[i]; + offset=cinfo[i].imm; reglist|=1<= 0); if(s>=0) { c=(i_regs->wasconst>>s)&1; if(c) { @@ -3184,7 +3210,7 @@ static void 
loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR } jaddr = emit_fastpath_cmp_jump(i, i_regs, temp2, - &offset_reg, &fastio_reg_override); + &offset_reg, &fastio_reg_override, ccadj_); } else { if (ram_offset && memtarget) { @@ -3277,19 +3303,19 @@ static void do_store_smc_check(int i, const struct regstat *i_regs, u_int reglis if (dops[i].rs1 == 29) return; - int j, imm_maxdiff = 32, imm_min = imm[i], imm_max = imm[i], count = 1; + int j, imm_maxdiff = 32, imm_min = cinfo[i].imm, imm_max = cinfo[i].imm, count = 1; if (i < slen - 1 && dops[i+1].is_store && dops[i+1].rs1 == dops[i].rs1 - && abs(imm[i+1] - imm[i]) <= imm_maxdiff) + && abs(cinfo[i+1].imm - cinfo[i].imm) <= imm_maxdiff) return; for (j = i - 1; j >= 0; j--) { if (!dops[j].is_store || dops[j].rs1 != dops[i].rs1 - || abs(imm[j] - imm[j+1]) > imm_maxdiff) + || abs(cinfo[j].imm - cinfo[j+1].imm) > imm_maxdiff) break; count++; - if (imm_min > imm[j]) - imm_min = imm[j]; - if (imm_max < imm[j]) - imm_max = imm[j]; + if (imm_min > cinfo[j].imm) + imm_min = cinfo[j].imm; + if (imm_max < cinfo[j].imm) + imm_max = cinfo[j].imm; } #if defined(HOST_IMM8) int ir = get_reg(i_regs->regmap, INVCP); @@ -3310,8 +3336,8 @@ static void do_store_smc_check(int i, const struct regstat *i_regs, u_int reglis #endif void *jaddr = emit_cbz(HOST_TEMPREG, 0); host_tempreg_release(); - imm_min -= imm[i]; - imm_max -= imm[i]; + imm_min -= cinfo[i].imm; + imm_max -= cinfo[i].imm; add_stub(INVCODE_STUB, jaddr, out, reglist|(1<regmap); tl=get_reg(i_regs->regmap,dops[i].rs2); s=get_reg(i_regs->regmap,dops[i].rs1); - temp=get_reg(i_regs->regmap,agr); - if(temp<0) temp=get_reg_temp(i_regs->regmap); - offset=imm[i]; + offset=cinfo[i].imm; if(s>=0) { c=(i_regs->wasconst>>s)&1; if(c) { @@ -3340,13 +3363,11 @@ static void store_assemble(int i, const struct regstat *i_regs, int ccadj_) } } assert(tl>=0); - assert(temp>=0); + assert(addr >= 0); if(i_regs->regmap[HOST_CCREG]==CCREG) 
reglist&=~(1<= 0) a = fastio_reg_override; do_store_byte(a, tl, offset_reg); @@ -3365,8 +3385,7 @@ static void store_assemble(int i, const struct regstat *i_regs, int ccadj_) break; case 0x29: // SH if(!c||memtarget) { - int a = temp; - if (!c) a = addr; + int a = addr; if (fastio_reg_override >= 0) a = fastio_reg_override; do_store_hword(a, 0, tl, offset_reg, 1); @@ -3382,7 +3401,6 @@ static void store_assemble(int i, const struct regstat *i_regs, int ccadj_) } type = STOREW_STUB; break; - case 0x3F: // SD default: assert(0); } @@ -3396,11 +3414,6 @@ static void store_assemble(int i, const struct regstat *i_regs, int ccadj_) } { if(!c||memtarget) { - #ifdef DESTRUCTIVE_SHIFT - // The x86 shift operation is 'destructive'; it overwrites the - // source register, so we need to make a copy first and use that. - addr=temp; - #endif do_store_smc_check(i, i_regs, reglist, addr); } } @@ -3430,21 +3443,18 @@ static void store_assemble(int i, const struct regstat *i_regs, int ccadj_) static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) { + int addr = cinfo[i].addr; int s,tl; - int temp; int offset; void *jaddr=0; void *case1, *case23, *case3; void *done0, *done1, *done2; int memtarget=0,c=0; - int agr=AGEN1+(i&1); int offset_reg = -1; u_int reglist=get_host_reglist(i_regs->regmap); tl=get_reg(i_regs->regmap,dops[i].rs2); s=get_reg(i_regs->regmap,dops[i].rs1); - temp=get_reg(i_regs->regmap,agr); - if(temp<0) temp=get_reg_temp(i_regs->regmap); - offset=imm[i]; + offset=cinfo[i].imm; if(s>=0) { c=(i_regs->isconst>>s)&1; if(c) { @@ -3452,10 +3462,10 @@ static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) } } assert(tl>=0); - assert(temp>=0); + assert(addr >= 0); if(!c) { - emit_cmpimm(s<0||offset?temp:s,RAM_SIZE); - if(!offset&&s!=temp) emit_mov(s,temp); + emit_cmpimm(addr, RAM_SIZE); + if (!offset && s != addr) emit_mov(s, addr); jaddr=out; emit_jno(0); } @@ -3473,22 +3483,22 @@ static void storelr_assemble(int i, const struct 
regstat *i_regs, int ccadj_) assert(0); } - emit_testimm(temp,2); + emit_testimm(addr,2); case23=out; emit_jne(0); - emit_testimm(temp,1); + emit_testimm(addr,1); case1=out; emit_jne(0); // 0 if (dops[i].opcode == 0x2A) { // SWL // Write msb into least significant byte if (dops[i].rs2) emit_rorimm(tl, 24, tl); - do_store_byte(temp, tl, offset_reg); + do_store_byte(addr, tl, offset_reg); if (dops[i].rs2) emit_rorimm(tl, 8, tl); } else if (dops[i].opcode == 0x2E) { // SWR // Write entire word - do_store_word(temp, 0, tl, offset_reg, 1); + do_store_word(addr, 0, tl, offset_reg, 1); } done0 = out; emit_jmp(0); @@ -3497,45 +3507,45 @@ static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) if (dops[i].opcode == 0x2A) { // SWL // Write two msb into two least significant bytes if (dops[i].rs2) emit_rorimm(tl, 16, tl); - do_store_hword(temp, -1, tl, offset_reg, 0); + do_store_hword(addr, -1, tl, offset_reg, 0); if (dops[i].rs2) emit_rorimm(tl, 16, tl); } else if (dops[i].opcode == 0x2E) { // SWR // Write 3 lsb into three most significant bytes - do_store_byte(temp, tl, offset_reg); + do_store_byte(addr, tl, offset_reg); if (dops[i].rs2) emit_rorimm(tl, 8, tl); - do_store_hword(temp, 1, tl, offset_reg, 0); + do_store_hword(addr, 1, tl, offset_reg, 0); if (dops[i].rs2) emit_rorimm(tl, 24, tl); } done1=out; emit_jmp(0); // 2,3 set_jump_target(case23, out); - emit_testimm(temp,1); + emit_testimm(addr,1); case3 = out; emit_jne(0); // 2 if (dops[i].opcode==0x2A) { // SWL // Write 3 msb into three least significant bytes if (dops[i].rs2) emit_rorimm(tl, 8, tl); - do_store_hword(temp, -2, tl, offset_reg, 1); + do_store_hword(addr, -2, tl, offset_reg, 1); if (dops[i].rs2) emit_rorimm(tl, 16, tl); - do_store_byte(temp, tl, offset_reg); + do_store_byte(addr, tl, offset_reg); if (dops[i].rs2) emit_rorimm(tl, 8, tl); } else if (dops[i].opcode == 0x2E) { // SWR // Write two lsb into two most significant bytes - do_store_hword(temp, 0, tl, offset_reg, 1); + 
do_store_hword(addr, 0, tl, offset_reg, 1); } done2 = out; emit_jmp(0); // 3 set_jump_target(case3, out); if (dops[i].opcode == 0x2A) { // SWL - do_store_word(temp, -3, tl, offset_reg, 0); + do_store_word(addr, -3, tl, offset_reg, 0); } else if (dops[i].opcode == 0x2E) { // SWR - do_store_byte(temp, tl, offset_reg); + do_store_byte(addr, tl, offset_reg); } set_jump_target(done0, out); set_jump_target(done1, out); @@ -3543,8 +3553,8 @@ static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) if (offset_reg == HOST_TEMPREG) host_tempreg_release(); if(!c||!memtarget) - add_stub_r(STORELR_STUB,jaddr,out,i,temp,i_regs,ccadj_,reglist); - do_store_smc_check(i, i_regs, reglist, temp); + add_stub_r(STORELR_STUB,jaddr,out,i,addr,i_regs,ccadj_,reglist); + do_store_smc_check(i, i_regs, reglist, addr); } static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) @@ -3623,7 +3633,7 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) } } -static void rfe_assemble(int i, const struct regstat *i_regs, int ccadj_) +static void rfe_assemble(int i, const struct regstat *i_regs) { emit_readword(&psxRegs.CP0.n.SR, 0); emit_andimm(0, 0x3c, 1); @@ -3664,7 +3674,7 @@ static void emit_log_gte_stall(int i, int stall, u_int reglist) emit_movimm(stall, 0); else emit_mov(HOST_TEMPREG, 0); - emit_addimm(HOST_CCREG, ccadj[i], 1); + emit_addimm(HOST_CCREG, cinfo[i].ccadj, 1); emit_far_call(log_gte_stall); restore_regs(reglist); } @@ -3687,12 +3697,12 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u //if (dops[j].is_ds) break; if (cop2_is_stalling_op(j, &other_gte_op_cycles) || dops[j].bt) break; - if (j > 0 && ccadj[j - 1] > ccadj[j]) + if (j > 0 && cinfo[j - 1].ccadj > cinfo[j].ccadj) break; } j = max(j, 0); } - cycles_passed = ccadj[i] - ccadj[j]; + cycles_passed = cinfo[i].ccadj - cinfo[j].ccadj; if (other_gte_op_cycles >= 0) stall = other_gte_op_cycles - cycles_passed; else if (cycles_passed >= 
44) @@ -3703,13 +3713,13 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u #if 0 // too slow save_regs(reglist); emit_movimm(gte_cycletab[op], 0); - emit_addimm(HOST_CCREG, ccadj[i], 1); + emit_addimm(HOST_CCREG, cinfo[i].ccadj, 1); emit_far_call(call_gteStall); restore_regs(reglist); #else host_tempreg_acquire(); emit_readword(&psxRegs.gteBusyCycle, rtmp); - emit_addimm(rtmp, -ccadj[i], rtmp); + emit_addimm(rtmp, -cinfo[i].ccadj, rtmp); emit_sub(rtmp, HOST_CCREG, HOST_TEMPREG); emit_cmpimm(HOST_TEMPREG, 44); emit_cmovb_reg(rtmp, HOST_CCREG); @@ -3739,7 +3749,7 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u if (other_gte_op_cycles >= 0) // will handle stall when assembling that op return; - cycles_passed = ccadj[min(j, slen -1)] - ccadj[i]; + cycles_passed = cinfo[min(j, slen -1)].ccadj - cinfo[i].ccadj; if (cycles_passed >= 44) return; assem_debug("; save gteBusyCycle\n"); @@ -3747,11 +3757,11 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u #if 0 emit_readword(&last_count, HOST_TEMPREG); emit_add(HOST_TEMPREG, HOST_CCREG, HOST_TEMPREG); - emit_addimm(HOST_TEMPREG, ccadj[i], HOST_TEMPREG); + emit_addimm(HOST_TEMPREG, cinfo[i].ccadj, HOST_TEMPREG); emit_addimm(HOST_TEMPREG, gte_cycletab[op]), HOST_TEMPREG); emit_writeword(HOST_TEMPREG, &psxRegs.gteBusyCycle); #else - emit_addimm(HOST_CCREG, ccadj[i] + gte_cycletab[op], HOST_TEMPREG); + emit_addimm(HOST_CCREG, cinfo[i].ccadj + gte_cycletab[op], HOST_TEMPREG); emit_writeword(HOST_TEMPREG, &psxRegs.gteBusyCycle); #endif host_tempreg_release(); @@ -3828,13 +3838,13 @@ static void multdiv_do_stall(int i, const struct regstat *i_regs) if (is_mflohi(j)) // already handled by this op return; - if (dops[j].bt || (j > 0 && ccadj[j - 1] > ccadj[j])) + if (dops[j].bt || (j > 0 && cinfo[j - 1].ccadj > cinfo[j].ccadj)) break; } j = max(j, 0); } if (known_cycles > 0) { - known_cycles -= ccadj[i] - ccadj[j]; + known_cycles -= 
cinfo[i].ccadj - cinfo[j].ccadj; assem_debug("; muldiv stall resolved %d\n", known_cycles); if (known_cycles > 0) emit_addimm(HOST_CCREG, known_cycles, HOST_CCREG); @@ -3843,7 +3853,7 @@ static void multdiv_do_stall(int i, const struct regstat *i_regs) assem_debug("; muldiv stall unresolved\n"); host_tempreg_acquire(); emit_readword(&psxRegs.muldivBusyCycle, rtmp); - emit_addimm(rtmp, -ccadj[i], rtmp); + emit_addimm(rtmp, -cinfo[i].ccadj, rtmp); emit_sub(rtmp, HOST_CCREG, HOST_TEMPREG); emit_cmpimm(HOST_TEMPREG, 37); emit_cmovb_reg(rtmp, HOST_CCREG); @@ -3942,14 +3952,13 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) int memtarget=0,c=0; void *jaddr2=NULL; enum stub_type type; - int agr=AGEN1+(i&1); int offset_reg = -1; int fastio_reg_override = -1; u_int reglist=get_host_reglist(i_regs->regmap); u_int copr=(source[i]>>16)&0x1f; s=get_reg(i_regs->regmap,dops[i].rs1); tl=get_reg(i_regs->regmap,FTEMP); - offset=imm[i]; + offset=cinfo[i].imm; assert(dops[i].rs1>0); assert(tl>=0); @@ -3957,17 +3966,13 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) reglist&=~(1<= 0); if (dops[i].opcode==0x3a) { // SWC2 - ar=get_reg(i_regs->regmap,agr); - if(ar<0) ar=get_reg_temp(i_regs->regmap); - reglist|=1<=0) c=(i_regs->wasconst>>s)&1; memtarget=c&&(((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE); - if (!offset&&!c&&s>=0) ar=s; - assert(ar>=0); cop2_do_stall_check(0, i, i_regs, reglist); @@ -3985,7 +3990,7 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) else { if(!c) { jaddr2 = emit_fastpath_cmp_jump(i, i_regs, ar, - &offset_reg, &fastio_reg_override); + &offset_reg, &fastio_reg_override, ccadj_); } else if (ram_offset && memtarget) { offset_reg = get_ro_reg(i_regs, 0); @@ -4125,6 +4130,24 @@ static void do_overflowstub(int n) exception_assemble(i, i_regs, ccadj); } +static void do_alignmentstub(int n) +{ + assem_debug("do_alignmentstub %x\n", start + (u_int)stubs[n].a * 
4); + literal_pool(24); + int i = stubs[n].a; + struct regstat *i_regs = (struct regstat *)stubs[n].c; + int ccadj = stubs[n].d; + int is_store = dops[i].itype == STORE || dops[i].opcode == 0x3A; // SWC2 + int cause = (dops[i].opcode & 3) << 28; + cause |= is_store ? (R3000E_AdES << 2) : (R3000E_AdEL << 2); + set_jump_target(stubs[n].addr, out); + wb_dirtys(regs[i].regmap, regs[i].dirty); + if (stubs[n].b != 1) + emit_mov(stubs[n].b, 1); // faulting address + emit_movimm(cause, 0); + exception_assemble(i, i_regs, ccadj); +} + #ifndef multdiv_assemble void multdiv_assemble(int i,struct regstat *i_regs) { @@ -4177,11 +4200,14 @@ static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_) void *func; if (dops[i].itype == ALU || dops[i].itype == IMM16) func = is_delayslot ? jump_overflow_ds : jump_overflow; + else if (dops[i].itype == LOAD || dops[i].itype == STORE) + func = is_delayslot ? jump_addrerror_ds : jump_addrerror; else if (dops[i].opcode2 == 0x0C) func = is_delayslot ? jump_syscall_ds : jump_syscall; else func = is_delayslot ? jump_break_ds : jump_break; - assert(get_reg(i_regs->regmap, CCREG) == HOST_CCREG); + if (get_reg(i_regs->regmap, CCREG) != HOST_CCREG) // evicted + emit_loadreg(CCREG, HOST_CCREG); emit_movimm(start + i*4, 2); // pc emit_addimm(HOST_CCREG, ccadj_ + CLOCK_ADJUST(1), HOST_CCREG); emit_far_jump(func); @@ -4336,7 +4362,7 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) cop0_assemble(i, i_regs, ccadj_); break; case RFE: - rfe_assemble(i, i_regs, ccadj_); + rfe_assemble(i, i_regs); break; case COP2: cop2_assemble(i, i_regs); @@ -4404,7 +4430,7 @@ static void ds_assemble(int i, const struct regstat *i_regs) SysPrintf("Jump in the delay slot. 
This is probably a bug.\n"); break; default: - assemble(i, i_regs, ccadj[i]); + assemble(i, i_regs, cinfo[i].ccadj); } is_delayslot = 0; } @@ -4486,16 +4512,18 @@ static void loop_preload(signed char pre[],signed char entry[]) } // Generate address for load/store instruction -// goes to AGEN for writes, FTEMP for LOADLR and cop1/2 loads +// goes to AGEN (or temp) for writes, FTEMP for LOADLR and cop1/2 loads +// AGEN is assigned by pass5b_preallocate2 static void address_generation(int i, const struct regstat *i_regs, signed char entry[]) { if (dops[i].is_load || dops[i].is_store) { - int ra=-1; - int agr=AGEN1+(i&1); + int ra = -1; + int agr = AGEN1 + (i&1); if(dops[i].itype==LOAD) { - ra=get_reg_w(i_regs->regmap, dops[i].rt1); - if(ra<0) ra=get_reg_temp(i_regs->regmap); - assert(ra>=0); + if (!dops[i].may_except) + ra = get_reg_w(i_regs->regmap, dops[i].rt1); // reuse dest for agen + if (ra < 0) + ra = get_reg_temp(i_regs->regmap); } if(dops[i].itype==LOADLR) { ra=get_reg(i_regs->regmap,FTEMP); @@ -4505,57 +4533,76 @@ static void address_generation(int i, const struct regstat *i_regs, signed char if(ra<0) ra=get_reg_temp(i_regs->regmap); } if(dops[i].itype==C2LS) { - if ((dops[i].opcode&0x3b)==0x31||(dops[i].opcode&0x3b)==0x32) // LWC1/LDC1/LWC2/LDC2 + if (dops[i].opcode == 0x32) // LWC2 ra=get_reg(i_regs->regmap,FTEMP); - else { // SWC1/SDC1/SWC2/SDC2 + else { // SWC2 ra=get_reg(i_regs->regmap,agr); if(ra<0) ra=get_reg_temp(i_regs->regmap); } } - int rs=get_reg(i_regs->regmap,dops[i].rs1); - if(ra>=0) { - int offset=imm[i]; + int rs = get_reg(i_regs->regmap, dops[i].rs1); + //if(ra>=0) + { + int offset = cinfo[i].imm; + int add_offset = offset != 0; int c=(i_regs->wasconst>>rs)&1; if(dops[i].rs1==0) { // Using r0 as a base address + assert(ra >= 0); if(!entry||entry[ra]!=agr) { if (dops[i].opcode==0x22||dops[i].opcode==0x26) { emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR - }else if (dops[i].opcode==0x1a||dops[i].opcode==0x1b) { - 
emit_movimm(offset&0xFFFFFFF8,ra); // LDL/LDR }else{ emit_movimm(offset,ra); } } // else did it in the previous cycle - } - else if(rs<0) { - if(!entry||entry[ra]!=dops[i].rs1) - emit_loadreg(dops[i].rs1,ra); + cinfo[i].addr = ra; + add_offset = 0; + } + else if (rs < 0) { + assert(ra >= 0); + if (!entry || entry[ra] != dops[i].rs1) + emit_loadreg(dops[i].rs1, ra); + cinfo[i].addr = ra; //if(!entry||entry[ra]!=dops[i].rs1) // printf("poor load scheduling!\n"); } else if(c) { if(dops[i].rs1!=dops[i].rt1||dops[i].itype!=LOAD) { + assert(ra >= 0); if(!entry||entry[ra]!=agr) { if (dops[i].opcode==0x22||dops[i].opcode==0x26) { emit_movimm((constmap[i][rs]+offset)&0xFFFFFFFC,ra); // LWL/LWR - }else if (dops[i].opcode==0x1a||dops[i].opcode==0x1b) { - emit_movimm((constmap[i][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR }else{ emit_movimm(constmap[i][rs]+offset,ra); regs[i].loadedconst|=1<= 0); + assert(rs != ra); + emit_mov(rs, ra); + cinfo[i].addr = ra; + } + else + cinfo[i].addr = rs; + if (add_offset) { + assert(ra >= 0); if(rs>=0) { emit_addimm(rs,offset,ra); }else{ emit_addimm(ra,offset,ra); } + cinfo[i].addr = ra; } } + assert(cinfo[i].addr >= 0); } // Preload constants for next instruction if (dops[i+1].is_load || dops[i+1].is_store) { @@ -4565,7 +4612,7 @@ static void address_generation(int i, const struct regstat *i_regs, signed char ra=get_reg(i_regs->regmap,agr); if(ra>=0) { int rs=get_reg(regs[i+1].regmap,dops[i+1].rs1); - int offset=imm[i+1]; + int offset=cinfo[i+1].imm; int c=(regs[i+1].wasconst>>rs)&1; if(c&&(dops[i+1].rs1!=dops[i+1].rt1||dops[i+1].itype!=LOAD)) { if (dops[i+1].opcode==0x22||dops[i+1].opcode==0x26) { @@ -4611,15 +4658,15 @@ static int get_final_value(int hr, int i, int *value) if(dops[i+2].itype==LOAD&&dops[i+2].rs1==reg&&dops[i+2].rt1==reg&&((regs[i+1].wasconst>>hr)&1)) { // Precompute load address - *value=constmap[i][hr]+imm[i+2]; + *value=constmap[i][hr]+cinfo[i+2].imm; return 1; } } 
if(dops[i+1].itype==LOAD&&dops[i+1].rs1==reg&&dops[i+1].rt1==reg) { // Precompute load address - *value=constmap[i][hr]+imm[i+1]; - //printf("c=%x imm=%lx\n",(long)constmap[i][hr],imm[i+1]); + *value=constmap[i][hr]+cinfo[i+1].imm; + //printf("c=%x imm=%lx\n",(long)constmap[i][hr],cinfo[i+1].imm); return 1; } } @@ -4789,7 +4836,7 @@ static void load_regs_entry(int t) { int hr; if(dops[t].is_ds) emit_addimm(HOST_CCREG,CLOCK_ADJUST(1),HOST_CCREG); - else if(ccadj[t]) emit_addimm(HOST_CCREG,-ccadj[t],HOST_CCREG); + else if(cinfo[t].ccadj) emit_addimm(HOST_CCREG,-cinfo[t].ccadj,HOST_CCREG); if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) { emit_storereg(CCREG,HOST_CCREG); } @@ -4956,7 +5003,7 @@ static void drc_dbg_emit_do_cmp(int i, int ccadj_) if (i > 0 && !dops[i].bt) { for (hr = 0; hr < HOST_REGS; hr++) { int reg = regs[i].regmap_entry[hr]; // regs[i-1].regmap[hr]; - if (hr == EXCLUDE_REG || reg < 0) + if (hr == EXCLUDE_REG || reg <= 0) continue; if (!((regs[i-1].isconst >> hr) & 1)) continue; @@ -4988,11 +5035,11 @@ static void drc_dbg_emit_do_cmp(int i, int ccadj_) // Used when a branch jumps into the delay slot of another branch static void ds_assemble_entry(int i) { - int t = (ba[i] - start) >> 2; + int t = (cinfo[i].ba - start) >> 2; int ccadj_ = -CLOCK_ADJUST(1); if (!instr_addr[t]) instr_addr[t] = out; - assem_debug("Assemble delay slot at %x\n",ba[i]); + assem_debug("Assemble delay slot at %x\n",cinfo[i].ba); assem_debug("<->\n"); drc_dbg_emit_do_cmp(t, ccadj_); if(regs[t].regmap_entry[HOST_CCREG]==CCREG&®s[t].regmap[HOST_CCREG]!=CCREG) @@ -5017,14 +5064,14 @@ static void ds_assemble_entry(int i) default: assemble(t, ®s[t], ccadj_); } - store_regs_bt(regs[t].regmap,regs[t].dirty,ba[i]+4); - load_regs_bt(regs[t].regmap,regs[t].dirty,ba[i]+4); - if(internal_branch(ba[i]+4)) + store_regs_bt(regs[t].regmap,regs[t].dirty,cinfo[i].ba+4); + load_regs_bt(regs[t].regmap,regs[t].dirty,cinfo[i].ba+4); + if(internal_branch(cinfo[i].ba+4)) assem_debug("branch: internal\n"); 
else assem_debug("branch: external\n"); - assert(internal_branch(ba[i]+4)); - add_to_linker(out,ba[i]+4,internal_branch(ba[i]+4)); + assert(internal_branch(cinfo[i].ba+4)); + add_to_linker(out,cinfo[i].ba+4,internal_branch(cinfo[i].ba+4)); emit_jmp(0); } @@ -5046,20 +5093,20 @@ static void do_cc(int i, const signed char i_regmap[], int *adj, { *adj=0; } - //if(ba[i]>=start && ba[i]<(start+slen*4)) - if(internal_branch(ba[i])) + //if(cinfo[i].ba>=start && cinfo[i].ba<(start+slen*4)) + if(internal_branch(cinfo[i].ba)) { - t=(ba[i]-start)>>2; + t=(cinfo[i].ba-start)>>2; if(dops[t].is_ds) *adj=-CLOCK_ADJUST(1); // Branch into delay slot adds an extra cycle - else *adj=ccadj[t]; + else *adj=cinfo[t].ccadj; } else { *adj=0; } - count = ccadj[i]; + count = cinfo[i].ccadj; count_plus2 = count + CLOCK_ADJUST(2); - if(taken==TAKEN && i==(ba[i]-start)>>2 && source[i+1]==0) { + if(taken==TAKEN && i==(cinfo[i].ba-start)>>2 && source[i+1]==0) { // Idle loop if(count&1) emit_addimm_and_set_flags(2*(count+2),HOST_CCREG); idle=out; @@ -5105,8 +5152,8 @@ static void do_ccstub(int n) wb_dirtys(branch_regs[i].regmap,branch_regs[i].dirty); } else { - if(internal_branch(ba[i])) - wb_needed_dirtys(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); + if(internal_branch(cinfo[i].ba)) + wb_needed_dirtys(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); } if(stubs[n].c!=-1) { @@ -5185,9 +5232,9 @@ static void do_ccstub(int n) #ifdef HAVE_CMOV_IMM if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); - emit_cmov2imm_e_ne_compact(ba[i],start+i*4+8,addr); + emit_cmov2imm_e_ne_compact(cinfo[i].ba,start+i*4+8,addr); #else - emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt); + emit_mov2imm_compact(cinfo[i].ba,addr,start+i*4+8,alt); if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); emit_cmovne_reg(alt,addr); @@ -5198,9 +5245,9 @@ static void do_ccstub(int n) #ifdef HAVE_CMOV_IMM if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); - emit_cmov2imm_e_ne_compact(start+i*4+8,ba[i],addr); 
+ emit_cmov2imm_e_ne_compact(start+i*4+8,cinfo[i].ba,addr); #else - emit_mov2imm_compact(start+i*4+8,addr,ba[i],alt); + emit_mov2imm_compact(start+i*4+8,addr,cinfo[i].ba,alt); if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); emit_cmovne_reg(alt,addr); @@ -5208,50 +5255,50 @@ static void do_ccstub(int n) } if((dops[i].opcode&0x2f)==6) // BLEZ { - //emit_movimm(ba[i],alt); + //emit_movimm(cinfo[i].ba,alt); //emit_movimm(start+i*4+8,addr); - emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr); + emit_mov2imm_compact(cinfo[i].ba,alt,start+i*4+8,addr); emit_cmpimm(s1l,1); emit_cmovl_reg(alt,addr); } if((dops[i].opcode&0x2f)==7) // BGTZ { - //emit_movimm(ba[i],addr); + //emit_movimm(cinfo[i].ba,addr); //emit_movimm(start+i*4+8,ntaddr); - emit_mov2imm_compact(ba[i],addr,start+i*4+8,ntaddr); + emit_mov2imm_compact(cinfo[i].ba,addr,start+i*4+8,ntaddr); emit_cmpimm(s1l,1); emit_cmovl_reg(ntaddr,addr); } if((dops[i].opcode==1)&&(dops[i].opcode2&0x2D)==0) // BLTZ { - //emit_movimm(ba[i],alt); + //emit_movimm(cinfo[i].ba,alt); //emit_movimm(start+i*4+8,addr); - emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr); + emit_mov2imm_compact(cinfo[i].ba,alt,start+i*4+8,addr); emit_test(s1l,s1l); emit_cmovs_reg(alt,addr); } if((dops[i].opcode==1)&&(dops[i].opcode2&0x2D)==1) // BGEZ { - //emit_movimm(ba[i],addr); + //emit_movimm(cinfo[i].ba,addr); //emit_movimm(start+i*4+8,alt); - emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt); + emit_mov2imm_compact(cinfo[i].ba,addr,start+i*4+8,alt); emit_test(s1l,s1l); emit_cmovs_reg(alt,addr); } if(dops[i].opcode==0x11 && dops[i].opcode2==0x08 ) { if(source[i]&0x10000) // BC1T { - //emit_movimm(ba[i],alt); + //emit_movimm(cinfo[i].ba,alt); //emit_movimm(start+i*4+8,addr); - emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr); + emit_mov2imm_compact(cinfo[i].ba,alt,start+i*4+8,addr); emit_testimm(s1l,0x800000); emit_cmovne_reg(alt,addr); } else // BC1F { - //emit_movimm(ba[i],addr); + //emit_movimm(cinfo[i].ba,addr); //emit_movimm(start+i*4+8,alt); - 
emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt); + emit_mov2imm_compact(cinfo[i].ba,addr,start+i*4+8,alt); emit_testimm(s1l,0x800000); emit_cmovne_reg(alt,addr); } @@ -5275,8 +5322,8 @@ static void do_ccstub(int n) emit_far_call(cc_interrupt); if(stubs[n].a) emit_addimm(HOST_CCREG,-(int)stubs[n].a,HOST_CCREG); if(stubs[n].d==TAKEN) { - if(internal_branch(ba[i])) - load_needed_regs(branch_regs[i].regmap,regs[(ba[i]-start)>>2].regmap_entry); + if(internal_branch(cinfo[i].ba)) + load_needed_regs(branch_regs[i].regmap,regs[(cinfo[i].ba-start)>>2].regmap_entry); else if(dops[i].itype==RJUMP) { if(get_reg(branch_regs[i].regmap,RTEMP)>=0) emit_readword(&pcaddr,get_reg(branch_regs[i].regmap,RTEMP)); @@ -5346,7 +5393,7 @@ static void ujump_assemble_write_ra(int i) static void ujump_assemble(int i, const struct regstat *i_regs) { int ra_done=0; - if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); + if(i==(cinfo[i].ba-start)>>2) assem_debug("idle loop\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH int temp=get_reg(branch_regs[i].regmap,PTEMP); @@ -5372,22 +5419,22 @@ static void ujump_assemble(int i, const struct regstat *i_regs) int cc,adj; cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); + store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); #ifdef REG_PREFETCH if(dops[i].rt1==31&&temp>=0) emit_prefetchreg(temp); #endif - do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); - if(adj) emit_addimm(cc, ccadj[i] + CLOCK_ADJUST(2) - adj, cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); - if(internal_branch(ba[i])) + do_cc(i,branch_regs[i].regmap,&adj,cinfo[i].ba,TAKEN,0); + if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); + load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + if(internal_branch(cinfo[i].ba)) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if 
(internal_branch(ba[i]) && dops[(ba[i]-start)>>2].is_ds) { + if (internal_branch(cinfo[i].ba) && dops[(cinfo[i].ba-start)>>2].is_ds) { ds_assemble_entry(i); } else { - add_to_linker(out,ba[i],internal_branch(ba[i])); + add_to_linker(out,cinfo[i].ba,internal_branch(cinfo[i].ba)); emit_jmp(0); } } @@ -5486,9 +5533,9 @@ static void rjump_assemble(int i, const struct regstat *i_regs) } #endif //do_cc(i,branch_regs[i].regmap,&adj,-1,TAKEN); - //if(adj) emit_addimm(cc,2*(ccadj[i]+2-adj),cc); // ??? - Shouldn't happen + //if(adj) emit_addimm(cc,2*(cinfo[i].ccadj+2-adj),cc); // ??? - Shouldn't happen //assert(adj==0); - emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), HOST_CCREG); + emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), HOST_CCREG); add_stub(CC_STUB,out,NULL,0,i,-1,TAKEN,rs); if (dops[i+1].itype == RFE) // special case for RFE @@ -5515,16 +5562,16 @@ static void cjump_assemble(int i, const struct regstat *i_regs) const signed char *i_regmap = i_regs->regmap; int cc; int match; - match=match_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); + match=match_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); assem_debug("match=%d\n",match); int s1l,s2l; int unconditional=0,nop=0; int invert=0; - int internal=internal_branch(ba[i]); - if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); + int internal=internal_branch(cinfo[i].ba); + if(i==(cinfo[i].ba-start)>>2) assem_debug("idle loop\n"); if(!match) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - if(i>(ba[i]-start)>>2) invert=1; + if(i>(cinfo[i].ba-start)>>2) invert=1; #endif #ifdef __aarch64__ invert=1; // because of near cond. 
branches @@ -5572,23 +5619,23 @@ static void cjump_assemble(int i, const struct regstat *i_regs) cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); if(unconditional) - store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); - //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); + store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + //do_cc(i,branch_regs[i].regmap,&adj,unconditional?cinfo[i].ba:-1,unconditional); //assem_debug("cycle count (adj)\n"); if(unconditional) { - do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); - if(i!=(ba[i]-start)>>2 || source[i+1]!=0) { - if(adj) emit_addimm(cc, ccadj[i] + CLOCK_ADJUST(2) - adj, cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); + do_cc(i,branch_regs[i].regmap,&adj,cinfo[i].ba,TAKEN,0); + if(i!=(cinfo[i].ba-start)>>2 || source[i+1]!=0) { + if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); + load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); if(internal) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if (internal && dops[(ba[i]-start)>>2].is_ds) { + if (internal && dops[(cinfo[i].ba-start)>>2].is_ds) { ds_assemble_entry(i); } else { - add_to_linker(out,ba[i],internal); + add_to_linker(out,cinfo[i].ba,internal); emit_jmp(0); } #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -5597,7 +5644,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) } } else if(nop) { - emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), cc); + emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), cc); void *jaddr=out; emit_jns(0); add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5605,7 +5652,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) else { void *taken = NULL, *nottaken = NULL, *nottaken1 = NULL; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); - if(adj&&!invert) emit_addimm(cc, ccadj[i] + CLOCK_ADJUST(2) - adj, cc); + if(adj&&!invert) 
emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); @@ -5617,7 +5664,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) nottaken=out; emit_jne(DJT_1); }else{ - add_to_linker(out,ba[i],internal); + add_to_linker(out,cinfo[i].ba,internal); emit_jeq(0); } } @@ -5629,7 +5676,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) nottaken=out; emit_jeq(DJT_1); }else{ - add_to_linker(out,ba[i],internal); + add_to_linker(out,cinfo[i].ba,internal); emit_jne(0); } } @@ -5640,7 +5687,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) nottaken=out; emit_jge(DJT_1); }else{ - add_to_linker(out,ba[i],internal); + add_to_linker(out,cinfo[i].ba,internal); emit_jl(0); } } @@ -5651,37 +5698,37 @@ static void cjump_assemble(int i, const struct regstat *i_regs) nottaken=out; emit_jl(DJT_1); }else{ - add_to_linker(out,ba[i],internal); + add_to_linker(out,cinfo[i].ba,internal); emit_jge(0); } } if(invert) { if(taken) set_jump_target(taken, out); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - if (match && (!internal || !dops[(ba[i]-start)>>2].is_ds)) { + if (match && (!internal || !dops[(cinfo[i].ba-start)>>2].is_ds)) { if(adj) { emit_addimm(cc,-adj,cc); - add_to_linker(out,ba[i],internal); + add_to_linker(out,cinfo[i].ba,internal); }else{ emit_addnop(13); - add_to_linker(out,ba[i],internal*2); + add_to_linker(out,cinfo[i].ba,internal*2); } emit_jmp(0); }else #endif { if(adj) emit_addimm(cc,-adj,cc); - store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); + store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + 
load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); if(internal) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if (internal && dops[(ba[i] - start) >> 2].is_ds) { + if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) { ds_assemble_entry(i); } else { - add_to_linker(out,ba[i],internal); + add_to_linker(out,cinfo[i].ba,internal); emit_jmp(0); } } @@ -5750,20 +5797,20 @@ static void cjump_assemble(int i, const struct regstat *i_regs) // CHECK: Is the following instruction (fall thru) allocated ok? } assert(cc==HOST_CCREG); - store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); - do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); + store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + do_cc(i,i_regmap,&adj,cinfo[i].ba,TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc, ccadj[i] + CLOCK_ADJUST(2) - adj, cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); + if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); + load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); if(internal) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if (internal && dops[(ba[i] - start) >> 2].is_ds) { + if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) { ds_assemble_entry(i); } else { - add_to_linker(out,ba[i],internal); + add_to_linker(out,cinfo[i].ba,internal); emit_jmp(0); } } @@ -5784,7 +5831,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) if (cc == -1) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), HOST_CCREG); + emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), HOST_CCREG); void *jaddr=out; emit_jns(0); add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5793,7 +5840,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) else{ 
cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), cc); + emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), cc); void *jaddr=out; emit_jns(0); add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5807,16 +5854,16 @@ static void sjump_assemble(int i, const struct regstat *i_regs) const signed char *i_regmap = i_regs->regmap; int cc; int match; - match=match_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); + match=match_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); assem_debug("smatch=%d ooo=%d\n", match, dops[i].ooo); int s1l; int unconditional=0,nevertaken=0; int invert=0; - int internal=internal_branch(ba[i]); - if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); + int internal=internal_branch(cinfo[i].ba); + if(i==(cinfo[i].ba-start)>>2) assem_debug("idle loop\n"); if(!match) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - if(i>(ba[i]-start)>>2) invert=1; + if(i>(cinfo[i].ba-start)>>2) invert=1; #endif #ifdef __aarch64__ invert=1; // because of near cond. 
branches @@ -5870,23 +5917,23 @@ static void sjump_assemble(int i, const struct regstat *i_regs) cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); if(unconditional) - store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); - //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); + store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + //do_cc(i,branch_regs[i].regmap,&adj,unconditional?cinfo[i].ba:-1,unconditional); assem_debug("cycle count (adj)\n"); if(unconditional) { - do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); - if(i!=(ba[i]-start)>>2 || source[i+1]!=0) { - if(adj) emit_addimm(cc, ccadj[i] + CLOCK_ADJUST(2) - adj, cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); + do_cc(i,branch_regs[i].regmap,&adj,cinfo[i].ba,TAKEN,0); + if(i!=(cinfo[i].ba-start)>>2 || source[i+1]!=0) { + if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); + load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); if(internal) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if (internal && dops[(ba[i] - start) >> 2].is_ds) { + if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) { ds_assemble_entry(i); } else { - add_to_linker(out,ba[i],internal); + add_to_linker(out,cinfo[i].ba,internal); emit_jmp(0); } #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -5895,7 +5942,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) } } else if(nevertaken) { - emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), cc); + emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), cc); void *jaddr=out; emit_jns(0); add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5903,7 +5950,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) else { void *nottaken = NULL; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); - if(adj&&!invert) emit_addimm(cc, ccadj[i] + CLOCK_ADJUST(2) - adj, cc); + if(adj&&!invert) emit_addimm(cc, 
cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); { assert(s1l>=0); if((dops[i].opcode2&0xf)==0) // BLTZ/BLTZAL @@ -5913,7 +5960,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) nottaken=out; emit_jns(DJT_1); }else{ - add_to_linker(out,ba[i],internal); + add_to_linker(out,cinfo[i].ba,internal); emit_js(0); } } @@ -5924,7 +5971,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) nottaken=out; emit_js(DJT_1); }else{ - add_to_linker(out,ba[i],internal); + add_to_linker(out,cinfo[i].ba,internal); emit_jns(0); } } @@ -5932,30 +5979,30 @@ static void sjump_assemble(int i, const struct regstat *i_regs) if(invert) { #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - if (match && (!internal || !dops[(ba[i] - start) >> 2].is_ds)) { + if (match && (!internal || !dops[(cinfo[i].ba - start) >> 2].is_ds)) { if(adj) { emit_addimm(cc,-adj,cc); - add_to_linker(out,ba[i],internal); + add_to_linker(out,cinfo[i].ba,internal); }else{ emit_addnop(13); - add_to_linker(out,ba[i],internal*2); + add_to_linker(out,cinfo[i].ba,internal*2); } emit_jmp(0); }else #endif { if(adj) emit_addimm(cc,-adj,cc); - store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); + store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); if(internal) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if (internal && dops[(ba[i] - start) >> 2].is_ds) { + if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) { ds_assemble_entry(i); } else { - add_to_linker(out,ba[i],internal); + add_to_linker(out,cinfo[i].ba,internal); emit_jmp(0); } } @@ -6021,20 +6068,20 @@ static void sjump_assemble(int i, const struct regstat *i_regs) // CHECK: Is the following instruction (fall thru) allocated ok? 
} assert(cc==HOST_CCREG); - store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); - do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); + store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + do_cc(i,i_regmap,&adj,cinfo[i].ba,TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc, ccadj[i] + CLOCK_ADJUST(2) - adj, cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,ba[i]); + if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); + load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); if(internal) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if (internal && dops[(ba[i] - start) >> 2].is_ds) { + if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) { ds_assemble_entry(i); } else { - add_to_linker(out,ba[i],internal); + add_to_linker(out,cinfo[i].ba,internal); emit_jmp(0); } } @@ -6053,7 +6100,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) if (cc == -1) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), HOST_CCREG); + emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), HOST_CCREG); void *jaddr=out; emit_jns(0); add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); @@ -6062,7 +6109,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), cc); + emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), cc); void *jaddr=out; emit_jns(0); add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); @@ -6114,9 +6161,9 @@ void disassemble_inst(int i) if (dops[i].bt) printf("*"); else printf(" "); switch(dops[i].itype) { case UJUMP: - printf (" %x: %s %8x\n",start+i*4,insn[i],ba[i]);break; + printf (" %x: %s %8x\n",start+i*4,insn[i],cinfo[i].ba);break; case CJUMP: - printf (" %x: %s 
r%d,r%d,%8x\n",start+i*4,insn[i],dops[i].rs1,dops[i].rs2,i?start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14):*ba);break; + printf (" %x: %s r%d,r%d,%8x\n",start+i*4,insn[i],dops[i].rs1,dops[i].rs2,i?start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14):cinfo[i].ba);break; case SJUMP: printf (" %x: %s r%d,%8x\n",start+i*4,insn[i],dops[i].rs1,start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14));break; case RJUMP: @@ -6127,17 +6174,17 @@ void disassemble_inst(int i) break; case IMM16: if(dops[i].opcode==0xf) //LUI - printf (" %x: %s r%d,%4x0000\n",start+i*4,insn[i],dops[i].rt1,imm[i]&0xffff); + printf (" %x: %s r%d,%4x0000\n",start+i*4,insn[i],dops[i].rt1,cinfo[i].imm&0xffff); else - printf (" %x: %s r%d,r%d,%d\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1,imm[i]); + printf (" %x: %s r%d,r%d,%d\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1,cinfo[i].imm); break; case LOAD: case LOADLR: - printf (" %x: %s r%d,r%d+%x\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1,imm[i]); + printf (" %x: %s r%d,r%d+%x\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1,cinfo[i].imm); break; case STORE: case STORELR: - printf (" %x: %s r%d,r%d+%x\n",start+i*4,insn[i],dops[i].rs2,dops[i].rs1,imm[i]); + printf (" %x: %s r%d,r%d+%x\n",start+i*4,insn[i],dops[i].rs2,dops[i].rs1,cinfo[i].imm); break; case ALU: case SHIFT: @@ -6147,7 +6194,7 @@ void disassemble_inst(int i) printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],dops[i].rs1,dops[i].rs2); break; case SHIFTIMM: - printf (" %x: %s r%d,r%d,%d\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1,imm[i]); + printf (" %x: %s r%d,r%d,%d\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1,cinfo[i].imm); break; case MOV: if((dops[i].opcode2&0x1d)==0x10) @@ -6172,7 +6219,7 @@ void disassemble_inst(int i) else printf (" %x: %s\n",start+i*4,insn[i]); break; case C2LS: - printf (" %x: %s cpr2[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,dops[i].rs1,imm[i]); + printf (" %x: %s 
cpr2[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,dops[i].rs1,cinfo[i].imm); break; case INTCALL: printf (" %x: %s (INTCALL)\n",start+i*4,insn[i]); @@ -6548,7 +6595,7 @@ static int apply_hacks(void) // lui a4, 0xf200; jal ; addu a0, 2; slti v0, 28224 if (source[i] == 0x3c04f200 && dops[i+1].itype == UJUMP && source[i+2] == 0x34840002 && dops[i+3].opcode == 0x0a - && imm[i+3] == 0x6e40 && dops[i+3].rs1 == 2) + && cinfo[i+3].imm == 0x6e40 && dops[i+3].rs1 == 2) { SysPrintf("PE2 hack @%08x\n", start + (i+3)*4); dops[i + 3].itype = NOP; @@ -6583,9 +6630,11 @@ static noinline void pass1_disassemble(u_int pagelimit) { int force_prev_to_interpreter = 0; memset(&dops[i], 0, sizeof(dops[i])); - op2 = 0; - minimum_free_regs[i] = 0; + memset(&cinfo[i], 0, sizeof(cinfo[i])); + cinfo[i].ba = -1; + cinfo[i].addr = -1; dops[i].opcode = op = source[i] >> 26; + op2 = 0; type = INTCALL; set_mnemonic(i, "???"); switch(op) @@ -6746,13 +6795,13 @@ static noinline void pass1_disassemble(u_int pagelimit) case LOAD: dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rt1=(source[i]>>16)&0x1f; - imm[i]=(short)source[i]; + cinfo[i].imm=(short)source[i]; break; case STORE: case STORELR: dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=(source[i]>>16)&0x1f; - imm[i]=(short)source[i]; + cinfo[i].imm=(short)source[i]; break; case LOADLR: // LWL/LWR only load part of the register, @@ -6760,7 +6809,7 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=(source[i]>>16)&0x1f; dops[i].rt1=(source[i]>>16)&0x1f; - imm[i]=(short)source[i]; + cinfo[i].imm=(short)source[i]; break; case IMM16: if (op==0x0f) dops[i].rs1=0; // LUI instruction has no source register @@ -6768,9 +6817,9 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rs2=0; dops[i].rt1=(source[i]>>16)&0x1f; if(op>=0x0c&&op<=0x0e) { // ANDI/ORI/XORI - imm[i]=(unsigned short)source[i]; + cinfo[i].imm=(unsigned short)source[i]; }else{ - imm[i]=(short)source[i]; + 
cinfo[i].imm=(short)source[i]; } break; case UJUMP: @@ -6831,7 +6880,7 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rs1=(source[i]>>16)&0x1f; dops[i].rs2=0; dops[i].rt1=(source[i]>>11)&0x1f; - imm[i]=(source[i]>>6)&0x1f; + cinfo[i].imm=(source[i]>>6)&0x1f; break; case COP0: if(op2==0) dops[i].rt1=(source[i]>>16)&0x1F; // MFC0 @@ -6853,7 +6902,7 @@ static noinline void pass1_disassemble(u_int pagelimit) break; case C2LS: dops[i].rs1=(source[i]>>21)&0x1F; - imm[i]=(short)source[i]; + cinfo[i].imm=(short)source[i]; if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2 else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2 break; @@ -6878,14 +6927,13 @@ static noinline void pass1_disassemble(u_int pagelimit) } /* Calculate branch target addresses */ if(type==UJUMP) - ba[i]=((start+i*4+4)&0xF0000000)|(((unsigned int)source[i]<<6)>>4); + cinfo[i].ba=((start+i*4+4)&0xF0000000)|(((unsigned int)source[i]<<6)>>4); else if(type==CJUMP&&dops[i].rs1==dops[i].rs2&&(op&1)) - ba[i]=start+i*4+8; // Ignore never taken branch + cinfo[i].ba=start+i*4+8; // Ignore never taken branch else if(type==SJUMP&&dops[i].rs1==0&&!(op2&1)) - ba[i]=start+i*4+8; // Ignore never taken branch + cinfo[i].ba=start+i*4+8; // Ignore never taken branch else if(type==CJUMP||type==SJUMP) - ba[i]=start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14); - else ba[i]=-1; + cinfo[i].ba=start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14); /* simplify always (not)taken branches */ if (type == CJUMP && dops[i].rs1 == dops[i].rs2) { @@ -6898,13 +6946,20 @@ static noinline void pass1_disassemble(u_int pagelimit) else if (type == SJUMP && dops[i].rs1 == 0 && (op2 & 1)) dops[i].itype = type = UJUMP; - dops[i].is_jump = (dops[i].itype == RJUMP || dops[i].itype == UJUMP || dops[i].itype == CJUMP || dops[i].itype == SJUMP); - dops[i].is_ujump = (dops[i].itype == RJUMP || dops[i].itype == UJUMP); // || (source[i] >> 16) == 0x1000 // beq r0,r0 - dops[i].is_load = (dops[i].itype == LOAD 
|| dops[i].itype == LOADLR || op == 0x32); // LWC2 + dops[i].is_jump = type == RJUMP || type == UJUMP || type == CJUMP || type == SJUMP; + dops[i].is_ujump = type == RJUMP || type == UJUMP; + dops[i].is_load = type == LOAD || type == LOADLR || op == 0x32; // LWC2 dops[i].is_delay_load = (dops[i].is_load || (source[i] & 0xf3d00000) == 0x40000000); // MFC/CFC - dops[i].is_store = (dops[i].itype == STORE || dops[i].itype == STORELR || op == 0x3a); // SWC2 - dops[i].is_exception = (dops[i].itype == SYSCALL || dops[i].itype == HLECALL || dops[i].itype == INTCALL); - dops[i].may_except = dops[i].is_exception || (dops[i].itype == ALU && (op2 == 0x20 || op2 == 0x22)) || op == 8; + dops[i].is_store = type == STORE || type == STORELR || op == 0x3a; // SWC2 + dops[i].is_exception = type == SYSCALL || type == HLECALL || type == INTCALL; + dops[i].may_except = dops[i].is_exception || (type == ALU && (op2 == 0x20 || op2 == 0x22)) || op == 8; + + if (((op & 0x37) == 0x21 || op == 0x25) // LH/SH/LHU + && ((cinfo[i].imm & 1) || Config.PreciseExceptions)) + dops[i].may_except = 1; + if (((op & 0x37) == 0x23 || (op & 0x37) == 0x32) // LW/SW/LWC2/SWC2 + && ((cinfo[i].imm & 3) || Config.PreciseExceptions)) + dops[i].may_except = 1; /* rare messy cases to just pass over to the interpreter */ if (i > 0 && dops[i-1].is_jump) { @@ -6916,7 +6971,7 @@ static noinline void pass1_disassemble(u_int pagelimit) } // basic load delay detection through a branch else if (dops[i].is_delay_load && dops[i].rt1 != 0) { - int t=(ba[i-1]-start)/4; + int t=(cinfo[i-1].ba-start)/4; if(0 <= t && t < i &&(dops[i].rt1==dops[t].rs1||dops[i].rt1==dops[t].rs2)&&dops[t].itype!=CJUMP&&dops[t].itype!=SJUMP) { // jump target wants DS result - potential load delay effect SysPrintf("load delay in DS @%08x (%08x)\n", start + i*4, start); @@ -6940,7 +6995,7 @@ static noinline void pass1_disassemble(u_int pagelimit) memset(&dops[i-1], 0, sizeof(dops[i-1])); dops[i-1].itype = INTCALL; dops[i-1].rs1 = CCREG; - ba[i-1] = -1; 
+ cinfo[i-1].ba = -1; done = 2; i--; // don't compile the DS/problematic load/etc } @@ -6948,7 +7003,7 @@ static noinline void pass1_disassemble(u_int pagelimit) /* Is this the end of the block? */ if (i > 0 && dops[i-1].is_ujump) { if (dops[i-1].rt1 == 0) { // not jal - int found_bbranch = 0, t = (ba[i-1] - start) / 4; + int found_bbranch = 0, t = (cinfo[i-1].ba - start) / 4; if ((u_int)(t - i) < 64 && start + (t+64)*4 < pagelimit) { // scan for a branch back to i+1 for (j = t; j < t + 64; j++) { @@ -6986,9 +7041,9 @@ static noinline void pass1_disassemble(u_int pagelimit) // Does the block continue due to a branch? for(j=i-1;j>=0;j--) { - if(ba[j]==start+i*4) done=j=0; // Branch into delay slot - if(ba[j]==start+i*4+4) done=j=0; - if(ba[j]==start+i*4+8) done=j=0; + if(cinfo[j].ba==start+i*4) done=j=0; // Branch into delay slot + if(cinfo[j].ba==start+i*4+4) done=j=0; + if(cinfo[j].ba==start+i*4+8) done=j=0; } } //assert(i=(start+slen*4)) + if(cinfo[i].ba=(start+slen*4)) { // Branch out of this block, flush all regs u=1; @@ -7050,8 +7105,8 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) else { // Internal branch, flag target - dops[(ba[i]-start)>>2].bt=1; - if(ba[i]<=start+i*4) { + dops[(cinfo[i].ba-start)>>2].bt=1; + if(cinfo[i].ba<=start+i*4) { // Backward branch if(dops[i].is_ujump) { @@ -7079,17 +7134,17 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) // Only go three levels deep. This recursion can take an // excessive amount of time if there are a lot of nested loops. 
if(r<2) { - pass2_unneeded_regs((ba[i]-start)>>2,i-1,r+1); + pass2_unneeded_regs((cinfo[i].ba-start)>>2,i-1,r+1); }else{ - unneeded_reg[(ba[i]-start)>>2]=1; - gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown; + unneeded_reg[(cinfo[i].ba-start)>>2]=1; + gte_unneeded[(cinfo[i].ba-start)>>2]=gte_u_unknown; } } /*else*/ if(1) { if (dops[i].is_ujump) { // Unconditional branch - u=unneeded_reg[(ba[i]-start)>>2]; - gte_u=gte_unneeded[(ba[i]-start)>>2]; + u=unneeded_reg[(cinfo[i].ba-start)>>2]; + gte_u=gte_unneeded[(cinfo[i].ba-start)>>2]; branch_unneeded_reg[i]=u; // Merge in delay slot u|=(1LL<>2]; - gte_b=gte_unneeded[(ba[i]-start)>>2]; + b=unneeded_reg[(cinfo[i].ba-start)>>2]; + gte_b=gte_unneeded[(cinfo[i].ba-start)>>2]; branch_unneeded_reg[i]=b; // Branch delay slot b|=(1LL<0&&(dops[i-1].itype==STORE||dops[i-1].itype==STORELR||(dops[i-1].itype==C2LS&&dops[i-1].opcode==0x3a))&&(u_int)imm[i-1]<0x800) + if(i>0&&(dops[i-1].itype==STORE||dops[i-1].itype==STORELR||(dops[i-1].itype==C2LS&&dops[i-1].opcode==0x3a))&&(u_int)cinfo[i-1].imm<0x800) current.waswritten|=1<=0x800) + if((dops[i].itype==STORE||dops[i].itype==STORELR||(dops[i].itype==C2LS&&dops[i].opcode==0x3a))&&(u_int)cinfo[i].imm>=0x800) current.waswritten&=~(1<=0;j--) { - if(ba[j]==start+i*4+4) { + if(cinfo[j].ba==start+i*4+4) { memcpy(current.regmap,branch_regs[j].regmap,sizeof(current.regmap)); current.dirty=branch_regs[j].dirty; break; } } while(j>=0) { - if(ba[j]==start+i*4+4) { + if(cinfo[j].ba==start+i*4+4) { for(hr=0;hr 0 && (dops[i-1].is_jump || dops[i].is_exception)) { cc=0; @@ -7794,7 +7849,7 @@ static noinline void pass4_cull_unused_regs(void) __builtin_prefetch(regs[i-2].regmap); if(dops[i].is_jump) { - if(ba[i]=(start+slen*4)) + if(cinfo[i].ba=(start+slen*4)) { // Branch out of this block, don't need anything nr=0; @@ -7804,7 +7859,7 @@ static noinline void pass4_cull_unused_regs(void) // Internal branch // Need whatever matches the target nr=0; - int t=(ba[i]-start)>>2; + int t=(cinfo[i].ba-start)>>2; 
for(hr=0;hr=0) { @@ -7843,9 +7898,9 @@ static noinline void pass4_cull_unused_regs(void) nr |= get_regm(regs[i].regmap_entry, INVCP); } } - else if (dops[i].may_except) + else if (dops[i].is_exception) { - // SYSCALL instruction, etc or conditional exception + // SYSCALL instruction, etc nr=0; } else // Non-branch @@ -8005,14 +8060,14 @@ static noinline void pass5a_preallocate1(void) { if(dops[i].itype==UJUMP||dops[i].itype==CJUMP||dops[i].itype==SJUMP) { - if(ba[i]>=start && ba[i]<(start+i*4)) + if(cinfo[i].ba>=start && cinfo[i].ba<(start+i*4)) if(dops[i+1].itype==NOP||dops[i+1].itype==MOV||dops[i+1].itype==ALU ||dops[i+1].itype==SHIFTIMM||dops[i+1].itype==IMM16||dops[i+1].itype==LOAD ||dops[i+1].itype==STORE||dops[i+1].itype==STORELR ||dops[i+1].itype==SHIFT ||dops[i+1].itype==COP2||dops[i+1].itype==C2LS||dops[i+1].itype==C2OP) { - int t=(ba[i]-start)>>2; + int t=(cinfo[i].ba-start)>>2; if(t > 0 && !dops[t-1].is_jump) // loop_preload can't handle jumps into delay slots if(t<2||(dops[t-2].itype!=UJUMP&&dops[t-2].itype!=RJUMP)||dops[t-2].rt1!=31) // call/ret assumes no registers allocated for(hr=0;hrclean transition @@ -8064,18 +8119,18 @@ static noinline void pass5a_preallocate1(void) int r=f_regmap[hr]; for(j=t;j<=i;j++) { - //printf("Test %x -> %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r); + //printf("Test %x -> %x, %x %d/%d\n",start+i*4,cinfo[i].ba,start+j*4,hr,r); if(r<34&&((unneeded_reg[j]>>r)&1)) break; assert(r < 64); if(regs[j].regmap[hr]==f_regmap[hr]&&f_regmap[hr] %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r); + //printf("Hit %x -> %x, %x %d/%d\n",start+i*4,cinfo[i].ba,start+j*4,hr,r); int k; if(regs[i].regmap[hr]==-1&&branch_regs[i].regmap[hr]==-1) { if(get_reg(regs[i].regmap,f_regmap[hr])>=0) break; if(get_reg(regs[i+2].regmap,f_regmap[hr])>=0) break; k=i; while(k>1&®s[k-1].regmap[hr]==-1) { - if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) { + if(count_free_regs(regs[k-1].regmap)<=cinfo[k-1].min_free_regs) { //printf("no free regs 
for store %x\n",start+(k-1)*4); break; } @@ -8182,10 +8237,10 @@ static noinline void pass5a_preallocate1(void) if(dops[j].itype==CJUMP||dops[j].itype==SJUMP) { if(dops[j].ooo) { - if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(regs[j].regmap)<=cinfo[j+1].min_free_regs) break; }else{ - if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(branch_regs[j].regmap)<=cinfo[j+1].min_free_regs) break; } if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) { @@ -8193,7 +8248,7 @@ static noinline void pass5a_preallocate1(void) break; } } - if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) { + if(count_free_regs(regs[j].regmap)<=cinfo[j].min_free_regs) { //printf("No free regs for store %x\n",start+j*4); break; } @@ -8226,7 +8281,7 @@ static noinline void pass5a_preallocate1(void) if(dops[i].bt) { for(j=i;j=0); if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) @@ -8391,7 +8462,7 @@ static noinline void pass5b_preallocate2(void) } } } - if(dops[i+1].itype==LOADLR||(dops[i+1].opcode&0x3b)==0x31||(dops[i+1].opcode&0x3b)==0x32) { // LWC1/LDC1, LWC2/LDC2 + if (dops[i+1].itype == LOADLR || dops[i+1].opcode == 0x32) { // LWC2 if(get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { int nr; hr=get_reg(regs[i+1].regmap,FTEMP); @@ -8430,9 +8501,9 @@ static noinline void pass5b_preallocate2(void) hr = -1; if(dops[i+1].itype==LOAD) hr=get_reg_w(regs[i+1].regmap, dops[i+1].rt1); - if(dops[i+1].itype==LOADLR||(dops[i+1].opcode&0x3b)==0x31||(dops[i+1].opcode&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2 + if (dops[i+1].itype == LOADLR || dops[i+1].opcode == 0x32) // LWC2 hr=get_reg(regs[i+1].regmap,FTEMP); - if(dops[i+1].itype==STORE||dops[i+1].itype==STORELR||(dops[i+1].opcode&0x3b)==0x39||(dops[i+1].opcode&0x3b)==0x3a) { // SWC1/SDC1/SWC2/SDC2 + if (dops[i+1].is_store) { hr=get_reg(regs[i+1].regmap,AGEN1+((i+1)&1)); if(hr<0) hr=get_reg_temp(regs[i+1].regmap); } @@ -8483,7 +8554,7 @@ static noinline void pass6_clean_registers(int 
istart, int iend, int wr) signed char branch_rregmap_i[RRMAP_SIZE]; u_int branch_hr_candirty = 0; make_rregs(branch_regs[i].regmap, branch_rregmap_i, &branch_hr_candirty); - if(ba[i]=(start+slen*4)) + if(cinfo[i].ba=(start+slen*4)) { // Branch out of this block, flush all regs will_dirty_i = 0; @@ -8540,7 +8611,7 @@ static noinline void pass6_clean_registers(int istart, int iend, int wr) else { // Internal branch - if(ba[i]<=start+i*4) { + if(cinfo[i].ba<=start+i*4) { // Backward branch if (dops[i].is_ujump) { @@ -8611,12 +8682,12 @@ static noinline void pass6_clean_registers(int istart, int iend, int wr) if(wr) { will_dirty[i]=temp_will_dirty; wont_dirty[i]=temp_wont_dirty; - pass6_clean_registers((ba[i]-start)>>2,i-1,0); + pass6_clean_registers((cinfo[i].ba-start)>>2,i-1,0); }else{ // Limit recursion. It can take an excessive amount // of time if there are a lot of nested loops. - will_dirty[(ba[i]-start)>>2]=0; - wont_dirty[(ba[i]-start)>>2]=-1; + will_dirty[(cinfo[i].ba-start)>>2]=0; + wont_dirty[(cinfo[i].ba-start)>>2]=-1; } } /*else*/ if(1) @@ -8626,16 +8697,16 @@ static noinline void pass6_clean_registers(int istart, int iend, int wr) // Unconditional branch will_dirty_i=0; wont_dirty_i=0; - //if(ba[i]>start+i*4) { // Disable recursion (for debugging) + //if(cinfo[i].ba>start+i*4) { // Disable recursion (for debugging) for(r=0;r>2].regmap_entry[r]) { - will_dirty_i|=will_dirty[(ba[i]-start)>>2]&(1<>2]&(1<>2].regmap_entry[r]) { + will_dirty_i|=will_dirty[(cinfo[i].ba-start)>>2]&(1<>2]&(1<=0) { - will_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>branch_regs[i].regmap[r])&1)<>2]>>branch_regs[i].regmap[r])&1)<>2]>>branch_regs[i].regmap[r])&1)<>2]>>branch_regs[i].regmap[r])&1)<start+i*4) // Disable recursion (for debugging) + //if(cinfo[i].ba>start+i*4) // Disable recursion (for debugging) for(r=0;r>2].regmap_entry[r]) { - will_dirty_i&=will_dirty[(ba[i]-start)>>2]&(1<>2]&(1<>2].regmap_entry[r]) { + will_dirty_i&=will_dirty[(cinfo[i].ba-start)>>2]&(1<>2]&(1<=0) { - 
will_dirty_i&=((unneeded_reg[(ba[i]-start)>>2]>>target_reg)&1)<>2]>>target_reg)&1)<>2]>>target_reg)&1)<>2]>>target_reg)&1)<\n"); - drc_dbg_emit_do_cmp(i, ccadj[i]); + drc_dbg_emit_do_cmp(i, cinfo[i].ccadj); if (clear_hack_addr) { emit_movimm(0, 0); emit_writeword(0, &hack_addr); @@ -9101,7 +9172,7 @@ static int new_recompile_block(u_int addr) if (dops[i].is_store) load_reg(regs[i].regmap_entry,regs[i].regmap,INVCP); - ds = assemble(i, ®s[i], ccadj[i]); + ds = assemble(i, ®s[i], cinfo[i].ccadj); if (dops[i].is_ujump) literal_pool(1024); @@ -9124,7 +9195,7 @@ static int new_recompile_block(u_int addr) store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(HOST_CCREG, ccadj[i-1] + CLOCK_ADJUST(1), HOST_CCREG); + emit_addimm(HOST_CCREG, cinfo[i-1].ccadj + CLOCK_ADJUST(1), HOST_CCREG); } else { @@ -9142,7 +9213,7 @@ static int new_recompile_block(u_int addr) store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(HOST_CCREG, ccadj[i-1] + CLOCK_ADJUST(1), HOST_CCREG); + emit_addimm(HOST_CCREG, cinfo[i-1].ccadj + CLOCK_ADJUST(1), HOST_CCREG); add_to_linker(out,start+i*4,0); emit_jmp(0); } @@ -9170,6 +9241,8 @@ static int new_recompile_block(u_int addr) do_unalignedwritestub(i);break; case OVERFLOW_STUB: do_overflowstub(i); break; + case ALIGNMENT_STUB: + do_alignmentstub(i); break; default: assert(0); } diff --git a/libpcsxcore/new_dynarec/patches/trace_drc_chk b/libpcsxcore/new_dynarec/patches/trace_drc_chk index 12eeb8ae2..7d435323d 100644 --- a/libpcsxcore/new_dynarec/patches/trace_drc_chk +++ b/libpcsxcore/new_dynarec/patches/trace_drc_chk @@ -1,8 +1,8 @@ diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c -index 2821d466..b3c7ea2f 100644 +index b71f8a8f..0a26f6f6 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ 
b/libpcsxcore/new_dynarec/new_dynarec.c -@@ -311,7 +311,7 @@ static struct decoded_insn +@@ -318,7 +318,7 @@ static struct compile_info int new_dynarec_hacks_old; int new_dynarec_did_compile; @@ -11,7 +11,7 @@ index 2821d466..b3c7ea2f 100644 extern int cycle_count; // ... until end of the timeslice, counts -N -> 0 extern int last_count; // last absolute target, often = next_interupt -@@ -593,6 +593,7 @@ static int cycle_multiplier_active; +@@ -598,6 +598,7 @@ static int cycle_multiplier_active; static int CLOCK_ADJUST(int x) { @@ -19,7 +19,7 @@ index 2821d466..b3c7ea2f 100644 int m = cycle_multiplier_active; int s = (x >> 31) | 1; return (x * m + s * 50) / 100; -@@ -745,6 +746,9 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) +@@ -750,6 +751,9 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) // This is called from the recompiled JR/JALR instructions static void noinline *get_addr(u_int vaddr, int can_compile) { @@ -29,7 +29,7 @@ index 2821d466..b3c7ea2f 100644 u_int start_page = get_page_prev(vaddr); u_int i, page, end_page = get_page(vaddr); void *found_clean = NULL; -@@ -7143,7 +7147,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) +@@ -7180,7 +7184,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) // R0 is always unneeded u|=1; // Save it @@ -38,7 +38,7 @@ index 2821d466..b3c7ea2f 100644 gte_unneeded[i]=gte_u; /* printf("ur (%d,%d) %x: ",istart,iend,start+i*4); -@@ -8292,6 +8296,7 @@ static noinline void pass5a_preallocate1(void) +@@ -8329,6 +8333,7 @@ static noinline void pass5a_preallocate1(void) static noinline void pass5b_preallocate2(void) { int i, hr; @@ -46,19 +46,22 @@ index 2821d466..b3c7ea2f 100644 for(i=0;i Date: Wed, 26 Jul 2023 01:23:06 +0300 Subject: [PATCH 288/597] drc: update according to interpreter (2) --- libpcsxcore/new_dynarec/emu_if.c | 15 +- libpcsxcore/new_dynarec/new_dynarec.c | 134 ++++++++---------- 
libpcsxcore/new_dynarec/patches/trace_drc_chk | 16 +-- 3 files changed, 80 insertions(+), 85 deletions(-) diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 89716fa0f..2590b3ab2 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -511,7 +511,9 @@ void do_insn_cmp(void) static u32 handler_cycle_intr; u32 *allregs_p = (void *)&psxRegs; u32 *allregs_e = (void *)&rregs; + u32 badregs_mask = 0; static u32 ppc, failcount; + static u32 badregs_mask_prev; int i, ret, bad = 0, fatal = 0, which_event = -1; u32 ev_cycles = 0; u8 code; @@ -591,18 +593,24 @@ void do_insn_cmp(void) if (allregs_p[i] != allregs_e[i]) { miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle); bad++; - if (i > 32+2) + if (i >= 32) fatal = 1; + else + badregs_mask |= 1u << i; } } - if (!fatal && psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) { + if (badregs_mask_prev & badregs_mask) + failcount++; + else + failcount = 0; + + if (!fatal && psxRegs.pc == rregs.pc && bad < 6 && failcount < 24) { static int last_mcycle; if (last_mcycle != psxRegs.cycle >> 20) { printf("%u\n", psxRegs.cycle); last_mcycle = psxRegs.cycle >> 20; } - failcount++; goto ok; } @@ -621,6 +629,7 @@ void do_insn_cmp(void) ok: //psxRegs.cycle = rregs.cycle + 2; // sync timing ppc = psxRegs.pc; + badregs_mask_prev = badregs_mask; } #endif diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index c36021c23..61b078fe4 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -605,7 +605,9 @@ static int CLOCK_ADJUST(int x) static int ds_writes_rjump_rs(int i) { - return dops[i].rs1 != 0 && (dops[i].rs1 == dops[i+1].rt1 || dops[i].rs1 == dops[i+1].rt2); + return dops[i].rs1 != 0 + && (dops[i].rs1 == dops[i+1].rt1 || dops[i].rs1 == dops[i+1].rt2 + || dops[i].rs1 == dops[i].rt1); // overwrites itself - same effect } // psx addr mirror masking (for invalidation) @@ 
-784,9 +786,16 @@ static void noinline *get_addr(u_int vaddr, int can_compile) return ndrc_get_addr_ht(vaddr); // generate an address error +#ifdef DRC_DBG + last_count -= 2; +#endif psxRegs.CP0.n.Cause &= 0x300; - psxRegs.CP0.n.Cause |= R3000E_AdEL << 2; psxRegs.CP0.n.EPC = vaddr; + if (vaddr & 3) { + psxRegs.CP0.n.Cause |= R3000E_AdEL << 2; + psxRegs.CP0.n.BadVAddr = vaddr; + } else + psxRegs.CP0.n.Cause |= R3000E_IBE << 2; psxRegs.pc = 0x80000080; return ndrc_get_addr_ht(0x80000080); } @@ -5213,7 +5222,7 @@ static void do_ccstub(int n) } hr++; } - if((dops[i].opcode&0x2E)==6) // BLEZ/BGTZ needs another register + if ((dops[i].opcode & 0x3e) == 6) // BLEZ/BGTZ needs another register { while(hr=0) emit_cmp(s1l,s2l); @@ -5240,7 +5249,7 @@ static void do_ccstub(int n) emit_cmovne_reg(alt,addr); #endif } - if((dops[i].opcode&0x2f)==5) // BNE + else if (dops[i].opcode == 5) // BNE { #ifdef HAVE_CMOV_IMM if(s2l>=0) emit_cmp(s1l,s2l); @@ -5253,7 +5262,7 @@ static void do_ccstub(int n) emit_cmovne_reg(alt,addr); #endif } - if((dops[i].opcode&0x2f)==6) // BLEZ + else if (dops[i].opcode == 6) // BLEZ { //emit_movimm(cinfo[i].ba,alt); //emit_movimm(start+i*4+8,addr); @@ -5261,7 +5270,7 @@ static void do_ccstub(int n) emit_cmpimm(s1l,1); emit_cmovl_reg(alt,addr); } - if((dops[i].opcode&0x2f)==7) // BGTZ + else if (dops[i].opcode == 7) // BGTZ { //emit_movimm(cinfo[i].ba,addr); //emit_movimm(start+i*4+8,ntaddr); @@ -5269,41 +5278,17 @@ static void do_ccstub(int n) emit_cmpimm(s1l,1); emit_cmovl_reg(ntaddr,addr); } - if((dops[i].opcode==1)&&(dops[i].opcode2&0x2D)==0) // BLTZ + else if (dops[i].itype == SJUMP) // BLTZ/BGEZ { //emit_movimm(cinfo[i].ba,alt); //emit_movimm(start+i*4+8,addr); - emit_mov2imm_compact(cinfo[i].ba,alt,start+i*4+8,addr); + emit_mov2imm_compact(cinfo[i].ba, + (dops[i].opcode2 & 1) ? addr : alt, start + i*4 + 8, + (dops[i].opcode2 & 1) ? 
alt : addr); emit_test(s1l,s1l); emit_cmovs_reg(alt,addr); } - if((dops[i].opcode==1)&&(dops[i].opcode2&0x2D)==1) // BGEZ - { - //emit_movimm(cinfo[i].ba,addr); - //emit_movimm(start+i*4+8,alt); - emit_mov2imm_compact(cinfo[i].ba,addr,start+i*4+8,alt); - emit_test(s1l,s1l); - emit_cmovs_reg(alt,addr); - } - if(dops[i].opcode==0x11 && dops[i].opcode2==0x08 ) { - if(source[i]&0x10000) // BC1T - { - //emit_movimm(cinfo[i].ba,alt); - //emit_movimm(start+i*4+8,addr); - emit_mov2imm_compact(cinfo[i].ba,alt,start+i*4+8,addr); - emit_testimm(s1l,0x800000); - emit_cmovne_reg(alt,addr); - } - else // BC1F - { - //emit_movimm(cinfo[i].ba,addr); - //emit_movimm(start+i*4+8,alt); - emit_mov2imm_compact(cinfo[i].ba,addr,start+i*4+8,alt); - emit_testimm(s1l,0x800000); - emit_cmovne_reg(alt,addr); - } - } - emit_writeword(addr,&pcaddr); + emit_writeword(addr, &pcaddr); } else if(dops[i].itype==RJUMP) @@ -5953,7 +5938,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) if(adj&&!invert) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); { assert(s1l>=0); - if((dops[i].opcode2&0xf)==0) // BLTZ/BLTZAL + if ((dops[i].opcode2 & 1) == 0) // BLTZ/BLTZAL { emit_test(s1l,s1l); if(invert){ @@ -5964,7 +5949,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) emit_js(0); } } - if((dops[i].opcode2&0xf)==1) // BGEZ/BLTZAL + else // BGEZ/BGEZAL { emit_test(s1l,s1l); if(invert){ @@ -6019,34 +6004,29 @@ static void sjump_assemble(int i, const struct regstat *i_regs) // In-order execution (branch first) //printf("IOE\n"); void *nottaken = NULL; - if(dops[i].rt1==31) { - int rt,return_address; - rt=get_reg(branch_regs[i].regmap,31); - if(rt>=0) { + if (!unconditional) { + assert(s1l >= 0); + emit_test(s1l, s1l); + } + if (dops[i].rt1 == 31) { + int rt, return_address; + rt = get_reg(branch_regs[i].regmap,31); + if(rt >= 0) { // Save the PC even if the branch is not taken - return_address=start+i*4+8; - emit_movimm(return_address,rt); // PC into link 
register + return_address = start + i*4+8; + emit_movimm(return_address, rt); // PC into link register #ifdef IMM_PREFETCH emit_prefetch(hash_table_get(return_address)); #endif } } - if(!unconditional) { - //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); - assert(s1l>=0); - if((dops[i].opcode2&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL - { - emit_test(s1l,s1l); - nottaken=out; - emit_jns(DJT_1); - } - if((dops[i].opcode2&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL - { - emit_test(s1l,s1l); - nottaken=out; - emit_js(DJT_1); - } - } // if(!unconditional) + if (!unconditional) { + nottaken = out; + if (!(dops[i].opcode2 & 1)) // BLTZ/BLTZAL + emit_jns(DJT_1); + else // BGEZ/BGEZAL + emit_js(DJT_1); + } int adj; uint64_t ds_unneeded=branch_regs[i].u; ds_unneeded&=~((1LL<>14));break; case RJUMP: - if (dops[i].opcode==0x9&&dops[i].rt1!=31) + if (dops[i].opcode2 == 9 && dops[i].rt1 != 31) printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1); else printf (" %x: %s r%d\n",start+i*4,insn[i],dops[i].rs1); @@ -6628,7 +6608,7 @@ static noinline void pass1_disassemble(u_int pagelimit) for (i = 0; !done; i++) { - int force_prev_to_interpreter = 0; + int force_j_to_interpreter = 0; memset(&dops[i], 0, sizeof(dops[i])); memset(&cinfo[i], 0, sizeof(cinfo[i])); cinfo[i].ba = -1; @@ -6963,11 +6943,12 @@ static noinline void pass1_disassemble(u_int pagelimit) /* rare messy cases to just pass over to the interpreter */ if (i > 0 && dops[i-1].is_jump) { + j = i - 1; // branch in delay slot? 
if (dops[i].is_jump) { // don't handle first branch and call interpreter if it's hit SysPrintf("branch in DS @%08x (%08x)\n", start + i*4, start); - force_prev_to_interpreter = 1; + force_j_to_interpreter = 1; } // basic load delay detection through a branch else if (dops[i].is_delay_load && dops[i].rt1 != 0) { @@ -6975,29 +6956,32 @@ static noinline void pass1_disassemble(u_int pagelimit) if(0 <= t && t < i &&(dops[i].rt1==dops[t].rs1||dops[i].rt1==dops[t].rs2)&&dops[t].itype!=CJUMP&&dops[t].itype!=SJUMP) { // jump target wants DS result - potential load delay effect SysPrintf("load delay in DS @%08x (%08x)\n", start + i*4, start); - force_prev_to_interpreter = 1; + force_j_to_interpreter = 1; dops[t+1].bt=1; // expected return from interpreter } else if(i>=2&&dops[i-2].rt1==2&&dops[i].rt1==2&&dops[i].rs1!=2&&dops[i].rs2!=2&&dops[i-1].rs1!=2&&dops[i-1].rs2!=2&& !(i>=3&&dops[i-3].is_jump)) { // v0 overwrite like this is a sign of trouble, bail out SysPrintf("v0 overwrite @%08x (%08x)\n", start + i*4, start); - force_prev_to_interpreter = 1; + force_j_to_interpreter = 1; } } } else if (i > 0 && dops[i-1].is_delay_load && dops[i-1].rt1 != 0 && (dops[i].rs1 == dops[i-1].rt1 || dops[i].rs2 == dops[i-1].rt1)) { SysPrintf("load delay @%08x (%08x)\n", start + i*4, start); - force_prev_to_interpreter = 1; + for (j = i - 1; j > 0 && dops[j-1].is_delay_load; j--) + if (dops[j-1].rt1 != dops[i-1].rt1) + break; + force_j_to_interpreter = 1; } - if (force_prev_to_interpreter) { - memset(&dops[i-1], 0, sizeof(dops[i-1])); - dops[i-1].itype = INTCALL; - dops[i-1].rs1 = CCREG; - cinfo[i-1].ba = -1; + if (force_j_to_interpreter) { + memset(&dops[j], 0, sizeof(dops[j])); + dops[j].itype = INTCALL; + dops[j].rs1 = CCREG; + cinfo[j].ba = -1; done = 2; - i--; // don't compile the DS/problematic load/etc + i = j; // don't compile the problematic branch/load/etc } /* Is this the end of the block? 
*/ @@ -7473,11 +7457,13 @@ static noinline void pass3_register_alloc(u_int addr) alloc_cc(¤t,i); dirty_reg(¤t,CCREG); alloc_reg(¤t,i,dops[i].rs1); - if (dops[i].rt1==31) { // BLTZAL/BGEZAL + if (dops[i].rt1 == 31) { // BLTZAL/BGEZAL alloc_reg(¤t,i,31); dirty_reg(¤t,31); } - if((dops[i].rs1&&(dops[i].rs1==dops[i+1].rt1||dops[i].rs1==dops[i+1].rt2)) // The delay slot overwrites the branch condition. + if ((dops[i].rs1 && + (dops[i].rs1==dops[i+1].rt1||dops[i].rs1==dops[i+1].rt2)) // The delay slot overwrites the branch condition. + ||(dops[i].rt1 == 31 && dops[i].rs1 == 31) // overwrites it's own condition ||(dops[i].rt1==31&&(dops[i+1].rs1==31||dops[i+1].rs2==31||dops[i+1].rt1==31||dops[i+1].rt2==31))) { // DS touches $ra // Allocate the branch condition registers instead. current.isconst=0; diff --git a/libpcsxcore/new_dynarec/patches/trace_drc_chk b/libpcsxcore/new_dynarec/patches/trace_drc_chk index 7d435323d..8de3ba1fc 100644 --- a/libpcsxcore/new_dynarec/patches/trace_drc_chk +++ b/libpcsxcore/new_dynarec/patches/trace_drc_chk @@ -1,5 +1,5 @@ diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c -index b71f8a8f..0a26f6f6 100644 +index 2d3348e8..a85d2cd4 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -318,7 +318,7 @@ static struct compile_info @@ -19,7 +19,7 @@ index b71f8a8f..0a26f6f6 100644 int m = cycle_multiplier_active; int s = (x >> 31) | 1; return (x * m + s * 50) / 100; -@@ -750,6 +751,9 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) +@@ -752,6 +753,9 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) // This is called from the recompiled JR/JALR instructions static void noinline *get_addr(u_int vaddr, int can_compile) { @@ -29,7 +29,7 @@ index b71f8a8f..0a26f6f6 100644 u_int start_page = get_page_prev(vaddr); u_int i, page, end_page = get_page(vaddr); void *found_clean = NULL; -@@ -7180,7 +7184,7 @@ 
static noinline void pass2_unneeded_regs(int istart,int iend,int r) +@@ -7164,7 +7168,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) // R0 is always unneeded u|=1; // Save it @@ -38,7 +38,7 @@ index b71f8a8f..0a26f6f6 100644 gte_unneeded[i]=gte_u; /* printf("ur (%d,%d) %x: ",istart,iend,start+i*4); -@@ -8329,6 +8333,7 @@ static noinline void pass5a_preallocate1(void) +@@ -8315,6 +8319,7 @@ static noinline void pass5a_preallocate1(void) static noinline void pass5b_preallocate2(void) { int i, hr; @@ -46,22 +46,22 @@ index b71f8a8f..0a26f6f6 100644 for(i=0;i Date: Thu, 27 Jul 2023 03:09:34 +0300 Subject: [PATCH 289/597] drc: update according to the interpreter (3) --- libpcsxcore/new_dynarec/assem_arm.c | 3 +- libpcsxcore/new_dynarec/assem_arm64.c | 1 - libpcsxcore/new_dynarec/emu_if.c | 4 +- libpcsxcore/new_dynarec/linkage_arm.S | 25 +- libpcsxcore/new_dynarec/linkage_arm64.S | 25 +- libpcsxcore/new_dynarec/new_dynarec.c | 390 +++++++++++---------- libpcsxcore/new_dynarec/patches/trace_intr | 20 +- libpcsxcore/psxinterpreter.c | 1 + 8 files changed, 249 insertions(+), 220 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 2850d4e3a..a7bdfbda6 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -442,7 +442,6 @@ static void emit_loadreg(int r, int hr) //case HIREG: addr = &hi; break; //case LOREG: addr = &lo; break; case CCREG: addr = &cycle_count; break; - case CSREG: addr = &psxRegs.CP0.n.SR; break; case INVCP: addr = &invc_ptr; break; case ROREG: addr = &ram_offset; break; default: @@ -882,7 +881,7 @@ static void emit_cmovs_imm(int imm,int rt) output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval); } -static void emit_cmovne_reg(int rs,int rt) +static unused void emit_cmovne_reg(int rs,int rt) { assem_debug("movne %s,%s\n",regname[rt],regname[rs]); output_w32(0x11a00000|rd_rn_rm(rt,0,rs)); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c 
b/libpcsxcore/new_dynarec/assem_arm64.c index 670f3799e..0a29eaf4d 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -484,7 +484,6 @@ static void emit_loadreg(u_int r, u_int hr) //case HIREG: addr = &hi; break; //case LOREG: addr = &lo; break; case CCREG: addr = &cycle_count; break; - case CSREG: addr = &psxRegs.CP0.n.SR; break; case INVCP: addr = &invc_ptr; is64 = 1; break; case ROREG: addr = &ram_offset; is64 = 1; break; default: diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 2590b3ab2..2862c5467 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -28,7 +28,9 @@ void pcsx_mtc0(u32 reg, u32 val) evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); MTC0(&psxRegs, reg, val); gen_interupt(&psxRegs.CP0); - if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.SR & 0x0300) // possible sw irq + + //if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.SR & 0x0300) // possible sw irq + if ((psxRegs.pc & 0x803ffeff) == 0x80000080) pending_exception = 1; } diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index a7f4d796b..6b429b08c 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -239,10 +239,9 @@ FUNCTION(cc_interrupt): add r10, r0, r10 str r1, [fp, #LO_pending_exception] str r10, [fp, #LO_cycle] /* PCSX cycles */ -@@ str r10, [fp, #LO_reg_cop0+36] /* Count - not on PSX */ mov r10, lr - add r0, fp, #(LO_psxRegs + 34*4) /* CP0 */ + add r0, fp, #LO_reg_cop0 /* CP0 */ bl gen_interupt mov lr, r10 ldr r10, [fp, #LO_cycle] @@ -298,7 +297,7 @@ call_psxException: str r2, [fp, #LO_pcaddr] add r10, r3, r10 str r10, [fp, #LO_cycle] /* PCSX cycles */ - add r2, fp, #(LO_psxRegs + 34*4) /* CP0 */ + add r2, fp, #LO_reg_cop0 /* CP0 */ bl psxException /* note: psxException might do recursive recompiler call from it's HLE code, @@ -518,7 +517,7 @@ FUNCTION(jump_handle_swl): mov 
r12,r0,lsr #12 ldr r3, [r3, r12, lsl #2] lsls r3, #1 - bcs 4f + bcs jump_handle_swx_interp add r3, r0, r3 mov r0, r2 tst r3, #2 @@ -541,11 +540,6 @@ FUNCTION(jump_handle_swl): strhne r1, [r3, #-1] strbeq r12, [r3] bx lr -4: - mov r0, r2 -@ b abort - bx lr @ TODO? - FUNCTION(jump_handle_swr): /* r0 = address, r1 = data, r2 = cycles */ @@ -553,7 +547,7 @@ FUNCTION(jump_handle_swr): mov r12,r0,lsr #12 ldr r3, [r3, r12, lsl #2] lsls r3, #1 - bcs 4f + bcs jump_handle_swx_interp add r3, r0, r3 and r12,r3, #3 mov r0, r2 @@ -567,11 +561,14 @@ FUNCTION(jump_handle_swr): strb r1, [r3] strh r2, [r3, #1] bx lr -4: - mov r0, r2 -@ b abort - bx lr @ TODO? +jump_handle_swx_interp: /* almost never happens */ + ldr r3, [fp, #LO_last_count] + add r0, fp, #LO_psxRegs + add r2, r3, r2 + str r2, [fp, #LO_cycle] /* PCSX cycles */ + bl execI + b jump_to_new_pc .macro rcntx_read_mode0 num /* r0 = address, r2 = cycles */ diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 8d11fcfa2..644b03564 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -97,10 +97,9 @@ FUNCTION(cc_interrupt): add rCC, w0, rCC str wzr, [rFP, #LO_pending_exception] str rCC, [rFP, #LO_cycle] /* PCSX cycles */ -# str rCC, [rFP, #LO_reg_cop0+36] /* Count */ mov x21, lr 1: - add x0, rFP, #(LO_psxRegs + 34*4) /* CP0 */ + add x0, rFP, #LO_reg_cop0 /* CP0 */ bl gen_interupt mov lr, x21 ldr rCC, [rFP, #LO_cycle] @@ -156,7 +155,7 @@ call_psxException: str w2, [rFP, #LO_pcaddr] add rCC, w3, rCC str rCC, [rFP, #LO_cycle] /* PCSX cycles */ - add x2, rFP, #(LO_psxRegs + 34*4) /* CP0 */ + add x2, rFP, #LO_reg_cop0 /* CP0 */ bl psxException /* note: psxException might do recursive recompiler call from it's HLE code, @@ -302,7 +301,7 @@ FUNCTION(jump_handle_swl): orr w4, wzr, w0, lsr #12 ldr x3, [x3, w4, uxtw #3] adds x3, x3, x3 - bcs 4f + bcs jump_handle_swx_interp add x3, x0, x3 mov w0, w2 tbz x3, #1, 10f // & 2 @@ -326,10 +325,6 @@ 
FUNCTION(jump_handle_swl): lsr w2, w1, #24 strb w2, [x3] ret -4: - mov w0, w2 // todo - bl abort - ret FUNCTION(jump_handle_swr): /* w0 = address, w1 = data, w2 = cycles */ @@ -337,7 +332,7 @@ FUNCTION(jump_handle_swr): orr w4, wzr, w0, lsr #12 ldr x3, [x3, w4, uxtw #3] adds x3, x3, x3 - bcs 4f + bcs jump_handle_swx_interp add x3, x0, x3 mov w0, w2 tbz x3, #1, 10f // & 2 @@ -358,10 +353,14 @@ FUNCTION(jump_handle_swr): 0: str w1, [x3] ret -4: - mov w0, w2 // todo - bl abort - ret + +jump_handle_swx_interp: /* almost never happens */ + ldr w3, [rFP, #LO_last_count] + add x0, rFP, #LO_psxRegs + add w2, w3, w2 + str w2, [rFP, #LO_cycle] /* PCSX cycles */ + bl execI + b jump_to_new_pc FUNCTION(call_gteStall): /* w0 = op_cycles, w1 = cycles */ diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 61b078fe4..e95d165c3 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -320,7 +320,7 @@ static struct compile_info #define HACK_ENABLED(x) ((new_dynarec_hacks | new_dynarec_hacks_pergame) & (x)) - extern int cycle_count; // ... until end of the timeslice, counts -N -> 0 + extern int cycle_count; // ... 
until end of the timeslice, counts -N -> 0 (CCREG) extern int last_count; // last absolute target, often = next_interupt extern int pcaddr; extern int pending_exception; @@ -333,7 +333,7 @@ static struct compile_info #define LOREG 32 // lo #define HIREG 33 // hi //#define FSREG 34 // FPU status (FCSR) -#define CSREG 35 // Coprocessor status +//#define CSREG 35 // Coprocessor status #define CCREG 36 // Cycle count #define INVCP 37 // Pointer to invalid_code //#define MMREG 38 // Pointer to memory_map @@ -419,7 +419,7 @@ static void load_regs_entry(int t); static void load_all_consts(const signed char regmap[], u_int dirty, int i); static u_int get_host_reglist(const signed char *regmap); -static int get_final_value(int hr, int i, int *value); +static int get_final_value(int hr, int i, u_int *value); static void add_stub(enum stub_type type, void *addr, void *retaddr, u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e); static void add_stub_r(enum stub_type type, void *addr, void *retaddr, @@ -748,6 +748,26 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) return NULL; } +// this doesn't normally happen +static noinline u_int generate_exception(u_int pc) +{ + //if (execBreakCheck(&psxRegs, pc)) + // return psxRegs.pc; + + // generate an address or bus error + psxRegs.CP0.n.Cause &= 0x300; + psxRegs.CP0.n.EPC = pc; + if (pc & 3) { + psxRegs.CP0.n.Cause |= R3000E_AdEL << 2; + psxRegs.CP0.n.BadVAddr = pc; +#ifdef DRC_DBG + last_count -= 2; +#endif + } else + psxRegs.CP0.n.Cause |= R3000E_IBE << 2; + return (psxRegs.pc = 0x80000080); +} + // Get address from virtual address // This is called from the recompiled JR/JALR instructions static void noinline *get_addr(u_int vaddr, int can_compile) @@ -782,22 +802,10 @@ static void noinline *get_addr(u_int vaddr, int can_compile) return NULL; int r = new_recompile_block(vaddr); - if (r == 0) + if (likely(r == 0)) return ndrc_get_addr_ht(vaddr); - // generate an address error -#ifdef DRC_DBG - 
last_count -= 2; -#endif - psxRegs.CP0.n.Cause &= 0x300; - psxRegs.CP0.n.EPC = vaddr; - if (vaddr & 3) { - psxRegs.CP0.n.Cause |= R3000E_AdEL << 2; - psxRegs.CP0.n.BadVAddr = vaddr; - } else - psxRegs.CP0.n.Cause |= R3000E_IBE << 2; - psxRegs.pc = 0x80000080; - return ndrc_get_addr_ht(0x80000080); + return ndrc_get_addr_ht(generate_exception(vaddr)); } // Look up address in hash table first @@ -1247,6 +1255,7 @@ static const char *fpofs_name(u_int ofs) switch (ofs) { #define ofscase(x) case LO_##x: return " ; " #x ofscase(next_interupt); + ofscase(cycle_count); ofscase(last_count); ofscase(pending_exception); ofscase(stop); @@ -1960,8 +1969,6 @@ static void shiftimm_alloc(struct regstat *current,int i) static void shift_alloc(struct regstat *current,int i) { if(dops[i].rt1) { - if(dops[i].opcode2<=0x07) // SLLV/SRLV/SRAV - { if(dops[i].rs1) alloc_reg(current,i,dops[i].rs1); if(dops[i].rs2) alloc_reg(current,i,dops[i].rs2); alloc_reg(current,i,dops[i].rt1); @@ -1969,9 +1976,6 @@ static void shift_alloc(struct regstat *current,int i) alloc_reg_temp(current,i,-1); cinfo[i].min_free_regs=1; } - } else { // DSLLV/DSRLV/DSRAV - assert(0); - } clear_const(current,dops[i].rs1); clear_const(current,dops[i].rs2); clear_const(current,dops[i].rt1); @@ -2154,17 +2158,11 @@ static void multdiv_alloc(struct regstat *current,int i) // case 0x19: MULTU // case 0x1A: DIV // case 0x1B: DIVU - // case 0x1C: DMULT - // case 0x1D: DMULTU - // case 0x1E: DDIV - // case 0x1F: DDIVU clear_const(current,dops[i].rs1); clear_const(current,dops[i].rs2); alloc_cc(current,i); // for stalls if(dops[i].rs1&&dops[i].rs2) { - if((dops[i].opcode2&4)==0) // 32-bit - { current->u&=~(1LL<u&=~(1LL<>11)&0x1e) == 12) { + alloc_cc(current, i); + dirty_reg(current, CCREG); + } if(dops[i].rs1){ clear_const(current,dops[i].rs1); alloc_reg(current,i,dops[i].rs1); @@ -3474,7 +3472,6 @@ static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) assert(addr >= 0); if(!c) { emit_cmpimm(addr, 
RAM_SIZE); - if (!offset && s != addr) emit_mov(s, addr); jaddr=out; emit_jno(0); } @@ -3488,10 +3485,6 @@ static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) if (ram_offset) offset_reg = get_ro_reg(i_regs, 0); - if (dops[i].opcode==0x2C||dops[i].opcode==0x2D) { // SDL/SDR - assert(0); - } - emit_testimm(addr,2); case23=out; emit_jne(0); @@ -3578,22 +3571,18 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) } else if(dops[i].opcode2==4) // MTC0 { - signed char s=get_reg(i_regs->regmap,dops[i].rs1); + int s = get_reg(i_regs->regmap, dops[i].rs1); + int cc = get_reg(i_regs->regmap, CCREG); char copr=(source[i]>>11)&0x1f; assert(s>=0); wb_register(dops[i].rs1,i_regs->regmap,i_regs->dirty); - if(copr==9||copr==11||copr==12||copr==13) { + if (copr == 12 || copr == 13) { emit_readword(&last_count,HOST_TEMPREG); - emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc - emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); - emit_addimm(HOST_CCREG,ccadj_,HOST_CCREG); - emit_writeword(HOST_CCREG,&psxRegs.cycle); - } - // What a mess. The status register (12) can enable interrupts, - // so needs a special case to handle a pending interrupt. - // The interrupt must be taken immediately, because a subsequent - // instruction might disable interrupts again. - if(copr==12||copr==13) { + if (cc != HOST_CCREG) + emit_loadreg(CCREG, HOST_CCREG); + emit_add(HOST_CCREG, HOST_TEMPREG, HOST_CCREG); + emit_addimm(HOST_CCREG, ccadj_ + 2, HOST_CCREG); + emit_writeword(HOST_CCREG, &psxRegs.cycle); if (is_delayslot) { // burn cycles to cause cc_interrupt, which will // reschedule next_interupt. Relies on CCREG from above. 
@@ -3612,31 +3601,27 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) emit_movimm(0,HOST_TEMPREG); emit_writeword(HOST_TEMPREG,&pending_exception); } - if(s==HOST_CCREG) - emit_loadreg(dops[i].rs1,1); - else if(s!=1) - emit_mov(s,1); - emit_movimm(copr,0); + if( s != 1) + emit_mov(s, 1); + emit_movimm(copr, 0); emit_far_call(pcsx_mtc0); - if(copr==9||copr==11||copr==12||copr==13) { + if (copr == 12 || copr == 13) { emit_readword(&psxRegs.cycle,HOST_CCREG); - emit_readword(&next_interupt,HOST_TEMPREG); - emit_addimm(HOST_CCREG,-ccadj_,HOST_CCREG); + emit_readword(&last_count,HOST_TEMPREG); emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); - emit_writeword(HOST_TEMPREG,&last_count); - emit_storereg(CCREG,HOST_CCREG); - } - if(copr==12||copr==13) { + //emit_writeword(HOST_TEMPREG,&last_count); assert(!is_delayslot); emit_readword(&pending_exception,HOST_TEMPREG); emit_test(HOST_TEMPREG,HOST_TEMPREG); void *jaddr = out; emit_jeq(0); emit_readword(&pcaddr, 0); - emit_addimm(HOST_CCREG,2,HOST_CCREG); emit_far_call(ndrc_get_addr_ht); emit_jmpreg(0); set_jump_target(jaddr, out); + emit_addimm(HOST_CCREG, -ccadj_ - 2, HOST_CCREG); + if (cc != HOST_CCREG) + emit_storereg(CCREG, HOST_CCREG); } emit_loadreg(dops[i].rs1,s); } @@ -3968,7 +3953,6 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) s=get_reg(i_regs->regmap,dops[i].rs1); tl=get_reg(i_regs->regmap,FTEMP); offset=cinfo[i].imm; - assert(dops[i].rs1>0); assert(tl>=0); if(i_regs->regmap[HOST_CCREG]==CCREG) @@ -4119,6 +4103,8 @@ static void do_unalignedwritestub(int n) if(cc<0) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,(int)stubs[n].d+1,2); + emit_movimm(start + i*4,3); + emit_writeword(3,&psxRegs.pc); emit_far_call((dops[i].opcode==0x2a?jump_handle_swl:jump_handle_swr)); emit_addimm(0,-((int)stubs[n].d+1),cc<0?2:cc); if(cc<0) @@ -4286,7 +4272,8 @@ static void speculate_register_values(int i) // fallthrough case IMM16: if(dops[i].rt1&&is_const(&regs[i],dops[i].rt1)) { -
int value,hr=get_reg_w(regs[i].regmap, dops[i].rt1); + int hr = get_reg_w(regs[i].regmap, dops[i].rt1); + u_int value; if(hr>=0) { if(get_final_value(hr,i,&value)) smrv[dops[i].rt1]=value; @@ -4647,7 +4634,7 @@ static void address_generation(int i, const struct regstat *i_regs, signed char } } -static int get_final_value(int hr, int i, int *value) +static int get_final_value(int hr, int i, u_int *value) { int reg=regs[i].regmap[hr]; while(i>hr)&1)) { assert(regmap[hr]<64); if(((regs[i].isconst>>hr)&1)&®map[hr]>0) { - int value,similar=0; + u_int value, similar=0; if(get_final_value(hr,i,&value)) { // see if some other register has similar value for(hr2=0;hr2=0); return_address=start+i*4+8; if(rt>=0) { @@ -5367,7 +5358,8 @@ static void ujump_assemble_write_ra(int i) if(i_regmap[temp]!=PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp); } #endif - emit_movimm(return_address,rt); // PC into link register + if (!((regs[i].loadedconst >> rt) & 1)) + emit_movimm(return_address, rt); // PC into link register #ifdef IMM_PREFETCH emit_prefetch(hash_table_get(return_address)); #endif @@ -5377,7 +5369,6 @@ static void ujump_assemble_write_ra(int i) static void ujump_assemble(int i, const struct regstat *i_regs) { - int ra_done=0; if(i==(cinfo[i].ba-start)>>2) assem_debug("idle loop\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH @@ -5390,17 +5381,13 @@ static void ujump_assemble(int i, const struct regstat *i_regs) if(i_regmap[temp]==PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp); } #endif - if(dops[i].rt1==31&&(dops[i].rt1==dops[i+1].rs1||dops[i].rt1==dops[i+1].rs2)) { + if (dops[i].rt1 == 31) ujump_assemble_write_ra(i); // writeback ra for DS - ra_done=1; - } ds_assemble(i+1,i_regs); uint64_t bc_unneeded=branch_regs[i].u; bc_unneeded|=1|(1LL<=0); return_address=start+i*4+8; #ifdef REG_PREFETCH @@ -5439,7 +5424,8 @@ static void rjump_assemble_write_ra(int i) if(i_regmap[temp]!=PTEMP) 
emit_movimm((uintptr_t)hash_table_get(return_address),temp); } #endif - emit_movimm(return_address,rt); // PC into link register + if (!((regs[i].loadedconst >> rt) & 1)) + emit_movimm(return_address, rt); // PC into link register #ifdef IMM_PREFETCH emit_prefetch(hash_table_get(return_address)); #endif @@ -5449,7 +5435,6 @@ static void rjump_assemble(int i, const struct regstat *i_regs) { int temp; int rs,cc; - int ra_done=0; rs=get_reg(branch_regs[i].regmap,dops[i].rs1); assert(rs>=0); if (ds_writes_rjump_rs(i)) { @@ -5477,18 +5462,14 @@ static void rjump_assemble(int i, const struct regstat *i_regs) if(rh>=0) do_preload_rhash(rh); } #endif - if(dops[i].rt1!=0&&(dops[i].rt1==dops[i+1].rs1||dops[i].rt1==dops[i+1].rs2)) { + if (dops[i].rt1 != 0) rjump_assemble_write_ra(i); - ra_done=1; - } ds_assemble(i+1,i_regs); uint64_t bc_unneeded=branch_regs[i].u; bc_unneeded|=1|(1LL<=0) { // Save the PC even if the branch is not taken return_address=start+i*4+8; @@ -6004,7 +5985,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) // In-order execution (branch first) //printf("IOE\n"); void *nottaken = NULL; - if (!unconditional) { + if (!unconditional && !nevertaken) { assert(s1l >= 0); emit_test(s1l, s1l); } @@ -6020,7 +6001,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) #endif } } - if (!unconditional) { + if (!unconditional && !nevertaken) { nottaken = out; if (!(dops[i].opcode2 & 1)) // BLTZ/BLTZAL emit_jns(DJT_1); @@ -6067,7 +6048,10 @@ static void sjump_assemble(int i, const struct regstat *i_regs) } // branch not taken if(!unconditional) { - set_jump_target(nottaken, out); + if (!nevertaken) { + assert(nottaken); + set_jump_target(nottaken, out); + } assem_debug("1:\n"); wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,ds_unneeded); load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2); @@ -6601,26 +6585,36 @@ static int apply_hacks(void) return 0; } -static noinline void 
pass1_disassemble(u_int pagelimit) +static int is_ld_use_hazard(int ld_rt, const struct decoded_insn *op) { - int i, j, done = 0, ni_count = 0; - unsigned int type,op,op2,op3; + return ld_rt != 0 && (ld_rt == op->rs1 || ld_rt == op->rs2) + && op->itype != LOADLR && op->itype != CJUMP && op->itype != SJUMP; +} - for (i = 0; !done; i++) - { - int force_j_to_interpreter = 0; +static void force_intcall(int i) +{ + memset(&dops[i], 0, sizeof(dops[i])); + dops[i].itype = INTCALL; + dops[i].rs1 = CCREG; + dops[i].is_exception = 1; + cinfo[i].ba = -1; +} + +static void disassemble_one(int i, u_int src) +{ + unsigned int type, op, op2, op3; memset(&dops[i], 0, sizeof(dops[i])); memset(&cinfo[i], 0, sizeof(cinfo[i])); cinfo[i].ba = -1; cinfo[i].addr = -1; - dops[i].opcode = op = source[i] >> 26; + dops[i].opcode = op = src >> 26; op2 = 0; type = INTCALL; set_mnemonic(i, "???"); switch(op) { case 0x00: set_mnemonic(i, "special"); - op2=source[i]&0x3f; + op2 = src & 0x3f; switch(op2) { case 0x00: set_mnemonic(i, "SLL"); type=SHIFTIMM; break; @@ -6633,7 +6627,6 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x09: set_mnemonic(i, "JALR"); type=RJUMP; break; case 0x0C: set_mnemonic(i, "SYSCALL"); type=SYSCALL; break; case 0x0D: set_mnemonic(i, "BREAK"); type=SYSCALL; break; - case 0x0F: set_mnemonic(i, "SYNC"); type=OTHER; break; case 0x10: set_mnemonic(i, "MFHI"); type=MOV; break; case 0x11: set_mnemonic(i, "MTHI"); type=MOV; break; case 0x12: set_mnemonic(i, "MFLO"); type=MOV; break; @@ -6656,7 +6649,7 @@ static noinline void pass1_disassemble(u_int pagelimit) break; case 0x01: set_mnemonic(i, "regimm"); type = SJUMP; - op2 = (source[i] >> 16) & 0x1f; + op2 = (src >> 16) & 0x1f; switch(op2) { case 0x10: set_mnemonic(i, "BLTZAL"); break; @@ -6683,9 +6676,9 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x0E: set_mnemonic(i, "XORI"); type=IMM16; break; case 0x0F: set_mnemonic(i, "LUI"); type=IMM16; break; case 0x10: set_mnemonic(i, "COP0"); - op2 
= (source[i]>>21) & 0x1f; + op2 = (src >> 21) & 0x1f; if (op2 & 0x10) { - op3 = source[i] & 0x1f; + op3 = src & 0x1f; switch (op3) { case 0x01: case 0x02: case 0x06: case 0x08: type = INTCALL; break; @@ -6699,7 +6692,7 @@ static noinline void pass1_disassemble(u_int pagelimit) u32 rd; case 0x00: set_mnemonic(i, "MFC0"); - rd = (source[i] >> 11) & 0x1F; + rd = (src >> 11) & 0x1F; if (!(0x00000417u & (1u << rd))) type = COP0; break; @@ -6710,18 +6703,18 @@ static noinline void pass1_disassemble(u_int pagelimit) } break; case 0x11: set_mnemonic(i, "COP1"); - op2=(source[i]>>21)&0x1f; + op2 = (src >> 21) & 0x1f; break; case 0x12: set_mnemonic(i, "COP2"); - op2=(source[i]>>21)&0x1f; + op2 = (src >> 21) & 0x1f; if (op2 & 0x10) { type = OTHER; - if (gte_handlers[source[i]&0x3f]!=NULL) { + if (gte_handlers[src & 0x3f] != NULL) { #ifdef DISASM - if (gte_regnames[source[i]&0x3f]!=NULL) - strcpy(insn[i],gte_regnames[source[i]&0x3f]); + if (gte_regnames[src & 0x3f] != NULL) + strcpy(insn[i], gte_regnames[src & 0x3f]); else - snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f); + snprintf(insn[i], sizeof(insn[i]), "COP2 %x", src & 0x3f); #endif type = C2OP; } @@ -6735,7 +6728,7 @@ static noinline void pass1_disassemble(u_int pagelimit) } break; case 0x13: set_mnemonic(i, "COP3"); - op2=(source[i]>>21)&0x1f; + op2 = (src >> 21) & 0x1f; break; case 0x20: set_mnemonic(i, "LB"); type=LOAD; break; case 0x21: set_mnemonic(i, "LH"); type=LOAD; break; @@ -6752,7 +6745,7 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x32: set_mnemonic(i, "LWC2"); type=C2LS; break; case 0x3A: set_mnemonic(i, "SWC2"); type=C2LS; break; case 0x3B: - if (Config.HLE && (source[i] & 0x03ffffff) < ARRAY_SIZE(psxHLEt)) { + if (Config.HLE && (src & 0x03ffffff) < ARRAY_SIZE(psxHLEt)) { set_mnemonic(i, "HLECALL"); type = HLECALL; } @@ -6761,7 +6754,7 @@ static noinline void pass1_disassemble(u_int pagelimit) break; } if (type == INTCALL) - SysPrintf("NI %08x @%08x (%08x)\n", source[i], 
start + i*4, start); + SysPrintf("NI %08x @%08x (%08x)\n", src, start + i*4, start); dops[i].itype=type; dops[i].opcode2=op2; /* Get registers/immediates */ @@ -6773,33 +6766,33 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rt2 = 0; switch(type) { case LOAD: - dops[i].rs1=(source[i]>>21)&0x1f; - dops[i].rt1=(source[i]>>16)&0x1f; - cinfo[i].imm=(short)source[i]; + dops[i].rs1 = (src >> 21) & 0x1f; + dops[i].rt1 = (src >> 16) & 0x1f; + cinfo[i].imm = (short)src; break; case STORE: case STORELR: - dops[i].rs1=(source[i]>>21)&0x1f; - dops[i].rs2=(source[i]>>16)&0x1f; - cinfo[i].imm=(short)source[i]; + dops[i].rs1 = (src >> 21) & 0x1f; + dops[i].rs2 = (src >> 16) & 0x1f; + cinfo[i].imm = (short)src; break; case LOADLR: // LWL/LWR only load part of the register, // therefore the target register must be treated as a source too - dops[i].rs1=(source[i]>>21)&0x1f; - dops[i].rs2=(source[i]>>16)&0x1f; - dops[i].rt1=(source[i]>>16)&0x1f; - cinfo[i].imm=(short)source[i]; + dops[i].rs1 = (src >> 21) & 0x1f; + dops[i].rs2 = (src >> 16) & 0x1f; + dops[i].rt1 = (src >> 16) & 0x1f; + cinfo[i].imm = (short)src; break; case IMM16: if (op==0x0f) dops[i].rs1=0; // LUI instruction has no source register - else dops[i].rs1=(source[i]>>21)&0x1f; - dops[i].rs2=0; - dops[i].rt1=(source[i]>>16)&0x1f; + else dops[i].rs1 = (src >> 21) & 0x1f; + dops[i].rs2 = 0; + dops[i].rt1 = (src >> 16) & 0x1f; if(op>=0x0c&&op<=0x0e) { // ANDI/ORI/XORI - cinfo[i].imm=(unsigned short)source[i]; + cinfo[i].imm = (unsigned short)src; }else{ - cinfo[i].imm=(short)source[i]; + cinfo[i].imm = (short)src; } break; case UJUMP: @@ -6810,36 +6803,36 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rs2=CCREG; break; case RJUMP: - dops[i].rs1=(source[i]>>21)&0x1f; + dops[i].rs1 = (src >> 21) & 0x1f; // The JALR instruction writes to rd. 
if (op2&1) { - dops[i].rt1=(source[i]>>11)&0x1f; + dops[i].rt1 = (src >> 11) & 0x1f; } dops[i].rs2=CCREG; break; case CJUMP: - dops[i].rs1=(source[i]>>21)&0x1f; - dops[i].rs2=(source[i]>>16)&0x1f; + dops[i].rs1 = (src >> 21) & 0x1f; + dops[i].rs2 = (src >> 16) & 0x1f; if(op&2) { // BGTZ/BLEZ dops[i].rs2=0; } break; case SJUMP: - dops[i].rs1=(source[i]>>21)&0x1f; - dops[i].rs2=CCREG; + dops[i].rs1 = (src >> 21) & 0x1f; + dops[i].rs2 = CCREG; if (op2 == 0x10 || op2 == 0x11) { // BxxAL dops[i].rt1 = 31; // NOTE: If the branch is not taken, r31 is still overwritten } break; case ALU: - dops[i].rs1=(source[i]>>21)&0x1f; // source - dops[i].rs2=(source[i]>>16)&0x1f; // subtract amount - dops[i].rt1=(source[i]>>11)&0x1f; // destination + dops[i].rs1=(src>>21)&0x1f; // source + dops[i].rs2=(src>>16)&0x1f; // subtract amount + dops[i].rt1=(src>>11)&0x1f; // destination break; case MULTDIV: - dops[i].rs1=(source[i]>>21)&0x1f; // source - dops[i].rs2=(source[i]>>16)&0x1f; // divisor + dops[i].rs1=(src>>21)&0x1f; // source + dops[i].rs2=(src>>16)&0x1f; // divisor dops[i].rt1=HIREG; dops[i].rt2=LOREG; break; @@ -6848,30 +6841,29 @@ static noinline void pass1_disassemble(u_int pagelimit) if(op2==0x11) dops[i].rt1=HIREG; // MTHI if(op2==0x12) dops[i].rs1=LOREG; // MFLO if(op2==0x13) dops[i].rt1=LOREG; // MTLO - if((op2&0x1d)==0x10) dops[i].rt1=(source[i]>>11)&0x1f; // MFxx - if((op2&0x1d)==0x11) dops[i].rs1=(source[i]>>21)&0x1f; // MTxx + if((op2&0x1d)==0x10) dops[i].rt1=(src>>11)&0x1f; // MFxx + if((op2&0x1d)==0x11) dops[i].rs1=(src>>21)&0x1f; // MTxx break; case SHIFT: - dops[i].rs1=(source[i]>>16)&0x1f; // target of shift - dops[i].rs2=(source[i]>>21)&0x1f; // shift amount - dops[i].rt1=(source[i]>>11)&0x1f; // destination + dops[i].rs1=(src>>16)&0x1f; // target of shift + dops[i].rs2=(src>>21)&0x1f; // shift amount + dops[i].rt1=(src>>11)&0x1f; // destination break; case SHIFTIMM: - dops[i].rs1=(source[i]>>16)&0x1f; + dops[i].rs1=(src>>16)&0x1f; dops[i].rs2=0; - 
dops[i].rt1=(source[i]>>11)&0x1f; - cinfo[i].imm=(source[i]>>6)&0x1f; + dops[i].rt1=(src>>11)&0x1f; + cinfo[i].imm=(src>>6)&0x1f; break; case COP0: - if(op2==0) dops[i].rt1=(source[i]>>16)&0x1F; // MFC0 - if(op2==4) dops[i].rs1=(source[i]>>16)&0x1F; // MTC0 - if(op2==4&&((source[i]>>11)&0x1f)==12) dops[i].rt2=CSREG; // Status + if(op2==0) dops[i].rt1=(src>>16)&0x1F; // MFC0 + if(op2==4) dops[i].rs1=(src>>16)&0x1F; // MTC0 + if(op2==4&&((src>>11)&0x1e)==12) dops[i].rs2=CCREG; break; case COP2: - if(op2<3) dops[i].rt1=(source[i]>>16)&0x1F; // MFC2/CFC2 - if(op2>3) dops[i].rs1=(source[i]>>16)&0x1F; // MTC2/CTC2 - dops[i].rs2=CSREG; - int gr=(source[i]>>11)&0x1F; + if(op2<3) dops[i].rt1=(src>>16)&0x1F; // MFC2/CFC2 + if(op2>3) dops[i].rs1=(src>>16)&0x1F; // MTC2/CTC2 + int gr=(src>>11)&0x1F; switch(op2) { case 0x00: gte_rs[i]=1ll<>21)&0x1F; - cinfo[i].imm=(short)source[i]; - if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2 - else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2 + dops[i].rs1=(src>>21)&0x1F; + cinfo[i].imm=(short)src; + if(op==0x32) gte_rt[i]=1ll<<((src>>16)&0x1F); // LWC2 + else gte_rs[i]=1ll<<((src>>16)&0x1F); // SWC2 break; case C2OP: - gte_rs[i]=gte_reg_reads[source[i]&0x3f]; - gte_rt[i]=gte_reg_writes[source[i]&0x3f]; + gte_rs[i]=gte_reg_reads[src&0x3f]; + gte_rt[i]=gte_reg_writes[src&0x3f]; gte_rt[i]|=1ll<<63; // every op changes flags - if((source[i]&0x3f)==GTE_MVMVA) { - int v = (source[i] >> 15) & 3; + if((src&0x3f)==GTE_MVMVA) { + int v = (src >> 15) & 3; gte_rs[i]&=~0xe3fll; if(v==3) gte_rs[i]|=0xe00ll; else gte_rs[i]|=3ll<<(v*2); @@ -6905,6 +6897,22 @@ static noinline void pass1_disassemble(u_int pagelimit) default: break; } +} + +static noinline void pass1_disassemble(u_int pagelimit) +{ + int i, j, done = 0, ni_count = 0; + + for (i = 0; !done; i++) + { + int force_j_to_interpreter = 0; + unsigned int type, op, op2; + + disassemble_one(i, source[i]); + type = dops[i].itype; + op = dops[i].opcode; + op2 = dops[i].opcode2; + /* 
Calculate branch target addresses */ if(type==UJUMP) cinfo[i].ba=((start+i*4+4)&0xF0000000)|(((unsigned int)source[i]<<6)>>4); @@ -6950,14 +6958,30 @@ static noinline void pass1_disassemble(u_int pagelimit) SysPrintf("branch in DS @%08x (%08x)\n", start + i*4, start); force_j_to_interpreter = 1; } - // basic load delay detection through a branch + // load delay detection through a branch else if (dops[i].is_delay_load && dops[i].rt1 != 0) { - int t=(cinfo[i-1].ba-start)/4; - if(0 <= t && t < i &&(dops[i].rt1==dops[t].rs1||dops[i].rt1==dops[t].rs2)&&dops[t].itype!=CJUMP&&dops[t].itype!=SJUMP) { + const struct decoded_insn *dop = NULL; + int t = -1; + if (cinfo[i-1].ba != -1) { + t = (cinfo[i-1].ba - start) / 4; + if (t < 0 || t > i) { + u_int limit = 0; + u_int *mem = get_source_start(cinfo[i-1].ba, &limit); + if (mem != NULL) { + disassemble_one(MAXBLOCK - 1, mem[0]); + dop = &dops[MAXBLOCK - 1]; + } + } + else + dop = &dops[t]; + } + if ((dop && is_ld_use_hazard(dops[i].rt1, dop)) + || (!dop && Config.PreciseExceptions)) { // jump target wants DS result - potential load delay effect SysPrintf("load delay in DS @%08x (%08x)\n", start + i*4, start); force_j_to_interpreter = 1; - dops[t+1].bt=1; // expected return from interpreter + if (0 <= t && t < i) + dops[t + 1].bt = 1; // expected return from interpreter } else if(i>=2&&dops[i-2].rt1==2&&dops[i].rt1==2&&dops[i].rs1!=2&&dops[i].rs2!=2&&dops[i-1].rs1!=2&&dops[i-1].rs2!=2&& !(i>=3&&dops[i-3].is_jump)) { @@ -6967,8 +6991,9 @@ static noinline void pass1_disassemble(u_int pagelimit) } } } - else if (i > 0 && dops[i-1].is_delay_load && dops[i-1].rt1 != 0 - && (dops[i].rs1 == dops[i-1].rt1 || dops[i].rs2 == dops[i-1].rt1)) { + else if (i > 0 && dops[i-1].is_delay_load + && is_ld_use_hazard(dops[i-1].rt1, &dops[i]) + && (i < 2 || !dops[i-2].is_ujump)) { SysPrintf("load delay @%08x (%08x)\n", start + i*4, start); for (j = i - 1; j > 0 && dops[j-1].is_delay_load; j--) if (dops[j-1].rt1 != dops[i-1].rt1) @@ -6976,13 
+7001,20 @@ static noinline void pass1_disassemble(u_int pagelimit) force_j_to_interpreter = 1; } if (force_j_to_interpreter) { - memset(&dops[j], 0, sizeof(dops[j])); - dops[j].itype = INTCALL; - dops[j].rs1 = CCREG; - cinfo[j].ba = -1; + force_intcall(j); done = 2; i = j; // don't compile the problematic branch/load/etc } + if (dops[i].is_exception && i > 0 && dops[i-1].is_jump) { + SysPrintf("exception in DS @%08x (%08x)\n", start + i*4, start); + i--; + force_intcall(i); + done = 2; + } + if (i >= 2 && (source[i-2] & 0xffe0f800) == 0x40806000) // MTC0 $12 + dops[i].bt = 1; + if (i >= 1 && (source[i-1] & 0xffe0f800) == 0x40806800) // MTC0 $13 + dops[i].bt = 1; /* Is this the end of the block? */ if (i > 0 && dops[i-1].is_ujump) { @@ -7350,8 +7382,6 @@ static noinline void pass3_register_alloc(u_int addr) if (dops[i].rt1!=0) { alloc_reg(&current,i,dops[i].rt1); dirty_reg(&current,dops[i].rt1); - assert(dops[i+1].rs1!=dops[i].rt1&&dops[i+1].rs2!=dops[i].rt1); - assert(dops[i+1].rt1!=dops[i].rt1); #ifdef REG_PREFETCH alloc_reg(&current,i,PTEMP); #endif @@ -7937,7 +7967,7 @@ static noinline void pass4_cull_unused_regs(void) } } // Cycle count is needed at branches. Assume it is needed at the target too.
- if(i==0||dops[i].bt||dops[i].itype==CJUMP) { + if (i == 0 || dops[i].bt || dops[i].may_except || dops[i].itype == CJUMP) { if(regmap_pre[i][HOST_CCREG]==CCREG) nr|=1<pc; @@ -178,6 +178,7 @@ index be15f782..6f07478f 100644 regs->code = fetch(regs, memRLUT, pc); psxBSC[regs->code >> 26](regs, regs->code); + psxRegs.cycle += 2; ++ fetchNoCache(regs, memRLUT, regs->pc); // bus err check } static inline void execIbp(u8 **memRLUT, psxRegisters *regs) { @@ -188,15 +189,16 @@ index be15f782..6f07478f 100644 dloadStep(regs); if (execBreakCheck(regs, pc)) -@@ -1195,6 +1198,7 @@ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) { +@@ -1187,6 +1191,8 @@ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) { regs->pc += 4; regs->code = fetch(regs, memRLUT, pc); psxBSC[regs->code >> 26](regs, regs->code); + psxRegs.cycle += 2; ++ fetchNoCache(regs, memRLUT, regs->pc); // bus err check } static void intExecute() { -@@ -1224,6 +1228,30 @@ void intExecuteBlock(enum blockExecCaller caller) { +@@ -1216,6 +1222,30 @@ void intExecuteBlock(enum blockExecCaller caller) { execI_(memRLUT, regs_); } @@ -227,7 +229,7 @@ index be15f782..6f07478f 100644 static void intClear(u32 Addr, u32 Size) { } -@@ -1271,7 +1299,7 @@ void intApplyConfig() { +@@ -1263,7 +1293,7 @@ void intApplyConfig() { assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall); assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall); diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index e212d8a99..f473ddf6b 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -1340,6 +1340,7 @@ static void intShutdown() { } // single step (may do several ops in case of a branch or load delay) +// called by asm/dynarec void execI(psxRegisters *regs) { do { execIbp(psxMemRLUT, regs); From 90f98e7cf5ed4fdabf6b6ff16d6886fef9dc7bbc Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 30 Jul 2023 00:08:55 +0300 Subject: [PATCH 290/597] drc: try to prevent wrong eviction --- 
libpcsxcore/new_dynarec/assem_arm.c | 22 +- libpcsxcore/new_dynarec/assem_arm64.c | 12 +- libpcsxcore/new_dynarec/emu_if.c | 1 + libpcsxcore/new_dynarec/new_dynarec.c | 294 ++++++++------------- libpcsxcore/new_dynarec/patches/trace_intr | 17 +- 5 files changed, 145 insertions(+), 201 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index a7bdfbda6..bdb81b4d5 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -217,16 +217,26 @@ static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr) } } - cur->regmap[hr]=reg; - cur->dirty&=~(1<dirty|=dirty<isconst&=~(1<regmap[hr] < 0 || !((cur->noevict >> hr) & 1)); + cur->regmap[hr] = reg; + cur->dirty &= ~(1 << hr); + cur->dirty |= dirty << hr; + cur->isconst &= ~(1u << hr); + cur->noevict |= 1u << hr; } // Alloc cycle count into dedicated register -static void alloc_cc(struct regstat *cur,int i) +static void alloc_cc(struct regstat *cur, int i) { - alloc_arm_reg(cur,i,CCREG,HOST_CCREG); + alloc_arm_reg(cur, i, CCREG, HOST_CCREG); +} + +static void alloc_cc_optional(struct regstat *cur, int i) +{ + if (cur->regmap[HOST_CCREG] < 0) { + alloc_arm_reg(cur, i, CCREG, HOST_CCREG); + cur->noevict &= ~(1u << HOST_CCREG); + } } /* Assembler */ diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 0a29eaf4d..dc5bb4db4 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -101,9 +101,17 @@ static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr) } // Alloc cycle count into dedicated register -static void alloc_cc(struct regstat *cur,int i) +static void alloc_cc(struct regstat *cur, int i) { - alloc_arm_reg(cur,i,CCREG,HOST_CCREG); + alloc_arm_reg(cur, i, CCREG, HOST_CCREG); +} + +static void alloc_cc_optional(struct regstat *cur, int i) +{ + if (cur->regmap[HOST_CCREG] < 0) { + alloc_arm_reg(cur, i, CCREG, HOST_CCREG); + 
cur->noevict &= ~(1u << HOST_CCREG); + } } /* Special alloc */ diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 2862c5467..06612dbf9 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -298,6 +298,7 @@ static void ari64_notify(enum R3000Anote note, void *data) { break; case R3000ACPU_NOTIFY_AFTER_LOAD: ari64_reset(); + psxInt.Notify(note, data); break; } } diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index e95d165c3..00d307b32 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -164,6 +164,7 @@ struct regstat u_int wasconst; // before; for example 'lw r2, (r2)' wasconst is true u_int isconst; // ... but isconst is false when r2 is known (hr) u_int loadedconst; // host regs that have constants loaded + u_int noevict; // can't evict this hr (alloced by current op) //u_int waswritten; // MIPS regs that were used as store base before }; @@ -982,7 +983,7 @@ static uint32_t get_const(const struct regstat *cur, signed char reg) // Least soon needed registers // Look at the next ten instructions and see which registers // will be used. Try not to reallocate these. 
-static void lsn(u_char hsn[], int i, int *preferred_reg) +static void lsn(u_char hsn[], int i) { int j; int b=-1; @@ -1656,6 +1657,72 @@ void ndrc_add_jump_out(u_int vaddr, void *src) /* Register allocation */ +static void alloc_set(struct regstat *cur, int reg, int hr) +{ + cur->regmap[hr] = reg; + cur->dirty &= ~(1u << hr); + cur->isconst &= ~(1u << hr); + cur->noevict |= 1u << hr; +} + +static void evict_alloc_reg(struct regstat *cur, int i, int reg, int preferred_hr) +{ + u_char hsn[MAXREG+1]; + int j, r, hr; + memset(hsn, 10, sizeof(hsn)); + lsn(hsn, i); + //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); + if(i>0) { + // Don't evict the cycle count at entry points, otherwise the entry + // stub will have to write it. + if(dops[i].bt&&hsn[CCREG]>2) hsn[CCREG]=2; + if (i>1 && hsn[CCREG] > 2 && dops[i-2].is_jump) hsn[CCREG]=2; + for(j=10;j>=3;j--) + { + // Alloc preferred register if available + if (!((cur->noevict >> preferred_hr) & 1) + && hsn[cur->regmap[preferred_hr]] == j) + { + alloc_set(cur, reg, preferred_hr); + return; + } + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j&&r!=dops[i-1].rs1&&r!=dops[i-1].rs2&&r!=dops[i-1].rt1&&r!=dops[i-1].rt2) { + for(hr=0;hrnoevict >> hr) & 1)) + continue; + if(hr!=HOST_CCREG||jregmap[hr]==r) { + alloc_set(cur, reg, hr); + return; + } + } + } + } + } + } + } + for(j=10;j>=0;j--) + { + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j) { + for(hr=0;hrnoevict >> hr) & 1)) + continue; + if(cur->regmap[hr]==r) { + alloc_set(cur, reg, hr); + return; + } + } + } + } + } + SysPrintf("This shouldn't happen (evict_alloc_reg)\n"); + abort(); +} + // Note: registers are allocated clean (unmodified state) // if you intend to modify the register, you must call dirty_reg(). 
static void alloc_reg(struct regstat *cur,int i,signed char reg) @@ -1672,25 +1739,23 @@ static void alloc_reg(struct regstat *cur,int i,signed char reg) if((cur->u>>reg)&1) return; // see if it's already allocated - if (get_reg(cur->regmap, reg) >= 0) + if ((hr = get_reg(cur->regmap, reg)) >= 0) { + cur->noevict |= 1u << hr; return; + } // Keep the same mapping if the register was already allocated in a loop preferred_reg = loop_reg(i,reg,preferred_reg); // Try to allocate the preferred register - if(cur->regmap[preferred_reg]==-1) { - cur->regmap[preferred_reg]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg] == -1) { + alloc_set(cur, reg, preferred_reg); return; } r=cur->regmap[preferred_reg]; assert(r < 64); if((cur->u>>r)&1) { - cur->regmap[preferred_reg]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr] < 0) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]); - //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); - if(i>0) { - // Don't evict the cycle count at entry points, otherwise the entry - // stub will have to write it. 
- if(dops[i].bt&&hsn[CCREG]>2) hsn[CCREG]=2; - if (i>1 && hsn[CCREG] > 2 && dops[i-2].is_jump) hsn[CCREG]=2; - for(j=10;j>=3;j--) - { - // Alloc preferred register if available - if(hsn[r=cur->regmap[preferred_reg]&63]==j) { - for(hr=0;hrregmap[hr]==r) { - cur->regmap[hr]=-1; - cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]=reg; - return; - } - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j&&r!=dops[i-1].rs1&&r!=dops[i-1].rs2&&r!=dops[i-1].rt1&&r!=dops[i-1].rt2) { - for(hr=0;hrregmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<=0;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j) { - for(hr=0;hrregmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==reg) return; + if (hr != EXCLUDE_REG && cur->regmap[hr] == reg) { + cur->noevict |= 1u << hr; + return; + } } // Try to allocate any available register for(hr=HOST_REGS-1;hr>=0;hr--) { if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<u>>r)&1) { if(i==0||((unneeded_reg[i-1]>>r)&1)) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); - if(i>0) { - // Don't evict the cycle count at entry points, otherwise the entry - // stub will have to write it. 
- if(dops[i].bt&&hsn[CCREG]>2) hsn[CCREG]=2; - if (i>1 && hsn[CCREG] > 2 && dops[i-2].is_jump) hsn[CCREG]=2; - for(j=10;j>=3;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j&&r!=dops[i-1].rs1&&r!=dops[i-1].rs2&&r!=dops[i-1].rt1&&r!=dops[i-1].rt2) { - for(hr=0;hr2) { - if(cur->regmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<=0;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j) { - for(hr=0;hrregmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<u>>dops[i].rt1)&1)) { @@ -2106,7 +2058,8 @@ static void load_alloc(struct regstat *current,int i) } } -static void store_alloc(struct regstat *current,int i) +// this may eat up to 7 registers +static void store_alloc(struct regstat *current, int i) { clear_const(current,dops[i].rs2); if(!(dops[i].rs2)) current->u&=~1LL; // Allow allocating r0 if necessary @@ -2121,16 +2074,14 @@ static void store_alloc(struct regstat *current,int i) if (dops[i].opcode == 0x2a || dops[i].opcode == 0x2e) { // SWL/SWL alloc_reg(current,i,FTEMP); } - if (dops[i].may_except) { - alloc_cc(current, i); // for exceptions - dirty_reg(current, CCREG); - } + if (dops[i].may_except) + alloc_cc_optional(current, i); // for exceptions // We need a temporary register for address generation alloc_reg_temp(current,i,-1); cinfo[i].min_free_regs=1; } -static void c2ls_alloc(struct regstat *current,int i) +static void c2ls_alloc(struct regstat *current, int i) { clear_const(current,dops[i].rt1); if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); @@ -2142,10 +2093,8 @@ static void c2ls_alloc(struct regstat *current,int i) if (dops[i].opcode == 0x3a) // SWC2 alloc_reg(current,i,INVCP); #endif - if (dops[i].may_except) { - alloc_cc(current, i); // for exceptions - dirty_reg(current, CCREG); - } + if (dops[i].may_except) + alloc_cc_optional(current, i); // for exceptions // We need a temporary register for address generation alloc_reg_temp(current,i,-1); cinfo[i].min_free_regs=1; @@ -2161,6 +2110,7 @@ 
static void multdiv_alloc(struct regstat *current,int i) clear_const(current,dops[i].rs1); clear_const(current,dops[i].rs2); alloc_cc(current,i); // for stalls + dirty_reg(current,CCREG); if(dops[i].rs1&&dops[i].rs2) { current->u&=~(1LL<regmap); if (do_oflow) assert(tmp >= 0); - //if (t < 0 && do_oflow) // broken s2 - // t = tmp; + if (t < 0 && do_oflow) + t = tmp; if (t >= 0) { s1 = get_reg(i_regs->regmap, dops[i].rs1); s2 = get_reg(i_regs->regmap, dops[i].rs2); @@ -7190,15 +7140,6 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) } } } - else if(dops[i].may_except) - { - // SYSCALL instruction, etc or conditional exception - u=1; - } - else if (dops[i].itype == RFE) - { - u=1; - } //u=1; // DEBUG // Written registers are unneeded u|=1LL< Date: Sun, 30 Jul 2023 02:31:02 +0300 Subject: [PATCH 291/597] dma: don't copy out of range it should wrap, but that's not practical so just limit for now libretro/pcsx_rearmed#740 --- libpcsxcore/cdrom.c | 6 ++++-- libpcsxcore/psxdma.c | 38 ++++++++++++++++++++++---------------- libpcsxcore/psxdma.h | 11 +++++++++++ 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index c092f2ca6..7bc57cff8 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1565,7 +1565,7 @@ void cdrWrite3(unsigned char rt) { } void psxDma3(u32 madr, u32 bcr, u32 chcr) { - u32 cdsize; + u32 cdsize, max_words; int size; u8 *ptr; @@ -1580,7 +1580,7 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { switch (chcr & 0x71000000) { case 0x11000000: - ptr = (u8 *)PSXM(madr); + ptr = getDmaRam(madr, &max_words); if (ptr == INVALID_PTR) { CDR_LOG_I("psxDma3() Log: *** DMA 3 *** NULL Pointer!\n"); break; @@ -1597,6 +1597,8 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { size = DATA_SIZE - cdr.FifoOffset; if (size > cdsize) size = cdsize; + if (size > max_words * 4) + size = max_words * 4; if (size > 0) { memcpy(ptr, cdr.Transfer + cdr.FifoOffset, size); diff --git a/libpcsxcore/psxdma.c 
b/libpcsxcore/psxdma.c index 30aa9bdb9..e15f0185d 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -24,6 +24,10 @@ #include "psxdma.h" #include "gpu.h" +#ifndef min +#define min(a, b) ((b) < (a) ? (b) : (a)) +#endif + // Dma0/1 in Mdec.c // Dma3 in CdRom.c @@ -36,15 +40,15 @@ void spuInterrupt() { } void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU + u32 words, words_max, size; u16 *ptr; - u32 words; switch (chcr) { case 0x01000201: //cpu to spu transfer #ifdef PSXDMA_LOG PSXDMA_LOG("*** DMA4 SPU - mem2spu *** %x addr = %x size = %x\n", chcr, madr, bcr); #endif - ptr = (u16 *)PSXM(madr); + ptr = getDmaRam(madr, &words_max); if (ptr == INVALID_PTR) { #ifdef CPU_LOG CPU_LOG("*** DMA4 SPU - mem2spu *** NULL Pointer!!!\n"); @@ -52,8 +56,9 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU break; } words = (bcr >> 16) * (bcr & 0xffff); - SPU_writeDMAMem(ptr, words * 2, psxRegs.cycle); - HW_DMA4_MADR = SWAPu32(madr + words * 4); + size = min(words, words_max) * 2; + SPU_writeDMAMem(ptr, size, psxRegs.cycle); + HW_DMA4_MADR = SWAPu32((madr & ~3) + words * 4); SPUDMA_INT(words * 4); return; @@ -61,7 +66,7 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU #ifdef PSXDMA_LOG PSXDMA_LOG("*** DMA4 SPU - spu2mem *** %x addr = %x size = %x\n", chcr, madr, bcr); #endif - ptr = (u16 *)PSXM(madr); + ptr = getDmaRam(madr, &words_max); if (ptr == INVALID_PTR) { #ifdef CPU_LOG CPU_LOG("*** DMA4 SPU - spu2mem *** NULL Pointer!!!\n"); @@ -69,7 +74,8 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU break; } words = (bcr >> 16) * (bcr & 0xffff); - SPU_readDMAMem(ptr, words * 2, psxRegs.cycle); + size = min(words, words_max) * 2; + SPU_readDMAMem(ptr, size, psxRegs.cycle); psxCpu->Clear(madr, words); HW_DMA4_MADR = SWAPu32(madr + words * 4); @@ -127,17 +133,16 @@ static u32 gpuDmaChainSize(u32 addr) { } void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU - u32 *ptr, madr_next, *madr_next_p; + u32 *ptr, madr_next, *madr_next_p, size; + u32 words, words_max, 
words_copy; int do_walking; - u32 words; - u32 size; switch (chcr) { case 0x01000200: // vram2mem #ifdef PSXDMA_LOG PSXDMA_LOG("*** DMA2 GPU - vram2mem *** %lx addr = %lx size = %lx\n", chcr, madr, bcr); #endif - ptr = (u32 *)PSXM(madr); + ptr = getDmaRam(madr, &words_max); if (ptr == INVALID_PTR) { #ifdef CPU_LOG CPU_LOG("*** DMA2 GPU - vram2mem *** NULL Pointer!!!\n"); @@ -146,10 +151,11 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU } // BA blocks * BS words (word = 32-bits) words = (bcr >> 16) * (bcr & 0xffff); - GPU_readDataMem(ptr, words); - psxCpu->Clear(madr, words); + words_copy = min(words, words_max); + GPU_readDataMem(ptr, words_copy); + psxCpu->Clear(madr, words_copy); - HW_DMA2_MADR = SWAPu32(madr + words * 4); + HW_DMA2_MADR = SWAPu32((madr & ~3) + words * 4); // already 32-bit word size ((size * 4) / 4) GPUDMA_INT(words / 4); @@ -159,7 +165,7 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU #ifdef PSXDMA_LOG PSXDMA_LOG("*** DMA 2 - GPU mem2vram *** %lx addr = %lx size = %lx\n", chcr, madr, bcr); #endif - ptr = (u32 *)PSXM(madr); + ptr = getDmaRam(madr, &words_max); if (ptr == INVALID_PTR) { #ifdef CPU_LOG CPU_LOG("*** DMA2 GPU - mem2vram *** NULL Pointer!!!\n"); @@ -168,9 +174,9 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU } // BA blocks * BS words (word = 32-bits) words = (bcr >> 16) * (bcr & 0xffff); - GPU_writeDataMem(ptr, words); + GPU_writeDataMem(ptr, min(words, words_max)); - HW_DMA2_MADR = SWAPu32(madr + words * 4); + HW_DMA2_MADR = SWAPu32((madr & ~3) + words * 4); // already 32-bit word size ((size * 4) / 4) GPUDMA_INT(words / 4); diff --git a/libpcsxcore/psxdma.h b/libpcsxcore/psxdma.h index 28495fa87..eaddb3889 100644 --- a/libpcsxcore/psxdma.h +++ b/libpcsxcore/psxdma.h @@ -79,6 +79,17 @@ void gpuInterrupt(); void spuInterrupt(); void gpuotcInterrupt(); +static inline void *getDmaRam(u32 madr, u32 *max_words) +{ + // this should wrap instead of limit + if (!(madr & 0x800000)) { + madr &= 0x1ffffc; + *max_words = 
(0x200000 - madr) / 4; + return psxM + madr; + } + return INVALID_PTR; +} + #ifdef __cplusplus } #endif From 33a1eda13ebe624fc1233d820c8db7dcd7aba0d8 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 3 Aug 2023 01:20:45 +0300 Subject: [PATCH 292/597] drc: some cleanup --- libpcsxcore/new_dynarec/assem_arm.h | 3 +- libpcsxcore/new_dynarec/assem_arm64.h | 6 ++- libpcsxcore/new_dynarec/emu_if.h | 1 - libpcsxcore/new_dynarec/linkage_arm64.S | 24 ++++++++++ libpcsxcore/new_dynarec/new_dynarec.c | 59 ++++++++++--------------- 5 files changed, 53 insertions(+), 40 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.h b/libpcsxcore/new_dynarec/assem_arm.h index 75273aa88..b35587672 100644 --- a/libpcsxcore/new_dynarec/assem_arm.h +++ b/libpcsxcore/new_dynarec/assem_arm.h @@ -16,7 +16,6 @@ #define HOST_REGS 13 #define HOST_CCREG 10 -#define HOST_BTREG 8 #define EXCLUDE_REG 11 // Note: FP is set to &dynarec_local when executing generated code. @@ -33,6 +32,8 @@ #define PREFERRED_REG_FIRST 4 #define PREFERRED_REG_LAST 9 +#define DRC_DBG_REGMASK CALLER_SAVE_REGS + extern char *invc_ptr; #define TARGET_SIZE_2 24 // 2^24 = 16 megabytes diff --git a/libpcsxcore/new_dynarec/assem_arm64.h b/libpcsxcore/new_dynarec/assem_arm64.h index 6d1a17f0e..f8ee042f3 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.h +++ b/libpcsxcore/new_dynarec/assem_arm64.h @@ -5,7 +5,6 @@ r19-r29: callee-save */ #define HOST_REGS 29 -#define HOST_BTREG 27 #define EXCLUDE_REG -1 #define SP 31 @@ -27,8 +26,11 @@ #define PREFERRED_REG_FIRST 19 #define PREFERRED_REG_LAST 27 +#define DRC_DBG_REGMASK 3 // others done by do_insn_cmp_arm64 +#define do_insn_cmp do_insn_cmp_arm64 + // stack space -#define SSP_CALLEE_REGS (8*12) +#define SSP_CALLEE_REGS (8*12) // new_dyna_start caller's #define SSP_CALLER_REGS (8*20) #define SSP_ALL (SSP_CALLEE_REGS+SSP_CALLER_REGS) diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index 3f4aba6fe..ec307fc4a 100644 --- 
a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -83,4 +83,3 @@ void pcsx_mtc0_ds(u32 reg, u32 val); /* misc */ extern void SysPrintf(const char *fmt, ...); -#define rdram ((u_char *)psxM) diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 644b03564..501a4fe77 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -374,3 +374,27 @@ FUNCTION(call_gteStall): add rCC, rCC, w0 ret +#ifdef DRC_DBG +#undef do_insn_cmp +FUNCTION(do_insn_cmp_arm64): + stp x2, x3, [sp, #(SSP_CALLEE_REGS + 2*8)] + stp x4, x5, [sp, #(SSP_CALLEE_REGS + 4*8)] + stp x6, x7, [sp, #(SSP_CALLEE_REGS + 6*8)] + stp x8, x9, [sp, #(SSP_CALLEE_REGS + 8*8)] + stp x10, x11, [sp, #(SSP_CALLEE_REGS + 10*8)] + stp x12, x13, [sp, #(SSP_CALLEE_REGS + 12*8)] + stp x14, x15, [sp, #(SSP_CALLEE_REGS + 14*8)] + stp x16, x17, [sp, #(SSP_CALLEE_REGS + 16*8)] + stp x18, x30, [sp, #(SSP_CALLEE_REGS + 18*8)] + bl do_insn_cmp + ldp x2, x3, [sp, #(SSP_CALLEE_REGS + 2*8)] + ldp x4, x5, [sp, #(SSP_CALLEE_REGS + 4*8)] + ldp x6, x7, [sp, #(SSP_CALLEE_REGS + 6*8)] + ldp x8, x9, [sp, #(SSP_CALLEE_REGS + 8*8)] + ldp x10, x11, [sp, #(SSP_CALLEE_REGS + 10*8)] + ldp x12, x13, [sp, #(SSP_CALLEE_REGS + 12*8)] + ldp x14, x15, [sp, #(SSP_CALLEE_REGS + 14*8)] + ldp x16, x17, [sp, #(SSP_CALLEE_REGS + 16*8)] + ldp x18, x30, [sp, #(SSP_CALLEE_REGS + 18*8)] + ret +#endif diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 00d307b32..db751266e 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -338,9 +338,9 @@ static struct compile_info #define CCREG 36 // Cycle count #define INVCP 37 // Pointer to invalid_code //#define MMREG 38 // Pointer to memory_map -#define ROREG 39 // ram offset (if rdram!=0x80000000) +#define ROREG 39 // ram offset (if psxM != 0x80000000) #define TEMPREG 40 -#define FTEMP 40 // FPU temporary register 
+#define FTEMP 40 // Load/store temporary register (was fpu) #define PTEMP 41 // Prefetch temporary register //#define TLREG 42 // TLB mapping offset #define RHASH 43 // Return address hash @@ -349,7 +349,6 @@ static struct compile_info #define MAXREG 45 #define AGEN1 46 // Address generation temporary register (pass5b_preallocate2) //#define AGEN2 47 // Address generation temporary register -#define BTREG 50 // Branch target temporary register /* instruction types */ #define NOP 0 // No operation @@ -380,7 +379,6 @@ static struct compile_info /* branch codes */ #define TAKEN 1 #define NOTTAKEN 2 -#define NULLDS 3 #define DJT_1 (void *)1l // no function, just a label in assem_debug log #define DJT_2 (void *)2l @@ -1054,12 +1052,8 @@ static void lsn(u_char hsn[], int i) if(dops[i].itype==C2LS) { hsn[FTEMP]=0; } - // Load L/R also uses FTEMP as a temporary register - if(dops[i].itype==LOADLR) { - hsn[FTEMP]=0; - } - // Also SWL/SWR/SDL/SDR - if(dops[i].opcode==0x2a||dops[i].opcode==0x2e||dops[i].opcode==0x2c||dops[i].opcode==0x2d) { + // Load/store L/R also uses FTEMP as a temporary register + if (dops[i].itype == LOADLR || dops[i].itype == STORELR) { hsn[FTEMP]=0; } // Don't remove the miniht registers @@ -1236,7 +1230,11 @@ static const struct { FUNCNAME(do_memhandler_post), #endif #ifdef DRC_DBG +# ifdef __aarch64__ + FUNCNAME(do_insn_cmp_arm64), +# else FUNCNAME(do_insn_cmp), +# endif #endif }; @@ -2837,11 +2835,11 @@ static void *emit_fastpath_cmp_jump(int i, const struct regstat *i_regs, // alignment check u_int op = dops[i].opcode; int mask = ((op & 0x37) == 0x21 || op == 0x25) ? 
1 : 3; // LH/SH/LHU - void *jaddr; + void *jaddr2; emit_testimm(addr, mask); - jaddr = out; + jaddr2 = out; emit_jne(0); - add_stub_r(ALIGNMENT_STUB, jaddr, out, i, addr, i_regs, ccadj_, 0); + add_stub_r(ALIGNMENT_STUB, jaddr2, out, i, addr, i_regs, ccadj_, 0); } if(type==MTYPE_8020) { // RAM 80200000+ mirror @@ -4491,7 +4489,7 @@ static void address_generation(int i, const struct regstat *i_regs, signed char { int offset = cinfo[i].imm; int add_offset = offset != 0; - int c=(i_regs->wasconst>>rs)&1; + int c = rs >= 0 && ((i_regs->wasconst >> rs) & 1); if(dops[i].rs1==0) { // Using r0 as a base address assert(ra >= 0); @@ -4942,6 +4940,8 @@ static void drc_dbg_emit_do_cmp(int i, int ccadj_) extern void do_insn_cmp(); //extern int cycle; u_int hr, reglist = get_host_reglist(regs[i].regmap); + reglist |= get_host_reglist(regs[i].regmap_entry); + reglist &= DRC_DBG_REGMASK; assem_debug("//do_insn_cmp %08x\n", start+i*4); save_regs(reglist); @@ -5090,11 +5090,7 @@ static void do_ccstub(int n) assem_debug("do_ccstub %x\n",start+(u_int)stubs[n].b*4); set_jump_target(stubs[n].addr, out); int i=stubs[n].b; - if(stubs[n].d==NULLDS) { - // Delay slot instruction is nullified ("likely" branch) - wb_dirtys(regs[i].regmap,regs[i].dirty); - } - else if(stubs[n].d!=TAKEN) { + if (stubs[n].d != TAKEN) { wb_dirtys(branch_regs[i].regmap,branch_regs[i].dirty); } else { @@ -5259,10 +5255,6 @@ static void do_ccstub(int n) }else if(stubs[n].d==NOTTAKEN) { if(i= 0x80000000 && addr < 0x80000000+RAM_SIZE) { - *limit = (addr & 0x80600000) + 0x00200000; - return (u_int *)(rdram + (addr&0x1fffff)); - } return NULL; } @@ -7203,7 +7192,6 @@ static noinline void pass3_register_alloc(u_int addr) dops[1].bt=1; ds=1; unneeded_reg[0]=1; - current.regmap[HOST_BTREG]=BTREG; } for(i=0;i=0;i--) { if(dops[i].itype==CJUMP||dops[i].itype==SJUMP) From a22ccd6a80307ef5f711332f68de96949cdeee76 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 3 Aug 2023 01:22:58 +0300 Subject: [PATCH 293/597] arm64: use ldp/stp 
more --- libpcsxcore/new_dynarec/assem_arm64.c | 59 +++++++++++++++++++++++++++ libpcsxcore/new_dynarec/new_dynarec.c | 58 +++++++++++--------------- 2 files changed, 82 insertions(+), 35 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index dc5bb4db4..97e1fb148 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1950,6 +1950,65 @@ static void multdiv_assemble_arm64(int i, const struct regstat *i_regs) } #define multdiv_assemble multdiv_assemble_arm64 +// wb_dirtys making use of stp when possible +static void wb_dirtys(const signed char i_regmap[], u_int i_dirty) +{ + signed char mregs[34+1]; + int r, hr; + memset(mregs, -1, sizeof(mregs)); + for (hr = 0; hr < HOST_REGS; hr++) { + r = i_regmap[hr]; + if (hr == EXCLUDE_REG || r <= 0 || r == CCREG) + continue; + if (!((i_dirty >> hr) & 1)) + continue; + assert(r < 34u); + mregs[r] = hr; + } + for (r = 1; r < 34; r++) { + if (mregs[r] < 0) + continue; + if (mregs[r+1] >= 0) { + uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local; + emit_ldstp(1, 0, mregs[r], mregs[r+1], FP, offset); + r++; + } + else + emit_storereg(r, mregs[r]); + } +} +#define wb_dirtys wb_dirtys + +static void load_all_regs(const signed char i_regmap[]) +{ + signed char mregs[34+1]; + int r, hr; + memset(mregs, -1, sizeof(mregs)); + for (hr = 0; hr < HOST_REGS; hr++) { + r = i_regmap[hr]; + if (hr == EXCLUDE_REG || r < 0 || r == CCREG) + continue; + if ((u_int)r < 34u) + mregs[r] = hr; + else if (r < TEMPREG) + emit_loadreg(r, hr); + } + if (mregs[0] >= 0) + emit_zeroreg(mregs[0]); // we could use arm64's ZR instead of reg alloc + for (r = 1; r < 34; r++) { + if (mregs[r] < 0) + continue; + if (mregs[r+1] >= 0) { + uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local; + emit_ldstp(0, 0, mregs[r], mregs[r+1], FP, offset); + r++; + } + else + emit_loadreg(r, mregs[r]); + } +} +#define load_all_regs 
load_all_regs + static void do_jump_vaddr(u_int rs) { if (rs != 0) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index db751266e..dcf940d3a 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -158,14 +158,14 @@ struct regstat { signed char regmap_entry[HOST_REGS]; signed char regmap[HOST_REGS]; - uint64_t wasdirty; - uint64_t dirty; - uint64_t u; + u_int wasdirty; + u_int dirty; u_int wasconst; // before; for example 'lw r2, (r2)' wasconst is true u_int isconst; // ... but isconst is false when r2 is known (hr) u_int loadedconst; // host regs that have constants loaded u_int noevict; // can't evict this hr (alloced by current op) //u_int waswritten; // MIPS regs that were used as store base before + uint64_t u; }; struct ht_entry @@ -409,9 +409,9 @@ static void invalidate_block(struct block_info *block); static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_); // Needed by assembler -static void wb_register(signed char r, const signed char regmap[], uint64_t dirty); -static void wb_dirtys(const signed char i_regmap[], uint64_t i_dirty); -static void wb_needed_dirtys(const signed char i_regmap[], uint64_t i_dirty, int addr); +static void wb_register(signed char r, const signed char regmap[], u_int dirty); +static void wb_dirtys(const signed char i_regmap[], u_int i_dirty); +static void wb_needed_dirtys(const signed char i_regmap[], u_int i_dirty, int addr); static void load_all_regs(const signed char i_regmap[]); static void load_needed_regs(const signed char i_regmap[], const signed char next_regmap[]); static void load_regs_entry(int t); @@ -2288,7 +2288,7 @@ static void add_stub_r(enum stub_type type, void *addr, void *retaddr, } // Write out a single register -static void wb_register(signed char r, const signed char regmap[], uint64_t dirty) +static void wb_register(signed char r, const signed char regmap[], u_int dirty) { int hr; for(hr=0;hr>2; @@ 
-4738,6 +4741,7 @@ static void wb_needed_dirtys(const signed char i_regmap[], uint64_t i_dirty, int } // Load all registers (except cycle count) +#ifndef load_all_regs static void load_all_regs(const signed char i_regmap[]) { int hr; @@ -4754,48 +4758,31 @@ static void load_all_regs(const signed char i_regmap[]) } } } +#endif // Load all current registers also needed by next instruction static void load_needed_regs(const signed char i_regmap[], const signed char next_regmap[]) { + signed char regmap_sel[HOST_REGS]; int hr; - for(hr=0;hr=0) { - if(i_regmap[hr]==0) { - emit_zeroreg(hr); - } - else - if(i_regmap[hr]>0 && i_regmap[hr]= 0) + regmap_sel[hr] = i_regmap[hr]; } + load_all_regs(regmap_sel); } // Load all regs, storing cycle count if necessary static void load_regs_entry(int t) { - int hr; if(dops[t].is_ds) emit_addimm(HOST_CCREG,CLOCK_ADJUST(1),HOST_CCREG); else if(cinfo[t].ccadj) emit_addimm(HOST_CCREG,-cinfo[t].ccadj,HOST_CCREG); if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) { emit_storereg(CCREG,HOST_CCREG); } - // Load 32-bit regs - for(hr=0;hr=0&®s[t].regmap_entry[hr] Date: Thu, 3 Aug 2023 01:24:14 +0300 Subject: [PATCH 294/597] drc: adjust MAXBLOCK it was stopping at MAXBLOCK/2 in most cases anyway, so adjust the arrays for the size that's actually used --- libpcsxcore/new_dynarec/new_dynarec.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index dcf940d3a..bb5b86298 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -81,7 +81,7 @@ static Jit g_jit; #endif #define RAM_SIZE 0x200000 -#define MAXBLOCK 4096 +#define MAXBLOCK 2048 #define MAX_OUTPUT_BLOCK_SIZE 262144 #define EXPIRITY_OFFSET (MAX_OUTPUT_BLOCK_SIZE * 2) #define PAGE_COUNT 1024 @@ -6972,7 +6972,8 @@ static noinline void pass1_disassemble(u_int pagelimit) // Don't recompile stuff that's already compiled if(check_addr(start+i*4+4)) done=1; // 
Don't get too close to the limit - if(i>MAXBLOCK/2) done=1; + if (i > MAXBLOCK - 64) + done = 1; } if (dops[i].itype == HLECALL) stop = 1; @@ -6992,7 +6993,8 @@ static noinline void pass1_disassemble(u_int pagelimit) //assert(i 8 || dops[i].opcode == 0x11)) { done=stop_after_jal=1; From f9e9616e4ccc5de395bb548e95785565a42eed14 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 3 Aug 2023 01:28:27 +0300 Subject: [PATCH 295/597] drc: adjust alignment checking --- libpcsxcore/new_dynarec/new_dynarec.c | 70 ++++++++++++++++++++------- 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index bb5b86298..892d8e946 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -242,8 +242,13 @@ static struct decoded_insn u_char is_delay_load:1; // is_load + MFC/CFC u_char is_exception:1; // unconditional, also interp. fallback u_char may_except:1; // might generate an exception + u_char ls_type:2; // load/store type (ls_width_type) } dops[MAXBLOCK]; +enum ls_width_type { + LS_8 = 0, LS_16, LS_32, LS_LR +}; + static struct compile_info { int imm; @@ -6657,20 +6662,20 @@ static void disassemble_one(int i, u_int src) case 0x13: set_mnemonic(i, "COP3"); op2 = (src >> 21) & 0x1f; break; - case 0x20: set_mnemonic(i, "LB"); type=LOAD; break; - case 0x21: set_mnemonic(i, "LH"); type=LOAD; break; - case 0x22: set_mnemonic(i, "LWL"); type=LOADLR; break; - case 0x23: set_mnemonic(i, "LW"); type=LOAD; break; - case 0x24: set_mnemonic(i, "LBU"); type=LOAD; break; - case 0x25: set_mnemonic(i, "LHU"); type=LOAD; break; - case 0x26: set_mnemonic(i, "LWR"); type=LOADLR; break; - case 0x28: set_mnemonic(i, "SB"); type=STORE; break; - case 0x29: set_mnemonic(i, "SH"); type=STORE; break; - case 0x2A: set_mnemonic(i, "SWL"); type=STORELR; break; - case 0x2B: set_mnemonic(i, "SW"); type=STORE; break; - case 0x2E: set_mnemonic(i, "SWR"); type=STORELR; break; - case 0x32: 
set_mnemonic(i, "LWC2"); type=C2LS; break; - case 0x3A: set_mnemonic(i, "SWC2"); type=C2LS; break; + case 0x20: set_mnemonic(i, "LB"); type=LOAD; ls_type = LS_8; break; + case 0x21: set_mnemonic(i, "LH"); type=LOAD; ls_type = LS_16; break; + case 0x22: set_mnemonic(i, "LWL"); type=LOADLR; ls_type = LS_LR; break; + case 0x23: set_mnemonic(i, "LW"); type=LOAD; ls_type = LS_32; break; + case 0x24: set_mnemonic(i, "LBU"); type=LOAD; ls_type = LS_8; break; + case 0x25: set_mnemonic(i, "LHU"); type=LOAD; ls_type = LS_16; break; + case 0x26: set_mnemonic(i, "LWR"); type=LOADLR; ls_type = LS_LR; break; + case 0x28: set_mnemonic(i, "SB"); type=STORE; ls_type = LS_8; break; + case 0x29: set_mnemonic(i, "SH"); type=STORE; ls_type = LS_16; break; + case 0x2A: set_mnemonic(i, "SWL"); type=STORELR; ls_type = LS_LR; break; + case 0x2B: set_mnemonic(i, "SW"); type=STORE; ls_type = LS_32; break; + case 0x2E: set_mnemonic(i, "SWR"); type=STORELR; ls_type = LS_LR; break; + case 0x32: set_mnemonic(i, "LWC2"); type=C2LS; ls_type = LS_32; break; + case 0x3A: set_mnemonic(i, "SWC2"); type=C2LS; ls_type = LS_32; break; case 0x3B: if (Config.HLE && (src & 0x03ffffff) < ARRAY_SIZE(psxHLEt)) { set_mnemonic(i, "HLECALL"); @@ -6682,8 +6687,9 @@ static void disassemble_one(int i, u_int src) } if (type == INTCALL) SysPrintf("NI %08x @%08x (%08x)\n", src, start + i*4, start); - dops[i].itype=type; - dops[i].opcode2=op2; + dops[i].itype = type; + dops[i].opcode2 = op2; + dops[i].ls_type = ls_type; /* Get registers/immediates */ dops[i].use_lt1=0; gte_rs[i]=gte_rt[i]=0; @@ -6829,6 +6835,7 @@ static void disassemble_one(int i, u_int src) static noinline void pass1_disassemble(u_int pagelimit) { int i, j, done = 0, ni_count = 0; + int ds_next = 0; for (i = 0; !done; i++) { @@ -6836,6 +6843,7 @@ static noinline void pass1_disassemble(u_int pagelimit) unsigned int type, op, op2; disassemble_one(i, source[i]); + dops[i].is_ds = ds_next; ds_next = 0; type = dops[i].itype; op = dops[i].opcode; op2 = 
dops[i].opcode2; @@ -6868,6 +6876,7 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].is_store = type == STORE || type == STORELR || op == 0x3a; // SWC2 dops[i].is_exception = type == SYSCALL || type == HLECALL || type == INTCALL; dops[i].may_except = dops[i].is_exception || (type == ALU && (op2 == 0x20 || op2 == 0x22)) || op == 8; + ds_next = dops[i].is_jump; if (((op & 0x37) == 0x21 || op == 0x25) // LH/SH/LHU && ((cinfo[i].imm & 1) || Config.PreciseExceptions)) @@ -7157,6 +7166,31 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) } } +static noinline void pass2a_unneeded_other(void) +{ + int i, j; + for (i = 0; i < slen; i++) + { + // remove redundant alignment checks + if (dops[i].may_except && (dops[i].is_load || dops[i].is_store) + && dops[i].rt1 != dops[i].rs1 && !dops[i].is_ds) + { + int base = dops[i].rs1, lsb = cinfo[i].imm, ls_type = dops[i].ls_type; + int mask = ls_type == LS_32 ? 3 : 1; + lsb &= mask; + for (j = i + 1; j < slen; j++) { + if (dops[j].bt || dops[j].is_jump) + break; + if ((dops[j].is_load || dops[j].is_store) && dops[j].rs1 == base + && dops[j].ls_type == ls_type && (cinfo[j].imm & mask) == lsb) + dops[j].may_except = 0; + if (dops[j].rt1 == base) + break; + } + } + } +} + static noinline void pass3_register_alloc(u_int addr) { struct regstat current; // Current register allocations/status @@ -7221,7 +7255,7 @@ static noinline void pass3_register_alloc(u_int addr) abort(); } } - dops[i].is_ds=ds; + assert(dops[i].is_ds == ds); if(ds) { ds=0; // Skip delay slot, already allocated as part of branch // ...but we need to alloc it in case something jumps here @@ -8948,6 +8982,8 @@ static int new_recompile_block(u_int addr) pass2_unneeded_regs(0,slen-1,0); + pass2a_unneeded_other(); + /* Pass 3 - Register allocation */ pass3_register_alloc(addr); From 684b6816254a31c40b0d11269aebb2a87fd79b74 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 3 Aug 2023 01:30:56 +0300 Subject: [PATCH 296/597] drc: adjust 
load/store checks --- libpcsxcore/new_dynarec/new_dynarec.c | 90 ++++++++++++++------------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 892d8e946..5de7b9278 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -3223,7 +3223,7 @@ static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) static void do_invstub(int n) { literal_pool(20); - assem_debug("do_invstub\n"); + assem_debug("do_invstub %x\n", start + stubs[n].e*4); u_int reglist = stubs[n].a; u_int addrr = stubs[n].b; int ofs_start = stubs[n].c; @@ -3300,9 +3300,13 @@ static void do_store_smc_check(int i, const struct regstat *i_regs, u_int reglis imm_min -= cinfo[i].imm; imm_max -= cinfo[i].imm; add_stub(INVCODE_STUB, jaddr, out, reglist|(1<regmap); tl=get_reg(i_regs->regmap,dops[i].rs2); s=get_reg(i_regs->regmap,dops[i].rs1); offset=cinfo[i].imm; if(s>=0) { c=(i_regs->wasconst>>s)&1; - if(c) { - memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; + if (c) { + addr_const = constmap[i][s] + offset; + memtarget = ((signed int)addr_const) < (signed int)(0x80000000 + RAM_SIZE); } } assert(tl>=0); assert(addr >= 0); if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<regmap,dops[i].rs2,ccadj_,reglist); } + if (!c || is_ram_addr(addr_const)) + do_store_smc_check(i, i_regs, reglist, addr); + if (c && !memtarget) + inline_writestub(type, i, addr_const, i_regs->regmap, dops[i].rs2, ccadj_, reglist); // basic current block modification detection.. 
// not looking back as that should be in mips cache already // (see Spyro2 title->attract mode) - if(c&&start+i*4regmap==regs[i].regmap); // not delay slot if(i_regs->regmap==regs[i].regmap) { load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i); @@ -3412,18 +3410,21 @@ static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) void *done0, *done1, *done2; int memtarget=0,c=0; int offset_reg = -1; - u_int reglist=get_host_reglist(i_regs->regmap); + u_int addr_const = ~0; + u_int reglist = get_host_reglist(i_regs->regmap); tl=get_reg(i_regs->regmap,dops[i].rs2); s=get_reg(i_regs->regmap,dops[i].rs1); offset=cinfo[i].imm; if(s>=0) { - c=(i_regs->isconst>>s)&1; - if(c) { - memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; + c = (i_regs->isconst >> s) & 1; + if (c) { + addr_const = constmap[i][s] + offset; + memtarget = ((signed int)addr_const) < (signed int)(0x80000000 + RAM_SIZE); } } assert(tl>=0); assert(addr >= 0); + reglist |= 1u << addr; if(!c) { emit_cmpimm(addr, RAM_SIZE); jaddr=out; @@ -3463,14 +3464,14 @@ static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) if (dops[i].opcode == 0x2A) { // SWL // Write two msb into two least significant bytes if (dops[i].rs2) emit_rorimm(tl, 16, tl); - do_store_hword(addr, -1, tl, offset_reg, 0); + do_store_hword(addr, -1, tl, offset_reg, 1); if (dops[i].rs2) emit_rorimm(tl, 16, tl); } else if (dops[i].opcode == 0x2E) { // SWR // Write 3 lsb into three most significant bytes do_store_byte(addr, tl, offset_reg); if (dops[i].rs2) emit_rorimm(tl, 8, tl); - do_store_hword(addr, 1, tl, offset_reg, 0); + do_store_hword(addr, 1, tl, offset_reg, 1); if (dops[i].rs2) emit_rorimm(tl, 24, tl); } done1=out; @@ -3498,7 +3499,7 @@ static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) // 3 set_jump_target(case3, out); if (dops[i].opcode == 0x2A) { // SWL - do_store_word(addr, -3, tl, offset_reg, 0); + do_store_word(addr, -3, tl, 
offset_reg, 1); } else if (dops[i].opcode == 0x2E) { // SWR do_store_byte(addr, tl, offset_reg); @@ -3508,9 +3509,10 @@ static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) set_jump_target(done2, out); if (offset_reg == HOST_TEMPREG) host_tempreg_release(); - if(!c||!memtarget) + if (!c || !memtarget) add_stub_r(STORELR_STUB,jaddr,out,i,addr,i_regs,ccadj_,reglist); - do_store_smc_check(i, i_regs, reglist, addr); + if (!c || is_ram_addr(addr_const)) + do_store_smc_check(i, i_regs, reglist, addr); } static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) @@ -3902,6 +3904,7 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) enum stub_type type; int offset_reg = -1; int fastio_reg_override = -1; + u_int addr_const = ~0; u_int reglist=get_host_reglist(i_regs->regmap); u_int copr=(source[i]>>16)&0x1f; s=get_reg(i_regs->regmap,dops[i].rs1); @@ -3918,8 +3921,13 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) if (dops[i].opcode==0x3a) { // SWC2 reglist |= 1<=0) c=(i_regs->wasconst>>s)&1; - memtarget=c&&(((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE); + if (s >= 0) { + c = (i_regs->isconst >> s) & 1; + if (c) { + addr_const = constmap[i][s] + offset; + memtarget = ((signed int)addr_const) < (signed int)(0x80000000 + RAM_SIZE); + } + } cop2_do_stall_check(0, i, i_regs, reglist); @@ -3968,9 +3976,9 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) host_tempreg_release(); if(jaddr2) add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj_,reglist); - if(dops[i].opcode==0x3a) // SWC2 + if (dops[i].opcode == 0x3a && (!c || is_ram_addr(addr_const))) // SWC2 do_store_smc_check(i, i_regs, reglist, ar); - if (dops[i].opcode==0x32) { // LWC2 + if (dops[i].opcode == 0x32) { // LWC2 host_tempreg_acquire(); cop2_put_dreg(copr,tl,HOST_TEMPREG); host_tempreg_release(); @@ -4534,12 +4542,6 @@ static void address_generation(int i, const struct regstat 
*i_regs, signed char cinfo[i].addr = rs; add_offset = 0; } - else if (dops[i].itype == STORELR) { // overwrites addr - assert(ra >= 0); - assert(rs != ra); - emit_mov(rs, ra); - cinfo[i].addr = ra; - } else cinfo[i].addr = rs; if (add_offset) { @@ -4636,11 +4638,13 @@ static void load_consts(signed char pre[],signed char regmap[],int i) if(i==0||dops[i].bt) regs[i].loadedconst=0; else { - for(hr=0;hr=0&&((regs[i-1].isconst>>hr)&1)&&pre[hr]==regmap[hr] - &®map[hr]==regs[i-1].regmap[hr]&&((regs[i-1].loadedconst>>hr)&1)) + for (hr = 0; hr < HOST_REGS; hr++) { + if (hr == EXCLUDE_REG || regmap[hr] < 0 || pre[hr] != regmap[hr]) + continue; + if ((((regs[i-1].isconst & regs[i-1].loadedconst) >> hr) & 1) + && regmap[hr] == regs[i-1].regmap[hr]) { - regs[i].loadedconst|=1< Date: Fri, 4 Aug 2023 00:22:45 +0300 Subject: [PATCH 297/597] drc: disable some weird code --- libpcsxcore/new_dynarec/new_dynarec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 5de7b9278..2382123f7 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -8396,6 +8396,7 @@ static noinline void pass5b_preallocate2(void) regs[i+2].wasdirty&=~(1<=0); + #if 0 // what is this for? 
double allocs $0 in ps1_rom.bin if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { regs[i].regmap[hr]=dops[i+1].rs1; @@ -8407,6 +8408,7 @@ static noinline void pass5b_preallocate2(void) regs[i+1].wasdirty&=~(1< Date: Fri, 4 Aug 2023 00:25:21 +0300 Subject: [PATCH 298/597] libretro: accept ps1_rom.bin (and similar) bios too --- frontend/libretro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 2f758a6c8..98eea829f 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2817,7 +2817,7 @@ static bool try_use_bios(const char *path) static bool find_any_bios(const char *dirpath, char *path, size_t path_size) { - static const char *substrings[] = { "scph", "psx", "openbios" }; + static const char *substrings[] = { "scph", "ps", "openbios" }; DIR *dir; struct dirent *ent; bool ret = false; From 05f19a3e278c89176a6ab8e39adab66e4e577362 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Aug 2023 19:22:42 +0300 Subject: [PATCH 299/597] cdriso: clean up mode1 detection libretro/pcsx_rearmed#743 --- libpcsxcore/cdriso.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index eeb2c351a..081a8f2b9 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -1326,7 +1326,6 @@ static long CALLBACK ISOopen(void) { char alt_bin_filename[MAXPATHLEN]; const char *bin_filename; char image_str[1024]; - int is_chd = 0; if (cdHandle != NULL) { return 0; // it's already open @@ -1381,7 +1380,6 @@ static long CALLBACK ISOopen(void) { CDR_getBuffer = ISOgetBuffer_chd; cdimg_read_func = cdread_chd; cdimg_read_sub_func = cdread_sub_chd; - is_chd = 1; } #endif @@ -1422,14 +1420,11 @@ static long CALLBACK ISOopen(void) { } // guess whether it is mode1/2048 - if (ftello(cdHandle) % 2048 == 0) { + if (cdimg_read_func == cdread_normal && ftello(cdHandle) % 2048 == 0) { unsigned int modeTest = 0; fseek(cdHandle, 0, SEEK_SET); if (!fread(&modeTest, 
sizeof(modeTest), 1, cdHandle)) { -#ifndef NDEBUG SysPrintf(_("File IO error in <%s:%s>.\n"), __FILE__, __func__); -#endif - return -1; } if (SWAP32(modeTest) != 0xffffff00) { strcat(image_str, "[2048]"); @@ -1442,7 +1437,7 @@ static long CALLBACK ISOopen(void) { PrintTracks(); - if (subChanMixed && !is_chd) { + if (subChanMixed && cdimg_read_func == cdread_normal) { cdimg_read_func = cdread_sub_mixed; cdimg_read_sub_func = cdread_sub_sub_mixed; } From 9b7a868b5e00c0aeab9f5cd0b64c81b5a9ef394f Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Aug 2023 19:38:38 +0300 Subject: [PATCH 300/597] update libchdr --- libchdr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libchdr b/libchdr index a03e69319..54bfb871c 160000 --- a/libchdr +++ b/libchdr @@ -1 +1 @@ -Subproject commit a03e69319164f69d781ab8e453f8cf407387bd13 +Subproject commit 54bfb871ccae31903b95a8feb7f2bf7121f304be From 8cddf57556a1fc436981a32637a0e5c9d7f13e3f Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Aug 2023 20:46:16 +0300 Subject: [PATCH 301/597] libretro: understand psx exes --- frontend/libretro.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 98eea829f..5d876ae4a 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -1471,6 +1471,8 @@ bool retro_load_game(const struct retro_game_info *info) size_t i; unsigned int cd_index = 0; bool is_m3u = (strcasestr(info->path, ".m3u") != NULL); + bool is_exe = (strcasestr(info->path, ".exe") != NULL); + int ret; struct retro_input_descriptor desc[] = { #define JOYP(port) \ @@ -1664,7 +1666,7 @@ bool retro_load_game(const struct retro_game_info *info) plugin_call_rearmed_cbs(); /* dfinput_activate(); */ - if (CheckCdrom() == -1) + if (!is_exe && CheckCdrom() == -1) { log_cb(RETRO_LOG_INFO, "unsupported/invalid CD image: %s\n", info->path); return false; @@ -1672,9 +1674,13 @@ bool retro_load_game(const struct retro_game_info *info) SysReset(); - if 
(LoadCdrom() == -1) + if (is_exe) + ret = Load(info->path); + else + ret = LoadCdrom(); + if (ret != 0) { - log_cb(RETRO_LOG_INFO, "could not load CD\n"); + log_cb(RETRO_LOG_INFO, "could not load %s (%d)\n", is_exe ? "exe" : "CD", ret); return false; } emu_on_new_cd(0); From 0cae7d24abf2c13dead4295067785d3ba961faa4 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Aug 2023 21:01:09 +0300 Subject: [PATCH 302/597] libretro: add (psx) exe and iso to valid_extensions doesn't seem to work though? --- frontend/libretro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 5d876ae4a..807e683f4 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -870,7 +870,7 @@ void retro_get_system_info(struct retro_system_info *info) memset(info, 0, sizeof(*info)); info->library_name = "PCSX-ReARMed"; info->library_version = "r23l" GIT_VERSION; - info->valid_extensions = "bin|cue|img|mdf|pbp|toc|cbn|m3u|chd"; + info->valid_extensions = "bin|cue|img|mdf|pbp|toc|cbn|m3u|chd|iso|exe"; info->need_fullpath = true; } From dc4fa8bcd7d8fb9ccd6c742a350f69e0683350e0 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 9 Aug 2023 01:51:46 +0300 Subject: [PATCH 303/597] psxbios: completely rework exception handling should be much closer to the real thing --- frontend/main.c | 3 - libpcsxcore/debug.c | 1 - libpcsxcore/misc.c | 35 +- libpcsxcore/new_dynarec/emu_if.c | 4 +- libpcsxcore/new_dynarec/linkage_arm.S | 5 +- libpcsxcore/new_dynarec/linkage_arm64.S | 2 + libpcsxcore/new_dynarec/new_dynarec.c | 5 +- libpcsxcore/psxbios.c | 1040 +++++++++++++++-------- libpcsxcore/psxbios.h | 7 +- libpcsxcore/psxcommon.c | 4 - libpcsxcore/psxhle.c | 15 +- libpcsxcore/psxhle.h | 23 +- libpcsxcore/psxinterpreter.c | 4 +- libpcsxcore/r3000a.c | 2 - libpcsxcore/system.h | 1 - 15 files changed, 778 insertions(+), 373 deletions(-) diff --git a/frontend/main.c b/frontend/main.c index 11bc4ed47..1d0083587 100644 --- a/frontend/main.c +++ 
b/frontend/main.c @@ -780,9 +780,6 @@ void SysClose() { } } -void SysUpdate() { -} - int get_state_filename(char *buf, int size, int i) { return get_gameid_filename(buf, size, "." STATES_DIR "%.32s-%.9s.%3.3d", i); diff --git a/libpcsxcore/debug.c b/libpcsxcore/debug.c index 7fac2e421..004fdc03d 100644 --- a/libpcsxcore/debug.c +++ b/libpcsxcore/debug.c @@ -409,7 +409,6 @@ void ProcessDebug() { GetClient(); ProcessCommands(); GPU_updateLace(); - SysUpdate(); } } diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 223266bad..252e1c8c7 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -188,12 +188,26 @@ void BiosBootBypass() { psxRegs.pc = psxRegs.GPR.n.ra; } +static void getFromCnf(char *buf, const char *key, u32 *val) +{ + buf = strstr(buf, key); + if (buf) + buf = strchr(buf, '='); + if (buf) + *val = strtol(buf + 1, NULL, 16); +} + int LoadCdrom() { EXE_HEADER tmpHead; struct iso_directory_record *dir; u8 time[4], *buf; u8 mdir[4096]; char exename[256]; + u32 cnf_tcb = 4; + u32 cnf_event = 16; + u32 cnf_stack = 0; + u32 sp = 0; + int ret; if (!Config.HLE) { if (psxRegs.pc != 0x80030000) // BiosBootBypass'ed or custom BIOS? 
@@ -224,11 +238,12 @@ int LoadCdrom() { else { // read the SYSTEM.CNF READTRACK(); + buf[1023] = 0; - sscanf((char *)buf + 12, "BOOT = cdrom:\\%255s", exename); - if (GetCdromFile(mdir, time, exename) == -1) { - sscanf((char *)buf + 12, "BOOT = cdrom:%255s", exename); - if (GetCdromFile(mdir, time, exename) == -1) { + ret = sscanf((char *)buf + 12, "BOOT = cdrom:\\%255s", exename); + if (ret < 1 || GetCdromFile(mdir, time, exename) == -1) { + ret = sscanf((char *)buf + 12, "BOOT = cdrom:%255s", exename); + if (ret < 1 || GetCdromFile(mdir, time, exename) == -1) { char *ptr = strstr((char *)buf + 12, "cdrom:"); if (ptr != NULL) { ptr += 6; @@ -244,6 +259,11 @@ int LoadCdrom() { return -1; } } + getFromCnf((char *)buf + 12, "TCB", &cnf_tcb); + getFromCnf((char *)buf + 12, "EVENT", &cnf_event); + getFromCnf((char *)buf + 12, "STACK", &cnf_stack); + if (Config.HLE) + psxBiosCnfLoaded(cnf_tcb, cnf_event); // Read the EXE-Header READTRACK(); @@ -252,7 +272,10 @@ int LoadCdrom() { memcpy(&tmpHead, buf + 12, sizeof(EXE_HEADER)); SysPrintf("manual booting '%s'\n", exename); - SetBootRegs(SWAP32(tmpHead.pc0), SWAP32(tmpHead.gp0), SWAP32(tmpHead.s_addr)); + sp = SWAP32(tmpHead.s_addr); + if (cnf_stack) + sp = cnf_stack; + SetBootRegs(SWAP32(tmpHead.pc0), SWAP32(tmpHead.gp0), sp); tmpHead.t_size = SWAP32(tmpHead.t_size); tmpHead.t_addr = SWAP32(tmpHead.t_addr); @@ -794,8 +817,6 @@ int RecvPcsxInfo() { NET_recvData(&RCntFix_old, sizeof(RCntFix_old), PSE_NET_BLOCKING); NET_recvData(&Config.PsxType, sizeof(Config.PsxType), PSE_NET_BLOCKING); - SysUpdate(); - tmp = Config.Cpu; NET_recvData(&Config.Cpu, sizeof(Config.Cpu), PSE_NET_BLOCKING); if (tmp != Config.Cpu) { diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 06612dbf9..f879ad8cb 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -248,8 +248,6 @@ static void ari64_reset() // (HLE softcall exit and BIOS fastboot end) static void ari64_execute_until() { - 
schedule_timeslice(); - evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); @@ -262,6 +260,7 @@ static void ari64_execute_until() static void ari64_execute() { while (!stop) { + schedule_timeslice(); ari64_execute_until(); evprintf("drc left @%08x\n", psxRegs.pc); } @@ -272,6 +271,7 @@ static void ari64_execute_block(enum blockExecCaller caller) if (caller == EXEC_CALLER_BOOT) stop++; + next_interupt = psxRegs.cycle + 1; ari64_execute_until(); if (caller == EXEC_CALLER_BOOT) diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 6b429b08c..d2d6d8740 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -303,11 +303,14 @@ call_psxException: /* note: psxException might do recursive recompiler call from it's HLE code, * so be ready for this */ FUNCTION(jump_to_new_pc): + ldr r2, [fp, #LO_stop] ldr r1, [fp, #LO_next_interupt] ldr r10, [fp, #LO_cycle] ldr r0, [fp, #LO_pcaddr] - sub r10, r10, r1 + tst r2, r2 str r1, [fp, #LO_last_count] + sub r10, r10, r1 + bne new_dyna_leave bl ndrc_get_addr_ht mov pc, r0 .size jump_to_new_pc, .-jump_to_new_pc diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 501a4fe77..7b77c62e9 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -161,11 +161,13 @@ call_psxException: /* note: psxException might do recursive recompiler call from it's HLE code, * so be ready for this */ FUNCTION(jump_to_new_pc): + ldr w2, [rFP, #LO_stop] ldr w1, [rFP, #LO_next_interupt] ldr rCC, [rFP, #LO_cycle] ldr w0, [rFP, #LO_pcaddr] sub rCC, rCC, w1 str w1, [rFP, #LO_last_count] + cbnz w2, new_dyna_leave bl ndrc_get_addr_ht br x0 .size jump_to_new_pc, .-jump_to_new_pc diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 2382123f7..37bdc3e7c 100644 --- 
a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -6989,9 +6989,9 @@ static noinline void pass1_disassemble(u_int pagelimit) done = 1; } if (dops[i].itype == HLECALL) - stop = 1; + done = 1; else if (dops[i].itype == INTCALL) - stop = 2; + done = 2; else if (dops[i].is_exception) done = stop_after_jal ? 1 : 2; if (done == 2) { @@ -8917,7 +8917,6 @@ static int new_recompile_block(u_int addr) new_dynarec_did_compile=1; if (Config.HLE && start == 0x80001000) // hlecall { - // XXX: is this enough? Maybe check hleSoftCall? void *beginning = start_block(); emit_movimm(start,0); diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 13a7197f7..6624207e1 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -34,6 +34,7 @@ #include "psxhw.h" #include "gpu.h" #include "sio.h" +#include "psxhle.h" #include #if (defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__) @@ -80,7 +81,7 @@ char *biosA0n[256] = { "dev_card_close", "dev_card_firstfile", "dev_card_nextfile","dev_card_erase", "dev_card_undelete","dev_card_format", "dev_card_rename", "dev_card_6f", // 0x70 - "_bu_init", "_96_init", "_96_remove", "sys_a0_73", + "_bu_init", "_96_init", "CdRemove", "sys_a0_73", "sys_a0_74", "sys_a0_75", "sys_a0_76", "sys_a0_77", "_96_CdSeekL", "sys_a0_79", "sys_a0_7a", "sys_a0_7b", "_96_CdGetStatus", "sys_a0_7d", "_96_CdRead", "sys_a0_7f", @@ -216,10 +217,13 @@ typedef struct { */ typedef struct { - s32 status; - s32 mode; + u32 status; + u32 mode; u32 reg[32]; - u32 func; + u32 epc; + u32 hi, lo; + u32 sr, cause; + u32 unused[9]; } TCB; typedef struct { @@ -253,7 +257,6 @@ typedef struct { u32 mcfile; } FileDesc; -static u32 *jmp_int = NULL; static int *pad_buf = NULL; static char *pad_buf1 = NULL, *pad_buf2 = NULL; static int pad_buf1len, pad_buf2len; @@ -270,37 +273,105 @@ static EvCB *ThEV; // 0xff static u32 heap_size = 0; static u32 *heap_addr = NULL; static u32 *heap_end = NULL; -static u32 SysIntRP[8]; static int 
CardState = -1; -static TCB ThreadCB[8]; static int CurThread = 0; static FileDesc FDesc[32]; static u32 card_active_chan = 0; -boolean hleSoftCall = FALSE; +// fixed RAM offsets, SCPH1001 compatible +#define A_TT_ExCB 0x0100 +#define A_TT_PCB 0x0108 +#define A_TT_TCB 0x0110 +#define A_A0_TABLE 0x0200 +#define A_B0_TABLE 0x0874 +#define A_C0_TABLE 0x0674 +#define A_SYSCALL 0x0650 +#define A_EXCEPTION 0x0c80 +#define A_EXC_SP 0x6cf0 +#define A_EEXIT_DEF 0x6cf4 +#define A_EEXIT_PTR 0x75d0 +#define A_EXC_STACK 0x85d8 // exception stack top +#define A_RCNT_VBL_ACK 0x8600 +#define A_EXC_GP 0xf450 + +#define HLEOP(n) SWAPu32((0x3b << 26) | (n)); + +static u32 loadRam32(u32 addr) +{ + assert(!(addr & 0x5f800000)); + return SWAP32(*((u32 *)psxM + ((addr & 0x1fffff) >> 2))); +} -static inline void softCall(u32 pc) { - pc0 = pc; - ra = 0x80001000; +static void *castRam32ptr(u32 addr) +{ + assert(!(addr & 0x5f800003)); + return psxM + (addr & 0x1ffffc); +} + +static void *loadRam32ptr(u32 addr) +{ + return castRam32ptr(loadRam32(addr)); +} + +static void storeRam32(u32 addr, u32 d) +{ + assert(!(addr & 0x5f800000)); + *((u32 *)psxM + ((addr & 0x1fffff) >> 2)) = SWAP32(d); +} - hleSoftCall = TRUE; +static void mips_return(u32 val) +{ + v0 = val; + pc0 = ra; +} - while (pc0 != 0x80001000) psxCpu->ExecuteBlock(EXEC_CALLER_HLE); +static void use_cycles(u32 cycle) +{ + psxRegs.cycle += cycle * 2; +} - hleSoftCall = FALSE; +static void mips_return_c(u32 val, u32 cycle) +{ + use_cycles(cycle); + mips_return(val); } -static inline void softCall2(u32 pc) { +static void mips_return_void_c(u32 cycle) +{ + use_cycles(cycle); + pc0 = ra; +} + +static int returned_from_exception(void) +{ + // 0x80000080 means it took another exception just after return + return pc0 == k0 || pc0 == 0x80000080; +} + +static inline void softCall(u32 pc) { u32 sra = ra; + u32 ssr = psxRegs.CP0.n.SR; pc0 = pc; ra = 0x80001000; + psxRegs.CP0.n.SR &= ~0x404; // disable interrupts - hleSoftCall = TRUE; + while 
(pc0 != 0x80001000) + psxCpu->ExecuteBlock(EXEC_CALLER_HLE); - while (pc0 != 0x80001000) psxCpu->ExecuteBlock(EXEC_CALLER_HLE); ra = sra; + psxRegs.CP0.n.SR = ssr; +} - hleSoftCall = FALSE; +static inline void softCallInException(u32 pc) { + u32 sra = ra; + pc0 = pc; + ra = 0x80001000; + + while (!returned_from_exception() && pc0 != 0x80001000) + psxCpu->ExecuteBlock(EXEC_CALLER_HLE); + + if (pc0 == 0x80001000) + ra = sra; } static inline void DeliverEvent(u32 ev, u32 spec) { @@ -308,25 +379,10 @@ static inline void DeliverEvent(u32 ev, u32 spec) { // EventCB[ev][spec].status = EvStALREADY; if (EventCB[ev][spec].mode == EvMdINTR) { - softCall2(EventCB[ev][spec].fhandler); + softCall(EventCB[ev][spec].fhandler); } else EventCB[ev][spec].status = EvStALREADY; } -static unsigned interrupt_r26=0x8004E8B0; - -static inline void SaveRegs() { - memcpy(regs, psxRegs.GPR.r, 32*4); - regs[32] = psxRegs.GPR.n.lo; - regs[33] = psxRegs.GPR.n.hi; - regs[34] = psxRegs.pc; -} - -static inline void LoadRegs() { - memcpy(psxRegs.GPR.r, regs, 32*4); - psxRegs.GPR.n.lo = regs[32]; - psxRegs.GPR.n.hi = regs[33]; -} - /* * // * // * @@ -359,6 +415,10 @@ static inline void LoadRegs() { else v0 = length; \ } +#ifndef PSXBIOS_LOG +//#define PSXBIOS_LOG printf +#define PSXBIOS_LOG(...) 
+#endif /* Internally redirects to "FileRead(fd,tempbuf,1)".*/ /* For some strange reason, the returned character is sign-expanded; */ @@ -477,40 +537,46 @@ void psxBios_atol() { // 0x11 psxBios_atoi(); } -void psxBios_setjmp() { // 0x13 - u32 *jmp_buf = (u32 *)Ra0; +struct jmp_buf_ { + u32 ra_, sp_, fp_; + u32 s[8]; + u32 gp_; +}; + +static void psxBios_setjmp() { // 0x13 + struct jmp_buf_ *jmp_buf = castRam32ptr(a0); int i; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x13]); -#endif + PSXBIOS_LOG("psxBios_%s %x\n", biosA0n[0x13], a0); - jmp_buf[0] = ra; - jmp_buf[1] = sp; - jmp_buf[2] = fp; + jmp_buf->ra_ = SWAP32(ra); + jmp_buf->sp_ = SWAP32(sp); + jmp_buf->fp_ = SWAP32(fp); for (i = 0; i < 8; i++) // s0-s7 - jmp_buf[3 + i] = psxRegs.GPR.r[16 + i]; - jmp_buf[11] = gp; + jmp_buf->s[i] = SWAP32(psxRegs.GPR.r[16 + i]); + jmp_buf->gp_ = SWAP32(gp); - v0 = 0; pc0 = ra; + mips_return_c(0, 15); } -void psxBios_longjmp() { // 0x14 - u32 *jmp_buf = (u32 *)Ra0; +static void longjmp_load(const struct jmp_buf_ *jmp_buf) +{ int i; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x14]); -#endif - - ra = jmp_buf[0]; /* ra */ - sp = jmp_buf[1]; /* sp */ - fp = jmp_buf[2]; /* fp */ + ra = SWAP32(jmp_buf->ra_); + sp = SWAP32(jmp_buf->sp_); + fp = SWAP32(jmp_buf->fp_); for (i = 0; i < 8; i++) // s0-s7 - psxRegs.GPR.r[16 + i] = jmp_buf[3 + i]; - gp = jmp_buf[11]; /* gp */ + psxRegs.GPR.r[16 + i] = SWAP32(jmp_buf->s[i]); + gp = SWAP32(jmp_buf->gp_);; +} - v0 = a1; pc0 = ra; +void psxBios_longjmp() { // 0x14 + struct jmp_buf_ *jmp_buf = castRam32ptr(a0); + + PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x14]); + longjmp_load(jmp_buf); + mips_return_c(a1, 15); } void psxBios_strcat() { // 0x15 @@ -967,7 +1033,7 @@ static inline int qscmp(char *a, char *b) { a0 = sa0 + (a - (char *)PSXM(sa0)); a1 = sa0 + (b - (char *)PSXM(sa0)); - softCall2(qscmpfunc); + softCall(qscmpfunc); a0 = sa0; return (s32)v0; @@ -1348,6 +1414,14 @@ void psxBios_format() { // 0x41 pc0 = ra; } 
+static void psxBios_SystemErrorUnresolvedException() { + if (loadRam32(0xfffc) != 0x12345678) { // prevent log flood + SysPrintf("psxBios_%s\n", biosA0n[0x40]); + storeRam32(0xfffc, 0x12345678); + } + mips_return_void_c(1000); +} + /* * long Load(char *name, struct EXEC *header); */ @@ -1542,12 +1616,36 @@ void psxBios__96_init() { // 71 pc0 = ra; } -void psxBios__96_remove() { // 72 -#ifdef PSXBIOS_LOG +static void psxBios_SysDeqIntRP_(); + +static void psxBios_DequeueCdIntr_() { + a0 = 0; a1 = 0x91d0; + psxBios_SysDeqIntRP_(); + a0 = 0; a1 = 0x91e0; + psxBios_SysDeqIntRP_(); + use_cycles(16); +} + +static void psxBios_DequeueCdIntr() { // a3 + PSXBIOS_LOG("psxBios_%s\n", biosA0n[0xa3]); + psxBios_DequeueCdIntr_(); +} + +static void psxBios_CdRemove() { // 56, 72 PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x72]); -#endif - pc0 = ra; + // CloseEvent 0xf1000000 + // CloseEvent 0xf1000001 + // CloseEvent 0xf1000002 + // CloseEvent 0xf1000003 + // CloseEvent 0xf1000004 + psxBios_DequeueCdIntr_(); + + // EnterCriticalSection - should be done at the beginning, + // but this way is much easier to implement + a0 = 1; + pc0 = A_SYSCALL; + use_cycles(30); } void psxBios_SetMem() { // 9f @@ -1854,33 +1952,34 @@ void psxBios_DisableEvent() { // 0d */ void psxBios_OpenTh() { // 0e + TCB *tcb = loadRam32ptr(A_TT_TCB); + u32 limit = loadRam32(A_TT_TCB + 4) / 0xc0u; int th; - for (th=1; th<8; th++) + for (th = 1; th < limit; th++) { - if (ThreadCB[th].status == 0) break; + if (tcb[th].status != SWAP32(0x4000)) break; } - if (th == 8) { + if (th == limit) { // Feb 2019 - Added out-of-bounds fix caught by cppcheck: // When no free TCB is found, return 0xffffffff according to Nocash doc. #ifdef PSXBIOS_LOG PSXBIOS_LOG("\t%s() WARNING! 
No Free TCBs found!\n", __func__); #endif - v0 = 0xffffffff; - pc0 = ra; + mips_return_c(0xffffffff, 20); return; } -#ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x0e], th); -#endif - ThreadCB[th].status = 1; - ThreadCB[th].func = a0; - ThreadCB[th].reg[29] = a1; - ThreadCB[th].reg[28] = a2; + tcb[th].status = SWAP32(0x4000); + tcb[th].mode = SWAP32(0x1000); + tcb[th].epc = SWAP32(a0); + tcb[th].reg[30] = SWAP32(a1); // fp + tcb[th].reg[29] = SWAP32(a1); // sp + tcb[th].reg[28] = SWAP32(a2); // gp - v0 = th; pc0 = ra; + mips_return_c(0xff000000 + th, 34); } /* @@ -1888,15 +1987,17 @@ void psxBios_OpenTh() { // 0e */ void psxBios_CloseTh() { // 0f - int th = a0 & 0xff; + TCB *tcb = loadRam32ptr(A_TT_TCB); + u32 limit = loadRam32(A_TT_TCB + 4) / 0xc0u; + u32 th = a0 & 0xff; #ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x0f], th); #endif /* The return value is always 1 (even if the handle was already closed). */ v0 = 1; - if (ThreadCB[th].status != 0) { - ThreadCB[th].status = 0; + if (th < limit && tcb[th].status == SWAP32(0x4000)) { + tcb[th].status = SWAP32(0x1000); } pc0 = ra; @@ -1907,27 +2008,18 @@ void psxBios_CloseTh() { // 0f */ void psxBios_ChangeTh() { // 10 - int th = a0 & 0xff; + u32 tcbBase = loadRam32(A_TT_TCB); + u32 th = a0 & 0xffff; #ifdef PSXBIOS_LOG // PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x10], th); #endif - /* The return value is always 1. 
*/ - v0 = 1; - if (ThreadCB[th].status == 0 || CurThread == th) { - pc0 = ra; - } else { - if (ThreadCB[CurThread].status == 2) { - ThreadCB[CurThread].status = 1; - ThreadCB[CurThread].func = ra; - memcpy(ThreadCB[CurThread].reg, psxRegs.GPR.r, 32*4); - } - - memcpy(psxRegs.GPR.r, ThreadCB[th].reg, 32*4); - pc0 = ThreadCB[th].func; - ThreadCB[th].status = 2; - CurThread = th; - } + // without doing any argument checks, just issue a syscall + // (like the real bios does) + a0 = 3; + a1 = tcbBase + th * sizeof(TCB); + pc0 = A_SYSCALL; + use_cycles(15); } void psxBios_InitPAD() { // 0x12 @@ -1989,32 +2081,37 @@ void psxBios_PAD_dr() { // 16 v0 = -1; pc0 = ra; } -void psxBios_ReturnFromException() { // 17 - LoadRegs(); +static void psxBios_ReturnFromException() { // 17 + u32 tcbPtr = loadRam32(A_TT_PCB); + const TCB *tcb = loadRam32ptr(tcbPtr); + int i; + + for (i = 1; i < 32; i++) + psxRegs.GPR.r[i] = SWAP32(tcb->reg[i]); + psxRegs.GPR.n.lo = SWAP32(tcb->lo); + psxRegs.GPR.n.hi = SWAP32(tcb->hi); + psxRegs.CP0.n.SR = SWAP32(tcb->sr); - pc0 = psxRegs.CP0.n.EPC; - k0 = interrupt_r26; - if (psxRegs.CP0.n.Cause & 0x80000000) pc0 += 4; + //printf("%s %08x->%08x %u\n", __func__, pc0, tcb->epc, psxRegs.cycle); + pc0 = k0 = SWAP32(tcb->epc); psxRegs.CP0.n.SR = (psxRegs.CP0.n.SR & ~0x0f) | ((psxRegs.CP0.n.SR & 0x3c) >> 2); + use_cycles(53); + psxBranchTest(); } void psxBios_ResetEntryInt() { // 18 -#ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x18]); -#endif - jmp_int = NULL; - pc0 = ra; + storeRam32(A_EEXIT_PTR, A_EEXIT_DEF); + mips_return_void_c(5); } void psxBios_HookEntryInt() { // 19 -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x19]); -#endif + PSXBIOS_LOG("psxBios_%s %x\n", biosB0n[0x19], a0); - jmp_int = (u32*)Ra0; - pc0 = ra; + storeRam32(A_EEXIT_PTR, a0); + mips_return_void_c(3); } void psxBios_UnDeliverEvent() { // 0x20 @@ -2558,6 +2655,7 @@ void psxBios__new_card() { // 0x50 /* According to a user, this allows Final Fantasy Tactics to 
save/load properly */ void psxBios__get_error(void) // 55 { + PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x55]); v0 = 0; pc0 = ra; } @@ -2565,6 +2663,7 @@ void psxBios__get_error(void) // 55 void psxBios_Krom2RawAdd() { // 0x51 int i = 0; + PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x51]); const u32 table_8140[][2] = { {0x8140, 0x0000}, {0x8180, 0x0762}, {0x81ad, 0x0cc6}, {0x81b8, 0x0ca8}, {0x81c0, 0x0f00}, {0x81c8, 0x0d98}, {0x81cf, 0x10c2}, {0x81da, 0x0e6a}, @@ -2606,19 +2705,17 @@ void psxBios_Krom2RawAdd() { // 0x51 } void psxBios_GetC0Table() { // 56 -#ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x56]); -#endif + log_unhandled("GetC0Table @%08x\n", ra); - v0 = 0x674; pc0 = ra; + mips_return_c(A_C0_TABLE, 3); } void psxBios_GetB0Table() { // 57 -#ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x57]); -#endif + log_unhandled("GetB0Table @%08x\n", ra); - v0 = 0x874; pc0 = ra; + mips_return_c(A_B0_TABLE, 3); } void psxBios__card_chan() { // 0x58 @@ -2663,63 +2760,160 @@ void psxBios__card_wait() { // 5d */ void psxBios_SysEnqIntRP() { // 02 -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s: %x\n", biosC0n[0x02] ,a0); -#endif + u32 old, base = loadRam32(A_TT_ExCB); + PSXBIOS_LOG("psxBios_%s %x %x\n", biosC0n[0x02], a0, a1); - SysIntRP[a0] = a1; - - v0 = 0; pc0 = ra; + old = loadRam32(base + (a0 << 3)); + storeRam32(base + (a0 << 3), a1); + storeRam32(a1, old); + mips_return_c(0, 9); } /* * int SysDeqIntRP(int index , long *queue); */ -void psxBios_SysDeqIntRP() { // 03 -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s: %x\n", biosC0n[0x03], a0); -#endif +static void psxBios_SysDeqIntRP_() { // 03 + u32 ptr, next, base = loadRam32(A_TT_ExCB); + u32 lim = 0, ret = 0; + + // as in original: no arg checks of any kind, bug if a1 == 0 + ptr = loadRam32(base + (a0 << 3)); + while (ptr) { + next = loadRam32(ptr); + if (ptr == a1) { + storeRam32(base + (a0 << 3), next); + ret = ptr; + use_cycles(6); + break; + } + while (next && next != a1 && lim++ < 100) { + ptr = next; + next 
= loadRam32(ptr); + use_cycles(8); + } + if (next == a1) { + next = loadRam32(next); + storeRam32(ptr, next); + ret = ptr; + use_cycles(6); + } + break; + } + if (lim == 100) + PSXBIOS_LOG("bad chain %u %x\n", a0, base); - SysIntRP[a0] = 0; + mips_return_c(ret, 12); +} - v0 = 0; pc0 = ra; +static void psxBios_SysDeqIntRP() { // 03 + PSXBIOS_LOG("psxBios_%s %x %x\n", biosC0n[0x03], a0, a1); + psxBios_SysDeqIntRP_(); } void psxBios_ChangeClearRCnt() { // 0a - u32 *ptr; + u32 ret; -#ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s: %x, %x\n", biosC0n[0x0a], a0, a1); -#endif - ptr = (u32*)PSXM((a0 << 2) + 0x8600); - v0 = *ptr; - *ptr = a1; - -// psxRegs.CP0.n.SR|= 0x404; - pc0 = ra; + ret = loadRam32(A_RCNT_VBL_ACK + (a0 << 2)); + storeRam32(A_RCNT_VBL_ACK + (a0 << 2), a1); + mips_return_c(ret, 8); } void psxBios_dummy() { -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("unk %x call: %x\n", pc0 & 0x1fffff, t1); -#endif - pc0 = ra; + u32 pc = (pc0 & 0x1fffff) - 4; + char **ntab = pc == 0xa0 ? biosA0n : pc == 0xb0 ? biosB0n + : pc == 0xc0 ? biosC0n : NULL; + PSXBIOS_LOG("unk %x call: %x ra=%x (%s)\n", + pc, t1, ra, ntab ? 
ntab[t1 & 0xff] : "???"); + (void)pc; (void)ntab; + mips_return_c(0, 100); } void (*biosA0[256])(); -void (*biosB0[256])(); -void (*biosC0[256])(); +// C0 and B0 overlap (end of C0 is start of B0) +void (*biosC0[256+128])(); +void (**biosB0)() = biosC0 + 128; #include "sjisfont.h" +void setup_mips_code() +{ + u32 *ptr; + ptr = (u32 *)&psxM[A_SYSCALL]; + ptr[0x00/4] = SWAPu32(0x0000000c); // syscall 0 + ptr[0x04/4] = SWAPu32(0x03e00008); // jr $ra + ptr[0x08/4] = SWAPu32(0x00000000); // nop + + ptr = (u32 *)&psxM[A_EXCEPTION]; + memset(ptr, 0, 0xc0); // nops (to be patched by games sometimes) + ptr[0x10/4] = SWAPu32(0x8c1a0108); // lw $k0, (0x108) // PCB + ptr[0x14/4] = SWAPu32(0x00000000); // nop + ptr[0x18/4] = SWAPu32(0x8f5a0000); // lw $k0, ($k0) // TCB + ptr[0x1c/4] = SWAPu32(0x00000000); // nop + ptr[0x20/4] = SWAPu32(0x275a0008); // addiu $k0, $k0, 8 // regs + ptr[0x24/4] = SWAPu32(0xaf5f007c); // sw $ra, 0x7c($k0) + ptr[0x28/4] = SWAPu32(0xaf410004); // sw $at, 0x04($k0) + ptr[0x2c/4] = SWAPu32(0xaf420008); // sw $v0, 0x08($k0) + ptr[0x30/4] = SWAPu32(0xaf43000c); // sw $v1, 0x0c($k0) + + ptr[0x60/4] = SWAPu32(0x40037000); // mfc0 $v1, EPC + ptr[0x64/4] = SWAPu32(0x40026800); // mfc0 $v0, Cause + ptr[0x68/4] = SWAPu32(0x24630004); // addiu $v1, $v1, 4 + ptr[0x6c/4] = SWAPu32(0xaf430080); // sw $v1, 0x80($k0) + + ptr[0xb0/4] = HLEOP(hleop_exception); +} + +static const struct { + u32 addr; + enum hle_op op; +} chainfns[] = { + { 0xbfc050a4, hleop_exc0_0_1 }, + { 0xbfc04fbc, hleop_exc0_0_2 }, + { 0xbfc0506c, hleop_exc0_1_1 }, + { 0xbfc04dec, hleop_exc0_1_2 }, + { 0x1a00, hleop_exc0_2_2 }, + { 0x19c8, hleop_exc1_0_1 }, + { 0x18bc, hleop_exc1_0_2 }, + { 0x1990, hleop_exc1_1_1 }, + { 0x1858, hleop_exc1_1_2 }, + { 0x1958, hleop_exc1_2_1 }, + { 0x17f4, hleop_exc1_2_2 }, + { 0x1920, hleop_exc1_3_1 }, + { 0x1794, hleop_exc1_3_2 }, + { 0x2458, hleop_exc3_0_2 }, +}; + +static int chain_hle_op(u32 handler) +{ + size_t i; + + for (i = 0; i < sizeof(chainfns) / 
sizeof(chainfns[0]); i++) + if (chainfns[i].addr == handler) + return chainfns[i].op; + return hleop_dummy; +} + +static void write_chain(u32 *d, u32 next, u32 handler1, u32 handler2) +{ + d[0] = SWAPu32(next); + d[1] = SWAPu32(handler1); + d[2] = SWAPu32(handler2); + + // install hle traps + PSXMu32ref(handler1) = HLEOP(chain_hle_op(handler1)); + PSXMu32ref(handler2) = HLEOP(chain_hle_op(handler2)); +} + void psxBiosInit() { u32 base, size; - u32 *ptr; + u32 *ptr, *ram32; int i; uLongf len; + memset(psxM, 0, 0x10000); for(i = 0; i < 256; i++) { biosA0[i] = NULL; biosB0[i] = NULL; @@ -2800,7 +2994,7 @@ void psxBiosInit() { biosA0[0x3b] = psxBios_getchar; biosA0[0x3c] = psxBios_putchar; //biosA0[0x3d] = psxBios_gets; - //biosA0[0x40] = psxBios_sys_a0_40; + biosA0[0x40] = psxBios_SystemErrorUnresolvedException; //biosA0[0x41] = psxBios_LoadTest; biosA0[0x42] = psxBios_Load; biosA0[0x43] = psxBios_Exec; @@ -2822,7 +3016,7 @@ void psxBiosInit() { //biosA0[0x53] = psxBios_sys_a0_53; //biosA0[0x54] = psxBios__96_init_a54; //biosA0[0x55] = psxBios__bu_init_a55; - //biosA0[0x56] = psxBios__96_remove_a56; + biosA0[0x56] = psxBios_CdRemove; //biosA0[0x57] = psxBios_sys_a0_57; //biosA0[0x58] = psxBios_sys_a0_58; //biosA0[0x59] = psxBios_sys_a0_59; @@ -2850,7 +3044,7 @@ void psxBiosInit() { //biosA0[0x6f] = psxBios_dev_card_6f; biosA0[0x70] = psxBios__bu_init; biosA0[0x71] = psxBios__96_init; - biosA0[0x72] = psxBios__96_remove; + biosA0[0x72] = psxBios_CdRemove; //biosA0[0x73] = psxBios_sys_a0_73; //biosA0[0x74] = psxBios_sys_a0_74; //biosA0[0x75] = psxBios_sys_a0_75; @@ -2880,10 +3074,10 @@ void psxBiosInit() { //biosA0[0x8d] = psxBios_sys_a0_8d; //biosA0[0x8e] = psxBios_sys_a0_8e; //biosA0[0x8f] = psxBios_sys_a0_8f; - //biosA0[0x90] = psxBios_sys_a0_90; - //biosA0[0x91] = psxBios_sys_a0_91; - //biosA0[0x92] = psxBios_sys_a0_92; - //biosA0[0x93] = psxBios_sys_a0_93; + biosA0[0x90] = hleExc0_1_2; + biosA0[0x91] = hleExc0_0_2; + biosA0[0x92] = hleExc0_1_1; + biosA0[0x93] = 
hleExc0_0_1; //biosA0[0x94] = psxBios_sys_a0_94; //biosA0[0x95] = psxBios_sys_a0_95; //biosA0[0x96] = psxBios_AddCDROMDevice; @@ -2899,7 +3093,7 @@ void psxBiosInit() { //biosA0[0xa0] = psxBios__boot; //biosA0[0xa1] = psxBios_SystemError; //biosA0[0xa2] = psxBios_EnqueueCdIntr; - //biosA0[0xa3] = psxBios_DequeueCdIntr; + biosA0[0xa3] = psxBios_DequeueCdIntr; //biosA0[0xa4] = psxBios_sys_a0_a4; //biosA0[0xa5] = psxBios_ReadSector; biosA0[0xa6] = psxBios_get_cd_status; @@ -3053,18 +3247,7 @@ void psxBiosInit() { SwEV = EventCB + 32 * 4; ThEV = EventCB + 32 * 5; - ptr = (u32 *)&psxM[0x0874]; // b0 table - ptr[0] = SWAPu32(0x4c54 - 0x884); - - ptr = (u32 *)&psxM[0x0674]; // c0 table - ptr[6] = SWAPu32(0xc80); - - memset(SysIntRP, 0, sizeof(SysIntRP)); - memset(ThreadCB, 0, sizeof(ThreadCB)); - ThreadCB[0].status = 2; // main thread - pad_stopped = 1; - jmp_int = NULL; pad_buf = NULL; pad_buf1 = NULL; pad_buf2 = NULL; @@ -3077,33 +3260,6 @@ void psxBiosInit() { memset(FDesc, 0, sizeof(FDesc)); card_active_chan = 0; - psxMu32ref(0x0150) = SWAPu32(0x160); - psxMu32ref(0x0154) = SWAPu32(0x320); - psxMu32ref(0x0160) = SWAPu32(0x248); - strcpy((char *)&psxM[0x248], "bu"); -/* psxMu32ref(0x0ca8) = SWAPu32(0x1f410004); - psxMu32ref(0x0cf0) = SWAPu32(0x3c020000); - psxMu32ref(0x0cf4) = SWAPu32(0x2442641c); - psxMu32ref(0x09e0) = SWAPu32(0x43d0); - psxMu32ref(0x4d98) = SWAPu32(0x946f000a); -*/ - // opcode HLE - psxRu32ref(0x0000) = SWAPu32((0x3b << 26) | 4); - /* Whatever this does, it actually breaks CTR, even without the uninitiliazed memory patch. - Normally games shouldn't read from address 0 yet they do. See explanation below in details. 
*/ - //psxMu32ref(0x0000) = SWAPu32((0x3b << 26) | 0); - psxMu32ref(0x00a0) = SWAPu32((0x3b << 26) | 1); - psxMu32ref(0x00b0) = SWAPu32((0x3b << 26) | 2); - psxMu32ref(0x00c0) = SWAPu32((0x3b << 26) | 3); - psxMu32ref(0x4c54) = SWAPu32((0x3b << 26) | 0); - psxMu32ref(0x8000) = SWAPu32((0x3b << 26) | 5); - psxMu32ref(0x07a0) = SWAPu32((0x3b << 26) | 0); - psxMu32ref(0x0884) = SWAPu32((0x3b << 26) | 0); - psxMu32ref(0x0894) = SWAPu32((0x3b << 26) | 0); - - // initial stack pointer for BIOS interrupt - psxMu32ref(0x6c80) = SWAPu32(0x000085c8); - // initial RNG seed psxMu32ref(0x9010) = SWAPu32(0xac20cc00); @@ -3116,8 +3272,6 @@ void psxBiosInit() { // memory size 2 MB psxHu32ref(0x1060) = SWAPu32(0x00000b88); - hleSoftCall = FALSE; - /* Some games like R-Types, CTR, Fade to Black read from adress 0x00000000 due to uninitialized pointers. See Garbage Area at Address 00000000h in Nocash PSX Specfications for more information. Here are some examples of games not working with this fix in place : @@ -3125,19 +3279,114 @@ void psxBiosInit() { Crash Team Racing will softlock after the Sony logo. */ - psxMu32ref(0x0000) = SWAPu32(0x00000003); - /* - But overwritten by 00000003h after soon. - psxMu32ref(0x0000) = SWAPu32(0x00001A3C); - */ - psxMu32ref(0x0004) = SWAPu32(0x800C5A27); - psxMu32ref(0x0008) = SWAPu32(0x08000403); - psxMu32ref(0x000C) = SWAPu32(0x00000000); + ram32 = (u32 *)psxM; + ram32[0x0000/4] = SWAPu32(0x00000003); // lui $k0, 0 (overwritten by 3) + ram32[0x0004/4] = SWAPu32(0x275a0000 + A_EXCEPTION); // addiu $k0, $k0, 0xc80 + ram32[0x0008/4] = SWAPu32(0x03400008); // jr $k0 + ram32[0x000c/4] = SWAPu32(0x00000000); // nop + + ram32[0x0060/4] = SWAPu32(0x00000002); // ram size? 
+ ram32[0x0068/4] = SWAPu32(0x000000ff); // unknown + + ram32[0x0080/4] = SWAPu32(0x3c1a0000); // lui $k0, 0 // exception vector + ram32[0x0084/4] = SWAPu32(0x275a0000 + A_EXCEPTION); // addiu $k0, $k0, 0xc80 + ram32[0x0088/4] = SWAPu32(0x03400008); // jr $k0 + ram32[0x008c/4] = SWAPu32(0x00000000); // nop + + ram32[0x00a0/4] = HLEOP(hleop_a0); + ram32[0x00b0/4] = HLEOP(hleop_b0); + ram32[0x00c0/4] = HLEOP(hleop_c0); + + // "table of tables". Some games modify it + assert(A_TT_ExCB == 0x0100); + ram32[0x0100/4] = SWAPu32(0x0000e004); // ExCB - exception chains + ram32[0x0104/4] = SWAPu32(0x00000020); // ExCB size + ram32[0x0108/4] = SWAPu32(0x0000e1ec); // PCB - process control + ram32[0x010c/4] = SWAPu32(0x00000004); // PCB size + ram32[0x0110/4] = SWAPu32(0x0000e1f4); // TCB - thread control + ram32[0x0114/4] = SWAPu32(0x00000300); // TCB size + ram32[0x0120/4] = SWAPu32(0x0000e028); // EvCB - event control + ram32[0x0124/4] = SWAPu32(0x000001c0); // EvCB size + ram32[0x0140/4] = SWAPu32(0x00008648); // FCB - file control + ram32[0x0144/4] = SWAPu32(0x000002c0); // FCB size + ram32[0x0150/4] = SWAPu32(0x00006ee0); // DCB - device control + ram32[0x0154/4] = SWAPu32(0x00000320); // DCB size + + ram32[0xe000/4] = SWAPu32(0x00000020); // SysMalloc block size + ram32[0xe004/4] = SWAPu32(0x000091e0); // chain0 + ram32[0xe00c/4] = SWAPu32(0x00006d88); // chain1 + ram32[0xe014/4] = SWAPu32(0x00000000); // chain2 + ram32[0xe01c/4] = SWAPu32(0x00006d98); // chain3 + + ram32[0xe1ec/4] = SWAPu32(0x0000e1f4); // TCB + ram32[0xe1f0/4] = SWAPu32(0x00000300); // SysMalloc block size + ram32[0xe1f4/4] = SWAPu32(0x00004000); // first TCB + + ram32[0x6ee0/4] = SWAPu32(0x0000eff0); // DCB + strcpy((char *)&ram32[0xeff0/4], "bu"); + + // default exception handler chains + write_chain(&ram32[0x91e0/4], 0x91d0, 0xbfc050a4, 0xbfc04fbc); // chain0.e0 + write_chain(&ram32[0x91d0/4], 0x6da8, 0xbfc0506c, 0xbfc04dec); // chain0.e1 + write_chain(&ram32[0x6da8/4], 0, 0, 0x1a00); // chain0.e2 
+ write_chain(&ram32[0x6d88/4], 0x6d78, 0x19c8, 0x18bc); // chain1.e0 + write_chain(&ram32[0x6d78/4], 0x6d68, 0x1990, 0x1858); // chain1.e1 + write_chain(&ram32[0x6d68/4], 0x6d58, 0x1958, 0x17f4); // chain1.e2 + write_chain(&ram32[0x6d58/4], 0, 0x1920, 0x1794); // chain1.e3 + write_chain(&ram32[0x6d98/4], 0, 0, 0x2458); // chain3.e0 + + setup_mips_code(); + + // fill the api jumptables with fake entries as some games patch them + // (or rather the funcs listed there) + ptr = (u32 *)&psxM[A_A0_TABLE]; + for (i = 0; i < 256; i++) + ptr[i] = SWAP32(0x1000); + + ptr = (u32 *)&psxM[A_B0_TABLE]; + for (i = 0; i < 256; i++) + ptr[i] = SWAP32(0x2000); + // B(5b) is special because games patch (sometimes even jump to) + // code at fixed offsets from it, nocash lists offsets: + // patch: +3d8, +4dc, +594, +62c, +9c8, +1988 + // call: +7a0=4b70, +884=4c54, +894=4c64 + ptr[0x5b] = SWAP32(0x43d0); + ram32[0x4b70/4] = SWAP32(0x03e00008); // jr $ra + ram32[0x4c54/4] = SWAP32(0x03e00008); // jr $ra + ram32[0x4c64/4] = SWAP32(0x03e00008); // jr $ra + + ptr = (u32 *)&psxM[A_C0_TABLE]; + for (i = 0; i < 256/2; i++) + ptr[i] = SWAP32(0x3000); + ptr[6] = SWAP32(A_EXCEPTION); + + // more HLE traps + ram32[0x1000/4] = HLEOP(hleop_dummy); + ram32[0x2000/4] = HLEOP(hleop_dummy); + ram32[0x3000/4] = HLEOP(hleop_dummy); + ram32[0x4c54/4] = HLEOP(hleop_dummy); // for B12_InitPad? 
+ ram32[0x8000/4] = HLEOP(hleop_execret); + + ram32[A_EEXIT_PTR/4] = SWAP32(A_EEXIT_DEF); + ram32[A_EXC_SP/4] = SWAP32(A_EXC_STACK); + ram32[A_RCNT_VBL_ACK/4 + 0] = SWAP32(1); + ram32[A_RCNT_VBL_ACK/4 + 1] = SWAP32(1); + ram32[A_RCNT_VBL_ACK/4 + 2] = SWAP32(1); + ram32[A_RCNT_VBL_ACK/4 + 3] = SWAP32(1); + + psxRegs.CP0.n.SR &= ~0x400000; // use ram vector } void psxBiosShutdown() { } +void psxBiosCnfLoaded(u32 tcbs, u32 events) { + if (tcbs > 4) + log_unhandled("FIXME: TCB = %x\n", tcbs); + if (events > 16) + log_unhandled("FIXME: EVENT = %x\n", tcbs); +} + #define psxBios_PADpoll(pad) { \ PAD##pad##_startPoll(pad); \ pad_buf##pad[0] = 0; \ @@ -3154,179 +3403,293 @@ void psxBiosShutdown() { } \ } -void biosInterrupt() { +static void biosPadHLE() { int i, bufcount; -// if (psxHu32(0x1070) & 0x1) { // Vsync - if (pad_buf != NULL) { - u32 *buf = (u32*)pad_buf; - - if (!Config.UseNet) { - PAD1_startPoll(1); - if (PAD1_poll(0x42) == 0x23) { - PAD1_poll(0); - *buf = PAD1_poll(0) << 8; - *buf |= PAD1_poll(0); - PAD1_poll(0); - *buf &= ~((PAD1_poll(0) > 0x20) ? 1 << 6 : 0); - *buf &= ~((PAD1_poll(0) > 0x20) ? 1 << 7 : 0); - } else { - PAD1_poll(0); - *buf = PAD1_poll(0) << 8; - *buf|= PAD1_poll(0); - } + if (pad_buf != NULL) { + u32 *buf = (u32*)pad_buf; + + PAD1_startPoll(1); + if (PAD1_poll(0x42) == 0x23) { + PAD1_poll(0); + *buf = PAD1_poll(0) << 8; + *buf |= PAD1_poll(0); + PAD1_poll(0); + *buf &= ~((PAD1_poll(0) > 0x20) ? 1 << 6 : 0); + *buf &= ~((PAD1_poll(0) > 0x20) ? 1 << 7 : 0); + } else { + PAD1_poll(0); + *buf = PAD1_poll(0) << 8; + *buf|= PAD1_poll(0); + } - PAD2_startPoll(2); - if (PAD2_poll(0x42) == 0x23) { - PAD2_poll(0); - *buf |= PAD2_poll(0) << 24; - *buf |= PAD2_poll(0) << 16; - PAD2_poll(0); - *buf &= ~((PAD2_poll(0) > 0x20) ? 1 << 22 : 0); - *buf &= ~((PAD2_poll(0) > 0x20) ? 
1 << 23 : 0); - } else { - PAD2_poll(0); - *buf |= PAD2_poll(0) << 24; - *buf |= PAD2_poll(0) << 16; - } - } else { - u16 data; + PAD2_startPoll(2); + if (PAD2_poll(0x42) == 0x23) { + PAD2_poll(0); + *buf |= PAD2_poll(0) << 24; + *buf |= PAD2_poll(0) << 16; + PAD2_poll(0); + *buf &= ~((PAD2_poll(0) > 0x20) ? 1 << 22 : 0); + *buf &= ~((PAD2_poll(0) > 0x20) ? 1 << 23 : 0); + } else { + PAD2_poll(0); + *buf |= PAD2_poll(0) << 24; + *buf |= PAD2_poll(0) << 16; + } + } + if (!pad_stopped) { + if (pad_buf1) { + psxBios_PADpoll(1); + } - PAD1_startPoll(1); - PAD1_poll(0x42); - PAD1_poll(0); - data = PAD1_poll(0) << 8; - data |= PAD1_poll(0); + if (pad_buf2) { + psxBios_PADpoll(2); + } + } +} + +static void handle_chain_x_x_1(u32 enable, u32 irqbit) +{ + use_cycles(10); + if (enable) { + psxHwWrite16(0x1f801070, ~(1u << irqbit)); + psxBios_ReturnFromException(); + } + else + pc0 = ra; +} - if (NET_sendPadData(&data, 2) == -1) - netError(); +// hleExc0_{0,1}* are usually removed by A(56)/A(72) on the game's startup, +// so this is only partially implemented +void hleExc0_0_1() // A(93h) - CdromDmaIrqFunc2 +{ + u32 cdrom_dma_ack_enable = 1; // a000b93c + handle_chain_x_x_1(cdrom_dma_ack_enable, 3); // IRQ3 DMA +} - if (NET_recvPadData(&((u16*)buf)[0], 1) == -1) - netError(); - if (NET_recvPadData(&((u16*)buf)[1], 2) == -1) - netError(); - } - } - if (Config.UseNet && pad_buf1 != NULL && pad_buf2 != NULL) { - psxBios_PADpoll(1); +void hleExc0_0_2() // A(91h) - CdromDmaIrqFunc1 +{ + u32 ret = 0; + //PSXBIOS_LOG("%s\n", __func__); - if (NET_sendPadData(pad_buf1, i) == -1) - netError(); + if (psxHu32(0x1074) & psxHu32(0x1070) & 8) { // IRQ3 DMA + psxHwWrite32(0x1f8010f4, (psxHu32(0x10f4) & 0xffffff) | 0x88000000); + //if (--cdrom_irq_counter == 0) // 0xa0009180 + // DeliverEvent(); // 0xf0000003, 0x10 + use_cycles(22); + ret = 1; + } + mips_return_c(ret, 20); +} - if (NET_recvPadData(pad_buf1, 1) == -1) - netError(); - if (NET_recvPadData(pad_buf2, 2) == -1) - netError(); - } 
else { - if (!pad_stopped) { - if (pad_buf1) { - psxBios_PADpoll(1); - } +void hleExc0_1_1() // A(92h) - CdromIoIrqFunc2 +{ + u32 cdrom_irq_ack_enable = 1; // a000b938 + handle_chain_x_x_1(cdrom_irq_ack_enable, 2); // IRQ2 cdrom +} - if (pad_buf2) { - psxBios_PADpoll(2); - } - } - } +void hleExc0_1_2() // A(90h) - CdromIoIrqFunc1 +{ + u32 ret = 0; + if (psxHu32(0x1074) & psxHu32(0x1070) & 4) { // IRQ2 cdrom + PSXBIOS_LOG("%s TODO\n", __func__); + ret = 1; + } + mips_return_c(ret, 20); +} - if (psxHu32(0x1070) & 0x1) { // Vsync - if (RcEV[3][1].status == EvStACTIVE) { - softCall(RcEV[3][1].fhandler); -// hwWrite32(0x1f801070, ~(1)); +void hleExc0_2_2_syscall() // not in any A/B/C table +{ + u32 code = (psxRegs.CP0.n.Cause & 0x3c) >> 2; + u32 tcbPtr = loadRam32(A_TT_PCB); + TCB *tcb = loadRam32ptr(tcbPtr); + + if (code != R3000E_Syscall) { + if (code != 0) { + // DeliverEvent(); // 0xf0000010, 0x1000 + psxBios_SystemErrorUnresolvedException(); } + mips_return_c(0, 17); + return; } - if (psxHu32(0x1070) & 0x70) { // Rcnt 0,1,2 - int i; + //printf("%s c=%d a0=%d\n", __func__, code, a0); + tcb->epc += SWAP32(4); + switch (a0) { + case 0: // noop + break; - for (i = 0; i < 3; i++) { - if (psxHu32(0x1070) & (1 << (i + 4))) { - if (RcEV[i][1].status == EvStACTIVE) { - softCall(RcEV[i][1].fhandler); - } - psxHwWrite32(0x1f801070, ~(1 << (i + 4))); - } + case 1: { // EnterCritical - disable irqs + u32 was_enabled = ((SWAP32(tcb->sr) & 0x404) == 0x404); + tcb->reg[2] = SWAP32(was_enabled); + tcb->sr &= SWAP32(~0x404); + break; + } + case 2: // ExitCritical - enable irqs + tcb->sr |= SWAP32(0x404); + break; + + case 3: { // ChangeThreadSubFunction + u32 tcbPtr = loadRam32(A_TT_PCB); + storeRam32(tcbPtr, a1); + break; } + default: + // DeliverEvent(); // 0xf0000010, 0x4000 + break; } + use_cycles(30); + psxBios_ReturnFromException(); } -void psxBiosException() { - int i; +void hleExc1_0_1(void) +{ + u32 vbl_irq_ack_enable = loadRam32(A_RCNT_VBL_ACK + 0x0c); // 860c + 
handle_chain_x_x_1(vbl_irq_ack_enable, 0); // IRQ0 vblank +} - switch (psxRegs.CP0.n.Cause & 0x3c) { - case 0x00: // Interrupt - interrupt_r26=psxRegs.CP0.n.EPC; -#ifdef PSXCPU_LOG -// PSXCPU_LOG("interrupt\n"); -#endif - SaveRegs(); +static void handle_chain_1_x_2(u32 ev_index, u32 irqbit) +{ + u32 ret = 0; + if (psxHu32(0x1074) & psxHu32(0x1070) & (1u << irqbit)) { + // DeliverEvent 0xf2000000 + ev_index, 2 + if (RcEV[ev_index][1].status == EvStACTIVE) { + softCall(RcEV[ev_index][1].fhandler); + } + ret = 1; + } + mips_return_c(ret, 22); +} - sp = psxMu32(0x6c80); // create new stack for interrupt handlers +void hleExc1_0_2(void) +{ + handle_chain_1_x_2(3, 0); // IRQ0 vblank +} - biosInterrupt(); +void hleExc1_1_1(void) +{ + u32 rcnt_irq_ack_enable = loadRam32(A_RCNT_VBL_ACK + 0x08); // 8608 + handle_chain_x_x_1(rcnt_irq_ack_enable, 6); // IRQ6 rcnt2 +} - for (i = 0; i < 8; i++) { - if (SysIntRP[i]) { - u32 *queue = (u32 *)PSXM(SysIntRP[i]); +void hleExc1_1_2(void) +{ + handle_chain_1_x_2(2, 6); // IRQ6 rcnt2 +} - s0 = queue[2]; - softCall(queue[1]); - } - } +void hleExc1_2_1(void) +{ + u32 rcnt_irq_ack_enable = loadRam32(A_RCNT_VBL_ACK + 0x04); // 8604 + handle_chain_x_x_1(rcnt_irq_ack_enable, 5); // IRQ5 rcnt1 +} - if (jmp_int != NULL) { - int i; +void hleExc1_2_2(void) +{ + handle_chain_1_x_2(1, 5); // IRQ5 rcnt1 +} - psxHwWrite32(0x1f801070, 0xffffffff); +void hleExc1_3_1(void) +{ + u32 rcnt_irq_ack_enable = loadRam32(A_RCNT_VBL_ACK + 0x00); // 8600 + handle_chain_x_x_1(rcnt_irq_ack_enable, 4); // IRQ4 rcnt0 +} - ra = jmp_int[0]; - sp = jmp_int[1]; - fp = jmp_int[2]; - for (i = 0; i < 8; i++) // s0-s7 - psxRegs.GPR.r[16 + i] = jmp_int[3 + i]; - gp = jmp_int[11]; +void hleExc1_3_2(void) +{ + handle_chain_1_x_2(0, 4); // IRQ4 rcnt0 +} - v0 = 1; - pc0 = ra; - return; - } - psxHwWrite16(0x1f801070, 0); - break; +void hleExc3_0_2_defint(void) +{ + static const struct { + u8 ev, irqbit; + } tab[] = { + { 3, 2 }, // cdrom + { 9, 9 }, // spu + { 2, 1 }, // gpu + { 
10, 10 }, // io + { 11, 8 }, // sio + { 1, 0 }, // vbl + { 5, 4 }, // rcnt0 + { 6, 5 }, // rcnt1 + { 6, 6 }, // rcnt2 (bug) + { 8, 7 }, // sio rx + { 4, 3 }, // sio + }; + size_t i; + for (i = 0; i < sizeof(tab) / sizeof(tab[0]); i++) { + if (psxHu32(0x1074) & psxHu32(0x1070) & (1u << tab[i].irqbit)) { + // DeliverEvent 0xf0000000 + ev, 0x1000 + use_cycles(7); + } - case 0x20: // Syscall -#ifdef PSXCPU_LOG - PSXCPU_LOG("syscall exp %x\n", a0); -#endif - switch (a0) { - case 1: // EnterCritical - disable irq's - /* Fixes Medievil 2 not loading up new game, Digimon World not booting up and possibly others */ - v0 = (psxRegs.CP0.n.SR & 0x404) == 0x404; - psxRegs.CP0.n.SR &= ~0x404; - break; + } + mips_return_c(0, 11 + 7*11 + 7*11 + 12); +} - case 2: // ExitCritical - enable irq's - psxRegs.CP0.n.SR |= 0x404; - break; - /* Normally this should cover SYS(00h, SYS(04h but they don't do anything relevant so... */ - default: - break; - } - pc0 = psxRegs.CP0.n.EPC + 4; +void psxBiosException() { + u32 tcbPtr = loadRam32(A_TT_PCB); + u32 *chains = loadRam32ptr(A_TT_ExCB); + TCB *tcb = loadRam32ptr(tcbPtr); + u32 ptr, *chain; + int c, lim; + int i; - psxRegs.CP0.n.SR = (psxRegs.CP0.n.SR & ~0x0f) | ((psxRegs.CP0.n.SR & 0x3c) >> 2); - return; + // save the regs + // $at, $v0, $v1 already saved by the mips code at A_EXCEPTION + for (i = 4; i < 32; i++) { + if (i == 26) // $k0 + continue; + tcb->reg[i] = SWAP32(psxRegs.GPR.r[i]); + } + tcb->lo = SWAP32(psxRegs.GPR.n.lo); + tcb->hi = SWAP32(psxRegs.GPR.n.hi); + tcb->epc = SWAP32(psxRegs.CP0.n.EPC); + tcb->sr = SWAP32(psxRegs.CP0.n.SR); + tcb->cause = SWAP32(psxRegs.CP0.n.Cause); + sp = fp = loadRam32(A_EXC_SP); + gp = A_EXC_GP; + use_cycles(46); + + // do the chains (always 4) + for (c = lim = 0; c < 4; c++) { + if (chains[c * 2] == 0) + continue; + ptr = SWAP32(chains[c * 2]); + for (; ptr && lim < 100; ptr = SWAP32(chain[0])) { + chain = castRam32ptr(ptr); + use_cycles(14); + lim++; + if (chain[2] == 0) + continue; + 
softCallInException(SWAP32(chain[2])); + if (returned_from_exception()) + return; - default: -#ifdef PSXCPU_LOG - PSXCPU_LOG("unknown bios exception!\n"); -#endif - break; + if (v0 == 0 || chain[1] == 0) + continue; + softCallInException(SWAP32(chain[1])); + if (returned_from_exception()) + return; + } } + assert(lim < 100); - pc0 = psxRegs.CP0.n.EPC; - if (psxRegs.CP0.n.Cause & 0x80000000) pc0+=4; + // TODO make this a chain entry + if (psxHu32(0x1070) & 1) + biosPadHLE(); - psxRegs.CP0.n.SR = (psxRegs.CP0.n.SR & ~0x0f) | ((psxRegs.CP0.n.SR & 0x3c) >> 2); + // return from exception (custom or default) + use_cycles(23); + ptr = loadRam32(A_EEXIT_PTR); + if (ptr != A_EEXIT_DEF) { + const struct jmp_buf_ *jmp_buf = castRam32ptr(ptr); + longjmp_load(jmp_buf); + v0 = 1; + pc0 = ra; + return; + } + psxBios_ReturnFromException(); } #define bfreeze(ptr, size) { \ @@ -3352,7 +3715,6 @@ void psxBiosException() { void psxBiosFreeze(int Mode) { u32 base = 0x40000; - bfreezepsxMptr(jmp_int, u32); bfreezepsxMptr(pad_buf, int); bfreezepsxMptr(pad_buf1, char); bfreezepsxMptr(pad_buf2, char); @@ -3360,9 +3722,7 @@ void psxBiosFreeze(int Mode) { bfreezel(&pad_buf1len); bfreezel(&pad_buf2len); bfreezes(regs); - bfreezes(SysIntRP); bfreezel(&CardState); - bfreezes(ThreadCB); bfreezel(&CurThread); bfreezes(FDesc); bfreezel(&card_active_chan); diff --git a/libpcsxcore/psxbios.h b/libpcsxcore/psxbios.h index 635b35a59..fdbf2e505 100644 --- a/libpcsxcore/psxbios.h +++ b/libpcsxcore/psxbios.h @@ -38,12 +38,11 @@ void psxBiosInit(); void psxBiosShutdown(); void psxBiosException(); void psxBiosFreeze(int Mode); +void psxBiosCnfLoaded(u32 tcbs, u32 events); extern void (*biosA0[256])(); -extern void (*biosB0[256])(); -extern void (*biosC0[256])(); - -extern boolean hleSoftCall; +extern void (**biosB0)(); +extern void (*biosC0[256+128])(); #ifdef __cplusplus } diff --git a/libpcsxcore/psxcommon.c b/libpcsxcore/psxcommon.c index 8313304c6..ada81a88a 100644 --- a/libpcsxcore/psxcommon.c +++ 
b/libpcsxcore/psxcommon.c @@ -52,10 +52,6 @@ void EmuShutdown() { } void EmuUpdate() { - // Do not allow hotkeys inside a softcall from HLE BIOS - if (!Config.HLE || !hleSoftCall) - SysUpdate(); - ApplyCheats(); // reamed hack diff --git a/libpcsxcore/psxhle.c b/libpcsxcore/psxhle.c index 7ca81b477..5ef484032 100644 --- a/libpcsxcore/psxhle.c +++ b/libpcsxcore/psxhle.c @@ -30,7 +30,10 @@ #endif static void hleDummy() { + log_unhandled("hleDummy called @%08x ra=%08x\n", + psxRegs.pc - 4, psxRegs.GPR.n.ra); psxRegs.pc = psxRegs.GPR.n.ra; + psxRegs.cycle += 1000; psxBranchTest(); } @@ -95,8 +98,14 @@ static void hleExecRet() { psxRegs.pc = psxRegs.GPR.n.ra; } -void (* const psxHLEt[8])() = { +void (* const psxHLEt[22])() = { hleDummy, hleA0, hleB0, hleC0, - hleBootstrap, hleExecRet, - hleDummy, hleDummy + hleBootstrap, hleExecRet, psxBiosException, hleDummy, + hleExc0_0_1, hleExc0_0_2, + hleExc0_1_1, hleExc0_1_2, hleExc0_2_2_syscall, + hleExc1_0_1, hleExc1_0_2, + hleExc1_1_1, hleExc1_1_2, + hleExc1_2_1, hleExc1_2_2, + hleExc1_3_1, hleExc1_3_2, + hleExc3_0_2_defint, }; diff --git a/libpcsxcore/psxhle.h b/libpcsxcore/psxhle.h index 04126345e..5535ab008 100644 --- a/libpcsxcore/psxhle.h +++ b/libpcsxcore/psxhle.h @@ -28,7 +28,28 @@ extern "C" { #include "r3000a.h" #include "plugins.h" -extern void (* const psxHLEt[8])(); +void hleExc0_0_1(); void hleExc0_0_2(); +void hleExc0_1_1(); void hleExc0_1_2(); +void hleExc0_2_2_syscall(); +void hleExc1_0_1(); void hleExc1_0_2(); +void hleExc1_1_1(); void hleExc1_1_2(); +void hleExc1_2_1(); void hleExc1_2_2(); +void hleExc1_3_1(); void hleExc1_3_2(); +void hleExc3_0_2_defint(); + +enum hle_op { + hleop_dummy = 0, hleop_a0, hleop_b0, hleop_c0, + hleop_bootstrap, hleop_execret, hleop_exception, hleop_unused, + hleop_exc0_0_1, hleop_exc0_0_2, + hleop_exc0_1_1, hleop_exc0_1_2, hleop_exc0_2_2, + hleop_exc1_0_1, hleop_exc1_0_2, + hleop_exc1_1_1, hleop_exc1_1_2, + hleop_exc1_2_1, hleop_exc1_2_2, + hleop_exc1_3_1, hleop_exc1_3_2, + 
hleop_exc3_0_2, +}; + +extern void (* const psxHLEt[22])(); #ifdef __cplusplus } diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index f473ddf6b..5756bee55 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -112,7 +112,8 @@ static void intException(psxRegisters *regs, u32 pc, u32 cause) if (cause != 0x20) { //FILE *f = fopen("/tmp/psx_ram.bin", "wb"); //fwrite(psxM, 1, 0x200000, f); fclose(f); - log_unhandled("exception %08x @%08x\n", cause, pc); + log_unhandled("exception %08x @%08x ra=%08x\n", + cause, pc, regs->GPR.n.ra); } dloadFlush(regs); regs->pc = pc; @@ -1118,6 +1119,7 @@ OP(psxHLE) { return; } psxHLEt[hleCode](); + branchSeen = 1; } static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = { diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index dffbf6e74..b5fe3bd16 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -119,8 +119,6 @@ void psxException(u32 cause, enum R3000Abdt bdt, psxCP0Regs *cp0) { // Set the SR cp0->n.SR = (cp0->n.SR & ~0x3f) | ((cp0->n.SR & 0x0f) << 2); - - if (Config.HLE) psxBiosException(); } void psxBranchTest() { diff --git a/libpcsxcore/system.h b/libpcsxcore/system.h index c380aa473..fe4ab404a 100644 --- a/libpcsxcore/system.h +++ b/libpcsxcore/system.h @@ -32,7 +32,6 @@ void *SysLoadLibrary(const char *lib); // Loads Library void *SysLoadSym(void *lib, const char *sym); // Loads Symbol from Library const char *SysLibError(); // Gets previous error loading sysbols void SysCloseLibrary(void *lib); // Closes Library -void SysUpdate(); // Called on VBlank (to update i.e. 
pads) void SysRunGui(); // Returns to the Gui void SysClose(); // Close mem and plugins From 73d2a9037e5ea290cac10a9f26860a6405b55a0c Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 10 Aug 2023 02:29:53 +0300 Subject: [PATCH 304/597] spu: some cdda/xa reverb support not for threaded spu as it'd race with writes and I don't want to use a mutex or do extra copying there libretro/pcsx_rearmed#733 --- plugins/dfsound/registers.h | 6 ++++-- plugins/dfsound/spu.c | 7 ++++--- plugins/dfsound/xa.c | 31 ++++++++++++++++++++++++------- plugins/dfsound/xa.h | 25 ------------------------- 4 files changed, 32 insertions(+), 37 deletions(-) delete mode 100644 plugins/dfsound/xa.h diff --git a/plugins/dfsound/registers.h b/plugins/dfsound/registers.h index 28641b81e..a296431ff 100644 --- a/plugins/dfsound/registers.h +++ b/plugins/dfsound/registers.h @@ -145,8 +145,10 @@ #define H_SPU_ADSRLevel22 0x0d68 #define H_SPU_ADSRLevel23 0x0d78 -#define CTRL_IRQ 0x40 -#define CTRL_REVERB 0x80 +#define CTRL_CD 0x0001 +#define CTRL_CDREVERB 0x0004 +#define CTRL_IRQ 0x0040 +#define CTRL_REVERB 0x0080 #define CTRL_NOISE 0x3f00 #define CTRL_MUTE 0x4000 #define CTRL_ON 0x8000 diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index f6730d64a..038f946ee 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -814,6 +814,8 @@ static void do_channels(int ns_to) mix_chan(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume); } + MixXA(spu.SSumLR, RVB, ns_to, spu.decode_pos); + if (spu.rvb->StartAddr) { if (do_rvb) REVERBDo(spu.SSumLR, RVB, ns_to, spu.rvb->CurrAddr); @@ -1066,6 +1068,7 @@ static void sync_worker_thread(int force) work = &worker->i[worker->i_reaped & WORK_I_MASK]; thread_work_wait_sync(work, force); + MixXA(work->SSumLR, RVB, work->ns_to, work->decode_pos); do_samples_finish(work->SSumLR, work->ns_to, work->channels_silent, work->decode_pos); @@ -1192,12 +1195,10 @@ static void do_samples_finish(int *SSumLR, int ns_to, spu.decode_dirty_ch &= ~(1<<3); } - 
MixXA(SSumLR, ns_to, decode_pos); - vol_l = vol_l * spu_config.iVolume >> 10; vol_r = vol_r * spu_config.iVolume >> 10; - if (!(spu.spuCtrl & 0x4000) || !(vol_l | vol_r)) + if (!(spu.spuCtrl & CTRL_MUTE) || !(vol_l | vol_r)) { // muted? (rare) memset(spu.pS, 0, ns_to * 2 * sizeof(spu.pS[0])); diff --git a/plugins/dfsound/xa.c b/plugins/dfsound/xa.c index 397ed592f..23924d3b1 100644 --- a/plugins/dfsound/xa.c +++ b/plugins/dfsound/xa.c @@ -39,7 +39,7 @@ static int gauss_window[8] = {0, 0, 0, 0, 0, 0, 0, 0}; // MIX XA & CDDA //////////////////////////////////////////////////////////////////////// -INLINE void MixXA(int *SSumLR, int ns_to, int decode_pos) +INLINE void MixXA(int *SSumLR, int *RVB, int ns_to, int decode_pos) { int cursor = decode_pos; int ns; @@ -51,15 +51,23 @@ INLINE void MixXA(int *SSumLR, int ns_to, int decode_pos) if(spu.XAPlay == spu.XAFeed) spu.XARepeat--; - for(ns = 0; ns < ns_to*2; ) + for(ns = 0; ns < ns_to*2; ns += 2) { if(spu.XAPlay != spu.XAFeed) v=*spu.XAPlay++; if(spu.XAPlay == spu.XAEnd) spu.XAPlay=spu.XAStart; l = ((int)(short)v * spu.iLeftXAVol) >> 15; r = ((int)(short)(v >> 16) * spu.iLeftXAVol) >> 15; - SSumLR[ns++] += l; - SSumLR[ns++] += r; + if (spu.spuCtrl & CTRL_CD) + { + SSumLR[ns+0] += l; + SSumLR[ns+1] += r; + } + if (unlikely(spu.spuCtrl & CTRL_CDREVERB)) + { + RVB[ns+0] += l; + RVB[ns+1] += r; + } spu.spuMem[cursor] = HTOLE16(v); spu.spuMem[cursor + 0x400/2] = HTOLE16(v >> 16); @@ -71,15 +79,23 @@ INLINE void MixXA(int *SSumLR, int ns_to, int decode_pos) // hence this 'ns_to < 8' else if(spu.CDDAPlay != spu.CDDAFeed || ns_to < 8) { - for(ns = 0; ns < ns_to*2; ) + for(ns = 0; ns < ns_to*2; ns += 2) { if(spu.CDDAPlay != spu.CDDAFeed) v=*spu.CDDAPlay++; if(spu.CDDAPlay == spu.CDDAEnd) spu.CDDAPlay=spu.CDDAStart; l = ((int)(short)v * spu.iLeftXAVol) >> 15; r = ((int)(short)(v >> 16) * spu.iLeftXAVol) >> 15; - SSumLR[ns++] += l; - SSumLR[ns++] += r; + if (spu.spuCtrl & CTRL_CD) + { + SSumLR[ns+0] += l; + SSumLR[ns+1] += r; + } + 
if (unlikely(spu.spuCtrl & CTRL_CDREVERB)) + { + RVB[ns+0] += l; + RVB[ns+1] += r; + } spu.spuMem[cursor] = HTOLE16(v); spu.spuMem[cursor + 0x400/2] = HTOLE16(v >> 16); @@ -420,3 +436,4 @@ INLINE int FeedCDDA(unsigned char *pcm, int nBytes) } #endif +// vim:shiftwidth=1:expandtab diff --git a/plugins/dfsound/xa.h b/plugins/dfsound/xa.h deleted file mode 100644 index 137fe4362..000000000 --- a/plugins/dfsound/xa.h +++ /dev/null @@ -1,25 +0,0 @@ -/*************************************************************************** - xa.h - description - ------------------- - begin : Wed May 15 2002 - copyright : (C) 2002 by Pete Bernert - email : BlackDove@addcom.de - ***************************************************************************/ -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. See also the license.txt file for * - * additional informations. 
* - * * - ***************************************************************************/ - -#ifndef __P_XA_H__ -#define __P_XA_H__ - -INLINE void MixXA(void); -INLINE void FeedXA(xa_decode_t *xap); -INLINE int FeedCDDA(unsigned char *pcm, int nBytes); - -#endif /* __P_XA_H__ */ From 4ee246ed0e02c61f0b33251d86ea74a4a0380a47 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 10 Aug 2023 20:53:40 +0300 Subject: [PATCH 305/597] drc: fix reg alloc for div libretro/pcsx_rearmed#745 --- libpcsxcore/new_dynarec/new_dynarec.c | 30 +++++++++------------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 37bdc3e7c..d7c243655 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -2114,28 +2114,18 @@ static void multdiv_alloc(struct regstat *current,int i) clear_const(current,dops[i].rs2); alloc_cc(current,i); // for stalls dirty_reg(current,CCREG); - if(dops[i].rs1&&dops[i].rs2) + current->u &= ~(1ull << HIREG); + current->u &= ~(1ull << LOREG); + alloc_reg(current, i, HIREG); + alloc_reg(current, i, LOREG); + dirty_reg(current, HIREG); + dirty_reg(current, LOREG); + if ((dops[i].opcode2 & 0x3e) == 0x1a || (dops[i].rs1 && dops[i].rs2)) // div(u) { - current->u&=~(1LL<u&=~(1LL< Date: Fri, 11 Aug 2023 02:01:11 +0300 Subject: [PATCH 306/597] misc: accept a path without slash --- libpcsxcore/misc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 252e1c8c7..bba81b1e8 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -304,10 +304,16 @@ int LoadCdromFile(const char *filename, EXE_HEADER *head) { u8 time[4],*buf; u8 mdir[4096]; char exename[256]; + const char *p1, *p2; u32 size, addr; void *mem; - sscanf(filename, "cdrom:\\%255s", exename); + p1 = filename; + if ((p2 = strchr(p1, ':'))) + p1 = p2 + 1; + while (*p1 == '\\') + p1++; + snprintf(exename, sizeof(exename), 
"%s", p1); time[0] = itob(0); time[1] = itob(2); time[2] = itob(0x10); From 7650b7540c8cdfdad23e6a5ce5e591ab5f7c0bff Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 11 Aug 2023 02:08:29 +0300 Subject: [PATCH 307/597] psxbios: handle dynamic CB alloc SYSTEM.CNF TCB/EVENT settings are used now --- libpcsxcore/psxbios.c | 159 +++++++++++++++++++++++++++++++----------- libpcsxcore/r3000a.c | 6 +- 2 files changed, 121 insertions(+), 44 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 6624207e1..85f79fd2f 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -289,6 +289,9 @@ static u32 card_active_chan = 0; #define A_EXCEPTION 0x0c80 #define A_EXC_SP 0x6cf0 #define A_EEXIT_DEF 0x6cf4 +#define A_KMALLOC_PTR 0x7460 +#define A_KMALLOC_SIZE 0x7464 +#define A_KMALLOC_END 0x7468 #define A_EEXIT_PTR 0x75d0 #define A_EXC_STACK 0x85d8 // exception stack top #define A_RCNT_VBL_ACK 0x8600 @@ -1430,15 +1433,12 @@ void psxBios_Load() { // 0x42 EXE_HEADER eheader; void *pa1; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s: %s, %x\n", biosA0n[0x42], Ra0, a1); -#endif - pa1 = Ra1; if (pa1 && LoadCdromFile(Ra0, &eheader) == 0) { memcpy(pa1, ((char*)&eheader)+16, sizeof(EXEC)); v0 = 1; } else v0 = 0; + PSXBIOS_LOG("psxBios_%s: %s, %d -> %d\n", biosA0n[0x42], Ra0, a1, v0); pc0 = ra; } @@ -1729,6 +1729,15 @@ void psxBios__card_load() { // ac /* System calls B0 */ +static u32 psxBios_SysMalloc_(u32 size); + +static void psxBios_SysMalloc() { // B 00 + u32 ret = psxBios_SysMalloc_(a0); + + PSXBIOS_LOG("psxBios_%s 0x%x -> %x\n", biosB0n[0x00], a0, ret); + mips_return_c(ret, 33); +} + void psxBios_SetRCnt() { // 02 #ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x02]); @@ -2755,11 +2764,34 @@ void psxBios__card_wait() { // 5d /* System calls C0 */ +static void psxBios_SysEnqIntRP(); + +static void psxBios_InitRCnt() { // 00 + int i; + PSXBIOS_LOG("psxBios_%s %x\n", biosC0n[0x00], a0); + psxHwWrite16(0x1f801074, psxHu32(0x1074) & ~0x71); + for (i = 
0; i < 3; i++) { + psxHwWrite16(0x1f801100 + i*0x10 + 4, 0); + psxHwWrite16(0x1f801100 + i*0x10 + 8, 0); + psxHwWrite16(0x1f801100 + i*0x10 + 0, 0); + } + a1 = 0x6d88; + psxBios_SysEnqIntRP(); + mips_return_c(0, 9); +} + +static void psxBios_InitException() { // 01 + PSXBIOS_LOG("psxBios_%s %x\n", biosC0n[0x01], a0); + a1 = 0x6da8; + psxBios_SysEnqIntRP(); + mips_return_c(0, 9); +} + /* * int SysEnqIntRP(int index , long *queue); */ -void psxBios_SysEnqIntRP() { // 02 +static void psxBios_SysEnqIntRP() { // 02 u32 old, base = loadRam32(A_TT_ExCB); PSXBIOS_LOG("psxBios_%s %x %x\n", biosC0n[0x02], a0, a1); @@ -2811,7 +2843,31 @@ static void psxBios_SysDeqIntRP() { // 03 psxBios_SysDeqIntRP_(); } -void psxBios_ChangeClearRCnt() { // 0a +static void psxBios_SysInitMemory_(u32 base, u32 size) { + storeRam32(base, 0); + storeRam32(A_KMALLOC_PTR, base); + storeRam32(A_KMALLOC_SIZE, size); + storeRam32(A_KMALLOC_END, base + (size & ~3) + 4); +} + +// this should be much more complicated, but maybe that'll be enough +static u32 psxBios_SysMalloc_(u32 size) { + u32 ptr = loadRam32(A_KMALLOC_PTR); + + size = (size + 3) & ~3; + storeRam32(A_KMALLOC_PTR, ptr + 4 + size); + storeRam32(ptr, size); + return ptr + 4; +} + +static void psxBios_SysInitMemory() { // 08 + PSXBIOS_LOG("psxBios_%s %x %x\n", biosC0n[0x08], a0, a1); + + psxBios_SysInitMemory_(a0, a1); + mips_return_void_c(12); +} + +static void psxBios_ChangeClearRCnt() { // 0a u32 ret; PSXBIOS_LOG("psxBios_%s: %x, %x\n", biosC0n[0x0a], a0, a1); @@ -2821,6 +2877,14 @@ void psxBios_ChangeClearRCnt() { // 0a mips_return_c(ret, 8); } +static void psxBios_InitDefInt() { // 0c + PSXBIOS_LOG("psxBios_%s %x\n", biosC0n[0x0c], a0); + // should also clear the autoack table + a1 = 0x6d98; + psxBios_SysEnqIntRP(); + mips_return_c(0, 20 + 6*2); +} + void psxBios_dummy() { u32 pc = (pc0 & 0x1fffff) - 4; char **ntab = pc == 0xa0 ? biosA0n : pc == 0xb0 ? 
biosB0n @@ -2907,6 +2971,44 @@ static void write_chain(u32 *d, u32 next, u32 handler1, u32 handler2) PSXMu32ref(handler2) = HLEOP(chain_hle_op(handler2)); } +static void setup_tt(u32 tcb_cnt, u32 evcb_cnt) +{ + u32 *ram32 = (u32 *)psxM; + u32 s_excb = 0x20, s_evcb = 0x1c * evcb_cnt; + u32 s_pcb = 4, s_tcb = 0xc0 * tcb_cnt; + u32 p_excb, p_evcb, p_pcb, p_tcb; + + memset(ram32 + 0xe000/4, 0, s_excb + s_evcb + s_pcb + s_tcb + 5*4); + psxBios_SysInitMemory_(0xa000e000, 0x2000); + p_excb = psxBios_SysMalloc_(s_excb); + p_evcb = psxBios_SysMalloc_(s_evcb); + p_pcb = psxBios_SysMalloc_(s_pcb); + p_tcb = psxBios_SysMalloc_(s_tcb); + + // "table of tables". Some games modify it + assert(A_TT_ExCB == 0x0100); + ram32[0x0100/4] = SWAPu32(p_excb); // ExCB - exception chains + ram32[0x0104/4] = SWAPu32(s_excb); // ExCB size + ram32[0x0108/4] = SWAPu32(p_pcb); // PCB - process control + ram32[0x010c/4] = SWAPu32(s_pcb); // PCB size + ram32[0x0110/4] = SWAPu32(p_tcb); // TCB - thread control + ram32[0x0114/4] = SWAPu32(s_tcb); // TCB size + ram32[0x0120/4] = SWAPu32(p_evcb); // EvCB - event control + ram32[0x0124/4] = SWAPu32(s_evcb); // EvCB size + ram32[0x0140/4] = SWAPu32(0x8648); // FCB - file control + ram32[0x0144/4] = SWAPu32(0x02c0); // FCB size + ram32[0x0150/4] = SWAPu32(0x6ee0); // DCB - device control + ram32[0x0154/4] = SWAPu32(0x0320); // DCB size + + storeRam32(p_excb + 0*4, 0x91e0); // chain0 + storeRam32(p_excb + 2*4, 0x6d88); // chain1 + storeRam32(p_excb + 4*4, 0x0000); // chain2 + storeRam32(p_excb + 6*4, 0x6d98); // chain3 + + storeRam32(p_pcb, p_tcb); + storeRam32(p_tcb, 0x4000); // first TCB +} + void psxBiosInit() { u32 base, size; u32 *ptr, *ram32; @@ -3112,7 +3214,7 @@ void psxBiosInit() { //biosA0[0xb3] = psxBios_sys_a0_b3; //biosA0[0xb4] = psxBios_sub_function; //*******************B0 CALLS**************************** - //biosB0[0x00] = psxBios_SysMalloc; + biosB0[0x00] = psxBios_SysMalloc; //biosB0[0x01] = psxBios_sys_b0_01; biosB0[0x02] = 
psxBios_SetRCnt; biosB0[0x03] = psxBios_GetRCnt; @@ -3205,19 +3307,19 @@ void psxBiosInit() { biosB0[0x5c] = psxBios__card_status; biosB0[0x5d] = psxBios__card_wait; //*******************C0 CALLS**************************** - //biosC0[0x00] = psxBios_InitRCnt; - //biosC0[0x01] = psxBios_InitException; + biosC0[0x00] = psxBios_InitRCnt; + biosC0[0x01] = psxBios_InitException; biosC0[0x02] = psxBios_SysEnqIntRP; biosC0[0x03] = psxBios_SysDeqIntRP; //biosC0[0x04] = psxBios_get_free_EvCB_slot; //biosC0[0x05] = psxBios_get_free_TCB_slot; //biosC0[0x06] = psxBios_ExceptionHandler; //biosC0[0x07] = psxBios_InstallExeptionHandler; - //biosC0[0x08] = psxBios_SysInitMemory; + biosC0[0x08] = psxBios_SysInitMemory; //biosC0[0x09] = psxBios_SysInitKMem; biosC0[0x0a] = psxBios_ChangeClearRCnt; //biosC0[0x0b] = psxBios_SystemError; - //biosC0[0x0c] = psxBios_InitDefInt; + biosC0[0x0c] = psxBios_InitDefInt; //biosC0[0x0d] = psxBios_sys_c0_0d; //biosC0[0x0e] = psxBios_sys_c0_0e; //biosC0[0x0f] = psxBios_sys_c0_0f; @@ -3297,30 +3399,7 @@ void psxBiosInit() { ram32[0x00b0/4] = HLEOP(hleop_b0); ram32[0x00c0/4] = HLEOP(hleop_c0); - // "table of tables". 
Some games modify it - assert(A_TT_ExCB == 0x0100); - ram32[0x0100/4] = SWAPu32(0x0000e004); // ExCB - exception chains - ram32[0x0104/4] = SWAPu32(0x00000020); // ExCB size - ram32[0x0108/4] = SWAPu32(0x0000e1ec); // PCB - process control - ram32[0x010c/4] = SWAPu32(0x00000004); // PCB size - ram32[0x0110/4] = SWAPu32(0x0000e1f4); // TCB - thread control - ram32[0x0114/4] = SWAPu32(0x00000300); // TCB size - ram32[0x0120/4] = SWAPu32(0x0000e028); // EvCB - event control - ram32[0x0124/4] = SWAPu32(0x000001c0); // EvCB size - ram32[0x0140/4] = SWAPu32(0x00008648); // FCB - file control - ram32[0x0144/4] = SWAPu32(0x000002c0); // FCB size - ram32[0x0150/4] = SWAPu32(0x00006ee0); // DCB - device control - ram32[0x0154/4] = SWAPu32(0x00000320); // DCB size - - ram32[0xe000/4] = SWAPu32(0x00000020); // SysMalloc block size - ram32[0xe004/4] = SWAPu32(0x000091e0); // chain0 - ram32[0xe00c/4] = SWAPu32(0x00006d88); // chain1 - ram32[0xe014/4] = SWAPu32(0x00000000); // chain2 - ram32[0xe01c/4] = SWAPu32(0x00006d98); // chain3 - - ram32[0xe1ec/4] = SWAPu32(0x0000e1f4); // TCB - ram32[0xe1f0/4] = SWAPu32(0x00000300); // SysMalloc block size - ram32[0xe1f4/4] = SWAPu32(0x00004000); // first TCB + setup_tt(4, 16); ram32[0x6ee0/4] = SWAPu32(0x0000eff0); // DCB strcpy((char *)&ram32[0xeff0/4], "bu"); @@ -3373,18 +3452,14 @@ void psxBiosInit() { ram32[A_RCNT_VBL_ACK/4 + 1] = SWAP32(1); ram32[A_RCNT_VBL_ACK/4 + 2] = SWAP32(1); ram32[A_RCNT_VBL_ACK/4 + 3] = SWAP32(1); - - psxRegs.CP0.n.SR &= ~0x400000; // use ram vector } void psxBiosShutdown() { } -void psxBiosCnfLoaded(u32 tcbs, u32 events) { - if (tcbs > 4) - log_unhandled("FIXME: TCB = %x\n", tcbs); - if (events > 16) - log_unhandled("FIXME: EVENT = %x\n", tcbs); +void psxBiosCnfLoaded(u32 tcb_cnt, u32 evcb_cnt) { + if (tcb_cnt != 4 || evcb_cnt != 16) + setup_tt(tcb_cnt, evcb_cnt); } #define psxBios_PADpoll(pad) { \ diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index b5fe3bd16..8351c949e 100644 --- 
a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -61,8 +61,10 @@ void psxReset() { psxRegs.CP0.n.SR = 0x10600000; // COP0 enabled | BEV = 1 | TS = 1 psxRegs.CP0.n.PRid = 0x00000002; // PRevID = Revision ID, same as R3000A - if (Config.HLE) - psxRegs.CP0.n.SR |= 1u << 30; // COP2 enabled + if (Config.HLE) { + psxRegs.CP0.n.SR |= 1u << 30; // COP2 enabled + psxRegs.CP0.n.SR &= ~(1u << 22); // RAM exception vector + } psxCpu->ApplyConfig(); psxCpu->Reset(); From d512faf7162c167d4ad34c5d13601697c6205ae1 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 11 Aug 2023 02:10:55 +0300 Subject: [PATCH 308/597] try to fix reset in HLE mode --- frontend/libretro.c | 2 ++ frontend/menu.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/frontend/libretro.c b/frontend/libretro.c index 807e683f4..4d29e1652 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2726,6 +2726,8 @@ void retro_run(void) { rebootemu = 0; SysReset(); + if (Config.HLE) + LoadCdrom(); } print_internal_fps(); diff --git a/frontend/menu.c b/frontend/menu.c index b48050626..9586baed7 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -2016,6 +2016,10 @@ static int reset_game(void) ClosePlugins(); OpenPlugins(); SysReset(); + if (Config.HLE) { + if (LoadCdrom() == -1) + return -1; + } return 0; } From 0b1da49108ea0344f762d78c03c2d6eff2b21069 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 12 Aug 2023 19:01:22 +0300 Subject: [PATCH 309/597] emulate pending irq bit somewhat --- libpcsxcore/new_dynarec/events.c | 7 +++++-- libpcsxcore/r3000a.c | 16 ++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/libpcsxcore/new_dynarec/events.c b/libpcsxcore/new_dynarec/events.c index 71aed6b2c..2a06c1f96 100644 --- a/libpcsxcore/new_dynarec/events.c +++ b/libpcsxcore/new_dynarec/events.c @@ -68,8 +68,11 @@ static void irq_test(psxCP0Regs *cp0) } } - if ((psxHu32(0x1070) & psxHu32(0x1074)) && (cp0->n.SR & 0x401) == 0x401) { - psxException(0x400, 0, cp0); + cp0->n.Cause &= ~0x400; 
+ if (psxHu32(0x1070) & psxHu32(0x1074)) + cp0->n.Cause |= 0x400; + if (((cp0->n.Cause | 1) & cp0->n.SR & 0x401) == 0x401) { + psxException(0, 0, cp0); pending_exception = 1; } } diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 8351c949e..0be8a53c4 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -109,7 +109,7 @@ void psxException(u32 cause, enum R3000Abdt bdt, psxCP0Regs *cp0) { } // Set the Cause - cp0->n.Cause = (bdt << 30) | (cp0->n.Cause & 0x300) | cause; + cp0->n.Cause = (bdt << 30) | (cp0->n.Cause & 0x700) | cause; // Set the EPC & PC cp0->n.EPC = bdt ? psxRegs.pc - 4 : psxRegs.pc; @@ -196,15 +196,11 @@ void psxBranchTest() { } } - if (psxHu32(0x1070) & psxHu32(0x1074)) { - if ((psxRegs.CP0.n.SR & 0x401) == 0x401) { -#ifdef PSXCPU_LOG - PSXCPU_LOG("Interrupt: %x %x\n", psxHu32(0x1070), psxHu32(0x1074)); -#endif -// SysPrintf("Interrupt (%x): %x %x\n", psxRegs.cycle, psxHu32(0x1070), psxHu32(0x1074)); - psxException(0x400, 0, &psxRegs.CP0); - } - } + psxRegs.CP0.n.Cause &= ~0x400; + if (psxHu32(0x1070) & psxHu32(0x1074)) + psxRegs.CP0.n.Cause |= 0x400; + if (((psxRegs.CP0.n.Cause | 1) & psxRegs.CP0.n.SR & 0x401) == 0x401) + psxException(0, 0, &psxRegs.CP0); } void psxJumpTest() { From 906b1599387d4ecbf225282ba5123f83444c88fb Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 12 Aug 2023 19:05:35 +0300 Subject: [PATCH 310/597] log unhandled io more clearly --- libpcsxcore/new_dynarec/pcsxmem.c | 5 +- libpcsxcore/psxhw.c | 255 +++++++++++++++++++----------- plugins/dfsound/registers.c | 13 ++ 3 files changed, 176 insertions(+), 97 deletions(-) diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index 190f8fc7b..1f37dc29c 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -385,6 +385,7 @@ void new_dyna_pcsx_mem_init(void) } map_item(&mem_iortab[IOMEM32(0x1040)], io_read_sio32, 1); + map_item(&mem_iortab[IOMEM16(0x1044)], sioReadStat16, 1); 
map_item(&mem_iortab[IOMEM32(0x1100)], io_rcnt_read_count0, 1); map_item(&mem_iortab[IOMEM32(0x1104)], io_rcnt_read_mode0, 1); map_item(&mem_iortab[IOMEM32(0x1108)], io_rcnt_read_target0, 1); @@ -468,7 +469,7 @@ void new_dyna_pcsx_mem_init(void) map_item(&mem_iowtab[IOMEM8(0x1802)], cdrWrite2, 1); map_item(&mem_iowtab[IOMEM8(0x1803)], cdrWrite3, 1); - for (i = 0x1c00; i < 0x1e00; i += 2) { + for (i = 0x1c00; i < 0x2000; i += 2) { map_item(&mem_iowtab[IOMEM16(i)], io_spu_write16, 1); map_item(&mem_iowtab[IOMEM32(i)], io_spu_write32, 1); } @@ -494,7 +495,7 @@ void new_dyna_pcsx_mem_reset(void) // plugins might change so update the pointers map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); - for (i = 0x1c00; i < 0x1e00; i += 2) + for (i = 0x1c00; i < 0x2000; i += 2) map_item(&mem_iortab[IOMEM16(i)], SPU_readRegister, 1); map_item(&mem_iowtab[IOMEM32(0x1810)], GPU_writeData, 1); diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index 27ddfeab5..10a2695f4 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -28,6 +28,9 @@ //#undef PSXHW_LOG //#define PSXHW_LOG printf +#ifndef PAD_LOG +#define PAD_LOG(...) 
+#endif void psxHwReset() { memset(psxH, 0, 0x10000); @@ -42,15 +45,46 @@ u8 psxHwRead8(u32 add) { unsigned char hard; switch (add & 0x1fffffff) { - case 0x1f801040: hard = sioRead8();break; -#ifdef ENABLE_SIO1API - case 0x1f801050: hard = SIO1_readData8(); break; -#endif + case 0x1f801040: hard = sioRead8(); break; case 0x1f801800: hard = cdrRead0(); break; case 0x1f801801: hard = cdrRead1(); break; case 0x1f801802: hard = cdrRead2(); break; case 0x1f801803: hard = cdrRead3(); break; + + case 0x1f801041: case 0x1f801042: case 0x1f801043: + case 0x1f801044: case 0x1f801045: + case 0x1f801046: case 0x1f801047: + case 0x1f801048: case 0x1f801049: + case 0x1f80104a: case 0x1f80104b: + case 0x1f80104c: case 0x1f80104d: + case 0x1f80104e: case 0x1f80104f: + case 0x1f801050: case 0x1f801051: + case 0x1f801054: case 0x1f801055: + case 0x1f801058: case 0x1f801059: + case 0x1f80105a: case 0x1f80105b: + case 0x1f80105c: case 0x1f80105d: + case 0x1f801100: case 0x1f801101: + case 0x1f801104: case 0x1f801105: + case 0x1f801108: case 0x1f801109: + case 0x1f801110: case 0x1f801111: + case 0x1f801114: case 0x1f801115: + case 0x1f801118: case 0x1f801119: + case 0x1f801120: case 0x1f801121: + case 0x1f801124: case 0x1f801125: + case 0x1f801128: case 0x1f801129: + case 0x1f801810: case 0x1f801811: + case 0x1f801812: case 0x1f801813: + case 0x1f801814: case 0x1f801815: + case 0x1f801816: case 0x1f801817: + case 0x1f801820: case 0x1f801821: + case 0x1f801822: case 0x1f801823: + case 0x1f801824: case 0x1f801825: + case 0x1f801826: case 0x1f801827: + log_unhandled("unhandled r8 %08x @%08x\n", add, psxRegs.pc); + // falthrough default: + if (0x1f801c00 <= add && add < 0x1f802000) + log_unhandled("spu r8 %02x @%08x\n", add, psxRegs.pc); hard = psxHu8(add); #ifdef PSXHW_LOG PSXHW_LOG("*Unkwnown 8bit read at address %x\n", add); @@ -71,64 +105,38 @@ u16 psxHwRead16(u32 add) { #ifdef PSXHW_LOG case 0x1f801070: PSXHW_LOG("IREG 16bit read %x\n", psxHu16(0x1070)); return psxHu16(0x1070); 
-#endif -#ifdef PSXHW_LOG case 0x1f801074: PSXHW_LOG("IMASK 16bit read %x\n", psxHu16(0x1074)); return psxHu16(0x1074); #endif - case 0x1f801040: hard = sioRead8(); hard|= sioRead8() << 8; -#ifdef PAD_LOG PAD_LOG("sio read16 %x; ret = %x\n", add&0xf, hard); -#endif return hard; case 0x1f801044: hard = sioReadStat16(); -#ifdef PAD_LOG PAD_LOG("sio read16 %x; ret = %x\n", add&0xf, hard); -#endif return hard; case 0x1f801048: hard = sioReadMode16(); -#ifdef PAD_LOG PAD_LOG("sio read16 %x; ret = %x\n", add&0xf, hard); -#endif return hard; case 0x1f80104a: hard = sioReadCtrl16(); -#ifdef PAD_LOG PAD_LOG("sio read16 %x; ret = %x\n", add&0xf, hard); -#endif return hard; case 0x1f80104e: hard = sioReadBaud16(); -#ifdef PAD_LOG PAD_LOG("sio read16 %x; ret = %x\n", add&0xf, hard); -#endif - return hard; -#ifdef ENABLE_SIO1API - case 0x1f801050: - hard = SIO1_readData16(); - return hard; - case 0x1f801054: - hard = SIO1_readStat16(); - return hard; - case 0x1f80105a: - hard = SIO1_readCtrl16(); - return hard; - case 0x1f80105e: - hard = SIO1_readBaud16(); return hard; -#else + /* Fixes Armored Core misdetecting the Link cable being detected. * We want to turn that thing off and force it to do local multiplayer instead. * Thanks Sony for the fix, they fixed it in their PS Classic fork. */ case 0x1f801054: return 0x80; -#endif + case 0x1f801100: hard = psxRcntRcount(0); #ifdef PSXHW_LOG @@ -187,20 +195,33 @@ u16 psxHwRead16(u32 add) { //case 0x1f802030: hard = //int_2000???? //case 0x1f802040: hard =//dip switches...?? 
+ case 0x1f801042: + case 0x1f801046: + case 0x1f80104c: + case 0x1f801050: + case 0x1f801058: + case 0x1f80105a: + case 0x1f80105c: case 0x1f801800: case 0x1f801802: - log_unhandled("cdrom r16 %x\n", add); + case 0x1f801810: + case 0x1f801812: + case 0x1f801814: + case 0x1f801816: + case 0x1f801820: + case 0x1f801822: + case 0x1f801824: + case 0x1f801826: + log_unhandled("unhandled r16 %08x @%08x\n", add, psxRegs.pc); // falthrough default: - if (add >= 0x1f801c00 && add < 0x1f801e00) { - hard = SPU_readRegister(add); - } else { - hard = psxHu16(add); + if (0x1f801c00 <= add && add < 0x1f802000) + return SPU_readRegister(add); + hard = psxHu16(add); #ifdef PSXHW_LOG - PSXHW_LOG("*Unkwnown 16bit read at address %x\n", add); + PSXHW_LOG("*Unkwnown 16bit read at address %x\n", add); #endif - } - return hard; + return hard; } #ifdef PSXHW_LOG @@ -218,25 +239,18 @@ u32 psxHwRead32(u32 add) { hard |= sioRead8() << 8; hard |= sioRead8() << 16; hard |= sioRead8() << 24; -#ifdef PAD_LOG PAD_LOG("sio read32 ;ret = %x\n", hard); -#endif - return hard; -#ifdef ENABLE_SIO1API - case 0x1f801050: - hard = SIO1_readData32(); - return hard; -#endif + return hard; + case 0x1f801044: + hard = sioReadStat16(); + PAD_LOG("sio read32 %x; ret = %x\n", add&0xf, hard); + return hard; #ifdef PSXHW_LOG case 0x1f801060: PSXHW_LOG("RAM size read %x\n", psxHu32(0x1060)); return psxHu32(0x1060); -#endif -#ifdef PSXHW_LOG case 0x1f801070: PSXHW_LOG("IREG 32bit read %x\n", psxHu32(0x1070)); return psxHu32(0x1070); -#endif -#ifdef PSXHW_LOG case 0x1f801074: PSXHW_LOG("IMASK 32bit read %x\n", psxHu32(0x1074)); return psxHu32(0x1074); #endif @@ -349,11 +363,22 @@ u32 psxHwRead32(u32 add) { #endif return hard; + case 0x1f801048: + case 0x1f80104c: + case 0x1f801050: + case 0x1f801054: + case 0x1f801058: + case 0x1f80105c: case 0x1f801800: - log_unhandled("cdrom r32 %x\n", add); + log_unhandled("unhandled r32 %08x @%08x\n", add, psxRegs.pc); // falthrough default: - hard = psxHu32(add); + if 
(0x1f801c00 <= add && add < 0x1f802000) { + hard = SPU_readRegister(add); + hard |= SPU_readRegister(add + 2) << 16; + return hard; + } + hard = psxHu32(add); #ifdef PSXHW_LOG PSXHW_LOG("*Unkwnown 32bit read at address %x\n", add); #endif @@ -368,15 +393,50 @@ u32 psxHwRead32(u32 add) { void psxHwWrite8(u32 add, u8 value) { switch (add & 0x1fffffff) { case 0x1f801040: sioWrite8(value); break; -#ifdef ENABLE_SIO1API - case 0x1f801050: SIO1_writeData8(value); break; -#endif case 0x1f801800: cdrWrite0(value); break; case 0x1f801801: cdrWrite1(value); break; case 0x1f801802: cdrWrite2(value); break; case 0x1f801803: cdrWrite3(value); break; + case 0x1f801041: case 0x1f801042: case 0x1f801043: + case 0x1f801044: case 0x1f801045: + case 0x1f801046: case 0x1f801047: + case 0x1f801048: case 0x1f801049: + case 0x1f80104a: case 0x1f80104b: + case 0x1f80104c: case 0x1f80104d: + case 0x1f80104e: case 0x1f80104f: + case 0x1f801050: case 0x1f801051: + case 0x1f801054: case 0x1f801055: + case 0x1f801058: case 0x1f801059: + case 0x1f80105a: case 0x1f80105b: + case 0x1f80105c: case 0x1f80105d: + case 0x1f801100: case 0x1f801101: + case 0x1f801104: case 0x1f801105: + case 0x1f801108: case 0x1f801109: + case 0x1f801110: case 0x1f801111: + case 0x1f801114: case 0x1f801115: + case 0x1f801118: case 0x1f801119: + case 0x1f801120: case 0x1f801121: + case 0x1f801124: case 0x1f801125: + case 0x1f801128: case 0x1f801129: + case 0x1f801810: case 0x1f801811: + case 0x1f801812: case 0x1f801813: + case 0x1f801814: case 0x1f801815: + case 0x1f801816: case 0x1f801817: + case 0x1f801820: case 0x1f801821: + case 0x1f801822: case 0x1f801823: + case 0x1f801824: case 0x1f801825: + case 0x1f801826: case 0x1f801827: + log_unhandled("unhandled w8 %08x @%08x\n", add, psxRegs.pc); + // falthrough default: + if (0x1f801c00 <= add && add < 0x1f802000) { + log_unhandled("spu w8 %02x @%08x\n", value, psxRegs.pc); + if (!(add & 1)) + SPU_writeRegister(add, value, psxRegs.cycle); + return; + } + psxHu8(add) = 
value; #ifdef PSXHW_LOG PSXHW_LOG("*Unknown 8bit write at address %x value %x\n", add, value); @@ -394,48 +454,24 @@ void psxHwWrite16(u32 add, u16 value) { case 0x1f801040: sioWrite8((unsigned char)value); sioWrite8((unsigned char)(value>>8)); -#ifdef PAD_LOG PAD_LOG ("sio write16 %x, %x\n", add&0xf, value); -#endif return; case 0x1f801044: sioWriteStat16(value); -#ifdef PAD_LOG PAD_LOG ("sio write16 %x, %x\n", add&0xf, value); -#endif return; case 0x1f801048: - sioWriteMode16(value); -#ifdef PAD_LOG + sioWriteMode16(value); PAD_LOG ("sio write16 %x, %x\n", add&0xf, value); -#endif return; case 0x1f80104a: // control register sioWriteCtrl16(value); -#ifdef PAD_LOG PAD_LOG ("sio write16 %x, %x\n", add&0xf, value); -#endif return; case 0x1f80104e: // baudrate register - sioWriteBaud16(value); -#ifdef PAD_LOG + sioWriteBaud16(value); PAD_LOG ("sio write16 %x, %x\n", add&0xf, value); -#endif - return; -#ifdef ENABLE_SIO1API - case 0x1f801050: - SIO1_writeData16(value); - return; - case 0x1f801054: - SIO1_writeStat16(value); return; - case 0x1f80105a: - SIO1_writeCtrl16(value); - return; - case 0x1f80105e: - SIO1_writeBaud16(value); - return; -#endif case 0x1f801070: #ifdef PSXHW_LOG PSXHW_LOG("IREG 16bit write %x\n", value); @@ -448,8 +484,11 @@ void psxHwWrite16(u32 add, u16 value) { PSXHW_LOG("IMASK 16bit write %x\n", value); #endif psxHu16ref(0x1074) = SWAPu16(value); - if (psxHu16ref(0x1070) & SWAPu16(value)) + if (psxHu16ref(0x1070) & SWAPu16(value)) { + //if ((psxRegs.CP0.n.SR & 0x401) == 0x401) + // log_unhandled("irq on unmask @%08x\n", psxRegs.pc); new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); + } return; case 0x1f801100: @@ -500,8 +539,28 @@ void psxHwWrite16(u32 add, u16 value) { #endif psxRcntWtarget(2, value); return; + case 0x1f801042: + case 0x1f801046: + case 0x1f80104c: + case 0x1f801050: + case 0x1f801054: + case 0x1f801058: + case 0x1f80105a: + case 0x1f80105c: + case 0x1f801800: + case 0x1f801802: + case 0x1f801810: + case 0x1f801812: + case 
0x1f801814: + case 0x1f801816: + case 0x1f801820: + case 0x1f801822: + case 0x1f801824: + case 0x1f801826: + log_unhandled("unhandled w16 %08x @%08x\n", add, psxRegs.pc); + // falthrough default: - if (add>=0x1f801c00 && add<0x1f801e00) { + if (0x1f801c00 <= add && add < 0x1f802000) { SPU_writeRegister(add, value, psxRegs.cycle); return; } @@ -535,15 +594,8 @@ void psxHwWrite32(u32 add, u32 value) { sioWrite8((unsigned char)((value&0xff) >> 8)); sioWrite8((unsigned char)((value&0xff) >> 16)); sioWrite8((unsigned char)((value&0xff) >> 24)); -#ifdef PAD_LOG PAD_LOG("sio write32 %x\n", value); -#endif - return; -#ifdef ENABLE_SIO1API - case 0x1f801050: - SIO1_writeData32(value); - return; -#endif + return; #ifdef PSXHW_LOG case 0x1f801060: PSXHW_LOG("RAM size write %x\n", value); @@ -562,8 +614,11 @@ void psxHwWrite32(u32 add, u32 value) { PSXHW_LOG("IMASK 32bit write %x\n", value); #endif psxHu32ref(0x1074) = SWAPu32(value); - if (psxHu32ref(0x1070) & SWAPu32(value)) + if (psxHu32ref(0x1070) & SWAPu32(value)) { + if ((psxRegs.CP0.n.SR & 0x401) == 0x401) + log_unhandled("irq on unmask @%08x\n", psxRegs.pc); new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); + } return; #ifdef PSXHW_LOG @@ -753,9 +808,19 @@ void psxHwWrite32(u32 add, u32 value) { #endif psxRcntWtarget(2, value & 0xffff); return; + case 0x1f801044: + case 0x1f801048: + case 0x1f80104c: + case 0x1f801050: + case 0x1f801054: + case 0x1f801058: + case 0x1f80105c: + case 0x1f801800: + log_unhandled("unhandled w32 %08x @%08x\n", add, psxRegs.pc); + // falthrough default: // Dukes of Hazard 2 - car engine noise - if (add>=0x1f801c00 && add<0x1f801e00) { + if (0x1f801c00 <= add && add < 0x1f802000) { SPU_writeRegister(add, value&0xffff, psxRegs.cycle); SPU_writeRegister(add + 2, value>>16, psxRegs.cycle); return; diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index adc9c3a54..580589975 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -353,6 +353,19 @@ unsigned 
short CALLBACK SPUreadRegister(unsigned long reg) case H_SPUMute2: log_unhandled("r isOn: %08lx\n", reg); break; + + case 0x0dac: + case H_SPUirqAddr: + case H_CDLeft: + case H_CDRight: + case H_ExtLeft: + case H_ExtRight: + break; + + default: + if (r >= 0xda0) + log_unhandled("spu r %08lx\n", reg); + break; } return spu.regArea[(r-0xc00)>>1]; From 4d2f73bbb5d0ecbd8f21ce9d4c1fea1b27902648 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 13 Aug 2023 01:59:20 +0300 Subject: [PATCH 311/597] psxbios: rewrite event handling no more mapping to some internal array --- libpcsxcore/psxbios.c | 445 ++++++++++++++++++++---------------------- 1 file changed, 215 insertions(+), 230 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 85f79fd2f..4ecbcb6c1 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -193,28 +193,21 @@ char *biosC0n[256] = { #define Rsp ((char *)PSXM(sp)) typedef struct { - u32 desc; - s32 status; - s32 mode; + u32 class; + u32 status; + u32 spec; + u32 mode; u32 fhandler; -} EvCB[32]; + u32 unused[2]; +} EvCB; -#define EvStUNUSED 0x0000 -#define EvStWAIT 0x1000 -#define EvStACTIVE 0x2000 -#define EvStALREADY 0x4000 +#define EvStUNUSED 0x0000 +#define EvStDISABLED 0x1000 +#define EvStACTIVE 0x2000 +#define EvStALREADY 0x4000 -#define EvMdINTR 0x1000 -#define EvMdNOINTR 0x2000 - -/* -typedef struct { - s32 next; - s32 func1; - s32 func2; - s32 pad; -} SysRPst; -*/ +#define EvMdCALL 0x1000 +#define EvMdMARK 0x2000 typedef struct { u32 status; @@ -261,20 +254,9 @@ static int *pad_buf = NULL; static char *pad_buf1 = NULL, *pad_buf2 = NULL; static int pad_buf1len, pad_buf2len; static int pad_stopped = 0; - -static u32 regs[35]; -static EvCB *EventCB; -static EvCB *HwEV; // 0xf0 -static EvCB *EvEV; // 0xf1 -static EvCB *RcEV; // 0xf2 -static EvCB *UeEV; // 0xf3 -static EvCB *SwEV; // 0xf4 -static EvCB *ThEV; // 0xff static u32 heap_size = 0; static u32 *heap_addr = NULL; static u32 *heap_end = NULL; -static int CardState = 
-1; -static int CurThread = 0; static FileDesc FDesc[32]; static u32 card_active_chan = 0; @@ -282,6 +264,7 @@ static u32 card_active_chan = 0; #define A_TT_ExCB 0x0100 #define A_TT_PCB 0x0108 #define A_TT_TCB 0x0110 +#define A_TT_EvCB 0x0120 #define A_A0_TABLE 0x0200 #define A_B0_TABLE 0x0874 #define A_C0_TABLE 0x0674 @@ -295,6 +278,7 @@ static u32 card_active_chan = 0; #define A_EEXIT_PTR 0x75d0 #define A_EXC_STACK 0x85d8 // exception stack top #define A_RCNT_VBL_ACK 0x8600 +#define A_CD_EVENTS 0xb9b8 #define A_EXC_GP 0xf450 #define HLEOP(n) SWAPu32((0x3b << 26) | (n)); @@ -377,14 +361,10 @@ static inline void softCallInException(u32 pc) { ra = sra; } -static inline void DeliverEvent(u32 ev, u32 spec) { - if (EventCB[ev][spec].status != EvStACTIVE) return; - -// EventCB[ev][spec].status = EvStALREADY; - if (EventCB[ev][spec].mode == EvMdINTR) { - softCall(EventCB[ev][spec].fhandler); - } else EventCB[ev][spec].status = EvStALREADY; -} +static u32 OpenEvent(u32 class, u32 spec, u32 mode, u32 func); +static u32 DeliverEvent(u32 class, u32 spec); +static u32 UnDeliverEvent(u32 class, u32 spec); +static void CloseEvent(u32 ev); /* * // * @@ -397,8 +377,8 @@ static inline void DeliverEvent(u32 ev, u32 spec) { ptr = Mcd##mcd##Data + 8192 * FDesc[1 + mcd].mcfile + FDesc[1 + mcd].offset; \ memcpy(Ra1, ptr, length); \ if (FDesc[1 + mcd].mode & 0x8000) { \ - DeliverEvent(0x11, 0x2); /* 0xf0000011, 0x0004 */ \ - DeliverEvent(0x81, 0x2); /* 0xf4000001, 0x0004 */ \ + DeliverEvent(0xf0000011, 0x0004); \ + DeliverEvent(0xf4000001, 0x0004); \ v0 = 0; } \ else v0 = length; \ FDesc[1 + mcd].offset += v0; \ @@ -412,8 +392,8 @@ static inline void DeliverEvent(u32 ev, u32 spec) { FDesc[1 + mcd].offset += length; \ SaveMcd(Config.Mcd##mcd, Mcd##mcd##Data, offset, length); \ if (FDesc[1 + mcd].mode & 0x8000) { \ - DeliverEvent(0x11, 0x2); /* 0xf0000011, 0x0004 */ \ - DeliverEvent(0x81, 0x2); /* 0xf4000001, 0x0004 */ \ + DeliverEvent(0xf0000011, 0x0004); \ + DeliverEvent(0xf4000001, 
0x0004); \ v0 = 0; } \ else v0 = length; \ } @@ -1602,8 +1582,8 @@ void psxBios__bu_init() { // 70 PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x70]); #endif - DeliverEvent(0x11, 0x2); // 0xf0000011, 0x0004 - DeliverEvent(0x81, 0x2); // 0xf4000001, 0x0004 + DeliverEvent(0xf0000011, 0x0004); + DeliverEvent(0xf4000001, 0x0004); pc0 = ra; } @@ -1634,11 +1614,11 @@ static void psxBios_DequeueCdIntr() { // a3 static void psxBios_CdRemove() { // 56, 72 PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x72]); - // CloseEvent 0xf1000000 - // CloseEvent 0xf1000001 - // CloseEvent 0xf1000002 - // CloseEvent 0xf1000003 - // CloseEvent 0xf1000004 + CloseEvent(loadRam32(A_CD_EVENTS + 0x00)); + CloseEvent(loadRam32(A_CD_EVENTS + 0x04)); + CloseEvent(loadRam32(A_CD_EVENTS + 0x08)); + CloseEvent(loadRam32(A_CD_EVENTS + 0x0c)); + CloseEvent(loadRam32(A_CD_EVENTS + 0x10)); psxBios_DequeueCdIntr_(); // EnterCriticalSection - should be done at the beginning, @@ -1693,24 +1673,24 @@ void psxBios__card_info() { // ab switch (port) { case 0x0: case 0x1: - ret = 0x2; + ret = 0x0004; if (McdDisable[port & 1]) - ret = 0x8; + ret = 0x0100; break; default: #ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s: UNKNOWN PORT 0x%x\n", biosA0n[0xab], card_active_chan); #endif - ret = 0x11; + ret = 0x0302; break; } if (McdDisable[0] && McdDisable[1]) - ret = 0x8; + ret = 0x0100; - DeliverEvent(0x11, 0x2); // 0xf0000011, 0x0004 -// DeliverEvent(0x81, 0x2); // 0xf4000001, 0x0004 - DeliverEvent(0x81, ret); // 0xf4000001, 0x0004 + DeliverEvent(0xf0000011, 0x0004); +// DeliverEvent(0xf4000001, 0x0004); + DeliverEvent(0xf4000001, ret); v0 = 1; pc0 = ra; } @@ -1721,8 +1701,8 @@ void psxBios__card_load() { // ac card_active_chan = a0; -// DeliverEvent(0x11, 0x2); // 0xf0000011, 0x0004 - DeliverEvent(0x81, 0x2); // 0xf4000001, 0x0004 +// DeliverEvent(0xf0000011, 0x0004); + DeliverEvent(0xf4000001, 0x0004); v0 = 1; pc0 = ra; } @@ -1806,154 +1786,176 @@ void psxBios_ResetRCnt() { // 06 pc0 = ra; } +static u32 DeliverEvent(u32 class, u32 
spec) { + EvCB *ev = (EvCB *)loadRam32ptr(A_TT_EvCB); + u32 evcb_len = loadRam32(A_TT_EvCB + 4); + u32 ret = loadRam32(A_TT_EvCB) + evcb_len; + u32 i, lim = evcb_len / 0x1c; -/* gets ev for use with EventCB */ -#define GetEv() \ - ev = (a0 >> 24) & 0xf; \ - if (ev == 0xf) ev = 0x5; \ - ev*= 32; \ - ev+= a0&0x1f; - -/* gets spec for use with EventCB */ -#define GetSpec() \ - spec = 0; \ - switch (a1) { \ - case 0x0301: spec = 16; break; \ - case 0x0302: spec = 17; break; \ - default: \ - for (i=0; i<16; i++) if (a1 & (1 << i)) { spec = i; break; } \ - break; \ + for (i = 0; i < lim; i++, ev++) { + use_cycles(8); + if (SWAP32(ev->status) != EvStACTIVE) + continue; + use_cycles(4); + if (SWAP32(ev->class) != class) + continue; + use_cycles(4); + if (SWAP32(ev->spec) != spec) + continue; + use_cycles(6); + ret = SWAP32(ev->mode); + if (ret == EvMdMARK) { + ev->status = SWAP32(EvStALREADY); + continue; + } + use_cycles(8); + if (ret == EvMdCALL) { + ret = SWAP32(ev->fhandler); + if (ret) { + v0 = ret; + softCall(ret); + ret = v0; + } + } } + use_cycles(29); + return ret; +} -void psxBios_DeliverEvent() { // 07 - int ev, spec; - int i; - - GetEv(); - GetSpec(); - -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s %x,%x\n", biosB0n[0x07], ev, spec); -#endif - - DeliverEvent(ev, spec); +static u32 UnDeliverEvent(u32 class, u32 spec) { + EvCB *ev = (EvCB *)loadRam32ptr(A_TT_EvCB); + u32 evcb_len = loadRam32(A_TT_EvCB + 4); + u32 ret = loadRam32(A_TT_EvCB) + evcb_len; + u32 i, lim = evcb_len / 0x1c; - pc0 = ra; + for (i = 0; i < lim; i++, ev++) { + use_cycles(8); + if (SWAP32(ev->status) != EvStALREADY) + continue; + use_cycles(4); + if (SWAP32(ev->class) != class) + continue; + use_cycles(4); + if (SWAP32(ev->spec) != spec) + continue; + use_cycles(6); + if (SWAP32(ev->mode) == EvMdMARK) + ev->status = SWAP32(EvStACTIVE); + } + use_cycles(28); + return ret; } -void psxBios_OpenEvent() { // 08 - int ev, spec; - int i; - - GetEv(); - GetSpec(); +static void psxBios_DeliverEvent() 
{ // 07 + u32 ret; + PSXBIOS_LOG("psxBios_%s %x %04x\n", biosB0n[0x07], a0, a1); -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s %x,%x (class:%x, spec:%x, mode:%x, func:%x)\n", biosB0n[0x08], ev, spec, a0, a1, a2, a3); -#endif + ret = DeliverEvent(a0, a1); + mips_return(ret); +} - EventCB[ev][spec].status = EvStWAIT; - EventCB[ev][spec].mode = a2; - EventCB[ev][spec].fhandler = a3; +static s32 get_free_EvCB_slot() { + EvCB *ev = (EvCB *)loadRam32ptr(A_TT_EvCB); + u32 i, lim = loadRam32(A_TT_EvCB + 4) / 0x1c; - v0 = ev | (spec << 8); - pc0 = ra; + use_cycles(19); + for (i = 0; i < lim; i++, ev++) { + use_cycles(8); + if (ev->status == SWAP32(EvStUNUSED)) + return i; + } + return -1; } -void psxBios_CloseEvent() { // 09 - int ev, spec; - - ev = a0 & 0xff; - spec = (a0 >> 8) & 0xff; +static u32 OpenEvent(u32 class, u32 spec, u32 mode, u32 func) { + u32 ret = get_free_EvCB_slot(); + if ((s32)ret >= 0) { + EvCB *ev = (EvCB *)loadRam32ptr(A_TT_EvCB) + ret; + ev->class = SWAP32(class); + ev->status = SWAP32(EvStDISABLED); + ev->spec = SWAP32(spec); + ev->mode = SWAP32(mode); + ev->fhandler = SWAP32(func); + ret |= 0xf1000000u; + } + return ret; +} -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s %x,%x\n", biosB0n[0x09], ev, spec); -#endif +static void psxBios_OpenEvent() { // 08 + u32 ret = OpenEvent(a0, a1, a2, a3); + PSXBIOS_LOG("psxBios_%s (class:%x, spec:%04x, mode:%04x, func:%x) -> %x\n", + biosB0n[0x08], a0, a1, a2, a3, ret); + mips_return_c(ret, 36); +} - EventCB[ev][spec].status = EvStUNUSED; +static void CloseEvent(u32 ev) +{ + u32 base = loadRam32(A_TT_EvCB); + storeRam32(base + (ev & 0xffff) * sizeof(EvCB) + 4, EvStUNUSED); +} - v0 = 1; pc0 = ra; +static void psxBios_CloseEvent() { // 09 + PSXBIOS_LOG("psxBios_%s %x (%x)\n", biosB0n[0x09], a0, + loadRam32(loadRam32(A_TT_EvCB) + (a0 & 0xffff) * sizeof(EvCB) + 4)); + CloseEvent(a0); + mips_return_c(1, 10); } -void psxBios_WaitEvent() { // 0a - int ev, spec; +static void psxBios_WaitEvent() { // 0a + u32 base = 
loadRam32(A_TT_EvCB); + u32 status = loadRam32(base + (a0 & 0xffff) * sizeof(EvCB) + 4); + PSXBIOS_LOG("psxBios_%s %x (status=%x)\n", biosB0n[0x0a], a0, status); - ev = a0 & 0xff; - spec = (a0 >> 8) & 0xff; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s %x,%x\n", biosB0n[0x0a], ev, spec); -#endif - if (EventCB[ev][spec].status == EvStUNUSED) - { - v0 = 0; - pc0 = ra; + use_cycles(15); + if (status == EvStALREADY) { + storeRam32(base + (a0 & 0xffff) * sizeof(EvCB) + 4, EvStACTIVE); + mips_return(1); return; } - - if (EventCB[ev][spec].status == EvStALREADY) + if (status != EvStACTIVE) { - /* Callback events (mode=EvMdINTR) do never set the ready flag (and thus WaitEvent would hang forever). */ - if (!(EventCB[ev][spec].mode == EvMdINTR)) EventCB[ev][spec].status = EvStACTIVE; - v0 = 1; - pc0 = ra; + mips_return_c(0, 2); return; } - v0 = 0; - pc0 = ra; + // retrigger this hlecall after the next emulation event + pc0 -= 4; + if ((s32)(next_interupt - psxRegs.cycle) > 0) + psxRegs.cycle = next_interupt; + psxBranchTest(); } -void psxBios_TestEvent() { // 0b - int ev, spec; - - ev = a0 & 0xff; - spec = (a0 >> 8) & 0xff; - - if (EventCB[ev][spec].status == EvStALREADY) - { - if (!(EventCB[ev][spec].mode == EvMdINTR)) EventCB[ev][spec].status = EvStACTIVE; - v0 = 1; - } - else - { - v0 = 0; +static void psxBios_TestEvent() { // 0b + u32 base = loadRam32(A_TT_EvCB); + u32 status = loadRam32(base + (a0 & 0xffff) * sizeof(EvCB) + 4); + u32 ret = 0; + PSXBIOS_LOG("psxBios_%s %x %x\n", biosB0n[0x0b], a0, status); + if (status == EvStALREADY) { + storeRam32(base + (a0 & 0xffff) * sizeof(EvCB) + 4, EvStACTIVE); + ret = 1; } -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s %x,%x: %x\n", biosB0n[0x0b], ev, spec, v0); -#endif - - pc0 = ra; + mips_return_c(ret, 15); } -void psxBios_EnableEvent() { // 0c - int ev, spec; +static void psxBios_EnableEvent() { // 0c + u32 base = loadRam32(A_TT_EvCB); + u32 status = loadRam32(base + (a0 & 0xffff) * sizeof(EvCB) + 4); + PSXBIOS_LOG("psxBios_%s 
%x (%x)\n", biosB0n[0x0c], a0, status); + if (status != EvStUNUSED) + storeRam32(base + (a0 & 0xffff) * sizeof(EvCB) + 4, EvStACTIVE); - ev = a0 & 0xff; - spec = (a0 >> 8) & 0xff; - -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s %x,%x\n", biosB0n[0x0c], ev, spec); -#endif - - EventCB[ev][spec].status = EvStACTIVE; - - v0 = 1; pc0 = ra; + mips_return_c(1, 15); } -void psxBios_DisableEvent() { // 0d - int ev, spec; - - ev = a0 & 0xff; - spec = (a0 >> 8) & 0xff; - -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s %x,%x\n", biosB0n[0x0d], ev, spec); -#endif - - EventCB[ev][spec].status = EvStWAIT; +static void psxBios_DisableEvent() { // 0d + u32 base = loadRam32(A_TT_EvCB); + u32 status = loadRam32(base + (a0 & 0xffff) * sizeof(EvCB) + 4); + PSXBIOS_LOG("psxBios_%s %x: %x\n", biosB0n[0x0d], a0, status); + if (status != EvStUNUSED) + storeRam32(base + (a0 & 0xffff) * sizeof(EvCB) + 4, EvStDISABLED); - v0 = 1; pc0 = ra; + mips_return_c(1, 15); } /* @@ -2123,22 +2125,12 @@ void psxBios_HookEntryInt() { // 19 mips_return_void_c(3); } -void psxBios_UnDeliverEvent() { // 0x20 - int ev, spec; - int i; - - GetEv(); - GetSpec(); - -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s %x,%x\n", biosB0n[0x20], ev, spec); -#endif - - if (EventCB[ev][spec].status == EvStALREADY && - EventCB[ev][spec].mode == EvMdNOINTR) - EventCB[ev][spec].status = EvStACTIVE; +static void psxBios_UnDeliverEvent() { // 0x20 + u32 ret; + PSXBIOS_LOG("psxBios_%s %x %x\n", biosB0n[0x20], a0, a1); - pc0 = ra; + ret = UnDeliverEvent(a0, a1); + mips_return(ret); } char ffile[64], *pfile; @@ -2246,8 +2238,8 @@ void psxBios_lseek() { // 0x33 case 0: // SEEK_SET FDesc[a0].offset = a1; v0 = a1; -// DeliverEvent(0x11, 0x2); // 0xf0000011, 0x0004 -// DeliverEvent(0x81, 0x2); // 0xf4000001, 0x0004 +// DeliverEvent(0xf0000011, 0x0004); +// DeliverEvent(0xf4000001, 0x0004); break; case 1: // SEEK_CUR @@ -2426,11 +2418,11 @@ void psxBios_firstfile() { // 42 nfile = 0; if (!strncmp(pa0, "bu00", 4)) { // firstfile() calls 
_card_read() internally, so deliver it's event - DeliverEvent(0x11, 0x2); + DeliverEvent(0xf0000011, 0x0004); bufile(1); } else if (!strncmp(pa0, "bu10", 4)) { // firstfile() calls _card_read() internally, so deliver it's event - DeliverEvent(0x11, 0x2); + DeliverEvent(0xf0000011, 0x0004); bufile(2); } } @@ -2558,8 +2550,6 @@ void psxBios_InitCARD() { // 4a PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x4a], a0); #endif - CardState = 0; - pc0 = ra; } @@ -2568,8 +2558,6 @@ void psxBios_StartCARD() { // 4b PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x4b]); #endif - if (CardState == 0) CardState = 1; - pc0 = ra; } @@ -2578,8 +2566,6 @@ void psxBios_StopCARD() { // 4c PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x4c]); #endif - if (CardState == 1) CardState = 0; - pc0 = ra; } @@ -2613,8 +2599,8 @@ void psxBios__card_write() { // 0x4e } } - DeliverEvent(0x11, 0x2); // 0xf0000011, 0x0004 -// DeliverEvent(0x81, 0x2); // 0xf4000001, 0x0004 + DeliverEvent(0xf0000011, 0x0004); +// DeliverEvent(0xf4000001, 0x0004); v0 = 1; pc0 = ra; } @@ -2647,8 +2633,8 @@ void psxBios__card_read() { // 0x4f } } - DeliverEvent(0x11, 0x2); // 0xf0000011, 0x0004 -// DeliverEvent(0x81, 0x2); // 0xf4000001, 0x0004 + DeliverEvent(0xf0000011, 0x0004); +// DeliverEvent(0xf4000001, 0x0004); v0 = 1; pc0 = ra; } @@ -2843,6 +2829,12 @@ static void psxBios_SysDeqIntRP() { // 03 psxBios_SysDeqIntRP_(); } +static void psxBios_get_free_EvCB_slot() { // 04 + PSXBIOS_LOG("psxBios_%s\n", biosC0n[0x04]); + s32 ret = get_free_EvCB_slot(); + mips_return_c(ret, 0); +} + static void psxBios_SysInitMemory_(u32 base, u32 size) { storeRam32(base, 0); storeRam32(A_KMALLOC_PTR, base); @@ -3007,11 +2999,18 @@ static void setup_tt(u32 tcb_cnt, u32 evcb_cnt) storeRam32(p_pcb, p_tcb); storeRam32(p_tcb, 0x4000); // first TCB + + // default events + storeRam32(A_CD_EVENTS + 0x00, OpenEvent(0xf0000003, 0x0010, EvMdMARK, 0)); + storeRam32(A_CD_EVENTS + 0x04, OpenEvent(0xf0000003, 0x0020, EvMdMARK, 0)); + storeRam32(A_CD_EVENTS + 0x08, 
OpenEvent(0xf0000003, 0x0040, EvMdMARK, 0)); + storeRam32(A_CD_EVENTS + 0x0c, OpenEvent(0xf0000003, 0x0080, EvMdMARK, 0)); + storeRam32(A_CD_EVENTS + 0x10, OpenEvent(0xf0000003, 0x8000, EvMdMARK, 0)); + DeliverEvent(0xf0000003, 0x0010); } void psxBiosInit() { - u32 base, size; - u32 *ptr, *ram32; + u32 *ptr, *ram32, *rom32; int i; uLongf len; @@ -3311,7 +3310,7 @@ void psxBiosInit() { biosC0[0x01] = psxBios_InitException; biosC0[0x02] = psxBios_SysEnqIntRP; biosC0[0x03] = psxBios_SysDeqIntRP; - //biosC0[0x04] = psxBios_get_free_EvCB_slot; + biosC0[0x04] = psxBios_get_free_EvCB_slot; //biosC0[0x05] = psxBios_get_free_TCB_slot; //biosC0[0x06] = psxBios_ExceptionHandler; //biosC0[0x07] = psxBios_InstallExeptionHandler; @@ -3338,16 +3337,6 @@ void psxBiosInit() { //biosC0[0x1c] = psxBios_PatchAOTable; //************** THE END *************************************** /**/ - base = 0x1000; - size = sizeof(EvCB) * 32; - EventCB = (void *)&psxR[base]; base += size * 6; - memset(EventCB, 0, size * 6); - HwEV = EventCB; - EvEV = EventCB + 32; - RcEV = EventCB + 32 * 2; - UeEV = EventCB + 32 * 3; - SwEV = EventCB + 32 * 4; - ThEV = EventCB + 32 * 5; pad_stopped = 1; pad_buf = NULL; @@ -3357,14 +3346,18 @@ void psxBiosInit() { heap_addr = NULL; heap_end = NULL; heap_size = 0; - CardState = -1; - CurThread = 0; memset(FDesc, 0, sizeof(FDesc)); card_active_chan = 0; // initial RNG seed psxMu32ref(0x9010) = SWAPu32(0xac20cc00); + rom32 = (u32 *)psxR; + rom32[0x100/4] = SWAP32(0x19951204); + rom32[0x104/4] = SWAP32(3); + strcpy(psxR + 0x108, "PCSX authors"); + strcpy(psxR + 0x12c, "PCSX HLE"); + // fonts len = 0x80000 - 0x66000; uncompress((Bytef *)(psxR + 0x66000), &len, font_8140, sizeof(font_8140)); @@ -3550,7 +3543,7 @@ void hleExc0_0_2() // A(91h) - CdromDmaIrqFunc1 if (psxHu32(0x1074) & psxHu32(0x1070) & 8) { // IRQ3 DMA psxHwWrite32(0x1f8010f4, (psxHu32(0x10f4) & 0xffffff) | 0x88000000); //if (--cdrom_irq_counter == 0) // 0xa0009180 - // DeliverEvent(); // 0xf0000003, 0x10 + 
// DeliverEvent(0xf0000003, 0x10); use_cycles(22); ret = 1; } @@ -3581,7 +3574,7 @@ void hleExc0_2_2_syscall() // not in any A/B/C table if (code != R3000E_Syscall) { if (code != 0) { - // DeliverEvent(); // 0xf0000010, 0x1000 + DeliverEvent(0xf0000010, 0x1000); psxBios_SystemErrorUnresolvedException(); } mips_return_c(0, 17); @@ -3610,7 +3603,7 @@ void hleExc0_2_2_syscall() // not in any A/B/C table break; } default: - // DeliverEvent(); // 0xf0000010, 0x4000 + DeliverEvent(0xf0000010, 0x4000); break; } use_cycles(30); @@ -3627,10 +3620,7 @@ static void handle_chain_1_x_2(u32 ev_index, u32 irqbit) { u32 ret = 0; if (psxHu32(0x1074) & psxHu32(0x1070) & (1u << irqbit)) { - // DeliverEvent 0xf2000000 + ev_index, 2 - if (RcEV[ev_index][1].status == EvStACTIVE) { - softCall(RcEV[ev_index][1].fhandler); - } + DeliverEvent(0xf2000000 + ev_index, 0x0002); ret = 1; } mips_return_c(ret, 22); @@ -3694,7 +3684,7 @@ void hleExc3_0_2_defint(void) size_t i; for (i = 0; i < sizeof(tab) / sizeof(tab[0]); i++) { if (psxHu32(0x1074) & psxHu32(0x1070) & (1u << tab[i].irqbit)) { - // DeliverEvent 0xf0000000 + ev, 0x1000 + DeliverEvent(0xf0000000 + tab[i].ev, 0x1000); use_cycles(7); } @@ -3794,11 +3784,6 @@ void psxBiosFreeze(int Mode) { bfreezepsxMptr(pad_buf1, char); bfreezepsxMptr(pad_buf2, char); bfreezepsxMptr(heap_addr, u32); - bfreezel(&pad_buf1len); - bfreezel(&pad_buf2len); - bfreezes(regs); - bfreezel(&CardState); - bfreezel(&CurThread); bfreezes(FDesc); bfreezel(&card_active_chan); bfreezel(&pad_stopped); From 38266cd6b0f79b210b4eae9ebd20151eb522d9d3 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 13 Aug 2023 19:15:31 +0300 Subject: [PATCH 312/597] dma: clean up libretro/pcsx_rearmed#740 --- libpcsxcore/psxdma.c | 105 ++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 57 deletions(-) diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index e15f0185d..31424b3dd 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -27,6 +27,9 @@ 
#ifndef min #define min(a, b) ((b) < (a) ? (b) : (a)) #endif +#ifndef PSXDMA_LOG +#define PSXDMA_LOG(...) +#endif // Dma0/1 in Mdec.c // Dma3 in CdRom.c @@ -40,45 +43,38 @@ void spuInterrupt() { } void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU - u32 words, words_max, size; + u32 words, words_max = 0, words_copy; u16 *ptr; + madr &= ~3; + ptr = getDmaRam(madr, &words_max); + if (ptr == INVALID_PTR) + log_unhandled("bad dma4 madr %x\n", madr); + + words = words_copy = (bcr >> 16) * (bcr & 0xffff); + if (words_copy > words_max) { + log_unhandled("bad dma4 madr %x bcr %x\n", madr, bcr); + words_copy = words_max; + } + switch (chcr) { case 0x01000201: //cpu to spu transfer -#ifdef PSXDMA_LOG PSXDMA_LOG("*** DMA4 SPU - mem2spu *** %x addr = %x size = %x\n", chcr, madr, bcr); -#endif - ptr = getDmaRam(madr, &words_max); - if (ptr == INVALID_PTR) { -#ifdef CPU_LOG - CPU_LOG("*** DMA4 SPU - mem2spu *** NULL Pointer!!!\n"); -#endif + if (ptr == INVALID_PTR) break; - } - words = (bcr >> 16) * (bcr & 0xffff); - size = min(words, words_max) * 2; - SPU_writeDMAMem(ptr, size, psxRegs.cycle); - HW_DMA4_MADR = SWAPu32((madr & ~3) + words * 4); + SPU_writeDMAMem(ptr, words_copy * 2, psxRegs.cycle); + HW_DMA4_MADR = SWAPu32(madr + words_copy * 2); SPUDMA_INT(words * 4); return; case 0x01000200: //spu to cpu transfer -#ifdef PSXDMA_LOG PSXDMA_LOG("*** DMA4 SPU - spu2mem *** %x addr = %x size = %x\n", chcr, madr, bcr); -#endif - ptr = getDmaRam(madr, &words_max); - if (ptr == INVALID_PTR) { -#ifdef CPU_LOG - CPU_LOG("*** DMA4 SPU - spu2mem *** NULL Pointer!!!\n"); -#endif + if (ptr == INVALID_PTR) break; - } - words = (bcr >> 16) * (bcr & 0xffff); - size = min(words, words_max) * 2; - SPU_readDMAMem(ptr, size, psxRegs.cycle); - psxCpu->Clear(madr, words); + SPU_readDMAMem(ptr, words_copy * 2, psxRegs.cycle); + psxCpu->Clear(madr, words_copy); - HW_DMA4_MADR = SWAPu32(madr + words * 4); + HW_DMA4_MADR = SWAPu32(madr + words_copy * 4); SPUDMA_INT(words * 4); return; @@ -134,58 
+130,56 @@ static u32 gpuDmaChainSize(u32 addr) { void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU u32 *ptr, madr_next, *madr_next_p, size; - u32 words, words_max, words_copy; + u32 words, words_left, words_max, words_copy; int do_walking; + madr &= ~3; switch (chcr) { case 0x01000200: // vram2mem -#ifdef PSXDMA_LOG PSXDMA_LOG("*** DMA2 GPU - vram2mem *** %lx addr = %lx size = %lx\n", chcr, madr, bcr); -#endif ptr = getDmaRam(madr, &words_max); if (ptr == INVALID_PTR) { -#ifdef CPU_LOG - CPU_LOG("*** DMA2 GPU - vram2mem *** NULL Pointer!!!\n"); -#endif + log_unhandled("bad dma2 madr %x\n", madr); break; } // BA blocks * BS words (word = 32-bits) - words = (bcr >> 16) * (bcr & 0xffff); - words_copy = min(words, words_max); + words = words_copy = (bcr >> 16) * (bcr & 0xffff); + if (words > words_max) { + log_unhandled("bad dma2 madr %x bcr %x\n", madr, bcr); + words_copy = words_max; + } GPU_readDataMem(ptr, words_copy); psxCpu->Clear(madr, words_copy); - HW_DMA2_MADR = SWAPu32((madr & ~3) + words * 4); + HW_DMA2_MADR = SWAPu32(madr + words_copy * 4); // already 32-bit word size ((size * 4) / 4) GPUDMA_INT(words / 4); return; case 0x01000201: // mem2vram -#ifdef PSXDMA_LOG PSXDMA_LOG("*** DMA 2 - GPU mem2vram *** %lx addr = %lx size = %lx\n", chcr, madr, bcr); -#endif - ptr = getDmaRam(madr, &words_max); - if (ptr == INVALID_PTR) { -#ifdef CPU_LOG - CPU_LOG("*** DMA2 GPU - mem2vram *** NULL Pointer!!!\n"); -#endif - break; + words = words_left = (bcr >> 16) * (bcr & 0xffff); + while (words_left > 0) { + ptr = getDmaRam(madr, &words_max); + if (ptr == INVALID_PTR) { + log_unhandled("bad2 dma madr %x\n", madr); + break; + } + words_copy = min(words_left, words_max); + GPU_writeDataMem(ptr, words_copy); + words_left -= words_copy; + madr += words_copy * 4; } - // BA blocks * BS words (word = 32-bits) - words = (bcr >> 16) * (bcr & 0xffff); - GPU_writeDataMem(ptr, min(words, words_max)); - HW_DMA2_MADR = SWAPu32((madr & ~3) + words * 4); + HW_DMA2_MADR = SWAPu32(madr); 
// already 32-bit word size ((size * 4) / 4) GPUDMA_INT(words / 4); return; case 0x01000401: // dma chain -#ifdef PSXDMA_LOG PSXDMA_LOG("*** DMA 2 - GPU dma chain *** %lx addr = %lx size = %lx\n", chcr, madr, bcr); -#endif // when not emulating walking progress, end immediately madr_next = 0xffffff; @@ -235,18 +229,15 @@ void gpuInterrupt() { } void psxDma6(u32 madr, u32 bcr, u32 chcr) { - u32 words; - u32 *mem = (u32 *)PSXM(madr); + u32 words, words_max; + u32 *mem; -#ifdef PSXDMA_LOG PSXDMA_LOG("*** DMA6 OT *** %x addr = %x size = %x\n", chcr, madr, bcr); -#endif if (chcr == 0x11000002) { + mem = getDmaRam(madr, &words_max); if (mem == INVALID_PTR) { -#ifdef CPU_LOG - CPU_LOG("*** DMA6 OT *** NULL Pointer!!!\n"); -#endif + log_unhandled("bad6 dma madr %x\n", madr); HW_DMA6_CHCR &= SWAP32(~0x01000000); DMA_INTERRUPT(6); return; @@ -255,7 +246,7 @@ void psxDma6(u32 madr, u32 bcr, u32 chcr) { // already 32-bit size words = bcr; - while (bcr--) { + while (bcr-- && mem > (u32 *)psxM) { *mem-- = SWAP32((madr - 4) & 0xffffff); madr -= 4; } From ea72f34a365392de7fdcdcb31c53307f2d12f90c Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 14 Aug 2023 01:36:03 +0300 Subject: [PATCH 313/597] psxbios: implement some pad reading details --- libpcsxcore/psxbios.c | 237 ++++++++++++++++++++++++++---------------- libpcsxcore/psxhle.c | 3 +- libpcsxcore/psxhle.h | 4 +- 3 files changed, 155 insertions(+), 89 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 4ecbcb6c1..313bc5070 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -251,9 +251,6 @@ typedef struct { } FileDesc; static int *pad_buf = NULL; -static char *pad_buf1 = NULL, *pad_buf2 = NULL; -static int pad_buf1len, pad_buf2len; -static int pad_stopped = 0; static u32 heap_size = 0; static u32 *heap_addr = NULL; static u32 *heap_end = NULL; @@ -275,9 +272,17 @@ static u32 card_active_chan = 0; #define A_KMALLOC_PTR 0x7460 #define A_KMALLOC_SIZE 0x7464 #define A_KMALLOC_END 0x7468 
+#define A_PADCRD_CHN_E 0x74a8 // pad/card irq chain entry +#define A_PAD_IRQR_ENA 0x74b8 // pad read on vint irq (nocash 'pad_enable_flag') +#define A_CARD_IRQR_ENA 0x74bc // same for card +#define A_PAD_INBUF 0x74c8 // 2x buffers for rx pad data +#define A_PAD_OUTBUF 0x74d0 // 2x buffers for tx pad data +#define A_PAD_IN_LEN 0x74d8 +#define A_PAD_OUT_LEN 0x74e0 #define A_EEXIT_PTR 0x75d0 #define A_EXC_STACK 0x85d8 // exception stack top #define A_RCNT_VBL_ACK 0x8600 +#define A_PAD_ACK_VBL 0x8914 // enable vint ack by pad reading code #define A_CD_EVENTS 0xb9b8 #define A_EXC_GP 0xf450 @@ -289,17 +294,34 @@ static u32 loadRam32(u32 addr) return SWAP32(*((u32 *)psxM + ((addr & 0x1fffff) >> 2))); } +static void *castRam8ptr(u32 addr) +{ + assert(!(addr & 0x5f800000)); + return psxM + (addr & 0x1fffff); +} + static void *castRam32ptr(u32 addr) { assert(!(addr & 0x5f800003)); return psxM + (addr & 0x1ffffc); } +static void *loadRam8ptr(u32 addr) +{ + return castRam8ptr(loadRam32(addr)); +} + static void *loadRam32ptr(u32 addr) { return castRam32ptr(loadRam32(addr)); } +static void storeRam8(u32 addr, u8 d) +{ + assert(!(addr & 0x5f800000)); + *((u8 *)psxM + (addr & 0x1fffff)) = d; +} + static void storeRam32(u32 addr, u32 d) { assert(!(addr & 0x5f800000)); @@ -1596,13 +1618,13 @@ void psxBios__96_init() { // 71 pc0 = ra; } -static void psxBios_SysDeqIntRP_(); +static void write_chain(u32 *d, u32 next, u32 handler1, u32 handler2); +static void psxBios_SysEnqIntRP_(u32 priority, u32 chain_eptr); +static void psxBios_SysDeqIntRP_(u32 priority, u32 chain_rm_eptr); static void psxBios_DequeueCdIntr_() { - a0 = 0; a1 = 0x91d0; - psxBios_SysDeqIntRP_(); - a0 = 0; a1 = 0x91e0; - psxBios_SysDeqIntRP_(); + psxBios_SysDeqIntRP_(0, 0x91d0); + psxBios_SysDeqIntRP_(0, 0x91e0); use_cycles(16); } @@ -2034,36 +2056,55 @@ void psxBios_ChangeTh() { // 10 } void psxBios_InitPAD() { // 0x12 -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x12]); -#endif + u32 i, *ram32 = (u32 
*)psxM; + PSXBIOS_LOG("psxBios_%s %x %x %x %x\n", biosB0n[0x12], a0, a1, a2, a3); + + // printf("%s", "PS-X Control PAD Driver Ver 3.0"); + // PAD_dr_enable = 0; + ram32[A_PAD_OUTBUF/4 + 0] = 0; + ram32[A_PAD_OUTBUF/4 + 1] = 0; + ram32[A_PAD_OUT_LEN/4 + 0] = 0; + ram32[A_PAD_OUT_LEN/4 + 1] = 0; + ram32[A_PAD_INBUF/4 + 0] = SWAP32(a0); + ram32[A_PAD_INBUF/4 + 1] = SWAP32(a2); + ram32[A_PAD_IN_LEN/4 + 0] = SWAP32(a1); + ram32[A_PAD_IN_LEN/4 + 1] = SWAP32(a3); + + for (i = 0; i < a1; i++) { + use_cycles(4); + storeRam8(a0 + i, 0); + } + for (i = 0; i < a3; i++) { + use_cycles(4); + storeRam8(a2 + i, 0); + } + write_chain(ram32 + A_PADCRD_CHN_E/4, 0, 0x49bc, 0x4a4c); - pad_buf1 = (char*)Ra0; - pad_buf1len = a1; - pad_buf2 = (char*)Ra2; - pad_buf2len = a3; + ram32[A_PAD_IRQR_ENA/4] = SWAP32(1); - v0 = 1; pc0 = ra; + mips_return_c(1, 200); } void psxBios_StartPAD() { // 13 -#ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x13]); -#endif - pad_stopped = 0; - psxHwWrite16(0x1f801074, (unsigned short)(psxHwRead16(0x1f801074) | 0x1)); + + psxBios_SysDeqIntRP_(2, A_PADCRD_CHN_E); + psxBios_SysEnqIntRP_(2, A_PADCRD_CHN_E); + psxHwWrite16(0x1f801070, ~1); + psxHwWrite16(0x1f801074, psxHu32(0x1074) | 1); + storeRam32(A_PAD_ACK_VBL, 1); + storeRam32(A_RCNT_VBL_ACK + (3 << 2), 0); psxRegs.CP0.n.SR |= 0x401; - pc0 = ra; + + mips_return_c(1, 300); } void psxBios_StopPAD() { // 14 -#ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x14]); -#endif - pad_stopped = 1; - pad_buf1 = NULL; - pad_buf2 = NULL; - pc0 = ra; + storeRam32(A_RCNT_VBL_ACK + (3 << 2), 1); + psxBios_SysDeqIntRP_(2, A_PADCRD_CHN_E); + psxRegs.CP0.n.SR |= 0x401; + mips_return_void_c(200); } void psxBios_PAD_init() { // 15 @@ -2546,27 +2587,37 @@ void psxBios_delete() { // 45 } void psxBios_InitCARD() { // 4a -#ifdef PSXBIOS_LOG + u32 *ram32 = (u32 *)psxM; PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x4a], a0); -#endif + write_chain(ram32 + A_PADCRD_CHN_E/4, 0, 0x49bc, 0x4a4c); + // (maybe) todo: 
early_card_irq, FlushCache etc - pc0 = ra; + ram32[A_PAD_IRQR_ENA/4] = SWAP32(a0); + + mips_return_c(0, 300); } void psxBios_StartCARD() { // 4b -#ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x4b]); -#endif + psxBios_SysDeqIntRP_(2, A_PADCRD_CHN_E); + psxBios_SysEnqIntRP_(2, A_PADCRD_CHN_E); - pc0 = ra; + psxHwWrite16(0x1f801074, psxHu32(0x1074) | 1); + storeRam32(A_PAD_ACK_VBL, 1); + storeRam32(A_RCNT_VBL_ACK + (3 << 2), 0); + storeRam32(A_CARD_IRQR_ENA, 1); + psxRegs.CP0.n.SR |= 0x401; + + mips_return_c(1, 200); } void psxBios_StopCARD() { // 4c -#ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x4c]); -#endif - - pc0 = ra; + storeRam32(A_RCNT_VBL_ACK + (3 << 2), 1); + psxBios_SysDeqIntRP_(2, A_PADCRD_CHN_E); + storeRam32(A_CARD_IRQR_ENA, 0); + psxRegs.CP0.n.SR |= 0x401; + mips_return_void_c(200); } void psxBios__card_write() { // 0x4e @@ -2722,12 +2773,13 @@ void psxBios__card_chan() { // 0x58 pc0 = ra; } -void psxBios_ChangeClearPad() { // 5b -#ifdef PSXBIOS_LOG +static void psxBios_ChangeClearPad() { // 5b + u32 ret; PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x5b], a0); -#endif + ret = loadRam32(A_PAD_ACK_VBL); + storeRam32(A_PAD_ACK_VBL, a0); - pc0 = ra; + mips_return_c(ret, 6); } void psxBios__card_status() { // 5c @@ -2750,8 +2802,6 @@ void psxBios__card_wait() { // 5d /* System calls C0 */ -static void psxBios_SysEnqIntRP(); - static void psxBios_InitRCnt() { // 00 int i; PSXBIOS_LOG("psxBios_%s %x\n", biosC0n[0x00], a0); @@ -2761,15 +2811,13 @@ static void psxBios_InitRCnt() { // 00 psxHwWrite16(0x1f801100 + i*0x10 + 8, 0); psxHwWrite16(0x1f801100 + i*0x10 + 0, 0); } - a1 = 0x6d88; - psxBios_SysEnqIntRP(); + psxBios_SysEnqIntRP_(a0, 0x6d88); mips_return_c(0, 9); } static void psxBios_InitException() { // 01 PSXBIOS_LOG("psxBios_%s %x\n", biosC0n[0x01], a0); - a1 = 0x6da8; - psxBios_SysEnqIntRP(); + psxBios_SysEnqIntRP_(a0, 0x6da8); mips_return_c(0, 9); } @@ -2777,40 +2825,44 @@ static void psxBios_InitException() { // 01 * int 
SysEnqIntRP(int index , long *queue); */ -static void psxBios_SysEnqIntRP() { // 02 +static void psxBios_SysEnqIntRP_(u32 priority, u32 chain_eptr) { u32 old, base = loadRam32(A_TT_ExCB); - PSXBIOS_LOG("psxBios_%s %x %x\n", biosC0n[0x02], a0, a1); - old = loadRam32(base + (a0 << 3)); - storeRam32(base + (a0 << 3), a1); - storeRam32(a1, old); + old = loadRam32(base + (priority << 3)); + storeRam32(base + (priority << 3), chain_eptr); + storeRam32(chain_eptr, old); mips_return_c(0, 9); } +static void psxBios_SysEnqIntRP() { // 02 + PSXBIOS_LOG("psxBios_%s %x %x\n", biosC0n[0x02], a0, a1); + psxBios_SysEnqIntRP_(a0, a1); +} + /* * int SysDeqIntRP(int index , long *queue); */ -static void psxBios_SysDeqIntRP_() { // 03 +static void psxBios_SysDeqIntRP_(u32 priority, u32 chain_rm_eptr) { u32 ptr, next, base = loadRam32(A_TT_ExCB); u32 lim = 0, ret = 0; // as in original: no arg checks of any kind, bug if a1 == 0 - ptr = loadRam32(base + (a0 << 3)); + ptr = loadRam32(base + (priority << 3)); while (ptr) { next = loadRam32(ptr); - if (ptr == a1) { - storeRam32(base + (a0 << 3), next); + if (ptr == chain_rm_eptr) { + storeRam32(base + (priority << 3), next); ret = ptr; use_cycles(6); break; } - while (next && next != a1 && lim++ < 100) { + while (next && next != chain_rm_eptr && lim++ < 100) { ptr = next; next = loadRam32(ptr); use_cycles(8); } - if (next == a1) { + if (next == chain_rm_eptr) { next = loadRam32(next); storeRam32(ptr, next); ret = ptr; @@ -2819,14 +2871,14 @@ static void psxBios_SysDeqIntRP_() { // 03 break; } if (lim == 100) - PSXBIOS_LOG("bad chain %u %x\n", a0, base); + PSXBIOS_LOG("bad chain %u %x\n", priority, base); mips_return_c(ret, 12); } static void psxBios_SysDeqIntRP() { // 03 PSXBIOS_LOG("psxBios_%s %x %x\n", biosC0n[0x03], a0, a1); - psxBios_SysDeqIntRP_(); + psxBios_SysDeqIntRP_(a0, a1); } static void psxBios_get_free_EvCB_slot() { // 04 @@ -2872,8 +2924,7 @@ static void psxBios_ChangeClearRCnt() { // 0a static void psxBios_InitDefInt() { // 
0c PSXBIOS_LOG("psxBios_%s %x\n", biosC0n[0x0c], a0); // should also clear the autoack table - a1 = 0x6d98; - psxBios_SysEnqIntRP(); + psxBios_SysEnqIntRP_(a0, 0x6d98); mips_return_c(0, 20 + 6*2); } @@ -2940,6 +2991,8 @@ static const struct { { 0x1920, hleop_exc1_3_1 }, { 0x1794, hleop_exc1_3_2 }, { 0x2458, hleop_exc3_0_2 }, + { 0x49bc, hleop_exc_padcard1 }, + { 0x4a4c, hleop_exc_padcard2 }, }; static int chain_hle_op(u32 handler) @@ -2958,7 +3011,7 @@ static void write_chain(u32 *d, u32 next, u32 handler1, u32 handler2) d[1] = SWAPu32(handler1); d[2] = SWAPu32(handler2); - // install hle traps + // install the hle traps PSXMu32ref(handler1) = HLEOP(chain_hle_op(handler1)); PSXMu32ref(handler2) = HLEOP(chain_hle_op(handler2)); } @@ -3338,11 +3391,7 @@ void psxBiosInit() { //************** THE END *************************************** /**/ - pad_stopped = 1; pad_buf = NULL; - pad_buf1 = NULL; - pad_buf2 = NULL; - pad_buf1len = pad_buf2len = 0; heap_addr = NULL; heap_end = NULL; heap_size = 0; @@ -3423,9 +3472,14 @@ void psxBiosInit() { // patch: +3d8, +4dc, +594, +62c, +9c8, +1988 // call: +7a0=4b70, +884=4c54, +894=4c64 ptr[0x5b] = SWAP32(0x43d0); - ram32[0x4b70/4] = SWAP32(0x03e00008); // jr $ra - ram32[0x4c54/4] = SWAP32(0x03e00008); // jr $ra + ram32[0x4b70/4] = SWAP32(0x03e00008); // jr $ra // setPadOutputBuf + + ram32[0x4c54/4] = SWAP32(0x240e0001); // mov $t6, 1 + ram32[0x4c58/4] = SWAP32(0x03e00008); // jr $ra + ram32[0x4c5c/4] = SWAP32(0xac0e0000 + A_PAD_IRQR_ENA); // sw $t6, ... + ram32[0x4c64/4] = SWAP32(0x03e00008); // jr $ra + ram32[0x4c68/4] = SWAP32(0xac000000 + A_PAD_IRQR_ENA); // sw $0, ... 
ptr = (u32 *)&psxM[A_C0_TABLE]; for (i = 0; i < 256/2; i++) @@ -3472,8 +3526,6 @@ void psxBiosCnfLoaded(u32 tcb_cnt, u32 evcb_cnt) { } static void biosPadHLE() { - int i, bufcount; - if (pad_buf != NULL) { u32 *buf = (u32*)pad_buf; @@ -3505,15 +3557,6 @@ static void biosPadHLE() { *buf |= PAD2_poll(0) << 16; } } - if (!pad_stopped) { - if (pad_buf1) { - psxBios_PADpoll(1); - } - - if (pad_buf2) { - psxBios_PADpoll(2); - } - } } static void handle_chain_x_x_1(u32 enable, u32 irqbit) @@ -3692,6 +3735,33 @@ void hleExc3_0_2_defint(void) mips_return_c(0, 11 + 7*11 + 7*11 + 12); } +void hleExcPadCard1(void) +{ + if (loadRam32(A_PAD_IRQR_ENA)) { + u8 *pad_buf1 = loadRam8ptr(A_PAD_INBUF + 0); + u8 *pad_buf2 = loadRam8ptr(A_PAD_INBUF + 4); + int i, bufcount; + + psxBios_PADpoll(1); + psxBios_PADpoll(2); + biosPadHLE(); + use_cycles(100); + } + if (loadRam32(A_PAD_ACK_VBL)) + psxHwWrite16(0x1f801070, ~1); + if (loadRam32(A_CARD_IRQR_ENA)) { + // todo, maybe + } + + mips_return_c(0, 18); +} + +void hleExcPadCard2(void) +{ + u32 ret = psxHu32(0x1074) & psxHu32(0x1070) & 1; + mips_return_c(ret, 15); +} + void psxBiosException() { u32 tcbPtr = loadRam32(A_TT_PCB); u32 *chains = loadRam32ptr(A_TT_ExCB); @@ -3740,10 +3810,6 @@ void psxBiosException() { } assert(lim < 100); - // TODO make this a chain entry - if (psxHu32(0x1070) & 1) - biosPadHLE(); - // return from exception (custom or default) use_cycles(23); ptr = loadRam32(A_EEXIT_PTR); @@ -3781,11 +3847,8 @@ void psxBiosFreeze(int Mode) { u32 base = 0x40000; bfreezepsxMptr(pad_buf, int); - bfreezepsxMptr(pad_buf1, char); - bfreezepsxMptr(pad_buf2, char); bfreezepsxMptr(heap_addr, u32); bfreezes(FDesc); bfreezel(&card_active_chan); - bfreezel(&pad_stopped); bfreezel(&heap_size); } diff --git a/libpcsxcore/psxhle.c b/libpcsxcore/psxhle.c index 5ef484032..c3276b305 100644 --- a/libpcsxcore/psxhle.c +++ b/libpcsxcore/psxhle.c @@ -98,7 +98,7 @@ static void hleExecRet() { psxRegs.pc = psxRegs.GPR.n.ra; } -void (* const 
psxHLEt[22])() = { +void (* const psxHLEt[24])() = { hleDummy, hleA0, hleB0, hleC0, hleBootstrap, hleExecRet, psxBiosException, hleDummy, hleExc0_0_1, hleExc0_0_2, @@ -108,4 +108,5 @@ void (* const psxHLEt[22])() = { hleExc1_2_1, hleExc1_2_2, hleExc1_3_1, hleExc1_3_2, hleExc3_0_2_defint, + hleExcPadCard1, hleExcPadCard2, }; diff --git a/libpcsxcore/psxhle.h b/libpcsxcore/psxhle.h index 5535ab008..b5508725f 100644 --- a/libpcsxcore/psxhle.h +++ b/libpcsxcore/psxhle.h @@ -36,6 +36,7 @@ void hleExc1_1_1(); void hleExc1_1_2(); void hleExc1_2_1(); void hleExc1_2_2(); void hleExc1_3_1(); void hleExc1_3_2(); void hleExc3_0_2_defint(); +void hleExcPadCard1(); void hleExcPadCard2(); enum hle_op { hleop_dummy = 0, hleop_a0, hleop_b0, hleop_c0, @@ -47,9 +48,10 @@ enum hle_op { hleop_exc1_2_1, hleop_exc1_2_2, hleop_exc1_3_1, hleop_exc1_3_2, hleop_exc3_0_2, + hleop_exc_padcard1, hleop_exc_padcard2, }; -extern void (* const psxHLEt[22])(); +extern void (* const psxHLEt[24])(); #ifdef __cplusplus } From f708aab763c74aac1c5f797472e91745e627fe0d Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 14 Aug 2023 23:19:37 +0300 Subject: [PATCH 314/597] configure: rm some leftover --- configure | 1 - 1 file changed, 1 deletion(-) diff --git a/configure b/configure index 2352ec6aa..39587aa4d 100755 --- a/configure +++ b/configure @@ -153,7 +153,6 @@ if [ "$show_help" = "yes" ]; then echo " --disable-neon enable/disable ARM NEON optimizations [guessed]" echo " --disable-dynarec disable dynamic recompiler" echo " (dynarec is only available and enabled on ARM)" - echo " --disable-icache-emu Disables the instruction cache emulation" echo "influential environment variables:" echo " CROSS_COMPILE CC CXX AS AR CFLAGS ASFLAGS LDFLAGS LDLIBS" exit 1 From 14b3bd95c0a1304fb6de3c34b9d4dbfc065ce7fb Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 14 Aug 2023 23:44:25 +0300 Subject: [PATCH 315/597] psxbios: try to improve initial boot state --- libpcsxcore/misc.c | 30 +++---------- libpcsxcore/misc.h | 1 - 
libpcsxcore/psxbios.c | 97 +++++++++++++++++++++++++++++++++++++++---- libpcsxcore/psxbios.h | 1 + libpcsxcore/psxhle.c | 2 +- libpcsxcore/r3000a.c | 9 ++-- libpcsxcore/r3000a.h | 2 +- 7 files changed, 102 insertions(+), 40 deletions(-) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index bba81b1e8..ab3e1a486 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -144,30 +144,6 @@ int GetCdromFile(u8 *mdir, u8 *time, char *filename) { return retval; } -static const unsigned int gpu_ctl_def[] = { - 0x00000000, 0x01000000, 0x03000000, 0x04000000, - 0x05000800, 0x06c60260, 0x0703fc10, 0x08000027, -}; - -static const unsigned int gpu_data_def[] = { - 0xe100360b, 0xe2000000, 0xe3000800, 0xe4077e7f, - 0xe5001000, 0xe6000000, - 0x02000000, 0x00000000, 0x01ff03ff, -}; - -void BiosLikeGPUSetup() -{ - int i; - - for (i = 0; i < sizeof(gpu_ctl_def) / sizeof(gpu_ctl_def[0]); i++) - GPU_writeStatus(gpu_ctl_def[i]); - - for (i = 0; i < sizeof(gpu_data_def) / sizeof(gpu_data_def[0]); i++) - GPU_writeData(gpu_data_def[i]); - - HW_GPU_STATUS |= SWAP32(PSXGPU_nBUSY); -} - static void SetBootRegs(u32 pc, u32 gp, u32 sp) { //printf("%s %08x %08x %08x\n", __func__, pc, gp, sp); @@ -176,6 +152,10 @@ static void SetBootRegs(u32 pc, u32 gp, u32 sp) psxRegs.pc = pc; psxRegs.GPR.n.gp = gp; psxRegs.GPR.n.sp = sp ? 
sp : 0x801fff00; + psxRegs.GPR.n.fp = psxRegs.GPR.n.sp; + + psxRegs.GPR.n.t0 = psxRegs.GPR.n.sp; // mimic A(43) + psxRegs.GPR.n.t3 = pc; psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); } @@ -271,7 +251,7 @@ int LoadCdrom() { memcpy(&tmpHead, buf + 12, sizeof(EXE_HEADER)); - SysPrintf("manual booting '%s'\n", exename); + SysPrintf("manual booting '%s' pc=%x\n", exename, SWAP32(tmpHead.pc0)); sp = SWAP32(tmpHead.s_addr); if (cnf_stack) sp = cnf_stack; diff --git a/libpcsxcore/misc.h b/libpcsxcore/misc.h index da99885a4..c5eb327a8 100644 --- a/libpcsxcore/misc.h +++ b/libpcsxcore/misc.h @@ -56,7 +56,6 @@ typedef struct { extern char CdromId[10]; extern char CdromLabel[33]; -void BiosLikeGPUSetup(); void BiosBootBypass(); int LoadCdrom(); diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 313bc5070..c7cbf09ba 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -181,7 +181,7 @@ char *biosC0n[256] = { #define k1 (psxRegs.GPR.n.k1) #define gp (psxRegs.GPR.n.gp) #define sp (psxRegs.GPR.n.sp) -#define fp (psxRegs.GPR.n.s8) +#define fp (psxRegs.GPR.n.fp) #define ra (psxRegs.GPR.n.ra) #define pc0 (psxRegs.pc) @@ -2943,9 +2943,7 @@ void (*biosA0[256])(); void (*biosC0[256+128])(); void (**biosB0)() = biosC0 + 128; -#include "sjisfont.h" - -void setup_mips_code() +static void setup_mips_code() { u32 *ptr; ptr = (u32 *)&psxM[A_SYSCALL]; @@ -2967,7 +2965,6 @@ void setup_mips_code() ptr[0x60/4] = SWAPu32(0x40037000); // mfc0 $v1, EPC ptr[0x64/4] = SWAPu32(0x40026800); // mfc0 $v0, Cause - ptr[0x68/4] = SWAPu32(0x24630004); // addiu $v1, $v1, 4 ptr[0x6c/4] = SWAPu32(0xaf430080); // sw $v1, 0x80($k0) ptr[0xb0/4] = HLEOP(hleop_exception); @@ -3062,6 +3059,91 @@ static void setup_tt(u32 tcb_cnt, u32 evcb_cnt) DeliverEvent(0xf0000003, 0x0010); } +static const u32 gpu_ctl_def[] = { + 0x00000000, 0x01000000, 0x03000000, 0x04000000, + 0x05000800, 0x06c60260, 0x0703fc10, 0x08000027 +}; + +static const u32 gpu_data_def[] = { + 0xe100360b, 0xe2000000, 
0xe3000800, 0xe4077e7f, + 0xe5001000, 0xe6000000, + 0x02000000, 0x00000000, 0x01ff03ff +}; + +// from 1f801d80 +static const u16 spu_config[] = { + 0x3fff, 0x37ef, 0x5ebc, 0x5ebc, 0x0000, 0x0000, 0x0000, 0x00a0, + 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0x00ff, 0x0000, 0x0000, + 0x0000, 0xe128, 0x0000, 0x0200, 0xf0f0, 0xc085, 0x0004, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x033d, 0x0231, 0x7e00, 0x5000, 0xb400, 0xb000, 0x4c00, 0xb000, + 0x6000, 0x5400, 0x1ed6, 0x1a31, 0x1d14, 0x183b, 0x1bc2, 0x16b2, + 0x1a32, 0x15ef, 0x15ee, 0x1055, 0x1334, 0x0f2d, 0x11f6, 0x0c5d, + 0x1056, 0x0ae1, 0x0ae0, 0x07a2, 0x0464, 0x0232, 0x8000, 0x8000 +}; + +void psxBiosSetupBootState(void) +{ + boolean hle = Config.HLE; + u32 *hw = (u32 *)psxH; + int i; + + // see also SetBootRegs() + if (hle) { + v0 = 1; v1 = 4; + a0 = 1; a2 = a3 = 0; a3 = 0x2a; + t2 = 0x2d; t4 = 0x23; t5 = 0x2b; t6 = 0xa0010000; + s0 = 0xa000b870; + k0 = 0xbfc0d968; k1 = 0xf1c; + ra = 0xf0001234; // just to easily detect attempts to return + psxRegs.CP0.n.Cause = 0x20; + psxRegs.CP0.n.EPC = 0xbfc0d964; // EnterCriticalSection syscall + + hw[0x1000/4] = SWAP32(0x1f000000); + hw[0x1004/4] = SWAP32(0x1f802000); + hw[0x1008/4] = SWAP32(0x0013243f); + hw[0x100c/4] = SWAP32(0x00003022); + hw[0x1010/4] = SWAP32(0x0013243f); + hw[0x1014/4] = SWAP32(0x200931e1); + hw[0x1018/4] = SWAP32(0x00020943); + hw[0x101c/4] = SWAP32(0x00070777); + hw[0x1020/4] = SWAP32(0x0000132c); + hw[0x1060/4] = SWAP32(0x00000b88); + hw[0x1070/4] = SWAP32(0x00000001); + hw[0x1074/4] = SWAP32(0x0000000c); + hw[0x2040/4] = SWAP32(0x00000900); + } + + hw[0x10a0/4] = SWAP32(0x00ffffff); + hw[0x10a8/4] = SWAP32(0x00000401); + hw[0x10b0/4] = SWAP32(0x0008b000); + hw[0x10b4/4] = SWAP32(0x00010200); + hw[0x10e0/4] = SWAP32(0x000eccf4); + hw[0x10e4/4] = SWAP32(0x00000400); + hw[0x10e8/4] = SWAP32(0x00000002); + hw[0x10f0/4] = SWAP32(0x00009099); + hw[0x10f4/4] = SWAP32(0x8c8c0000); + + if (hle) { + psxRcntWmode(0, 0); + 
psxRcntWmode(1, 0); + psxRcntWmode(2, 0); + } + + // gpu + for (i = 0; i < sizeof(gpu_ctl_def) / sizeof(gpu_ctl_def[0]); i++) + GPU_writeStatus(gpu_ctl_def[i]); + for (i = 0; i < sizeof(gpu_data_def) / sizeof(gpu_data_def[0]); i++) + GPU_writeData(gpu_data_def[i]); + HW_GPU_STATUS |= SWAP32(PSXGPU_nBUSY); + + // spu + for (i = 0x1f801d80; i < sizeof(spu_config) / sizeof(spu_config[0]); i++) + SPU_writeRegister(0x1f801d80 + i*2, spu_config[i], psxRegs.cycle); +} + +#include "sjisfont.h" + void psxBiosInit() { u32 *ptr, *ram32, *rom32; int i; @@ -3413,9 +3495,6 @@ void psxBiosInit() { len = 0x80000 - 0x69d68; uncompress((Bytef *)(psxR + 0x69d68), &len, font_889f, sizeof(font_889f)); - // memory size 2 MB - psxHu32ref(0x1060) = SWAPu32(0x00000b88); - /* Some games like R-Types, CTR, Fade to Black read from adress 0x00000000 due to uninitialized pointers. See Garbage Area at Address 00000000h in Nocash PSX Specfications for more information. Here are some examples of games not working with this fix in place : @@ -3779,7 +3858,7 @@ void psxBiosException() { } tcb->lo = SWAP32(psxRegs.GPR.n.lo); tcb->hi = SWAP32(psxRegs.GPR.n.hi); - tcb->epc = SWAP32(psxRegs.CP0.n.EPC); + //tcb->epc = SWAP32(psxRegs.CP0.n.EPC); // done by asm tcb->sr = SWAP32(psxRegs.CP0.n.SR); tcb->cause = SWAP32(psxRegs.CP0.n.Cause); sp = fp = loadRam32(A_EXC_SP); diff --git a/libpcsxcore/psxbios.h b/libpcsxcore/psxbios.h index fdbf2e505..2a4fa80a9 100644 --- a/libpcsxcore/psxbios.h +++ b/libpcsxcore/psxbios.h @@ -39,6 +39,7 @@ void psxBiosShutdown(); void psxBiosException(); void psxBiosFreeze(int Mode); void psxBiosCnfLoaded(u32 tcbs, u32 events); +void psxBiosSetupBootState(void); extern void (*biosA0[256])(); extern void (**biosB0)(); diff --git a/libpcsxcore/psxhle.c b/libpcsxcore/psxhle.c index c3276b305..379ffd1f9 100644 --- a/libpcsxcore/psxhle.c +++ b/libpcsxcore/psxhle.c @@ -90,7 +90,7 @@ static void hleExecRet() { psxRegs.GPR.n.ra = header->ret; psxRegs.GPR.n.sp = header->_sp; - 
psxRegs.GPR.n.s8 = header->_fp; + psxRegs.GPR.n.fp = header->_fp; psxRegs.GPR.n.gp = header->_gp; psxRegs.GPR.n.s0 = header->base; diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 0be8a53c4..488810680 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -53,6 +53,7 @@ int psxInit() { } void psxReset() { + boolean introBypassed = FALSE; psxMemReset(); memset(&psxRegs, 0, sizeof(psxRegs)); @@ -72,13 +73,15 @@ void psxReset() { psxHwReset(); psxBiosInit(); - BiosLikeGPUSetup(); // a bit of a hack but whatever - if (!Config.HLE) { psxExecuteBios(); - if (psxRegs.pc == 0x80030000 && !Config.SlowBoot) + if (psxRegs.pc == 0x80030000 && !Config.SlowBoot) { BiosBootBypass(); + introBypassed = TRUE; + } } + if (Config.HLE || introBypassed) + psxBiosSetupBootState(); #ifdef EMU_LOG EMU_LOG("*BIOS END*\n"); diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 668231477..912a41f6d 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -88,7 +88,7 @@ typedef union { u32 r0, at, v0, v1, a0, a1, a2, a3, t0, t1, t2, t3, t4, t5, t6, t7, s0, s1, s2, s3, s4, s5, s6, s7, - t8, t9, k0, k1, gp, sp, s8, ra, lo, hi; + t8, t9, k0, k1, gp, sp, fp, ra, lo, hi; } n; u32 r[34]; /* Lo, Hi in r[32] and r[33] */ PAIR p[34]; From 206a936ef243684c027c3a7827259b07113e8af5 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 14 Aug 2023 23:47:07 +0300 Subject: [PATCH 316/597] dma: should clear both start bits at least that's how I understand nocash --- libpcsxcore/psxdma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index 31424b3dd..42fb3bab6 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -238,7 +238,7 @@ void psxDma6(u32 madr, u32 bcr, u32 chcr) { mem = getDmaRam(madr, &words_max); if (mem == INVALID_PTR) { log_unhandled("bad6 dma madr %x\n", madr); - HW_DMA6_CHCR &= SWAP32(~0x01000000); + HW_DMA6_CHCR &= SWAP32(~0x11000000); DMA_INTERRUPT(6); return; } @@ -263,7 +263,7 @@ 
void psxDma6(u32 madr, u32 bcr, u32 chcr) { log_unhandled("*** DMA6 OT - unknown *** %x addr = %x size = %x\n", chcr, madr, bcr); } - HW_DMA6_CHCR &= SWAP32(~0x01000000); + HW_DMA6_CHCR &= SWAP32(~0x11000000); DMA_INTERRUPT(6); } @@ -271,7 +271,7 @@ void gpuotcInterrupt() { if (HW_DMA6_CHCR & SWAP32(0x01000000)) { - HW_DMA6_CHCR &= SWAP32(~0x01000000); + HW_DMA6_CHCR &= SWAP32(~0x11000000); DMA_INTERRUPT(6); } } From 31cd6032b51dd31def3cee4d778e480c8fb0df67 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 14 Aug 2023 23:49:23 +0300 Subject: [PATCH 317/597] psxbios: some assorted changes --- libpcsxcore/psxbios.c | 153 +++++++++++++++++++++++++----------------- libpcsxcore/psxhle.c | 10 +-- 2 files changed, 96 insertions(+), 67 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index c7cbf09ba..7e88c9cde 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -35,14 +35,17 @@ #include "gpu.h" #include "sio.h" #include "psxhle.h" +#include "psxinterpreter.h" #include #if (defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__) #pragma GCC diagnostic ignored "-Wpointer-sign" #endif -#undef SysPrintf -#define SysPrintf if (Config.PsxOut) printf +#ifndef PSXBIOS_LOG +//#define PSXBIOS_LOG printf +#define PSXBIOS_LOG(...) +#endif char *biosA0n[256] = { // 0x00 @@ -66,9 +69,9 @@ char *biosA0n[256] = { "realloc", "InitHeap", "_exit", "getchar", "putchar", "gets", "puts", "printf", // 0x40 - "sys_a0_40", "LoadTest", "Load", "Exec", + "SystemErrorUnresolvedException", "LoadTest", "Load", "Exec", "FlushCache", "InstallInterruptHandler", "GPU_dw", "mem2vram", - "SendGPUStatus", "GPU_cw", "GPU_cwb", "SendPackets", + "SendGPUStatus", "GPU_cw", "GPU_cwb", "SendPackets", "sys_a0_4c", "GetGPUStatus", "GPU_sync", "sys_a0_4f", // 0x50 "sys_a0_50", "LoadExec", "GetSysSp", "sys_a0_53", @@ -102,7 +105,7 @@ char *biosA0n[256] = { "_card_load", "_card_auto", "bufs_cd_4", "sys_a0_af", // 0xb0 "sys_a0_b0", "sys_a0_b1", "do_a_long_jmp", "sys_a0_b3", - "?? 
sub_function", + "GetSystemInfo", }; char *biosB0n[256] = { @@ -224,10 +227,10 @@ typedef struct { u32 gp0; u32 t_addr; u32 t_size; - u32 d_addr; + u32 d_addr; // 10 u32 d_size; u32 b_addr; - u32 b_size; + u32 b_size; // 1c u32 S_addr; u32 s_size; u32 _sp, _fp, _gp, ret, base; @@ -360,25 +363,31 @@ static int returned_from_exception(void) static inline void softCall(u32 pc) { u32 sra = ra; u32 ssr = psxRegs.CP0.n.SR; + u32 lim = 0; pc0 = pc; ra = 0x80001000; psxRegs.CP0.n.SR &= ~0x404; // disable interrupts - while (pc0 != 0x80001000) + while (pc0 != 0x80001000 && ++lim < 1000000) psxCpu->ExecuteBlock(EXEC_CALLER_HLE); + if (lim == 1000000) + PSXBIOS_LOG("softCall @%x hit lim\n", pc); ra = sra; - psxRegs.CP0.n.SR = ssr; + psxRegs.CP0.n.SR |= ssr & 0x404; } static inline void softCallInException(u32 pc) { u32 sra = ra; + u32 lim = 0; pc0 = pc; ra = 0x80001000; - while (!returned_from_exception() && pc0 != 0x80001000) + while (!returned_from_exception() && pc0 != 0x80001000 && ++lim < 1000000) psxCpu->ExecuteBlock(EXEC_CALLER_HLE); + if (lim == 1000000) + PSXBIOS_LOG("softCallInException @%x hit lim\n", pc); if (pc0 == 0x80001000) ra = sra; } @@ -395,7 +404,7 @@ static void CloseEvent(u32 ev); #define buread(Ra1, mcd, length) { \ - SysPrintf("read %d: %x,%x (%s)\n", FDesc[1 + mcd].mcfile, FDesc[1 + mcd].offset, a2, Mcd##mcd##Data + 128 * FDesc[1 + mcd].mcfile + 0xa); \ + PSXBIOS_LOG("read %d: %x,%x (%s)\n", FDesc[1 + mcd].mcfile, FDesc[1 + mcd].offset, a2, Mcd##mcd##Data + 128 * FDesc[1 + mcd].mcfile + 0xa); \ ptr = Mcd##mcd##Data + 8192 * FDesc[1 + mcd].mcfile + FDesc[1 + mcd].offset; \ memcpy(Ra1, ptr, length); \ if (FDesc[1 + mcd].mode & 0x8000) { \ @@ -408,7 +417,7 @@ static void CloseEvent(u32 ev); #define buwrite(Ra1, mcd, length) { \ u32 offset = + 8192 * FDesc[1 + mcd].mcfile + FDesc[1 + mcd].offset; \ - SysPrintf("write %d: %x,%x\n", FDesc[1 + mcd].mcfile, FDesc[1 + mcd].offset, a2); \ + PSXBIOS_LOG("write %d: %x,%x\n", FDesc[1 + mcd].mcfile, FDesc[1 + 
mcd].offset, a2); \ ptr = Mcd##mcd##Data + offset; \ memcpy(ptr, Ra1, length); \ FDesc[1 + mcd].offset += length; \ @@ -420,11 +429,6 @@ static void CloseEvent(u32 ev); else v0 = length; \ } -#ifndef PSXBIOS_LOG -//#define PSXBIOS_LOG printf -#define PSXBIOS_LOG(...) -#endif - /* Internally redirects to "FileRead(fd,tempbuf,1)".*/ /* For some strange reason, the returned character is sign-expanded; */ /* So if a return value of FFFFFFFFh could mean either character FFh, or error. */ @@ -1242,11 +1246,8 @@ void psxBios_malloc() { // 0x33 void psxBios_free() { // 0x34 -#ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x34]); -#endif - - SysPrintf("free %x: %x bytes\n", a0, *(u32*)(Ra0-4)); + PSXBIOS_LOG("free %x: %x bytes\n", a0, *(u32*)(Ra0-4)); if (a0) *(u32*)(Ra0-4) |= 1; // set chunk to free @@ -1314,7 +1315,7 @@ void psxBios_InitHeap() { // 0x39 /* HACKFIX: Commenting out this line fixes GTA2 crash */ //*heap_addr = SWAP32(size | 1); - SysPrintf("InitHeap %x,%x : %x %x\n",a0,a1, (int)((uptr)heap_addr-(uptr)psxM), size); + PSXBIOS_LOG("InitHeap %x,%x : %x %x\n",a0,a1, (int)((uptr)heap_addr-(uptr)psxM), size); pc0 = ra; } @@ -1391,7 +1392,8 @@ static void psxBios_printf_psxout() { // 0x3f if (psp != INVALID_PTR) memcpy(psp, save, 4 * 4); - SysPrintf("%s", tmp); + if (Config.PsxOut) + SysPrintf("%s", tmp); } void psxBios_printf() { // 0x3f @@ -1421,7 +1423,7 @@ void psxBios_format() { // 0x41 static void psxBios_SystemErrorUnresolvedException() { if (loadRam32(0xfffc) != 0x12345678) { // prevent log flood - SysPrintf("psxBios_%s\n", biosA0n[0x40]); + SysPrintf("psxBios_%s called from %08x\n", biosA0n[0x40], ra); storeRam32(0xfffc, 0x12345678); } mips_return_void_c(1000); @@ -1450,27 +1452,30 @@ void psxBios_Load() { // 0x42 */ void psxBios_Exec() { // 43 - EXEC *header = (EXEC*)Ra0; - u32 tmp; + EXEC *header = (EXEC *)castRam32ptr(a0); + u32 ptr; + s32 len; -#ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s: %x, %x, %x\n", biosA0n[0x43], a0, a1, a2); -#endif - 
header->_sp = sp; - header->_fp = fp; - header->_sp = sp; - header->_gp = gp; - header->ret = ra; - header->base = s0; + header->_sp = SWAP32(sp); + header->_fp = SWAP32(fp); + header->_sp = SWAP32(sp); + header->_gp = SWAP32(gp); + header->ret = SWAP32(ra); + header->base = SWAP32(s0); - if (header->S_addr != 0) { - tmp = header->S_addr + header->s_size; - sp = tmp; - fp = sp; - } + ptr = SWAP32(header->b_addr); + len = SWAP32(header->b_size); + if (len != 0) do { + storeRam32(ptr, 0); + len -= 4; ptr += 4; + } while (len > 0); + + if (header->S_addr != 0) + sp = fp = SWAP32(header->S_addr) + SWAP32(header->s_size); - gp = header->gp0; + gp = SWAP32(header->gp0); s0 = a0; @@ -1478,7 +1483,7 @@ void psxBios_Exec() { // 43 a1 = a2; ra = 0x8000; - pc0 = header->_pc0; + pc0 = SWAP32(header->_pc0); } void psxBios_FlushCache() { // 44 @@ -1661,16 +1666,16 @@ void psxBios_SetMem() { // 9f case 2: psxHu32ref(0x1060) = SWAP32(new); psxMu32ref(0x060) = a0; - SysPrintf("Change effective memory : %d MBytes\n",a0); + PSXBIOS_LOG("Change effective memory : %d MBytes\n",a0); break; case 8: psxHu32ref(0x1060) = SWAP32(new | 0x300); psxMu32ref(0x060) = a0; - SysPrintf("Change effective memory : %d MBytes\n",a0); + PSXBIOS_LOG("Change effective memory : %d MBytes\n",a0); default: - SysPrintf("Effective memory must be 2/8 MBytes\n"); + PSXBIOS_LOG("Effective memory must be 2/8 MBytes\n"); break; } @@ -1729,6 +1734,19 @@ void psxBios__card_load() { // ac v0 = 1; pc0 = ra; } +static void psxBios_GetSystemInfo() { // b4 + u32 ret = 0; + //PSXBIOS_LOG("psxBios_%s %x\n", biosA0n[0xb4], a0); + SysPrintf("psxBios_%s %x\n", biosA0n[0xb4], a0); + switch (a0) { + case 0: + case 1: ret = SWAP32(((u32 *)psxR)[0x100/4 + a0]); break; + case 2: ret = 0xbfc0012c; break; + case 5: ret = loadRam32(0x60) << 10; break; + } + mips_return_c(ret, 20); +} + /* System calls B0 */ static u32 psxBios_SysMalloc_(u32 size); @@ -2044,9 +2062,9 @@ void psxBios_ChangeTh() { // 10 u32 tcbBase = loadRam32(A_TT_TCB); 
u32 th = a0 & 0xffff; -#ifdef PSXBIOS_LOG -// PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x10], th); -#endif + // this is quite spammy + //PSXBIOS_LOG("psxBios_%s %x\n", biosB0n[0x10], th); + // without doing any argument checks, just issue a syscall // (like the real bios does) a0 = 3; @@ -2136,18 +2154,22 @@ void psxBios_PAD_dr() { // 16 static void psxBios_ReturnFromException() { // 17 u32 tcbPtr = loadRam32(A_TT_PCB); const TCB *tcb = loadRam32ptr(tcbPtr); + u32 sr; int i; for (i = 1; i < 32; i++) psxRegs.GPR.r[i] = SWAP32(tcb->reg[i]); psxRegs.GPR.n.lo = SWAP32(tcb->lo); psxRegs.GPR.n.hi = SWAP32(tcb->hi); - psxRegs.CP0.n.SR = SWAP32(tcb->sr); + sr = SWAP32(tcb->sr); //printf("%s %08x->%08x %u\n", __func__, pc0, tcb->epc, psxRegs.cycle); pc0 = k0 = SWAP32(tcb->epc); - psxRegs.CP0.n.SR = (psxRegs.CP0.n.SR & ~0x0f) | ((psxRegs.CP0.n.SR & 0x3c) >> 2); + // the interpreter wants to know about sr changes, so do a MTC0 + sr = (sr & ~0x0f) | ((sr & 0x3c) >> 2); + MTC0(&psxRegs, 12, sr); + use_cycles(53); psxBranchTest(); } @@ -2191,7 +2213,7 @@ static void buopen(int mcd, char *ptr, char *cfg) if ((*fptr & 0xF0) != 0x50) continue; if (strcmp(FDesc[1 + mcd].name, fptr+0xa)) continue; FDesc[1 + mcd].mcfile = i; - SysPrintf("open %s\n", fptr+0xa); + PSXBIOS_LOG("open %s\n", fptr+0xa); v0 = 1 + mcd; break; } @@ -2230,7 +2252,7 @@ static void buopen(int mcd, char *ptr, char *cfg) pptr[8] = pptr[9] = 0xff; for (j=0, xor=0; j<127; j++) xor^= pptr[j]; pptr[127] = xor; - SysPrintf("openC %s %d\n", ptr, nblk); + PSXBIOS_LOG("openC %s %d\n", ptr, nblk); v0 = 1 + mcd; /* just go ahead and resave them all */ SaveMcd(cfg, ptr, 128, 128 * 15); @@ -2325,9 +2347,8 @@ void psxBios_write() { // 0x35/0x03 char *ptr; void *pa1 = Ra1; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s: %x,%x,%x\n", biosB0n[0x35], a0, a1, a2); -#endif + if (a0 != 1) // stdout + PSXBIOS_LOG("psxBios_%s: %x,%x,%x\n", biosB0n[0x35], a0, a1, a2); v0 = -1; if (pa1 == INVALID_PTR) { @@ -2339,7 +2360,7 @@ void 
psxBios_write() { // 0x35/0x03 char *ptr = pa1; v0 = a2; - while (a2 > 0) { + if (Config.PsxOut) while (a2 > 0) { SysPrintf("%c", *ptr++); a2--; } pc0 = ra; return; @@ -2386,12 +2407,12 @@ void psxBios_close() { // 0x36 } void psxBios_putchar() { // 3d - SysPrintf("%c", (char)a0); + if (Config.PsxOut) SysPrintf("%c", (char)a0); pc0 = ra; } void psxBios_puts() { // 3e/3f - SysPrintf("%s", Ra0); + if (Config.PsxOut) SysPrintf("%s", Ra0); pc0 = ra; } @@ -2428,7 +2449,7 @@ static size_t strlen_internal(char* p) strcpy(dir->name+i, ptr+i); break; } \ match = 0; break; \ } \ - SysPrintf("%d : %s = %s + %s (match=%d)\n", nfile, dir->name, pfile, ptr, match); \ + PSXBIOS_LOG("%d : %s = %s + %s (match=%d)\n", nfile, dir->name, pfile, ptr, match); \ if (match == 0) { continue; } \ dir->size = 8192; \ v0 = _dir; \ @@ -2552,7 +2573,7 @@ void psxBios_rename() { // 44 if (strcmp(Ra0+5, ptr+0xa)) continue; \ *ptr = (*ptr & 0xf) | 0xA0; \ SaveMcd(Config.Mcd##mcd, Mcd##mcd##Data, 128 * i, 1); \ - SysPrintf("delete %s\n", ptr+0xa); \ + PSXBIOS_LOG("delete %s\n", ptr+0xa); \ v0 = 1; \ break; \ } \ @@ -3230,6 +3251,8 @@ void psxBiosInit() { biosA0[0x3b] = psxBios_getchar; biosA0[0x3c] = psxBios_putchar; //biosA0[0x3d] = psxBios_gets; + biosA0[0x3e] = psxBios_puts; + biosA0[0x3f] = psxBios_printf; biosA0[0x40] = psxBios_SystemErrorUnresolvedException; //biosA0[0x41] = psxBios_LoadTest; biosA0[0x42] = psxBios_Load; @@ -3346,7 +3369,7 @@ void psxBiosInit() { //biosA0[0xb1] = psxBios_sys_a0_b1; //biosA0[0xb2] = psxBios_do_a_long_jmp //biosA0[0xb3] = psxBios_sys_a0_b3; - //biosA0[0xb4] = psxBios_sub_function; + biosA0[0xb4] = psxBios_GetSystemInfo; //*******************B0 CALLS**************************** biosB0[0x00] = psxBios_SysMalloc; //biosB0[0x01] = psxBios_sys_b0_01; @@ -3409,7 +3432,9 @@ void psxBiosInit() { //biosB0[0x3a] = psxBios_getc; //biosB0[0x3b] = psxBios_putc; biosB0[0x3c] = psxBios_getchar; + biosB0[0x3d] = psxBios_putchar; //biosB0[0x3e] = psxBios_gets; + biosB0[0x3f] = 
psxBios_puts; //biosB0[0x40] = psxBios_cd; biosB0[0x41] = psxBios_format; biosB0[0x42] = psxBios_firstfile; @@ -3483,11 +3508,15 @@ void psxBiosInit() { // initial RNG seed psxMu32ref(0x9010) = SWAPu32(0xac20cc00); + // somewhat pretend to be a SCPH1001 BIOS + // some games look for these and take an exception if they're missing rom32 = (u32 *)psxR; rom32[0x100/4] = SWAP32(0x19951204); rom32[0x104/4] = SWAP32(3); strcpy(psxR + 0x108, "PCSX authors"); - strcpy(psxR + 0x12c, "PCSX HLE"); + strcpy(psxR + 0x12c, "CEX-3000 PCSX HLE"); // see psxBios_GetSystemInfo + strcpy(psxR + 0x7ff32, "System ROM Version 2.2 12/04/95 A"); + strcpy(psxR + 0x7ff54, "GPL-2.0-or-later"); // fonts len = 0x80000 - 0x66000; @@ -3697,7 +3726,7 @@ void hleExc0_2_2_syscall() // not in any A/B/C table if (code != R3000E_Syscall) { if (code != 0) { DeliverEvent(0xf0000010, 0x1000); - psxBios_SystemErrorUnresolvedException(); + //psxBios_SystemErrorUnresolvedException(); } mips_return_c(0, 17); return; diff --git a/libpcsxcore/psxhle.c b/libpcsxcore/psxhle.c index 379ffd1f9..175b86ab4 100644 --- a/libpcsxcore/psxhle.c +++ b/libpcsxcore/psxhle.c @@ -88,11 +88,11 @@ static void hleExecRet() { PSXHLE_LOG("ExecRet %x: %x\n", psxRegs.GPR.n.s0, header->ret); - psxRegs.GPR.n.ra = header->ret; - psxRegs.GPR.n.sp = header->_sp; - psxRegs.GPR.n.fp = header->_fp; - psxRegs.GPR.n.gp = header->_gp; - psxRegs.GPR.n.s0 = header->base; + psxRegs.GPR.n.ra = SWAP32(header->ret); + psxRegs.GPR.n.sp = SWAP32(header->_sp); + psxRegs.GPR.n.fp = SWAP32(header->_fp); + psxRegs.GPR.n.gp = SWAP32(header->_gp); + psxRegs.GPR.n.s0 = SWAP32(header->base); psxRegs.GPR.n.v0 = 1; psxRegs.pc = psxRegs.GPR.n.ra; From 4b22d9501e7de7b7991e5cf3163e4b48806a0913 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 16 Aug 2023 01:01:37 +0300 Subject: [PATCH 318/597] spu: implement volume regs somewhat crash2 seems to read them, unclear what for --- plugins/dfsound/externals.h | 9 +++++++-- plugins/dfsound/registers.c | 25 
+++++++++++++++++++++---- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index b63ac3c4e..dd05a5ab7 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -118,8 +118,13 @@ typedef struct unsigned int prevflags:3; // flags from previous block unsigned int bIgnoreLoop:1; // Ignore loop unsigned int bNewPitch:1; // pitch changed - int iLeftVolume; // left volume - int iRightVolume; // right volume + union { + struct { + int iLeftVolume; // left volume + int iRightVolume; // right volume + }; + int iVolume[2]; + }; ADSRInfoEx ADSRX; int iRawPitch; // raw pitch (0...3fff) } SPUCHAN; diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index 580589975..ae7ed24e7 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -37,17 +37,19 @@ static void ReverbOn(int start,int end,unsigned short val); // WRITE REGISTERS: called by main emu //////////////////////////////////////////////////////////////////////// -static const uint32_t ignore_dupe[8] = { +static const uint32_t ignore_dupe[16] = { // ch 0-15 c40 c80 cc0 0x7f7f7f7f, 0x7f7f7f7f, 0x7f7f7f7f, 0x7f7f7f7f, // ch 16-24 d40 control reverb - 0x7f7f7f7f, 0x7f7f7f7f, 0xff05ff0f, 0xffffffff + 0x7f7f7f7f, 0x7f7f7f7f, 0xff05ff0f, 0xffffffff, + 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, + 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, unsigned int cycles) { - int r = reg & 0xfff; + int r = reg & 0xffe; int rofs = (r - 0xc00) >> 1; int changed = spu.regArea[rofs] != val; spu.regArea[rofs] = val; @@ -119,6 +121,12 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, } return; } + else if (0x0e00 <= r && r < 0x0e60) + { + int ch = (r >> 2) & 0x1f; + log_unhandled("c%02d w %cvol %04x\n", ch, (r & 2) ? 
'r' : 'l', val); + spu.s_chan[ch].iVolume[(r >> 1) & 1] = (signed short)val >> 1; + } switch(r) { @@ -300,7 +308,7 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, unsigned short CALLBACK SPUreadRegister(unsigned long reg) { - const unsigned long r=reg&0xfff; + const unsigned long r = reg & 0xffe; if(r>=0x0c00 && r<0x0d80) { @@ -323,6 +331,13 @@ unsigned short CALLBACK SPUreadRegister(unsigned long reg) } } } + else if (0x0e00 <= r && r < 0x0e60) + { + int ch = (r >> 2) & 0x1f; + int v = spu.s_chan[ch].iVolume[(r >> 1) & 1] << 1; + log_unhandled("c%02d r %cvol %04x\n", ch, (r & 2) ? 'r' : 'l', v); + return v; + } switch(r) { @@ -478,6 +493,7 @@ static void SetVolumeL(unsigned char ch,short vol) // LEFT VOLUME vol&=0x3fff; spu.s_chan[ch].iLeftVolume=vol; // store volume + //spu.regArea[(0xe00-0xc00)/2 + ch*2 + 0] = vol << 1; } //////////////////////////////////////////////////////////////////////// @@ -505,6 +521,7 @@ static void SetVolumeR(unsigned char ch,short vol) // RIGHT VOLUME vol&=0x3fff; spu.s_chan[ch].iRightVolume=vol; + //spu.regArea[(0xe00-0xc00)/2 + ch*2 + 1] = vol << 1; } //////////////////////////////////////////////////////////////////////// From 66cc6abb94419190a96ae48c62f13d7cdd76f7a6 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 15 Aug 2023 00:16:22 +0300 Subject: [PATCH 319/597] spu: clear on init Otherwise some residual stuff seems to carry over between games, especially in hle mode. --- plugins/dfsound/spu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 038f946ee..2cb0587a8 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1482,6 +1482,7 @@ long CALLBACK SPUinit(void) { int i; + memset(&spu, 0, sizeof(spu)); spu.spuMemC = calloc(1, 512 * 1024); InitADSR(); From 2bce5171a00ae99c46d20c6607a2ce7f3bb0994c Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 15 Aug 2023 23:06:33 +0300 Subject: [PATCH 320/597] psxbios: more pad stuff maybe done? 
--- libpcsxcore/psxbios.c | 106 +++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 58 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 7e88c9cde..be3bc8447 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -253,7 +253,6 @@ typedef struct { u32 mcfile; } FileDesc; -static int *pad_buf = NULL; static u32 heap_size = 0; static u32 *heap_addr = NULL; static u32 *heap_end = NULL; @@ -275,13 +274,16 @@ static u32 card_active_chan = 0; #define A_KMALLOC_PTR 0x7460 #define A_KMALLOC_SIZE 0x7464 #define A_KMALLOC_END 0x7468 -#define A_PADCRD_CHN_E 0x74a8 // pad/card irq chain entry +#define A_PADCRD_CHN_E 0x74a8 // pad/card irq chain entry, see hleExcPadCard1() #define A_PAD_IRQR_ENA 0x74b8 // pad read on vint irq (nocash 'pad_enable_flag') #define A_CARD_IRQR_ENA 0x74bc // same for card #define A_PAD_INBUF 0x74c8 // 2x buffers for rx pad data #define A_PAD_OUTBUF 0x74d0 // 2x buffers for tx pad data #define A_PAD_IN_LEN 0x74d8 #define A_PAD_OUT_LEN 0x74e0 +#define A_PAD_DR_DST 0x74c4 +#define A_PAD_DR_BUF1 0x7570 +#define A_PAD_DR_BUF2 0x7598 #define A_EEXIT_PTR 0x75d0 #define A_EXC_STACK 0x85d8 // exception stack top #define A_RCNT_VBL_ACK 0x8600 @@ -2078,7 +2080,7 @@ void psxBios_InitPAD() { // 0x12 PSXBIOS_LOG("psxBios_%s %x %x %x %x\n", biosB0n[0x12], a0, a1, a2, a3); // printf("%s", "PS-X Control PAD Driver Ver 3.0"); - // PAD_dr_enable = 0; + ram32[A_PAD_DR_DST/4] = 0; ram32[A_PAD_OUTBUF/4 + 0] = 0; ram32[A_PAD_OUTBUF/4 + 1] = 0; ram32[A_PAD_OUT_LEN/4 + 0] = 0; @@ -2125,30 +2127,53 @@ void psxBios_StopPAD() { // 14 mips_return_void_c(200); } -void psxBios_PAD_init() { // 15 -#ifdef PSXBIOS_LOG +static void psxBios_PAD_init() { // 15 + u32 ret = 0; PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x15]); -#endif - if (!(a0 == 0x20000000 || a0 == 0x20000001)) + if (a0 == 0x20000000 || a0 == 0x20000001) { - v0 = 0; - pc0 = ra; - return; + u32 dst = a1; + a0 = A_PAD_DR_BUF1; a1 = 0x22; + a2 = A_PAD_DR_BUF2; a3 = 
0x22; + psxBios_InitPAD(); + psxBios_StartPAD(); + storeRam32(A_PAD_DR_DST, dst); + ret = 2; + } + mips_return_c(ret, 100); +} + +static u32 psxBios_PAD_dr_() { + u8 *dst = loadRam32ptr(A_PAD_DR_DST); + u8 *buf1 = castRam8ptr(A_PAD_DR_BUF1); + u8 *buf2 = castRam8ptr(A_PAD_DR_BUF2); + dst[0] = dst[1] = dst[2] = dst[3] = ~0; + if (buf1[0] == 0 && (buf1[1] == 0x23 || buf1[1] == 0x41)) + { + dst[0] = buf1[3], dst[1] = buf1[2]; + if (buf1[1] == 0x23) { + dst[0] |= 0xc7, dst[1] |= 7; + if (buf1[5] >= 0x10) dst[0] &= ~(1u << 6); + if (buf1[6] >= 0x10) dst[0] &= ~(1u << 7); + } } - psxHwWrite16(0x1f801074, (u16)(psxHwRead16(0x1f801074) | 0x1)); - pad_buf = (int *)Ra1; - *pad_buf = -1; - psxRegs.CP0.n.SR |= 0x401; - v0 = 2; - pc0 = ra; + if (buf2[0] == 0 && (buf2[1] == 0x23 || buf2[1] == 0x41)) + { + dst[2] = buf2[3], dst[3] = buf2[2]; + if (buf2[1] == 0x23) { + dst[2] |= 0xc7, dst[3] |= 7; + if (buf2[5] >= 0x10) dst[2] &= ~(1u << 6); + if (buf2[6] >= 0x10) dst[2] &= ~(1u << 7); + } + } + use_cycles(55); + return SWAP32(*(u32 *)dst); } -void psxBios_PAD_dr() { // 16 -#ifdef PSXBIOS_LOG +static void psxBios_PAD_dr() { // 16 PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x16]); -#endif - - v0 = -1; pc0 = ra; + u32 ret = psxBios_PAD_dr_(); + mips_return(ret); } static void psxBios_ReturnFromException() { // 17 @@ -3498,7 +3523,6 @@ void psxBiosInit() { //************** THE END *************************************** /**/ - pad_buf = NULL; heap_addr = NULL; heap_end = NULL; heap_size = 0; @@ -3633,40 +3657,6 @@ void psxBiosCnfLoaded(u32 tcb_cnt, u32 evcb_cnt) { } \ } -static void biosPadHLE() { - if (pad_buf != NULL) { - u32 *buf = (u32*)pad_buf; - - PAD1_startPoll(1); - if (PAD1_poll(0x42) == 0x23) { - PAD1_poll(0); - *buf = PAD1_poll(0) << 8; - *buf |= PAD1_poll(0); - PAD1_poll(0); - *buf &= ~((PAD1_poll(0) > 0x20) ? 1 << 6 : 0); - *buf &= ~((PAD1_poll(0) > 0x20) ? 
1 << 7 : 0); - } else { - PAD1_poll(0); - *buf = PAD1_poll(0) << 8; - *buf|= PAD1_poll(0); - } - - PAD2_startPoll(2); - if (PAD2_poll(0x42) == 0x23) { - PAD2_poll(0); - *buf |= PAD2_poll(0) << 24; - *buf |= PAD2_poll(0) << 16; - PAD2_poll(0); - *buf &= ~((PAD2_poll(0) > 0x20) ? 1 << 22 : 0); - *buf &= ~((PAD2_poll(0) > 0x20) ? 1 << 23 : 0); - } else { - PAD2_poll(0); - *buf |= PAD2_poll(0) << 24; - *buf |= PAD2_poll(0) << 16; - } - } -} - static void handle_chain_x_x_1(u32 enable, u32 irqbit) { use_cycles(10); @@ -3852,8 +3842,9 @@ void hleExcPadCard1(void) psxBios_PADpoll(1); psxBios_PADpoll(2); - biosPadHLE(); use_cycles(100); + if (loadRam32(A_PAD_DR_DST)) + psxBios_PAD_dr_(); } if (loadRam32(A_PAD_ACK_VBL)) psxHwWrite16(0x1f801070, ~1); @@ -3954,7 +3945,6 @@ void psxBiosException() { void psxBiosFreeze(int Mode) { u32 base = 0x40000; - bfreezepsxMptr(pad_buf, int); bfreezepsxMptr(heap_addr, u32); bfreezes(FDesc); bfreezel(&card_active_chan); From e099a4a86c8d2cd8de1967d6e5816b2ae5ae315e Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 16 Aug 2023 22:59:42 +0300 Subject: [PATCH 321/597] spu: add forgotten prevflags checking forgot in 3113a160828f9353715e4b8cc1041d9c86162df2 --- plugins/dfsound/spu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 2cb0587a8..4dc762f6f 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -493,6 +493,8 @@ static void scan_for_irq(int ch, unsigned int *upd_samples) pos = s_chan->spos; sinc = s_chan->sinc; end = pos + *upd_samples * sinc; + if (s_chan->prevflags & 1) // 1: stop/loop + block = s_chan->pLoop; pos += (28 - s_chan->iSBPos) << 16; while (pos < end) From 4b4164bb8f5019f743d2879b64d8f8f25430685d Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 16 Aug 2023 23:24:28 +0300 Subject: [PATCH 322/597] cdrom: more timing hacks --- libpcsxcore/cdrom.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git 
a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 7bc57cff8..90ec0d30c 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -102,7 +102,7 @@ static struct { u16 CmdInProgress; u8 Irq1Pending; u8 unused5; - u32 unused6; + u32 LastReadCycles; u8 unused7; @@ -562,26 +562,18 @@ static void cdrPlayInterrupt_Autopause() } } +// LastReadCycles static int cdrSeekTime(unsigned char *target) { int diff = msf2sec(cdr.SetSectorPlay) - msf2sec(target); - int seekTime = abs(diff) * (cdReadTime / 200); - /* - * Gameblabla : - * It was originally set to 1000000 for Driver, however it is not high enough for Worms Pinball - * and was unreliable for that game. - * I also tested it against Mednafen and Driver's titlescreen music starts 25 frames later, not immediatly. - * - * Obviously, this isn't perfect but right now, it should be a bit better. - * Games to test this against if you change that setting : - * - Driver (titlescreen music delay and retry mission) - * - Worms Pinball (Will either not boot or crash in the memory card screen) - * - Viewpoint (short pauses if the delay in the ingame music is too long) - * - * It seems that 3386880 * 5 is too much for Driver's titlescreen and it starts skipping. - * However, 1000000 is not enough for Worms Pinball to reliably boot. - */ - if(seekTime > 3386880 * 2) seekTime = 3386880 * 2; + int pausePenalty, seekTime = abs(diff) * (cdReadTime / 2000); + seekTime = MAX_VALUE(seekTime, 20000); + + // need this stupidly long penalty or else Spyro2 intro desyncs + pausePenalty = (s32)(psxRegs.cycle - cdr.LastReadCycles) > cdReadTime * 4 ? 
cdReadTime * 25 : 0; + seekTime += pausePenalty; + + seekTime = MIN_VALUE(seekTime, PSXCLK * 2 / 3); CDR_LOG("seek: %.2f %.2f\n", (float)seekTime / PSXCLK, (float)seekTime / cdReadTime); return seekTime; } @@ -634,6 +626,8 @@ static void msfiAdd(u8 *msfi, u32 count) void cdrPlayReadInterrupt(void) { + cdr.LastReadCycles = psxRegs.cycle; + if (cdr.Reading) { cdrReadInterrupt(); return; @@ -914,7 +908,7 @@ void cdrInterrupt(void) { } else { - second_resp_time = (((cdr.Mode & MODE_SPEED) ? 2 : 1) * 1000000); + second_resp_time = (((cdr.Mode & MODE_SPEED) ? 1 : 2) * 1097107); } SetPlaySeekRead(cdr.StatP, 0); break; From d91ab53739f331e6ce979463afdbbe7b5fd638cd Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 17 Aug 2023 02:38:32 +0300 Subject: [PATCH 323/597] gpu_neon: change enhancement_buf selection Not sure what was the idea there but it caused rightmost column of things sometimes to not get rendered, like in Alundra 2 loading screens and FMVs. --- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 6a88beb78..376225801 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -798,7 +798,7 @@ static void update_enhancement_buf_table_from_hres(psx_gpu_struct *psx_gpu) s = psx_gpu->enhancement_x_threshold; for (x = 0; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) { - if (b < 3 && x * ENH_BUF_TABLE_STEP >= s - ENH_BUF_TABLE_STEP - 1) + if (b < 3 && x * ENH_BUF_TABLE_STEP >= s) { s += psx_gpu->enhancement_x_threshold; b++; From 1e50f05b17adcc929367f95bb63e26f8784646ce Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 17 Aug 2023 22:49:26 +0300 Subject: [PATCH 324/597] psxbios: remove some globals --- libpcsxcore/psxbios.c | 185 ++++++++++++++++++------------------------ 1 file changed, 77 insertions(+), 108 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c
index be3bc8447..678377e13 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -253,11 +253,11 @@ typedef struct { u32 mcfile; } FileDesc; -static u32 heap_size = 0; -static u32 *heap_addr = NULL; -static u32 *heap_end = NULL; +// todo: FileDesc layout is wrong +// todo: get rid of these globals static FileDesc FDesc[32]; -static u32 card_active_chan = 0; +static char ffile[64], *pfile; +static int nfile; // fixed RAM offsets, SCPH1001 compatible #define A_TT_ExCB 0x0100 @@ -282,12 +282,17 @@ static u32 card_active_chan = 0; #define A_PAD_IN_LEN 0x74d8 #define A_PAD_OUT_LEN 0x74e0 #define A_PAD_DR_DST 0x74c4 +#define A_CARD_CHAN1 0x7500 #define A_PAD_DR_BUF1 0x7570 #define A_PAD_DR_BUF2 0x7598 #define A_EEXIT_PTR 0x75d0 #define A_EXC_STACK 0x85d8 // exception stack top #define A_RCNT_VBL_ACK 0x8600 #define A_PAD_ACK_VBL 0x8914 // enable vint ack by pad reading code +#define A_HEAP_BASE 0x9000 +#define A_HEAP_SIZE 0x9004 +#define A_HEAP_END 0x9008 +#define A_HEAP_FLAG 0x900c #define A_CD_EVENTS 0xb9b8 #define A_EXC_GP 0xf450 @@ -912,24 +917,20 @@ void psxBios_bcopy() { // 0x27 pc0 = ra; } -void psxBios_bzero() { // 0x28 - char *p = (char *)Ra0; - v0 = a0; +static void psxBios_bzero() { // 0x28 /* Same as memset here (See memset below) */ - if (a1 > 0x7FFFFFFF || a1 == 0) + u32 ret = a0; + if (a0 == 0 || (s32)a1 <= 0) { - v0 = 0; - pc0 = ra; + mips_return_c(0, 6); return; } - else if (a0 == 0) - { - pc0 = ra; - return; + while ((s32)a1-- > 0) { + storeRam8(a0++, 0); + use_cycles(4); } - while ((s32)a1-- > 0) *p++ = '\0'; - a1 = 0; - pc0 = ra; + // todo: many more cycles due to uncached bios mem + mips_return_c(ret, 5); } void psxBios_bcmp() { // 0x29 @@ -963,23 +964,19 @@ void psxBios_memcpy() { // 0x2a pc0 = ra; } -void psxBios_memset() { // 0x2b - char *p = (char *)Ra0; - v0 = a0; - if (a2 > 0x7FFFFFFF || a2 == 0) +static void psxBios_memset() { // 0x2b + u32 ret = a0; + if (a0 == 0 || (s32)a2 <= 0) { - v0 = 0; - pc0 = ra; + mips_return_c(0, 6); 
return; } - if (a0 == 0) - { - pc0 = ra; - return; + while ((s32)a2-- > 0) { + storeRam8(a0++, a1); + use_cycles(4); } - while ((s32)a2-- > 0) *p++ = (char)a1; - a2 = 0; - v0 = a0; pc0 = ra; + // todo: many more cycles due to uncached bios mem + mips_return_c(ret, 5); } void psxBios_memmove() { // 0x2c @@ -1142,14 +1139,16 @@ void psxBios_qsort() { // 0x31 pc0 = ra; } -void psxBios_malloc() { // 0x33 +// this isn't how the real bios works, but maybe good enough +static void psxBios_malloc() { // 0x33 + u32 *heap_addr, *heap_end; u32 *chunk, *newchunk = NULL; unsigned int dsize = 0, csize, cstat; int colflag; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x33]); -#endif - if (!a0 || (!heap_size || !heap_addr)) { + PSXBIOS_LOG("psxBios_%s %x\n", biosA0n[0x33], a0); + heap_addr = loadRam32ptr(A_HEAP_BASE); + heap_end = loadRam32ptr(A_HEAP_END); + if (heap_addr >= heap_end) { v0 = 0; pc0 = ra; return; @@ -1246,27 +1245,24 @@ void psxBios_malloc() { // 0x33 pc0 = ra; } -void psxBios_free() { // 0x34 - - PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x34]); - PSXBIOS_LOG("free %x: %x bytes\n", a0, *(u32*)(Ra0-4)); - - if (a0) - *(u32*)(Ra0-4) |= 1; // set chunk to free - pc0 = ra; +static void psxBios_free() { // 0x34 + PSXBIOS_LOG("psxBios_%s %x (%x bytes)\n", biosA0n[0x34], a0, loadRam32(a0 - 4)); + storeRam32(a0 - 4, loadRam32(a0 - 4) | 1); // set chunk to free + mips_return_void_c(5); } -void psxBios_calloc() { // 0x37 - void *pv0; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x37]); -#endif +static void psxBios_calloc() { // 0x37 + u32 ret, size; + PSXBIOS_LOG("psxBios_%s %x %x\n", biosA0n[0x37], a0, a1); - a0 = a0 * a1; + a0 = size = a0 * a1; psxBios_malloc(); - pv0 = Rv0; - if (pv0) - memset(pv0, 0, a0); + ret = v0; + if (ret) { + a0 = ret; a1 = size; + psxBios_bzero(); + } + mips_return_c(ret, 21); } void psxBios_realloc() { // 0x38 @@ -1299,27 +1295,16 @@ void psxBios_realloc() { // 0x38 /* InitHeap(void *block , int n) */ -void psxBios_InitHeap() 
{ // 0x39 - unsigned int size; - -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x39]); -#endif - - if (((a0 & 0x1fffff) + a1)>= 0x200000) size = 0x1ffffc - (a0 & 0x1fffff); - else size = a1; +static void psxBios_InitHeap() { // 0x39 + PSXBIOS_LOG("psxBios_%s %x %x\n", biosA0n[0x39], a0, a1); - size &= 0xfffffffc; + storeRam32(A_HEAP_BASE, a0); + storeRam32(A_HEAP_SIZE, a1); + storeRam32(A_HEAP_END, a0 + (a1 & ~3) + 4); + storeRam32(A_HEAP_FLAG, 0); + storeRam32(a0, 0); - heap_addr = (u32 *)Ra0; - heap_size = size; - heap_end = (u32 *)((u8 *)heap_addr + heap_size); - /* HACKFIX: Commenting out this line fixes GTA2 crash */ - //*heap_addr = SWAP32(size | 1); - - PSXBIOS_LOG("InitHeap %x,%x : %x %x\n",a0,a1, (int)((uptr)heap_addr-(uptr)psxM), size); - - pc0 = ra; + mips_return_void_c(14); } void psxBios_getchar() { //0x3b @@ -1685,19 +1670,18 @@ void psxBios_SetMem() { // 9f } /* TODO FIXME : Not compliant. -1 indicates failure but using 1 for now. */ -void psxBios_get_cd_status(void) //a6 +static void psxBios_get_cd_status() // a6 { + PSXBIOS_LOG("psxBios_%s\n", biosA0n[0xa6]); v0 = 1; pc0 = ra; } -void psxBios__card_info() { // ab -#ifdef PSXBIOS_LOG +static void psxBios__card_info() { // ab PSXBIOS_LOG("psxBios_%s: %x\n", biosA0n[0xab], a0); -#endif u32 ret, port; - card_active_chan = a0; - port = card_active_chan >> 4; + storeRam32(A_CARD_CHAN1, a0); + port = a0 >> 4; switch (port) { case 0x0: @@ -1707,9 +1691,7 @@ void psxBios__card_info() { // ab ret = 0x0100; break; default: -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s: UNKNOWN PORT 0x%x\n", biosA0n[0xab], card_active_chan); -#endif + PSXBIOS_LOG("psxBios_%s: UNKNOWN PORT 0x%x\n", biosA0n[0xab], a0); ret = 0x0302; break; } @@ -1728,7 +1710,7 @@ void psxBios__card_load() { // ac PSXBIOS_LOG("psxBios_%s: %x\n", biosA0n[0xac], a0); #endif - card_active_chan = a0; + storeRam32(A_CARD_CHAN1, a0); // DeliverEvent(0xf0000011, 0x0004); DeliverEvent(0xf4000001, 0x0004); @@ -2221,9 +2203,6 @@ static void 
psxBios_UnDeliverEvent() { // 0x20 mips_return(ret); } -char ffile[64], *pfile; -int nfile; - static void buopen(int mcd, char *ptr, char *cfg) { int i; @@ -2683,7 +2662,7 @@ void psxBios__card_write() { // 0x4e v0 = 0; pc0 = ra; return; } - card_active_chan = a0; + storeRam32(A_CARD_CHAN1, a0); port = a0 >> 4; if (pa2 != INVALID_PTR) { @@ -2719,7 +2698,7 @@ void psxBios__card_read() { // 0x4f v0 = 0; pc0 = ra; return; } - card_active_chan = a0; + storeRam32(A_CARD_CHAN1, a0); port = a0 >> 4; if (pa2 != INVALID_PTR) { @@ -2810,13 +2789,14 @@ void psxBios_GetB0Table() { // 57 mips_return_c(A_B0_TABLE, 3); } -void psxBios__card_chan() { // 0x58 -#ifdef PSXBIOS_LOG +static void psxBios__card_chan() { // 0x58 + u32 ret; PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x58]); -#endif - v0 = card_active_chan; - pc0 = ra; + // todo: should return active slot chan + // (active - which was last processed by irq code) + ret = loadRam32(A_CARD_CHAN1); + mips_return_c(ret, 8); } static void psxBios_ChangeClearPad() { // 5b @@ -2828,21 +2808,17 @@ static void psxBios_ChangeClearPad() { // 5b mips_return_c(ret, 6); } -void psxBios__card_status() { // 5c -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x5c], a0); -#endif +static void psxBios__card_status() { // 5c + PSXBIOS_LOG("psxBios_%s %x\n", biosB0n[0x5c], a0); - v0 = card_active_chan; + v0 = 1; // ready pc0 = ra; } -void psxBios__card_wait() { // 5d -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x5d], a0); -#endif +static void psxBios__card_wait() { // 5d + PSXBIOS_LOG("psxBios_%s %x\n", biosB0n[0x5d], a0); - v0 = 1; + v0 = 1; // ready pc0 = ra; } @@ -3523,11 +3499,7 @@ void psxBiosInit() { //************** THE END *************************************** /**/ - heap_addr = NULL; - heap_end = NULL; - heap_size = 0; memset(FDesc, 0, sizeof(FDesc)); - card_active_chan = 0; // initial RNG seed psxMu32ref(0x9010) = SWAPu32(0xac20cc00); @@ -3945,8 +3917,5 @@ void psxBiosException() { void psxBiosFreeze(int Mode) 
{ u32 base = 0x40000; - bfreezepsxMptr(heap_addr, u32); bfreezes(FDesc); - bfreezel(&card_active_chan); - bfreezel(&heap_size); } From 7b9a83e8ae6b60682d3898fb6394c53c4673dd0a Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 20 Aug 2023 00:04:59 +0300 Subject: [PATCH 325/597] more timing hacks --- libpcsxcore/database.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index a2e1822df..5edb9611a 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -69,6 +69,10 @@ cycle_multiplier_overrides[] = { "SLES02558", 125 }, { "SLES12558", 125 }, #endif + /* Discworld Noir - audio skips if CPU runs too fast */ + { "SLES01549", 222 }, + { "SLES02063", 222 }, + { "SLES02064", 222 }, }; /* Function for automatic patching according to GameID. */ From 308c6e678a2f0a56a9dee35307070550354f580c Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 17 Aug 2023 01:55:11 +0300 Subject: [PATCH 326/597] try to emulate borders properly --- frontend/cspace.c | 2 +- frontend/libretro.c | 55 +++++++++++----- frontend/libretro_core_options.h | 44 +++++++++++++ frontend/menu.c | 8 ++- frontend/plugin_lib.c | 43 ++++++------ frontend/plugin_lib.h | 5 +- plugins/gpu_neon/psx_gpu_if.c | 2 +- plugins/gpulib/gpu.c | 110 +++++++++++++++++++++++++------ plugins/gpulib/gpu.h | 8 ++- plugins/gpulib/vout_pl.c | 40 +++++++---- 10 files changed, 238 insertions(+), 79 deletions(-) diff --git a/frontend/cspace.c b/frontend/cspace.c index 785b3d137..a3e3301fb 100644 --- a/frontend/cspace.c +++ b/frontend/cspace.c @@ -215,7 +215,7 @@ void bgr555_to_uyvy(void *d, const void *s, int pixels) int r0, g0, b0, r1, g1, b1; int y0, y1, u, v; - for (; pixels > 0; src += 2, dst++, pixels -= 2) + for (; pixels > 1; src += 2, dst++, pixels -= 2) { b0 = (src[0] >> 10) & 0x1f; g0 = (src[0] >> 5) & 0x1f; diff --git a/frontend/libretro.c b/frontend/libretro.c index 4d29e1652..6a3a97c99 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -80,7 +80,7 @@ 
static unsigned msg_interface_version = 0; static void *vout_buf; static void *vout_buf_ptr; static int vout_width, vout_height; -static int vout_doffs_old, vout_fb_dirty; +static int vout_fb_dirty; static bool vout_can_dupe; static bool duping_enable; static bool found_bios; @@ -267,29 +267,22 @@ static void convert(void *buf, size_t bytes) } #endif -static void vout_flip(const void *vram, int stride, int bgr24, int w, int h) +static void vout_flip(const void *vram, int stride, int bgr24, + int x, int y, int w, int h, int dims_changed) { unsigned short *dest = vout_buf_ptr; const unsigned short *src = vram; int dstride = vout_width, h1 = h; - int doffs; - if (vram == NULL) + if (vram == NULL || dims_changed) { + memset(vout_buf_ptr, 0, dstride * vout_height * 2); // blanking - memset(vout_buf_ptr, 0, dstride * h * 2); - goto out; + if (vram == NULL) + goto out; } - doffs = (vout_height - h) * dstride; - doffs += (dstride - w) / 2 & ~1; - if (doffs != vout_doffs_old) - { - // clear borders - memset(vout_buf_ptr, 0, dstride * h * 2); - vout_doffs_old = doffs; - } - dest += doffs; + dest += x + y * dstride; if (bgr24) { @@ -2152,11 +2145,37 @@ static void update_variables(bool in_flight) if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "disabled") == 0) - Config.GpuListWalking = 0; + Config.GpuListWalking = 0; else if (strcmp(var.value, "enabled") == 0) - Config.GpuListWalking = 1; + Config.GpuListWalking = 1; else // auto - Config.GpuListWalking = -1; + Config.GpuListWalking = -1; + } + + var.value = NULL; + var.key = "pcsx_rearmed_screen_centering"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "game") == 0) + pl_rearmed_cbs.screen_centering_type = 1; + else if (strcmp(var.value, "manual") == 0) + pl_rearmed_cbs.screen_centering_type = 2; + else // auto + pl_rearmed_cbs.screen_centering_type = 0; + } + + var.value = NULL; + var.key = "pcsx_rearmed_screen_centering_x"; + if 
(environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + pl_rearmed_cbs.screen_centering_x = atoi(var.value); + } + + var.value = NULL; + var.key = "pcsx_rearmed_screen_centering_y"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + pl_rearmed_cbs.screen_centering_y = atoi(var.value); } #ifdef THREAD_RENDERING diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index e20503ec0..ef25f7b6c 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -435,6 +435,50 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "auto", }, + { + "pcsx_rearmed_screen_centering", + "(GPU) Screen centering", + NULL, + "The PSX has a feature allowing it to shift the image position on screen. Some (mostly PAL) games used this feature in a strange way making the image miscentered and causing borders to appear. With 'Auto' the emulator tries to correct this miscentering automatically. 'Game-controlled' uses the settings supplied by the game. 'Manual' allows to override those values with the settings below.", + NULL, + "video", + { + { "auto", "Auto" }, + { "game", "Game-controlled" }, + { "manual", "Manual" }, + { NULL, NULL }, + }, + "auto", + }, +#define V(x) { #x, NULL } + { + "pcsx_rearmed_screen_centering_x", + "(GPU) Manual screen centering X", + NULL, + "X offset of the frame buffer. Only effective when 'Screen centering' is set to 'Manual'.", + NULL, + "video", + { + V(-16), V(-14), V(-12), V(-10), V(-8), V(-6), V(-4), V(-2), V(0), V(2), V(4), V(6), V(8), V(10), V(12), V(14), V(16), + { NULL, NULL }, + }, + "0", + }, + { + "pcsx_rearmed_screen_centering_y", + "(GPU) Manual screen centering Y", + NULL, + "Y offset of the frame buffer. 
Only effective when 'Screen centering' is set to 'Manual'.", + NULL, + "video", + { + V(-16), V(-15), V(-14), V(-13), V(-12), V(-11), V(-10), V(-9), V(-8), V(-7), V(-6), V(-5), V(-4), V(-3), V(-2), V(-1), + V(0), V(1), V(2), V(3), V(4), V(5), V(6), V(7), V(8), V(9), V(10), V(11), V(12), V(13), V(14), V(15), V(16), + { NULL, NULL }, + }, + "0", + }, +#undef V #ifdef GPU_NEON { "pcsx_rearmed_neon_interlace_enable_v2", diff --git a/frontend/menu.c b/frontend/menu.c index 9586baed7..fb712247b 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -87,6 +87,7 @@ typedef enum MA_OPT_VOUT_MODE, MA_OPT_SCANLINES, MA_OPT_SCANLINE_LEVEL, + MA_OPT_CENTERING, } menu_id; static int last_vout_w, last_vout_h, last_vout_bpp; @@ -450,6 +451,9 @@ static const struct { CE_INTVAL_P(gpu_peopsgl.iVRamSize), CE_INTVAL_P(gpu_peopsgl.iTexGarbageCollection), CE_INTVAL_P(gpu_peopsgl.dwActFixes), + CE_INTVAL_P(screen_centering_type), + CE_INTVAL_P(screen_centering_x), + CE_INTVAL_P(screen_centering_y), CE_INTVAL(spu_config.iUseReverb), CE_INTVAL(spu_config.iXAPitch), CE_INTVAL(spu_config.iUseInterpolation), @@ -1252,6 +1256,7 @@ static const char *men_soft_filter[] = { "None", #endif NULL }; static const char *men_dummy[] = { NULL }; +static const char *men_centering[] = { "Auto", "Ingame", "Force", NULL }; static const char h_scaler[] = "int. 2x - scales w. or h. 2x if it fits on screen\n" "int. 
4:3 - uses integer if possible, else fractional"; static const char h_cscaler[] = "Displays the scaler layer, you can resize it\n" @@ -1316,6 +1321,7 @@ static int menu_loop_cscaler(int id, int keys) static menu_entry e_menu_gfx_options[] = { + mee_enum ("Screen centering", MA_OPT_CENTERING, pl_rearmed_cbs.screen_centering_type, men_centering), mee_enum_h ("Scaler", MA_OPT_VARSCALER, g_scaler, men_scaler, h_scaler), mee_enum ("Video output mode", MA_OPT_VOUT_MODE, plat_target.vout_method, men_dummy), mee_onoff ("Software Scaling", MA_OPT_SCALER2, soft_scaling, 1), @@ -2581,7 +2587,7 @@ void menu_init(void) i = plat_target.cpu_clock_set != NULL && plat_target.cpu_clock_get != NULL && cpu_clock_st > 0; - me_enable(e_menu_gfx_options, MA_OPT_CPU_CLOCKS, i); + me_enable(e_menu_options, MA_OPT_CPU_CLOCKS, i); i = me_id2offset(e_menu_gfx_options, MA_OPT_VOUT_MODE); e_menu_gfx_options[i].data = plat_target.vout_methods; diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index d5cec766c..bdf09c715 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "libpicofe/fonts.h" #include "libpicofe/input.h" @@ -118,9 +119,9 @@ static void print_fps(int h, int border) pl_rearmed_cbs.vsps_cur); } -static void print_cpu_usage(int w, int h, int border) +static void print_cpu_usage(int x, int h) { - hud_printf(pl_vout_buf, pl_vout_w, pl_vout_w - border - 28, + hud_printf(pl_vout_buf, pl_vout_w, x - 28, h - HUD_HEIGHT, "%3d", pl_rearmed_cbs.cpu_usage); } @@ -154,13 +155,11 @@ static __attribute__((noinline)) void draw_active_chans(int vout_w, int vout_h) } } -static void print_hud(int w, int h, int xborder) +static void print_hud(int x, int w, int h) { - if (h < 16) + if (h < 192) return; - if (w < pl_vout_w) - xborder += (pl_vout_w - w) / 2; if (h > pl_vout_h) h = pl_vout_h; @@ -168,12 +167,12 @@ static void print_hud(int w, int h, int xborder) draw_active_chans(w, h); if (hud_msg[0] != 0) - print_msg(h, 
xborder); + print_msg(h, x); else if (g_opts & OPT_SHOWFPS) - print_fps(h, xborder); + print_fps(h, x); if (g_opts & OPT_SHOWCPU) - print_cpu_usage(w, h, xborder); + print_cpu_usage(x + w, h); } /* update scaler target size according to user settings */ @@ -262,11 +261,7 @@ static void pl_vout_set_mode(int w, int h, int raw_w, int raw_h, int bpp) if (pl_rearmed_cbs.only_16bpp) vout_bpp = 16; - // don't use very low heights - if (vout_h < 192) { - buf_yoffset = (192 - vout_h) / 2; - vout_h = 192; - } + assert(vout_h >= 192); pl_vout_scale_w = pl_vout_scale_h = 1; #ifdef __ARM_NEON__ @@ -307,14 +302,15 @@ static void pl_vout_set_mode(int w, int h, int raw_w, int raw_h, int bpp) menu_notify_mode_change(pl_vout_w, pl_vout_h, pl_vout_bpp); } -static void pl_vout_flip(const void *vram, int stride, int bgr24, int w, int h) +static void pl_vout_flip(const void *vram, int stride, int bgr24, + int x, int y, int w, int h, int dims_changed) { - static int doffs_old, clear_counter; + static int clear_counter; unsigned char *dest = pl_vout_buf; const unsigned short *src = vram; int dstride = pl_vout_w, h1 = h; int h_full = pl_vout_h - pl_vout_yoffset; - int doffs; + int xoffs = 0, doffs; pcnt_start(PCNT_BLIT); @@ -328,12 +324,15 @@ static void pl_vout_flip(const void *vram, int stride, int bgr24, int w, int h) goto out_hud; } - // borders - doffs = (dstride - w * pl_vout_scale_w) / 2 & ~1; + assert(x + w <= pl_vout_w); + assert(y + h <= pl_vout_h); + + // offset + xoffs = x * pl_vout_scale_w; + doffs = xoffs + y * dstride; - if (doffs > doffs_old) + if (dims_changed) clear_counter = 2; - doffs_old = doffs; if (clear_counter > 0) { if (pl_plat_clear) @@ -409,7 +408,7 @@ static void pl_vout_flip(const void *vram, int stride, int bgr24, int w, int h) } out_hud: - print_hud(w * pl_vout_scale_w, h * pl_vout_scale_h, 0); + print_hud(xoffs, w * pl_vout_scale_w, (y + h) * pl_vout_scale_h); out: pcnt_end(PCNT_BLIT); diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index 
3f8b5c413..4984d3054 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -54,7 +54,7 @@ struct rearmed_cbs { int (*pl_vout_open)(void); void (*pl_vout_set_mode)(int w, int h, int raw_w, int raw_h, int bpp); void (*pl_vout_flip)(const void *vram, int stride, int bgr24, - int w, int h); + int x, int y, int w, int h, int dims_changed); void (*pl_vout_close)(void); void *(*mmap)(unsigned int size); void (*munmap)(void *ptr, unsigned int size); @@ -107,6 +107,9 @@ struct rearmed_cbs { } gpu_peopsgl; // misc int gpu_caps; + int screen_centering_type; // 0 - auto, 1 - game conrolled, 2 - manual + int screen_centering_x; + int screen_centering_y; }; extern struct rearmed_cbs pl_rearmed_cbs; diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index 353b603ce..30faee256 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -52,7 +52,7 @@ int do_cmd_list(uint32_t *list, int count, int *last_cmd) #define ENHANCEMENT_BUF_SIZE (1024 * 1024 * 2 * 4 + 4096 * 2) -static uint16_t *get_enhancement_bufer(int *x, int *y, int *w, int *h, +static void *get_enhancement_bufer(int *x, int *y, int *w, int *h, int *vram_h) { uint16_t *ret = select_enhancement_buf_ptr(&egpu, *x); diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index e9714e4c9..931583f39 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -9,6 +9,7 @@ */ #include +#include #include #include "gpu.h" @@ -61,28 +62,90 @@ static noinline void do_reset(void) gpu.regs[3] = 1; gpu.screen.hres = gpu.screen.w = 256; gpu.screen.vres = gpu.screen.h = 240; + gpu.screen.x = gpu.screen.y = 0; } static noinline void update_width(void) { + static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 }; + static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 }; + uint8_t hdiv = hdivs[(gpu.status >> 16) & 7]; + int hres = hres_all[(gpu.status >> 16) & 7]; + int pal = gpu.status & PSX_GPU_STATUS_PAL; int sw = gpu.screen.x2 - gpu.screen.x1; - if 
(sw <= 0 || sw >= 2560) - // full width - gpu.screen.w = gpu.screen.hres; - else - gpu.screen.w = sw * gpu.screen.hres / 2560; + int x = 0, x_auto; + if (sw <= 0) + /* nothing displayed? */; + else { + int s = pal ? 656 : 608; // or 600? pal is just a guess + x = (gpu.screen.x1 - s) / hdiv; + x = (x + 1) & ~1; // blitter limitation + sw /= hdiv; + sw = (sw + 2) & ~3; // according to nocash + switch (gpu.state.screen_centering_type) { + case 1: + break; + case 2: + x = gpu.state.screen_centering_x; + break; + default: + // correct if slightly miscentered + x_auto = (hres - sw) / 2 & ~3; + if ((uint32_t)x_auto <= 8u && abs(x) < 24) + x = x_auto; + } + if (x + sw > hres) + sw = hres - x; + // .x range check is done in vout_update() + } + // reduce the unpleasant right border that a few games have + if (gpu.state.screen_centering_type == 0 + && x <= 4 && hres - (x + sw) >= 4) + hres -= 4; + gpu.screen.x = x; + gpu.screen.w = sw; + gpu.screen.hres = hres; + gpu.state.dims_changed = 1; + //printf("xx %d %d -> %2d, %d / %d\n", + // gpu.screen.x1, gpu.screen.x2, x, sw, hres); } static noinline void update_height(void) { - // TODO: emulate this properly.. + int pal = gpu.status & PSX_GPU_STATUS_PAL; + int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT; + int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro int sh = gpu.screen.y2 - gpu.screen.y1; - if (gpu.status & PSX_GPU_STATUS_DHEIGHT) - sh *= 2; - if (sh <= 0 || sh > gpu.screen.vres) - sh = gpu.screen.vres; - + int center_tol = 16; + int vres = 240; + + if (pal && (sh > 240 || gpu.screen.vres == 256)) + vres = 256; + if (dheight) + y *= 2, sh *= 2, vres *= 2, center_tol *= 2; + if (sh <= 0) + /* nothing displayed? 
*/; + else { + switch (gpu.state.screen_centering_type) { + case 1: + break; + case 2: + y = gpu.state.screen_centering_y; + break; + default: + // correct if slightly miscentered + if ((uint32_t)(vres - sh) <= 1 && abs(y) <= center_tol) + y = 0; + } + if (y + sh > vres) + sh = vres - y; + } + gpu.screen.y = y; gpu.screen.h = sh; + gpu.screen.vres = vres; + gpu.state.dims_changed = 1; + //printf("yy %d %d -> %d, %d / %d\n", + // gpu.screen.y1, gpu.screen.y2, y, sh, vres); } static noinline void decide_frameskip(void) @@ -115,8 +178,8 @@ static noinline int decide_frameskip_allow(uint32_t cmd_e3) uint32_t x = cmd_e3 & 0x3ff; uint32_t y = (cmd_e3 >> 10) & 0x3ff; gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) || - (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w || - (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h; + (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w || + (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h; return gpu.frameskip.allow; } @@ -192,8 +255,6 @@ long GPUshutdown(void) void GPUwriteStatus(uint32_t data) { - static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 }; - static const short vres[4] = { 240, 480, 256, 480 }; uint32_t cmd = data >> 24; if (cmd < ARRAY_SIZE(gpu.regs)) { @@ -212,8 +273,10 @@ void GPUwriteStatus(uint32_t data) do_cmd_reset(); break; case 0x03: - if (data & 1) + if (data & 1) { gpu.status |= PSX_GPU_STATUS_BLANKING; + gpu.state.dims_changed = 1; // for hud clearing + } else gpu.status &= ~PSX_GPU_STATUS_BLANKING; break; @@ -222,8 +285,8 @@ void GPUwriteStatus(uint32_t data) gpu.status |= PSX_GPU_STATUS_DMA(data & 3); break; case 0x05: - gpu.screen.x = data & 0x3ff; - gpu.screen.y = (data >> 10) & 0x1ff; + gpu.screen.src_x = data & 0x3ff; + gpu.screen.src_y = (data >> 10) & 0x1ff; if (gpu.frameskip.set) { decide_frameskip_allow(gpu.ex_regs[3]); if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) { @@ -244,8 +307,6 @@ void GPUwriteStatus(uint32_t data) 
break; case 0x08: gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10); - gpu.screen.hres = hres[(gpu.status >> 16) & 7]; - gpu.screen.vres = vres[(gpu.status >> 19) & 3]; update_width(); update_height(); renderer_notify_res_change(); @@ -752,6 +813,15 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs) gpu.state.frame_count = cbs->gpu_frame_count; gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace; gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable; + if (gpu.state.screen_centering_type != cbs->screen_centering_type + || gpu.state.screen_centering_x != cbs->screen_centering_x + || gpu.state.screen_centering_y != cbs->screen_centering_y) { + gpu.state.screen_centering_type = cbs->screen_centering_type; + gpu.state.screen_centering_x = cbs->screen_centering_x; + gpu.state.screen_centering_y = cbs->screen_centering_y; + update_width(); + update_height(); + } gpu.mmap = cbs->mmap; gpu.munmap = cbs->munmap; diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 4637a71c0..446a0234d 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -34,6 +34,7 @@ extern "C" { #define BIT(x) (1 << (x)) #define PSX_GPU_STATUS_DHEIGHT BIT(19) +#define PSX_GPU_STATUS_PAL BIT(20) #define PSX_GPU_STATUS_RGB24 BIT(21) #define PSX_GPU_STATUS_INTERLACE BIT(22) #define PSX_GPU_STATUS_BLANKING BIT(23) @@ -53,6 +54,7 @@ struct psx_gpu { int x, y, w, h; int x1, x2; int y1, y2; + int src_x, src_y; } screen; struct { int x, y, w, h; @@ -67,6 +69,7 @@ struct psx_gpu { uint32_t blanked:1; uint32_t enhancement_enable:1; uint32_t enhancement_active:1; + uint32_t dims_changed:1; uint32_t *frame_count; uint32_t *hcnt; /* hsync count */ struct { @@ -77,6 +80,9 @@ struct psx_gpu { } last_list; uint32_t last_vram_read_frame; uint32_t w_out_old, h_out_old, status_vo_old; + int screen_centering_type; // 0 - auto, 1 - game conrolled, 2 - manual + int screen_centering_x; + int screen_centering_y; } state; struct { int32_t set:3; /* 
-1 auto, 0 off, 1-3 fixed */ @@ -88,7 +94,7 @@ struct psx_gpu { uint32_t last_flip_frame; uint32_t pending_fill[3]; } frameskip; - uint16_t *(*get_enhancement_bufer) + void *(*get_enhancement_bufer) (int *x, int *y, int *w, int *h, int *vram_h); void *(*mmap)(unsigned int size); void (*munmap)(void *ptr, unsigned int size); diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index a6a3f63c8..26827d0e9 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -28,7 +28,7 @@ int vout_finish(void) static void check_mode_change(int force) { int w = gpu.screen.hres; - int h = gpu.screen.h; + int h = gpu.screen.vres; int w_out = w; int h_out = h; @@ -56,47 +56,59 @@ static void check_mode_change(int force) void vout_update(void) { + int bpp = (gpu.status & PSX_GPU_STATUS_RGB24) ? 24 : 16; + uint8_t *vram = (uint8_t *)gpu.vram; + int src_x = gpu.screen.src_x; + int src_y = gpu.screen.src_y; int x = gpu.screen.x; int y = gpu.screen.y; int w = gpu.screen.w; int h = gpu.screen.h; - uint16_t *vram = gpu.vram; int vram_h = 512; + int src_x2 = 0; + + if (x < 0) { w += x; src_x2 = -x; x = 0; } + if (y < 0) { h += y; src_y -= y; y = 0; } - if (w == 0 || h == 0) + if (w <= 0 || h <= 0) return; check_mode_change(0); - if (gpu.state.enhancement_active) - vram = gpu.get_enhancement_bufer(&x, &y, &w, &h, &vram_h); + if (gpu.state.enhancement_active) { + vram = gpu.get_enhancement_bufer(&src_x, &src_y, &w, &h, &vram_h); + x *= 2; y *= 2; + } - if (y + h > vram_h) { - if (y + h - vram_h > h / 2) { + if (src_y + h > vram_h) { + if (src_y + h - vram_h > h / 2) { // wrap - h -= vram_h - y; - y = 0; + h -= vram_h - src_y; + src_y = 0; } else // clip - h = vram_h - y; + h = vram_h - src_y; } - vram += y * 1024 + x; + vram += (src_y * 1024 + src_x) * 2; + vram += src_x2 * bpp / 8; - cbs->pl_vout_flip(vram, 1024, !!(gpu.status & PSX_GPU_STATUS_RGB24), w, h); + cbs->pl_vout_flip(vram, 1024, !!(gpu.status & PSX_GPU_STATUS_RGB24), + x, y, w, h, gpu.state.dims_changed); 
+ gpu.state.dims_changed = 0; } void vout_blank(void) { int w = gpu.screen.hres; - int h = gpu.screen.h; + int h = gpu.screen.vres; check_mode_change(0); if (gpu.state.enhancement_active) { w *= 2; h *= 2; } - cbs->pl_vout_flip(NULL, 1024, !!(gpu.status & PSX_GPU_STATUS_RGB24), w, h); + cbs->pl_vout_flip(NULL, 1024, !!(gpu.status & PSX_GPU_STATUS_RGB24), 0, 0, w, h, 0); } long GPUopen(void **unused) From 4a1d78d40df22668e4c17e08fb0034f2781b8576 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 18 Aug 2023 00:59:35 +0300 Subject: [PATCH 327/597] frontend: accept more bios --- frontend/libretro.c | 59 ++++++++++++++++++++++++++++++--------------- frontend/menu.c | 5 ++-- 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 6a3a97c99..a115fc675 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2816,7 +2816,7 @@ void retro_run(void) set_vout_fb(); } -static bool try_use_bios(const char *path) +static bool try_use_bios(const char *path, bool preferred_only) { long size; const char *name; @@ -2828,12 +2828,20 @@ static bool try_use_bios(const char *path) size = ftell(fp); fclose(fp); - if (size != 512 * 1024) - return false; - name = strrchr(path, SLASH); if (name++ == NULL) name = path; + + if (preferred_only && size != 512 * 1024) + return false; + if (size != 512 * 1024 && size != 4 * 1024 * 1024) + return false; + if (strstr(name, "unirom")) + return false; + // jp bios have an addidional region check + if (preferred_only && (strcasestr(name, "00.") || strcasestr(name, "j.bin"))) + return false; + snprintf(Config.Bios, sizeof(Config.Bios), "%s", name); return true; } @@ -2844,7 +2852,8 @@ static bool try_use_bios(const char *path) static bool find_any_bios(const char *dirpath, char *path, size_t path_size) { - static const char *substrings[] = { "scph", "ps", "openbios" }; + static const char *substr_pref[] = { "scph", "ps" }; + static const char *substr_alt[] = { "scph", "ps", "openbios" }; DIR 
*dir; struct dirent *ent; bool ret = false; @@ -2854,27 +2863,39 @@ static bool find_any_bios(const char *dirpath, char *path, size_t path_size) if (dir == NULL) return false; - for (i = 0; i < (sizeof(substrings) / sizeof(substrings[0])); i++) + // try to find a "better" bios + while ((ent = readdir(dir))) { - const char *substr = substrings[i]; - size_t len = strlen(substr); - rewinddir(dir); - while ((ent = readdir(dir))) + for (i = 0; i < sizeof(substr_pref) / sizeof(substr_pref[0]); i++) { - if ((strncasecmp(ent->d_name, substr, len) != 0)) - continue; - if (strstr(ent->d_name, "unirom")) + const char *substr = substr_pref[i]; + if ((strncasecmp(ent->d_name, substr, strlen(substr)) != 0)) continue; + snprintf(path, path_size, "%s%c%s", dirpath, SLASH, ent->d_name); + ret = try_use_bios(path, true); + if (ret) + goto finish; + } + } + // another pass to look for anything fitting, even ps2 bios + rewinddir(dir); + while ((ent = readdir(dir))) + { + for (i = 0; i < sizeof(substr_alt) / sizeof(substr_alt[0]); i++) + { + const char *substr = substr_alt[i]; + if ((strncasecmp(ent->d_name, substr, strlen(substr)) != 0)) + continue; snprintf(path, path_size, "%s%c%s", dirpath, SLASH, ent->d_name); - ret = try_use_bios(path); + ret = try_use_bios(path, false); if (ret) - { - closedir(dir); - return ret; - } + goto finish; } } + + +finish: closedir(dir); return ret; } @@ -2971,7 +2992,7 @@ static void loadPSXBios(void) for (i = 0; i < sizeof(bios) / sizeof(bios[0]); i++) { snprintf(path, sizeof(path), "%s%c%s.bin", dir, SLASH, bios[i]); - found_bios = try_use_bios(path); + found_bios = try_use_bios(path, true); if (found_bios) break; } diff --git a/frontend/menu.c b/frontend/menu.c index fb712247b..901c72d51 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -111,7 +111,7 @@ int soft_filter; #define DEFAULT_PSX_CLOCK_S "50" #endif -static const char *bioses[24]; +static const char *bioses[32]; static const char *gpu_plugins[16]; static const char *spu_plugins[16]; 
static const char *memcards[32]; @@ -2450,7 +2450,8 @@ static void scan_bios_plugins(void) continue; snprintf(fname, sizeof(fname), "%s/%s", Config.BiosDir, ent->d_name); - if (stat(fname, &st) != 0 || st.st_size != 512*1024) { + if (stat(fname, &st) != 0 + || (st.st_size != 512*1024 && st.st_size != 4*1024*1024)) { printf("bad BIOS file: %s\n", ent->d_name); continue; } From 0b4038f8edd327a3a9a2fbdefbc25ece921bc2ab Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 20 Aug 2023 22:38:03 +0300 Subject: [PATCH 328/597] gpu_neon: rework buffer selection to fix MGS codec and maybe more --- plugins/dfxvideo/gpulib_if.c | 6 +- plugins/gpu-gles/gpulib_if.c | 6 +- plugins/gpu_neon/psx_gpu/psx_gpu.c | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu.h | 14 +- plugins/gpu_neon/psx_gpu/psx_gpu_4x.c | 6 +- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 171 ++++++++++++++--------- plugins/gpu_neon/psx_gpu_if.c | 108 ++++++++++---- plugins/gpu_senquack/gpulib_if.cpp | 6 +- plugins/gpu_unai/gpulib_if.cpp | 6 +- plugins/gpulib/gpu.c | 18 +-- plugins/gpulib/gpu.h | 10 +- plugins/gpulib/vout_pl.c | 3 + 12 files changed, 247 insertions(+), 109 deletions(-) diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index 3a41cd7cc..ba7f16a08 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -299,6 +299,10 @@ void renderer_notify_res_change(void) { } +void renderer_notify_scanout_x_change(int x, int w) +{ +} + extern const unsigned char cmd_lengths[256]; int do_cmd_list(uint32_t *list, int list_len, int *last_cmd) @@ -414,7 +418,7 @@ void renderer_sync_ecmds(uint32_t *ecmds_) cmdSTP((unsigned char *)&ecmds[6]); } -void renderer_update_caches(int x, int y, int w, int h) +void renderer_update_caches(int x, int y, int w, int h, int state_changed) { } diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c index 69285daa7..b592175b8 100644 --- a/plugins/gpu-gles/gpulib_if.c +++ b/plugins/gpu-gles/gpulib_if.c @@ -514,6 +514,10 @@ void 
renderer_notify_res_change(void) { } +void renderer_notify_scanout_x_change(int x, int w) +{ +} + extern const unsigned char cmd_lengths[256]; // XXX: mostly dupe code from soft peops @@ -622,7 +626,7 @@ void renderer_sync_ecmds(uint32_t *ecmds) cmdSTP((unsigned char *)&ecmds[6]); } -void renderer_update_caches(int x, int y, int w, int h) +void renderer_update_caches(int x, int y, int w, int h, int state_changed) { VRAMWrite.x = x; VRAMWrite.y = y; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 85cf89faa..e252d04ee 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -4963,7 +4963,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN; - psx_gpu->enhancement_x_threshold = 256; + psx_gpu->saved_hres = 256; } u64 get_us(void) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index bdd9caec3..0ef957f29 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -185,18 +185,22 @@ typedef struct u32 *reciprocal_table_ptr; // enhancement stuff - u16 *enhancement_buf_ptr; - u16 *enhancement_current_buf_ptr; - u32 enhancement_x_threshold; + u16 *enhancement_buf_ptr; // main alloc + u16 *enhancement_current_buf_ptr; // offset into above, 4 bufs + u32 saved_hres; s16 saved_viewport_start_x; s16 saved_viewport_start_y; s16 saved_viewport_end_x; s16 saved_viewport_end_y; - u8 enhancement_buf_by_x16[64]; + u8 enhancement_buf_by_x16[64]; // 0-3 specifying which buf + u16 enhancement_buf_start[4]; // x pos where buf[n] begins + + u16 enhancement_scanout_x[4]; + u16 enhancement_scanout_select; // Align up to 64 byte boundary to keep the upcoming buffers cache line // aligned, also make reachable with single immediate addition - u8 reserved_a[160]; + u8 reserved_a[142]; // 8KB block_struct blocks[MAX_BLOCKS_PER_ROW]; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c 
b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c index 942b3d30f..d7ec34095 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c @@ -1,6 +1,10 @@ +#define select_enhancement_buf_index(psx_gpu, x) \ + ((psx_gpu)->enhancement_buf_by_x16[(u32)(x) / \ + (1024u / sizeof((psx_gpu)->enhancement_buf_by_x16))]) + #define select_enhancement_buf_ptr(psx_gpu, x) \ ((psx_gpu)->enhancement_buf_ptr + \ - ((psx_gpu)->enhancement_buf_by_x16[(x) / 16] << 20)) + (select_enhancement_buf_index(psx_gpu, x) << 20)) #if !defined(NEON_BUILD) || defined(SIMD_BUILD) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 376225801..32c32fdb5 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -250,10 +250,9 @@ static void do_fill(psx_gpu_struct *psx_gpu, u32 x, u32 y, #define SET_Ex(r, v) #endif -vertex_struct vertexes[4] __attribute__((aligned(32))); - u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) { + vertex_struct vertexes[4] __attribute__((aligned(16))) = {}; u32 current_command = 0, command_length; u32 *list_start = list; @@ -790,26 +789,61 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) #define ENH_BUF_TABLE_STEP (1024 / sizeof(psx_gpu->enhancement_buf_by_x16)) +static int is_new_scanout(psx_gpu_struct *psx_gpu, int x) +{ + int i, scanout_x; + for (i = 0; i < ARRAY_SIZE(psx_gpu->enhancement_scanout_x); i++) + { + scanout_x = psx_gpu->enhancement_scanout_x[i]; + if (x <= scanout_x && scanout_x < x + ENH_BUF_TABLE_STEP) + { + if (x != scanout_x) + log_anomaly("unaligned scanout x: %d,%d\n", scanout_x, x); + return 1; + } + } + return 0; +} + static void update_enhancement_buf_table_from_hres(psx_gpu_struct *psx_gpu) { - u32 b, x, s; + u32 b, x; b = 0; - s = psx_gpu->enhancement_x_threshold; - for (x = 0; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) + psx_gpu->enhancement_buf_by_x16[0] = 
b; + psx_gpu->enhancement_buf_start[0] = 0; + for (x = 1; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) { - if (b < 3 && x * ENH_BUF_TABLE_STEP >= s) - { - s += psx_gpu->enhancement_x_threshold; + if (b < 3 && is_new_scanout(psx_gpu, x * ENH_BUF_TABLE_STEP)) { b++; + psx_gpu->enhancement_buf_start[b] = x * ENH_BUF_TABLE_STEP; } + psx_gpu->enhancement_buf_by_x16[x] = b; } +#if 0 + printf("buf_by_x16:\n"); + for (b = 0; b < 3; b++) { + int first = -1, count = 0; + for (x = 0; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) { + if (psx_gpu->enhancement_buf_by_x16[x] == b) { + if (first < 0) first = x; + count++; + } + } + if (count) { + assert(first * ENH_BUF_TABLE_STEP == psx_gpu->enhancement_buf_start[b]); + printf("%d: %3zd-%zd\n", b, first * ENH_BUF_TABLE_STEP, + (first + count) * ENH_BUF_TABLE_STEP); + } + } +#endif } static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu, u32 x0, u32 len) { +#if 0 u32 x, b; for (x = x0, b = 0; x >= len; b++) @@ -819,6 +853,7 @@ static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu, memset(psx_gpu->enhancement_buf_by_x16 + x0 / ENH_BUF_TABLE_STEP, b, (len + ENH_BUF_TABLE_STEP - 1) / ENH_BUF_TABLE_STEP); +#endif } #define select_enhancement_buf(psx_gpu) \ @@ -844,30 +879,27 @@ static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu, } #define shift_vertices3(v) { \ - v[0]->x *= 2; \ - v[0]->y *= 2; \ - v[1]->x *= 2; \ - v[1]->y *= 2; \ - v[2]->x *= 2; \ - v[2]->y *= 2; \ + v[0]->x <<= 1; \ + v[0]->y <<= 1; \ + v[1]->x <<= 1; \ + v[1]->y <<= 1; \ + v[2]->x <<= 1; \ + v[2]->y <<= 1; \ } #define unshift_vertices3(v) { \ - v[0]->x /= 2; \ - v[0]->y /= 2; \ - v[1]->x /= 2; \ - v[1]->y /= 2; \ - v[2]->x /= 2; \ - v[2]->y /= 2; \ + v[0]->x >>= 1; \ + v[0]->y >>= 1; \ + v[1]->x >>= 1; \ + v[1]->y >>= 1; \ + v[2]->x >>= 1; \ + v[2]->y >>= 1; \ } #define shift_triangle_area() \ psx_gpu->triangle_area *= 4 -extern void scale2x_tiles8(void *dst, const void *src, int w8, int h); - 
#ifndef NEON_BUILD -// TODO? void scale2x_tiles8(void *dst, const void *src, int w8, int h) { uint16_t* d = (uint16_t*)dst; @@ -938,6 +970,16 @@ void scale2x_tiles8(void *dst, const void *src, int w8, int h) static int disable_main_render; +static int check_enhanced_range(psx_gpu_struct *psx_gpu, int x, int x_end) +{ + // simple reject to avoid oveflowing the 1024 width + // (assume some offscreen render-to-texture thing) + if (x >= (int)(psx_gpu->saved_viewport_start_x + 512)) + return 0; + + return 1; +} + static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 current_command) { @@ -949,37 +991,22 @@ static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, if (!disable_main_render) render_triangle_p(psx_gpu, vertex_ptrs, current_command); + if (!check_enhanced_range(psx_gpu, vertex_ptrs[0]->x, vertex_ptrs[2]->x)) + return; + enhancement_enable(); shift_vertices3(vertex_ptrs); shift_triangle_area(); render_triangle_p(psx_gpu, vertex_ptrs, current_command); + unshift_vertices3(vertex_ptrs); } static void do_quad_enhanced(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 current_command) { - vertex_struct *vertex_ptrs[3]; - - if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) { - if (!disable_main_render) - render_triangle_p(psx_gpu, vertex_ptrs, current_command); - - enhancement_enable(); - shift_vertices3(vertex_ptrs); - shift_triangle_area(); - render_triangle_p(psx_gpu, vertex_ptrs, current_command); - unshift_vertices3(vertex_ptrs); - } + do_triangle_enhanced(psx_gpu, vertexes, current_command); enhancement_disable(); - if (prepare_triangle(psx_gpu, &vertexes[1], vertex_ptrs)) { - if (!disable_main_render) - render_triangle_p(psx_gpu, vertex_ptrs, current_command); - - enhancement_enable(); - shift_vertices3(vertex_ptrs); - shift_triangle_area(); - render_triangle_p(psx_gpu, vertex_ptrs, current_command); - } + do_triangle_enhanced(psx_gpu, &vertexes[1], current_command); } #if 0 @@ -1062,6 +1089,7 @@ static void 
do_sprite_enhanced(psx_gpu_struct *psx_gpu, int x, int y, u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) { + vertex_struct vertexes[4] __attribute__((aligned(16))) = {}; u32 current_command = 0, command_length; u32 *list_start = list; @@ -1097,12 +1125,24 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 width = list_s16[4] & 0x3FF; u32 height = list_s16[5] & 0x1FF; u32 color = list[0] & 0xFFFFFF; + u32 i1, i2; x &= ~0xF; width = ((width + 0xF) & ~0xF); + if (width == 0 || height == 0) + break; do_fill(psx_gpu, x, y, width, height, color); + i1 = select_enhancement_buf_index(psx_gpu, x); + i2 = select_enhancement_buf_index(psx_gpu, x + width - 1); + if (i1 != i2) { + sync_enhancement_buffers(x, y, width, height); + break; + } + if (x >= psx_gpu->enhancement_buf_start[i1] + psx_gpu->saved_hres) + break; + psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x); x *= 2; y *= 2; @@ -1353,7 +1393,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 height = list_s16[5] & 0x1FF; render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]); - do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); + + if (check_enhanced_range(psx_gpu, x, x + width)) + do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); break; } @@ -1370,7 +1412,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, u, v, width, height, current_command, list[0]); - do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); + + if (check_enhanced_range(psx_gpu, x, x + width)) + do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); break; } @@ -1383,7 +1427,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]); - do_sprite_enhanced(psx_gpu, x, y, 0, 0, 1, 1, 
list[0]); + + if (check_enhanced_range(psx_gpu, x, x + 1)) + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 1, 1, list[0]); break; } @@ -1396,7 +1442,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]); - do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]); + + if (check_enhanced_range(psx_gpu, x, x + 8)) + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]); break; } @@ -1414,7 +1462,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, u, v, 8, 8, current_command, list[0]); - do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]); + + if (check_enhanced_range(psx_gpu, x, x + 8)) + do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]); break; } @@ -1427,7 +1477,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]); - do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]); + + if (check_enhanced_range(psx_gpu, x, x + 16)) + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]); break; } @@ -1444,7 +1496,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, set_clut(psx_gpu, list_s16[5]); render_sprite(psx_gpu, x, y, u, v, 16, 16, current_command, list[0]); - do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]); + + if (check_enhanced_range(psx_gpu, x, x + 16)) + do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]); break; } @@ -1456,21 +1510,12 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 dy = list_s16[5] & 0x1FF; u32 w = ((list_s16[6] - 1) & 0x3FF) + 1; u32 h = ((list_s16[7] - 1) & 0x1FF) + 1; - u16 *buf; if (sx == dx && sy == dy && psx_gpu->mask_msb == 0) break; render_block_move(psx_gpu, sx, sy, dx, dy, w, h); - if (dy + h > 512) - h = 512 - dy; - sx = sx & 
~7; // FIXME? - dx = dx * 2 & ~7; - dy *= 2; - w = (w + 7) / 8; - buf = select_enhancement_buf_ptr(psx_gpu, dx / 2); - scale2x_tiles8(buf + dy * 1024 + dx, - psx_gpu->vram_ptr + sy * 1024 + sx, w, h); + sync_enhancement_buffers(dx, dy, w, h); break; } @@ -1542,7 +1587,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, psx_gpu->saved_viewport_start_y = viewport_start_y; w = (u32)psx_gpu->viewport_end_x - (u32)viewport_start_x + 1; - d = psx_gpu->enhancement_x_threshold - w; + d = psx_gpu->saved_hres - w; if(-16 <= d && d <= 16) { update_enhancement_buf_table_from_x(psx_gpu, @@ -1579,7 +1624,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, psx_gpu->saved_viewport_end_y = viewport_end_y; w = (u32)viewport_end_x - (u32)psx_gpu->viewport_start_x + 1; - d = psx_gpu->enhancement_x_threshold - w; + d = psx_gpu->saved_hres - w; if(-16 <= d && d <= 16) { update_enhancement_buf_table_from_x(psx_gpu, diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index 30faee256..69a2a1bbc 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -9,8 +9,11 @@ */ #include +#include #include +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + extern const unsigned char cmd_lengths[256]; #define command_lengths cmd_lengths @@ -21,9 +24,12 @@ static int initialized; #define SET_Ex(r, v) \ ex_regs[r] = v +static __attribute__((noinline)) void +sync_enhancement_buffers(int x, int y, int w, int h); + +#include "../gpulib/gpu.h" #include "psx_gpu/psx_gpu.c" #include "psx_gpu/psx_gpu_parse.c" -#include "../gpulib/gpu.h" static psx_gpu_struct egpu __attribute__((aligned(256))); @@ -110,8 +116,12 @@ static __attribute__((noinline)) void sync_enhancement_buffers(int x, int y, int w, int h) { const int step_x = 1024 / sizeof(egpu.enhancement_buf_by_x16); + int hres = egpu.saved_hres; + int x_buf, w1, s, fb_index; u16 *src, *dst; - int w1, fb_index; + + if (egpu.enhancement_buf_ptr == NULL) + return; w += 
x & (step_x - 1); x &= ~(step_x - 1); @@ -119,18 +129,29 @@ sync_enhancement_buffers(int x, int y, int w, int h) if (y + h > 512) h = 512 - y; + // find x_buf which is an offset into this enhancement_buf + fb_index = egpu.enhancement_buf_by_x16[x / step_x]; + x_buf = x - egpu.enhancement_buf_start[fb_index]; + while (w > 0) { fb_index = egpu.enhancement_buf_by_x16[x / step_x]; - for (w1 = 0; w > 0; w1++, w -= step_x) + for (w1 = 0; w > 0 && x_buf < hres; x_buf += step_x, w1++, w -= step_x) if (fb_index != egpu.enhancement_buf_by_x16[x / step_x + w1]) break; + // skip further unneeded data, if any + for (s = 0; w > 0; s++, w -= step_x) + if (fb_index != egpu.enhancement_buf_by_x16[x / step_x + w1 + s]) + break; - src = gpu.vram + y * 1024 + x; - dst = select_enhancement_buf_ptr(&egpu, x); - dst += (y * 1024 + x) * 2; - scale2x_tiles8(dst, src, w1 * step_x / 8, h); + if (w1 > 0) { + src = gpu.vram + y * 1024 + x; + dst = select_enhancement_buf_ptr(&egpu, x); + dst += (y * 1024 + x) * 2; + scale2x_tiles8(dst, src, w1 * step_x / 8, h); + } - x += w1 * step_x; + x += (w1 + s) * step_x; + x_buf = 0; } } @@ -139,11 +160,18 @@ void renderer_sync_ecmds(uint32_t *ecmds) gpu_parse(&egpu, ecmds + 1, 6 * 4, NULL); } -void renderer_update_caches(int x, int y, int w, int h) +void renderer_update_caches(int x, int y, int w, int h, int state_changed) { update_texture_cache_region(&egpu, x, y, x + w - 1, y + h - 1); - if (gpu.state.enhancement_active && !(gpu.status & PSX_GPU_STATUS_RGB24)) + + if (gpu.state.enhancement_active) { + if (state_changed) { + egpu.saved_hres = 0; + renderer_notify_res_change(); + return; + } sync_enhancement_buffers(x, y, w, h); + } } void renderer_flush_queues(void) @@ -162,11 +190,44 @@ void renderer_set_interlace(int enable, int is_odd) void renderer_notify_res_change(void) { - // note: must keep it multiple of 8 - if (egpu.enhancement_x_threshold != gpu.screen.hres) + renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres); +} + +void 
renderer_notify_scanout_x_change(int x, int w) +{ + int hres = (w + 15) & ~15; + int max_bufs = ARRAY_SIZE(egpu.enhancement_scanout_x); + int need_update = 0; + int i; + + if (!gpu.state.enhancement_active) + return; + + assert(!(max_bufs & (max_bufs - 1))); + if (egpu.saved_hres != hres) { + for (i = 0; i < max_bufs; i++) + egpu.enhancement_scanout_x[i] = x; + need_update = 1; + } + + if (egpu.enhancement_scanout_x[egpu.enhancement_scanout_select] != x) { - egpu.enhancement_x_threshold = gpu.screen.hres; + // maybe triple buffering? + for (i = 0; i < max_bufs; i++) + if (egpu.enhancement_scanout_x[i] == x) + break; + if (i == max_bufs) + need_update = 1; + + egpu.enhancement_scanout_x[egpu.enhancement_scanout_select] = x; + } + egpu.enhancement_scanout_select++; + egpu.enhancement_scanout_select &= max_bufs - 1; + if (need_update) + { + egpu.saved_hres = hres; update_enhancement_buf_table_from_hres(&egpu); + sync_enhancement_buffers(0, 0, 1024, 512); } } @@ -174,23 +235,18 @@ void renderer_notify_res_change(void) void renderer_set_config(const struct rearmed_cbs *cbs) { - static int enhancement_was_on; - - disable_main_render = cbs->gpu_neon.enhancement_no_main; - if (egpu.enhancement_buf_ptr != NULL && cbs->gpu_neon.enhancement_enable - && !enhancement_was_on) - { - sync_enhancement_buffers(0, 0, 1024, 512); - } - enhancement_was_on = cbs->gpu_neon.enhancement_enable; - if (!initialized) { initialize_psx_gpu(&egpu, gpu.vram); initialized = 1; } - - if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL) - map_enhancement_buffer(); if (cbs->pl_set_gpu_caps) cbs->pl_set_gpu_caps(GPU_CAP_SUPPORTS_2X); + + disable_main_render = cbs->gpu_neon.enhancement_no_main; + if (gpu.state.enhancement_enable) { + if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL) + map_enhancement_buffer(); + } } + +// vim:ts=2:sw=2:expandtab diff --git a/plugins/gpu_senquack/gpulib_if.cpp b/plugins/gpu_senquack/gpulib_if.cpp index 5efc7d9de..0bc63c6d7 100644 --- 
a/plugins/gpu_senquack/gpulib_if.cpp +++ b/plugins/gpu_senquack/gpulib_if.cpp @@ -144,6 +144,10 @@ void renderer_notify_res_change(void) */ } +void renderer_notify_scanout_x_change(int x, int w) +{ +} + #ifdef USE_GPULIB // Handles GP0 draw settings commands 0xE1...0xE6 static void gpuGP0Cmd_0xEx(gpu_senquack_t &gpu_senquack, u32 cmd_word) @@ -613,7 +617,7 @@ void renderer_sync_ecmds(uint32_t *ecmds) do_cmd_list(&ecmds[1], 6, &dummy); } -void renderer_update_caches(int x, int y, int w, int h) +void renderer_update_caches(int x, int y, int w, int h, int state_changed) { } diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 45eac41d2..0064aaa37 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -163,6 +163,10 @@ void renderer_notify_res_change(void) { } +void renderer_notify_scanout_x_change(int x, int w) +{ +} + extern const unsigned char cmd_lengths[256]; int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) @@ -520,7 +524,7 @@ void renderer_sync_ecmds(uint32_t *ecmds) do_cmd_list(&ecmds[1], 6, &dummy); } -void renderer_update_caches(int x, int y, int w, int h) +void renderer_update_caches(int x, int y, int w, int h, int state_changed) { } diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 931583f39..b23f8a88f 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -24,13 +24,8 @@ #define noinline #endif -#define gpu_log(fmt, ...) \ - printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__) - //#define log_io gpu_log #define log_io(...) -//#define log_anomaly gpu_log -#define log_anomaly(...) 
struct psx_gpu gpu; @@ -63,6 +58,7 @@ static noinline void do_reset(void) gpu.screen.hres = gpu.screen.w = 256; gpu.screen.vres = gpu.screen.h = 240; gpu.screen.x = gpu.screen.y = 0; + renderer_notify_res_change(); } static noinline void update_width(void) @@ -225,9 +221,11 @@ long GPUinit(void) ret = vout_init(); ret |= renderer_init(); + memset(&gpu.state, 0, sizeof(gpu.state)); + memset(&gpu.frameskip, 0, sizeof(gpu.frameskip)); + gpu.zero = 0; gpu.state.frame_count = &gpu.zero; gpu.state.hcnt = &gpu.zero; - gpu.frameskip.active = 0; gpu.cmd_len = 0; do_reset(); @@ -287,6 +285,7 @@ void GPUwriteStatus(uint32_t data) case 0x05: gpu.screen.src_x = data & 0x3ff; gpu.screen.src_y = (data >> 10) & 0x1ff; + renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres); if (gpu.frameskip.set) { decide_frameskip_allow(gpu.ex_regs[3]); if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) { @@ -434,7 +433,7 @@ static void finish_vram_transfer(int is_read) gpu.status &= ~PSX_GPU_STATUS_IMG; else renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y, - gpu.dma_start.w, gpu.dma_start.h); + gpu.dma_start.w, gpu.dma_start.h, 0); } static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) @@ -740,7 +739,7 @@ long GPUfreeze(uint32_t type, struct GPUFreeze *freeze) GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1)); } renderer_sync_ecmds(gpu.ex_regs); - renderer_update_caches(0, 0, 1024, 512); + renderer_update_caches(0, 0, 1024, 512, 1); break; } @@ -775,6 +774,9 @@ void GPUupdateLace(void) } vout_update(); + if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active) + renderer_update_caches(0, 0, 1024, 512, 1); + gpu.state.enhancement_was_active = gpu.state.enhancement_active; gpu.state.fb_dirty = 0; gpu.state.blanked = 0; } diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 446a0234d..b6bd60afa 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -13,6 +13,12 @@ #include +#define gpu_log(fmt, ...) 
\ + printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__) + +//#define log_anomaly gpu_log +#define log_anomaly(...) + #ifdef __cplusplus extern "C" { #endif @@ -69,6 +75,7 @@ struct psx_gpu { uint32_t blanked:1; uint32_t enhancement_enable:1; uint32_t enhancement_active:1; + uint32_t enhancement_was_active:1; uint32_t dims_changed:1; uint32_t *frame_count; uint32_t *hcnt; /* hsync count */ @@ -111,11 +118,12 @@ struct rearmed_cbs; int renderer_init(void); void renderer_finish(void); void renderer_sync_ecmds(uint32_t * ecmds); -void renderer_update_caches(int x, int y, int w, int h); +void renderer_update_caches(int x, int y, int w, int h, int state_changed); void renderer_flush_queues(void); void renderer_set_interlace(int enable, int is_odd); void renderer_set_config(const struct rearmed_cbs *config); void renderer_notify_res_change(void); +void renderer_notify_scanout_x_change(int x, int w); int vout_init(void); int vout_finish(void); diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index 26827d0e9..eadf57ce6 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -75,8 +75,11 @@ void vout_update(void) check_mode_change(0); if (gpu.state.enhancement_active) { + if (!gpu.state.enhancement_was_active) + return; // buffer not ready yet vram = gpu.get_enhancement_bufer(&src_x, &src_y, &w, &h, &vram_h); x *= 2; y *= 2; + src_x2 *= 2; } if (src_y + h > vram_h) { From 2d658c89305e390860565529ff1fff45af2429c6 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 24 Aug 2023 23:07:56 +0300 Subject: [PATCH 329/597] gpu_neon: don't crash on large primitives in enhancement mode --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 96 +++++++++++++-------- plugins/gpu_neon/psx_gpu/psx_gpu.h | 18 ++-- plugins/gpu_neon/psx_gpu/psx_gpu_4x.c | 6 +- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 30 +++---- plugins/gpu_neon/psx_gpu/psx_gpu_simd.c | 40 +++++---- plugins/gpu_neon/psx_gpu/psx_gpu_simd.h | 10 +-- 
plugins/gpu_neon/psx_gpu/vector_ops.h | 9 ++ 7 files changed, 125 insertions(+), 84 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index e252d04ee..370d8f2a6 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "common.h" #ifndef NEON_BUILD @@ -772,24 +773,23 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, printf("mismatch on %s %s: %x vs %x\n", #_a, #_b, _a, _b) \ -#ifndef NDEBUG -#define setup_spans_debug_check(span_edge_data_element) \ -{ \ - u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data; \ - if (_num_spans > MAX_SPANS) \ - *(volatile int *)0 = 1; \ - if (_num_spans < psx_gpu->num_spans) \ - { \ - if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW) \ - *(volatile int *)0 = 2; \ - if(span_edge_data_element.y >= 2048) \ - *(volatile int *)0 = 3; \ - } \ -} \ - +#if !defined(NEON_BUILD) && !defined(NDEBUG) +static void setup_spans_debug_check(psx_gpu_struct *psx_gpu, + edge_data_struct *span_edge_data_element) +{ + u32 _num_spans = span_edge_data_element - psx_gpu->span_edge_data; + if (_num_spans > MAX_SPANS) + *(volatile int *)0 = 1; + if (_num_spans < psx_gpu->num_spans) + { + if(span_edge_data_element->num_blocks > MAX_BLOCKS_PER_ROW) + *(volatile int *)0 = 2; + if(span_edge_data_element->y >= 2048) + *(volatile int *)0 = 3; + } +} #else -#define setup_spans_debug_check(span_edge_data_element) \ - +#define setup_spans_debug_check(psx_gpu, span_edge_data_element) #endif #define setup_spans_prologue_alternate_yes() \ @@ -856,6 +856,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, span_b_offset = psx_gpu->span_b_offset; \ \ vec_8x16u c_0x0001; \ + vec_4x16u c_max_blocks_per_row; \ \ dup_8x16b(c_0x0001, 0x0001); \ dup_8x16b(left_edge, psx_gpu->viewport_start_x); \ @@ -864,6 +865,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, 
vertex_struct *a, dup_4x16b(c_0x04, 0x04); \ dup_4x16b(c_0x07, 0x07); \ dup_4x16b(c_0xFFFE, 0xFFFE); \ + dup_4x16b(c_max_blocks_per_row, MAX_BLOCKS_PER_ROW); \ #define compute_edge_delta_x2() \ @@ -1087,6 +1089,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, and_4x16b(span_shift, left_right_x_16.high, c_0x07); \ shl_variable_4x16b(span_shift, c_0xFFFE, span_shift); \ shr_4x16b(left_right_x_16.high, left_right_x_16.high, 3); \ + min_4x16b(left_right_x_16.high, left_right_x_16.high, c_max_blocks_per_row); \ \ u32 i; \ for(i = 0; i < 4; i++) \ @@ -1095,7 +1098,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, span_edge_data[i].num_blocks = left_right_x_16.high.e[i]; \ span_edge_data[i].right_mask = span_shift.e[i]; \ span_edge_data[i].y = y_x4.e[i]; \ - setup_spans_debug_check(span_edge_data[i]); \ + setup_spans_debug_check(psx_gpu, &span_edge_data[i]); \ } \ \ span_edge_data += 4; \ @@ -1125,7 +1128,9 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, \ setup_spans_prologue_b(); \ \ - if(height > 0) \ + if (height > 512) \ + height = 512; \ + if (height > 0) \ { \ y_x4.e[0] = y_a; \ y_x4.e[1] = y_a + 1; \ @@ -1173,7 +1178,9 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, \ setup_spans_prologue_b(); \ \ - if(height > 0) \ + if (height > 512) \ + height = 512; \ + if (height > 0) \ { \ y_x4.e[0] = y_a; \ y_x4.e[1] = y_a - 1; \ @@ -1363,7 +1370,9 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_spans_prologue_b(); - if(height_minor_a > 0) + if (height_minor_a > 512) + height_minor_a = 512; + if (height_minor_a > 0) { y_x4.e[0] = y_a; y_x4.e[1] = y_a - 1; @@ -1405,7 +1414,9 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_spans_clip(increment, no); } - if(height_minor_b > 0) + if (height_minor_b > 512) + height_minor_b = 512; + if (height_minor_b > 0) { y_x4.e[0] = y_a; y_x4.e[1] = y_a + 1; @@ -3045,6 +3056,7 @@ 
static void render_triangle_p(psx_gpu_struct *psx_gpu, } } } + assert(psx_gpu->span_edge_data[0].y < 1024u); u32 render_state = flags & (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | @@ -3914,17 +3926,9 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, #ifndef NEON_BUILD -void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, +void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { - if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE | - RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 && - (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0) - { - setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color); - return; - } - u32 right_width = ((width - 1) & 0x7) + 1; u32 right_mask_bits = (0xFF << right_width); u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x; @@ -3992,8 +3996,9 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, #endif -void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, - s32 u, s32 v, s32 width, s32 height, u32 color) +static void __attribute__((noinline)) +setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color) { u32 r = color & 0xFF; u32 g = (color >> 8) & 0xFF; @@ -4007,7 +4012,7 @@ void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 num_width; - if(psx_gpu->num_blocks > MAX_BLOCKS) + if(psx_gpu->num_blocks) { flush_render_block_buffer(psx_gpu); } @@ -4051,6 +4056,29 @@ void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, } } +void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color); + +void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color) +{ + if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE | + 
RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 && + (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0) + { + setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color); + return; + } + + while (width > 0) + { + s32 w1 = width > 512 ? 512 : width; + setup_sprite_untextured_512(psx_gpu, x, y, 0, 0, w1, height, color); + x += 512; + width -= 512; + } +} + #define setup_sprite_blocks_switch_textured(texture_mode) \ setup_sprite_##texture_mode \ diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 0ef957f29..4eb622dfb 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -15,6 +15,14 @@ #ifndef PSX_GPU_H #define PSX_GPU_H +#define MAX_SPANS 512 +#define MAX_BLOCKS 64 +#define MAX_BLOCKS_PER_ROW 128 + +#define SPAN_DATA_BLOCKS_SIZE 32 + +#ifndef __ASSEMBLER__ + #include "vector_types.h" typedef enum @@ -101,12 +109,6 @@ typedef struct vec_8x16u dither_offsets; } block_struct; -#define MAX_SPANS 512 -#define MAX_BLOCKS 64 -#define MAX_BLOCKS_PER_ROW 128 - -#define SPAN_DATA_BLOCKS_SIZE 32 - typedef struct render_block_handler_struct render_block_handler_struct; typedef struct @@ -260,5 +262,5 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, const vertex_struct * __restrict__ a, const vertex_struct * __restrict__ b, const vertex_struct * __restrict__ c); -#endif - +#endif // __ASSEMBLER__ +#endif // PSX_GPU_H diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c index d7ec34095..bd6c7a1f7 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c @@ -237,7 +237,11 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, static void setup_sprite_untextured_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { - setup_sprite_untextured(psx_gpu, x, y, u, v, width * 2, height * 2, color); + width *= 2; + height *= 2; + if 
(width > 1024) + width = 1024; + setup_sprite_untextured(psx_gpu, x, y, u, v, width, height, color); } #define setup_sprite_blocks_switch_textured_4x(texture_mode) \ diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index c62c1baa6..f0ba39f39 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -13,15 +13,9 @@ * General Public License for more details. */ -#define MAX_SPANS 512 -#define MAX_BLOCKS 64 -#define MAX_BLOCKS_PER_ROW 128 - -#define RENDER_STATE_MASK_EVALUATE 0x20 -#define RENDER_FLAGS_MODULATE_TEXELS 0x1 -#define RENDER_FLAGS_BLEND 0x2 #define RENDER_INTERLACE_ENABLED 0x1 +#include "psx_gpu.h" #include "psx_gpu_offsets.h" #define psx_gpu_b_dx_offset (psx_gpu_b_block_span_offset + 4) @@ -228,7 +222,6 @@ #ifdef __MACH__ #define flush_render_block_buffer _flush_render_block_buffer -#define setup_sprite_untextured_simple _setup_sprite_untextured_simple #define update_texture_8bpp_cache _update_texture_8bpp_cache #endif @@ -565,6 +558,8 @@ function(compute_all_gradients) #define left_x_32_low d22 #define left_x_32_high d23 +#define tmp_max_blocks d20 + #define edges_xy q0 #define edges_dx_dy d2 #define edge_shifts d3 @@ -819,8 +814,10 @@ function(compute_all_gradients) str b, [span_b_offset], #4; \ setup_spans_adjust_interpolants_##direction(); \ \ + vmov.u16 tmp_max_blocks, #MAX_BLOCKS_PER_ROW; \ vshr.u16 left_right_x_16_high, left_right_x_16_high, #3; \ vshl.u16 span_shifts, c_0xFFFE, span_shifts; \ + vmin.u16 left_right_x_16_high, left_right_x_16_high, tmp_max_blocks; \ \ vst4.u16 { left_right_x_16, span_shifts_y }, [span_edge_data]!; \ \ @@ -867,8 +864,10 @@ function(compute_all_gradients) str b, [span_b_offset], #4; \ setup_spans_adjust_interpolants_##direction(); \ \ - vshl.u16 span_shifts, c_0xFFFE, span_shifts; \ + vmov.u16 tmp_max_blocks, #MAX_BLOCKS_PER_ROW; \ vshr.u16 left_right_x_16_high, left_right_x_16_high, #3; \ + vshl.u16 span_shifts, 
c_0xFFFE, span_shifts; \ + vmin.u16 left_right_x_16_high, left_right_x_16_high, tmp_max_blocks; \ \ vst4.u16 { left_right_x_16, span_shifts_y }, [span_edge_data]!; \ \ @@ -908,7 +907,9 @@ function(compute_all_gradients) ble 1f; \ \ orr temp, y_a, y_a, lsl #16; \ + cmp height, #512; \ add temp, temp, #(1 << 16); \ + movgt height, #512; \ add y_a, temp, #2; \ add y_a, y_a, #(2 << 16); \ vmov y_x4, temp, y_a; \ @@ -963,7 +964,9 @@ function(compute_all_gradients) ble 1f; \ \ orr temp, y_a, y_a, lsl #16; \ + cmp height, #512; \ sub temp, temp, #(1 << 16); \ + movgt height, #512; \ sub y_a, temp, #2; \ sub y_a, y_a, #(2 << 16); \ vmov y_x4, temp, y_a; \ @@ -5826,14 +5829,7 @@ function(setup_sprite_16bpp_4x) .align 3 -function(setup_sprite_untextured) - ldrh r12, [psx_gpu, #psx_gpu_render_state_offset] - tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS \ - | RENDER_FLAGS_BLEND) - ldrbeq r12, [psx_gpu, #psx_gpu_render_mode_offset] - tsteq r12, #RENDER_INTERLACE_ENABLED - beq setup_sprite_untextured_simple - +function(setup_sprite_untextured_512) stmdb sp!, { r4 - r11, r14 } ldr width, [sp, #40] diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c index 00392549d..ac4af9daa 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c @@ -115,6 +115,7 @@ typedef union #define gvhaddq_u16(d, a, b) d.u16 = vhaddq_u16(a.u16, b.u16) #define gvmax_s16(d, a, b) d.s16 = vmax_s16(a.s16, b.s16) #define gvmin_s16(d, a, b) d.s16 = vmin_s16(a.s16, b.s16) +#define gvmin_u16(d, a, b) d.u16 = vmin_u16(a.u16, b.u16) #define gvminq_u8(d, a, b) d.u8 = vminq_u8(a.u8, b.u8) #define gvminq_u16(d, a, b) d.u16 = vminq_u16(a.u16, b.u16) #define gvmla_s32(d, a, b) d.s32 = vmla_s32(d.s32, a.s32, b.s32) @@ -353,7 +354,8 @@ typedef union } #endif // !__SSSE3__ #ifdef __SSE4_1__ -#define gvminq_u16(d, a, b) d.m = _mm_min_epu16(a.m, b.m) +#define gvmin_u16(d, a, b) d.m = _mm_min_epu16(a.m, b.m) +#define 
gvminq_u16 gvmin_u16 #define gvmovl_u8(d, s) d.m = _mm_cvtepu8_epi16(s.m) #define gvmovl_s8(d, s) d.m = _mm_cvtepi8_epi16(s.m) #define gvmovl_s32(d, s) d.m = _mm_cvtepi32_epi64(s.m) @@ -463,11 +465,12 @@ typedef union // can do this because the caller needs the msb clear #define gvhaddq_u16(d, a, b) d.u16 = (a.u16 + b.u16) >> 1 #endif -#ifndef gvminq_u16 -#define gvminq_u16(d, a, b) { \ +#ifndef gvmin_u16 +#define gvmin_u16(d, a, b) { \ gvu16 t_ = a.u16 < b.u16; \ d.u16 = (a.u16 & t_) | (b.u16 & ~t_); \ } +#define gvminq_u16 gvmin_u16 #endif #ifndef gvmlsq_s32 #define gvmlsq_s32(d, a, b) d.s32 -= a.s32 * b.s32 @@ -1093,6 +1096,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, span_b_offset = psx_gpu->span_b_offset; \ \ vec_8x16u c_0x0001; \ + vec_4x16u c_max_blocks_per_row; \ \ gvdupq_n_u16(c_0x0001, 0x0001); \ gvdupq_n_u16(left_edge, psx_gpu->viewport_start_x); \ @@ -1101,6 +1105,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, gvdup_n_u16(c_0x04, 0x04); \ gvdup_n_u16(c_0x07, 0x07); \ gvdup_n_u16(c_0xFFFE, 0xFFFE); \ + gvdup_n_u16(c_max_blocks_per_row, MAX_BLOCKS_PER_ROW); \ #if defined(__ARM_NEON) || defined(__ARM_NEON__) // better encoding, remaining bits are unused anyway @@ -1351,6 +1356,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, gvand(span_shift, left_right_x_16_hi, c_0x07); \ setup_spans_make_span_shift(span_shift); \ gvshr_n_u16(left_right_x_16_hi, left_right_x_16_hi, 3); \ + gvmin_u16(left_right_x_16_hi, left_right_x_16_hi, c_max_blocks_per_row); \ \ gvst4_pi_u16(left_right_x_16_lo, left_right_x_16_hi, span_shift, y_x4, \ span_edge_data); \ @@ -1380,7 +1386,9 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, \ setup_spans_prologue_b(); \ \ - if(height > 0) \ + if (height > 512) \ + height = 512; \ + if (height > 0) \ { \ u64 y_x4_ = ((u64)(y_a + 3) << 48) | ((u64)(u16)(y_a + 2) << 32) \ | (u32)((y_a + 1) << 16) | (u16)y_a; \ @@ -1426,7 +1434,9 @@ void 
compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, \ setup_spans_prologue_b(); \ \ - if(height > 0) \ + if (height > 512) \ + height = 512; \ + if (height > 0) \ { \ u64 y_x4_ = ((u64)(y_a - 3) << 48) | ((u64)(u16)(y_a - 2) << 32) \ | (u32)((y_a - 1) << 16) | (u16)y_a; \ @@ -1642,7 +1652,9 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_spans_prologue_b(); - if(height_minor_a > 0) + if (height_minor_a > 512) + height_minor_a = 512; + if (height_minor_a > 0) { u64 y_x4_ = ((u64)(y_a - 3) << 48) | ((u64)(u16)(y_a - 2) << 32) | (u32)((y_a - 1) << 16) | (u16)y_a; @@ -1683,7 +1695,9 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_spans_clip(increment, no); } - if(height_minor_b > 0) + if (height_minor_b > 512) + height_minor_b = 512; + if (height_minor_b > 0) { u64 y_x4_ = ((u64)(y_a + 3) << 48) | ((u64)(u16)(y_a + 2) << 32) | (u32)((y_a + 1) << 16) | (u16)y_a; @@ -3167,19 +3181,11 @@ void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu) { } -void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, +void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { - if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE | - RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 && - (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0) - { - setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color); - return; - } - #if 0 - setup_sprite_untextured_(psx_gpu, x, y, u, v, width, height, color); + setup_sprite_untextured_512_(psx_gpu, x, y, u, v, width, height, color); return; #endif u32 right_width = ((width - 1) & 0x7) + 1; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.h b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.h index a8080aff8..3d1d1bdd1 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.h @@ -84,8 +84,7 @@ #define setup_sprite_4bpp_4x 
setup_sprite_4bpp_4x_ #define setup_sprite_8bpp_4x setup_sprite_8bpp_4x_ #define setup_sprite_16bpp_4x setup_sprite_16bpp_4x_ -#define setup_sprite_untextured setup_sprite_untextured_ -#define setup_sprite_untextured_simple setup_sprite_untextured_simple_ +#define setup_sprite_untextured_512 setup_sprite_untextured_512_ #define scale2x_tiles8 scale2x_tiles8_ #endif @@ -205,10 +204,8 @@ void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color); -void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, +void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color); -void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, - s32 u, s32 v, s32 width, s32 height, u32 color); void scale2x_tiles8(void *dst, const void *src, int w8, int h); @@ -275,7 +272,6 @@ void scale2x_tiles8(void *dst, const void *src, int w8, int h); #undef setup_sprite_4bpp_4x #undef setup_sprite_8bpp_4x #undef setup_sprite_16bpp_4x -#undef setup_sprite_untextured -#undef setup_sprite_untextured_simple +#undef setup_sprite_untextured_512 #undef scale2x_tiles8 #endif diff --git a/plugins/gpu_neon/psx_gpu/vector_ops.h b/plugins/gpu_neon/psx_gpu/vector_ops.h index 6f2bcbf7b..6bc76433e 100644 --- a/plugins/gpu_neon/psx_gpu/vector_ops.h +++ b/plugins/gpu_neon/psx_gpu/vector_ops.h @@ -525,6 +525,15 @@ (dest).e[_i] = result; \ }) \ +#define min_4x16b(dest, source_a, source_b) \ + foreach_element(4, \ + { \ + s32 result = (source_a).e[_i]; \ + if((source_b).e[_i] < result) \ + result = (source_b).e[_i]; \ + (dest).e[_i] = result; \ + }) \ + #define min_8x16b(dest, source_a, source_b) \ foreach_element(8, \ { \ From 39db3a96ec328dfc3c6f228605b6edb27b92dd68 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 25 Aug 2023 00:06:31 +0300 Subject: [PATCH 330/597] gpu_neon: some hack to 
enhanced mode --- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 72 +++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 32c32fdb5..4dd21e732 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -875,7 +875,7 @@ static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu, psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y * 2; \ psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2 + 1; \ psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2 + 1; \ - psx_gpu->uvrgb_phase = 0x1000; \ + psx_gpu->uvrgb_phase = 0x7fff; \ } #define shift_vertices3(v) { \ @@ -980,6 +980,74 @@ static int check_enhanced_range(psx_gpu_struct *psx_gpu, int x, int x_end) return 1; } +static int is_in_array(int val, int array[], int len) +{ + int i; + for (i = 0; i < len; i++) + if (array[i] == val) + return 1; + return 0; +} + +static int make_members_unique(int array[], int len) +{ + int i, j; + for (i = j = 1; i < len; i++) + if (!is_in_array(array[i], array, j)) + array[j++] = array[i]; + + if (array[0] > array[1]) { + i = array[0]; array[0] = array[1]; array[1] = i; + } + return j; +} + +static void patch_u(vertex_struct *vertex_ptrs, int count, int old, int new) +{ + int i; + for (i = 0; i < count; i++) + if (vertex_ptrs[i].u == old) + vertex_ptrs[i].u = new; +} + +static void patch_v(vertex_struct *vertex_ptrs, int count, int old, int new) +{ + int i; + for (i = 0; i < count; i++) + if (vertex_ptrs[i].v == old) + vertex_ptrs[i].v = new; +} + +static void uv_hack(vertex_struct *vertex_ptrs, int vertex_count) +{ + int i, u[4], v[4]; + + for (i = 0; i < vertex_count; i++) { + u[i] = vertex_ptrs[i].u; + v[i] = vertex_ptrs[i].v; + } + if (make_members_unique(u, vertex_count) == 2 && u[1] - u[0] >= 8) { + if ((u[0] & 7) == 7) { + patch_u(vertex_ptrs, vertex_count, u[0], u[0] + 1); + 
//printf("u hack: %3u-%3u -> %3u-%3u\n", u[0], u[1], u[0]+1, u[1]); + } + else if ((u[1] & 7) == 0 || u[1] - u[0] > 128) { + patch_u(vertex_ptrs, vertex_count, u[1], u[1] - 1); + //printf("u hack: %3u-%3u -> %3u-%3u\n", u[0], u[1], u[0], u[1]-1); + } + } + if (make_members_unique(v, vertex_count) == 2 && ((v[0] - v[1]) & 7) == 0) { + if ((v[0] & 7) == 7) { + patch_v(vertex_ptrs, vertex_count, v[0], v[0] + 1); + //printf("v hack: %3u-%3u -> %3u-%3u\n", v[0], v[1], v[0]+1, v[1]); + } + else if ((v[1] & 7) == 0) { + patch_v(vertex_ptrs, vertex_count, v[1], v[1] - 1); + //printf("v hack: %3u-%3u -> %3u-%3u\n", v[0], v[1], v[0], v[1]-1); + } + } +} + static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 current_command) { @@ -1202,6 +1270,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv(2, 10); get_vertex_data_xy_uv(3, 14); + uv_hack(vertexes, 4); do_quad_enhanced(psx_gpu, vertexes, current_command); break; } @@ -1259,6 +1328,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv_rgb(2, 12); get_vertex_data_xy_uv_rgb(3, 18); + uv_hack(vertexes, 4); do_quad_enhanced(psx_gpu, vertexes, current_command); break; } From 20a3a441bc4176d91cd4d41f8dedf12a9f448abb Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 25 Aug 2023 22:39:19 +0300 Subject: [PATCH 331/597] spu: rm bunch of unused functions --- frontend/plugin.c | 16 ---------------- libpcsxcore/plugins.c | 20 ++------------------ libpcsxcore/plugins.h | 12 ------------ plugins/dfsound/dma.c | 32 -------------------------------- plugins/dfsound/spu.c | 27 --------------------------- 5 files changed, 2 insertions(+), 105 deletions(-) diff --git a/frontend/plugin.c b/frontend/plugin.c index d2d3dbab2..0bfc2aabc 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -28,19 +28,13 @@ extern long CALLBACK SPUopen(void); extern long CALLBACK SPUinit(void); extern long CALLBACK SPUshutdown(void); extern long 
CALLBACK SPUclose(void); -extern void CALLBACK SPUplaySample(unsigned char); extern void CALLBACK SPUwriteRegister(unsigned long, unsigned short, unsigned int); extern unsigned short CALLBACK SPUreadRegister(unsigned long); -extern void CALLBACK SPUwriteDMA(unsigned short); -extern unsigned short CALLBACK SPUreadDMA(void); extern void CALLBACK SPUwriteDMAMem(unsigned short *, int, unsigned int); extern void CALLBACK SPUreadDMAMem(unsigned short *, int, unsigned int); extern void CALLBACK SPUplayADPCMchannel(void *, unsigned int, int); extern void CALLBACK SPUregisterCallback(void (*cb)(void)); extern void CALLBACK SPUregisterScheduleCb(void (*cb)(unsigned int)); -extern long CALLBACK SPUconfigure(void); -extern long CALLBACK SPUtest(void); -extern void CALLBACK SPUabout(void); extern long CALLBACK SPUfreeze(unsigned int, void *, unsigned int); extern void CALLBACK SPUasync(unsigned int, unsigned int); extern int CALLBACK SPUplayCDDAchannel(short *, int, unsigned int, int); @@ -123,18 +117,12 @@ static const struct { DUMMY_CDR(CDRreadCDDA), DUMMY_CDR(CDRgetTE), /* SPU */ - DIRECT_SPU(SPUconfigure), - DIRECT_SPU(SPUabout), DIRECT_SPU(SPUinit), DIRECT_SPU(SPUshutdown), - DIRECT_SPU(SPUtest), DIRECT_SPU(SPUopen), DIRECT_SPU(SPUclose), -// DIRECT_SPU(SPUplaySample), // unused? 
DIRECT_SPU(SPUwriteRegister), DIRECT_SPU(SPUreadRegister), - DIRECT_SPU(SPUwriteDMA), - DIRECT_SPU(SPUreadDMA), DIRECT_SPU(SPUwriteDMAMem), DIRECT_SPU(SPUreadDMAMem), DIRECT_SPU(SPUplayADPCMchannel), @@ -261,8 +249,6 @@ pc_hook_func (GPU_updateLace, (void), (), PCNT_GPU) pc_hook_func (SPU_writeRegister, (unsigned long a0, unsigned short a1, uint32_t a2), (a0, a1, a2), PCNT_SPU) pc_hook_func_ret(unsigned short,SPU_readRegister, (unsigned long a0), (a0), PCNT_SPU) -pc_hook_func (SPU_writeDMA, (unsigned short a0), (a0), PCNT_SPU) -pc_hook_func_ret(unsigned short,SPU_readDMA, (void), (), PCNT_SPU) pc_hook_func (SPU_writeDMAMem, (unsigned short *a0, int a1, uint32_t a2), (a0, a1, a2), PCNT_SPU) pc_hook_func (SPU_readDMAMem, (unsigned short *a0, int a1, uint32_t a2), (a0, a1, a2), PCNT_SPU) pc_hook_func (SPU_playADPCMchannel, (void *a0, unsigned int a1, int a2), (a0, a1, a2), PCNT_SPU) @@ -288,8 +274,6 @@ void pcnt_hook_plugins(void) hook_it(GPU_updateLace); hook_it(SPU_writeRegister); hook_it(SPU_readRegister); - hook_it(SPU_writeDMA); - hook_it(SPU_readDMA); hook_it(SPU_writeDMAMem); hook_it(SPU_readDMAMem); hook_it(SPU_playADPCMchannel); diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index 23474f1bf..a77c728c2 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -70,18 +70,12 @@ CDRsetfilename CDR_setfilename; CDRreadCDDA CDR_readCDDA; CDRgetTE CDR_getTE; -SPUconfigure SPU_configure; -SPUabout SPU_about; SPUinit SPU_init; SPUshutdown SPU_shutdown; -SPUtest SPU_test; SPUopen SPU_open; SPUclose SPU_close; -SPUplaySample SPU_playSample; SPUwriteRegister SPU_writeRegister; SPUreadRegister SPU_readRegister; -SPUwriteDMA SPU_writeDMA; -SPUreadDMA SPU_readDMA; SPUwriteDMAMem SPU_writeDMAMem; SPUreadDMAMem SPU_readDMAMem; SPUplayADPCMchannel SPU_playADPCMchannel; @@ -313,12 +307,8 @@ static int LoadCDRplugin(const char *CDRdll) { return 0; } -void *hSPUDriver = NULL; - -long CALLBACK SPU__configure(void) { return 0; } -void CALLBACK SPU__about(void) 
{} -long CALLBACK SPU__test(void) { return 0; } -void CALLBACK SPU__registerScheduleCb(void (CALLBACK *cb)(unsigned int)) {} +static void *hSPUDriver = NULL; +static void CALLBACK SPU__registerScheduleCb(void (CALLBACK *cb)(unsigned int)) {} #define LoadSpuSym1(dest, name) \ LoadSym(SPU_##dest, SPU##dest, name, TRUE); @@ -335,7 +325,6 @@ static int LoadSPUplugin(const char *SPUdll) { hSPUDriver = SysLoadLibrary(SPUdll); if (hSPUDriver == NULL) { - SPU_configure = NULL; SysMessage (_("Could not load SPU plugin %s!"), SPUdll); return -1; } drv = hSPUDriver; @@ -343,13 +332,8 @@ static int LoadSPUplugin(const char *SPUdll) { LoadSpuSym1(shutdown, "SPUshutdown"); LoadSpuSym1(open, "SPUopen"); LoadSpuSym1(close, "SPUclose"); - LoadSpuSym0(configure, "SPUconfigure"); - LoadSpuSym0(about, "SPUabout"); - LoadSpuSym0(test, "SPUtest"); LoadSpuSym1(writeRegister, "SPUwriteRegister"); LoadSpuSym1(readRegister, "SPUreadRegister"); - LoadSpuSym1(writeDMA, "SPUwriteDMA"); - LoadSpuSym1(readDMA, "SPUreadDMA"); LoadSpuSym1(writeDMAMem, "SPUwriteDMAMem"); LoadSpuSym1(readDMAMem, "SPUreadDMAMem"); LoadSpuSym1(playADPCMchannel, "SPUplayADPCMchannel"); diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index ac89d1ff4..f74489f3d 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -163,19 +163,13 @@ extern CDRgetTE CDR_getTE; typedef long (CALLBACK* SPUinit)(void); typedef long (CALLBACK* SPUshutdown)(void); typedef long (CALLBACK* SPUclose)(void); -typedef void (CALLBACK* SPUplaySample)(unsigned char); typedef void (CALLBACK* SPUwriteRegister)(unsigned long, unsigned short, unsigned int); typedef unsigned short (CALLBACK* SPUreadRegister)(unsigned long); -typedef void (CALLBACK* SPUwriteDMA)(unsigned short); -typedef unsigned short (CALLBACK* SPUreadDMA)(void); typedef void (CALLBACK* SPUwriteDMAMem)(unsigned short *, int, unsigned int); typedef void (CALLBACK* SPUreadDMAMem)(unsigned short *, int, unsigned int); typedef void (CALLBACK* 
SPUplayADPCMchannel)(xa_decode_t *, unsigned int, int); typedef void (CALLBACK* SPUregisterCallback)(void (CALLBACK *callback)(void)); typedef void (CALLBACK* SPUregisterScheduleCb)(void (CALLBACK *callback)(unsigned int cycles_after)); -typedef long (CALLBACK* SPUconfigure)(void); -typedef long (CALLBACK* SPUtest)(void); -typedef void (CALLBACK* SPUabout)(void); typedef struct { unsigned char PluginName[8]; uint32_t PluginVersion; @@ -195,18 +189,12 @@ typedef void (CALLBACK* SPUasync)(uint32_t, uint32_t); typedef int (CALLBACK* SPUplayCDDAchannel)(short *, int, unsigned int, int); // SPU function pointers -extern SPUconfigure SPU_configure; -extern SPUabout SPU_about; extern SPUinit SPU_init; extern SPUshutdown SPU_shutdown; -extern SPUtest SPU_test; extern SPUopen SPU_open; extern SPUclose SPU_close; -extern SPUplaySample SPU_playSample; extern SPUwriteRegister SPU_writeRegister; extern SPUreadRegister SPU_readRegister; -extern SPUwriteDMA SPU_writeDMA; -extern SPUreadDMA SPU_readDMA; extern SPUwriteDMAMem SPU_writeDMAMem; extern SPUreadDMAMem SPU_readDMAMem; extern SPUplayADPCMchannel SPU_playADPCMchannel; diff --git a/plugins/dfsound/dma.c b/plugins/dfsound/dma.c index eb85a7316..533d95e90 100644 --- a/plugins/dfsound/dma.c +++ b/plugins/dfsound/dma.c @@ -22,20 +22,6 @@ #include "externals.h" #include "registers.h" -//////////////////////////////////////////////////////////////////////// -// READ DMA (one value) -//////////////////////////////////////////////////////////////////////// - -unsigned short CALLBACK SPUreadDMA(void) -{ - unsigned short s = *(unsigned short *)(spu.spuMemC + spu.spuAddr); - check_irq_io(spu.spuAddr); - spu.spuAddr += 2; - spu.spuAddr &= 0x7fffe; - - return s; -} - //////////////////////////////////////////////////////////////////////// // READ DMA (many values) //////////////////////////////////////////////////////////////////////// @@ -60,24 +46,6 @@ void CALLBACK SPUreadDMAMem(unsigned short *pusPSXMem, int iSize, spu.spuAddr = 
addr; } -//////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////// -// WRITE DMA (one value) -//////////////////////////////////////////////////////////////////////// - -void CALLBACK SPUwriteDMA(unsigned short val) -{ - *(unsigned short *)(spu.spuMemC + spu.spuAddr) = val; - - check_irq_io(spu.spuAddr); - spu.spuAddr += 2; - spu.spuAddr &= 0x7fffe; - spu.bMemDirty = 1; -} - //////////////////////////////////////////////////////////////////////// // WRITE DMA (many values) //////////////////////////////////////////////////////////////////////// diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 4dc762f6f..0c4e4f86a 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1563,33 +1563,6 @@ long CALLBACK SPUshutdown(void) return 0; } -// SPUTEST: we don't test, we are always fine ;) -long CALLBACK SPUtest(void) -{ - return 0; -} - -// SPUCONFIGURE: call config dialog -long CALLBACK SPUconfigure(void) -{ -#ifdef _MACOSX - DoConfiguration(); -#else -// StartCfgTool("CFG"); -#endif - return 0; -} - -// SPUABOUT: show about window -void CALLBACK SPUabout(void) -{ -#ifdef _MACOSX - DoAbout(); -#else -// StartCfgTool("ABOUT"); -#endif -} - // SETUP CALLBACKS // this functions will be called once, // passes a callback that should be called on SPU-IRQ/cdda volume change From 3c7a8977ddbdbfb4a8840a487fadade29bd939d6 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 25 Aug 2023 23:32:14 +0300 Subject: [PATCH 332/597] spu: adjust irqs again this begs for hw testing, but I don't have the setup right now... 
libretro/pcsx_rearmed#746 --- libpcsxcore/psxdma.c | 3 +++ plugins/dfsound/dma.c | 11 +++++++++++ plugins/dfsound/externals.h | 2 +- plugins/dfsound/registers.c | 7 ++++++- plugins/dfsound/spu.c | 5 ++++- 5 files changed, 25 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index 42fb3bab6..c0aee7ed4 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -64,6 +64,9 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU break; SPU_writeDMAMem(ptr, words_copy * 2, psxRegs.cycle); HW_DMA4_MADR = SWAPu32(madr + words_copy * 2); + // This should be much slower, like 12+ cycles/byte, it's like + // that because the CPU runs too fast and fifo is not emulated. + // See also set_dma_end(). SPUDMA_INT(words * 4); return; diff --git a/plugins/dfsound/dma.c b/plugins/dfsound/dma.c index 533d95e90..ada007f01 100644 --- a/plugins/dfsound/dma.c +++ b/plugins/dfsound/dma.c @@ -22,6 +22,15 @@ #include "externals.h" #include "registers.h" +static void set_dma_end(int iSize, unsigned int cycles) +{ + // this must be > psxdma.c dma irq + // Road Rash also wants a considerable delay, maybe because of fifo? 
+ cycles += iSize * 20; // maybe + cycles |= 1; // indicates dma is active + spu.cycles_dma_end = cycles; +} + //////////////////////////////////////////////////////////////////////// // READ DMA (many values) //////////////////////////////////////////////////////////////////////// @@ -44,6 +53,7 @@ void CALLBACK SPUreadDMAMem(unsigned short *pusPSXMem, int iSize, if (irq && (spu.spuCtrl & CTRL_IRQ)) log_unhandled("rdma spu irq: %x/%x+%x\n", irq_addr, spu.spuAddr, iSize * 2); spu.spuAddr = addr; + set_dma_end(iSize, cycles); } //////////////////////////////////////////////////////////////////////// @@ -78,6 +88,7 @@ void CALLBACK SPUwriteDMAMem(unsigned short *pusPSXMem, int iSize, if (irq && (spu.spuCtrl & CTRL_IRQ)) // unhandled because need to implement delay log_unhandled("wdma spu irq: %x/%x+%x\n", irq_addr, spu.spuAddr, iSize * 2); spu.spuAddr = addr; + set_dma_end(iSize, cycles); } //////////////////////////////////////////////////////////////////////// diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index dd05a5ab7..f638b94fa 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -197,6 +197,7 @@ typedef struct unsigned char * pSpuIrq; unsigned int cycles_played; + unsigned int cycles_dma_end; int decode_pos; int decode_dirty_ch; unsigned int bSpuInit:1; @@ -240,7 +241,6 @@ typedef struct int * SB; int * SSumLR; - int pad[29]; unsigned short regArea[0x400]; } SPUInfo; diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index ae7ed24e7..bcac4d9ad 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -133,12 +133,14 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, //-------------------------------------------------// case H_SPUaddr: spu.spuAddr = (unsigned long) val<<3; + //check_irq_io(spu.spuAddr); break; //-------------------------------------------------// case H_SPUdata: *(unsigned short *)(spu.spuMemC + spu.spuAddr) = HTOLE16(val); 
spu.spuAddr += 2; spu.spuAddr &= 0x7fffe; + check_irq_io(spu.spuAddr); break; //-------------------------------------------------// case H_SPUctrl: @@ -160,7 +162,8 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, case H_SPUirqAddr: //if (val & 1) // log_unhandled("w irq with lsb: %08lx %04x\n", reg, val); - spu.pSpuIrq=spu.spuMemC+(((unsigned long) val<<3)&~0xf); + spu.pSpuIrq = spu.spuMemC + (((int)val << 3) & ~0xf); + //check_irq_io(spu.spuAddr); goto upd_irq; //-------------------------------------------------// case H_SPUrvolL: @@ -350,11 +353,13 @@ unsigned short CALLBACK SPUreadRegister(unsigned long reg) case H_SPUaddr: return (unsigned short)(spu.spuAddr>>3); + // this reportedly doesn't work on real hw case H_SPUdata: { unsigned short s = LE16TOH(*(unsigned short *)(spu.spuMemC + spu.spuAddr)); spu.spuAddr += 2; spu.spuAddr &= 0x7fffe; + //check_irq_io(spu.spuAddr); return s; } diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 0c4e4f86a..fead03c0b 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1154,7 +1154,10 @@ void do_samples(unsigned int cycles_to, int do_direct) do_irq(); } } - check_irq_io(spu.spuAddr); + if (!spu.cycles_dma_end || (int)(spu.cycles_dma_end - cycles_to) < 0) { + spu.cycles_dma_end = 0; + check_irq_io(spu.spuAddr); + } if (unlikely(spu.rvb->dirty)) REVERBPrep(); From a01b90c372831080aa3f9dedf5251f76f300038c Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 27 Aug 2023 01:01:46 +0300 Subject: [PATCH 333/597] reduce some code duplication --- libpcsxcore/new_dynarec/pcsxmem.c | 47 +++------------------- libpcsxcore/psxhw.c | 67 +++++++++++++++++++------------ libpcsxcore/psxhw.h | 4 ++ 3 files changed, 51 insertions(+), 67 deletions(-) diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index 1f37dc29c..87aa17c54 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -157,43 +157,6 @@ make_rcnt_funcs(0) 
make_rcnt_funcs(1) make_rcnt_funcs(2) -static void io_write_ireg16(u32 value) -{ - psxHu16ref(0x1070) &= value; -} - -static void io_write_imask16(u32 value) -{ - psxHu16ref(0x1074) = value; - if (psxHu16ref(0x1070) & value) - new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); -} - -static void io_write_ireg32(u32 value) -{ - psxHu32ref(0x1070) &= value; -} - -static void io_write_imask32(u32 value) -{ - psxHu32ref(0x1074) = value; - if (psxHu32ref(0x1070) & value) - new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); -} - -static void io_write_dma_icr32(u32 value) -{ - u32 tmp = value & 0x00ff803f; - tmp |= (SWAPu32(HW_DMA_ICR) & ~value) & 0x7f000000; - if ((tmp & HW_DMA_ICR_GLOBAL_ENABLE && tmp & 0x7f000000) - || tmp & HW_DMA_ICR_BUS_ERROR) { - if (!(SWAPu32(HW_DMA_ICR) & HW_DMA_ICR_IRQ_SENT)) - psxHu32ref(0x1070) |= SWAP32(8); - tmp |= HW_DMA_ICR_IRQ_SENT; - } - HW_DMA_ICR = SWAPu32(tmp); -} - #define make_dma_func(n) \ static void io_write_chcr##n(u32 value) \ { \ @@ -423,15 +386,15 @@ void new_dyna_pcsx_mem_init(void) // write(u32 data) map_item(&mem_iowtab[IOMEM32(0x1040)], io_write_sio32, 1); - map_item(&mem_iowtab[IOMEM32(0x1070)], io_write_ireg32, 1); - map_item(&mem_iowtab[IOMEM32(0x1074)], io_write_imask32, 1); + map_item(&mem_iowtab[IOMEM32(0x1070)], psxHwWriteIstat, 1); + map_item(&mem_iowtab[IOMEM32(0x1074)], psxHwWriteImask, 1); map_item(&mem_iowtab[IOMEM32(0x1088)], io_write_chcr0, 1); map_item(&mem_iowtab[IOMEM32(0x1098)], io_write_chcr1, 1); map_item(&mem_iowtab[IOMEM32(0x10a8)], io_write_chcr2, 1); map_item(&mem_iowtab[IOMEM32(0x10b8)], io_write_chcr3, 1); map_item(&mem_iowtab[IOMEM32(0x10c8)], io_write_chcr4, 1); map_item(&mem_iowtab[IOMEM32(0x10e8)], io_write_chcr6, 1); - map_item(&mem_iowtab[IOMEM32(0x10f4)], io_write_dma_icr32, 1); + map_item(&mem_iowtab[IOMEM32(0x10f4)], psxHwWriteDmaIcr32, 1); map_item(&mem_iowtab[IOMEM32(0x1100)], io_rcnt_write_count0, 1); map_item(&mem_iowtab[IOMEM32(0x1104)], io_rcnt_write_mode0, 1); 
map_item(&mem_iowtab[IOMEM32(0x1108)], io_rcnt_write_target0, 1); @@ -451,8 +414,8 @@ void new_dyna_pcsx_mem_init(void) map_item(&mem_iowtab[IOMEM16(0x1048)], sioWriteMode16, 1); map_item(&mem_iowtab[IOMEM16(0x104a)], sioWriteCtrl16, 1); map_item(&mem_iowtab[IOMEM16(0x104e)], sioWriteBaud16, 1); - map_item(&mem_iowtab[IOMEM16(0x1070)], io_write_ireg16, 1); - map_item(&mem_iowtab[IOMEM16(0x1074)], io_write_imask16, 1); + map_item(&mem_iowtab[IOMEM16(0x1070)], psxHwWriteIstat, 1); + map_item(&mem_iowtab[IOMEM16(0x1074)], psxHwWriteImask, 1); map_item(&mem_iowtab[IOMEM16(0x1100)], io_rcnt_write_count0, 1); map_item(&mem_iowtab[IOMEM16(0x1104)], io_rcnt_write_mode0, 1); map_item(&mem_iowtab[IOMEM16(0x1108)], io_rcnt_write_target0, 1); diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index 10a2695f4..e85d8b5c9 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -41,6 +41,43 @@ void psxHwReset() { HW_GPU_STATUS = SWAP32(0x14802000); } +void psxHwWriteIstat(u32 value) +{ + u32 stat = psxHu16(0x1070) & SWAPu16(value); + psxHu16ref(0x1070) = SWAPu16(stat); + + psxRegs.CP0.n.Cause &= ~0x400; + if (stat & psxHu16(0x1074)) + psxRegs.CP0.n.Cause |= 0x400; +} + +void psxHwWriteImask(u32 value) +{ + u32 stat = psxHu16(0x1070); + psxHu16ref(0x1074) = SWAPu16(value); + if (stat & SWAPu16(value)) { + //if ((psxRegs.CP0.n.SR & 0x401) == 0x401) + // log_unhandled("irq on unmask @%08x\n", psxRegs.pc); + new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); + } + psxRegs.CP0.n.Cause &= ~0x400; + if (stat & value) + psxRegs.CP0.n.Cause |= 0x400; +} + +void psxHwWriteDmaIcr32(u32 value) +{ + u32 tmp = value & 0x00ff803f; + tmp |= (SWAPu32(HW_DMA_ICR) & ~value) & 0x7f000000; + if ((tmp & HW_DMA_ICR_GLOBAL_ENABLE && tmp & 0x7f000000) + || tmp & HW_DMA_ICR_BUS_ERROR) { + if (!(SWAPu32(HW_DMA_ICR) & HW_DMA_ICR_IRQ_SENT)) + psxHu32ref(0x1070) |= SWAP32(8); + tmp |= HW_DMA_ICR_IRQ_SENT; + } + HW_DMA_ICR = SWAPu32(tmp); +} + u8 psxHwRead8(u32 add) { unsigned char hard; @@ -476,19 +513,14 
@@ void psxHwWrite16(u32 add, u16 value) { #ifdef PSXHW_LOG PSXHW_LOG("IREG 16bit write %x\n", value); #endif - psxHu16ref(0x1070) &= SWAPu16(value); + psxHwWriteIstat(value); return; case 0x1f801074: #ifdef PSXHW_LOG PSXHW_LOG("IMASK 16bit write %x\n", value); #endif - psxHu16ref(0x1074) = SWAPu16(value); - if (psxHu16ref(0x1070) & SWAPu16(value)) { - //if ((psxRegs.CP0.n.SR & 0x401) == 0x401) - // log_unhandled("irq on unmask @%08x\n", psxRegs.pc); - new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); - } + psxHwWriteImask(value); return; case 0x1f801100: @@ -607,18 +639,13 @@ void psxHwWrite32(u32 add, u32 value) { #ifdef PSXHW_LOG PSXHW_LOG("IREG 32bit write %x\n", value); #endif - psxHu32ref(0x1070) &= SWAPu32(value); + psxHwWriteIstat(value); return; case 0x1f801074: #ifdef PSXHW_LOG PSXHW_LOG("IMASK 32bit write %x\n", value); #endif - psxHu32ref(0x1074) = SWAPu32(value); - if (psxHu32ref(0x1070) & SWAPu32(value)) { - if ((psxRegs.CP0.n.SR & 0x401) == 0x401) - log_unhandled("irq on unmask @%08x\n", psxRegs.pc); - new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); - } + psxHwWriteImask(value); return; #ifdef PSXHW_LOG @@ -729,18 +756,8 @@ void psxHwWrite32(u32 add, u32 value) { #ifdef PSXHW_LOG PSXHW_LOG("DMA ICR 32bit write %x\n", value); #endif - { - u32 tmp = value & 0x00ff803f; - tmp |= (SWAPu32(HW_DMA_ICR) & ~value) & 0x7f000000; - if ((tmp & HW_DMA_ICR_GLOBAL_ENABLE && tmp & 0x7f000000) - || tmp & HW_DMA_ICR_BUS_ERROR) { - if (!(SWAPu32(HW_DMA_ICR) & HW_DMA_ICR_IRQ_SENT)) - psxHu32ref(0x1070) |= SWAP32(8); - tmp |= HW_DMA_ICR_IRQ_SENT; - } - HW_DMA_ICR = SWAPu32(tmp); + psxHwWriteDmaIcr32(value); return; - } case 0x1f801810: #ifdef PSXHW_LOG diff --git a/libpcsxcore/psxhw.h b/libpcsxcore/psxhw.h index e83939f26..2bde9edcb 100644 --- a/libpcsxcore/psxhw.h +++ b/libpcsxcore/psxhw.h @@ -82,6 +82,10 @@ void psxHwWrite16(u32 add, u16 value); void psxHwWrite32(u32 add, u32 value); int psxHwFreeze(void *f, int Mode); +void psxHwWriteIstat(u32 value); +void psxHwWriteImask(u32 
value); +void psxHwWriteDmaIcr32(u32 value); + #ifdef __cplusplus } #endif From 4f13a57754959faa674e898d5d9147d302e72684 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 27 Aug 2023 01:07:19 +0300 Subject: [PATCH 334/597] drc: adjust debugging code to work with hle bios --- libpcsxcore/new_dynarec/new_dynarec.c | 16 +++++ libpcsxcore/new_dynarec/patches/trace_drc_chk | 43 ++++--------- libpcsxcore/new_dynarec/patches/trace_intr | 64 +++++++++++-------- 3 files changed, 66 insertions(+), 57 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index d7c243655..ede1f93c9 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -4945,6 +4945,10 @@ static void drc_dbg_emit_do_cmp(int i, int ccadj_) emit_storereg(reg, 0); } } + if (dops[i].opcode == 0x0f) { // LUI + emit_movimm(cinfo[i].imm << 16, 0); + emit_storereg(dops[i].rt1, 0); + } emit_movimm(start+i*4,0); emit_writeword(0,&pcaddr); int cc = get_reg(regs[i].regmap_entry, CCREG); @@ -4960,8 +4964,18 @@ static void drc_dbg_emit_do_cmp(int i, int ccadj_) restore_regs(reglist); assem_debug("\\\\do_insn_cmp\n"); } +static void drc_dbg_emit_wb_dirtys(int i, const struct regstat *i_regs) +{ + // write-out non-consts, consts are likely different because of get_final_value() + if (i_regs->dirty & ~i_regs->loadedconst) { + assem_debug("/ drc_dbg_wb\n"); + wb_dirtys(i_regs->regmap, i_regs->dirty & ~i_regs->loadedconst); + assem_debug("\\ drc_dbg_wb\n"); + } +} #else #define drc_dbg_emit_do_cmp(x,y) +#define drc_dbg_emit_wb_dirtys(x,y) #endif // Used when a branch jumps into the delay slot of another branch @@ -5685,6 +5699,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG); load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP); ds_assemble(i+1,&branch_regs[i]); + drc_dbg_emit_wb_dirtys(i+1, &branch_regs[i]); cc=get_reg(branch_regs[i].regmap,CCREG); if(cc==-1) 
{ emit_loadreg(CCREG,cc=HOST_CCREG); @@ -9114,6 +9129,7 @@ static int new_recompile_block(u_int addr) ds = assemble(i, ®s[i], cinfo[i].ccadj); + drc_dbg_emit_wb_dirtys(i, ®s[i]); if (dops[i].is_ujump) literal_pool(1024); else diff --git a/libpcsxcore/new_dynarec/patches/trace_drc_chk b/libpcsxcore/new_dynarec/patches/trace_drc_chk index 8de3ba1fc..5dab31754 100644 --- a/libpcsxcore/new_dynarec/patches/trace_drc_chk +++ b/libpcsxcore/new_dynarec/patches/trace_drc_chk @@ -1,17 +1,17 @@ diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c -index 2d3348e8..a85d2cd4 100644 +index ede1f93c..1c8965f0 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c -@@ -318,7 +318,7 @@ static struct compile_info +@@ -324,7 +324,7 @@ static struct compile_info int new_dynarec_hacks_old; int new_dynarec_did_compile; - #define HACK_ENABLED(x) ((new_dynarec_hacks | new_dynarec_hacks_pergame) & (x)) + #define HACK_ENABLED(x) ((NDHACK_NO_STALLS) & (x)) - extern int cycle_count; // ... until end of the timeslice, counts -N -> 0 + extern int cycle_count; // ... 
until end of the timeslice, counts -N -> 0 (CCREG) extern int last_count; // last absolute target, often = next_interupt -@@ -598,6 +598,7 @@ static int cycle_multiplier_active; +@@ -602,6 +602,7 @@ static int cycle_multiplier_active; static int CLOCK_ADJUST(int x) { @@ -19,7 +19,7 @@ index 2d3348e8..a85d2cd4 100644 int m = cycle_multiplier_active; int s = (x >> 31) | 1; return (x * m + s * 50) / 100; -@@ -752,6 +753,9 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) +@@ -776,6 +777,9 @@ static noinline u_int generate_exception(u_int pc) // This is called from the recompiled JR/JALR instructions static void noinline *get_addr(u_int vaddr, int can_compile) { @@ -29,7 +29,7 @@ index 2d3348e8..a85d2cd4 100644 u_int start_page = get_page_prev(vaddr); u_int i, page, end_page = get_page(vaddr); void *found_clean = NULL; -@@ -7164,7 +7168,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) +@@ -7157,7 +7161,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) // R0 is always unneeded u|=1; // Save it @@ -38,7 +38,7 @@ index 2d3348e8..a85d2cd4 100644 gte_unneeded[i]=gte_u; /* printf("ur (%d,%d) %x: ",istart,iend,start+i*4); -@@ -8315,6 +8319,7 @@ static noinline void pass5a_preallocate1(void) +@@ -8299,6 +8303,7 @@ static noinline void pass5a_preallocate1(void) static noinline void pass5b_preallocate2(void) { int i, hr; @@ -46,22 +46,7 @@ index 2d3348e8..a85d2cd4 100644 for(i=0;isubCycleStep >= 0x10000); regs->subCycle += regs->subCycleStep; @@ -139,9 +124,9 @@ index e212d8a9..b98b694e 100644 regs->subCycle &= 0xffff; } -@@ -1341,8 +1341,14 @@ static void intShutdown() { - +@@ -1344,8 +1344,14 @@ static void intShutdown() { // single step (may do several ops in case of a branch or load delay) + // called by asm/dynarec void execI(psxRegisters *regs) { + extern int last_count; + void do_insn_cmp(void); diff --git a/libpcsxcore/new_dynarec/patches/trace_intr b/libpcsxcore/new_dynarec/patches/trace_intr 
index 1c7a36ea6..3f01180d5 100644 --- a/libpcsxcore/new_dynarec/patches/trace_intr +++ b/libpcsxcore/new_dynarec/patches/trace_intr @@ -1,5 +1,5 @@ diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c -index 06612dbf..9a9d7b05 100644 +index f879ad8c..0ec366d0 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -323,13 +323,18 @@ static void ari64_shutdown() @@ -33,7 +33,7 @@ index 06612dbf..9a9d7b05 100644 { static psxRegisters oldregs; diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c -index 190f8fc7..5feb7a02 100644 +index 1f37dc29..357f753e 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -289,6 +289,8 @@ static void write_biu(u32 value) @@ -87,13 +87,13 @@ index 18bd6a4e..bc2eb3f6 100644 count = _psxRcntRcount( index ); diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c -index 27ddfeab..d7c6ff05 100644 +index 10a2695f..7e4a64da 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c -@@ -377,13 +377,14 @@ void psxHwWrite8(u32 add, u8 value) { - case 0x1f801803: cdrWrite3(value); break; +@@ -437,13 +437,14 @@ void psxHwWrite8(u32 add, u8 value) { + return; + } - default: + if (add < 0x1f802000) psxHu8(add) = value; #ifdef PSXHW_LOG @@ -106,7 +106,7 @@ index 27ddfeab..d7c6ff05 100644 #ifdef PSXHW_LOG PSXHW_LOG("*Known 8bit write at address %x value %x\n", add, value); #endif -@@ -506,6 +507,7 @@ void psxHwWrite16(u32 add, u16 value) { +@@ -565,6 +566,7 @@ void psxHwWrite16(u32 add, u16 value) { return; } @@ -114,7 +114,7 @@ index 27ddfeab..d7c6ff05 100644 psxHu16ref(add) = SWAPu16(value); #ifdef PSXHW_LOG PSXHW_LOG("*Unknown 16bit write at address %x value %x\n", add, value); -@@ -701,9 +703,9 @@ void psxHwWrite32(u32 add, u32 value) { +@@ -756,9 +758,9 @@ void psxHwWrite32(u32 add, u32 value) { return; case 0x1f801820: @@ -126,7 +126,7 @@ index 27ddfeab..d7c6ff05 100644 case 0x1f801100: #ifdef PSXHW_LOG -@@ -761,6 
+763,7 @@ void psxHwWrite32(u32 add, u32 value) { +@@ -826,6 +828,7 @@ void psxHwWrite32(u32 add, u32 value) { return; } @@ -135,10 +135,10 @@ index 27ddfeab..d7c6ff05 100644 #ifdef PSXHW_LOG PSXHW_LOG("*Unknown 32bit write at address %x value %x\n", add, value); diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c -index f473ddf6..49c4143b 100644 +index 5756bee5..4bf9248d 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c -@@ -237,7 +237,7 @@ static inline void addCycle(psxRegisters *regs) +@@ -238,7 +238,7 @@ static inline void addCycle(psxRegisters *regs) { assert(regs->subCycleStep >= 0x10000); regs->subCycle += regs->subCycleStep; @@ -147,7 +147,7 @@ index f473ddf6..49c4143b 100644 regs->subCycle &= 0xffff; } -@@ -434,7 +434,9 @@ static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) { +@@ -435,7 +435,9 @@ static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) { regs->CP0.n.Target = pc_final; regs->branching = 0; @@ -157,7 +157,7 @@ index f473ddf6..49c4143b 100644 } static void doBranchReg(psxRegisters *regs, u32 tar) { -@@ -959,7 +961,7 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) { +@@ -960,7 +962,7 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) { } } @@ -166,7 +166,15 @@ index f473ddf6..49c4143b 100644 // no exception static inline void psxNULLne(psxRegisters *regs) { -@@ -1167,18 +1169,20 @@ static void intReset() { +@@ -1120,6 +1122,7 @@ OP(psxHLE) { + } + psxHLEt[hleCode](); + branchSeen = 1; ++ psxRegs.cycle -= 2; + } + + static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = { +@@ -1169,18 +1172,20 @@ static void intReset() { static inline void execI_(u8 **memRLUT, psxRegisters *regs) { u32 pc = regs->pc; @@ -189,7 +197,7 @@ index f473ddf6..49c4143b 100644 dloadStep(regs); if (execBreakCheck(regs, pc)) -@@ -1187,6 +1191,8 @@ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) { +@@ -1189,6 +1194,8 @@ static inline void execIbp(u8 **memRLUT, 
psxRegisters *regs) { regs->pc += 4; regs->code = fetch(regs, memRLUT, pc); psxBSC[regs->code >> 26](regs, regs->code); @@ -198,7 +206,7 @@ index f473ddf6..49c4143b 100644 } static void intExecute() { -@@ -1216,6 +1222,30 @@ void intExecuteBlock(enum blockExecCaller caller) { +@@ -1218,6 +1225,30 @@ void intExecuteBlock(enum blockExecCaller caller) { execI_(memRLUT, regs_); } @@ -229,7 +237,7 @@ index f473ddf6..49c4143b 100644 static void intClear(u32 Addr, u32 Size) { } -@@ -1244,7 +1274,7 @@ static void setupCop(u32 sr) +@@ -1246,7 +1277,7 @@ static void setupCop(u32 sr) else psxBSC[17] = psxCOPd; if (sr & (1u << 30)) @@ -238,7 +246,7 @@ index f473ddf6..49c4143b 100644 else psxBSC[18] = psxCOPd; if (sr & (1u << 31)) -@@ -1263,7 +1293,7 @@ void intApplyConfig() { +@@ -1265,7 +1296,7 @@ void intApplyConfig() { assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall); assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall); @@ -248,10 +256,10 @@ index f473ddf6..49c4143b 100644 psxBSC[50] = gteLWC2; psxBSC[58] = gteSWC2; diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c -index 54219ae0..41168ced 100644 +index 42755e52..4fa4316b 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c -@@ -278,10 +278,13 @@ void psxMemOnIsolate(int enable) +@@ -289,10 +289,13 @@ void psxMemOnIsolate(int enable) : R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL); } @@ -265,7 +273,7 @@ index 54219ae0..41168ced 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -307,6 +310,7 @@ u16 psxMemRead16(u32 mem) { +@@ -318,6 +321,7 @@ u16 psxMemRead16(u32 mem) { char *p; u32 t; @@ -273,7 +281,7 @@ index 54219ae0..41168ced 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -332,6 +336,7 @@ u32 psxMemRead32(u32 mem) { +@@ -343,6 +347,7 @@ u32 psxMemRead32(u32 mem) { char *p; u32 t; @@ -281,7 +289,7 @@ index 54219ae0..41168ced 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 
0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -359,6 +364,7 @@ void psxMemWrite8(u32 mem, u8 value) { +@@ -370,6 +375,7 @@ void psxMemWrite8(u32 mem, u8 value) { char *p; u32 t; @@ -289,7 +297,7 @@ index 54219ae0..41168ced 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -386,6 +392,7 @@ void psxMemWrite16(u32 mem, u16 value) { +@@ -397,6 +403,7 @@ void psxMemWrite16(u32 mem, u16 value) { char *p; u32 t; @@ -297,7 +305,7 @@ index 54219ae0..41168ced 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -413,6 +420,7 @@ void psxMemWrite32(u32 mem, u32 value) { +@@ -424,6 +431,7 @@ void psxMemWrite32(u32 mem, u32 value) { char *p; u32 t; @@ -305,7 +313,7 @@ index 54219ae0..41168ced 100644 // if ((mem&0x1fffff) == 0x71E18 || value == 0x48088800) SysPrintf("t2fix!!\n"); t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { -@@ -431,6 +439,8 @@ void psxMemWrite32(u32 mem, u32 value) { +@@ -442,6 +450,8 @@ void psxMemWrite32(u32 mem, u32 value) { #endif } else { if (mem == 0xfffe0130) { @@ -315,10 +323,10 @@ index 54219ae0..41168ced 100644 return; } diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c -index dffbf6e7..0a3bdb65 100644 +index 48881068..47c40940 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c -@@ -124,6 +124,8 @@ void psxException(u32 cause, enum R3000Abdt bdt, psxCP0Regs *cp0) { +@@ -127,6 +127,8 @@ void psxException(u32 cause, enum R3000Abdt bdt, psxCP0Regs *cp0) { } void psxBranchTest() { From 86c70511f865fa7b01fe0147f1c891b8fbc10a97 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 27 Aug 2023 01:08:45 +0300 Subject: [PATCH 335/597] psxbios: add missing clearing for copy related functions notaz/pcsx_rearmed#300 --- libpcsxcore/misc.c | 11 ++-- libpcsxcore/psxbios.c | 128 +++++++++++++++++++++++++++--------------- 2 files changed, 91 insertions(+), 48 deletions(-) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 
ab3e1a486..3d1647102 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -261,7 +261,7 @@ int LoadCdrom() { tmpHead.t_addr = SWAP32(tmpHead.t_addr); psxCpu->Clear(tmpHead.t_addr, tmpHead.t_size / 4); - psxCpu->Reset(); + //psxCpu->Reset(); // Read the rest of the main executable while (tmpHead.t_size & ~2047) { @@ -288,6 +288,9 @@ int LoadCdromFile(const char *filename, EXE_HEADER *head) { u32 size, addr; void *mem; + if (filename == INVALID_PTR) + return -1; + p1 = filename; if ((p2 = strchr(p1, ':'))) p1 = p2 + 1; @@ -311,11 +314,11 @@ int LoadCdromFile(const char *filename, EXE_HEADER *head) { READTRACK(); memcpy(head, buf + 12, sizeof(EXE_HEADER)); - size = head->t_size; - addr = head->t_addr; + size = SWAP32(head->t_size); + addr = SWAP32(head->t_addr); psxCpu->Clear(addr, size / 4); - psxCpu->Reset(); + //psxCpu->Reset(); while (size & ~2047) { incTime(); diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 678377e13..5b7633e7d 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -414,6 +414,7 @@ static void CloseEvent(u32 ev); PSXBIOS_LOG("read %d: %x,%x (%s)\n", FDesc[1 + mcd].mcfile, FDesc[1 + mcd].offset, a2, Mcd##mcd##Data + 128 * FDesc[1 + mcd].mcfile + 0xa); \ ptr = Mcd##mcd##Data + 8192 * FDesc[1 + mcd].mcfile + FDesc[1 + mcd].offset; \ memcpy(Ra1, ptr, length); \ + psxCpu->Clear(a1, (length + 3) / 4); \ if (FDesc[1 + mcd].mode & 0x8000) { \ DeliverEvent(0xf0000011, 0x0004); \ DeliverEvent(0xf4000001, 0x0004); \ @@ -904,33 +905,54 @@ void psxBios_tolower() { // 0x26 pc0 = ra; } -void psxBios_bcopy() { // 0x27 - char *p1 = (char *)Ra1, *p2 = (char *)Ra0; - v0 = a0; - if (a0 == 0 || a2 > 0x7FFFFFFF) - { - pc0 = ra; - return; +static void do_memset(u32 dst, u32 v, s32 len) +{ + u32 d = dst; + s32 l = len; + while (l-- > 0) { + u8 *db = PSXM(d); + if (db != INVALID_PTR) + *db = v; + d++; } - while ((s32)a2-- > 0) *p1++ = *p2++; - a2 = 0; - pc0 = ra; + psxCpu->Clear(dst, (len + 3) / 4); +} + +static void do_memcpy(u32 dst, 
u32 src, s32 len) +{ + u32 d = dst, s = src; + s32 l = len; + while (l-- > 0) { + const u8 *sb = PSXM(s); + u8 *db = PSXM(d); + if (db != INVALID_PTR && sb != INVALID_PTR) + *db = *sb; + d++; + s++; + } + psxCpu->Clear(dst, (len + 3) / 4); +} + +static void psxBios_memcpy(); + +static void psxBios_bcopy() { // 0x27 + psxBios_memcpy(); // identical } static void psxBios_bzero() { // 0x28 /* Same as memset here (See memset below) */ - u32 ret = a0; + u32 ret = a0, cycles; if (a0 == 0 || (s32)a1 <= 0) { mips_return_c(0, 6); return; } - while ((s32)a1-- > 0) { - storeRam8(a0++, 0); - use_cycles(4); - } + do_memset(a0, 0, a1); + cycles = a1 * 4; + a0 += a1; + a1 = 0; // todo: many more cycles due to uncached bios mem - mips_return_c(ret, 5); + mips_return_c(ret, cycles + 5); } void psxBios_bcmp() { // 0x29 @@ -949,53 +971,70 @@ void psxBios_bcmp() { // 0x29 v0 = 0; pc0 = ra; } -void psxBios_memcpy() { // 0x2a - char *p1 = (char *)Ra0, *p2 = (char *)Ra1; - v0 = a0; - if (a0 == 0 || a2 > 0x7FFFFFFF) +static void psxBios_memcpy() { // 0x2a + u32 ret = a0, cycles = 0; + if (a0 == 0) { - pc0 = ra; + mips_return_c(0, 4); return; } - while ((s32)a2-- > 0) { - *p1++ = *p2++; + if ((s32)a2 > 0) { + do_memcpy(a0, a1, a2); + cycles = a2 * 6; + v1 = a0; + a0 += a2; + a1 += a2; + a2 = 0; } - a2 = 0; - pc0 = ra; + mips_return_c(ret, cycles + 5); } static void psxBios_memset() { // 0x2b - u32 ret = a0; + u32 ret = a0, cycles; if (a0 == 0 || (s32)a2 <= 0) { mips_return_c(0, 6); return; } - while ((s32)a2-- > 0) { - storeRam8(a0++, a1); - use_cycles(4); - } + do_memset(a0, a1, a2); + cycles = a2 * 4; + a0 += a2; + a2 = 0; // todo: many more cycles due to uncached bios mem - mips_return_c(ret, 5); + mips_return_c(ret, cycles + 5); } void psxBios_memmove() { // 0x2c - char *p1 = (char *)Ra0, *p2 = (char *)Ra1; - v0 = a0; - if (a0 == 0 || a2 > 0x7FFFFFFF) + u32 ret = a0, cycles = 0; + if (a0 == 0) { - pc0 = ra; + mips_return_c(0, 4); return; } - if (p2 <= p1 && p2 + a2 > p1) { - a2++; // 
BUG: copy one more byte here - p1 += a2; - p2 += a2; - while ((s32)a2-- > 0) *--p1 = *--p2; - } else { - while ((s32)a2-- > 0) *p1++ = *p2++; + v1 = a0; + if ((s32)a2 > 0 && a0 > a1 && a0 < a1 + a2) { + u32 dst = a0, len = a2 + 1; + a0 += a2; + a1 += a2; + while ((s32)a2 >= 0) { // BUG: copies one more byte here + const u8 *sb = PSXM(a1); + u8 *db = PSXM(a0); + if (db != INVALID_PTR && sb != INVALID_PTR) + *db = *sb; + a0--; + a1--; + a2--; + } + psxCpu->Clear(dst, (len + 3) / 4); + cycles = 10 + len * 8; + } else if ((s32)a2 > 0) { + do_memcpy(a0, a1, a2); + cycles = a2 * 6; + a0 += a2; + a1 += a2; + a2 = 0; } - pc0 = ra; + mips_return_c(ret, cycles + 5); } void psxBios_memcmp() { // 0x2d @@ -1425,8 +1464,9 @@ void psxBios_Load() { // 0x42 void *pa1; pa1 = Ra1; - if (pa1 && LoadCdromFile(Ra0, &eheader) == 0) { + if (pa1 != INVALID_PTR && LoadCdromFile(Ra0, &eheader) == 0) { memcpy(pa1, ((char*)&eheader)+16, sizeof(EXEC)); + psxCpu->Clear(a1, sizeof(EXEC) / 4); v0 = 1; } else v0 = 0; PSXBIOS_LOG("psxBios_%s: %s, %d -> %d\n", biosA0n[0x42], Ra0, a1, v0); From a5539d92699a2e4b117ba05ee3343513cc5c9f8a Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 27 Aug 2023 02:04:46 +0300 Subject: [PATCH 336/597] drc: allow to execute bios even in hle mode some instructions are now placed there --- libpcsxcore/new_dynarec/new_dynarec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index ede1f93c9..c4be88ff0 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -6340,9 +6340,9 @@ static u_int *get_source_start(u_int addr, u_int *limit) *limit = (addr & 0xa0600000) + 0x00200000; return (u_int *)(psxM + (addr & 0x1fffff)); } - else if (!Config.HLE && ( + else if ( /* (0x9fc00000 <= addr && addr < 0x9fc80000) ||*/ - (0xbfc00000 <= addr && addr < 0xbfc80000))) + (0xbfc00000 <= addr && addr < 0xbfc80000)) { // BIOS. 
The multiplier should be much higher as it's uncached 8bit mem, // but timings in PCSX are too tied to the interpreter's 2-per-insn assumption From 660b4e517baaedd6424465646ab4d56f4a168ede Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 27 Aug 2023 17:39:54 +0300 Subject: [PATCH 337/597] psxbios: unbreak bcopy wasn't paying attention --- libpcsxcore/psxbios.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 5b7633e7d..17d50446f 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -935,8 +935,23 @@ static void do_memcpy(u32 dst, u32 src, s32 len) static void psxBios_memcpy(); -static void psxBios_bcopy() { // 0x27 - psxBios_memcpy(); // identical +static void psxBios_bcopy() { // 0x27 - memcpy with args swapped + //PSXBIOS_LOG("psxBios_%s %x %x %x\n", biosA0n[0x27], a0, a1, a2); + u32 ret = a0, cycles = 0; + if (a0 == 0) // ...but it checks src this time + { + mips_return_c(0, 4); + return; + } + v1 = a0; + if ((s32)a2 > 0) { + do_memcpy(a1, a0, a2); + cycles = a2 * 6; + a0 += a2; + a1 += a2; + a2 = 0; + } + mips_return_c(ret, cycles + 5); } static void psxBios_bzero() { // 0x28 From fed9fd6f2cba07f2db3715d7f35f911a7b582517 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 27 Aug 2023 18:23:57 +0300 Subject: [PATCH 338/597] psxbios: Load() flushes cache --- libpcsxcore/psxbios.c | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 17d50446f..0f8beb094 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -344,6 +344,11 @@ static void mips_return(u32 val) pc0 = ra; } +static void mips_return_void(void) +{ + pc0 = ra; +} + static void use_cycles(u32 cycle) { psxRegs.cycle += cycle * 2; @@ -993,10 +998,10 @@ static void psxBios_memcpy() { // 0x2a mips_return_c(0, 4); return; } + v1 = a0; if ((s32)a2 > 0) { do_memcpy(a0, a1, a2); cycles = a2 * 6; - v1 
= a0; a0 += a2; a1 += a2; a2 = 0; @@ -1470,6 +1475,14 @@ static void psxBios_SystemErrorUnresolvedException() { mips_return_void_c(1000); } +static void FlushCache() { + psxCpu->Notify(R3000ACPU_NOTIFY_CACHE_ISOLATED, NULL); + psxCpu->Notify(R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL); + k0 = 0xbfc0193c; + // runs from uncached mem so tons of cycles + use_cycles(500); +} + /* * long Load(char *name, struct EXEC *header); */ @@ -1482,6 +1495,7 @@ void psxBios_Load() { // 0x42 if (pa1 != INVALID_PTR && LoadCdromFile(Ra0, &eheader) == 0) { memcpy(pa1, ((char*)&eheader)+16, sizeof(EXEC)); psxCpu->Clear(a1, sizeof(EXEC) / 4); + FlushCache(); v0 = 1; } else v0 = 0; PSXBIOS_LOG("psxBios_%s: %s, %d -> %d\n", biosA0n[0x42], Ra0, a1, v0); @@ -1528,13 +1542,10 @@ void psxBios_Exec() { // 43 pc0 = SWAP32(header->_pc0); } -void psxBios_FlushCache() { // 44 -#ifdef PSXBIOS_LOG +static void psxBios_FlushCache() { // 44 PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x44]); -#endif - psxCpu->Notify(R3000ACPU_NOTIFY_CACHE_ISOLATED, NULL); - psxCpu->Notify(R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL); - pc0 = ra; + FlushCache(); + mips_return_void(); } void psxBios_GPU_dw() { // 0x46 @@ -2670,11 +2681,12 @@ void psxBios_InitCARD() { // 4a u32 *ram32 = (u32 *)psxM; PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x4a], a0); write_chain(ram32 + A_PADCRD_CHN_E/4, 0, 0x49bc, 0x4a4c); - // (maybe) todo: early_card_irq, FlushCache etc + // (maybe) todo: early_card_irq, etc ram32[A_PAD_IRQR_ENA/4] = SWAP32(a0); - mips_return_c(0, 300); + psxBios_FlushCache(); + mips_return_c(0, 34+13+15+6); } void psxBios_StartCARD() { // 4b @@ -3086,8 +3098,8 @@ static void write_chain(u32 *d, u32 next, u32 handler1, u32 handler2) d[2] = SWAPu32(handler2); // install the hle traps - PSXMu32ref(handler1) = HLEOP(chain_hle_op(handler1)); - PSXMu32ref(handler2) = HLEOP(chain_hle_op(handler2)); + if (handler1) PSXMu32ref(handler1) = HLEOP(chain_hle_op(handler1)); + if (handler2) PSXMu32ref(handler2) = HLEOP(chain_hle_op(handler2)); } 
static void setup_tt(u32 tcb_cnt, u32 evcb_cnt) @@ -3575,6 +3587,13 @@ void psxBiosInit() { len = 0x80000 - 0x69d68; uncompress((Bytef *)(psxR + 0x69d68), &len, font_889f, sizeof(font_889f)); + // trap attempts to call bios directly + rom32[0x00000/4] = HLEOP(hleop_dummy); + rom32[0x00180/4] = HLEOP(hleop_dummy); + rom32[0x3fffc/4] = HLEOP(hleop_dummy); + rom32[0x65ffc/4] = HLEOP(hleop_dummy); + rom32[0x7ff2c/4] = HLEOP(hleop_dummy); + /* Some games like R-Types, CTR, Fade to Black read from adress 0x00000000 due to uninitialized pointers. See Garbage Area at Address 00000000h in Nocash PSX Specfications for more information. Here are some examples of games not working with this fix in place : From 23948df34dc798cf21b40bf15d76a8035c6e01f0 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 27 Aug 2023 23:21:36 +0300 Subject: [PATCH 339/597] gpulib: don't corrupt memory on garbage commands --- plugins/gpulib/gpu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index b23f8a88f..9cf5841d9 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -614,6 +614,10 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr log_io(".chain %08lx #%d+%d\n", (long)(list - rambase) * 4, len, gpu.cmd_len); if (unlikely(gpu.cmd_len > 0)) { + if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) { + log_anomaly("cmd_buffer overflow, likely garbage commands\n"); + gpu.cmd_len = 0; + } memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4); gpu.cmd_len += len; flush_cmd_buffer(); From 4d4e34c6eeae1e91e595ab6802652f312a560342 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 27 Aug 2023 23:22:53 +0300 Subject: [PATCH 340/597] psxbios: fix wrong regs in syscall handler --- libpcsxcore/psxbios.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 0f8beb094..4c097db87 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -3755,9 
+3755,9 @@ void hleExc0_1_2() // A(90h) - CdromIoIrqFunc1 void hleExc0_2_2_syscall() // not in any A/B/C table { - u32 code = (psxRegs.CP0.n.Cause & 0x3c) >> 2; u32 tcbPtr = loadRam32(A_TT_PCB); TCB *tcb = loadRam32ptr(tcbPtr); + u32 code = SWAP32(tcb->cause) >> 2; if (code != R3000E_Syscall) { if (code != 0) { @@ -3768,9 +3768,9 @@ void hleExc0_2_2_syscall() // not in any A/B/C table return; } - //printf("%s c=%d a0=%d\n", __func__, code, a0); + //printf("%s c=%d a0=%d\n", __func__, code, SWAP32(tcb->reg[4])); tcb->epc += SWAP32(4); - switch (a0) { + switch (SWAP32(tcb->reg[4])) { // a0 case 0: // noop break; @@ -3786,7 +3786,7 @@ void hleExc0_2_2_syscall() // not in any A/B/C table case 3: { // ChangeThreadSubFunction u32 tcbPtr = loadRam32(A_TT_PCB); - storeRam32(tcbPtr, a1); + storeRam32(tcbPtr, SWAP32(tcb->reg[5])); // a1 break; } default: From 7c3332fbf8ea98bf43f5de9e966bbae77dd9f61d Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 28 Aug 2023 23:27:57 +0300 Subject: [PATCH 341/597] psxbios: assorted changes fixes of more bugs, I'm not sure --- libpcsxcore/psxbios.c | 80 +++++++++++++++--------------------- libpcsxcore/psxinterpreter.c | 1 + 2 files changed, 34 insertions(+), 47 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 4c097db87..734f3e397 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -256,7 +256,7 @@ typedef struct { // todo: FileDesc layout is wrong // todo: get rid of these globals static FileDesc FDesc[32]; -static char ffile[64], *pfile; +static char ffile[64]; static int nfile; // fixed RAM offsets, SCPH1001 compatible @@ -293,6 +293,7 @@ static int nfile; #define A_HEAP_SIZE 0x9004 #define A_HEAP_END 0x9008 #define A_HEAP_FLAG 0x900c +#define A_RND_SEED 0x9010 #define A_CD_EVENTS 0xb9b8 #define A_EXC_GP 0xf450 @@ -393,6 +394,10 @@ static inline void softCallInException(u32 pc) { u32 sra = ra; u32 lim = 0; pc0 = pc; + + assert(ra != 0x80001000); + if (ra == 0x80001000) + return; ra = 0x80001000; 
while (!returned_from_exception() && pc0 != 0x80001000 && ++lim < 1000000) @@ -1080,16 +1085,16 @@ void psxBios_memchr() { // 0x2e v0 = 0; pc0 = ra; } -void psxBios_rand() { // 0x2f - u32 s = psxMu32(0x9010) * 1103515245 + 12345; - v0 = (s >> 16) & 0x7fff; - psxMu32ref(0x9010) = SWAPu32(s); - pc0 = ra; +static void psxBios_rand() { // 0x2f + u32 s = loadRam32(A_RND_SEED) * 1103515245 + 12345; + storeRam32(A_RND_SEED, s); + v1 = s; + mips_return_c((s >> 16) & 0x7fff, 12+37); } -void psxBios_srand() { // 0x30 - psxMu32ref(0x9010) = SWAPu32(a0); - pc0 = ra; +static void psxBios_srand() { // 0x30 + storeRam32(A_RND_SEED, a0); + mips_return_void_c(3); } static u32 qscmpfunc, qswidth; @@ -1571,7 +1576,7 @@ void psxBios_GPU_dw() { // 0x46 void psxBios_mem2vram() { // 0x47 int size; - gpuSyncPluginSR(); + gpuSyncPluginSR(); // flush GPU_writeData(0xa0000000); GPU_writeData((a1<<0x10)|(a0&0xffff)); GPU_writeData((a3<<0x10)|(a2&0xffff)); @@ -1593,8 +1598,8 @@ void psxBios_SendGPU() { // 0x48 } void psxBios_GPU_cw() { // 0x49 - gpuSyncPluginSR(); GPU_writeData(a0); + gpuSyncPluginSR(); v0 = HW_GPU_STATUS; pc0 = ra; } @@ -1882,6 +1887,7 @@ static u32 DeliverEvent(u32 class, u32 spec) { u32 ret = loadRam32(A_TT_EvCB) + evcb_len; u32 i, lim = evcb_len / 0x1c; + //printf("%s %08x %x\n", __func__, class, spec); for (i = 0; i < lim; i++, ev++) { use_cycles(8); if (SWAP32(ev->status) != EvStACTIVE) @@ -2339,9 +2345,7 @@ static void buopen(int mcd, char *ptr, char *cfg) void psxBios_open() { // 0x32 void *pa0 = Ra0; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s: %s,%x\n", biosB0n[0x32], Ra0, a1); -#endif + PSXBIOS_LOG("psxBios_%s %s %x\n", biosB0n[0x32], Ra0, a1); v0 = -1; @@ -2498,7 +2502,9 @@ static size_t strlen_internal(char* p) #define bufile(mcd) { \ size_t size_of_name = strlen_internal(dir->name); \ + v0 = 0; \ while (nfile < 16) { \ + char *pfile = ffile+5; \ int match=1; \ \ ptr = Mcd##mcd##Data + 128 * (nfile + 1); \ @@ -2531,22 +2537,17 @@ static size_t 
strlen_internal(char* p) * struct DIRENTRY* firstfile(char *name,struct DIRENTRY *dir); */ -void psxBios_firstfile() { // 42 - struct DIRENTRY *dir = (struct DIRENTRY *)Ra1; - void *pa0 = Ra0; +static void psxBios_firstfile() { // 42 + struct DIRENTRY *dir = (struct DIRENTRY *)castRam8ptr(a1); + char *pa0 = castRam8ptr(a0); u32 _dir = a1; char *ptr; int i; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s: %s\n", biosB0n[0x42], Ra0); -#endif - v0 = 0; - if (pa0 != INVALID_PTR) { - strcpy(ffile, pa0); - pfile = ffile+5; + { + snprintf(ffile, sizeof(ffile), "%s", pa0); nfile = 0; if (!strncmp(pa0, "bu00", 4)) { // firstfile() calls _card_read() internally, so deliver it's event @@ -2558,6 +2559,7 @@ void psxBios_firstfile() { // 42 bufile(2); } } + PSXBIOS_LOG("psxBios_%s %s %x -> %x\n", biosB0n[0x42], pa0, a1, v0); pc0 = ra; } @@ -2572,19 +2574,15 @@ void psxBios_nextfile() { // 43 char *ptr; int i; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s: %s\n", biosB0n[0x43], dir->name); -#endif - v0 = 0; if (!strncmp(ffile, "bu00", 4)) { bufile(1); } - - if (!strncmp(ffile, "bu10", 4)) { + else if (!strncmp(ffile, "bu10", 4)) { bufile(2); } + PSXBIOS_LOG("psxBios_%s %s -> %x\n", biosB0n[0x43], dir->name, v0); pc0 = ra; } @@ -3568,9 +3566,6 @@ void psxBiosInit() { memset(FDesc, 0, sizeof(FDesc)); - // initial RNG seed - psxMu32ref(0x9010) = SWAPu32(0xac20cc00); - // somewhat pretend to be a SCPH1001 BIOS // some games look for these and take an exception if they're missing rom32 = (u32 *)psxR; @@ -3668,7 +3663,6 @@ void psxBiosInit() { ram32[0x1000/4] = HLEOP(hleop_dummy); ram32[0x2000/4] = HLEOP(hleop_dummy); ram32[0x3000/4] = HLEOP(hleop_dummy); - ram32[0x4c54/4] = HLEOP(hleop_dummy); // for B12_InitPad? 
ram32[0x8000/4] = HLEOP(hleop_execret); ram32[A_EEXIT_PTR/4] = SWAP32(A_EEXIT_DEF); @@ -3677,6 +3671,7 @@ void psxBiosInit() { ram32[A_RCNT_VBL_ACK/4 + 1] = SWAP32(1); ram32[A_RCNT_VBL_ACK/4 + 2] = SWAP32(1); ram32[A_RCNT_VBL_ACK/4 + 3] = SWAP32(1); + ram32[A_RND_SEED/4] = SWAPu32(0x24040001); // was 0xac20cc00 } void psxBiosShutdown() { @@ -3757,7 +3752,7 @@ void hleExc0_2_2_syscall() // not in any A/B/C table { u32 tcbPtr = loadRam32(A_TT_PCB); TCB *tcb = loadRam32ptr(tcbPtr); - u32 code = SWAP32(tcb->cause) >> 2; + u32 code = (SWAP32(tcb->cause) & 0x3c) >> 2; if (code != R3000E_Syscall) { if (code != 0) { @@ -3975,21 +3970,12 @@ void psxBiosException() { } #define bfreezes(ptr) bfreeze(ptr, sizeof(ptr)) -#define bfreezel(ptr) bfreeze(ptr, sizeof(*ptr)) - -#define bfreezepsxMptr(ptr, type) { \ - if (Mode == 1) { \ - if (ptr) psxRu32ref(base) = SWAPu32((s8 *)(ptr) - psxM); \ - else psxRu32ref(base) = 0; \ - } else { \ - if (psxRu32(base) != 0) ptr = (type *)(psxM + psxRu32(base)); \ - else (ptr) = NULL; \ - } \ - base += sizeof(u32); \ -} +#define bfreezel(ptr) bfreeze(ptr, sizeof(*(ptr))) void psxBiosFreeze(int Mode) { u32 base = 0x40000; bfreezes(FDesc); + bfreezes(ffile); + bfreezel(&nfile); } diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index 5756bee55..e3c5baf44 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -1118,6 +1118,7 @@ OP(psxHLE) { psxSWCx(regs_, code); return; } + dloadFlush(regs_); psxHLEt[hleCode](); branchSeen = 1; } From cfa5a2aff5202aadca7d19c76b61c80cec5b327c Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 28 Aug 2023 23:55:01 +0300 Subject: [PATCH 342/597] some missing error handling pointed out by gcc analyzer --- frontend/libpicofe | 2 +- frontend/main.c | 2 +- libpcsxcore/cheat.c | 3 ++- libpcsxcore/misc.c | 44 +++++++++++++++++++++++++------------------- libpcsxcore/ppf.c | 3 +++ libpcsxcore/sio.c | 2 ++ 6 files changed, 34 insertions(+), 22 deletions(-) diff --git 
a/frontend/libpicofe b/frontend/libpicofe index 7167e5f33..5dd225ecd 160000 --- a/frontend/libpicofe +++ b/frontend/libpicofe @@ -1 +1 @@ -Subproject commit 7167e5f3376f0d0692ae102ed2df1ef5d2cc199a +Subproject commit 5dd225ecd6d5a04fd8e6f16c8f8ee65ee88c6fed diff --git a/frontend/main.c b/frontend/main.c index 1d0083587..be93282e8 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -736,7 +736,7 @@ static void toggle_fast_forward(int force_off) static void SignalExit(int sig) { // only to restore framebuffer/resolution on some devices plat_finish(); - exit(1); + _exit(1); } #endif diff --git a/libpcsxcore/cheat.c b/libpcsxcore/cheat.c index a016aeec8..7e9dc240c 100644 --- a/libpcsxcore/cheat.c +++ b/libpcsxcore/cheat.c @@ -340,7 +340,6 @@ int AddCheat(const char *descr, char *code) { } } - Cheats[NumCheats].Descr = strdup(descr[0] ? descr : _("(Untitled)")); Cheats[NumCheats].Enabled = 0; Cheats[NumCheats].WasEnabled = 0; Cheats[NumCheats].First = NumCodes; @@ -392,6 +391,7 @@ int AddCheat(const char *descr, char *code) { return -1; } + Cheats[NumCheats].Descr = strdup(descr[0] ? 
descr : _("(Untitled)")); NumCheats++; return 0; } @@ -400,6 +400,7 @@ void RemoveCheat(int index) { assert(index >= 0 && index < NumCheats); free(Cheats[index].Descr); + Cheats[index].Descr = NULL; while (index < NumCheats - 1) { Cheats[index] = Cheats[index + 1]; diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 3d1647102..702f690fb 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -628,11 +628,12 @@ static const u32 SaveVersion = 0x8b410006; int SaveState(const char *file) { void *f; - GPUFreeze_t *gpufP; - SPUFreezeHdr_t *spufH; - SPUFreeze_t *spufP; + GPUFreeze_t *gpufP = NULL; + SPUFreezeHdr_t spufH; + SPUFreeze_t *spufP = NULL; + unsigned char *pMem = NULL; + int result = -1; int Size; - unsigned char *pMem; f = SaveFuncs.open(file, "wb"); if (f == NULL) return -1; @@ -644,7 +645,7 @@ int SaveState(const char *file) { SaveFuncs.write(f, (void *)&Config.HLE, sizeof(boolean)); pMem = (unsigned char *)malloc(128 * 96 * 3); - if (pMem == NULL) return -1; + if (pMem == NULL) goto cleanup; GPU_getScreenPic(pMem); SaveFuncs.write(f, pMem, 128 * 96 * 3); free(pMem); @@ -660,20 +661,20 @@ int SaveState(const char *file) { // gpu gpufP = (GPUFreeze_t *)malloc(sizeof(GPUFreeze_t)); + if (gpufP == NULL) goto cleanup; gpufP->ulFreezeVersion = 1; GPU_freeze(1, gpufP); SaveFuncs.write(f, gpufP, sizeof(GPUFreeze_t)); - free(gpufP); + free(gpufP); gpufP = NULL; // spu - spufH = malloc(sizeof(*spufH)); - SPU_freeze(2, (SPUFreeze_t *)spufH, psxRegs.cycle); - Size = spufH->Size; SaveFuncs.write(f, &Size, 4); - free(spufH); + SPU_freeze(2, (SPUFreeze_t *)&spufH, psxRegs.cycle); + Size = spufH.Size; SaveFuncs.write(f, &Size, 4); spufP = (SPUFreeze_t *) malloc(Size); + if (spufP == NULL) goto cleanup; SPU_freeze(1, spufP, psxRegs.cycle); SaveFuncs.write(f, spufP, Size); - free(spufP); + free(spufP); spufP = NULL; sioFreeze(f, 1); cdrFreeze(f, 1); @@ -682,19 +683,21 @@ int SaveState(const char *file) { mdecFreeze(f, 1); new_dyna_freeze(f, 1); + result = 0; 
+cleanup: SaveFuncs.close(f); - - return 0; + return result; } int LoadState(const char *file) { void *f; - GPUFreeze_t *gpufP; - SPUFreeze_t *spufP; + GPUFreeze_t *gpufP = NULL; + SPUFreeze_t *spufP = NULL; int Size; char header[32]; u32 version; boolean hle; + int result = -1; f = SaveFuncs.open(file, "rb"); if (f == NULL) return -1; @@ -704,8 +707,8 @@ int LoadState(const char *file) { SaveFuncs.read(f, &hle, sizeof(boolean)); if (strncmp("STv4 PCSX", header, 9) != 0 || version != SaveVersion) { - SaveFuncs.close(f); - return -1; + SysPrintf("incompatible savestate version %x\n", version); + goto cleanup; } Config.HLE = hle; @@ -726,6 +729,7 @@ int LoadState(const char *file) { // gpu gpufP = (GPUFreeze_t *)malloc(sizeof(GPUFreeze_t)); + if (gpufP == NULL) goto cleanup; SaveFuncs.read(f, gpufP, sizeof(GPUFreeze_t)); GPU_freeze(0, gpufP); free(gpufP); @@ -735,6 +739,7 @@ int LoadState(const char *file) { // spu SaveFuncs.read(f, &Size, 4); spufP = (SPUFreeze_t *)malloc(Size); + if (spufP == NULL) goto cleanup; SaveFuncs.read(f, spufP, Size); SPU_freeze(0, spufP, psxRegs.cycle); free(spufP); @@ -746,9 +751,10 @@ int LoadState(const char *file) { mdecFreeze(f, 0); new_dyna_freeze(f, 0); + result = 0; +cleanup: SaveFuncs.close(f); - - return 0; + return result; } int CheckState(const char *file) { diff --git a/libpcsxcore/ppf.c b/libpcsxcore/ppf.c index 454290d0d..2ce1a9d9a 100644 --- a/libpcsxcore/ppf.c +++ b/libpcsxcore/ppf.c @@ -58,6 +58,7 @@ static void FillPPFCache() { if (iPPFNum <= 0) return; pc = ppfCache = (PPF_CACHE *)malloc(iPPFNum * sizeof(PPF_CACHE)); + if (pc == NULL) return; iPPFNum--; p = ppfHead; @@ -133,6 +134,7 @@ void CheckPPFCache(unsigned char *pB, unsigned char m, unsigned char s, unsigned static void AddToPPF(s32 ladr, s32 pos, s32 anz, unsigned char *ppfmem) { if (ppfHead == NULL) { ppfHead = (PPF_DATA *)malloc(sizeof(PPF_DATA) + anz); + if (ppfHead == NULL) return; ppfHead->addr = ladr; ppfHead->pNext = NULL; ppfHead->pos = pos; @@ -164,6 
+166,7 @@ static void AddToPPF(s32 ladr, s32 pos, s32 anz, unsigned char *ppfmem) { } padd = (PPF_DATA *)malloc(sizeof(PPF_DATA) + anz); + if (padd == NULL) return; padd->addr = ladr; padd->pNext = p; padd->pos = pos; diff --git a/libpcsxcore/sio.c b/libpcsxcore/sio.c index 6478338de..7aa669bf5 100644 --- a/libpcsxcore/sio.c +++ b/libpcsxcore/sio.c @@ -678,6 +678,7 @@ void ConvertMcd(char *mcd, char *data) { fclose(f); } f = fopen(mcd, "r+"); + if (f == NULL) return; s = s + 3904; fputc('1', f); s--; fputc('2', f); s--; @@ -712,6 +713,7 @@ void ConvertMcd(char *mcd, char *data) { fclose(f); } f = fopen(mcd, "r+"); + if (f == NULL) return; s = s + 64; fputc('V', f); s--; fputc('g', f); s--; From b23cf6d99d6023da79344b3438278cf3fb35092f Mon Sep 17 00:00:00 2001 From: Julien Loir <6706489+Namaneo@users.noreply.github.com> Date: Mon, 28 Aug 2023 22:46:34 +0200 Subject: [PATCH 343/597] Fix emscripten build --- frontend/plugin.c | 105 ++++++++++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 41 deletions(-) diff --git a/frontend/plugin.c b/frontend/plugin.c index 0bfc2aabc..f77b6e1e8 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -13,15 +13,37 @@ #include "plugin.h" #include "psemu_plugin_defs.h" #include "../libpcsxcore/system.h" +#include "../libpcsxcore/psxcommon.h" #include "../plugins/cdrcimg/cdrcimg.h" // this can't be __stdcall like it was in PSEmu API as too many functions are mixed up #undef CALLBACK #define CALLBACK -static long CALLBACK dummy_func() { - return 0; -} +/* CDR */ +struct CdrStat; +static long CALLBACK CDRinit(void) { return 0; } +static long CALLBACK CDRshutdown(void) { return 0; } +static long CALLBACK CDRopen(void) { return 0; } +static long CALLBACK CDRclose(void) { return 0; } +static long CALLBACK CDRgetTN(unsigned char *_) { return 0; } +static long CALLBACK CDRgetTD(unsigned char _, unsigned char *__) { return 0; } +static boolean CALLBACK CDRreadTrack(unsigned char *_) { return FALSE; } +static unsigned 
char * CALLBACK CDRgetBuffer(void) { return NULL; } +static unsigned char * CALLBACK CDRgetBufferSub(int sector) { return NULL; } +static long CALLBACK CDRconfigure(void) { return 0; } +static long CALLBACK CDRtest(void) { return 0; } +static void CALLBACK CDRabout(void) { return; } +static long CALLBACK CDRplay(unsigned char *_) { return 0; } +static long CALLBACK CDRstop(void) { return 0; } +static long CALLBACK CDRsetfilename(char *_) { return 0; } +static long CALLBACK CDRgetStatus(struct CdrStat *_) { return 0; } +static char * CALLBACK CDRgetDriveLetter(void) { return NULL; } +static long CALLBACK CDRreadCDDA(unsigned char _, unsigned char __, unsigned char ___, unsigned char *____) { return 0; } +static long CALLBACK CDRgetTE(unsigned char _, unsigned char *__, unsigned char *___, unsigned char *____) { return 0; } + +/* GPU */ +static void CALLBACK GPUdisplayText(char *_) { return; } /* SPU */ extern long CALLBACK SPUopen(void); @@ -40,6 +62,12 @@ extern void CALLBACK SPUasync(unsigned int, unsigned int); extern int CALLBACK SPUplayCDDAchannel(short *, int, unsigned int, int); /* PAD */ +static long CALLBACK PADinit(long _) { return 0; } +static long CALLBACK PADopen(unsigned long *_) { return 0; } +static long CALLBACK PADshutdown(void) { return 0; } +static long CALLBACK PADclose(void) { return 0; } +static void CALLBACK PADsetSensitive(int _) { return; } + static long CALLBACK PADreadPort1(PadDataS *pad) { pad->controllerType = in_type[0]; @@ -78,15 +106,10 @@ extern void GPUvBlank(int, int); extern void GPUrearmedCallbacks(const struct rearmed_cbs *cbs); -#define DUMMY(id, name) \ - { id, #name, dummy_func } - #define DIRECT(id, name) \ { id, #name, name } -#define DUMMY_GPU(name) DUMMY(PLUGIN_GPU, name) -#define DUMMY_CDR(name) DUMMY(PLUGIN_CDR, name) -#define DUMMY_PAD(name) DUMMY(PLUGIN_PAD, name) +#define DIRECT_CDR(name) DIRECT(PLUGIN_CDR, name) #define DIRECT_SPU(name) DIRECT(PLUGIN_SPU, name) #define DIRECT_GPU(name) DIRECT(PLUGIN_GPU, name) 
#define DIRECT_PAD(name) DIRECT(PLUGIN_PAD, name) @@ -97,25 +120,25 @@ static const struct { void *func; } plugin_funcs[] = { /* CDR */ - DUMMY_CDR(CDRinit), - DUMMY_CDR(CDRshutdown), - DUMMY_CDR(CDRopen), - DUMMY_CDR(CDRclose), - DUMMY_CDR(CDRtest), - DUMMY_CDR(CDRgetTN), - DUMMY_CDR(CDRgetTD), - DUMMY_CDR(CDRreadTrack), - DUMMY_CDR(CDRgetBuffer), - DUMMY_CDR(CDRgetBufferSub), - DUMMY_CDR(CDRplay), - DUMMY_CDR(CDRstop), - DUMMY_CDR(CDRgetStatus), - DUMMY_CDR(CDRgetDriveLetter), - DUMMY_CDR(CDRconfigure), - DUMMY_CDR(CDRabout), - DUMMY_CDR(CDRsetfilename), - DUMMY_CDR(CDRreadCDDA), - DUMMY_CDR(CDRgetTE), + DIRECT_CDR(CDRinit), + DIRECT_CDR(CDRshutdown), + DIRECT_CDR(CDRopen), + DIRECT_CDR(CDRclose), + DIRECT_CDR(CDRtest), + DIRECT_CDR(CDRgetTN), + DIRECT_CDR(CDRgetTD), + DIRECT_CDR(CDRreadTrack), + DIRECT_CDR(CDRgetBuffer), + DIRECT_CDR(CDRgetBufferSub), + DIRECT_CDR(CDRplay), + DIRECT_CDR(CDRstop), + DIRECT_CDR(CDRgetStatus), + DIRECT_CDR(CDRgetDriveLetter), + DIRECT_CDR(CDRconfigure), + DIRECT_CDR(CDRabout), + DIRECT_CDR(CDRsetfilename), + DIRECT_CDR(CDRreadCDDA), + DIRECT_CDR(CDRgetTE), /* SPU */ DIRECT_SPU(SPUinit), DIRECT_SPU(SPUshutdown), @@ -132,21 +155,21 @@ static const struct { DIRECT_SPU(SPUasync), DIRECT_SPU(SPUplayCDDAchannel), /* PAD */ - DUMMY_PAD(PADinit), - DUMMY_PAD(PADshutdown), - DUMMY_PAD(PADopen), - DUMMY_PAD(PADclose), - DUMMY_PAD(PADsetSensitive), + DIRECT_PAD(PADinit), + DIRECT_PAD(PADshutdown), + DIRECT_PAD(PADopen), + DIRECT_PAD(PADclose), + DIRECT_PAD(PADsetSensitive), DIRECT_PAD(PADreadPort1), DIRECT_PAD(PADreadPort2), /* - DUMMY_PAD(PADquery), - DUMMY_PAD(PADconfigure), - DUMMY_PAD(PADtest), - DUMMY_PAD(PADabout), - DUMMY_PAD(PADkeypressed), - DUMMY_PAD(PADstartPoll), - DUMMY_PAD(PADpoll), + DIRECT_PAD(PADquery), + DIRECT_PAD(PADconfigure), + DIRECT_PAD(PADtest), + DIRECT_PAD(PADabout), + DIRECT_PAD(PADkeypressed), + DIRECT_PAD(PADstartPoll), + DIRECT_PAD(PADpoll), */ /* GPU */ DIRECT_GPU(GPUupdateLace), @@ -165,7 +188,7 @@ static 
const struct { DIRECT_GPU(GPUvBlank), DIRECT_GPU(GPUrearmedCallbacks), - DUMMY_GPU(GPUdisplayText), + DIRECT_GPU(GPUdisplayText), /* DIRECT_GPU(GPUkeypressed), DIRECT_GPU(GPUmakeSnapshot), From 6fb444d65d471c40872f7b559c7fe2520bf5ef47 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 29 Aug 2023 03:28:40 +0300 Subject: [PATCH 344/597] try to unbreak bigendian always struggling with it libretro/pcsx_rearmed#753 --- libpcsxcore/psxhw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index e85d8b5c9..8397f3914 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -43,7 +43,7 @@ void psxHwReset() { void psxHwWriteIstat(u32 value) { - u32 stat = psxHu16(0x1070) & SWAPu16(value); + u32 stat = psxHu16(0x1070) & value; psxHu16ref(0x1070) = SWAPu16(stat); psxRegs.CP0.n.Cause &= ~0x400; @@ -55,7 +55,7 @@ void psxHwWriteImask(u32 value) { u32 stat = psxHu16(0x1070); psxHu16ref(0x1074) = SWAPu16(value); - if (stat & SWAPu16(value)) { + if (stat & value) { //if ((psxRegs.CP0.n.SR & 0x401) == 0x401) // log_unhandled("irq on unmask @%08x\n", psxRegs.pc); new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); From 5c8119b8680a38d4571b8083a2475d3d4649b983 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 30 Aug 2023 00:43:15 +0300 Subject: [PATCH 345/597] psxbios: implement get/setconf --- libpcsxcore/misc.c | 2 +- libpcsxcore/psxbios.c | 82 ++++++++++++++++++++++++++++++------------- libpcsxcore/psxbios.h | 2 +- 3 files changed, 60 insertions(+), 26 deletions(-) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 702f690fb..a16e64287 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -243,7 +243,7 @@ int LoadCdrom() { getFromCnf((char *)buf + 12, "EVENT", &cnf_event); getFromCnf((char *)buf + 12, "STACK", &cnf_stack); if (Config.HLE) - psxBiosCnfLoaded(cnf_tcb, cnf_event); + psxBiosCnfLoaded(cnf_tcb, cnf_event, cnf_stack); // Read the EXE-Header READTRACK(); diff --git a/libpcsxcore/psxbios.c 
b/libpcsxcore/psxbios.c index 734f3e397..fbd1af41a 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -294,6 +294,9 @@ static int nfile; #define A_HEAP_END 0x9008 #define A_HEAP_FLAG 0x900c #define A_RND_SEED 0x9010 +#define A_CONF_TCB 0xb940 +#define A_CONF_EvCB 0xb944 +#define A_CONF_SP 0xb948 #define A_CD_EVENTS 0xb9b8 #define A_EXC_GP 0xf450 @@ -1713,6 +1716,23 @@ static void psxBios_CdRemove() { // 56, 72 use_cycles(30); } +static void setup_tt(u32 tcb_cnt, u32 evcb_cnt, u32 stack); + +static void psxBios_SetConf() { // 9c + PSXBIOS_LOG("psxBios_%s %x %x %x\n", biosA0n[0x9c], a0, a1, a2); + setup_tt(a1, a0, a2); + psxRegs.CP0.n.SR |= 0x401; + mips_return_void_c(500); +} + +static void psxBios_GetConf() { // 9d + PSXBIOS_LOG("psxBios_%s %x %x %x\n", biosA0n[0x9d], a0, a1, a2); + storeRam32(a0, loadRam32(A_CONF_EvCB)); + storeRam32(a1, loadRam32(A_CONF_TCB)); + storeRam32(a2, loadRam32(A_CONF_SP)); + mips_return_void_c(10); +} + void psxBios_SetMem() { // 9f u32 new = psxHu32(0x1060); @@ -2077,7 +2097,7 @@ void psxBios_OpenTh() { // 0e mips_return_c(0xffffffff, 20); return; } - PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x0e], th); + PSXBIOS_LOG("psxBios_%s -> %x\n", biosB0n[0x0e], 0xff000000 + th); tcb[th].status = SWAP32(0x4000); tcb[th].mode = SWAP32(0x1000); @@ -2093,21 +2113,15 @@ void psxBios_OpenTh() { // 0e * int CloseTh(long thread); */ -void psxBios_CloseTh() { // 0f - TCB *tcb = loadRam32ptr(A_TT_TCB); - u32 limit = loadRam32(A_TT_TCB + 4) / 0xc0u; - u32 th = a0 & 0xff; +static void psxBios_CloseTh() { // 0f + u32 tcb = loadRam32(A_TT_TCB); + u32 th = a0 & 0xffff; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x0f], th); -#endif - /* The return value is always 1 (even if the handle was already closed). 
*/ - v0 = 1; - if (th < limit && tcb[th].status == SWAP32(0x4000)) { - tcb[th].status = SWAP32(0x1000); - } + PSXBIOS_LOG("psxBios_%s %x\n", biosB0n[0x0f], a0); + // in the usual bios fashion no checks, just write and return 1 + storeRam32(tcb + th * sizeof(TCB), 0x1000); - pc0 = ra; + mips_return_c(1, 11); } /* @@ -3100,12 +3114,21 @@ static void write_chain(u32 *d, u32 next, u32 handler1, u32 handler2) if (handler2) PSXMu32ref(handler2) = HLEOP(chain_hle_op(handler2)); } -static void setup_tt(u32 tcb_cnt, u32 evcb_cnt) +static void setup_tt(u32 tcb_cnt, u32 evcb_cnt, u32 stack) { u32 *ram32 = (u32 *)psxM; - u32 s_excb = 0x20, s_evcb = 0x1c * evcb_cnt; - u32 s_pcb = 4, s_tcb = 0xc0 * tcb_cnt; + u32 s_excb = 0x20, s_evcb, s_pcb = 4, s_tcb; u32 p_excb, p_evcb, p_pcb, p_tcb; + u32 i; + + PSXBIOS_LOG("setup: tcb %u, evcb %u\n", tcb_cnt, evcb_cnt); + + // the real bios doesn't care, but we just don't + // want to crash in case of garbage parameters + if (tcb_cnt > 1024) tcb_cnt = 1024; + if (evcb_cnt > 1024) evcb_cnt = 1024; + s_evcb = 0x1c * evcb_cnt; + s_tcb = 0xc0 * tcb_cnt; memset(ram32 + 0xe000/4, 0, s_excb + s_evcb + s_pcb + s_tcb + 5*4); psxBios_SysInitMemory_(0xa000e000, 0x2000); @@ -3136,6 +3159,8 @@ static void setup_tt(u32 tcb_cnt, u32 evcb_cnt) storeRam32(p_pcb, p_tcb); storeRam32(p_tcb, 0x4000); // first TCB + for (i = 1; i < tcb_cnt; i++) + storeRam32(p_tcb + sizeof(TCB) * i, 0x1000); // default events storeRam32(A_CD_EVENTS + 0x00, OpenEvent(0xf0000003, 0x0010, EvMdMARK, 0)); @@ -3143,7 +3168,10 @@ static void setup_tt(u32 tcb_cnt, u32 evcb_cnt) storeRam32(A_CD_EVENTS + 0x08, OpenEvent(0xf0000003, 0x0040, EvMdMARK, 0)); storeRam32(A_CD_EVENTS + 0x0c, OpenEvent(0xf0000003, 0x0080, EvMdMARK, 0)); storeRam32(A_CD_EVENTS + 0x10, OpenEvent(0xf0000003, 0x8000, EvMdMARK, 0)); - DeliverEvent(0xf0000003, 0x0010); + + storeRam32(A_CONF_EvCB, evcb_cnt); + storeRam32(A_CONF_TCB, tcb_cnt); + storeRam32(A_CONF_SP, stack); } static const u32 gpu_ctl_def[] = { @@ 
-3411,8 +3439,8 @@ void psxBiosInit() { //biosA0[0x99] = psxBios_EnableKernelIORedirection; //biosA0[0x9a] = psxBios_sys_a0_9a; //biosA0[0x9b] = psxBios_sys_a0_9b; - //biosA0[0x9c] = psxBios_SetConf; - //biosA0[0x9d] = psxBios_GetConf; + biosA0[0x9c] = psxBios_SetConf; + biosA0[0x9d] = psxBios_GetConf; //biosA0[0x9e] = psxBios_sys_a0_9e; biosA0[0x9f] = psxBios_SetMem; //biosA0[0xa0] = psxBios__boot; @@ -3614,7 +3642,8 @@ void psxBiosInit() { ram32[0x00b0/4] = HLEOP(hleop_b0); ram32[0x00c0/4] = HLEOP(hleop_c0); - setup_tt(4, 16); + setup_tt(4, 16, 0x801fff00); + DeliverEvent(0xf0000003, 0x0010); ram32[0x6ee0/4] = SWAPu32(0x0000eff0); // DCB strcpy((char *)&ram32[0xeff0/4], "bu"); @@ -3677,9 +3706,14 @@ void psxBiosInit() { void psxBiosShutdown() { } -void psxBiosCnfLoaded(u32 tcb_cnt, u32 evcb_cnt) { - if (tcb_cnt != 4 || evcb_cnt != 16) - setup_tt(tcb_cnt, evcb_cnt); +void psxBiosCnfLoaded(u32 tcb_cnt, u32 evcb_cnt, u32 stack) { + if (stack == 0) + stack = 0x801FFF00; + if (tcb_cnt != 4 || evcb_cnt != 16) { + setup_tt(tcb_cnt, evcb_cnt, stack); + DeliverEvent(0xf0000003, 0x0010); + } + storeRam32(A_CONF_SP, stack); } #define psxBios_PADpoll(pad) { \ diff --git a/libpcsxcore/psxbios.h b/libpcsxcore/psxbios.h index 2a4fa80a9..4ebbd2b69 100644 --- a/libpcsxcore/psxbios.h +++ b/libpcsxcore/psxbios.h @@ -38,7 +38,7 @@ void psxBiosInit(); void psxBiosShutdown(); void psxBiosException(); void psxBiosFreeze(int Mode); -void psxBiosCnfLoaded(u32 tcbs, u32 events); +void psxBiosCnfLoaded(u32 tcb_cnt, u32 evcb_cnt, u32 sp); void psxBiosSetupBootState(void); extern void (*biosA0[256])(); From 71e413beb2288211f8864040d325de6e1676c413 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 30 Aug 2023 23:30:39 +0300 Subject: [PATCH 346/597] make GPUopen consistent everywhere libretro/pcsx_rearmed#752 --- plugins/gpu-gles/gpuPlugin.c | 2 +- plugins/gpu-gles/gpuPlugin.h | 2 +- plugins/gpu-gles/gpulib_if.c | 4 ++-- plugins/gpulib/gpu.h | 2 +- plugins/gpulib/vout_pl.c | 2 +- 
plugins/gpulib/vout_sdl.c | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/plugins/gpu-gles/gpuPlugin.c b/plugins/gpu-gles/gpuPlugin.c index 6d3ca14c0..906d01eec 100644 --- a/plugins/gpu-gles/gpuPlugin.c +++ b/plugins/gpu-gles/gpuPlugin.c @@ -453,7 +453,7 @@ return 0; // some PAD or SPU plugins would not work anymore) //////////////////////////////////////////////////////////////////////// -long CALLBACK GPUopen(int hwndGPU) +long CALLBACK GPUopen(unsigned long *disp, char *cap, char *cfg) { iResX=800;iResY=480; iColDepth=8; diff --git a/plugins/gpu-gles/gpuPlugin.h b/plugins/gpu-gles/gpuPlugin.h index 7a72fbd21..556d7f536 100644 --- a/plugins/gpu-gles/gpuPlugin.h +++ b/plugins/gpu-gles/gpuPlugin.h @@ -65,7 +65,7 @@ typedef struct { #if 0 long CALLBACK GPUinit(); long CALLBACK GPUshutdown(); -long CALLBACK GPUopen(int hwndGPU); +long CALLBACK GPUopen(unsigned long *disp, char *cap, char *cfg); long CALLBACK GPUclose(); unsigned long CALLBACK GPUreadData(void); void CALLBACK GPUreadDataMem(unsigned long * pMem, int iSize); diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c index b592175b8..923f652e3 100644 --- a/plugins/gpu-gles/gpulib_if.c +++ b/plugins/gpu-gles/gpulib_if.c @@ -679,7 +679,7 @@ void vout_set_config(const struct rearmed_cbs *cbs) static struct rearmed_cbs *cbs; -long GPUopen(void **dpy) +long GPUopen(unsigned long *disp, char *cap, char *cfg) { int ret; @@ -738,7 +738,7 @@ void renderer_set_config(const struct rearmed_cbs *cbs_) if (is_opened && cbs->gles_display != NULL && cbs->gles_surface != NULL) { // HACK.. 
GPUclose(); - GPUopen(NULL); + GPUopen(NULL, NULL, NULL); } set_vram(gpu.vram); diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index b6bd60afa..2f7a464c2 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -145,7 +145,7 @@ uint32_t GPUreadStatus(void); void GPUwriteStatus(uint32_t data); long GPUfreeze(uint32_t type, struct GPUFreeze *freeze); void GPUupdateLace(void); -long GPUopen(void **dpy); +long GPUopen(unsigned long *disp, char *cap, char *cfg); long GPUclose(void); void GPUvBlank(int is_vblank, int lcf); void GPUrearmedCallbacks(const struct rearmed_cbs *cbs_); diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index eadf57ce6..cae35a3f2 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -114,7 +114,7 @@ void vout_blank(void) cbs->pl_vout_flip(NULL, 1024, !!(gpu.status & PSX_GPU_STATUS_RGB24), 0, 0, w, h, 0); } -long GPUopen(void **unused) +long GPUopen(unsigned long *disp, char *cap, char *cfg) { gpu.frameskip.active = 0; gpu.frameskip.frame_ready = 1; diff --git a/plugins/gpulib/vout_sdl.c b/plugins/gpulib/vout_sdl.c index 56ab811c0..81272b293 100644 --- a/plugins/gpulib/vout_sdl.c +++ b/plugins/gpulib/vout_sdl.c @@ -81,9 +81,9 @@ void vout_blank(void) { } -long GPUopen(void **dpy) +long GPUopen(unsigned long *disp, char *cap, char *cfg) { - *dpy = x11_display; + *disp = (long)x11_display; return 0; } From a830538149ed1216279407c85061b9937444ad1c Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 31 Aug 2023 23:32:18 +0300 Subject: [PATCH 347/597] frontend: don't frameskip on fast forward It just glitches everything. Standalone only, libretro does it's own thing. 
--- frontend/main.c | 8 ++++---- frontend/menu.c | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/frontend/main.c b/frontend/main.c index be93282e8..092a844a1 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -703,8 +703,8 @@ static void toggle_fast_forward(int force_off) { static int fast_forward; static int normal_g_opts; - static int normal_frameskip; static int normal_enhancement_enable; + //static int normal_frameskip; if (force_off && !fast_forward) return; @@ -712,16 +712,16 @@ static void toggle_fast_forward(int force_off) fast_forward = !fast_forward; if (fast_forward) { normal_g_opts = g_opts; - normal_frameskip = pl_rearmed_cbs.frameskip; + //normal_frameskip = pl_rearmed_cbs.frameskip; normal_enhancement_enable = pl_rearmed_cbs.gpu_neon.enhancement_enable; g_opts |= OPT_NO_FRAMELIM; - pl_rearmed_cbs.frameskip = 3; + // pl_rearmed_cbs.frameskip = 3; // too broken pl_rearmed_cbs.gpu_neon.enhancement_enable = 0; } else { g_opts = normal_g_opts; - pl_rearmed_cbs.frameskip = normal_frameskip; + //pl_rearmed_cbs.frameskip = normal_frameskip; pl_rearmed_cbs.gpu_neon.enhancement_enable = normal_enhancement_enable; diff --git a/frontend/menu.c b/frontend/menu.c index 901c72d51..ee60df6f9 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -91,7 +91,8 @@ typedef enum } menu_id; static int last_vout_w, last_vout_h, last_vout_bpp; -static int cpu_clock, cpu_clock_st, volume_boost, frameskip; +static int cpu_clock, cpu_clock_st, volume_boost; +static int frameskip = 1; // 0 - auto, 1 - off static char last_selected_fname[MAXPATHLEN]; static int config_save_counter, region, in_type_sel1, in_type_sel2; static int psx_clock; @@ -337,7 +338,7 @@ static void menu_set_defconfig(void) g_scaler = SCALE_4_3; g_gamma = 100; volume_boost = 0; - frameskip = 0; + frameskip = 1; // 1 - off analog_deadzone = 50; soft_scaling = 1; soft_filter = 0; @@ -424,7 +425,7 @@ static const struct { CE_INTVAL(g_autostateld_opt), CE_INTVAL_N("adev0_is_nublike", 
in_adev_is_nublike[0]), CE_INTVAL_N("adev1_is_nublike", in_adev_is_nublike[1]), - CE_INTVAL_V(frameskip, 3), + CE_INTVAL_V(frameskip, 4), CE_INTVAL_P(gpu_peops.iUseDither), CE_INTVAL_P(gpu_peops.dwActFixes), CE_INTVAL_P(gpu_unai.lineskip), From 47c15995b0a92b55272accea2b4033bc4872c46c Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 31 Aug 2023 23:51:19 +0300 Subject: [PATCH 348/597] gpu: handle wrapping somewhat instead of crashing outright --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 22 ++++++++++++++++++++-- plugins/gpu_neon/psx_gpu_if.c | 1 + 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 370d8f2a6..af24e7703 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -260,8 +260,8 @@ u32 invalidate_texture_cache_region_viewport(psx_gpu_struct *psx_gpu, u32 x1, return mask; } -void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1, - u32 x2, u32 y2) +static void update_texture_cache_region_(psx_gpu_struct *psx_gpu, + u32 x1, u32 y1, u32 x2, u32 y2) { u32 mask = texture_region_mask(x1, y1, x2, y2); u32 texture_page; @@ -313,6 +313,22 @@ void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1, } } +void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1, + u32 x2, u32 y2) +{ + s32 w = x2 - x1; + do + { + x2 = x1 + w; + if (x2 > 1023) + x2 = 1023; + update_texture_cache_region_(psx_gpu, x1, y1, x2, y2); + w -= x2 - x1; + x1 = 0; + } + while (unlikely(w > 0)); +} + #ifndef NEON_BUILD void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu) @@ -5057,3 +5073,5 @@ void triangle_benchmark(psx_gpu_struct *psx_gpu) #endif #include "psx_gpu_4x.c" + +// vim:ts=2:sw=2:expandtab diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index 69a2a1bbc..4a8b76fcc 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -151,6 +151,7 @@ sync_enhancement_buffers(int 
x, int y, int w, int h) } x += (w1 + s) * step_x; + x &= 0x3ff; x_buf = 0; } } From f7cfdeaf523c698f962812f171822d801d042f23 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 31 Aug 2023 23:50:09 +0300 Subject: [PATCH 349/597] psxbios: rework firstfile/nextfile --- libpcsxcore/psxbios.c | 125 ++++++++++++++++++++++-------------------- 1 file changed, 65 insertions(+), 60 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index fbd1af41a..dc726e481 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -2504,47 +2504,61 @@ void psxBios_puts() { // 3e/3f pc0 = ra; } +static void bufile(const u8 *mcd_data, u32 dir_) { + struct DIRENTRY *dir = (struct DIRENTRY *)castRam8ptr(dir_); + const char *pfile = ffile + 5; + const u8 *data = mcd_data; + int i = 0, match = 0; + int blocks = 1; + u32 head = 0; -/* To avoid any issues with different behaviour when using the libc's own strlen instead. - * We want to mimic the PSX's behaviour in this case for bufile. */ -static size_t strlen_internal(char* p) -{ - size_t size_of_array = 0; - while (*p++) size_of_array++; - return size_of_array; -} - -#define bufile(mcd) { \ - size_t size_of_name = strlen_internal(dir->name); \ - v0 = 0; \ - while (nfile < 16) { \ - char *pfile = ffile+5; \ - int match=1; \ - \ - ptr = Mcd##mcd##Data + 128 * (nfile + 1); \ - nfile++; \ - if ((*ptr & 0xF0) != 0x50) continue; \ - /* Bug link files show up as free block. 
*/ \ - if (!ptr[0xa]) continue; \ - ptr+= 0xa; \ - if (pfile[0] == 0) { \ - strncpy(dir->name, ptr, sizeof(dir->name) - 1); \ - if (size_of_name < sizeof(dir->name)) dir->name[size_of_name] = '\0'; \ - } else for (i=0; i<20; i++) { \ - if (pfile[i] == ptr[i]) { \ - dir->name[i] = ptr[i]; continue; } \ - if (pfile[i] == '?') { \ - dir->name[i] = ptr[i]; continue; } \ - if (pfile[i] == '*') { \ - strcpy(dir->name+i, ptr+i); break; } \ - match = 0; break; \ - } \ - PSXBIOS_LOG("%d : %s = %s + %s (match=%d)\n", nfile, dir->name, pfile, ptr, match); \ - if (match == 0) { continue; } \ - dir->size = 8192; \ - v0 = _dir; \ - break; \ - } \ + v0 = 0; + for (; nfile <= 15 && !match; nfile++) { + const char *name; + + head = nfile * 0x40; + data = mcd_data + 128 * nfile; + name = (const char *)data + 0x0a; + if ((data[0] & 0xF0) != 0x50) continue; + /* Bug link files show up as free block. */ + if (!name[0]) continue; + match = 1; + for (i = 0; i < 20; i++) { + if (pfile[i] == name[i] || pfile[i] == '?') + dir->name[i] = name[i]; + else if (pfile[i] == '*') { + int len = strlen(name + i); + if (i + len > 20) + len = 20 - i; + memcpy(dir->name + i, name + i, len + 1); + i += len; + break; + } + else { + match = 0; + break; + } + if (!name[i]) + break; + } + PSXBIOS_LOG("%d : %s = %s + %s (match=%d)\n", + nfile, dir->name, pfile, name, match); + } + for (; nfile <= 15; nfile++, blocks++) { + const u8 *data2 = mcd_data + 128 * nfile; + const char *name = data2 + 0x0a; + if ((data2[0] & 0xF0) != 0x50 || name[0]) + break; + } + if (match) { + // nul char of full lenth name seems to overwrite .attr + dir->attr = SWAP32(i < 20 ? data[0] & 0xf0 : 0); // ? + dir->size = 8192 * blocks; + dir->head = head; + v0 = dir_; + } + PSXBIOS_LOG(" -> %x '%s' %x %x %x %x\n", v0, v0 ? 
dir->name : "", + dir->attr, dir->size, dir->next, dir->head); } /* @@ -2552,28 +2566,26 @@ static size_t strlen_internal(char* p) */ static void psxBios_firstfile() { // 42 - struct DIRENTRY *dir = (struct DIRENTRY *)castRam8ptr(a1); char *pa0 = castRam8ptr(a0); - u32 _dir = a1; - char *ptr; - int i; + PSXBIOS_LOG("psxBios_%s %s %x\n", biosB0n[0x42], pa0, a1); v0 = 0; { snprintf(ffile, sizeof(ffile), "%s", pa0); - nfile = 0; + if (ffile[5] == 0) + strcpy(ffile + 5, "*"); // maybe? + nfile = 1; if (!strncmp(pa0, "bu00", 4)) { // firstfile() calls _card_read() internally, so deliver it's event DeliverEvent(0xf0000011, 0x0004); - bufile(1); + bufile(Mcd1Data, a1); } else if (!strncmp(pa0, "bu10", 4)) { // firstfile() calls _card_read() internally, so deliver it's event DeliverEvent(0xf0000011, 0x0004); - bufile(2); + bufile(Mcd2Data, a1); } } - PSXBIOS_LOG("psxBios_%s %s %x -> %x\n", biosB0n[0x42], pa0, a1, v0); pc0 = ra; } @@ -2583,20 +2595,13 @@ static void psxBios_firstfile() { // 42 */ void psxBios_nextfile() { // 43 - struct DIRENTRY *dir = (struct DIRENTRY *)Ra0; - u32 _dir = a0; - char *ptr; - int i; + PSXBIOS_LOG("psxBios_%s %x\n", biosB0n[0x43], a0); v0 = 0; - - if (!strncmp(ffile, "bu00", 4)) { - bufile(1); - } - else if (!strncmp(ffile, "bu10", 4)) { - bufile(2); - } - PSXBIOS_LOG("psxBios_%s %s -> %x\n", biosB0n[0x43], dir->name, v0); + if (!strncmp(ffile, "bu00", 4)) + bufile(Mcd1Data, a0); + else if (!strncmp(ffile, "bu10", 4)) + bufile(Mcd2Data, a0); pc0 = ra; } From 2646cb48cd02fcd2d09a1882e276bb8fb03b2e14 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 1 Sep 2023 01:29:28 +0300 Subject: [PATCH 350/597] gpu_neon: more complicated overflow check --- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 4dd21e732..c7562993a 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ 
b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -972,9 +972,13 @@ static int disable_main_render; static int check_enhanced_range(psx_gpu_struct *psx_gpu, int x, int x_end) { - // simple reject to avoid oveflowing the 1024 width + // reject to avoid oveflowing the 1024 width // (assume some offscreen render-to-texture thing) - if (x >= (int)(psx_gpu->saved_viewport_start_x + 512)) + int fb_index; + if (x < 0) + return 1; + fb_index = select_enhancement_buf_index(psx_gpu, x); + if (x >= psx_gpu->enhancement_buf_start[fb_index] + 512) return 0; return 1; From 1da9b9ae28406f3bec5b2bd5905783971b991bec Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 2 Sep 2023 22:54:16 +0300 Subject: [PATCH 351/597] initial hle support for lightrec --- frontend/libretro.c | 7 +------ libpcsxcore/new_dynarec/emu_if.c | 3 ++- libpcsxcore/psxbios.c | 10 ++++++++++ libpcsxcore/r3000a.h | 2 +- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index a115fc675..b1af9f15a 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -1948,18 +1948,13 @@ static void update_variables(bool in_flight) { R3000Acpu *prev_cpu = psxCpu; -#if defined(LIGHTREC) - bool can_use_dynarec = found_bios; -#else - bool can_use_dynarec = 1; -#endif #ifdef _3DS if (!__ctr_svchax) Config.Cpu = CPU_INTERPRETER; else #endif - if (strcmp(var.value, "disabled") == 0 || !can_use_dynarec) + if (strcmp(var.value, "disabled") == 0) Config.Cpu = CPU_INTERPRETER; else if (strcmp(var.value, "enabled") == 0) Config.Cpu = CPU_DYNAREC; diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index f879ad8cb..e21003c02 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -297,7 +297,8 @@ static void ari64_notify(enum R3000Anote note, void *data) { case R3000ACPU_NOTIFY_BEFORE_SAVE: break; case R3000ACPU_NOTIFY_AFTER_LOAD: - ari64_reset(); + if (data == NULL) + ari64_reset(); psxInt.Notify(note, data); break; } diff 
--git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index dc726e481..fdac5562f 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -47,6 +47,8 @@ #define PSXBIOS_LOG(...) #endif +#define PTR_1 (void *)(size_t)1 + char *biosA0n[256] = { // 0x00 "open", "lseek", "read", "write", @@ -384,9 +386,13 @@ static inline void softCall(u32 pc) { ra = 0x80001000; psxRegs.CP0.n.SR &= ~0x404; // disable interrupts + psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, PTR_1); + while (pc0 != 0x80001000 && ++lim < 1000000) psxCpu->ExecuteBlock(EXEC_CALLER_HLE); + psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, PTR_1); + if (lim == 1000000) PSXBIOS_LOG("softCall @%x hit lim\n", pc); ra = sra; @@ -403,9 +409,13 @@ static inline void softCallInException(u32 pc) { return; ra = 0x80001000; + psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, PTR_1); + while (!returned_from_exception() && pc0 != 0x80001000 && ++lim < 1000000) psxCpu->ExecuteBlock(EXEC_CALLER_HLE); + psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, PTR_1); + if (lim == 1000000) PSXBIOS_LOG("softCallInException @%x hit lim\n", pc); if (pc0 == 0x80001000) diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 912a41f6d..fb5e1db63 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -45,7 +45,7 @@ enum R3000Aexception { enum R3000Anote { R3000ACPU_NOTIFY_CACHE_ISOLATED = 0, R3000ACPU_NOTIFY_CACHE_UNISOLATED = 1, - R3000ACPU_NOTIFY_BEFORE_SAVE, + R3000ACPU_NOTIFY_BEFORE_SAVE, // data arg - hle if non-null R3000ACPU_NOTIFY_AFTER_LOAD, }; From 11d23573173ec4b5074eb35665c6012a46034d5c Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 4 Sep 2023 23:09:05 +0300 Subject: [PATCH 352/597] preliminary irq10 support libretro/pcsx_rearmed#723 --- libpcsxcore/cdrom.c | 2 - libpcsxcore/new_dynarec/events.c | 6 +-- libpcsxcore/new_dynarec/pcsxmem.c | 28 ++++++----- libpcsxcore/psxbios.c | 10 ++-- libpcsxcore/psxcounters.c | 80 ++++++++++++++++++++++++++----- libpcsxcore/psxcounters.h | 4 +- libpcsxcore/psxhw.c | 12 ++--- 
libpcsxcore/r3000a.c | 45 +++++++++++++++++ libpcsxcore/r3000a.h | 5 +- 9 files changed, 151 insertions(+), 41 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 90ec0d30c..28358bf65 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1711,8 +1711,6 @@ int cdrFreeze(void *f, int Mode) { Find_CurTrack(cdr.SetSectorPlay); if (!Config.Cdda) CDR_play(cdr.SetSectorPlay); - if (psxRegs.interrupt & (1 << PSXINT_CDRPLAY_OLD)) - CDRPLAYREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime, 1); } if ((cdr.freeze_ver & 0xffffff00) != 0x63647200) { diff --git a/libpcsxcore/new_dynarec/events.c b/libpcsxcore/new_dynarec/events.c index 2a06c1f96..b1d427c94 100644 --- a/libpcsxcore/new_dynarec/events.c +++ b/libpcsxcore/new_dynarec/events.c @@ -30,10 +30,6 @@ void schedule_timeslice(void) next_interupt = c + min; } -static void unusedInterrupt() -{ -} - typedef void (irq_func)(); static irq_func * const irq_funcs[] = { @@ -47,7 +43,7 @@ static irq_func * const irq_funcs[] = { [PSXINT_GPUOTCDMA] = gpuotcInterrupt, [PSXINT_CDRDMA] = cdrDmaInterrupt, [PSXINT_CDRLID] = cdrLidSeekInterrupt, - [PSXINT_CDRPLAY_OLD] = unusedInterrupt, + [PSXINT_IRQ10] = irq10Interrupt, [PSXINT_SPU_UPDATE] = spuUpdate, [PSXINT_RCNT] = psxRcntUpdate, }; diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index 87aa17c54..8f79c50ae 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -101,7 +101,11 @@ static void io_write_sio32(u32 value) static void map_rcnt_rcount0(u32 mode) { - if (mode & 0x100) { // pixel clock + if (mode & 0x001) { // sync mode + map_item(&mem_iortab[IOMEM32(0x1100)], psxRcntRcount0, 1); + map_item(&mem_iortab[IOMEM16(0x1100)], psxRcntRcount0, 1); + } + else if (mode & 0x100) { // pixel clock map_item(&mem_iortab[IOMEM32(0x1100)], rcnt0_read_count_m1, 1); map_item(&mem_iortab[IOMEM16(0x1100)], rcnt0_read_count_m1, 1); } @@ -113,7 +117,11 @@ static void map_rcnt_rcount0(u32 
mode) static void map_rcnt_rcount1(u32 mode) { - if (mode & 0x100) { // hcnt + if (mode & 0x001) { // sync mode + map_item(&mem_iortab[IOMEM32(0x1110)], psxRcntRcount1, 1); + map_item(&mem_iortab[IOMEM16(0x1110)], psxRcntRcount1, 1); + } + else if (mode & 0x100) { // hcnt map_item(&mem_iortab[IOMEM32(0x1110)], rcnt1_read_count_m1, 1); map_item(&mem_iortab[IOMEM16(0x1110)], rcnt1_read_count_m1, 1); } @@ -125,7 +133,7 @@ static void map_rcnt_rcount1(u32 mode) static void map_rcnt_rcount2(u32 mode) { - if (mode & 0x01) { // gate + if ((mode & 7) == 1 || (mode & 7) == 7) { // sync mode map_item(&mem_iortab[IOMEM32(0x1120)], &psxH[0x1000], 0); map_item(&mem_iortab[IOMEM16(0x1120)], &psxH[0x1000], 0); } @@ -146,7 +154,6 @@ static void map_rcnt_rcount2(u32 mode) #endif #define make_rcnt_funcs(i) \ -static u32 io_rcnt_read_count##i() { return psxRcntRcount(i); } \ static u32 io_rcnt_read_mode##i() { return psxRcntRmode(i); } \ static u32 io_rcnt_read_target##i() { return psxRcntRtarget(i); } \ static void io_rcnt_write_count##i(u32 val) { psxRcntWcount(i, val & 0xffff); } \ @@ -348,14 +355,13 @@ void new_dyna_pcsx_mem_init(void) } map_item(&mem_iortab[IOMEM32(0x1040)], io_read_sio32, 1); - map_item(&mem_iortab[IOMEM16(0x1044)], sioReadStat16, 1); - map_item(&mem_iortab[IOMEM32(0x1100)], io_rcnt_read_count0, 1); + map_item(&mem_iortab[IOMEM32(0x1100)], psxRcntRcount0, 1); map_item(&mem_iortab[IOMEM32(0x1104)], io_rcnt_read_mode0, 1); map_item(&mem_iortab[IOMEM32(0x1108)], io_rcnt_read_target0, 1); - map_item(&mem_iortab[IOMEM32(0x1110)], io_rcnt_read_count1, 1); + map_item(&mem_iortab[IOMEM32(0x1110)], psxRcntRcount1, 1); map_item(&mem_iortab[IOMEM32(0x1114)], io_rcnt_read_mode1, 1); map_item(&mem_iortab[IOMEM32(0x1118)], io_rcnt_read_target1, 1); - map_item(&mem_iortab[IOMEM32(0x1120)], io_rcnt_read_count2, 1); + map_item(&mem_iortab[IOMEM32(0x1120)], psxRcntRcount2, 1); map_item(&mem_iortab[IOMEM32(0x1124)], io_rcnt_read_mode2, 1); map_item(&mem_iortab[IOMEM32(0x1128)], 
io_rcnt_read_target2, 1); // map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); @@ -368,13 +374,13 @@ void new_dyna_pcsx_mem_init(void) map_item(&mem_iortab[IOMEM16(0x1048)], sioReadMode16, 1); map_item(&mem_iortab[IOMEM16(0x104a)], sioReadCtrl16, 1); map_item(&mem_iortab[IOMEM16(0x104e)], sioReadBaud16, 1); - map_item(&mem_iortab[IOMEM16(0x1100)], io_rcnt_read_count0, 1); + map_item(&mem_iortab[IOMEM16(0x1100)], psxRcntRcount0, 1); map_item(&mem_iortab[IOMEM16(0x1104)], io_rcnt_read_mode0, 1); map_item(&mem_iortab[IOMEM16(0x1108)], io_rcnt_read_target0, 1); - map_item(&mem_iortab[IOMEM16(0x1110)], io_rcnt_read_count1, 1); + map_item(&mem_iortab[IOMEM16(0x1110)], psxRcntRcount1, 1); map_item(&mem_iortab[IOMEM16(0x1114)], io_rcnt_read_mode1, 1); map_item(&mem_iortab[IOMEM16(0x1118)], io_rcnt_read_target1, 1); - map_item(&mem_iortab[IOMEM16(0x1120)], io_rcnt_read_count2, 1); + map_item(&mem_iortab[IOMEM16(0x1120)], psxRcntRcount2, 1); map_item(&mem_iortab[IOMEM16(0x1124)], io_rcnt_read_mode2, 1); map_item(&mem_iortab[IOMEM16(0x1128)], io_rcnt_read_target2, 1); diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index fdac5562f..19e0fe415 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -1869,9 +1869,13 @@ void psxBios_GetRCnt() { // 03 PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x03]); #endif - a0&= 0x3; - if (a0 != 3) v0 = psxRcntRcount(a0); - else v0 = 0; + switch (a0 & 0x3) + { + case 0: v0 = psxRcntRcount0(); break; + case 1: v0 = psxRcntRcount1(); break; + case 2: v0 = psxRcntRcount2(); break; + case 3: v0 = 0; break; + } pc0 = ra; } diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index 18bd6a4e3..388fb89da 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -30,11 +30,12 @@ enum { - Rc0Gate = 0x0001, // 0 not implemented - Rc1Gate = 0x0001, // 0 not implemented - Rc2Disable = 0x0001, // 0 partially implemented - RcUnknown1 = 0x0002, // 1 ? - RcUnknown2 = 0x0004, // 2 ? 
+ RcSyncModeEnable = 0x0001, // 0 + Rc01BlankPause = 0 << 1, // 1,2 + Rc01UnblankReset = 1 << 1, // 1,2 + Rc01UnblankReset2 = 2 << 1, // 1,2 + Rc2Stop = 0 << 1, // 1,2 + Rc2Stop2 = 3 << 1, // 1,2 RcCountToTarget = 0x0008, // 3 RcIrqOnTarget = 0x0010, // 4 RcIrqOnOverflow = 0x0020, // 5 @@ -187,7 +188,8 @@ void _psxRcntWmode( u32 index, u32 value ) } // TODO: wcount must work. - if( value & Rc2Disable ) + if( (value & 7) == (RcSyncModeEnable | Rc2Stop) || + (value & 7) == (RcSyncModeEnable | Rc2Stop2) ) { rcnts[index].rate = 0xffffffff; } @@ -315,14 +317,26 @@ static void scheduleRcntBase(void) void psxRcntUpdate() { - u32 cycle; + u32 cycle, cycles_passed; cycle = psxRegs.cycle; // rcnt 0. - while( cycle - rcnts[0].cycleStart >= rcnts[0].cycle ) + cycles_passed = cycle - rcnts[0].cycleStart; + while( cycles_passed >= rcnts[0].cycle ) { - psxRcntReset( 0 ); + if (((rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset) || + (rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset2)) + && cycles_passed > PSXCLK / 60 / 263) + { + u32 q = cycles_passed / (PSXCLK / 60 / 263 + 1u); + rcnts[0].cycleStart += q * (PSXCLK / 60) / 263u; + break; + } + else + psxRcntReset( 0 ); + + cycles_passed = cycle - rcnts[0].cycleStart; } // rcnt 1. @@ -361,7 +375,7 @@ void psxRcntUpdate() // Update lace. if( hSyncCount >= HSyncTotal[Config.PsxType] ) { - u32 status, field = 0; + u32 status, field = 0, i; rcnts[3].cycleStart += Config.PsxType ? 
PSXCLK / 50 : PSXCLK / 60; hSyncCount = 0; frame_counter++; @@ -375,6 +389,15 @@ void psxRcntUpdate() } HW_GPU_STATUS = SWAP32(status); GPU_vBlank(0, field); + + for (i = 0; i < 2; i++) + { + if ((rcnts[i].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset) || + (rcnts[i].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset2)) + { + rcnts[i].cycleStart = rcnts[3].cycleStart; + } + } } scheduleRcntBase(); @@ -420,13 +443,46 @@ void psxRcntWtarget( u32 index, u32 value ) /******************************************************************************/ -u32 psxRcntRcount( u32 index ) +u32 psxRcntRcount0() +{ + u32 index = 0; + u32 count; + + if ((rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset) || + (rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset2)) + { + count = psxRegs.cycle - rcnts[index].cycleStart; + count = ((16u * count) % (16u * PSXCLK / 60 / 263)) / 16u; + rcnts[index].cycleStart = psxRegs.cycle - count; + } + else + count = _psxRcntRcount( index ); + + verboseLog( 2, "[RCNT 0] rcount: %04x m: %04x\n", count, rcnts[index].mode); + + return count; +} + +u32 psxRcntRcount1() +{ + u32 index = 1; + u32 count; + + count = _psxRcntRcount( index ); + + verboseLog( 2, "[RCNT 1] rcount: %04x m: %04x\n", count, rcnts[index].mode); + + return count; +} + +u32 psxRcntRcount2() { + u32 index = 2; u32 count; count = _psxRcntRcount( index ); - verboseLog( 2, "[RCNT %i] rcount: %x\n", index, count ); + verboseLog( 2, "[RCNT 2] rcount: %04x m: %04x\n", count, rcnts[index].mode); return count; } diff --git a/libpcsxcore/psxcounters.h b/libpcsxcore/psxcounters.h index 4b7b6b41c..03cd46843 100644 --- a/libpcsxcore/psxcounters.h +++ b/libpcsxcore/psxcounters.h @@ -48,7 +48,9 @@ void psxRcntWcount(u32 index, u32 value); void psxRcntWmode(u32 index, u32 value); void psxRcntWtarget(u32 index, u32 value); -u32 psxRcntRcount(u32 index); +u32 psxRcntRcount0(); +u32 psxRcntRcount1(); +u32 psxRcntRcount2(); u32 psxRcntRmode(u32 index); u32 psxRcntRtarget(u32 index); 
diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index 8397f3914..fb365c07d 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -175,7 +175,7 @@ u16 psxHwRead16(u32 add) { return 0x80; case 0x1f801100: - hard = psxRcntRcount(0); + hard = psxRcntRcount0(); #ifdef PSXHW_LOG PSXHW_LOG("T0 count read16: %x\n", hard); #endif @@ -193,7 +193,7 @@ u16 psxHwRead16(u32 add) { #endif return hard; case 0x1f801110: - hard = psxRcntRcount(1); + hard = psxRcntRcount1(); #ifdef PSXHW_LOG PSXHW_LOG("T1 count read16: %x\n", hard); #endif @@ -211,7 +211,7 @@ u16 psxHwRead16(u32 add) { #endif return hard; case 0x1f801120: - hard = psxRcntRcount(2); + hard = psxRcntRcount2(); #ifdef PSXHW_LOG PSXHW_LOG("T2 count read16: %x\n", hard); #endif @@ -346,7 +346,7 @@ u32 psxHwRead32(u32 add) { // time for rootcounters :) case 0x1f801100: - hard = psxRcntRcount(0); + hard = psxRcntRcount0(); #ifdef PSXHW_LOG PSXHW_LOG("T0 count read32: %x\n", hard); #endif @@ -364,7 +364,7 @@ u32 psxHwRead32(u32 add) { #endif return hard; case 0x1f801110: - hard = psxRcntRcount(1); + hard = psxRcntRcount1(); #ifdef PSXHW_LOG PSXHW_LOG("T1 count read32: %x\n", hard); #endif @@ -382,7 +382,7 @@ u32 psxHwRead32(u32 add) { #endif return hard; case 0x1f801120: - hard = psxRcntRcount(2); + hard = psxRcntRcount2(); #ifdef PSXHW_LOG PSXHW_LOG("T2 count read32: %x\n", hard); #endif diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 488810680..9b4793628 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -191,6 +191,12 @@ void psxBranchTest() { cdrLidSeekInterrupt(); } } + if (psxRegs.interrupt & (1 << PSXINT_IRQ10)) { // irq10 - controller port pin8 + if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_IRQ10].sCycle) >= psxRegs.intCycle[PSXINT_IRQ10].cycle) { + psxRegs.interrupt &= ~(1 << PSXINT_IRQ10); + irq10Interrupt(); + } + } if (psxRegs.interrupt & (1 << PSXINT_SPU_UPDATE)) { // scheduled spu update if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_SPU_UPDATE].sCycle) >= 
psxRegs.intCycle[PSXINT_SPU_UPDATE].cycle) { psxRegs.interrupt &= ~(1 << PSXINT_SPU_UPDATE); @@ -248,3 +254,42 @@ void psxExecuteBios() { SysPrintf("non-standard BIOS detected (%d, %08x)\n", i, psxRegs.pc); } +// irq10 stuff, very preliminary +static int irq10count; + +static void psxScheduleIrq10One(u32 cycles_abs) { + // schedule relative to frame start + u32 c = cycles_abs - rcnts[3].cycleStart; + assert((s32)c >= 0); + psxRegs.interrupt |= 1 << PSXINT_IRQ10; + psxRegs.intCycle[PSXINT_IRQ10].cycle = c; + psxRegs.intCycle[PSXINT_IRQ10].sCycle = rcnts[3].cycleStart; + new_dyna_set_event(PSXINT_IRQ10, c); +} + +void irq10Interrupt() { + u32 prevc = psxRegs.intCycle[PSXINT_IRQ10].sCycle + + psxRegs.intCycle[PSXINT_IRQ10].cycle; + + psxHu32ref(0x1070) |= SWAPu32(0x400); + +#if 0 + s32 framec = psxRegs.cycle - rcnts[3].cycleStart; + printf("%d:%03d irq10 #%d %3d m=%d,%d\n", frame_counter, + (s32)((float)framec / (PSXCLK / 60 / 263.0f)), + irq10count, psxRegs.cycle - prevc, + (psxRegs.CP0.n.SR & 0x401) != 0x401, !(psxHu32(0x1074) & 0x400)); +#endif + if (--irq10count > 0) + psxScheduleIrq10One(prevc + PSXCLK / 60 / 263); +} + +void psxScheduleIrq10(int irq_count, int x_cycles, int y) { + //printf("%s %d, %d, %d\n", __func__, irq_count, x_cycles, y); + u32 cycles_per_frame = Config.PsxType ? PSXCLK / 50 : PSXCLK / 60; + u32 cycles = rcnts[3].cycleStart + cycles_per_frame; + cycles += y * cycles_per_frame / (Config.PsxType ? 
314 : 263); + cycles += x_cycles; + psxScheduleIrq10One(cycles); + irq10count = irq_count; +} diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index fb5e1db63..3a903b1a5 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -180,7 +180,7 @@ enum { PSXINT_NEWDRC_CHECK, PSXINT_RCNT, PSXINT_CDRLID, - PSXINT_CDRPLAY_OLD, /* unused */ + PSXINT_IRQ10, PSXINT_SPU_UPDATE, PSXINT_COUNT }; @@ -257,6 +257,9 @@ void psxBranchTest(); void psxExecuteBios(); void psxJumpTest(); +void irq10Interrupt(); +void psxScheduleIrq10(int irq_count, int x_cycles, int y); + #ifdef __cplusplus } #endif From cb245e568086e132cd73ae52620cabe314cdade6 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 5 Sep 2023 02:03:23 +0300 Subject: [PATCH 353/597] gpulib: new debug compile option for raw fb display --- plugins/gpulib/gpu.c | 2 ++ plugins/gpulib/gpu.h | 2 ++ plugins/gpulib/vout_pl.c | 6 ++++++ 3 files changed, 10 insertions(+) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 9cf5841d9..6751ec7bc 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -756,6 +756,7 @@ void GPUupdateLace(void) flush_cmd_buffer(); renderer_flush_queues(); +#ifndef RAW_FB_DISPLAY if (gpu.status & PSX_GPU_STATUS_BLANKING) { if (!gpu.state.blanked) { vout_blank(); @@ -767,6 +768,7 @@ void GPUupdateLace(void) if (!gpu.state.fb_dirty) return; +#endif if (gpu.frameskip.set) { if (!gpu.frameskip.frame_ready) { diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 2f7a464c2..1582ee15f 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -13,6 +13,8 @@ #include +//#define RAW_FB_DISPLAY + #define gpu_log(fmt, ...) 
\ printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__) diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index cae35a3f2..f9ac0f30f 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -32,6 +32,9 @@ static void check_mode_change(int force) int w_out = w; int h_out = h; +#ifdef RAW_FB_DISPLAY + w = w_out = 1024, h = h_out = 512; +#endif gpu.state.enhancement_active = gpu.get_enhancement_bufer != NULL && gpu.state.enhancement_enable && w <= 512 && h <= 256 && !(gpu.status & PSX_GPU_STATUS_RGB24); @@ -67,6 +70,9 @@ void vout_update(void) int vram_h = 512; int src_x2 = 0; +#ifdef RAW_FB_DISPLAY + w = 1024, h = 512, x = src_x = y = src_y = 0; +#endif if (x < 0) { w += x; src_x2 = -x; x = 0; } if (y < 0) { h += y; src_y -= y; y = 0; } From 5fe1a2b17ff2f336a63e36ac791c99096775d5cf Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 5 Sep 2023 02:05:01 +0300 Subject: [PATCH 354/597] gpulib: add some missed sync and flush notaz/pcsx_rearmed#303 --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 2 ++ plugins/gpulib/gpu.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index af24e7703..a0bff3e99 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -4975,6 +4975,8 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->texture_page_ptr = psx_gpu->vram_ptr; psx_gpu->clut_ptr = psx_gpu->vram_ptr; + psx_gpu->viewport_start_x = psx_gpu->viewport_start_y = 0; + psx_gpu->viewport_end_x = psx_gpu->viewport_end_y = 0; psx_gpu->mask_msb = 0; psx_gpu->texture_window_x = 0; diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 6751ec7bc..1bf25af11 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -58,6 +58,7 @@ static noinline void do_reset(void) gpu.screen.hres = gpu.screen.w = 256; gpu.screen.vres = gpu.screen.h = 240; gpu.screen.x = gpu.screen.y = 0; + 
renderer_sync_ecmds(gpu.ex_regs); renderer_notify_res_change(); } @@ -179,8 +180,12 @@ static noinline int decide_frameskip_allow(uint32_t cmd_e3) return gpu.frameskip.allow; } +static void flush_cmd_buffer(void); + static noinline void get_gpu_info(uint32_t data) { + if (unlikely(gpu.cmd_len > 0)) + flush_cmd_buffer(); switch (data & 0x0f) { case 0x02: case 0x03: From a004140ae8cfc83a3741efeed9ed867908d6a026 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 5 Sep 2023 21:07:01 +0300 Subject: [PATCH 355/597] fix irq10 for the dynarec libretro/pcsx_rearmed#723 --- libpcsxcore/r3000a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 9b4793628..df627251e 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -264,7 +264,7 @@ static void psxScheduleIrq10One(u32 cycles_abs) { psxRegs.interrupt |= 1 << PSXINT_IRQ10; psxRegs.intCycle[PSXINT_IRQ10].cycle = c; psxRegs.intCycle[PSXINT_IRQ10].sCycle = rcnts[3].cycleStart; - new_dyna_set_event(PSXINT_IRQ10, c); + new_dyna_set_event_abs(PSXINT_IRQ10, cycles_abs); } void irq10Interrupt() { From 825757532f4b9a30f25c491267d544e02730e2e8 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 5 Sep 2023 22:03:32 +0300 Subject: [PATCH 356/597] psxbios: don't overwrite ra --- libpcsxcore/psxbios.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 19e0fe415..00b5d76b6 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -3964,8 +3964,8 @@ void psxBiosException() { int i; // save the regs - // $at, $v0, $v1 already saved by the mips code at A_EXCEPTION - for (i = 4; i < 32; i++) { + // $at, $v0, $v1, $ra already saved by the mips code at A_EXCEPTION + for (i = 4; i < 31; i++) { if (i == 26) // $k0 continue; tcb->reg[i] = SWAP32(psxRegs.GPR.r[i]); From 0a50313e5c9a6470e37e1dab99022eb481b6b5f7 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 6 Sep 2023 01:05:48 +0300 Subject: [PATCH 
357/597] psxbios: primitive chdir implementation --- libpcsxcore/misc.c | 2 +- libpcsxcore/psxbios.c | 53 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index a16e64287..feabe15b5 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -112,7 +112,7 @@ int GetCdromFile(u8 *mdir, u8 *time, char *filename) { int i; // only try to scan if a filename is given - if (!strlen(filename)) return -1; + if (filename == INVALID_PTR || !strlen(filename)) return -1; i = 0; while (i < 4096) { diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 00b5d76b6..c5a439def 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -260,6 +260,8 @@ typedef struct { static FileDesc FDesc[32]; static char ffile[64]; static int nfile; +static char cdir[8*8+8]; +static u32 floodchk; // fixed RAM offsets, SCPH1001 compatible #define A_TT_ExCB 0x0100 @@ -1465,7 +1467,19 @@ void psxBios_printf() { // 0x3f pc0 = ra; } -void psxBios_format() { // 0x41 +static void psxBios_cd() { // 0x40 + const char *p, *dir = castRam8ptr(a0); + PSXBIOS_LOG("psxBios_%s %x(%s)\n", biosB0n[0x40], a0, dir); + if ((p = strchr(dir, ':'))) + dir = ++p; + if (*dir == '\\') + dir++; + snprintf(cdir, sizeof(cdir), "%s", dir); + mips_return_c(1, 100); +} + +static void psxBios_format() { // 0x41 + PSXBIOS_LOG("psxBios_%s %x(%s)\n", biosB0n[0x41], a0, Ra0); if (strcmp(Ra0, "bu00:") == 0 && Config.Mcd1[0] != '\0') { CreateMcd(Config.Mcd1); @@ -1486,9 +1500,9 @@ void psxBios_format() { // 0x41 } static void psxBios_SystemErrorUnresolvedException() { - if (loadRam32(0xfffc) != 0x12345678) { // prevent log flood + if (floodchk != 0x12340a40) { // prevent log flood SysPrintf("psxBios_%s called from %08x\n", biosA0n[0x40], ra); - storeRam32(0xfffc, 0x12345678); + floodchk = 0x12340a40; } mips_return_void_c(1000); } @@ -1507,16 +1521,33 @@ static void FlushCache() { void psxBios_Load() { // 0x42 EXE_HEADER 
eheader; + char path[256]; + char *pa0, *p; void *pa1; + pa0 = Ra0; pa1 = Ra1; - if (pa1 != INVALID_PTR && LoadCdromFile(Ra0, &eheader) == 0) { + PSXBIOS_LOG("psxBios_%s %x(%s), %x\n", biosA0n[0x42], a0, pa0, a1); + if (pa0 == INVALID_PTR || pa1 == INVALID_PTR) { + mips_return(0); + return; + } + if ((p = strchr(pa0, ':'))) + pa0 = ++p; + if (*pa0 == '\\') + pa0++; + if (cdir[0]) + snprintf(path, sizeof(path), "%s\\%s", cdir, (char *)pa0); + else + snprintf(path, sizeof(path), "%s", (char *)pa0); + + if (LoadCdromFile(path, &eheader) == 0) { memcpy(pa1, ((char*)&eheader)+16, sizeof(EXEC)); psxCpu->Clear(a1, sizeof(EXEC) / 4); FlushCache(); v0 = 1; } else v0 = 0; - PSXBIOS_LOG("psxBios_%s: %s, %d -> %d\n", biosA0n[0x42], Ra0, a1, v0); + PSXBIOS_LOG(" -> %d\n", v0); pc0 = ra; } @@ -1948,6 +1979,7 @@ static u32 DeliverEvent(u32 class, u32 spec) { } } } + floodchk = 0; use_cycles(29); return ret; } @@ -2059,7 +2091,11 @@ static void psxBios_TestEvent() { // 0b u32 base = loadRam32(A_TT_EvCB); u32 status = loadRam32(base + (a0 & 0xffff) * sizeof(EvCB) + 4); u32 ret = 0; - PSXBIOS_LOG("psxBios_%s %x %x\n", biosB0n[0x0b], a0, status); + + if (psxRegs.cycle - floodchk > 16*1024u) { // prevent log flood + PSXBIOS_LOG("psxBios_%s %x %x\n", biosB0n[0x0b], a0, status); + floodchk = psxRegs.cycle; + } if (status == EvStALREADY) { storeRam32(base + (a0 & 0xffff) * sizeof(EvCB) + 4, EvStACTIVE); ret = 1; @@ -3548,7 +3584,7 @@ void psxBiosInit() { biosB0[0x3d] = psxBios_putchar; //biosB0[0x3e] = psxBios_gets; biosB0[0x3f] = psxBios_puts; - //biosB0[0x40] = psxBios_cd; + biosB0[0x40] = psxBios_cd; biosB0[0x41] = psxBios_format; biosB0[0x42] = psxBios_firstfile; biosB0[0x43] = psxBios_nextfile; @@ -3612,6 +3648,8 @@ void psxBiosInit() { /**/ memset(FDesc, 0, sizeof(FDesc)); + memset(cdir, 0, sizeof(cdir)); + floodchk = 0; // somewhat pretend to be a SCPH1001 BIOS // some games look for these and take an exception if they're missing @@ -4031,4 +4069,5 @@ void psxBiosFreeze(int Mode) 
{ bfreezes(FDesc); bfreezes(ffile); bfreezel(&nfile); + bfreezes(cdir); } From 72583812ef6aae62ec1614772f31dd41056f17e4 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 9 Sep 2023 00:39:34 +0300 Subject: [PATCH 358/597] gpulib: handle vram copy in gpulib use internal buffering according to mednafen notaz/pcsx_rearmed#289 --- plugins/dfxvideo/gpulib_if.c | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu.c | 3 +- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 41 ++++------- plugins/gpu_senquack/gpulib_if.cpp | 7 +- plugins/gpu_unai/gpulib_if.cpp | 8 ++- plugins/gpulib/gpu.c | 89 ++++++++++++++++++++---- 6 files changed, 102 insertions(+), 48 deletions(-) diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index ba7f16a08..978e7d84c 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -321,7 +321,7 @@ int do_cmd_list(uint32_t *list, int list_len, int *last_cmd) } #ifndef TEST - if (cmd == 0xa0 || cmd == 0xc0) + if (0x80 <= cmd && cmd < 0xe0) break; // image i/o, forward to upper layer else if ((cmd & 0xf8) == 0xe0) gpu.ex_regs[cmd & 7] = GETLE32(list); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index a0bff3e99..fbacbd5f0 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -4888,6 +4888,7 @@ void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, } } +#ifndef PCSX void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y, u32 width, u32 height, u32 pitch) { @@ -4919,7 +4920,7 @@ void render_block_move(psx_gpu_struct *psx_gpu, u32 source_x, u32 source_y, render_block_copy(psx_gpu, psx_gpu->vram_ptr + source_x + (source_y * 1024), dest_x, dest_y, width, height, 1024); } - +#endif void initialize_reciprocal_table(void) { diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index c7562993a..5badf6b9a 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ 
b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -606,7 +606,13 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) break; } - case 0x80: // vid -> vid +#ifdef PCSX + case 0x80 ... 0x9F: // vid -> vid + case 0xA0 ... 0xBF: // sys -> vid + case 0xC0 ... 0xDF: // vid -> sys + goto breakloop; +#else + case 0x80 ... 0x9F: // vid -> vid { u32 sx = list_s16[2] & 0x3FF; u32 sy = list_s16[3] & 0x1FF; @@ -622,12 +628,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) break; } -#ifdef PCSX - case 0xA0: // sys -> vid - case 0xC0: // vid -> sys - goto breakloop; -#else - case 0xA0: // sys -> vid + case 0xA0 ... 0xBF: // sys -> vid { u32 load_x = list_s16[2] & 0x3FF; u32 load_y = list_s16[3] & 0x1FF; @@ -645,8 +646,8 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) break; } - case 0xC0: // vid -> sys - break; + case 0xC0 ... 0xDF: // vid -> sys + break; #endif case 0xE1: @@ -1575,26 +1576,10 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]); break; } - - case 0x80: // vid -> vid - { - u32 sx = list_s16[2] & 0x3FF; - u32 sy = list_s16[3] & 0x1FF; - u32 dx = list_s16[4] & 0x3FF; - u32 dy = list_s16[5] & 0x1FF; - u32 w = ((list_s16[6] - 1) & 0x3FF) + 1; - u32 h = ((list_s16[7] - 1) & 0x1FF) + 1; - if (sx == dx && sy == dy && psx_gpu->mask_msb == 0) - break; - - render_block_move(psx_gpu, sx, sy, dx, dy, w, h); - sync_enhancement_buffers(dx, dy, w, h); - break; - } - - case 0xA0: // sys -> vid - case 0xC0: // vid -> sys + case 0x80 ... 0x9F: // vid -> vid + case 0xA0 ... 0xBF: // sys -> vid + case 0xC0 ... 
0xDF: // vid -> sys goto breakloop; case 0xE1: diff --git a/plugins/gpu_senquack/gpulib_if.cpp b/plugins/gpu_senquack/gpulib_if.cpp index 0bc63c6d7..72dcc6d67 100644 --- a/plugins/gpu_senquack/gpulib_if.cpp +++ b/plugins/gpu_senquack/gpulib_if.cpp @@ -575,11 +575,11 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) gpuDrawS(packet, driver); } break; +#ifdef TEST case 0x80: // vid -> vid gpuMoveImage(packet); break; -#ifdef TEST case 0xA0: // sys -> vid { u32 load_width = list[2] & 0xffff; @@ -592,8 +592,9 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0xC0: break; #else - case 0xA0: // sys ->vid - case 0xC0: // vid -> sys + case 0x80 ... 0x9F: // vid -> vid + case 0xA0 ... 0xBF: // sys -> vid + case 0xC0 ... 0xDF: // vid -> sys // Handled by gpulib goto breakloop; #endif diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 0064aaa37..1c461421a 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -438,10 +438,10 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]); break; +#ifdef TEST case 0x80: // vid -> vid gpuMoveImage(); // prim handles updateLace && skip break; -#ifdef TEST case 0xA0: // sys -> vid { u32 load_width = list[2] & 0xffff; @@ -454,8 +454,10 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) case 0xC0: break; #else - case 0xA0: // sys ->vid - case 0xC0: // vid -> sys + case 0x80 ... 0x9F: // vid -> vid + case 0xA0 ... 0xBF: // sys -> vid + case 0xC0 ... 
0xDF: // vid -> sys + // Handled by gpulib goto breakloop; #endif case 0xE1: { diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 1bf25af11..dfaff58e2 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -336,23 +336,33 @@ const unsigned char cmd_lengths[256] = 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 80 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // a0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)] -static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read) +static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb) +{ + int i; + for (i = 0; i < l; i++) + dst[i] = src[i] | msb; +} + +static inline void do_vram_line(int x, int y, uint16_t *mem, int l, + int is_read, uint16_t msb) { uint16_t *vram = VRAM_MEM_XY(x, y); - if (is_read) + if (unlikely(is_read)) memcpy(mem, vram, l * 2); + else if (unlikely(msb)) + cpy_msb(vram, mem, l, msb); else memcpy(vram, mem, l * 2); } @@ -360,6 +370,7 @@ static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read) static int do_vram_io(uint32_t *data, int count, int is_read) { int count_initial = count; + uint16_t msb = gpu.ex_regs[6] << 15; uint16_t *sdata = 
(uint16_t *)data; int x = gpu.dma.x, y = gpu.dma.y; int w = gpu.dma.w, h = gpu.dma.h; @@ -372,7 +383,7 @@ static int do_vram_io(uint32_t *data, int count, int is_read) if (count < l) l = count; - do_vram_line(x + o, y, sdata, l, is_read); + do_vram_line(x + o, y, sdata, l, is_read, msb); if (o + l < w) o += l; @@ -387,13 +398,13 @@ static int do_vram_io(uint32_t *data, int count, int is_read) for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) { y &= 511; - do_vram_line(x, y, sdata, w, is_read); + do_vram_line(x, y, sdata, w, is_read, msb); } if (h > 0) { if (count > 0) { y &= 511; - do_vram_line(x, y, sdata, count, is_read); + do_vram_line(x, y, sdata, count, is_read, msb); o = count; count = 0; } @@ -441,6 +452,51 @@ static void finish_vram_transfer(int is_read) gpu.dma_start.w, gpu.dma_start.h, 0); } +static void do_vram_copy(const uint32_t *params) +{ + const uint32_t sx = LE32TOH(params[0]) & 0x3FF; + const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF; + const uint32_t dx = LE32TOH(params[1]) & 0x3FF; + const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF; + uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1; + uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1; + uint16_t msb = gpu.ex_regs[6] << 15; + uint16_t lbuf[128]; + uint32_t x, y; + + if (sx == dx && sy == dy && msb == 0) + return; + + renderer_flush_queues(); + + if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb)) + { + for (y = 0; y < h; y++) + { + const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff); + uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff); + for (x = 0; x < w; x += ARRAY_SIZE(lbuf)) + { + uint32_t x1, w1 = w - x; + if (w1 > ARRAY_SIZE(lbuf)) + w1 = ARRAY_SIZE(lbuf); + for (x1 = 0; x1 < w1; x1++) + lbuf[x1] = src[(sx + x + x1) & 0x3ff]; + for (x1 = 0; x1 < w1; x1++) + dst[(dx + x + x1) & 0x3ff] = lbuf[x1] | msb; + } + } + } + else + { + uint32_t sy1 = sy, dy1 = dy; + for (y = 0; y < h; y++, sy1++, dy1++) + memcpy(VRAM_MEM_XY(dx, dy1 
& 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2); + } + + renderer_update_caches(dx, dy, w, h, 0); +} + static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) { int cmd = 0, pos = 0, len, dummy, v; @@ -496,7 +552,7 @@ static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) cmd = -1; break; // incomplete cmd } - if (0xa0 <= cmd && cmd <= 0xdf) + if (0x80 <= cmd && cmd <= 0xdf) break; // image i/o pos += len; @@ -536,6 +592,15 @@ static noinline int do_cmd_buffer(uint32_t *data, int count) pos += 3; continue; } + else if ((cmd & 0xe0) == 0x80) { + if (unlikely((pos+3) >= count)) { + cmd = -1; // incomplete cmd, can't consume yet + break; + } + do_vram_copy(data + pos + 1); + pos += 4; + continue; + } // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0)) From 4f38d1f8434e47c5505489144aac0ba40eaf2264 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 10 Sep 2023 18:18:26 +0300 Subject: [PATCH 359/597] cdrom: add a hack for bad rips The game gets track times using CdlGetTN/CdlGetTD, but the rip only keeps the data track. The game then wants to play a track using a hardcoded track number, reading out of range of the list it got before, sending garbage to CDC with infinite retries. 
notaz/pcsx_rearmed#302 --- libpcsxcore/cdrom.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 28358bf65..6f48df05f 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -109,7 +109,7 @@ static struct { u8 DriveState; u8 FastForward; u8 FastBackward; - u8 unused8; + u8 errorRetryhack; u8 AttenuatorLeftToLeft, AttenuatorLeftToRight; u8 AttenuatorRightToRight, AttenuatorRightToLeft; @@ -749,6 +749,8 @@ void cdrInterrupt(void) { if (((cdr.Param[0] & 0x0F) > 0x09) || (cdr.Param[0] > 0x99) || ((cdr.Param[1] & 0x0F) > 0x09) || (cdr.Param[1] >= 0x60) || ((cdr.Param[2] & 0x0F) > 0x09) || (cdr.Param[2] >= 0x75)) { CDR_LOG_I("Invalid/out of range seek to %02X:%02X:%02X\n", cdr.Param[0], cdr.Param[1], cdr.Param[2]); + if (++cdr.errorRetryhack > 100) + break; error = ERROR_INVALIDARG; goto set_error; } @@ -759,6 +761,7 @@ void cdrInterrupt(void) { memcpy(cdr.SetSector, set_loc, 3); cdr.SetSector[3] = 0; cdr.SetlocPending = 1; + cdr.errorRetryhack = 0; } break; From e9183d95fbc7a6ea5851d33955751d43a86a191f Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 10 Sep 2023 22:03:19 +0300 Subject: [PATCH 360/597] psxinterpreter: log reserved insn once --- libpcsxcore/psxinterpreter.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index e3c5baf44..2ffab69a2 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -128,6 +128,19 @@ static void intExceptionInsn(psxRegisters *regs, u32 cause) intException(regs, regs->pc - 4, cause); } +static noinline void intExceptionReservedInsn(psxRegisters *regs) +{ +#ifdef DO_EXCEPTION_RESERVEDI + static u32 ppc_ = ~0u; + if (regs->pc != ppc_) { + SysPrintf("reserved instruction %08x @%08x ra=%08x\n", + regs->code, regs->pc - 4, regs->GPR.n.ra); + ppc_ = regs->pc; + } + intExceptionInsn(regs, R3000E_RI << 2); +#endif +} + // 29 Enable for 
80000000-ffffffff // 30 Enable for 00000000-7fffffff // 31 Enable exception @@ -923,10 +936,8 @@ OP(psxSWRe) { if (checkST(regs_, _oB_ , 0)) doSWR(regs_, _Rt_, _oB_); } *********************************************************/ OP(psxMFC0) { u32 r = _Rd_; -#ifdef DO_EXCEPTION_RESERVEDI if (unlikely(0x00000417u & (1u << r))) - intExceptionInsn(regs_, R3000E_RI << 2); -#endif + intExceptionReservedInsn(regs_); doLoad(regs_, _Rt_, regs_->CP0.r[r]); } @@ -974,9 +985,7 @@ static inline void psxNULLne(psxRegisters *regs) { OP(psxNULL) { psxNULLne(regs_); -#ifdef DO_EXCEPTION_RESERVEDI - intExceptionInsn(regs_, R3000E_RI << 2); -#endif + intExceptionReservedInsn(regs_); } void gteNULL(struct psxCP2Regs *regs) { From 1e93efc0aa8dd1666f485663199cd0dbf8d10f1c Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 10 Sep 2023 22:06:56 +0300 Subject: [PATCH 361/597] libretro: allow unlimited cheat length notaz/pcsx_rearmed#306 --- frontend/libretro.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index b1af9f15a..f275ca323 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -991,12 +991,13 @@ void retro_cheat_reset(void) void retro_cheat_set(unsigned index, bool enabled, const char *code) { - char buf[256]; - int ret; + int ret = -1; + char *buf; - // cheat funcs are destructive, need a copy.. - strncpy(buf, code, sizeof(buf)); - buf[sizeof(buf) - 1] = 0; + // cheat funcs are destructive, need a copy... + buf = strdup(code); + if (buf == NULL) + goto finish; //Prepare buffered cheat for PCSX's AddCheat fucntion. int cursor = 0; @@ -1022,10 +1023,12 @@ void retro_cheat_set(unsigned index, bool enabled, const char *code) else ret = AddCheat("", buf); +finish: if (ret != 0) SysPrintf("Failed to set cheat %#u\n", index); else if (index < NumCheats) Cheats[index].Enabled = enabled; + free(buf); } // just in case, maybe a win-rt port in the future? 
From 1351a8fbef932e26a56e841b6c43de6d907fde5c Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 10 Sep 2023 23:26:54 +0300 Subject: [PATCH 362/597] adjust irq10 and rcnt for pal libretro/pcsx_rearmed#723 --- libpcsxcore/psxcounters.c | 50 ++++++++++++++++++++++++++------------- libpcsxcore/r3000a.c | 7 ++++-- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index 388fb89da..9ff679e20 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -61,9 +61,8 @@ enum static const u32 CountToOverflow = 0; static const u32 CountToTarget = 1; -static const u32 FrameRate[] = { 60, 50 }; -static const u32 HSyncTotal[] = { 263, 314 }; // actually one more on odd lines for PAL -#define VBlankStart 240 +static const u32 HSyncTotal[] = { 263, 314 }; +#define VBlankStart 240 // todo: depend on the actual GPU setting #define VERBOSE_LEVEL 0 @@ -79,6 +78,15 @@ u32 psxNextCounter = 0, psxNextsCounter = 0; /******************************************************************************/ +static inline +u32 lineCycles(void) +{ + if (Config.PsxType) + return PSXCLK / 50 / HSyncTotal[1]; + else + return PSXCLK / 60 / HSyncTotal[0]; +} + static inline void setIrq( u32 irq ) { @@ -170,7 +178,7 @@ void _psxRcntWmode( u32 index, u32 value ) case 1: if( value & Rc1HSyncClock ) { - rcnts[index].rate = (PSXCLK / (FrameRate[Config.PsxType] * HSyncTotal[Config.PsxType])); + rcnts[index].rate = lineCycles(); } else { @@ -327,10 +335,10 @@ void psxRcntUpdate() { if (((rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset) || (rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset2)) - && cycles_passed > PSXCLK / 60 / 263) + && cycles_passed > lineCycles()) { - u32 q = cycles_passed / (PSXCLK / 60 / 263 + 1u); - rcnts[0].cycleStart += q * (PSXCLK / 60) / 263u; + u32 q = cycles_passed / (lineCycles() + 1u); + rcnts[0].cycleStart += q * lineCycles(); break; } else @@ -375,7 +383,7 @@ void psxRcntUpdate() // 
Update lace. if( hSyncCount >= HSyncTotal[Config.PsxType] ) { - u32 status, field = 0, i; + u32 status, field = 0; rcnts[3].cycleStart += Config.PsxType ? PSXCLK / 50 : PSXCLK / 60; hSyncCount = 0; frame_counter++; @@ -390,13 +398,21 @@ void psxRcntUpdate() HW_GPU_STATUS = SWAP32(status); GPU_vBlank(0, field); - for (i = 0; i < 2; i++) + if ((rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset) || + (rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset2)) + { + rcnts[0].cycleStart = rcnts[3].cycleStart; + } + + if ((rcnts[1].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset) || + (rcnts[1].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset2)) + { + rcnts[1].cycleStart = rcnts[3].cycleStart; + } + else if (rcnts[1].mode & Rc1HSyncClock) { - if ((rcnts[i].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset) || - (rcnts[i].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset2)) - { - rcnts[i].cycleStart = rcnts[3].cycleStart; - } + // adjust to remove the rounding error + _psxRcntWcount(1, (psxRegs.cycle - rcnts[1].cycleStart) / rcnts[1].rate); } } @@ -452,7 +468,8 @@ u32 psxRcntRcount0() (rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset2)) { count = psxRegs.cycle - rcnts[index].cycleStart; - count = ((16u * count) % (16u * PSXCLK / 60 / 263)) / 16u; + //count = ((16u * count) % (16u * PSXCLK / 60 / 263)) / 16u; + count = count % lineCycles(); rcnts[index].cycleStart = psxRegs.cycle - count; } else @@ -526,8 +543,6 @@ void psxRcntInit() // rcnt base. 
rcnts[3].rate = 1; - rcnts[3].mode = RcCountToTarget; - rcnts[3].target = (PSXCLK / (FrameRate[Config.PsxType] * HSyncTotal[Config.PsxType])); for( i = 0; i < CounterQuantity; ++i ) { @@ -537,6 +552,7 @@ void psxRcntInit() hSyncCount = 0; hsync_steps = 1; + scheduleRcntBase(); psxRcntSet(); } diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index df627251e..69772d44c 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -280,8 +280,11 @@ void irq10Interrupt() { irq10count, psxRegs.cycle - prevc, (psxRegs.CP0.n.SR & 0x401) != 0x401, !(psxHu32(0x1074) & 0x400)); #endif - if (--irq10count > 0) - psxScheduleIrq10One(prevc + PSXCLK / 60 / 263); + if (--irq10count > 0) { + u32 cycles_per_line = Config.PsxType + ? PSXCLK / 50 / 314 : PSXCLK / 60 / 263; + psxScheduleIrq10One(prevc + cycles_per_line); + } } void psxScheduleIrq10(int irq_count, int x_cycles, int y) { From 2db412ade2b09ca04da81d91b75bbf6475dbde5a Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 13 Sep 2023 22:23:58 +0300 Subject: [PATCH 363/597] switch over to libretro input code too much trouble with 2 separate codebases --- Makefile | 3 - frontend/libretro.c | 17 +- frontend/menu.c | 3 - frontend/pl_gun_ts.c | 6 +- frontend/plugin.c | 68 +- frontend/plugin_lib.c | 13 +- include/psemu_plugin_defs.h | 6 +- libpcsxcore/plugins.c | 2036 +++++++++++++++++++++-------------- libpcsxcore/plugins.h | 5 +- libpcsxcore/psxbios.c | 19 +- libpcsxcore/sio.c | 112 +- maemo/hildon.c | 1 - maemo/main.c | 2 - plugins/dfinput/externals.h | 18 - plugins/dfinput/guncon.c | 68 -- plugins/dfinput/main.c | 71 -- plugins/dfinput/main.h | 23 - plugins/dfinput/pad.c | 308 ------ 18 files changed, 1343 insertions(+), 1436 deletions(-) delete mode 100644 plugins/dfinput/externals.h delete mode 100644 plugins/dfinput/guncon.c delete mode 100644 plugins/dfinput/main.c delete mode 100644 plugins/dfinput/main.h delete mode 100644 plugins/dfinput/pad.c diff --git a/Makefile b/Makefile index 8e5cb3e33..ef4a1f741 100644 --- 
a/Makefile +++ b/Makefile @@ -174,9 +174,6 @@ libchdr/src/%.o: CFLAGS += -Wno-unused -Ilibchdr/deps/lzma-19.00/include libchdr/deps/lzma-19.00/src/%.o: CFLAGS += -Wno-unused -D_7ZIP_ST -Ilibchdr/deps/lzma-19.00/include #endif -# dfinput -OBJS += plugins/dfinput/main.o plugins/dfinput/pad.o plugins/dfinput/guncon.o - # frontend/gui OBJS += frontend/cspace.o ifeq "$(HAVE_NEON_ASM)" "1" diff --git a/frontend/libretro.c b/frontend/libretro.c index f275ca323..8d6b5cc7e 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -29,7 +29,6 @@ #include "../libpcsxcore/r3000a.h" #include "../plugins/dfsound/out.h" #include "../plugins/dfsound/spu_config.h" -#include "../plugins/dfinput/externals.h" #include "cspace.h" #include "main.h" #include "menu.h" @@ -514,7 +513,7 @@ void plat_trigger_vibrate(int pad, int low, int high) } } -void pl_update_gun(int *xn, int *yn, int *xres, int *yres, int *in) +void pl_gun_byte2(int port, unsigned char byte) { } @@ -2468,17 +2467,19 @@ static void update_input_guncon(int port, int ret) //Mouse range is -32767 -> 32767 //1% is about 655 //Use the left analog stick field to store the absolute coordinates - //Fix cursor to top-left when gun is detected as "offscreen" + + int gunx = input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_X); + int guny = input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_Y); + + //Have the Libretro API let /libpcsxcore/plugins.c know when the lightgun is pointed offscreen + //Offscreen value is chosen to be well out of range of any possible scaling done via core options if (input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_IS_OFFSCREEN) || input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_RELOAD)) { - in_analog_left[port][0] = -32767; - in_analog_left[port][1] = -32767; + in_analog_left[port][0] = (65536 - 512) * 64; + in_analog_left[port][1] = (65536 - 512) * 64; } else { - int gunx = input_state_cb(port, 
RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_X); - int guny = input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_Y); - in_analog_left[port][0] = (gunx * GunconAdjustRatioX) + (GunconAdjustX * 655); in_analog_left[port][1] = (guny * GunconAdjustRatioY) + (GunconAdjustY * 655); } diff --git a/frontend/menu.c b/frontend/menu.c index ee60df6f9..8e7cd5041 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -36,7 +36,6 @@ #include "../libpcsxcore/cdriso.h" #include "../libpcsxcore/cheat.h" #include "../libpcsxcore/new_dynarec/new_dynarec.h" -#include "../plugins/dfinput/externals.h" #include "../plugins/dfsound/spu_config.h" #include "psemu_plugin_defs.h" #include "arm_features.h" @@ -2696,8 +2695,6 @@ void menu_prepare_emu(void) if (ret) fprintf(stderr, "Warning: GPU_open returned %d\n", ret); } - - dfinput_activate(); } void menu_update_msg(const char *msg) diff --git a/frontend/pl_gun_ts.c b/frontend/pl_gun_ts.c index 315c70c10..6c05b7c3f 100644 --- a/frontend/pl_gun_ts.c +++ b/frontend/pl_gun_ts.c @@ -15,8 +15,6 @@ #include "plugin_lib.h" #include "pl_gun_ts.h" #include "menu.h" -#include "../plugins/dfinput/externals.h" -#include "../plugins/dfinput/main.h" #ifdef MAEMO #define N900_TSMAX_X 4096 @@ -57,9 +55,9 @@ int pl_gun_ts_update_raw(struct tsdev *ts, int *x, int *y, int *p) limit(gun_x, 0, 1023); limit(gun_y, 0, 1023); if (sp && !(g_opts & OPT_TSGUN_NOTRIGGER)) - gun_in |= GUNIN_TRIGGER; + gun_in |= 1; else - gun_in &= ~GUNIN_TRIGGER; + gun_in &= ~1; } } diff --git a/frontend/plugin.c b/frontend/plugin.c index f77b6e1e8..2c95a67af 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -68,23 +68,65 @@ static long CALLBACK PADshutdown(void) { return 0; } static long CALLBACK PADclose(void) { return 0; } static void CALLBACK PADsetSensitive(int _) { return; } -static long CALLBACK PADreadPort1(PadDataS *pad) -{ - pad->controllerType = in_type[0]; - pad->buttonStatus = ~in_keystate[0]; - if (in_type[0] == 
PSE_PAD_TYPE_ANALOGPAD) { - pad->leftJoyX = in_analog_left[0][0]; - pad->leftJoyY = in_analog_left[0][1]; - pad->rightJoyX = in_analog_right[0][0]; - pad->rightJoyY = in_analog_right[0][1]; +static long CALLBACK PADreadPort1(PadDataS *pad) { + int pad_index = pad->requestPadIndex; + + pad->controllerType = in_type[pad_index]; + pad->buttonStatus = ~in_keystate[pad_index]; + + if (multitap1 == 1) + pad->portMultitap = 1; + else + pad->portMultitap = 0; + + if (in_type[pad_index] == PSE_PAD_TYPE_ANALOGJOY || in_type[pad_index] == PSE_PAD_TYPE_ANALOGPAD || in_type[pad_index] == PSE_PAD_TYPE_NEGCON || in_type[pad_index] == PSE_PAD_TYPE_GUNCON) + { + pad->leftJoyX = in_analog_left[pad_index][0]; + pad->leftJoyY = in_analog_left[pad_index][1]; + pad->rightJoyX = in_analog_right[pad_index][0]; + pad->rightJoyY = in_analog_right[pad_index][1]; + + pad->absoluteX = in_analog_left[pad_index][0]; + pad->absoluteY = in_analog_left[pad_index][1]; + } + + if (in_type[pad_index] == PSE_PAD_TYPE_MOUSE) + { + pad->moveX = in_mouse[pad_index][0]; + pad->moveY = in_mouse[pad_index][1]; } + return 0; } -static long CALLBACK PADreadPort2(PadDataS *pad) -{ - pad->controllerType = in_type[1]; - pad->buttonStatus = ~in_keystate[0] >> 16; +static long CALLBACK PADreadPort2(PadDataS *pad) { + int pad_index = pad->requestPadIndex; + + pad->controllerType = in_type[pad_index]; + pad->buttonStatus = ~in_keystate[pad_index]; + + if (multitap2 == 1) + pad->portMultitap = 2; + else + pad->portMultitap = 0; + + if (in_type[pad_index] == PSE_PAD_TYPE_ANALOGJOY || in_type[pad_index] == PSE_PAD_TYPE_ANALOGPAD || in_type[pad_index] == PSE_PAD_TYPE_NEGCON || in_type[pad_index] == PSE_PAD_TYPE_GUNCON) + { + pad->leftJoyX = in_analog_left[pad_index][0]; + pad->leftJoyY = in_analog_left[pad_index][1]; + pad->rightJoyX = in_analog_right[pad_index][0]; + pad->rightJoyY = in_analog_right[pad_index][1]; + + pad->absoluteX = in_analog_left[pad_index][0]; + pad->absoluteY = in_analog_left[pad_index][1]; + } + + 
if (in_type[pad_index] == PSE_PAD_TYPE_MOUSE) + { + pad->moveX = in_mouse[pad_index][0]; + pad->moveY = in_mouse[pad_index][1]; + } + return 0; } diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index bdf09c715..8a6b6adb1 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -33,7 +33,6 @@ #include "psemu_plugin_defs.h" #include "../libpcsxcore/new_dynarec/new_dynarec.h" #include "../libpcsxcore/psxmem_map.h" -#include "../plugins/dfinput/externals.h" #define HUD_HEIGHT 10 @@ -620,18 +619,18 @@ static void update_input(void) emu_set_action(emu_act); in_keystate[0] = actions[IN_BINDTYPE_PLAYER12]; + + // fixme + //if (in_type[0] == PSE_PAD_TYPE_GUNCON && tsdev) + // pl_gun_ts_update(tsdev, xn, yn, in); + // in_analog_left[0][0] = xn } #else /* MAEMO */ extern void update_input(void); #endif -void pl_update_gun(int *xn, int *yn, int *xres, int *yres, int *in) +void pl_gun_byte2(int port, unsigned char byte) { - if (tsdev) - pl_gun_ts_update(tsdev, xn, yn, in); - - *xres = psx_w; - *yres = psx_h; } #define MAX_LAG_FRAMES 3 diff --git a/include/psemu_plugin_defs.h b/include/psemu_plugin_defs.h index b855eacc2..2d688f207 100644 --- a/include/psemu_plugin_defs.h +++ b/include/psemu_plugin_defs.h @@ -141,7 +141,7 @@ typedef struct long PADquery(void); unsigned char PADstartPoll(int); - unsigned char PADpoll(unsigned char); + unsigned char PADpoll(unsigned char, int *); */ @@ -222,7 +222,9 @@ typedef struct //configuration mode Request 0x43 int configMode; - unsigned char reserved[87]; + + unsigned char txData[32]; + unsigned char reserved[56]; //Lightgun values int absoluteX,absoluteY; diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index a77c728c2..a34969f42 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -1,813 +1,1227 @@ -/*************************************************************************** - * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team * - * * - * This program is free software; you can 
redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU General Public License * - * along with this program; if not, write to the * - * Free Software Foundation, Inc., * - * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * - ***************************************************************************/ - -/* -* Plugin library callback/access functions. -*/ - -#include "plugins.h" -#include "cdriso.h" - -static char IsoFile[MAXPATHLEN] = ""; -static s64 cdOpenCaseTime = 0; - -GPUupdateLace GPU_updateLace; -GPUinit GPU_init; -GPUshutdown GPU_shutdown; -GPUconfigure GPU_configure; -GPUtest GPU_test; -GPUabout GPU_about; -GPUopen GPU_open; -GPUclose GPU_close; -GPUreadStatus GPU_readStatus; -GPUreadData GPU_readData; -GPUreadDataMem GPU_readDataMem; -GPUwriteStatus GPU_writeStatus; -GPUwriteData GPU_writeData; -GPUwriteDataMem GPU_writeDataMem; -GPUdmaChain GPU_dmaChain; -GPUkeypressed GPU_keypressed; -GPUdisplayText GPU_displayText; -GPUmakeSnapshot GPU_makeSnapshot; -GPUfreeze GPU_freeze; -GPUgetScreenPic GPU_getScreenPic; -GPUshowScreenPic GPU_showScreenPic; -GPUvBlank GPU_vBlank; - -CDRinit CDR_init; -CDRshutdown CDR_shutdown; -CDRopen CDR_open; -CDRclose CDR_close; -CDRtest CDR_test; -CDRgetTN CDR_getTN; -CDRgetTD CDR_getTD; -CDRreadTrack CDR_readTrack; -CDRgetBuffer CDR_getBuffer; -CDRplay CDR_play; -CDRstop CDR_stop; -CDRgetStatus CDR_getStatus; -CDRgetDriveLetter CDR_getDriveLetter; -CDRgetBufferSub CDR_getBufferSub; -CDRconfigure CDR_configure; -CDRabout CDR_about; 
-CDRsetfilename CDR_setfilename; -CDRreadCDDA CDR_readCDDA; -CDRgetTE CDR_getTE; - -SPUinit SPU_init; -SPUshutdown SPU_shutdown; -SPUopen SPU_open; -SPUclose SPU_close; -SPUwriteRegister SPU_writeRegister; -SPUreadRegister SPU_readRegister; -SPUwriteDMAMem SPU_writeDMAMem; -SPUreadDMAMem SPU_readDMAMem; -SPUplayADPCMchannel SPU_playADPCMchannel; -SPUfreeze SPU_freeze; -SPUregisterCallback SPU_registerCallback; -SPUregisterScheduleCb SPU_registerScheduleCb; -SPUasync SPU_async; -SPUplayCDDAchannel SPU_playCDDAchannel; - -PADconfigure PAD1_configure; -PADabout PAD1_about; -PADinit PAD1_init; -PADshutdown PAD1_shutdown; -PADtest PAD1_test; -PADopen PAD1_open; -PADclose PAD1_close; -PADquery PAD1_query; -PADreadPort1 PAD1_readPort1; -PADkeypressed PAD1_keypressed; -PADstartPoll PAD1_startPoll; -PADpoll PAD1_poll; -PADsetSensitive PAD1_setSensitive; - -PADconfigure PAD2_configure; -PADabout PAD2_about; -PADinit PAD2_init; -PADshutdown PAD2_shutdown; -PADtest PAD2_test; -PADopen PAD2_open; -PADclose PAD2_close; -PADquery PAD2_query; -PADreadPort2 PAD2_readPort2; -PADkeypressed PAD2_keypressed; -PADstartPoll PAD2_startPoll; -PADpoll PAD2_poll; -PADsetSensitive PAD2_setSensitive; - -NETinit NET_init; -NETshutdown NET_shutdown; -NETopen NET_open; -NETclose NET_close; -NETtest NET_test; -NETconfigure NET_configure; -NETabout NET_about; -NETpause NET_pause; -NETresume NET_resume; -NETqueryPlayer NET_queryPlayer; -NETsendData NET_sendData; -NETrecvData NET_recvData; -NETsendPadData NET_sendPadData; -NETrecvPadData NET_recvPadData; -NETsetInfo NET_setInfo; -NETkeypressed NET_keypressed; - -#ifdef ENABLE_SIO1API - -SIO1init SIO1_init; -SIO1shutdown SIO1_shutdown; -SIO1open SIO1_open; -SIO1close SIO1_close; -SIO1test SIO1_test; -SIO1configure SIO1_configure; -SIO1about SIO1_about; -SIO1pause SIO1_pause; -SIO1resume SIO1_resume; -SIO1keypressed SIO1_keypressed; -SIO1writeData8 SIO1_writeData8; -SIO1writeData16 SIO1_writeData16; -SIO1writeData32 SIO1_writeData32; -SIO1writeStat16 
SIO1_writeStat16; -SIO1writeStat32 SIO1_writeStat32; -SIO1writeMode16 SIO1_writeMode16; -SIO1writeMode32 SIO1_writeMode32; -SIO1writeCtrl16 SIO1_writeCtrl16; -SIO1writeCtrl32 SIO1_writeCtrl32; -SIO1writeBaud16 SIO1_writeBaud16; -SIO1writeBaud32 SIO1_writeBaud32; -SIO1readData8 SIO1_readData8; -SIO1readData16 SIO1_readData16; -SIO1readData32 SIO1_readData32; -SIO1readStat16 SIO1_readStat16; -SIO1readStat32 SIO1_readStat32; -SIO1readMode16 SIO1_readMode16; -SIO1readMode32 SIO1_readMode32; -SIO1readCtrl16 SIO1_readCtrl16; -SIO1readCtrl32 SIO1_readCtrl32; -SIO1readBaud16 SIO1_readBaud16; -SIO1readBaud32 SIO1_readBaud32; -SIO1registerCallback SIO1_registerCallback; - -#endif - -static const char *err; - -#define CheckErr(func) { \ - err = SysLibError(); \ - if (err != NULL) { SysMessage(_("Error loading %s: %s"), func, err); return -1; } \ -} - -#define LoadSym(dest, src, name, checkerr) { \ - dest = (src)SysLoadSym(drv, name); \ - if (checkerr) { CheckErr(name); } else SysLibError(); \ -} - -void *hGPUDriver = NULL; - -void CALLBACK GPU__displayText(char *pText) { - SysPrintf("%s\n", pText); -} - -long CALLBACK GPU__configure(void) { return 0; } -long CALLBACK GPU__test(void) { return 0; } -void CALLBACK GPU__about(void) {} -void CALLBACK GPU__makeSnapshot(void) {} -void CALLBACK GPU__keypressed(int key) {} -long CALLBACK GPU__getScreenPic(unsigned char *pMem) { return -1; } -long CALLBACK GPU__showScreenPic(unsigned char *pMem) { return -1; } -void CALLBACK GPU__vBlank(int val) {} - -#define LoadGpuSym1(dest, name) \ - LoadSym(GPU_##dest, GPU##dest, name, TRUE); - -#define LoadGpuSym0(dest, name) \ - LoadSym(GPU_##dest, GPU##dest, name, FALSE); \ - if (GPU_##dest == NULL) GPU_##dest = (GPU##dest) GPU__##dest; - -#define LoadGpuSymN(dest, name) \ - LoadSym(GPU_##dest, GPU##dest, name, FALSE); - -static int LoadGPUplugin(const char *GPUdll) { - void *drv; - - hGPUDriver = SysLoadLibrary(GPUdll); - if (hGPUDriver == NULL) { - GPU_configure = NULL; - SysMessage (_("Could 
not load GPU plugin %s!"), GPUdll); return -1; - } - drv = hGPUDriver; - LoadGpuSym1(init, "GPUinit"); - LoadGpuSym1(shutdown, "GPUshutdown"); - LoadGpuSym1(open, "GPUopen"); - LoadGpuSym1(close, "GPUclose"); - LoadGpuSym1(readData, "GPUreadData"); - LoadGpuSym1(readDataMem, "GPUreadDataMem"); - LoadGpuSym1(readStatus, "GPUreadStatus"); - LoadGpuSym1(writeData, "GPUwriteData"); - LoadGpuSym1(writeDataMem, "GPUwriteDataMem"); - LoadGpuSym1(writeStatus, "GPUwriteStatus"); - LoadGpuSym1(dmaChain, "GPUdmaChain"); - LoadGpuSym1(updateLace, "GPUupdateLace"); - LoadGpuSym0(keypressed, "GPUkeypressed"); - LoadGpuSym0(displayText, "GPUdisplayText"); - LoadGpuSym0(makeSnapshot, "GPUmakeSnapshot"); - LoadGpuSym1(freeze, "GPUfreeze"); - LoadGpuSym0(getScreenPic, "GPUgetScreenPic"); - LoadGpuSym0(showScreenPic, "GPUshowScreenPic"); - LoadGpuSym0(vBlank, "GPUvBlank"); - LoadGpuSym0(configure, "GPUconfigure"); - LoadGpuSym0(test, "GPUtest"); - LoadGpuSym0(about, "GPUabout"); - - return 0; -} - -void *hCDRDriver = NULL; - -long CALLBACK CDR__play(unsigned char *sector) { return 0; } -long CALLBACK CDR__stop(void) { return 0; } - -long CALLBACK CDR__getStatus(struct CdrStat *stat) { - if (cdOpenCaseTime < 0 || cdOpenCaseTime > (s64)time(NULL)) - stat->Status = 0x10; - else - stat->Status = 0; - - return 0; -} - -char* CALLBACK CDR__getDriveLetter(void) { return NULL; } -long CALLBACK CDR__configure(void) { return 0; } -long CALLBACK CDR__test(void) { return 0; } -void CALLBACK CDR__about(void) {} -long CALLBACK CDR__setfilename(char*filename) { return 0; } - -#define LoadCdrSym1(dest, name) \ - LoadSym(CDR_##dest, CDR##dest, name, TRUE); - -#define LoadCdrSym0(dest, name) \ - LoadSym(CDR_##dest, CDR##dest, name, FALSE); \ - if (CDR_##dest == NULL) CDR_##dest = (CDR##dest) CDR__##dest; - -#define LoadCdrSymN(dest, name) \ - LoadSym(CDR_##dest, CDR##dest, name, FALSE); - -static int LoadCDRplugin(const char *CDRdll) { - void *drv; - - if (CDRdll == NULL) { - cdrIsoInit(); - return 0; 
- } - - hCDRDriver = SysLoadLibrary(CDRdll); - if (hCDRDriver == NULL) { - CDR_configure = NULL; - SysMessage (_("Could not load CD-ROM plugin %s!"), CDRdll); return -1; - } - drv = hCDRDriver; - LoadCdrSym1(init, "CDRinit"); - LoadCdrSym1(shutdown, "CDRshutdown"); - LoadCdrSym1(open, "CDRopen"); - LoadCdrSym1(close, "CDRclose"); - LoadCdrSym1(getTN, "CDRgetTN"); - LoadCdrSym1(getTD, "CDRgetTD"); - LoadCdrSym1(readTrack, "CDRreadTrack"); - LoadCdrSym1(getBuffer, "CDRgetBuffer"); - LoadCdrSym1(getBufferSub, "CDRgetBufferSub"); - LoadCdrSym0(play, "CDRplay"); - LoadCdrSym0(stop, "CDRstop"); - LoadCdrSym0(getStatus, "CDRgetStatus"); - LoadCdrSym0(getDriveLetter, "CDRgetDriveLetter"); - LoadCdrSym0(configure, "CDRconfigure"); - LoadCdrSym0(test, "CDRtest"); - LoadCdrSym0(about, "CDRabout"); - LoadCdrSym0(setfilename, "CDRsetfilename"); - LoadCdrSymN(readCDDA, "CDRreadCDDA"); - LoadCdrSymN(getTE, "CDRgetTE"); - - return 0; -} - +/*************************************************************************** + * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * + ***************************************************************************/ + +/* +* Plugin library callback/access functions. 
+*/ + +#include "plugins.h" +#include "cdriso.h" + +static char IsoFile[MAXPATHLEN] = ""; +static s64 cdOpenCaseTime = 0; + +GPUupdateLace GPU_updateLace; +GPUinit GPU_init; +GPUshutdown GPU_shutdown; +GPUconfigure GPU_configure; +GPUtest GPU_test; +GPUabout GPU_about; +GPUopen GPU_open; +GPUclose GPU_close; +GPUreadStatus GPU_readStatus; +GPUreadData GPU_readData; +GPUreadDataMem GPU_readDataMem; +GPUwriteStatus GPU_writeStatus; +GPUwriteData GPU_writeData; +GPUwriteDataMem GPU_writeDataMem; +GPUdmaChain GPU_dmaChain; +GPUkeypressed GPU_keypressed; +GPUdisplayText GPU_displayText; +GPUmakeSnapshot GPU_makeSnapshot; +GPUfreeze GPU_freeze; +GPUgetScreenPic GPU_getScreenPic; +GPUshowScreenPic GPU_showScreenPic; +GPUvBlank GPU_vBlank; + +CDRinit CDR_init; +CDRshutdown CDR_shutdown; +CDRopen CDR_open; +CDRclose CDR_close; +CDRtest CDR_test; +CDRgetTN CDR_getTN; +CDRgetTD CDR_getTD; +CDRreadTrack CDR_readTrack; +CDRgetBuffer CDR_getBuffer; +CDRplay CDR_play; +CDRstop CDR_stop; +CDRgetStatus CDR_getStatus; +CDRgetDriveLetter CDR_getDriveLetter; +CDRgetBufferSub CDR_getBufferSub; +CDRconfigure CDR_configure; +CDRabout CDR_about; +CDRsetfilename CDR_setfilename; +CDRreadCDDA CDR_readCDDA; +CDRgetTE CDR_getTE; + +SPUinit SPU_init; +SPUshutdown SPU_shutdown; +SPUopen SPU_open; +SPUclose SPU_close; +SPUwriteRegister SPU_writeRegister; +SPUreadRegister SPU_readRegister; +SPUwriteDMAMem SPU_writeDMAMem; +SPUreadDMAMem SPU_readDMAMem; +SPUplayADPCMchannel SPU_playADPCMchannel; +SPUfreeze SPU_freeze; +SPUregisterCallback SPU_registerCallback; +SPUregisterScheduleCb SPU_registerScheduleCb; +SPUasync SPU_async; +SPUplayCDDAchannel SPU_playCDDAchannel; + +PADconfigure PAD1_configure; +PADabout PAD1_about; +PADinit PAD1_init; +PADshutdown PAD1_shutdown; +PADtest PAD1_test; +PADopen PAD1_open; +PADclose PAD1_close; +PADquery PAD1_query; +PADreadPort1 PAD1_readPort1; +PADkeypressed PAD1_keypressed; +PADstartPoll PAD1_startPoll; +PADpoll PAD1_poll; +PADsetSensitive PAD1_setSensitive; + 
+PADconfigure PAD2_configure; +PADabout PAD2_about; +PADinit PAD2_init; +PADshutdown PAD2_shutdown; +PADtest PAD2_test; +PADopen PAD2_open; +PADclose PAD2_close; +PADquery PAD2_query; +PADreadPort2 PAD2_readPort2; +PADkeypressed PAD2_keypressed; +PADstartPoll PAD2_startPoll; +PADpoll PAD2_poll; +PADsetSensitive PAD2_setSensitive; + +NETinit NET_init; +NETshutdown NET_shutdown; +NETopen NET_open; +NETclose NET_close; +NETtest NET_test; +NETconfigure NET_configure; +NETabout NET_about; +NETpause NET_pause; +NETresume NET_resume; +NETqueryPlayer NET_queryPlayer; +NETsendData NET_sendData; +NETrecvData NET_recvData; +NETsendPadData NET_sendPadData; +NETrecvPadData NET_recvPadData; +NETsetInfo NET_setInfo; +NETkeypressed NET_keypressed; + +#ifdef ENABLE_SIO1API + +SIO1init SIO1_init; +SIO1shutdown SIO1_shutdown; +SIO1open SIO1_open; +SIO1close SIO1_close; +SIO1test SIO1_test; +SIO1configure SIO1_configure; +SIO1about SIO1_about; +SIO1pause SIO1_pause; +SIO1resume SIO1_resume; +SIO1keypressed SIO1_keypressed; +SIO1writeData8 SIO1_writeData8; +SIO1writeData16 SIO1_writeData16; +SIO1writeData32 SIO1_writeData32; +SIO1writeStat16 SIO1_writeStat16; +SIO1writeStat32 SIO1_writeStat32; +SIO1writeMode16 SIO1_writeMode16; +SIO1writeMode32 SIO1_writeMode32; +SIO1writeCtrl16 SIO1_writeCtrl16; +SIO1writeCtrl32 SIO1_writeCtrl32; +SIO1writeBaud16 SIO1_writeBaud16; +SIO1writeBaud32 SIO1_writeBaud32; +SIO1readData8 SIO1_readData8; +SIO1readData16 SIO1_readData16; +SIO1readData32 SIO1_readData32; +SIO1readStat16 SIO1_readStat16; +SIO1readStat32 SIO1_readStat32; +SIO1readMode16 SIO1_readMode16; +SIO1readMode32 SIO1_readMode32; +SIO1readCtrl16 SIO1_readCtrl16; +SIO1readCtrl32 SIO1_readCtrl32; +SIO1readBaud16 SIO1_readBaud16; +SIO1readBaud32 SIO1_readBaud32; +SIO1registerCallback SIO1_registerCallback; + +#endif + +static const char *err; + +#define CheckErr(func) { \ + err = SysLibError(); \ + if (err != NULL) { SysMessage(_("Error loading %s: %s"), func, err); return -1; } \ +} + +#define 
LoadSym(dest, src, name, checkerr) { \ + dest = (src)SysLoadSym(drv, name); \ + if (checkerr) { CheckErr(name); } else SysLibError(); \ +} + +void *hGPUDriver = NULL; + +void CALLBACK GPU__displayText(char *pText) { + SysPrintf("%s\n", pText); +} + +long CALLBACK GPU__configure(void) { return 0; } +long CALLBACK GPU__test(void) { return 0; } +void CALLBACK GPU__about(void) {} +void CALLBACK GPU__makeSnapshot(void) {} +void CALLBACK GPU__keypressed(int key) {} +long CALLBACK GPU__getScreenPic(unsigned char *pMem) { return -1; } +long CALLBACK GPU__showScreenPic(unsigned char *pMem) { return -1; } +void CALLBACK GPU__vBlank(int val) {} + +#define LoadGpuSym1(dest, name) \ + LoadSym(GPU_##dest, GPU##dest, name, TRUE); + +#define LoadGpuSym0(dest, name) \ + LoadSym(GPU_##dest, GPU##dest, name, FALSE); \ + if (GPU_##dest == NULL) GPU_##dest = (GPU##dest) GPU__##dest; + +#define LoadGpuSymN(dest, name) \ + LoadSym(GPU_##dest, GPU##dest, name, FALSE); + +static int LoadGPUplugin(const char *GPUdll) { + void *drv; + + hGPUDriver = SysLoadLibrary(GPUdll); + if (hGPUDriver == NULL) { + GPU_configure = NULL; + SysMessage (_("Could not load GPU plugin %s!"), GPUdll); return -1; + } + drv = hGPUDriver; + LoadGpuSym1(init, "GPUinit"); + LoadGpuSym1(shutdown, "GPUshutdown"); + LoadGpuSym1(open, "GPUopen"); + LoadGpuSym1(close, "GPUclose"); + LoadGpuSym1(readData, "GPUreadData"); + LoadGpuSym1(readDataMem, "GPUreadDataMem"); + LoadGpuSym1(readStatus, "GPUreadStatus"); + LoadGpuSym1(writeData, "GPUwriteData"); + LoadGpuSym1(writeDataMem, "GPUwriteDataMem"); + LoadGpuSym1(writeStatus, "GPUwriteStatus"); + LoadGpuSym1(dmaChain, "GPUdmaChain"); + LoadGpuSym1(updateLace, "GPUupdateLace"); + LoadGpuSym0(keypressed, "GPUkeypressed"); + LoadGpuSym0(displayText, "GPUdisplayText"); + LoadGpuSym0(makeSnapshot, "GPUmakeSnapshot"); + LoadGpuSym1(freeze, "GPUfreeze"); + LoadGpuSym0(getScreenPic, "GPUgetScreenPic"); + LoadGpuSym0(showScreenPic, "GPUshowScreenPic"); + LoadGpuSym0(vBlank, 
"GPUvBlank"); + LoadGpuSym0(configure, "GPUconfigure"); + LoadGpuSym0(test, "GPUtest"); + LoadGpuSym0(about, "GPUabout"); + + return 0; +} + +void *hCDRDriver = NULL; + +long CALLBACK CDR__play(unsigned char *sector) { return 0; } +long CALLBACK CDR__stop(void) { return 0; } + +long CALLBACK CDR__getStatus(struct CdrStat *stat) { + if (cdOpenCaseTime < 0 || cdOpenCaseTime > (s64)time(NULL)) + stat->Status = 0x10; + else + stat->Status = 0; + + return 0; +} + +char* CALLBACK CDR__getDriveLetter(void) { return NULL; } +long CALLBACK CDR__configure(void) { return 0; } +long CALLBACK CDR__test(void) { return 0; } +void CALLBACK CDR__about(void) {} +long CALLBACK CDR__setfilename(char*filename) { return 0; } + +#define LoadCdrSym1(dest, name) \ + LoadSym(CDR_##dest, CDR##dest, name, TRUE); + +#define LoadCdrSym0(dest, name) \ + LoadSym(CDR_##dest, CDR##dest, name, FALSE); \ + if (CDR_##dest == NULL) CDR_##dest = (CDR##dest) CDR__##dest; + +#define LoadCdrSymN(dest, name) \ + LoadSym(CDR_##dest, CDR##dest, name, FALSE); + +static int LoadCDRplugin(const char *CDRdll) { + void *drv; + + if (CDRdll == NULL) { + cdrIsoInit(); + return 0; + } + + hCDRDriver = SysLoadLibrary(CDRdll); + if (hCDRDriver == NULL) { + CDR_configure = NULL; + SysMessage (_("Could not load CD-ROM plugin %s!"), CDRdll); return -1; + } + drv = hCDRDriver; + LoadCdrSym1(init, "CDRinit"); + LoadCdrSym1(shutdown, "CDRshutdown"); + LoadCdrSym1(open, "CDRopen"); + LoadCdrSym1(close, "CDRclose"); + LoadCdrSym1(getTN, "CDRgetTN"); + LoadCdrSym1(getTD, "CDRgetTD"); + LoadCdrSym1(readTrack, "CDRreadTrack"); + LoadCdrSym1(getBuffer, "CDRgetBuffer"); + LoadCdrSym1(getBufferSub, "CDRgetBufferSub"); + LoadCdrSym0(play, "CDRplay"); + LoadCdrSym0(stop, "CDRstop"); + LoadCdrSym0(getStatus, "CDRgetStatus"); + LoadCdrSym0(getDriveLetter, "CDRgetDriveLetter"); + LoadCdrSym0(configure, "CDRconfigure"); + LoadCdrSym0(test, "CDRtest"); + LoadCdrSym0(about, "CDRabout"); + LoadCdrSym0(setfilename, "CDRsetfilename"); + 
LoadCdrSymN(readCDDA, "CDRreadCDDA"); + LoadCdrSymN(getTE, "CDRgetTE"); + + return 0; +} + static void *hSPUDriver = NULL; static void CALLBACK SPU__registerScheduleCb(void (CALLBACK *cb)(unsigned int)) {} - -#define LoadSpuSym1(dest, name) \ - LoadSym(SPU_##dest, SPU##dest, name, TRUE); - -#define LoadSpuSym0(dest, name) \ - LoadSym(SPU_##dest, SPU##dest, name, FALSE); \ - if (SPU_##dest == NULL) SPU_##dest = (SPU##dest) SPU__##dest; - -#define LoadSpuSymN(dest, name) \ - LoadSym(SPU_##dest, SPU##dest, name, FALSE); - -static int LoadSPUplugin(const char *SPUdll) { - void *drv; - - hSPUDriver = SysLoadLibrary(SPUdll); - if (hSPUDriver == NULL) { - SysMessage (_("Could not load SPU plugin %s!"), SPUdll); return -1; - } - drv = hSPUDriver; - LoadSpuSym1(init, "SPUinit"); - LoadSpuSym1(shutdown, "SPUshutdown"); - LoadSpuSym1(open, "SPUopen"); - LoadSpuSym1(close, "SPUclose"); - LoadSpuSym1(writeRegister, "SPUwriteRegister"); - LoadSpuSym1(readRegister, "SPUreadRegister"); - LoadSpuSym1(writeDMAMem, "SPUwriteDMAMem"); - LoadSpuSym1(readDMAMem, "SPUreadDMAMem"); - LoadSpuSym1(playADPCMchannel, "SPUplayADPCMchannel"); - LoadSpuSym1(freeze, "SPUfreeze"); - LoadSpuSym1(registerCallback, "SPUregisterCallback"); - LoadSpuSym0(registerScheduleCb, "SPUregisterScheduleCb"); - LoadSpuSymN(async, "SPUasync"); - LoadSpuSymN(playCDDAchannel, "SPUplayCDDAchannel"); - - return 0; -} - -void *hPAD1Driver = NULL; -void *hPAD2Driver = NULL; - -static unsigned char buf[256]; -unsigned char stdpar[10] = { 0x00, 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -unsigned char mousepar[8] = { 0x00, 0x12, 0x5a, 0xff, 0xff, 0xff, 0xff }; -unsigned char analogpar[9] = { 0x00, 0xff, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - -static int bufcount, bufc; - -PadDataS padd1, padd2; - -unsigned char _PADstartPoll(PadDataS *pad) { - bufc = 0; - - switch (pad->controllerType) { - case PSE_PAD_TYPE_MOUSE: - mousepar[3] = pad->buttonStatus & 0xff; - mousepar[4] = pad->buttonStatus >> 8; - 
mousepar[5] = pad->moveX; - mousepar[6] = pad->moveY; - - memcpy(buf, mousepar, 7); - bufcount = 6; - break; - case PSE_PAD_TYPE_NEGCON: // npc101/npc104(slph00001/slph00069) - analogpar[1] = 0x23; - analogpar[3] = pad->buttonStatus & 0xff; - analogpar[4] = pad->buttonStatus >> 8; - analogpar[5] = pad->rightJoyX; - analogpar[6] = pad->rightJoyY; - analogpar[7] = pad->leftJoyX; - analogpar[8] = pad->leftJoyY; - - memcpy(buf, analogpar, 9); - bufcount = 8; - break; - case PSE_PAD_TYPE_ANALOGPAD: // scph1150 - analogpar[1] = 0x73; - analogpar[3] = pad->buttonStatus & 0xff; - analogpar[4] = pad->buttonStatus >> 8; - analogpar[5] = pad->rightJoyX; - analogpar[6] = pad->rightJoyY; - analogpar[7] = pad->leftJoyX; - analogpar[8] = pad->leftJoyY; - - memcpy(buf, analogpar, 9); - bufcount = 8; - break; - case PSE_PAD_TYPE_ANALOGJOY: // scph1110 - analogpar[1] = 0x53; - analogpar[3] = pad->buttonStatus & 0xff; - analogpar[4] = pad->buttonStatus >> 8; - analogpar[5] = pad->rightJoyX; - analogpar[6] = pad->rightJoyY; - analogpar[7] = pad->leftJoyX; - analogpar[8] = pad->leftJoyY; - - memcpy(buf, analogpar, 9); - bufcount = 8; - break; - case PSE_PAD_TYPE_STANDARD: - default: - stdpar[3] = pad->buttonStatus & 0xff; - stdpar[4] = pad->buttonStatus >> 8; - - memcpy(buf, stdpar, 5); - bufcount = 4; - } - - return buf[bufc++]; -} - -unsigned char _PADpoll(unsigned char value) { - if (bufc > bufcount) return 0; - return buf[bufc++]; -} - -unsigned char CALLBACK PAD1__startPoll(int pad) { - PadDataS padd; - - PAD1_readPort1(&padd); - - return _PADstartPoll(&padd); -} - -unsigned char CALLBACK PAD1__poll(unsigned char value) { - return _PADpoll(value); -} - -long CALLBACK PAD1__configure(void) { return 0; } -void CALLBACK PAD1__about(void) {} -long CALLBACK PAD1__test(void) { return 0; } -long CALLBACK PAD1__query(void) { return 3; } -long CALLBACK PAD1__keypressed() { return 0; } - -#define LoadPad1Sym1(dest, name) \ - LoadSym(PAD1_##dest, PAD##dest, name, TRUE); - -#define 
LoadPad1SymN(dest, name) \ - LoadSym(PAD1_##dest, PAD##dest, name, FALSE); - -#define LoadPad1Sym0(dest, name) \ - LoadSym(PAD1_##dest, PAD##dest, name, FALSE); \ - if (PAD1_##dest == NULL) PAD1_##dest = (PAD##dest) PAD1__##dest; - -static int LoadPAD1plugin(const char *PAD1dll) { - void *drv; - - hPAD1Driver = SysLoadLibrary(PAD1dll); - if (hPAD1Driver == NULL) { - PAD1_configure = NULL; - SysMessage (_("Could not load Controller 1 plugin %s!"), PAD1dll); return -1; - } - drv = hPAD1Driver; - LoadPad1Sym1(init, "PADinit"); - LoadPad1Sym1(shutdown, "PADshutdown"); - LoadPad1Sym1(open, "PADopen"); - LoadPad1Sym1(close, "PADclose"); - LoadPad1Sym0(query, "PADquery"); - LoadPad1Sym1(readPort1, "PADreadPort1"); - LoadPad1Sym0(configure, "PADconfigure"); - LoadPad1Sym0(test, "PADtest"); - LoadPad1Sym0(about, "PADabout"); - LoadPad1Sym0(keypressed, "PADkeypressed"); - LoadPad1Sym0(startPoll, "PADstartPoll"); - LoadPad1Sym0(poll, "PADpoll"); - LoadPad1SymN(setSensitive, "PADsetSensitive"); - - return 0; -} - -unsigned char CALLBACK PAD2__startPoll(int pad) { - PadDataS padd; - - PAD2_readPort2(&padd); - - return _PADstartPoll(&padd); -} - -unsigned char CALLBACK PAD2__poll(unsigned char value) { - return _PADpoll(value); -} - -long CALLBACK PAD2__configure(void) { return 0; } -void CALLBACK PAD2__about(void) {} -long CALLBACK PAD2__test(void) { return 0; } -long CALLBACK PAD2__query(void) { return PSE_PAD_USE_PORT1 | PSE_PAD_USE_PORT2; } -long CALLBACK PAD2__keypressed() { return 0; } - -#define LoadPad2Sym1(dest, name) \ - LoadSym(PAD2_##dest, PAD##dest, name, TRUE); - -#define LoadPad2Sym0(dest, name) \ - LoadSym(PAD2_##dest, PAD##dest, name, FALSE); \ - if (PAD2_##dest == NULL) PAD2_##dest = (PAD##dest) PAD2__##dest; - -#define LoadPad2SymN(dest, name) \ - LoadSym(PAD2_##dest, PAD##dest, name, FALSE); - -static int LoadPAD2plugin(const char *PAD2dll) { - void *drv; - - hPAD2Driver = SysLoadLibrary(PAD2dll); - if (hPAD2Driver == NULL) { - PAD2_configure = NULL; - 
SysMessage (_("Could not load Controller 2 plugin %s!"), PAD2dll); return -1; - } - drv = hPAD2Driver; - LoadPad2Sym1(init, "PADinit"); - LoadPad2Sym1(shutdown, "PADshutdown"); - LoadPad2Sym1(open, "PADopen"); - LoadPad2Sym1(close, "PADclose"); - LoadPad2Sym0(query, "PADquery"); - LoadPad2Sym1(readPort2, "PADreadPort2"); - LoadPad2Sym0(configure, "PADconfigure"); - LoadPad2Sym0(test, "PADtest"); - LoadPad2Sym0(about, "PADabout"); - LoadPad2Sym0(keypressed, "PADkeypressed"); - LoadPad2Sym0(startPoll, "PADstartPoll"); - LoadPad2Sym0(poll, "PADpoll"); - LoadPad2SymN(setSensitive, "PADsetSensitive"); - - return 0; -} - -void *hNETDriver = NULL; - -void CALLBACK NET__setInfo(netInfo *info) {} -void CALLBACK NET__keypressed(int key) {} -long CALLBACK NET__configure(void) { return 0; } -long CALLBACK NET__test(void) { return 0; } -void CALLBACK NET__about(void) {} - -#define LoadNetSym1(dest, name) \ - LoadSym(NET_##dest, NET##dest, name, TRUE); - -#define LoadNetSymN(dest, name) \ - LoadSym(NET_##dest, NET##dest, name, FALSE); - -#define LoadNetSym0(dest, name) \ - LoadSym(NET_##dest, NET##dest, name, FALSE); \ - if (NET_##dest == NULL) NET_##dest = (NET##dest) NET__##dest; - -static int LoadNETplugin(const char *NETdll) { - void *drv; - - hNETDriver = SysLoadLibrary(NETdll); - if (hNETDriver == NULL) { - SysMessage (_("Could not load NetPlay plugin %s!"), NETdll); return -1; - } - drv = hNETDriver; - LoadNetSym1(init, "NETinit"); - LoadNetSym1(shutdown, "NETshutdown"); - LoadNetSym1(open, "NETopen"); - LoadNetSym1(close, "NETclose"); - LoadNetSymN(sendData, "NETsendData"); - LoadNetSymN(recvData, "NETrecvData"); - LoadNetSym1(sendPadData, "NETsendPadData"); - LoadNetSym1(recvPadData, "NETrecvPadData"); - LoadNetSym1(queryPlayer, "NETqueryPlayer"); - LoadNetSym1(pause, "NETpause"); - LoadNetSym1(resume, "NETresume"); - LoadNetSym0(setInfo, "NETsetInfo"); - LoadNetSym0(keypressed, "NETkeypressed"); - LoadNetSym0(configure, "NETconfigure"); - LoadNetSym0(test, "NETtest"); 
- LoadNetSym0(about, "NETabout"); - - return 0; -} - -#ifdef ENABLE_SIO1API - -void *hSIO1Driver = NULL; - -long CALLBACK SIO1__init(void) { return 0; } -long CALLBACK SIO1__shutdown(void) { return 0; } -long CALLBACK SIO1__open(void) { return 0; } -long CALLBACK SIO1__close(void) { return 0; } -long CALLBACK SIO1__configure(void) { return 0; } -long CALLBACK SIO1__test(void) { return 0; } -void CALLBACK SIO1__about(void) {} -void CALLBACK SIO1__pause(void) {} -void CALLBACK SIO1__resume(void) {} -long CALLBACK SIO1__keypressed(int key) { return 0; } -void CALLBACK SIO1__writeData8(unsigned char val) {} -void CALLBACK SIO1__writeData16(unsigned short val) {} -void CALLBACK SIO1__writeData32(unsigned long val) {} -void CALLBACK SIO1__writeStat16(unsigned short val) {} -void CALLBACK SIO1__writeStat32(unsigned long val) {} -void CALLBACK SIO1__writeMode16(unsigned short val) {} -void CALLBACK SIO1__writeMode32(unsigned long val) {} -void CALLBACK SIO1__writeCtrl16(unsigned short val) {} -void CALLBACK SIO1__writeCtrl32(unsigned long val) {} -void CALLBACK SIO1__writeBaud16(unsigned short val) {} -void CALLBACK SIO1__writeBaud32(unsigned long val) {} -unsigned char CALLBACK SIO1__readData8(void) { return 0; } -unsigned short CALLBACK SIO1__readData16(void) { return 0; } -unsigned long CALLBACK SIO1__readData32(void) { return 0; } -unsigned short CALLBACK SIO1__readStat16(void) { return 0; } -unsigned long CALLBACK SIO1__readStat32(void) { return 0; } -unsigned short CALLBACK SIO1__readMode16(void) { return 0; } -unsigned long CALLBACK SIO1__readMode32(void) { return 0; } -unsigned short CALLBACK SIO1__readCtrl16(void) { return 0; } -unsigned long CALLBACK SIO1__readCtrl32(void) { return 0; } -unsigned short CALLBACK SIO1__readBaud16(void) { return 0; } -unsigned long CALLBACK SIO1__readBaud32(void) { return 0; } -void CALLBACK SIO1__registerCallback(void (CALLBACK *callback)(void)) {}; - -void CALLBACK SIO1irq(void) { - psxHu32ref(0x1070) |= SWAPu32(0x100); -} - 
-#define LoadSio1Sym1(dest, name) \ - LoadSym(SIO1_##dest, SIO1##dest, name, TRUE); - -#define LoadSio1SymN(dest, name) \ - LoadSym(SIO1_##dest, SIO1##dest, name, FALSE); - -#define LoadSio1Sym0(dest, name) \ - LoadSym(SIO1_##dest, SIO1##dest, name, FALSE); \ - if (SIO1_##dest == NULL) SIO1_##dest = (SIO1##dest) SIO1__##dest; - -static int LoadSIO1plugin(const char *SIO1dll) { - void *drv; - - hSIO1Driver = SysLoadLibrary(SIO1dll); - if (hSIO1Driver == NULL) { - SysMessage (_("Could not load SIO1 plugin %s!"), SIO1dll); return -1; - } - drv = hSIO1Driver; - - LoadSio1Sym0(init, "SIO1init"); - LoadSio1Sym0(shutdown, "SIO1shutdown"); - LoadSio1Sym0(open, "SIO1open"); - LoadSio1Sym0(close, "SIO1close"); - LoadSio1Sym0(pause, "SIO1pause"); - LoadSio1Sym0(resume, "SIO1resume"); - LoadSio1Sym0(keypressed, "SIO1keypressed"); - LoadSio1Sym0(configure, "SIO1configure"); - LoadSio1Sym0(test, "SIO1test"); - LoadSio1Sym0(about, "SIO1about"); - LoadSio1Sym0(writeData8, "SIO1writeData8"); - LoadSio1Sym0(writeData16, "SIO1writeData16"); - LoadSio1Sym0(writeData32, "SIO1writeData32"); - LoadSio1Sym0(writeStat16, "SIO1writeStat16"); - LoadSio1Sym0(writeStat32, "SIO1writeStat32"); - LoadSio1Sym0(writeMode16, "SIO1writeMode16"); - LoadSio1Sym0(writeMode32, "SIO1writeMode32"); - LoadSio1Sym0(writeCtrl16, "SIO1writeCtrl16"); - LoadSio1Sym0(writeCtrl32, "SIO1writeCtrl32"); - LoadSio1Sym0(writeBaud16, "SIO1writeBaud16"); - LoadSio1Sym0(writeBaud32, "SIO1writeBaud32"); - LoadSio1Sym0(readData16, "SIO1readData16"); - LoadSio1Sym0(readData32, "SIO1readData32"); - LoadSio1Sym0(readStat16, "SIO1readStat16"); - LoadSio1Sym0(readStat32, "SIO1readStat32"); - LoadSio1Sym0(readMode16, "SIO1readMode16"); - LoadSio1Sym0(readMode32, "SIO1readMode32"); - LoadSio1Sym0(readCtrl16, "SIO1readCtrl16"); - LoadSio1Sym0(readCtrl32, "SIO1readCtrl32"); - LoadSio1Sym0(readBaud16, "SIO1readBaud16"); - LoadSio1Sym0(readBaud32, "SIO1readBaud32"); - LoadSio1Sym0(registerCallback, "SIO1registerCallback"); - - return 
0; -} - -#endif - -int LoadPlugins() { - int ret; - char Plugin[MAXPATHLEN * 2]; - - ReleasePlugins(); - SysLibError(); - - if (UsingIso()) { - LoadCDRplugin(NULL); - } else { - sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Cdr); - if (LoadCDRplugin(Plugin) == -1) return -1; - } - - sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Gpu); - if (LoadGPUplugin(Plugin) == -1) return -1; - - sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Spu); - if (LoadSPUplugin(Plugin) == -1) return -1; - - sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Pad1); - if (LoadPAD1plugin(Plugin) == -1) return -1; - - sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Pad2); - if (LoadPAD2plugin(Plugin) == -1) return -1; - - if (strcmp("Disabled", Config.Net) == 0 || strcmp("", Config.Net) == 0) - Config.UseNet = FALSE; - else { - Config.UseNet = TRUE; - sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Net); - if (LoadNETplugin(Plugin) == -1) Config.UseNet = FALSE; - } - -#ifdef ENABLE_SIO1API - sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Sio1); - if (LoadSIO1plugin(Plugin) == -1) return -1; -#endif - - ret = CDR_init(); - if (ret < 0) { SysMessage (_("Error initializing CD-ROM plugin: %d"), ret); return -1; } - ret = GPU_init(); - if (ret < 0) { SysMessage (_("Error initializing GPU plugin: %d"), ret); return -1; } - ret = SPU_init(); - if (ret < 0) { SysMessage (_("Error initializing SPU plugin: %d"), ret); return -1; } - ret = PAD1_init(1); - if (ret < 0) { SysMessage (_("Error initializing Controller 1 plugin: %d"), ret); return -1; } - ret = PAD2_init(2); - if (ret < 0) { SysMessage (_("Error initializing Controller 2 plugin: %d"), ret); return -1; } - - if (Config.UseNet) { - ret = NET_init(); - if (ret < 0) { SysMessage (_("Error initializing NetPlay plugin: %d"), ret); return -1; } - } - -#ifdef ENABLE_SIO1API - ret = SIO1_init(); - if (ret < 0) { SysMessage (_("Error initializing SIO1 plugin: %d"), ret); return -1; } -#endif - - SysPrintf(_("Plugins 
loaded.\n")); - return 0; -} - -void ReleasePlugins() { - if (Config.UseNet) { - int ret = NET_close(); - if (ret < 0) Config.UseNet = FALSE; - } - NetOpened = FALSE; - - if (hCDRDriver != NULL || cdrIsoActive()) CDR_shutdown(); - if (hGPUDriver != NULL) GPU_shutdown(); - if (hSPUDriver != NULL) SPU_shutdown(); - if (hPAD1Driver != NULL) PAD1_shutdown(); - if (hPAD2Driver != NULL) PAD2_shutdown(); - - if (Config.UseNet && hNETDriver != NULL) NET_shutdown(); - - if (hCDRDriver != NULL) { SysCloseLibrary(hCDRDriver); hCDRDriver = NULL; } - if (hGPUDriver != NULL) { SysCloseLibrary(hGPUDriver); hGPUDriver = NULL; } - if (hSPUDriver != NULL) { SysCloseLibrary(hSPUDriver); hSPUDriver = NULL; } - if (hPAD1Driver != NULL) { SysCloseLibrary(hPAD1Driver); hPAD1Driver = NULL; } - if (hPAD2Driver != NULL) { SysCloseLibrary(hPAD2Driver); hPAD2Driver = NULL; } - - if (Config.UseNet && hNETDriver != NULL) { - SysCloseLibrary(hNETDriver); hNETDriver = NULL; - } - -#ifdef ENABLE_SIO1API - if (hSIO1Driver != NULL) { - SIO1_shutdown(); - SysCloseLibrary(hSIO1Driver); - hSIO1Driver = NULL; - } -#endif -} - -// for CD swap -int ReloadCdromPlugin() -{ - if (hCDRDriver != NULL || cdrIsoActive()) CDR_shutdown(); - if (hCDRDriver != NULL) { SysCloseLibrary(hCDRDriver); hCDRDriver = NULL; } - - if (UsingIso()) { - LoadCDRplugin(NULL); - } else { - char Plugin[MAXPATHLEN * 2]; - sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Cdr); - if (LoadCDRplugin(Plugin) == -1) return -1; - } - - return CDR_init(); -} - -void SetIsoFile(const char *filename) { - if (filename == NULL) { - IsoFile[0] = '\0'; - return; - } - strncpy(IsoFile, filename, MAXPATHLEN - 1); -} - -const char *GetIsoFile(void) { - return IsoFile; -} - -boolean UsingIso(void) { - return (IsoFile[0] != '\0'); -} - -void SetCdOpenCaseTime(s64 time) { - cdOpenCaseTime = time; -} + +#define LoadSpuSym1(dest, name) \ + LoadSym(SPU_##dest, SPU##dest, name, TRUE); + +#define LoadSpuSym0(dest, name) \ + LoadSym(SPU_##dest, SPU##dest, 
name, FALSE); \ + if (SPU_##dest == NULL) SPU_##dest = (SPU##dest) SPU__##dest; + +#define LoadSpuSymN(dest, name) \ + LoadSym(SPU_##dest, SPU##dest, name, FALSE); + +static int LoadSPUplugin(const char *SPUdll) { + void *drv; + + hSPUDriver = SysLoadLibrary(SPUdll); + if (hSPUDriver == NULL) { + SysMessage (_("Could not load SPU plugin %s!"), SPUdll); return -1; + } + drv = hSPUDriver; + LoadSpuSym1(init, "SPUinit"); + LoadSpuSym1(shutdown, "SPUshutdown"); + LoadSpuSym1(open, "SPUopen"); + LoadSpuSym1(close, "SPUclose"); + LoadSpuSym1(writeRegister, "SPUwriteRegister"); + LoadSpuSym1(readRegister, "SPUreadRegister"); + LoadSpuSym1(writeDMAMem, "SPUwriteDMAMem"); + LoadSpuSym1(readDMAMem, "SPUreadDMAMem"); + LoadSpuSym1(playADPCMchannel, "SPUplayADPCMchannel"); + LoadSpuSym1(freeze, "SPUfreeze"); + LoadSpuSym1(registerCallback, "SPUregisterCallback"); + LoadSpuSym0(registerScheduleCb, "SPUregisterScheduleCb"); + LoadSpuSymN(async, "SPUasync"); + LoadSpuSymN(playCDDAchannel, "SPUplayCDDAchannel"); + + return 0; +} + +extern int in_type[8]; + +void *hPAD1Driver = NULL; +void *hPAD2Driver = NULL; + +static int multitap1; +static int multitap2; +//Pad information, keystate, mode, config mode, vibration +static PadDataS pad[8]; + +static int reqPos, respSize; +static int ledStateReq44[8]; +static int PadMode[8]; /* 0 : digital 1: analog */ + +static unsigned char buf[256]; +static unsigned char bufMulti[34] = { 0x80, 0x5a, + 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +unsigned char stdpar[8] = { 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +unsigned char multitappar[34] = { 0x80, 0x5a, + 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +//response for request 
44, 45, 46, 47, 4C, 4D +static unsigned char resp45[8] = {0xF3, 0x5A, 0x01, 0x02, 0x00, 0x02, 0x01, 0x00}; +static unsigned char resp46_00[8] = {0xF3, 0x5A, 0x00, 0x00, 0x01, 0x02, 0x00, 0x0A}; +static unsigned char resp46_01[8] = {0xF3, 0x5A, 0x00, 0x00, 0x01, 0x01, 0x01, 0x14}; +static unsigned char resp47[8] = {0xF3, 0x5A, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00}; +static unsigned char resp4C_00[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00}; +static unsigned char resp4C_01[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00}; +static unsigned char resp4D[8] = {0xF3, 0x5A, 0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF}; + +//fixed reponse of request number 41, 48, 49, 4A, 4B, 4E, 4F +static unsigned char resp40[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static unsigned char resp41[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static unsigned char resp43[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static unsigned char resp44[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static unsigned char resp49[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static unsigned char resp4A[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static unsigned char resp4B[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static unsigned char resp4E[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static unsigned char resp4F[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + +// Resquest of psx core +enum { + // REQUEST + // first call of this request for the pad, the pad is configured as an digital pad. 
+ // 0x0X, 0x42, 0x0Y, 0xZZ, 0xAA, 0x00, 0x00, 0x00, 0x00 + // X pad number (used for the multitap, first request response 0x00, 0x80, 0x5A, (8 bytes pad A), (8 bytes pad B), (8 bytes pad C), (8 bytes pad D) + // Y if 1 : psx request the full length response for the multitap, 3 bytes header and 4 block of 8 bytes per pad + // Y if 0 : psx request a pad key state + // ZZ rumble small motor 00-> OFF, 01 -> ON + // AA rumble large motor speed 0x00 -> 0xFF + // RESPONSE + // header 3 Bytes + // 0x00 + // PadId -> 0x41 for digital pas, 0x73 for analog pad + // 0x5A mode has not change (no press on analog button on the center of pad), 0x00 the analog button have been pressed and the mode switch + // 6 Bytes for keystates + CMD_READ_DATA_AND_VIBRATE = 0x42, + + // REQUEST + // Header + // 0x0N, 0x43, 0x00, XX, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + // XX = 00 -> Normal mode : Seconde bytes of response = padId + // XX = 01 -> Configuration mode : Seconde bytes of response = 0xF3 + // RESPONSE + // enter in config mode example : + // req : 01 43 00 01 00 00 00 00 00 00 + // res : 00 41 5A buttons state, analog states + // exit config mode : + // req : 01 43 00 00 00 00 00 00 00 00 + // res : 00 F3 5A buttons state, analog states + CMD_CONFIG_MODE = 0x43, + + // Set led State + // REQUEST + // 0x0N, 0x44, 0x00, VAL, SEL, 0x00, 0x00, 0x00, 0x00 + // If sel = 2 then + // VAL = 00 -> OFF + // VAL = 01 -> ON + // RESPONSE + // 0x00, 0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + CMD_SET_MODE_AND_LOCK = 0x44, + + // Get Analog Led state + // REQUEST + // 0x0N, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + // RESPONSE + // 0x00, 0xF3, 0x5A, 0x01, 0x02, VAL, 0x02, 0x01, 0x00 + // VAL = 00 Led OFF + // VAL = 01 Led ON + CMD_QUERY_MODEL_AND_MODE = 0x45, + + //Get Variable A + // REQUEST + // 0x0N, 0x46, 0x00, 0xXX, 0x00, 0x00, 0x00, 0x00, 0x00 + // RESPONSE + // XX=00 + // 0x00, 0xF3, 0x5A, 0x00, 0x00, 0x01, 0x02, 0x00, 0x0A + // XX=01 + // 0x00, 0xF3, 0x5A, 0x00, 0x00, 0x01, 
0x01, 0x01, 0x14 + CMD_QUERY_ACT = 0x46, + + // REQUEST + // 0x0N, 0x47, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + // RESPONSE + // 0x00, 0xF3, 0x5A, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00 + CMD_QUERY_COMB = 0x47, + + // REQUEST + // 0x0N, 0x4C, 0x00, 0xXX, 0x00, 0x00, 0x00, 0x00, 0x00 + // RESPONSE + // XX = 0 + // 0x00, 0xF3, 0x5A, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00 + // XX = 1 + // 0x00, 0xF3, 0x5A, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00 + CMD_QUERY_MODE = 0x4C, + + // REQUEST + // 0x0N, 0x4D, 0x00, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF + // RESPONSE + // 0x00, 0xF3, 0x5A, old value or + // AA = 01 unlock large motor (and swap VAL1 and VAL2) + // BB = 01 unlock large motor (default) + // CC, DD, EE, FF = all FF -> unlock small motor + // + // default repsonse for analog pad with 2 motor : 0x00 0xF3 0x5A 0x00 0x01 0xFF 0xFF 0xFF 0xFF + // + CMD_VIBRATION_TOGGLE = 0x4D, + REQ40 = 0x40, + REQ41 = 0x41, + REQ49 = 0x49, + REQ4A = 0x4A, + REQ4B = 0x4B, + REQ4E = 0x4E, + REQ4F = 0x4F +}; + + + + +//NO MULTITAP + +void initBufForRequest(int padIndex, char value){ + switch (value){ + //Pad keystate already in buffer + //case CMD_READ_DATA_AND_VIBRATE : + // break; + case CMD_CONFIG_MODE : + if (pad[padIndex].configMode == 1) { + memcpy(buf, resp43, 8); + break; + } + //else, not in config mode, pad keystate return (already in the buffer) + break; + case CMD_SET_MODE_AND_LOCK : + memcpy(buf, resp44, 8); + break; + case CMD_QUERY_MODEL_AND_MODE : + memcpy(buf, resp45, 8); + buf[4] = PadMode[padIndex]; + break; + case CMD_QUERY_ACT : + memcpy(buf, resp46_00, 8); + break; + case CMD_QUERY_COMB : + memcpy(buf, resp47, 8); + break; + case CMD_QUERY_MODE : + memcpy(buf, resp4C_00, 8); + break; + case CMD_VIBRATION_TOGGLE : + memcpy(buf, resp4D, 8); + break; + case REQ40 : + memcpy(buf, resp40, 8); + break; + case REQ41 : + memcpy(buf, resp41, 8); + break; + case REQ49 : + memcpy(buf, resp49, 8); + break; + case REQ4A : + memcpy(buf, resp4A, 8); + break; + case REQ4B : + memcpy(buf, 
resp4B, 8); + break; + case REQ4E : + memcpy(buf, resp4E, 8); + break; + case REQ4F : + memcpy(buf, resp4F, 8); + break; + } +} + + + + +static void reqIndex2Treatment(int padIndex, char value) { + switch (pad[padIndex].txData[0]) { + case CMD_CONFIG_MODE : + //0x43 + if (value == 0) { + pad[padIndex].configMode = 0; + } else { + pad[padIndex].configMode = 1; + } + break; + case CMD_SET_MODE_AND_LOCK : + //0x44 store the led state for change mode if the next value = 0x02 + //0x01 analog ON + //0x00 analog OFF + ledStateReq44[padIndex] = value; + PadMode[padIndex] = value; + break; + case CMD_QUERY_ACT : + //0x46 + if (value == 1) { + memcpy(buf, resp46_01, 8); + } + break; + case CMD_QUERY_MODE : + if (value == 1) { + memcpy(buf, resp4C_01, 8); + } + break; + case CMD_VIBRATION_TOGGLE : + //0x4D + memcpy(buf, resp4D, 8); + break; + case CMD_READ_DATA_AND_VIBRATE: + //mem the vibration value for small motor; + pad[padIndex].Vib[0] = value; + break; + } +} + +void vibrate(int padIndex){ + if (pad[padIndex].Vib[0] != pad[padIndex].VibF[0] || pad[padIndex].Vib[1] != pad[padIndex].VibF[1]) { + //value is different update Value and call libretro for vibration + pad[padIndex].VibF[0] = pad[padIndex].Vib[0]; + pad[padIndex].VibF[1] = pad[padIndex].Vib[1]; + plat_trigger_vibrate(padIndex, pad[padIndex].VibF[0], pad[padIndex].VibF[1]); + //printf("vibration pad %i", padIndex); + } +} + + + + +//Build response for 0x42 request Pad in port +void _PADstartPoll(PadDataS *pad) { + switch (pad->controllerType) { + case PSE_PAD_TYPE_MOUSE: + stdpar[0] = 0x12; + stdpar[1] = 0x5a; + stdpar[2] = pad->buttonStatus & 0xff; + stdpar[3] = pad->buttonStatus >> 8; + stdpar[4] = pad->moveX; + stdpar[5] = pad->moveY; + memcpy(buf, stdpar, 6); + respSize = 6; + break; + case PSE_PAD_TYPE_NEGCON: // npc101/npc104(slph00001/slph00069) + stdpar[0] = 0x23; + stdpar[1] = 0x5a; + stdpar[2] = pad->buttonStatus & 0xff; + stdpar[3] = pad->buttonStatus >> 8; + stdpar[4] = pad->rightJoyX; + stdpar[5] = 
pad->rightJoyY; + stdpar[6] = pad->leftJoyX; + stdpar[7] = pad->leftJoyY; + memcpy(buf, stdpar, 8); + respSize = 8; + break; + case PSE_PAD_TYPE_GUNCON: // GUNCON - gun controller SLPH-00034 from Namco + stdpar[0] = 0x63; + stdpar[1] = 0x5a; + stdpar[2] = pad->buttonStatus & 0xff; + stdpar[3] = pad->buttonStatus >> 8; + + //This code assumes an X resolution of 256 and a Y resolution of 240 + int xres = 256; + int yres = 240; + + //The code wants an input range for x and y of 0-1023 we passed in -32767 -> 32767 + int absX = (pad->absoluteX / 64) + 512; + int absY = (pad->absoluteY / 64) + 512; + + if (absX == 65536 || absY == 65536) { + stdpar[4] = 0x01; + stdpar[5] = 0x00; + stdpar[6] = 0x0A; + stdpar[7] = 0x00; + } + else { + stdpar[4] = 0x5a - (xres - 256) / 3 + (((xres - 256) / 3 + 356) * absX >> 10); + stdpar[5] = (0x5a - (xres - 256) / 3 + (((xres - 256) / 3 + 356) * absX >> 10)) >> 8; + stdpar[6] = 0x20 + (yres * absY >> 10); + stdpar[7] = (0x20 + (yres * absY >> 10)) >> 8; + } + + memcpy(buf, stdpar, 8); + respSize = 8; + break; + case PSE_PAD_TYPE_GUN: // GUN CONTROLLER - gun controller SLPH-00014 from Konami + stdpar[0] = 0x31; + stdpar[1] = 0x5a; + stdpar[2] = pad->buttonStatus & 0xff; + stdpar[3] = pad->buttonStatus >> 8; + memcpy(buf, stdpar, 4); + respSize = 4; + break; + case PSE_PAD_TYPE_ANALOGPAD: // scph1150 + stdpar[0] = 0x73; + stdpar[1] = 0x5a; + stdpar[2] = pad->buttonStatus & 0xff; + stdpar[3] = pad->buttonStatus >> 8; + stdpar[4] = pad->rightJoyX; + stdpar[5] = pad->rightJoyY; + stdpar[6] = pad->leftJoyX; + stdpar[7] = pad->leftJoyY; + memcpy(buf, stdpar, 8); + respSize = 8; + break; + case PSE_PAD_TYPE_ANALOGJOY: // scph1110 + stdpar[0] = 0x53; + stdpar[1] = 0x5a; + stdpar[2] = pad->buttonStatus & 0xff; + stdpar[3] = pad->buttonStatus >> 8; + stdpar[4] = pad->rightJoyX; + stdpar[5] = pad->rightJoyY; + stdpar[6] = pad->leftJoyX; + stdpar[7] = pad->leftJoyY; + memcpy(buf, stdpar, 8); + respSize = 8; + break; + case PSE_PAD_TYPE_STANDARD: + 
stdpar[0] = 0x41; + stdpar[1] = 0x5a; + stdpar[2] = pad->buttonStatus & 0xff; + stdpar[3] = pad->buttonStatus >> 8; + memcpy(buf, stdpar, 4); + respSize = 4; + break; + default: + respSize = 0; + break; + } +} + + +//Build response for 0x42 request Multitap in port +//Response header for multitap : 0x80, 0x5A, (Pad information port 1-2A), (Pad information port 1-2B), (Pad information port 1-2C), (Pad information port 1-2D) +void _PADstartPollMultitap(PadDataS* padd) { + int i, offset; + for(i = 0; i < 4; i++) { + offset = 2 + (i * 8); + _PADstartPoll(&padd[i]); + memcpy(multitappar+offset, stdpar, 8); + } + memcpy(bufMulti, multitappar, 34); + respSize = 34; +} + +static void PADpoll_dualshock(int port, unsigned char value) +{ + switch (reqPos) { + case 0: + initBufForRequest(port, value); + break; + case 2: + reqIndex2Treatment(port, value); + break; + case 3: + if (pad[port].txData[0] == CMD_READ_DATA_AND_VIBRATE) { + // vibration value for the Large motor + pad[port].Vib[1] = value; + + vibrate(port); + } + break; + } +} + +static unsigned char PADpoll_(int port, unsigned char value, int *more_data) { + if (reqPos < sizeof(pad[port].txData)) + pad[port].txData[reqPos] = value; + + if (reqPos == 0 && value != 0x42 && in_type[port] != PSE_PAD_TYPE_ANALOGPAD) + respSize = 1; + + switch (in_type[port]) { + case PSE_PAD_TYPE_ANALOGPAD: + PADpoll_dualshock(port, value); + break; + case PSE_PAD_TYPE_GUN: + if (reqPos == 2) + pl_gun_byte2(port, value); + break; + } + + *more_data = reqPos < respSize - 1; + if (reqPos >= respSize) + return 0xff; // no response/HiZ + + return buf[reqPos++]; +} + +static unsigned char PADpollMultitap(int port, unsigned char value, int *more_data) { + *more_data = reqPos < respSize - 1; + if (reqPos >= respSize) return 0xff; + return bufMulti[reqPos++]; +} + + +// refresh the button state on port 1. +// int pad is not needed. 
+unsigned char CALLBACK PAD1__startPoll(int pad) { + reqPos = 0; + // first call the pad provide if a multitap is connected between the psx and himself + // just one pad is on port 1 : NO MULTITAP + if (multitap1 == 0) { + PadDataS padd; + padd.requestPadIndex = 0; + PAD1_readPort1(&padd); + _PADstartPoll(&padd); + } else { + // a multitap is plugged : refresh all pad. + int i; + PadDataS padd[4]; + for(i = 0; i < 4; i++) { + padd[i].requestPadIndex = i; + PAD1_readPort1(&padd[i]); + } + _PADstartPollMultitap(padd); + } + //printf("\npad 1 : "); + return 0xff; +} + +unsigned char CALLBACK PAD1__poll(unsigned char value, int *more_data) { + char tmp; + if (multitap1 == 1) { + tmp = PADpollMultitap(0, value, more_data); + } else { + tmp = PADpoll_(0, value, more_data); + } + //printf("%2x:%2x, ",value,tmp); + return tmp; + +} + + +long CALLBACK PAD1__configure(void) { return 0; } +void CALLBACK PAD1__about(void) {} +long CALLBACK PAD1__test(void) { return 0; } +long CALLBACK PAD1__query(void) { return 3; } +long CALLBACK PAD1__keypressed() { return 0; } + +#define LoadPad1Sym1(dest, name) \ + LoadSym(PAD1_##dest, PAD##dest, name, TRUE); + +#define LoadPad1SymN(dest, name) \ + LoadSym(PAD1_##dest, PAD##dest, name, FALSE); + +#define LoadPad1Sym0(dest, name) \ + LoadSym(PAD1_##dest, PAD##dest, name, FALSE); \ + if (PAD1_##dest == NULL) PAD1_##dest = (PAD##dest) PAD1__##dest; + +static int LoadPAD1plugin(const char *PAD1dll) { + PadDataS padd; + void *drv; + + hPAD1Driver = SysLoadLibrary(PAD1dll); + if (hPAD1Driver == NULL) { + PAD1_configure = NULL; + SysMessage (_("Could not load Controller 1 plugin %s!"), PAD1dll); return -1; + } + drv = hPAD1Driver; + LoadPad1Sym1(init, "PADinit"); + LoadPad1Sym1(shutdown, "PADshutdown"); + LoadPad1Sym1(open, "PADopen"); + LoadPad1Sym1(close, "PADclose"); + LoadPad1Sym0(query, "PADquery"); + LoadPad1Sym1(readPort1, "PADreadPort1"); + LoadPad1Sym0(configure, "PADconfigure"); + LoadPad1Sym0(test, "PADtest"); + LoadPad1Sym0(about, 
"PADabout"); + LoadPad1Sym0(keypressed, "PADkeypressed"); + LoadPad1Sym0(startPoll, "PADstartPoll"); + LoadPad1Sym0(poll, "PADpoll"); + LoadPad1SymN(setSensitive, "PADsetSensitive"); + + padd.requestPadIndex = 0; + PAD1_readPort1(&padd); + multitap1 = padd.portMultitap; + + return 0; +} + +unsigned char CALLBACK PAD2__startPoll(int pad) { + int pad_index; + + reqPos = 0; + if (multitap1 == 0 && (multitap2 == 0 || multitap2 == 2)) { + pad_index = 1; + } else if(multitap1 == 1 && (multitap2 == 0 || multitap2 == 2)) { + pad_index = 4; + } else { + pad_index = 0; + } + + // just one pad is on port 1 : NO MULTITAP + if (multitap2 == 0) { + PadDataS padd; + padd.requestPadIndex = pad_index; + PAD2_readPort2(&padd); + _PADstartPoll(&padd); + } else { + // a multitap is plugged : refresh all pad. + int i; + PadDataS padd[4]; + for(i = 0; i < 4; i++) { + padd[i].requestPadIndex = i+pad_index; + PAD2_readPort2(&padd[i]); + } + _PADstartPollMultitap(padd); + } + //printf("\npad 2 : "); + return 0xff; +} + +unsigned char CALLBACK PAD2__poll(unsigned char value, int *more_data) { + char tmp; + if (multitap2 == 2) { + tmp = PADpollMultitap(1, value, more_data); + } else { + tmp = PADpoll_(1, value, more_data); + } + //printf("%2x:%2x, ",value,tmp); + return tmp; +} + +long CALLBACK PAD2__configure(void) { return 0; } +void CALLBACK PAD2__about(void) {} +long CALLBACK PAD2__test(void) { return 0; } +long CALLBACK PAD2__query(void) { return PSE_PAD_USE_PORT1 | PSE_PAD_USE_PORT2; } +long CALLBACK PAD2__keypressed() { return 0; } + +#define LoadPad2Sym1(dest, name) \ + LoadSym(PAD2_##dest, PAD##dest, name, TRUE); + +#define LoadPad2Sym0(dest, name) \ + LoadSym(PAD2_##dest, PAD##dest, name, FALSE); \ + if (PAD2_##dest == NULL) PAD2_##dest = (PAD##dest) PAD2__##dest; + +#define LoadPad2SymN(dest, name) \ + LoadSym(PAD2_##dest, PAD##dest, name, FALSE); + +static int LoadPAD2plugin(const char *PAD2dll) { + PadDataS padd; + void *drv; + + hPAD2Driver = SysLoadLibrary(PAD2dll); + if 
(hPAD2Driver == NULL) { + PAD2_configure = NULL; + SysMessage (_("Could not load Controller 2 plugin %s!"), PAD2dll); return -1; + } + drv = hPAD2Driver; + LoadPad2Sym1(init, "PADinit"); + LoadPad2Sym1(shutdown, "PADshutdown"); + LoadPad2Sym1(open, "PADopen"); + LoadPad2Sym1(close, "PADclose"); + LoadPad2Sym0(query, "PADquery"); + LoadPad2Sym1(readPort2, "PADreadPort2"); + LoadPad2Sym0(configure, "PADconfigure"); + LoadPad2Sym0(test, "PADtest"); + LoadPad2Sym0(about, "PADabout"); + LoadPad2Sym0(keypressed, "PADkeypressed"); + LoadPad2Sym0(startPoll, "PADstartPoll"); + LoadPad2Sym0(poll, "PADpoll"); + LoadPad2SymN(setSensitive, "PADsetSensitive"); + + padd.requestPadIndex = 0; + PAD2_readPort2(&padd); + multitap2 = padd.portMultitap; + + return 0; +} + +void *hNETDriver = NULL; + +void CALLBACK NET__setInfo(netInfo *info) {} +void CALLBACK NET__keypressed(int key) {} +long CALLBACK NET__configure(void) { return 0; } +long CALLBACK NET__test(void) { return 0; } +void CALLBACK NET__about(void) {} + +#define LoadNetSym1(dest, name) \ + LoadSym(NET_##dest, NET##dest, name, TRUE); + +#define LoadNetSymN(dest, name) \ + LoadSym(NET_##dest, NET##dest, name, FALSE); + +#define LoadNetSym0(dest, name) \ + LoadSym(NET_##dest, NET##dest, name, FALSE); \ + if (NET_##dest == NULL) NET_##dest = (NET##dest) NET__##dest; + +static int LoadNETplugin(const char *NETdll) { + void *drv; + + hNETDriver = SysLoadLibrary(NETdll); + if (hNETDriver == NULL) { + SysMessage (_("Could not load NetPlay plugin %s!"), NETdll); return -1; + } + drv = hNETDriver; + LoadNetSym1(init, "NETinit"); + LoadNetSym1(shutdown, "NETshutdown"); + LoadNetSym1(open, "NETopen"); + LoadNetSym1(close, "NETclose"); + LoadNetSymN(sendData, "NETsendData"); + LoadNetSymN(recvData, "NETrecvData"); + LoadNetSym1(sendPadData, "NETsendPadData"); + LoadNetSym1(recvPadData, "NETrecvPadData"); + LoadNetSym1(queryPlayer, "NETqueryPlayer"); + LoadNetSym1(pause, "NETpause"); + LoadNetSym1(resume, "NETresume"); + 
LoadNetSym0(setInfo, "NETsetInfo"); + LoadNetSym0(keypressed, "NETkeypressed"); + LoadNetSym0(configure, "NETconfigure"); + LoadNetSym0(test, "NETtest"); + LoadNetSym0(about, "NETabout"); + + return 0; +} + +#ifdef ENABLE_SIO1API + +void *hSIO1Driver = NULL; + +long CALLBACK SIO1__init(void) { return 0; } +long CALLBACK SIO1__shutdown(void) { return 0; } +long CALLBACK SIO1__open(void) { return 0; } +long CALLBACK SIO1__close(void) { return 0; } +long CALLBACK SIO1__configure(void) { return 0; } +long CALLBACK SIO1__test(void) { return 0; } +void CALLBACK SIO1__about(void) {} +void CALLBACK SIO1__pause(void) {} +void CALLBACK SIO1__resume(void) {} +long CALLBACK SIO1__keypressed(int key) { return 0; } +void CALLBACK SIO1__writeData8(unsigned char val) {} +void CALLBACK SIO1__writeData16(unsigned short val) {} +void CALLBACK SIO1__writeData32(unsigned long val) {} +void CALLBACK SIO1__writeStat16(unsigned short val) {} +void CALLBACK SIO1__writeStat32(unsigned long val) {} +void CALLBACK SIO1__writeMode16(unsigned short val) {} +void CALLBACK SIO1__writeMode32(unsigned long val) {} +void CALLBACK SIO1__writeCtrl16(unsigned short val) {} +void CALLBACK SIO1__writeCtrl32(unsigned long val) {} +void CALLBACK SIO1__writeBaud16(unsigned short val) {} +void CALLBACK SIO1__writeBaud32(unsigned long val) {} +unsigned char CALLBACK SIO1__readData8(void) { return 0; } +unsigned short CALLBACK SIO1__readData16(void) { return 0; } +unsigned long CALLBACK SIO1__readData32(void) { return 0; } +unsigned short CALLBACK SIO1__readStat16(void) { return 0; } +unsigned long CALLBACK SIO1__readStat32(void) { return 0; } +unsigned short CALLBACK SIO1__readMode16(void) { return 0; } +unsigned long CALLBACK SIO1__readMode32(void) { return 0; } +unsigned short CALLBACK SIO1__readCtrl16(void) { return 0; } +unsigned long CALLBACK SIO1__readCtrl32(void) { return 0; } +unsigned short CALLBACK SIO1__readBaud16(void) { return 0; } +unsigned long CALLBACK SIO1__readBaud32(void) { return 0; } 
+void CALLBACK SIO1__registerCallback(void (CALLBACK *callback)(void)) {}; + +void CALLBACK SIO1irq(void) { + psxHu32ref(0x1070) |= SWAPu32(0x100); +} + +#define LoadSio1Sym1(dest, name) \ + LoadSym(SIO1_##dest, SIO1##dest, name, TRUE); + +#define LoadSio1SymN(dest, name) \ + LoadSym(SIO1_##dest, SIO1##dest, name, FALSE); + +#define LoadSio1Sym0(dest, name) \ + LoadSym(SIO1_##dest, SIO1##dest, name, FALSE); \ + if (SIO1_##dest == NULL) SIO1_##dest = (SIO1##dest) SIO1__##dest; + +static int LoadSIO1plugin(const char *SIO1dll) { + void *drv; + + hSIO1Driver = SysLoadLibrary(SIO1dll); + if (hSIO1Driver == NULL) { + SysMessage (_("Could not load SIO1 plugin %s!"), SIO1dll); return -1; + } + drv = hSIO1Driver; + + LoadSio1Sym0(init, "SIO1init"); + LoadSio1Sym0(shutdown, "SIO1shutdown"); + LoadSio1Sym0(open, "SIO1open"); + LoadSio1Sym0(close, "SIO1close"); + LoadSio1Sym0(pause, "SIO1pause"); + LoadSio1Sym0(resume, "SIO1resume"); + LoadSio1Sym0(keypressed, "SIO1keypressed"); + LoadSio1Sym0(configure, "SIO1configure"); + LoadSio1Sym0(test, "SIO1test"); + LoadSio1Sym0(about, "SIO1about"); + LoadSio1Sym0(writeData8, "SIO1writeData8"); + LoadSio1Sym0(writeData16, "SIO1writeData16"); + LoadSio1Sym0(writeData32, "SIO1writeData32"); + LoadSio1Sym0(writeStat16, "SIO1writeStat16"); + LoadSio1Sym0(writeStat32, "SIO1writeStat32"); + LoadSio1Sym0(writeMode16, "SIO1writeMode16"); + LoadSio1Sym0(writeMode32, "SIO1writeMode32"); + LoadSio1Sym0(writeCtrl16, "SIO1writeCtrl16"); + LoadSio1Sym0(writeCtrl32, "SIO1writeCtrl32"); + LoadSio1Sym0(writeBaud16, "SIO1writeBaud16"); + LoadSio1Sym0(writeBaud32, "SIO1writeBaud32"); + LoadSio1Sym0(readData16, "SIO1readData16"); + LoadSio1Sym0(readData32, "SIO1readData32"); + LoadSio1Sym0(readStat16, "SIO1readStat16"); + LoadSio1Sym0(readStat32, "SIO1readStat32"); + LoadSio1Sym0(readMode16, "SIO1readMode16"); + LoadSio1Sym0(readMode32, "SIO1readMode32"); + LoadSio1Sym0(readCtrl16, "SIO1readCtrl16"); + LoadSio1Sym0(readCtrl32, "SIO1readCtrl32"); + 
LoadSio1Sym0(readBaud16, "SIO1readBaud16"); + LoadSio1Sym0(readBaud32, "SIO1readBaud32"); + LoadSio1Sym0(registerCallback, "SIO1registerCallback"); + + return 0; +} + +#endif + +int LoadPlugins() { + int ret; + char Plugin[MAXPATHLEN * 2]; + + ReleasePlugins(); + SysLibError(); + + if (UsingIso()) { + LoadCDRplugin(NULL); + } else { + sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Cdr); + if (LoadCDRplugin(Plugin) == -1) return -1; + } + + sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Gpu); + if (LoadGPUplugin(Plugin) == -1) return -1; + + sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Spu); + if (LoadSPUplugin(Plugin) == -1) return -1; + + sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Pad1); + if (LoadPAD1plugin(Plugin) == -1) return -1; + + sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Pad2); + if (LoadPAD2plugin(Plugin) == -1) return -1; + + if (strcmp("Disabled", Config.Net) == 0 || strcmp("", Config.Net) == 0) + Config.UseNet = FALSE; + else { + Config.UseNet = TRUE; + sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Net); + if (LoadNETplugin(Plugin) == -1) Config.UseNet = FALSE; + } + +#ifdef ENABLE_SIO1API + sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Sio1); + if (LoadSIO1plugin(Plugin) == -1) return -1; +#endif + + ret = CDR_init(); + if (ret < 0) { SysMessage (_("Error initializing CD-ROM plugin: %d"), ret); return -1; } + ret = GPU_init(); + if (ret < 0) { SysMessage (_("Error initializing GPU plugin: %d"), ret); return -1; } + ret = SPU_init(); + if (ret < 0) { SysMessage (_("Error initializing SPU plugin: %d"), ret); return -1; } + ret = PAD1_init(1); + if (ret < 0) { SysMessage (_("Error initializing Controller 1 plugin: %d"), ret); return -1; } + ret = PAD2_init(2); + if (ret < 0) { SysMessage (_("Error initializing Controller 2 plugin: %d"), ret); return -1; } + + if (Config.UseNet) { + ret = NET_init(); + if (ret < 0) { SysMessage (_("Error initializing NetPlay plugin: %d"), ret); return -1; } + } + +#ifdef 
ENABLE_SIO1API + ret = SIO1_init(); + if (ret < 0) { SysMessage (_("Error initializing SIO1 plugin: %d"), ret); return -1; } +#endif + + SysPrintf(_("Plugins loaded.\n")); + return 0; +} + +void ReleasePlugins() { + if (Config.UseNet) { + int ret = NET_close(); + if (ret < 0) Config.UseNet = FALSE; + } + NetOpened = FALSE; + + if (hCDRDriver != NULL || cdrIsoActive()) CDR_shutdown(); + if (hGPUDriver != NULL) GPU_shutdown(); + if (hSPUDriver != NULL) SPU_shutdown(); + if (hPAD1Driver != NULL) PAD1_shutdown(); + if (hPAD2Driver != NULL) PAD2_shutdown(); + + if (Config.UseNet && hNETDriver != NULL) NET_shutdown(); + + if (hCDRDriver != NULL) { SysCloseLibrary(hCDRDriver); hCDRDriver = NULL; } + if (hGPUDriver != NULL) { SysCloseLibrary(hGPUDriver); hGPUDriver = NULL; } + if (hSPUDriver != NULL) { SysCloseLibrary(hSPUDriver); hSPUDriver = NULL; } + if (hPAD1Driver != NULL) { SysCloseLibrary(hPAD1Driver); hPAD1Driver = NULL; } + if (hPAD2Driver != NULL) { SysCloseLibrary(hPAD2Driver); hPAD2Driver = NULL; } + + if (Config.UseNet && hNETDriver != NULL) { + SysCloseLibrary(hNETDriver); hNETDriver = NULL; + } + +#ifdef ENABLE_SIO1API + if (hSIO1Driver != NULL) { + SIO1_shutdown(); + SysCloseLibrary(hSIO1Driver); + hSIO1Driver = NULL; + } +#endif +} + +// for CD swap +int ReloadCdromPlugin() +{ + if (hCDRDriver != NULL || cdrIsoActive()) CDR_shutdown(); + if (hCDRDriver != NULL) { SysCloseLibrary(hCDRDriver); hCDRDriver = NULL; } + + if (UsingIso()) { + LoadCDRplugin(NULL); + } else { + char Plugin[MAXPATHLEN * 2]; + sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Cdr); + if (LoadCDRplugin(Plugin) == -1) return -1; + } + + return CDR_init(); +} + +void SetIsoFile(const char *filename) { + if (filename == NULL) { + IsoFile[0] = '\0'; + return; + } + strncpy(IsoFile, filename, MAXPATHLEN - 1); +} + +const char *GetIsoFile(void) { + return IsoFile; +} + +boolean UsingIso(void) { + return (IsoFile[0] != '\0'); +} + +void SetCdOpenCaseTime(s64 time) { + cdOpenCaseTime = time; 
+} diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index f74489f3d..c5140cce1 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -216,7 +216,7 @@ typedef long (CALLBACK* PADreadPort1)(PadDataS*); typedef long (CALLBACK* PADreadPort2)(PadDataS*); typedef long (CALLBACK* PADkeypressed)(void); typedef unsigned char (CALLBACK* PADstartPoll)(int); -typedef unsigned char (CALLBACK* PADpoll)(unsigned char); +typedef unsigned char (CALLBACK* PADpoll)(unsigned char, int *); typedef void (CALLBACK* PADsetSensitive)(int); // PAD function pointers @@ -379,6 +379,9 @@ const char *GetIsoFile(void); boolean UsingIso(void); void SetCdOpenCaseTime(s64 time); +extern void pl_gun_byte2(int port, unsigned char byte); +extern void plat_trigger_vibrate(int pad, int low, int high); + #ifdef __cplusplus } #endif diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index c5a439def..88f994f80 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -1,6 +1,6 @@ /*************************************************************************** * Copyright (C) 2019 Ryan Schultz, PCSX-df Team, PCSX team, gameblabla, * - * dmitrysmagin, senquack * + * dmitrysmagin, senquack * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -3774,18 +3774,12 @@ void psxBiosCnfLoaded(u32 tcb_cnt, u32 evcb_cnt, u32 stack) { } #define psxBios_PADpoll(pad) { \ - PAD##pad##_startPoll(pad); \ - pad_buf##pad[0] = 0; \ - pad_buf##pad[1] = PAD##pad##_poll(0x42); \ - if (!(pad_buf##pad[1] & 0x0f)) { \ - bufcount = 32; \ - } else { \ - bufcount = (pad_buf##pad[1] & 0x0f) * 2; \ - } \ - PAD##pad##_poll(0); \ + int i, more_data = 0; \ + pad_buf##pad[0] = PAD##pad##_startPoll(pad); \ + pad_buf##pad[1] = PAD##pad##_poll(0x42, &more_data); \ i = 2; \ - while (bufcount--) { \ - pad_buf##pad[i++] = PAD##pad##_poll(0); \ + while (more_data) { \ + pad_buf##pad[i++] = PAD##pad##_poll(0, 
&more_data); \ } \ } @@ -3970,7 +3964,6 @@ void hleExcPadCard1(void) if (loadRam32(A_PAD_IRQR_ENA)) { u8 *pad_buf1 = loadRam8ptr(A_PAD_INBUF + 0); u8 *pad_buf2 = loadRam8ptr(A_PAD_INBUF + 4); - int i, bufcount; psxBios_PADpoll(1); psxBios_PADpoll(2); diff --git a/libpcsxcore/sio.c b/libpcsxcore/sio.c index 7aa669bf5..ab6baa12a 100644 --- a/libpcsxcore/sio.c +++ b/libpcsxcore/sio.c @@ -84,60 +84,43 @@ char McdDisable[2]; #define SIO_CYCLES 535 void sioWrite8(unsigned char value) { -#ifdef PAD_LOG - PAD_LOG("sio write8 %x\n", value); + int more_data = 0; +#if 0 + s32 framec = psxRegs.cycle - rcnts[3].cycleStart; + printf("%d:%03d sio write8 %04x %02x\n", frame_counter, + (s32)(framec / (PSXCLK / 60 / 263.0f)), CtrlReg, value); #endif switch (padst) { - case 1: SIO_INT(SIO_CYCLES); + case 1: if ((value & 0x40) == 0x40) { padst = 2; parp = 1; - if (!Config.UseNet) { - switch (CtrlReg & 0x2002) { - case 0x0002: - buf[parp] = PAD1_poll(value); - break; - case 0x2002: - buf[parp] = PAD2_poll(value); - break; - } - }/* else { -// SysPrintf("%x: %x, %x, %x, %x\n", CtrlReg&0x2002, buf[2], buf[3], buf[4], buf[5]); - }*/ - - if (!(buf[parp] & 0x0f)) { - bufcount = 2 + 32; - } else { - bufcount = 2 + (buf[parp] & 0x0f) * 2; + switch (CtrlReg & 0x2002) { + case 0x0002: + buf[parp] = PAD1_poll(value, &more_data); + break; + case 0x2002: + buf[parp] = PAD2_poll(value, &more_data); + break; } - if (buf[parp] == 0x41) { - switch (value) { - case 0x43: - buf[1] = 0x43; - break; - case 0x45: - buf[1] = 0xf3; - break; - } + + if (more_data) { + bufcount = parp + 1; + SIO_INT(SIO_CYCLES); } } else padst = 0; return; case 2: parp++; -/* if (buf[1] == 0x45) { - buf[parp] = 0; - SIO_INT(SIO_CYCLES); - return; - }*/ - if (!Config.UseNet) { - switch (CtrlReg & 0x2002) { - case 0x0002: buf[parp] = PAD1_poll(value); break; - case 0x2002: buf[parp] = PAD2_poll(value); break; - } + switch (CtrlReg & 0x2002) { + case 0x0002: buf[parp] = PAD1_poll(value, &more_data); break; + case 0x2002: 
buf[parp] = PAD2_poll(value, &more_data); break; } - if (parp == bufcount) { padst = 0; return; } - SIO_INT(SIO_CYCLES); + if (more_data) { + bufcount = parp + 1; + SIO_INT(SIO_CYCLES); + } return; } @@ -227,44 +210,11 @@ void sioWrite8(unsigned char value) { case 0x01: // start pad StatReg |= RX_RDY; // Transfer is Ready - if (!Config.UseNet) { - switch (CtrlReg & 0x2002) { - case 0x0002: buf[0] = PAD1_startPoll(1); break; - case 0x2002: buf[0] = PAD2_startPoll(2); break; - } - } else { - if ((CtrlReg & 0x2002) == 0x0002) { - int i, j; - - PAD1_startPoll(1); - buf[0] = 0; - buf[1] = PAD1_poll(0x42); - if (!(buf[1] & 0x0f)) { - bufcount = 32; - } else { - bufcount = (buf[1] & 0x0f) * 2; - } - buf[2] = PAD1_poll(0); - i = 3; - j = bufcount; - while (j--) { - buf[i++] = PAD1_poll(0); - } - bufcount+= 3; - - if (NET_sendPadData(buf, bufcount) == -1) - netError(); - - if (NET_recvPadData(buf, 1) == -1) - netError(); - if (NET_recvPadData(buf + 128, 2) == -1) - netError(); - } else { - memcpy(buf, buf + 128, 32); - } + switch (CtrlReg & 0x2002) { + case 0x0002: buf[0] = PAD1_startPoll(1); break; + case 0x2002: buf[0] = PAD2_startPoll(2); break; } - - bufcount = 2; + bufcount = 1; parp = 0; padst = 1; SIO_INT(SIO_CYCLES); @@ -351,8 +301,10 @@ unsigned char sioRead8() { } } -#ifdef PAD_LOG - PAD_LOG("sio read8 ;ret = %x\n", ret); +#if 0 + s32 framec = psxRegs.cycle - rcnts[3].cycleStart; + printf("%d:%03d sio read8 %04x %02x\n", frame_counter, + (s32)((float)framec / (PSXCLK / 60 / 263.0f)), CtrlReg, ret); #endif return ret; } diff --git a/maemo/hildon.c b/maemo/hildon.c index 7e9cd9fe8..81af5d8ea 100644 --- a/maemo/hildon.c +++ b/maemo/hildon.c @@ -13,7 +13,6 @@ #include "../include/psemu_plugin_defs.h" #include "../libpcsxcore/cdrom.h" #include "../libpcsxcore/cdriso.h" -#include "../plugins/dfinput/main.h" #include "../frontend/libpicofe/readpng.h" #include "maemo_common.h" #include diff --git a/maemo/main.c b/maemo/main.c index 77dbcd6be..44fec658d 100644 --- 
a/maemo/main.c +++ b/maemo/main.c @@ -17,7 +17,6 @@ #include "../libpcsxcore/misc.h" #include "../libpcsxcore/cdriso.h" #include "../libpcsxcore/new_dynarec/new_dynarec.h" -#include "../plugins/dfinput/main.h" #include "../plugins/dfsound/spu_config.h" #include "maemo_common.h" @@ -393,7 +392,6 @@ int main(int argc, char **argv) if (Config.HLE) printf("Note: running without BIOS, expect compatibility problems\n"); - dfinput_activate(); pl_timing_prepare(Config.PsxType); while (1) diff --git a/plugins/dfinput/externals.h b/plugins/dfinput/externals.h deleted file mode 100644 index 2e216fdc1..000000000 --- a/plugins/dfinput/externals.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef __P_EXTERNALS_H__ -#define __P_EXTERNALS_H__ - -void dfinput_activate(void); - -/* get gunstate from emu frontend, - * xn, yn - layer position normalized to 0..1023 */ -#define GUNIN_TRIGGER (1<<0) -#define GUNIN_BTNA (1<<1) -#define GUNIN_BTNB (1<<2) -#define GUNIN_TRIGGER2 (1<<3) /* offscreen trigger */ -extern void pl_update_gun(int *xn, int *yn, int *xres, int *yres, int *in); - -/* vibration trigger to frontend */ -extern int in_enable_vibration; -extern void plat_trigger_vibrate(int pad, int low, int high); - -#endif /* __P_EXTERNALS_H__ */ diff --git a/plugins/dfinput/guncon.c b/plugins/dfinput/guncon.c deleted file mode 100644 index 981a7574e..000000000 --- a/plugins/dfinput/guncon.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * (C) Gražvydas "notaz" Ignotas, 2011 - * - * This work is licensed under the terms of any of these licenses - * (at your option): - * - GNU GPL, version 2 or later. - * - GNU LGPL, version 2.1 or later. - * See the COPYING file in the top-level directory. 
- */ - -#include -#include "main.h" - -static unsigned char buf[8]; - -unsigned char PADpoll_guncon(unsigned char value) -{ - if (CurByte == 0) { - CurCmd = value; - CurByte++; - return 0x63; // regardless of cmd - } - - if (CurCmd != 0x42 || CurByte >= 8) - return 0xff; // verified - - return buf[CurByte++]; -} - -unsigned char PADstartPoll_guncon(int pad) -{ - int x, y, xn = 0, yn = 0, in = 0, xres = 256, yres = 240; - CurByte = 0; - - buf[2] = buf[3] = 0xff; - pl_update_gun(&xn, &yn, &xres, &yres, &in); - - // while y = const + line counter, what is x? - // for 256 mode, hw dumped offsets x, y: 0x5a, 0x20 - //x = 0x5a + (356 * xn >> 10); - x = 0x5a - (xres - 256) / 3 + (((xres - 256) / 3 + 356) * xn >> 10); - y = 0x20 + (yres * yn >> 10); - - if (in & GUNIN_TRIGGER) - buf[3] &= ~0x20; - if (in & GUNIN_BTNA) - buf[2] &= ~0x08; - if (in & GUNIN_BTNB) - buf[3] &= ~0x40; - if (in & GUNIN_TRIGGER2) { - buf[3] &= ~0x20; - x = 1; - y = 10; - } - buf[4] = x; - buf[5] = x >> 8; - buf[6] = y; - buf[7] = y >> 8; - - return 0xff; -} - -void guncon_init(void) -{ - memset(buf, 0xff, sizeof(buf)); - buf[1] = 0x5a; -} - diff --git a/plugins/dfinput/main.c b/plugins/dfinput/main.c deleted file mode 100644 index 4f1d03f6c..000000000 --- a/plugins/dfinput/main.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * (C) Gražvydas "notaz" Ignotas, 2011 - * - * This work is licensed under the terms of any of these licenses - * (at your option): - * - GNU GPL, version 2 or later. - * - GNU LGPL, version 2.1 or later. - * See the COPYING file in the top-level directory. 
- */ - -#include "main.h" - -unsigned char CurPad, CurByte, CurCmd, CmdLen; - -/* since this is not a proper plugin, so we'll hook emu internals in a hackish way like this */ -extern void *PAD1_startPoll, *PAD1_poll; -extern void *PAD2_startPoll, *PAD2_poll; -extern unsigned char CALLBACK PAD1__startPoll(int pad); -extern unsigned char CALLBACK PAD2__startPoll(int pad); -extern unsigned char CALLBACK PAD1__poll(unsigned char value); -extern unsigned char CALLBACK PAD2__poll(unsigned char value); - -#ifndef HAVE_LIBRETRO - -static int old_controller_type1 = -1, old_controller_type2 = -1; - -#define select_pad(n) \ - if (pad.controllerType != old_controller_type##n) \ - { \ - switch (pad.controllerType) \ - { \ - case PSE_PAD_TYPE_ANALOGPAD: \ - PAD##n##_startPoll = PADstartPoll_pad; \ - PAD##n##_poll = PADpoll_pad; \ - pad_init(); \ - break; \ - case PSE_PAD_TYPE_GUNCON: \ - PAD##n##_startPoll = PADstartPoll_guncon; \ - PAD##n##_poll = PADpoll_guncon; \ - guncon_init(); \ - break; \ - case PSE_PAD_TYPE_NEGCON: \ - case PSE_PAD_TYPE_GUN: \ - default: \ - PAD##n##_startPoll = PAD##n##__startPoll; \ - PAD##n##_poll = PAD##n##__poll; \ - break; \ - } \ - } - -void dfinput_activate(void) -{ - PadDataS pad; - - pad.portMultitap = -1; - pad.requestPadIndex = 0; - PAD1_readPort1(&pad); - select_pad(1); - - pad.requestPadIndex = 1; - PAD2_readPort2(&pad); - select_pad(2); -} - -#else // use libretro's libpcsxcore/plugins.c code - -void dfinput_activate(void) -{ -} - -#endif diff --git a/plugins/dfinput/main.h b/plugins/dfinput/main.h deleted file mode 100644 index 96cebfac4..000000000 --- a/plugins/dfinput/main.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __P_MAIN_H__ -#define __P_MAIN_H__ - -#include "psemu_plugin_defs.h" -#include "externals.h" - -extern unsigned char CurPad, CurByte, CurCmd, CmdLen; - -/* analog pad */ -unsigned char PADpoll_pad(unsigned char value); -unsigned char PADstartPoll_pad(int pad); -void pad_init(void); - -/* GunCon */ -unsigned char 
PADpoll_guncon(unsigned char value); -unsigned char PADstartPoll_guncon(int pad); -void guncon_init(void); - -/* get button state and pad type from main emu */ -extern long (*PAD1_readPort1)(PadDataS *pad); -extern long (*PAD2_readPort2)(PadDataS *pad); - -#endif /* __P_MAIN_H__ */ diff --git a/plugins/dfinput/pad.c b/plugins/dfinput/pad.c deleted file mode 100644 index 3e333660c..000000000 --- a/plugins/dfinput/pad.c +++ /dev/null @@ -1,308 +0,0 @@ -/* - * Copyright (c) 2009, Wei Mingzhi . - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - * this is only pure emulation code to handle analogs, - * extracted from dfinput. - */ - -#include - -#include "psemu_plugin_defs.h" -#include "main.h" - -enum { - ANALOG_LEFT = 0, - ANALOG_RIGHT, - - ANALOG_TOTAL -}; - -enum { - CMD_READ_DATA_AND_VIBRATE = 0x42, - CMD_CONFIG_MODE = 0x43, - CMD_SET_MODE_AND_LOCK = 0x44, - CMD_QUERY_MODEL_AND_MODE = 0x45, - CMD_QUERY_ACT = 0x46, // ?? - CMD_QUERY_COMB = 0x47, // ?? - CMD_QUERY_MODE = 0x4C, // QUERY_MODE ?? 
- CMD_VIBRATION_TOGGLE = 0x4D, -}; - -#ifndef HAVE_LIBRETRO -static struct { - uint8_t PadMode; - uint8_t PadID; - uint8_t ConfigMode; - PadDataS pad; -} padstate[2]; - -static uint8_t stdpar[2][8] = { - {0xFF, 0x5A, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}, - {0xFF, 0x5A, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80} -}; - -static uint8_t unk46[2][8] = { - {0xFF, 0x5A, 0x00, 0x00, 0x01, 0x02, 0x00, 0x0A}, - {0xFF, 0x5A, 0x00, 0x00, 0x01, 0x02, 0x00, 0x0A} -}; - -static uint8_t unk47[2][8] = { - {0xFF, 0x5A, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00}, - {0xFF, 0x5A, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00} -}; - -static uint8_t unk4c[2][8] = { - {0xFF, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xFF, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} -}; - -static uint8_t unk4d[2][8] = { - {0xFF, 0x5A, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, - {0xFF, 0x5A, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF} -}; - -static uint8_t stdcfg[2][8] = { - {0xFF, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xFF, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} -}; - -static uint8_t stdmode[2][8] = { - {0xFF, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xFF, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} -}; - -static uint8_t stdmodel[2][8] = { - {0xFF, - 0x5A, - 0x01, // 03 - dualshock2, 01 - dualshock - 0x02, // number of modes - 0x01, // current mode: 01 - analog, 00 - digital - 0x02, - 0x01, - 0x00}, - {0xFF, - 0x5A, - 0x01, // 03 - dualshock2, 01 - dualshock - 0x02, // number of modes - 0x01, // current mode: 01 - analog, 00 - digital - 0x02, - 0x01, - 0x00} -}; - -static uint8_t *buf; - -static uint8_t do_cmd(void) -{ - PadDataS *pad = &padstate[CurPad].pad; - int pad_num = CurPad; - - CmdLen = 8; - switch (CurCmd) { - case CMD_SET_MODE_AND_LOCK: - buf = stdmode[pad_num]; - return 0xF3; - - case CMD_QUERY_MODEL_AND_MODE: - buf = stdmodel[pad_num]; - buf[4] = padstate[pad_num].PadMode; - return 0xF3; - - case CMD_QUERY_ACT: - buf = unk46[pad_num]; - return 0xF3; - - case CMD_QUERY_COMB: - buf = unk47[pad_num]; - return 0xF3; - - 
case CMD_QUERY_MODE: - buf = unk4c[pad_num]; - return 0xF3; - - case CMD_VIBRATION_TOGGLE: - buf = unk4d[pad_num]; - return 0xF3; - - case CMD_CONFIG_MODE: - if (padstate[pad_num].ConfigMode) { - buf = stdcfg[pad_num]; - return 0xF3; - } - // else FALLTHROUGH - - case CMD_READ_DATA_AND_VIBRATE: - default: - buf = stdpar[pad_num]; - - buf[2] = pad->buttonStatus; - buf[3] = pad->buttonStatus >> 8; - - if (padstate[pad_num].PadMode == 1) { - buf[4] = pad->rightJoyX; - buf[5] = pad->rightJoyY; - buf[6] = pad->leftJoyX; - buf[7] = pad->leftJoyY; - } else { - CmdLen = 4; - } - - return padstate[pad_num].PadID; - } -} - -static void do_cmd2(unsigned char value) -{ - switch (CurCmd) { - case CMD_CONFIG_MODE: - padstate[CurPad].ConfigMode = value; - break; - - case CMD_SET_MODE_AND_LOCK: - padstate[CurPad].PadMode = value; - padstate[CurPad].PadID = value ? 0x73 : 0x41; - break; - - case CMD_QUERY_ACT: - switch (value) { - case 0: // default - buf[5] = 0x02; - buf[6] = 0x00; - buf[7] = 0x0A; - break; - - case 1: // Param std conf change - buf[5] = 0x01; - buf[6] = 0x01; - buf[7] = 0x14; - break; - } - break; - - case CMD_QUERY_MODE: - switch (value) { - case 0: // mode 0 - digital mode - buf[5] = PSE_PAD_TYPE_STANDARD; - break; - - case 1: // mode 1 - analog mode - buf[5] = PSE_PAD_TYPE_ANALOGPAD; - break; - } - break; - } -} - -static void do_vibration(unsigned char value) -{ - int changed = 0; - int i; - - switch (CurCmd) { - case CMD_READ_DATA_AND_VIBRATE: - for (i = 0; i < 2; i++) { - if (padstate[CurPad].pad.Vib[i] == CurByte - && padstate[CurPad].pad.VibF[i] != value) { - padstate[CurPad].pad.VibF[i] = value; - changed = 1; - } - } - - if (!in_enable_vibration || !changed) - break; - - plat_trigger_vibrate(CurPad, - padstate[CurPad].pad.VibF[0], - padstate[CurPad].pad.VibF[1]); - break; - case CMD_VIBRATION_TOGGLE: - for (i = 0; i < 2; i++) { - if (padstate[CurPad].pad.Vib[i] == CurByte) - buf[CurByte] = 0; - } - if (value < 2) { - padstate[CurPad].pad.Vib[value] = 
CurByte; - if((padstate[CurPad].PadID & 0x0f) < (CurByte - 1) / 2) { - padstate[CurPad].PadID = (padstate[CurPad].PadID & 0xf0) + (CurByte - 1) / 2; - } - } - break; - } -} -#endif - -#if 0 -#include -unsigned char PADpoll_(unsigned char value); -unsigned char PADpoll(unsigned char value) { - unsigned char b = CurByte, r = PADpoll_(value); - printf("poll[%d] %02x %02x\n", b, value, r); - return r; -} -#define PADpoll PADpoll_ -#endif - -#ifndef HAVE_LIBRETRO -unsigned char PADpoll_pad(unsigned char value) { - if (CurByte == 0) { - CurCmd = value; - CurByte++; - - // Don't enable Analog/Vibration for a standard pad - if (padstate[CurPad].pad.controllerType != PSE_PAD_TYPE_ANALOGPAD) - CurCmd = CMD_READ_DATA_AND_VIBRATE; - - return do_cmd(); - } - - if (CurByte >= CmdLen) - return 0xff; // verified - - if (CurByte == 2) - do_cmd2(value); - - if (padstate[CurPad].pad.controllerType == PSE_PAD_TYPE_ANALOGPAD) - do_vibration(value); - - return buf[CurByte++]; -} - -unsigned char PADstartPoll_pad(int pad) { - CurPad = pad - 1; - CurByte = 0; - - if (pad == 1) - PAD1_readPort1(&padstate[0].pad); - else - PAD2_readPort2(&padstate[1].pad); - - return 0xFF; -} - -void pad_init(void) -{ - int i; - - PAD1_readPort1(&padstate[0].pad); - PAD2_readPort2(&padstate[1].pad); - - for (i = 0; i < 2; i++) { - padstate[i].PadID = padstate[i].pad.controllerType == PSE_PAD_TYPE_ANALOGPAD ? 
0x73 : 0x41; - padstate[i].PadMode = padstate[i].pad.controllerType == PSE_PAD_TYPE_ANALOGPAD; - } -} -#endif From 8aecce153fe3f821a078cb9db00eac5c1540bb03 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 13 Sep 2023 23:58:27 +0300 Subject: [PATCH 364/597] psxbios: unbreak input --- libpcsxcore/psxbios.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 88f994f80..996a242b7 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -3775,8 +3775,10 @@ void psxBiosCnfLoaded(u32 tcb_cnt, u32 evcb_cnt, u32 stack) { #define psxBios_PADpoll(pad) { \ int i, more_data = 0; \ - pad_buf##pad[0] = PAD##pad##_startPoll(pad); \ + PAD##pad##_startPoll(pad); \ pad_buf##pad[1] = PAD##pad##_poll(0x42, &more_data); \ + pad_buf##pad[0] = more_data ? 0 : 0xff; \ + PAD##pad##_poll(0, &more_data); \ i = 2; \ while (more_data) { \ pad_buf##pad[i++] = PAD##pad##_poll(0, &more_data); \ From 27f734f970b95071264238cd1382632d880b4611 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 14 Sep 2023 00:10:41 +0300 Subject: [PATCH 365/597] psxbios: fix warnings --- Makefile | 2 +- libpcsxcore/psxbios.c | 24 +++++++++++------------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index ef4a1f741..86473ab45 100644 --- a/Makefile +++ b/Makefile @@ -170,7 +170,7 @@ OBJS += libchdr/deps/lzma-19.00/src/Alloc.o libchdr/deps/lzma-19.00/src/Bra86.o OBJS += libchdr/deps/lzma-19.00/src/LzFind.o libchdr/deps/lzma-19.00/src/Lzma86Dec.o libchdr/deps/lzma-19.00/src/LzmaDec.o libchdr/deps/lzma-19.00/src/LzmaEnc.o libchdr/deps/lzma-19.00/src/Sort.o CFLAGS += -DHAVE_CHD -Ilibchdr/include libpcsxcore/cdriso.o: CFLAGS += -Wno-unused-function -libchdr/src/%.o: CFLAGS += -Wno-unused -Ilibchdr/deps/lzma-19.00/include +libchdr/src/%.o: CFLAGS += -Wno-unused -Ilibchdr/deps/lzma-19.00/include -std=gnu11 libchdr/deps/lzma-19.00/src/%.o: CFLAGS += -Wno-unused -D_7ZIP_ST -Ilibchdr/deps/lzma-19.00/include #endif diff 
--git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 996a242b7..fb7d13d17 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -38,10 +38,6 @@ #include "psxinterpreter.h" #include -#if (defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__) -#pragma GCC diagnostic ignored "-Wpointer-sign" -#endif - #ifndef PSXBIOS_LOG //#define PSXBIOS_LOG printf #define PSXBIOS_LOG(...) @@ -2596,7 +2592,7 @@ static void bufile(const u8 *mcd_data, u32 dir_) { } for (; nfile <= 15; nfile++, blocks++) { const u8 *data2 = mcd_data + 128 * nfile; - const char *name = data2 + 0x0a; + const char *name = (const char *)data2 + 0x0a; if ((data2[0] & 0xF0) != 0x50 || name[0]) break; } @@ -2629,11 +2625,11 @@ static void psxBios_firstfile() { // 42 if (!strncmp(pa0, "bu00", 4)) { // firstfile() calls _card_read() internally, so deliver it's event DeliverEvent(0xf0000011, 0x0004); - bufile(Mcd1Data, a1); + bufile((u8 *)Mcd1Data, a1); } else if (!strncmp(pa0, "bu10", 4)) { // firstfile() calls _card_read() internally, so deliver it's event DeliverEvent(0xf0000011, 0x0004); - bufile(Mcd2Data, a1); + bufile((u8 *)Mcd2Data, a1); } } @@ -2649,9 +2645,9 @@ void psxBios_nextfile() { // 43 v0 = 0; if (!strncmp(ffile, "bu00", 4)) - bufile(Mcd1Data, a0); + bufile((u8 *)Mcd1Data, a0); else if (!strncmp(ffile, "bu10", 4)) - bufile(Mcd2Data, a0); + bufile((u8 *)Mcd2Data, a0); pc0 = ra; } @@ -3316,6 +3312,7 @@ void psxBiosSetupBootState(void) void psxBiosInit() { u32 *ptr, *ram32, *rom32; + char *romc; int i; uLongf len; @@ -3656,10 +3653,11 @@ void psxBiosInit() { rom32 = (u32 *)psxR; rom32[0x100/4] = SWAP32(0x19951204); rom32[0x104/4] = SWAP32(3); - strcpy(psxR + 0x108, "PCSX authors"); - strcpy(psxR + 0x12c, "CEX-3000 PCSX HLE"); // see psxBios_GetSystemInfo - strcpy(psxR + 0x7ff32, "System ROM Version 2.2 12/04/95 A"); - strcpy(psxR + 0x7ff54, "GPL-2.0-or-later"); + romc = (char *)psxR; + strcpy(romc + 0x108, "PCSX authors"); + strcpy(romc + 0x12c, "CEX-3000 PCSX HLE"); // see 
psxBios_GetSystemInfo + strcpy(romc + 0x7ff32, "System ROM Version 2.2 12/04/95 A"); + strcpy(romc + 0x7ff54, "GPL-2.0-or-later"); // fonts len = 0x80000 - 0x66000; From bd9ad3d8eb63cee4e69913f121548badf834dd9e Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 14 Sep 2023 01:19:28 +0300 Subject: [PATCH 366/597] yet more timing hacks libretro/pcsx_rearmed#758 --- libpcsxcore/database.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 5edb9611a..9db351c31 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -53,6 +53,7 @@ static const struct } cycle_multiplier_overrides[] = { + /* note: values are = (10000 / gui_option) */ /* Internal Section - fussy about timings */ { "SLPS01868", 202 }, /* Super Robot Taisen Alpha - on the edge with 175, @@ -73,6 +74,12 @@ cycle_multiplier_overrides[] = { "SLES01549", 222 }, { "SLES02063", 222 }, { "SLES02064", 222 }, + /* Judge Dredd - could also be poor MDEC timing */ + { "SLUS00630", 128 }, + { "SLES00755", 128 }, + /* Digimon World */ + { "SLUS01032", 153 }, + { "SLES02914", 153 }, }; /* Function for automatic patching according to GameID. 
*/ From f6eb0b1c75fd9103a1ad18aed5d00aa0c41fa24e Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 15 Sep 2023 01:18:13 +0300 Subject: [PATCH 367/597] cleanup the input mess, part2 --- frontend/libretro.c | 17 +- frontend/libretro_core_options.h | 4 +- frontend/plugin.c | 10 +- frontend/plugin_lib.c | 6 + include/psemu_plugin_defs.h | 10 +- libpcsxcore/plugins.c | 267 +++++++++++++------------------ libpcsxcore/plugins.h | 1 + 7 files changed, 140 insertions(+), 175 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 8d6b5cc7e..965f93085 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -80,6 +80,7 @@ static void *vout_buf; static void *vout_buf_ptr; static int vout_width, vout_height; static int vout_fb_dirty; +static int psx_w, psx_h; static bool vout_can_dupe; static bool duping_enable; static bool found_bios; @@ -239,6 +240,8 @@ static void vout_set_mode(int w, int h, int raw_w, int raw_h, int bpp) { vout_width = w; vout_height = h; + psx_w = raw_w; + psx_h = raw_h; if (previous_width != vout_width || previous_height != vout_height) { @@ -501,6 +504,12 @@ void pl_timing_prepare(int is_pal) is_pal_mode = is_pal; } +void plat_get_psx_resolution(int *xres, int *yres) +{ + *xres = psx_w; + *yres = psx_h; +} + void plat_trigger_vibrate(int pad, int low, int high) { if (!rumble_cb) @@ -2475,13 +2484,13 @@ static void update_input_guncon(int port, int ret) //Offscreen value is chosen to be well out of range of any possible scaling done via core options if (input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_IS_OFFSCREEN) || input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_RELOAD)) { - in_analog_left[port][0] = (65536 - 512) * 64; - in_analog_left[port][1] = (65536 - 512) * 64; + in_analog_left[port][0] = 65536; + in_analog_left[port][1] = 65536; } else { - in_analog_left[port][0] = (gunx * GunconAdjustRatioX) + (GunconAdjustX * 655); - in_analog_left[port][1] = (guny * GunconAdjustRatioY) + 
(GunconAdjustY * 655); + in_analog_left[port][0] = ((gunx * GunconAdjustRatioX) + (GunconAdjustX * 655)) / 64 + 512; + in_analog_left[port][1] = ((guny * GunconAdjustRatioY) + (GunconAdjustY * 655)) / 64 + 512; } //GUNCON has 3 controls, Trigger,A,B which equal Circle,Start,Cross diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index ef25f7b6c..8e746805e 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -854,9 +854,9 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, { "pcsx_rearmed_multitap", - "Multitap Mode (Restart)", + "Multitap Mode", NULL, - "Connect a virtual PSX Multitap peripheral to either controller 'Port 1' or controller 'Port 2' for 5 player simultaneous input, or to both 'Ports 1 and 2' for 8 player input. Mutlitap usage requires compatible games. To avoid input defects, option should be disabled when running games that have no support for Multitap features.", + "Connect a virtual PSX Multitap peripheral to either controller 'Port 1' or controller 'Port 2' for 5 player simultaneous input, or to both 'Ports 1 and 2' for 8 player input. 
Mutlitap usage requires compatible games.", NULL, "input", { diff --git a/frontend/plugin.c b/frontend/plugin.c index 2c95a67af..3a0710a58 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -74,10 +74,7 @@ static long CALLBACK PADreadPort1(PadDataS *pad) { pad->controllerType = in_type[pad_index]; pad->buttonStatus = ~in_keystate[pad_index]; - if (multitap1 == 1) - pad->portMultitap = 1; - else - pad->portMultitap = 0; + pad->portMultitap = multitap1; if (in_type[pad_index] == PSE_PAD_TYPE_ANALOGJOY || in_type[pad_index] == PSE_PAD_TYPE_ANALOGPAD || in_type[pad_index] == PSE_PAD_TYPE_NEGCON || in_type[pad_index] == PSE_PAD_TYPE_GUNCON) { @@ -105,10 +102,7 @@ static long CALLBACK PADreadPort2(PadDataS *pad) { pad->controllerType = in_type[pad_index]; pad->buttonStatus = ~in_keystate[pad_index]; - if (multitap2 == 1) - pad->portMultitap = 2; - else - pad->portMultitap = 0; + pad->portMultitap = multitap2; if (in_type[pad_index] == PSE_PAD_TYPE_ANALOGJOY || in_type[pad_index] == PSE_PAD_TYPE_ANALOGPAD || in_type[pad_index] == PSE_PAD_TYPE_NEGCON || in_type[pad_index] == PSE_PAD_TYPE_GUNCON) { diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 8a6b6adb1..0f0231237 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -633,6 +633,12 @@ void pl_gun_byte2(int port, unsigned char byte) { } +void plat_get_psx_resolution(int *xres, int *yres) +{ + *xres = psx_w; + *yres = psx_h; +} + #define MAX_LAG_FRAMES 3 #define tvdiff(tv, tv_old) \ diff --git a/include/psemu_plugin_defs.h b/include/psemu_plugin_defs.h index 2d688f207..40a67605b 100644 --- a/include/psemu_plugin_defs.h +++ b/include/psemu_plugin_defs.h @@ -200,9 +200,6 @@ typedef struct // controller type - fill it withe predefined values above unsigned char controllerType; - //0 : no multitap between psx and pad - //1 : multitap between psx and pad on port 1 - //2 : multitap between psx and pad on port 2 int portMultitap; int requestPadIndex; @@ -223,8 +220,11 @@ typedef struct 
//configuration mode Request 0x43 int configMode; - unsigned char txData[32]; - unsigned char reserved[56]; + unsigned char txData[34]; + + unsigned char multitapLongModeEnabled; + unsigned char PadMode; // 0 : digital 1: analog + unsigned char reserved[52]; //Lightgun values int absoluteX,absoluteY; diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index a34969f42..93af3d02c 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -351,28 +351,14 @@ extern int in_type[8]; void *hPAD1Driver = NULL; void *hPAD2Driver = NULL; -static int multitap1; -static int multitap2; -//Pad information, keystate, mode, config mode, vibration -static PadDataS pad[8]; +// Pad information, keystate, mode, config mode, vibration +static PadDataS pads[8]; static int reqPos, respSize; -static int ledStateReq44[8]; -static int PadMode[8]; /* 0 : digital 1: analog */ static unsigned char buf[256]; -static unsigned char bufMulti[34] = { 0x80, 0x5a, - 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - -unsigned char stdpar[8] = { 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; -unsigned char multitappar[34] = { 0x80, 0x5a, - 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +static unsigned char stdpar[8] = { 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; //response for request 44, 45, 46, 47, 4C, 4D static unsigned char resp45[8] = {0xF3, 0x5A, 0x01, 0x02, 0x00, 0x02, 0x01, 0x00}; @@ -491,17 +477,13 @@ enum { }; - - -//NO MULTITAP - -void initBufForRequest(int padIndex, char value){ +static void initBufForRequest(int padIndex, char value) { switch (value){ //Pad keystate already in buffer //case CMD_READ_DATA_AND_VIBRATE : // break; case CMD_CONFIG_MODE : - if 
(pad[padIndex].configMode == 1) { + if (pads[padIndex].configMode) { memcpy(buf, resp43, 8); break; } @@ -512,7 +494,7 @@ void initBufForRequest(int padIndex, char value){ break; case CMD_QUERY_MODEL_AND_MODE : memcpy(buf, resp45, 8); - buf[4] = PadMode[padIndex]; + buf[4] = pads[padIndex].PadMode; break; case CMD_QUERY_ACT : memcpy(buf, resp46_00, 8); @@ -550,25 +532,22 @@ void initBufForRequest(int padIndex, char value){ } } - - - static void reqIndex2Treatment(int padIndex, char value) { - switch (pad[padIndex].txData[0]) { + switch (pads[padIndex].txData[0]) { case CMD_CONFIG_MODE : //0x43 if (value == 0) { - pad[padIndex].configMode = 0; + pads[padIndex].configMode = 0; } else { - pad[padIndex].configMode = 1; + pads[padIndex].configMode = 1; } break; case CMD_SET_MODE_AND_LOCK : //0x44 store the led state for change mode if the next value = 0x02 //0x01 analog ON //0x00 analog OFF - ledStateReq44[padIndex] = value; - PadMode[padIndex] = value; + //ledStateReq44[padIndex] = value; + pads[padIndex].PadMode = value; break; case CMD_QUERY_ACT : //0x46 @@ -587,26 +566,24 @@ static void reqIndex2Treatment(int padIndex, char value) { break; case CMD_READ_DATA_AND_VIBRATE: //mem the vibration value for small motor; - pad[padIndex].Vib[0] = value; + pads[padIndex].Vib[0] = value; break; } } -void vibrate(int padIndex){ - if (pad[padIndex].Vib[0] != pad[padIndex].VibF[0] || pad[padIndex].Vib[1] != pad[padIndex].VibF[1]) { +static void vibrate(int padIndex) { + PadDataS *pad = &pads[padIndex]; + if (pad->Vib[0] != pad->VibF[0] || pad->Vib[1] != pad->VibF[1]) { //value is different update Value and call libretro for vibration - pad[padIndex].VibF[0] = pad[padIndex].Vib[0]; - pad[padIndex].VibF[1] = pad[padIndex].Vib[1]; - plat_trigger_vibrate(padIndex, pad[padIndex].VibF[0], pad[padIndex].VibF[1]); + pad->VibF[0] = pad->Vib[0]; + pad->VibF[1] = pad->Vib[1]; + plat_trigger_vibrate(padIndex, pad->VibF[0], pad->VibF[1]); //printf("vibration pad %i", padIndex); } } - - - 
-//Build response for 0x42 request Pad in port -void _PADstartPoll(PadDataS *pad) { +// Build response for 0x42 request Pad in port +static void PADstartPoll_(PadDataS *pad) { switch (pad->controllerType) { case PSE_PAD_TYPE_MOUSE: stdpar[0] = 0x12; @@ -636,25 +613,25 @@ void _PADstartPoll(PadDataS *pad) { stdpar[2] = pad->buttonStatus & 0xff; stdpar[3] = pad->buttonStatus >> 8; - //This code assumes an X resolution of 256 and a Y resolution of 240 - int xres = 256; - int yres = 240; - - //The code wants an input range for x and y of 0-1023 we passed in -32767 -> 32767 - int absX = (pad->absoluteX / 64) + 512; - int absY = (pad->absoluteY / 64) + 512; + int absX = pad->absoluteX; + int absY = pad->absoluteY; + int xres = 256, yres = 240; if (absX == 65536 || absY == 65536) { - stdpar[4] = 0x01; - stdpar[5] = 0x00; - stdpar[6] = 0x0A; - stdpar[7] = 0x00; + stdpar[4] = 0x01; + stdpar[5] = 0x00; + stdpar[6] = 0x0A; + stdpar[7] = 0x00; } else { - stdpar[4] = 0x5a - (xres - 256) / 3 + (((xres - 256) / 3 + 356) * absX >> 10); - stdpar[5] = (0x5a - (xres - 256) / 3 + (((xres - 256) / 3 + 356) * absX >> 10)) >> 8; - stdpar[6] = 0x20 + (yres * absY >> 10); - stdpar[7] = (0x20 + (yres * absY >> 10)) >> 8; + plat_get_psx_resolution(&xres, &yres); + int x = 0x5a - (xres - 256) / 3 + (((xres - 256) / 3 + 356) * absX >> 10); + int y = 0x20 + (yres * absY >> 10); + + stdpar[4] = x; + stdpar[5] = x >> 8; + stdpar[6] = y; + stdpar[7] = y >> 8; } memcpy(buf, stdpar, 8); @@ -706,23 +683,9 @@ void _PADstartPoll(PadDataS *pad) { } } - -//Build response for 0x42 request Multitap in port -//Response header for multitap : 0x80, 0x5A, (Pad information port 1-2A), (Pad information port 1-2B), (Pad information port 1-2C), (Pad information port 1-2D) -void _PADstartPollMultitap(PadDataS* padd) { - int i, offset; - for(i = 0; i < 4; i++) { - offset = 2 + (i * 8); - _PADstartPoll(&padd[i]); - memcpy(multitappar+offset, stdpar, 8); - } - memcpy(bufMulti, multitappar, 34); - respSize = 34; -} - 
-static void PADpoll_dualshock(int port, unsigned char value) +static void PADpoll_dualshock(int port, unsigned char value, int pos) { - switch (reqPos) { + switch (pos) { case 0: initBufForRequest(port, value); break; @@ -730,9 +693,9 @@ static void PADpoll_dualshock(int port, unsigned char value) reqIndex2Treatment(port, value); break; case 3: - if (pad[port].txData[0] == CMD_READ_DATA_AND_VIBRATE) { + if (pads[port].txData[0] == CMD_READ_DATA_AND_VIBRATE) { // vibration value for the Large motor - pad[port].Vib[1] = value; + pads[port].Vib[1] = value; vibrate(port); } @@ -740,72 +703,90 @@ static void PADpoll_dualshock(int port, unsigned char value) } } -static unsigned char PADpoll_(int port, unsigned char value, int *more_data) { - if (reqPos < sizeof(pad[port].txData)) - pad[port].txData[reqPos] = value; - - if (reqPos == 0 && value != 0x42 && in_type[port] != PSE_PAD_TYPE_ANALOGPAD) +static unsigned char PADpoll_(int port, unsigned char value, int pos, int *more_data) { + if (pos == 0 && value != 0x42 && in_type[port] != PSE_PAD_TYPE_ANALOGPAD) respSize = 1; switch (in_type[port]) { case PSE_PAD_TYPE_ANALOGPAD: - PADpoll_dualshock(port, value); + PADpoll_dualshock(port, value, pos); break; case PSE_PAD_TYPE_GUN: - if (reqPos == 2) + if (pos == 2) pl_gun_byte2(port, value); break; } - *more_data = reqPos < respSize - 1; - if (reqPos >= respSize) + *more_data = pos < respSize - 1; + if (pos >= respSize) return 0xff; // no response/HiZ - return buf[reqPos++]; + return buf[pos]; } -static unsigned char PADpollMultitap(int port, unsigned char value, int *more_data) { - *more_data = reqPos < respSize - 1; - if (reqPos >= respSize) return 0xff; - return bufMulti[reqPos++]; +// response: 0x80, 0x5A, 8 bytes each for ports A, B, C, D +static unsigned char PADpollMultitap(int port, unsigned char value, int pos, int *more_data) { + unsigned int devByte, dev; + int unused = 0; + + if (pos == 0) { + *more_data = (value == 0x42); + return 0x80; + } + *more_data = pos < 34 
- 1; + if (pos == 1) + return 0x5a; + if (pos >= 34) + return 0xff; + + devByte = pos - 2; + dev = devByte / 8; + if (devByte % 8 == 0) + PADstartPoll_(&pads[port + dev]); + return PADpoll_(port + dev, value, devByte % 8, &unused); } +static unsigned char PADpollMain(int port, unsigned char value, int *more_data) { + unsigned char ret; + int pos = reqPos++; + + if (pos < sizeof(pads[port].txData)) + pads[port].txData[pos] = value; + if (!pads[port].portMultitap || !pads[port].multitapLongModeEnabled) + ret = PADpoll_(port, value, pos, more_data); + else + ret = PADpollMultitap(port, value, pos, more_data); + return ret; + +} // refresh the button state on port 1. // int pad is not needed. -unsigned char CALLBACK PAD1__startPoll(int pad) { +unsigned char CALLBACK PAD1__startPoll(int unused) { + int i; + reqPos = 0; - // first call the pad provide if a multitap is connected between the psx and himself - // just one pad is on port 1 : NO MULTITAP - if (multitap1 == 0) { - PadDataS padd; - padd.requestPadIndex = 0; - PAD1_readPort1(&padd); - _PADstartPoll(&padd); + pads[0].requestPadIndex = 0; + PAD1_readPort1(&pads[0]); + + pads[0].multitapLongModeEnabled = 0; + if (pads[0].portMultitap) + pads[0].multitapLongModeEnabled = pads[0].txData[1] & 1; + + if (!pads[0].portMultitap || !pads[0].multitapLongModeEnabled) { + PADstartPoll_(&pads[0]); } else { - // a multitap is plugged : refresh all pad. 
- int i; - PadDataS padd[4]; - for(i = 0; i < 4; i++) { - padd[i].requestPadIndex = i; - PAD1_readPort1(&padd[i]); + // a multitap is plugged and enabled: refresh pads 1-3 + for (i = 1; i < 4; i++) { + pads[i].requestPadIndex = i; + PAD1_readPort1(&pads[i]); } - _PADstartPollMultitap(padd); } - //printf("\npad 1 : "); return 0xff; } unsigned char CALLBACK PAD1__poll(unsigned char value, int *more_data) { - char tmp; - if (multitap1 == 1) { - tmp = PADpollMultitap(0, value, more_data); - } else { - tmp = PADpoll_(0, value, more_data); - } - //printf("%2x:%2x, ",value,tmp); - return tmp; - + return PADpollMain(0, value, more_data); } @@ -826,7 +807,6 @@ long CALLBACK PAD1__keypressed() { return 0; } if (PAD1_##dest == NULL) PAD1_##dest = (PAD##dest) PAD1__##dest; static int LoadPAD1plugin(const char *PAD1dll) { - PadDataS padd; void *drv; hPAD1Driver = SysLoadLibrary(PAD1dll); @@ -849,54 +829,34 @@ static int LoadPAD1plugin(const char *PAD1dll) { LoadPad1Sym0(poll, "PADpoll"); LoadPad1SymN(setSensitive, "PADsetSensitive"); - padd.requestPadIndex = 0; - PAD1_readPort1(&padd); - multitap1 = padd.portMultitap; - return 0; } unsigned char CALLBACK PAD2__startPoll(int pad) { - int pad_index; + int pad_index = pads[0].portMultitap ? 
4 : 1; + int i; reqPos = 0; - if (multitap1 == 0 && (multitap2 == 0 || multitap2 == 2)) { - pad_index = 1; - } else if(multitap1 == 1 && (multitap2 == 0 || multitap2 == 2)) { - pad_index = 4; - } else { - pad_index = 0; - } + pads[pad_index].requestPadIndex = pad_index; + PAD2_readPort2(&pads[pad_index]); - // just one pad is on port 1 : NO MULTITAP - if (multitap2 == 0) { - PadDataS padd; - padd.requestPadIndex = pad_index; - PAD2_readPort2(&padd); - _PADstartPoll(&padd); + pads[pad_index].multitapLongModeEnabled = 0; + if (pads[pad_index].portMultitap) + pads[pad_index].multitapLongModeEnabled = pads[pad_index].txData[1] & 1; + + if (!pads[pad_index].portMultitap || !pads[pad_index].multitapLongModeEnabled) { + PADstartPoll_(&pads[pad_index]); } else { - // a multitap is plugged : refresh all pad. - int i; - PadDataS padd[4]; - for(i = 0; i < 4; i++) { - padd[i].requestPadIndex = i+pad_index; - PAD2_readPort2(&padd[i]); + for (i = 1; i < 4; i++) { + pads[pad_index + i].requestPadIndex = pad_index + i; + PAD2_readPort2(&pads[pad_index + i]); } - _PADstartPollMultitap(padd); } - //printf("\npad 2 : "); return 0xff; } unsigned char CALLBACK PAD2__poll(unsigned char value, int *more_data) { - char tmp; - if (multitap2 == 2) { - tmp = PADpollMultitap(1, value, more_data); - } else { - tmp = PADpoll_(1, value, more_data); - } - //printf("%2x:%2x, ",value,tmp); - return tmp; + return PADpollMain(pads[0].portMultitap ? 
4 : 1, value, more_data); } long CALLBACK PAD2__configure(void) { return 0; } @@ -916,7 +876,6 @@ long CALLBACK PAD2__keypressed() { return 0; } LoadSym(PAD2_##dest, PAD##dest, name, FALSE); static int LoadPAD2plugin(const char *PAD2dll) { - PadDataS padd; void *drv; hPAD2Driver = SysLoadLibrary(PAD2dll); @@ -939,10 +898,6 @@ static int LoadPAD2plugin(const char *PAD2dll) { LoadPad2Sym0(poll, "PADpoll"); LoadPad2SymN(setSensitive, "PADsetSensitive"); - padd.requestPadIndex = 0; - PAD2_readPort2(&padd); - multitap2 = padd.portMultitap; - return 0; } diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index c5140cce1..b7af7c3a4 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -381,6 +381,7 @@ void SetCdOpenCaseTime(s64 time); extern void pl_gun_byte2(int port, unsigned char byte); extern void plat_trigger_vibrate(int pad, int low, int high); +extern void plat_get_psx_resolution(int *xres, int *yres); #ifdef __cplusplus } From ab88daca6f1367543d88b35e04a7999f3e36a0ff Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 16 Sep 2023 02:41:06 +0300 Subject: [PATCH 368/597] attempt to improve guncon defaults The previous change wrongly introduced resolution into the calculation which is unneeded because input is not pixel coordinates. 
--- frontend/libretro.c | 6 ------ frontend/plugin.c | 2 ++ frontend/plugin_lib.c | 6 ------ libpcsxcore/plugins.c | 16 +++++++++++----- libpcsxcore/plugins.h | 2 ++ plugins/gpulib/gpu.c | 8 ++++++++ plugins/gpulib/gpu.h | 1 + 7 files changed, 24 insertions(+), 17 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 965f93085..a26d46626 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -504,12 +504,6 @@ void pl_timing_prepare(int is_pal) is_pal_mode = is_pal; } -void plat_get_psx_resolution(int *xres, int *yres) -{ - *xres = psx_w; - *yres = psx_h; -} - void plat_trigger_vibrate(int pad, int low, int high) { if (!rumble_cb) diff --git a/frontend/plugin.c b/frontend/plugin.c index 3a0710a58..3374141ed 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -139,6 +139,7 @@ extern long GPUdmaChain(uint32_t *,uint32_t); extern void GPUupdateLace(void); extern long GPUfreeze(uint32_t, void *); extern void GPUvBlank(int, int); +extern void GPUgetScreenInfo(int *y, int *base_hres); extern void GPUrearmedCallbacks(const struct rearmed_cbs *cbs); @@ -222,6 +223,7 @@ static const struct { DIRECT_GPU(GPUdmaChain), DIRECT_GPU(GPUfreeze), DIRECT_GPU(GPUvBlank), + DIRECT_GPU(GPUgetScreenInfo), DIRECT_GPU(GPUrearmedCallbacks), DIRECT_GPU(GPUdisplayText), diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 0f0231237..8a6b6adb1 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -633,12 +633,6 @@ void pl_gun_byte2(int port, unsigned char byte) { } -void plat_get_psx_resolution(int *xres, int *yres) -{ - *xres = psx_w; - *yres = psx_h; -} - #define MAX_LAG_FRAMES 3 #define tvdiff(tv, tv_old) \ diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index 93af3d02c..2dacfd5a1 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -49,6 +49,7 @@ GPUfreeze GPU_freeze; GPUgetScreenPic GPU_getScreenPic; GPUshowScreenPic GPU_showScreenPic; GPUvBlank GPU_vBlank; +GPUgetScreenInfo GPU_getScreenInfo; CDRinit 
CDR_init; CDRshutdown CDR_shutdown; @@ -194,6 +195,7 @@ void CALLBACK GPU__keypressed(int key) {} long CALLBACK GPU__getScreenPic(unsigned char *pMem) { return -1; } long CALLBACK GPU__showScreenPic(unsigned char *pMem) { return -1; } void CALLBACK GPU__vBlank(int val) {} +void CALLBACK GPU__getScreenInfo(int *y, int *base_hres) {} #define LoadGpuSym1(dest, name) \ LoadSym(GPU_##dest, GPU##dest, name, TRUE); @@ -233,6 +235,7 @@ static int LoadGPUplugin(const char *GPUdll) { LoadGpuSym0(getScreenPic, "GPUgetScreenPic"); LoadGpuSym0(showScreenPic, "GPUshowScreenPic"); LoadGpuSym0(vBlank, "GPUvBlank"); + LoadGpuSym0(getScreenInfo, "GPUgetScreenInfo"); LoadGpuSym0(configure, "GPUconfigure"); LoadGpuSym0(test, "GPUtest"); LoadGpuSym0(about, "GPUabout"); @@ -613,9 +616,8 @@ static void PADstartPoll_(PadDataS *pad) { stdpar[2] = pad->buttonStatus & 0xff; stdpar[3] = pad->buttonStatus >> 8; - int absX = pad->absoluteX; + int absX = pad->absoluteX; // 0-1023 int absY = pad->absoluteY; - int xres = 256, yres = 240; if (absX == 65536 || absY == 65536) { stdpar[4] = 0x01; @@ -624,9 +626,13 @@ static void PADstartPoll_(PadDataS *pad) { stdpar[7] = 0x00; } else { - plat_get_psx_resolution(&xres, &yres); - int x = 0x5a - (xres - 256) / 3 + (((xres - 256) / 3 + 356) * absX >> 10); - int y = 0x20 + (yres * absY >> 10); + int y_ofs = 0, yres = 240; + GPU_getScreenInfo(&y_ofs, &yres); + int y_top = (Config.PsxType ? 0x30 : 0x19) + y_ofs; + int w = Config.PsxType ? 
385 : 378; + int x = 0x40 + (w * absX >> 10); + int y = y_top + (yres * absY >> 10); + //printf("%3d %3d %4x %4x\n", absX, absY, x, y); stdpar[4] = x; stdpar[5] = x >> 8; diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index b7af7c3a4..c563470bf 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -76,6 +76,7 @@ typedef long (CALLBACK* GPUfreeze)(uint32_t, GPUFreeze_t *); typedef long (CALLBACK* GPUgetScreenPic)(unsigned char *); typedef long (CALLBACK* GPUshowScreenPic)(unsigned char *); typedef void (CALLBACK* GPUvBlank)(int, int); +typedef void (CALLBACK* GPUgetScreenInfo)(int *, int *); // GPU function pointers extern GPUupdateLace GPU_updateLace; @@ -100,6 +101,7 @@ extern GPUfreeze GPU_freeze; extern GPUgetScreenPic GPU_getScreenPic; extern GPUshowScreenPic GPU_showScreenPic; extern GPUvBlank GPU_vBlank; +extern GPUgetScreenInfo GPU_getScreenInfo; // CD-ROM Functions typedef long (CALLBACK* CDRinit)(void); diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index dfaff58e2..c84414418 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -879,6 +879,14 @@ void GPUvBlank(int is_vblank, int lcf) } } +void GPUgetScreenInfo(int *y, int *base_hres) +{ + *y = gpu.screen.y; + *base_hres = gpu.screen.vres; + if (gpu.status & PSX_GPU_STATUS_DHEIGHT) + *base_hres >>= 1; +} + #include "../../frontend/plugin_lib.h" void GPUrearmedCallbacks(const struct rearmed_cbs *cbs) diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 1582ee15f..dbca8081a 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -150,6 +150,7 @@ void GPUupdateLace(void); long GPUopen(unsigned long *disp, char *cap, char *cfg); long GPUclose(void); void GPUvBlank(int is_vblank, int lcf); +void GPUgetScreenInfo(int *y, int *base_hres); void GPUrearmedCallbacks(const struct rearmed_cbs *cbs_); #ifdef __cplusplus From 46fe949677745250de387994657de2004fe3b29b Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 19 Sep 2023 01:34:44 +0300 Subject: [PATCH 
369/597] psxbios: maybe more accurate malloc --- libpcsxcore/psxbios.c | 176 +++++++++++++++++++----------------------- 1 file changed, 81 insertions(+), 95 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index fb7d13d17..a41cf54ec 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -292,8 +292,10 @@ static u32 floodchk; #define A_HEAP_BASE 0x9000 #define A_HEAP_SIZE 0x9004 #define A_HEAP_END 0x9008 -#define A_HEAP_FLAG 0x900c +#define A_HEAP_INIT_FLG 0x900c #define A_RND_SEED 0x9010 +#define A_HEAP_FRSTCHNK 0xb060 +#define A_HEAP_CURCHNK 0xb064 #define A_CONF_TCB 0xb940 #define A_CONF_EvCB 0xb944 #define A_CONF_SP 0xb948 @@ -1214,114 +1216,98 @@ void psxBios_qsort() { // 0x31 pc0 = ra; } -// this isn't how the real bios works, but maybe good enough +static int malloc_heap_grow(u32 size) { + u32 heap_addr, heap_end, heap_addr_new; + + heap_addr = loadRam32(A_HEAP_BASE); + heap_end = loadRam32(A_HEAP_END); + heap_addr_new = heap_addr + 4 + size; + if (heap_addr_new >= heap_end) + return -1; + storeRam32(A_HEAP_BASE, heap_addr_new); + storeRam32(heap_addr - 4, size | 1); + storeRam32(heap_addr + size, ~1); // terminator + return 0; +} + static void psxBios_malloc() { // 0x33 - u32 *heap_addr, *heap_end; - u32 *chunk, *newchunk = NULL; - unsigned int dsize = 0, csize, cstat; - int colflag; - PSXBIOS_LOG("psxBios_%s %x\n", biosA0n[0x33], a0); - heap_addr = loadRam32ptr(A_HEAP_BASE); - heap_end = loadRam32ptr(A_HEAP_END); - if (heap_addr >= heap_end) { - v0 = 0; - pc0 = ra; - return; - } + u32 size = (a0 + 3) & ~3; + u32 limit = 32*1024; + u32 tries = 2, i; + u32 ret; - // scan through heap and combine free chunks of space - chunk = heap_addr; - colflag = 0; - while(chunk < heap_end) { - // get size and status of actual chunk - csize = ((u32)*chunk) & 0xfffffffc; - cstat = ((u32)*chunk) & 1; - - // most probably broken heap descriptor - // this fixes Burning Road - if (*chunk == 0) { - newchunk = chunk; - dsize = ((uptr)heap_end - 
(uptr)chunk) - 4; - colflag = 1; - break; + PSXBIOS_LOG("psxBios_%s %d\n", biosA0n[0x33], a0); + + if (!loadRam32(A_HEAP_INIT_FLG)) { + u32 heap_addr = loadRam32(A_HEAP_BASE); + storeRam32(heap_addr, ~1); + storeRam32(A_HEAP_FRSTCHNK, heap_addr); + storeRam32(A_HEAP_CURCHNK, heap_addr); + storeRam32(A_HEAP_BASE, heap_addr + 4); + if (malloc_heap_grow(size)) { + PSXBIOS_LOG("malloc: init OOM\n"); + mips_return_c(0, 20); + return; } + storeRam32(A_HEAP_INIT_FLG, 1); + } - // it's a free chunk - if(cstat == 1) { - if(colflag == 0) { - newchunk = chunk; - dsize = csize; - colflag = 1; // let's begin a new collection of free memory + for (i = 0; tries > 0 && i < limit; i++) + { + u32 chunk = loadRam32(A_HEAP_CURCHNK); + u32 chunk_hdr = loadRam32(chunk); + u32 next_chunk = chunk + 4 + (chunk_hdr & ~3); + u32 next_chunk_hdr = loadRam32(next_chunk); + use_cycles(20); + //printf(" c %08x %08x\n", chunk, chunk_hdr); + if (chunk_hdr & 1) { + // free chunk + if (chunk_hdr > (size | 1)) { + // split + u32 p2size = (chunk_hdr & ~3) - size - 4; + storeRam32(chunk + 4 + size, p2size | 1); + chunk_hdr = size | 1; + } + if (chunk_hdr == (size | 1)) { + storeRam32(chunk, size); + break; + } + // chunk too small + if (next_chunk_hdr & 1) { + // merge + u32 msize = (chunk_hdr & ~3) + 4 + (next_chunk_hdr & ~3); + storeRam32(chunk, msize | 1); + continue; } - else dsize += (csize+4); // add the new size including header } - // not a free chunk: did we start a collection ? 
+ if (chunk_hdr == ~1) { + // last chunk + if (tries == 2) + storeRam32(A_HEAP_CURCHNK, loadRam32(A_HEAP_FRSTCHNK)); + tries--; + } else { - if(colflag == 1) { // collection is over - colflag = 0; - *newchunk = SWAP32(dsize | 1); - } + // go to the next chunk + storeRam32(A_HEAP_CURCHNK, next_chunk); } - - // next chunk - chunk = (u32*)((uptr)chunk + csize + 4); } - // if neccessary free memory on end of heap - if (colflag == 1) - *newchunk = SWAP32(dsize | 1); - - chunk = heap_addr; - csize = ((u32)*chunk) & 0xfffffffc; - cstat = ((u32)*chunk) & 1; - dsize = (a0 + 3) & 0xfffffffc; - // exit on uninitialized heap - if (chunk == NULL) { - printf("malloc %x,%x: Uninitialized Heap!\n", v0, a0); - v0 = 0; - pc0 = ra; - return; + if (i == limit) + ret = 0; + else if (tries == 0 && malloc_heap_grow(size)) + ret = 0; + else { + u32 chunk = loadRam32(A_HEAP_CURCHNK); + storeRam32(chunk, loadRam32(chunk) & ~3); + ret = chunk + 4; } - // search an unused chunk that is big enough until the end of the heap - while ((dsize > csize || cstat==0) && chunk < heap_end ) { - chunk = (u32*)((uptr)chunk + csize + 4); - - // catch out of memory - if(chunk >= heap_end) { - printf("malloc %x,%x: Out of memory error!\n", - v0, a0); - v0 = 0; pc0 = ra; - return; - } - - csize = ((u32)*chunk) & 0xfffffffc; - cstat = ((u32)*chunk) & 1; - } - - // allocate memory - if(dsize == csize) { - // chunk has same size - *chunk &= 0xfffffffc; - } else if (dsize > csize) { - v0 = 0; pc0 = ra; - return; - } else { - // split free chunk - *chunk = SWAP32(dsize); - newchunk = (u32*)((uptr)chunk + dsize + 4); - *newchunk = SWAP32(((csize - dsize - 4) & 0xfffffffc) | 1); - } - - // return pointer to allocated memory - v0 = ((uptr)chunk - (uptr)psxM) + 4; - v0|= 0x80000000; - //printf ("malloc %x,%x\n", v0, a0); - pc0 = ra; + PSXBIOS_LOG(" -> %08x\n", ret); + mips_return_c(ret, 40); } static void psxBios_free() { // 0x34 - PSXBIOS_LOG("psxBios_%s %x (%x bytes)\n", biosA0n[0x34], a0, loadRam32(a0 - 4)); + 
PSXBIOS_LOG("psxBios_%s %x (%d bytes)\n", biosA0n[0x34], a0, loadRam32(a0 - 4)); storeRam32(a0 - 4, loadRam32(a0 - 4) | 1); // set chunk to free mips_return_void_c(5); } @@ -1376,7 +1362,7 @@ static void psxBios_InitHeap() { // 0x39 storeRam32(A_HEAP_BASE, a0); storeRam32(A_HEAP_SIZE, a1); storeRam32(A_HEAP_END, a0 + (a1 & ~3) + 4); - storeRam32(A_HEAP_FLAG, 0); + storeRam32(A_HEAP_INIT_FLG, 0); storeRam32(a0, 0); mips_return_void_c(14); From 0890ae159b413fa4a5c84c0db1bf7e2eb05f4849 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 19 Sep 2023 01:35:30 +0300 Subject: [PATCH 370/597] psxbios: allow custom api overrides BallBlazer Champions hooks custom heap funcs notaz/pcsx_rearmed#307 --- Makefile | 2 +- libpcsxcore/psxbios.c | 159 +++++++++++++++++++++++++++++++++++------- libpcsxcore/psxhle.c | 112 ----------------------------- libpcsxcore/psxhle.h | 10 --- 4 files changed, 136 insertions(+), 147 deletions(-) delete mode 100644 libpcsxcore/psxhle.c diff --git a/Makefile b/Makefile index 86473ab45..63ffcb628 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,7 @@ endif OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cheat.o libpcsxcore/database.o \ libpcsxcore/decode_xa.o libpcsxcore/mdec.o \ libpcsxcore/misc.o libpcsxcore/plugins.o libpcsxcore/ppf.o libpcsxcore/psxbios.o \ - libpcsxcore/psxcommon.o libpcsxcore/psxcounters.o libpcsxcore/psxdma.o libpcsxcore/psxhle.o \ + libpcsxcore/psxcommon.o libpcsxcore/psxcounters.o libpcsxcore/psxdma.o \ libpcsxcore/psxhw.o libpcsxcore/psxinterpreter.o libpcsxcore/psxmem.o libpcsxcore/r3000a.o \ libpcsxcore/sio.o libpcsxcore/spu.o OBJS += libpcsxcore/gte.o libpcsxcore/gte_nf.o libpcsxcore/gte_divider.o diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index a41cf54ec..83bc684a3 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -302,6 +302,11 @@ static u32 floodchk; #define A_CD_EVENTS 0xb9b8 #define A_EXC_GP 0xf450 +#define A_A0_DUMMY 0x1010 +#define A_B0_DUMMY 0x2010 +#define A_C0_DUMMY 
0x3010 +#define A_B0_5B_DUMMY 0x43d0 + #define HLEOP(n) SWAPu32((0x3b << 26) | (n)); static u32 loadRam32(u32 addr) @@ -1375,7 +1380,7 @@ void psxBios_getchar() { //0x3b static void psxBios_printf_psxout() { // 0x3f char tmp[1024]; char tmp2[1024]; - u32 save[4]; + u32 save[4] = { 0, }; char *ptmp = tmp; int n=1, i=0, j; void *psp; @@ -3294,6 +3299,11 @@ void psxBiosSetupBootState(void) SPU_writeRegister(0x1f801d80 + i*2, spu_config[i], psxRegs.cycle); } +static void hleExc0_0_1(); +static void hleExc0_0_2(); +static void hleExc0_1_1(); +static void hleExc0_1_2(); + #include "sjisfont.h" void psxBiosInit() { @@ -3705,16 +3715,16 @@ void psxBiosInit() { // (or rather the funcs listed there) ptr = (u32 *)&psxM[A_A0_TABLE]; for (i = 0; i < 256; i++) - ptr[i] = SWAP32(0x1000); + ptr[i] = SWAP32(A_A0_DUMMY); ptr = (u32 *)&psxM[A_B0_TABLE]; for (i = 0; i < 256; i++) - ptr[i] = SWAP32(0x2000); + ptr[i] = SWAP32(A_B0_DUMMY); // B(5b) is special because games patch (sometimes even jump to) // code at fixed offsets from it, nocash lists offsets: // patch: +3d8, +4dc, +594, +62c, +9c8, +1988 // call: +7a0=4b70, +884=4c54, +894=4c64 - ptr[0x5b] = SWAP32(0x43d0); + ptr[0x5b] = SWAP32(A_B0_5B_DUMMY); // 0x43d0 ram32[0x4b70/4] = SWAP32(0x03e00008); // jr $ra // setPadOutputBuf ram32[0x4c54/4] = SWAP32(0x240e0001); // mov $t6, 1 @@ -3726,13 +3736,14 @@ void psxBiosInit() { ptr = (u32 *)&psxM[A_C0_TABLE]; for (i = 0; i < 256/2; i++) - ptr[i] = SWAP32(0x3000); + ptr[i] = SWAP32(A_C0_DUMMY); ptr[6] = SWAP32(A_EXCEPTION); // more HLE traps - ram32[0x1000/4] = HLEOP(hleop_dummy); - ram32[0x2000/4] = HLEOP(hleop_dummy); - ram32[0x3000/4] = HLEOP(hleop_dummy); + ram32[A_A0_DUMMY/4] = HLEOP(hleop_dummy); + ram32[A_B0_DUMMY/4] = HLEOP(hleop_dummy); + ram32[A_C0_DUMMY/4] = HLEOP(hleop_dummy); + ram32[A_B0_5B_DUMMY/4] = HLEOP(hleop_dummy); ram32[0x8000/4] = HLEOP(hleop_execret); ram32[A_EEXIT_PTR/4] = SWAP32(A_EEXIT_DEF); @@ -3782,13 +3793,13 @@ static void handle_chain_x_x_1(u32 enable, u32 
irqbit) // hleExc0_{0,1}* are usually removed by A(56)/A(72) on the game's startup, // so this is only partially implemented -void hleExc0_0_1() // A(93h) - CdromDmaIrqFunc2 +static void hleExc0_0_1() // A(93h) - CdromDmaIrqFunc2 { u32 cdrom_dma_ack_enable = 1; // a000b93c handle_chain_x_x_1(cdrom_dma_ack_enable, 3); // IRQ3 DMA } -void hleExc0_0_2() // A(91h) - CdromDmaIrqFunc1 +static void hleExc0_0_2() // A(91h) - CdromDmaIrqFunc1 { u32 ret = 0; //PSXBIOS_LOG("%s\n", __func__); @@ -3803,13 +3814,13 @@ void hleExc0_0_2() // A(91h) - CdromDmaIrqFunc1 mips_return_c(ret, 20); } -void hleExc0_1_1() // A(92h) - CdromIoIrqFunc2 +static void hleExc0_1_1() // A(92h) - CdromIoIrqFunc2 { u32 cdrom_irq_ack_enable = 1; // a000b938 handle_chain_x_x_1(cdrom_irq_ack_enable, 2); // IRQ2 cdrom } -void hleExc0_1_2() // A(90h) - CdromIoIrqFunc1 +static void hleExc0_1_2() // A(90h) - CdromIoIrqFunc1 { u32 ret = 0; if (psxHu32(0x1074) & psxHu32(0x1070) & 4) { // IRQ2 cdrom @@ -3819,7 +3830,7 @@ void hleExc0_1_2() // A(90h) - CdromIoIrqFunc1 mips_return_c(ret, 20); } -void hleExc0_2_2_syscall() // not in any A/B/C table +static void hleExc0_2_2_syscall() // not in any A/B/C table { u32 tcbPtr = loadRam32(A_TT_PCB); TCB *tcb = loadRam32ptr(tcbPtr); @@ -3863,7 +3874,7 @@ void hleExc0_2_2_syscall() // not in any A/B/C table psxBios_ReturnFromException(); } -void hleExc1_0_1(void) +static void hleExc1_0_1(void) { u32 vbl_irq_ack_enable = loadRam32(A_RCNT_VBL_ACK + 0x0c); // 860c handle_chain_x_x_1(vbl_irq_ack_enable, 0); // IRQ0 vblank @@ -3879,45 +3890,45 @@ static void handle_chain_1_x_2(u32 ev_index, u32 irqbit) mips_return_c(ret, 22); } -void hleExc1_0_2(void) +static void hleExc1_0_2(void) { handle_chain_1_x_2(3, 0); // IRQ0 vblank } -void hleExc1_1_1(void) +static void hleExc1_1_1(void) { u32 rcnt_irq_ack_enable = loadRam32(A_RCNT_VBL_ACK + 0x08); // 8608 handle_chain_x_x_1(rcnt_irq_ack_enable, 6); // IRQ6 rcnt2 } -void hleExc1_1_2(void) +static void hleExc1_1_2(void) { 
handle_chain_1_x_2(2, 6); // IRQ6 rcnt2 } -void hleExc1_2_1(void) +static void hleExc1_2_1(void) { u32 rcnt_irq_ack_enable = loadRam32(A_RCNT_VBL_ACK + 0x04); // 8604 handle_chain_x_x_1(rcnt_irq_ack_enable, 5); // IRQ5 rcnt1 } -void hleExc1_2_2(void) +static void hleExc1_2_2(void) { handle_chain_1_x_2(1, 5); // IRQ5 rcnt1 } -void hleExc1_3_1(void) +static void hleExc1_3_1(void) { u32 rcnt_irq_ack_enable = loadRam32(A_RCNT_VBL_ACK + 0x00); // 8600 handle_chain_x_x_1(rcnt_irq_ack_enable, 4); // IRQ4 rcnt0 } -void hleExc1_3_2(void) +static void hleExc1_3_2(void) { handle_chain_1_x_2(0, 4); // IRQ4 rcnt0 } -void hleExc3_0_2_defint(void) +static void hleExc3_0_2_defint(void) { static const struct { u8 ev, irqbit; @@ -3945,7 +3956,7 @@ void hleExc3_0_2_defint(void) mips_return_c(0, 11 + 7*11 + 7*11 + 12); } -void hleExcPadCard1(void) +static void hleExcPadCard1(void) { if (loadRam32(A_PAD_IRQR_ENA)) { u8 *pad_buf1 = loadRam8ptr(A_PAD_INBUF + 0); @@ -3966,7 +3977,7 @@ void hleExcPadCard1(void) mips_return_c(0, 18); } -void hleExcPadCard2(void) +static void hleExcPadCard2(void) { u32 ret = psxHu32(0x1074) & psxHu32(0x1070) & 1; mips_return_c(ret, 15); @@ -4033,6 +4044,106 @@ void psxBiosException() { psxBios_ReturnFromException(); } +/* HLE */ +static void hleDummy() { + log_unhandled("hleDummy called @%08x ra=%08x\n", psxRegs.pc - 4, ra); + psxRegs.pc = ra; + psxRegs.cycle += 1000; + + psxBranchTest(); +} + +static void hleA0() { + u32 call = t1 & 0xff; + u32 entry = loadRam32(A_A0_TABLE + call * 4); + + if (call < 192 && entry != A_A0_DUMMY) { + PSXBIOS_LOG("custom A%02x %s(0x%x, ) addr=%08x ra=%08x\n", + call, biosA0n[call], a0, entry, ra); + softCall(entry); + pc0 = ra; + PSXBIOS_LOG(" -> %08x\n", v0); + } + else if (biosA0[call]) + biosA0[call](); + + psxBranchTest(); +} + +static void hleB0() { + u32 call = t1 & 0xff; + u32 entry = loadRam32(A_B0_TABLE + call * 4); + int is_custom = 0; + + if (call == 0x5b) + is_custom = entry != A_B0_5B_DUMMY; + else + is_custom = 
entry != A_B0_DUMMY; + if (is_custom) { + PSXBIOS_LOG("custom B%02x %s(0x%x, ) addr=%08x ra=%08x\n", + call, biosB0n[call], a0, entry, ra); + softCall(entry); + pc0 = ra; + PSXBIOS_LOG(" -> %08x\n", v0); + } + else if (biosB0[call]) + biosB0[call](); + + psxBranchTest(); +} + +static void hleC0() { + u32 call = t1 & 0xff; + u32 entry = loadRam32(A_C0_TABLE + call * 4); + + if (call < 128 && entry != A_C0_DUMMY) { + PSXBIOS_LOG("custom C%02x %s(0x%x, ) addr=%08x ra=%08x\n", + call, biosC0n[call], a0, entry, ra); + softCall(entry); + pc0 = ra; + PSXBIOS_LOG(" -> %08x\n", v0); + } + else if (biosC0[call]) + biosC0[call](); + + psxBranchTest(); +} + +// currently not used +static void hleBootstrap() { + CheckCdrom(); + LoadCdrom(); +} + +static void hleExecRet() { + const EXEC *header = (EXEC *)PSXM(s0); + + PSXBIOS_LOG("ExecRet %x: %x\n", s0, header->ret); + + ra = SWAP32(header->ret); + sp = SWAP32(header->_sp); + fp = SWAP32(header->_fp); + gp = SWAP32(header->_gp); + s0 = SWAP32(header->base); + + v0 = 1; + psxRegs.pc = ra; +} + +void (* const psxHLEt[24])() = { + hleDummy, hleA0, hleB0, hleC0, + hleBootstrap, hleExecRet, psxBiosException, hleDummy, + hleExc0_0_1, hleExc0_0_2, + hleExc0_1_1, hleExc0_1_2, hleExc0_2_2_syscall, + hleExc1_0_1, hleExc1_0_2, + hleExc1_1_1, hleExc1_1_2, + hleExc1_2_1, hleExc1_2_2, + hleExc1_3_1, hleExc1_3_2, + hleExc3_0_2_defint, + hleExcPadCard1, hleExcPadCard2, +}; + + #define bfreeze(ptr, size) { \ if (Mode == 1) memcpy(&psxR[base], ptr, size); \ if (Mode == 0) memcpy(ptr, &psxR[base], size); \ diff --git a/libpcsxcore/psxhle.c b/libpcsxcore/psxhle.c deleted file mode 100644 index 175b86ab4..000000000 --- a/libpcsxcore/psxhle.c +++ /dev/null @@ -1,112 +0,0 @@ -/*************************************************************************** - * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as 
published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU General Public License * - * along with this program; if not, write to the * - * Free Software Foundation, Inc., * - * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * - ***************************************************************************/ - -/* -* Internal PSX HLE functions. -*/ - -#include "psxhle.h" - -#if 0 -#define PSXHLE_LOG SysPrintf -#else -#define PSXHLE_LOG(...) -#endif - -static void hleDummy() { - log_unhandled("hleDummy called @%08x ra=%08x\n", - psxRegs.pc - 4, psxRegs.GPR.n.ra); - psxRegs.pc = psxRegs.GPR.n.ra; - psxRegs.cycle += 1000; - - psxBranchTest(); -} - -static void hleA0() { - u32 call = psxRegs.GPR.n.t1 & 0xff; - - if (biosA0[call]) biosA0[call](); - - psxBranchTest(); -} - -static void hleB0() { - u32 call = psxRegs.GPR.n.t1 & 0xff; - - if (biosB0[call]) biosB0[call](); - - psxBranchTest(); -} - -static void hleC0() { - u32 call = psxRegs.GPR.n.t1 & 0xff; - - if (biosC0[call]) biosC0[call](); - - psxBranchTest(); -} - -static void hleBootstrap() { // 0xbfc00000 - PSXHLE_LOG("hleBootstrap\n"); - CheckCdrom(); - LoadCdrom(); - PSXHLE_LOG("CdromLabel: \"%s\": PC = %8.8lx (SP = %8.8lx)\n", CdromLabel, psxRegs.pc, psxRegs.GPR.n.sp); -} - -typedef struct { - u32 _pc0; - u32 gp0; - u32 t_addr; - u32 t_size; - u32 d_addr; - u32 d_size; - u32 b_addr; - u32 b_size; - u32 S_addr; - u32 s_size; - u32 _sp,_fp,_gp,ret,base; -} EXEC; - -static void hleExecRet() { - EXEC *header = (EXEC*)PSXM(psxRegs.GPR.n.s0); - - PSXHLE_LOG("ExecRet %x: %x\n", psxRegs.GPR.n.s0, header->ret); - - psxRegs.GPR.n.ra = 
SWAP32(header->ret); - psxRegs.GPR.n.sp = SWAP32(header->_sp); - psxRegs.GPR.n.fp = SWAP32(header->_fp); - psxRegs.GPR.n.gp = SWAP32(header->_gp); - psxRegs.GPR.n.s0 = SWAP32(header->base); - - psxRegs.GPR.n.v0 = 1; - psxRegs.pc = psxRegs.GPR.n.ra; -} - -void (* const psxHLEt[24])() = { - hleDummy, hleA0, hleB0, hleC0, - hleBootstrap, hleExecRet, psxBiosException, hleDummy, - hleExc0_0_1, hleExc0_0_2, - hleExc0_1_1, hleExc0_1_2, hleExc0_2_2_syscall, - hleExc1_0_1, hleExc1_0_2, - hleExc1_1_1, hleExc1_1_2, - hleExc1_2_1, hleExc1_2_2, - hleExc1_3_1, hleExc1_3_2, - hleExc3_0_2_defint, - hleExcPadCard1, hleExcPadCard2, -}; diff --git a/libpcsxcore/psxhle.h b/libpcsxcore/psxhle.h index b5508725f..e6d2df813 100644 --- a/libpcsxcore/psxhle.h +++ b/libpcsxcore/psxhle.h @@ -28,16 +28,6 @@ extern "C" { #include "r3000a.h" #include "plugins.h" -void hleExc0_0_1(); void hleExc0_0_2(); -void hleExc0_1_1(); void hleExc0_1_2(); -void hleExc0_2_2_syscall(); -void hleExc1_0_1(); void hleExc1_0_2(); -void hleExc1_1_1(); void hleExc1_1_2(); -void hleExc1_2_1(); void hleExc1_2_2(); -void hleExc1_3_1(); void hleExc1_3_2(); -void hleExc3_0_2_defint(); -void hleExcPadCard1(); void hleExcPadCard2(); - enum hle_op { hleop_dummy = 0, hleop_a0, hleop_b0, hleop_c0, hleop_bootstrap, hleop_execret, hleop_exception, hleop_unused, From 06c11e4a2d5b058897c39c49bd29e2612c7ed511 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 20 Sep 2023 01:56:41 +0300 Subject: [PATCH 371/597] input changes part3 notaz/pcsx_rearmed#309 --- frontend/menu.c | 3 ++ include/psemu_plugin_defs.h | 3 +- libpcsxcore/plugins.c | 91 +++++++++++++++++++++++++------------ 3 files changed, 67 insertions(+), 30 deletions(-) diff --git a/frontend/menu.c b/frontend/menu.c index 8e7cd5041..f33ac33f2 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -312,11 +312,13 @@ static void menu_sync_config(void) switch (in_type_sel1) { case 1: in_type[0] = PSE_PAD_TYPE_ANALOGPAD; break; case 2: in_type[0] = PSE_PAD_TYPE_NEGCON; break; + 
case 3: in_type[0] = PSE_PAD_TYPE_NONE; break; default: in_type[0] = PSE_PAD_TYPE_STANDARD; } switch (in_type_sel2) { case 1: in_type[1] = PSE_PAD_TYPE_ANALOGPAD; break; case 2: in_type[1] = PSE_PAD_TYPE_NEGCON; break; + case 3: in_type[1] = PSE_PAD_TYPE_NONE; break; default: in_type[1] = PSE_PAD_TYPE_STANDARD; } if (in_evdev_allow_abs_only != allow_abs_only_old) { @@ -1203,6 +1205,7 @@ static const char *men_in_type_sel[] = { "Standard (SCPH-1080)", "Analog (SCPH-1150)", "GunCon", + "None", NULL }; static const char h_nub_btns[] = "Experimental, keep this OFF if unsure. Select rescan after change."; diff --git a/include/psemu_plugin_defs.h b/include/psemu_plugin_defs.h index 40a67605b..d005d080c 100644 --- a/include/psemu_plugin_defs.h +++ b/include/psemu_plugin_defs.h @@ -224,7 +224,8 @@ typedef struct unsigned char multitapLongModeEnabled; unsigned char PadMode; // 0 : digital 1: analog - unsigned char reserved[52]; + unsigned char cmd4dConfig[6]; + unsigned char reserved[46]; //Lightgun values int absoluteX,absoluteY; diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index 2dacfd5a1..fb0ea614e 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -364,24 +364,23 @@ static unsigned char buf[256]; static unsigned char stdpar[8] = { 0x41, 0x5a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; //response for request 44, 45, 46, 47, 4C, 4D -static unsigned char resp45[8] = {0xF3, 0x5A, 0x01, 0x02, 0x00, 0x02, 0x01, 0x00}; -static unsigned char resp46_00[8] = {0xF3, 0x5A, 0x00, 0x00, 0x01, 0x02, 0x00, 0x0A}; -static unsigned char resp46_01[8] = {0xF3, 0x5A, 0x00, 0x00, 0x01, 0x01, 0x01, 0x14}; -static unsigned char resp47[8] = {0xF3, 0x5A, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00}; -static unsigned char resp4C_00[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00}; -static unsigned char resp4C_01[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00}; -static unsigned char resp4D[8] = {0xF3, 0x5A, 0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF}; +static const u8 resp45[8] = 
{0xF3, 0x5A, 0x01, 0x02, 0x00, 0x02, 0x01, 0x00}; +static const u8 resp46_00[8] = {0xF3, 0x5A, 0x00, 0x00, 0x01, 0x02, 0x00, 0x0A}; +static const u8 resp46_01[8] = {0xF3, 0x5A, 0x00, 0x00, 0x01, 0x01, 0x01, 0x14}; +static const u8 resp47[8] = {0xF3, 0x5A, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00}; +static const u8 resp4C_00[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00}; +static const u8 resp4C_01[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00}; //fixed reponse of request number 41, 48, 49, 4A, 4B, 4E, 4F -static unsigned char resp40[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; -static unsigned char resp41[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; -static unsigned char resp43[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; -static unsigned char resp44[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; -static unsigned char resp49[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; -static unsigned char resp4A[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; -static unsigned char resp4B[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; -static unsigned char resp4E[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; -static unsigned char resp4F[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static const u8 resp40[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static const u8 resp41[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static const u8 resp43[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static const u8 resp44[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static const u8 resp49[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static const u8 resp4A[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static const u8 resp4B[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static const u8 resp4E[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +static const u8 resp4F[8] = {0xF3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; // Resquest of 
psx core enum { @@ -481,8 +480,17 @@ enum { static void initBufForRequest(int padIndex, char value) { - switch (value){ - //Pad keystate already in buffer + if (pads[padIndex].configMode) { + buf[0] = 0xf3; buf[1] = 0x5a; + respSize = 8; + } + else if (value != 0x42 && value != 0x43) { + respSize = 1; + return; + } + + switch (value) { + // keystate already in buffer, set by PADstartPoll_() //case CMD_READ_DATA_AND_VIBRATE : // break; case CMD_CONFIG_MODE : @@ -490,7 +498,7 @@ static void initBufForRequest(int padIndex, char value) { memcpy(buf, resp43, 8); break; } - //else, not in config mode, pad keystate return (already in the buffer) + // else not in config mode, pad keystate return break; case CMD_SET_MODE_AND_LOCK : memcpy(buf, resp44, 8); @@ -508,8 +516,8 @@ static void initBufForRequest(int padIndex, char value) { case CMD_QUERY_MODE : memcpy(buf, resp4C_00, 8); break; - case CMD_VIBRATION_TOGGLE : - memcpy(buf, resp4D, 8); + case CMD_VIBRATION_TOGGLE: // 4d + memcpy(buf + 2, pads[padIndex].cmd4dConfig, 6); break; case REQ40 : memcpy(buf, resp40, 8); @@ -535,7 +543,7 @@ static void initBufForRequest(int padIndex, char value) { } } -static void reqIndex2Treatment(int padIndex, char value) { +static void reqIndex2Treatment(int padIndex, u8 value) { switch (pads[padIndex].txData[0]) { case CMD_CONFIG_MODE : //0x43 @@ -549,8 +557,8 @@ static void reqIndex2Treatment(int padIndex, char value) { //0x44 store the led state for change mode if the next value = 0x02 //0x01 analog ON //0x00 analog OFF - //ledStateReq44[padIndex] = value; - pads[padIndex].PadMode = value; + if ((value & ~1) == 0) + pads[padIndex].PadMode = value; break; case CMD_QUERY_ACT : //0x46 @@ -563,10 +571,6 @@ static void reqIndex2Treatment(int padIndex, char value) { memcpy(buf, resp4C_01, 8); } break; - case CMD_VIBRATION_TOGGLE : - //0x4D - memcpy(buf, resp4D, 8); - break; case CMD_READ_DATA_AND_VIBRATE: //mem the vibration value for small motor; pads[padIndex].Vib[0] = value; @@ -581,8 
+585,23 @@ static void vibrate(int padIndex) { pad->VibF[0] = pad->Vib[0]; pad->VibF[1] = pad->Vib[1]; plat_trigger_vibrate(padIndex, pad->VibF[0], pad->VibF[1]); - //printf("vibration pad %i", padIndex); + //printf("vibration pad %i\n", padIndex); + } +} + +static void log_pad(int port, int pos) +{ +#if 0 + if (port == 0 && pos == respSize - 1) { + int i; + for (i = 0; i < respSize; i++) + printf("%02x ", pads[port].txData[i]); + printf("|"); + for (i = 0; i < respSize; i++) + printf(" %02x", buf[i]); + printf("\n"); } +#endif } // Build response for 0x42 request Pad in port @@ -652,6 +671,8 @@ static void PADstartPoll_(PadDataS *pad) { respSize = 4; break; case PSE_PAD_TYPE_ANALOGPAD: // scph1150 + if (pad->PadMode == 0) + goto standard; stdpar[0] = 0x73; stdpar[1] = 0x5a; stdpar[2] = pad->buttonStatus & 0xff; @@ -676,6 +697,7 @@ static void PADstartPoll_(PadDataS *pad) { respSize = 8; break; case PSE_PAD_TYPE_STANDARD: + standard: stdpar[0] = 0x41; stdpar[1] = 0x5a; stdpar[2] = pad->buttonStatus & 0xff; @@ -706,6 +728,10 @@ static void PADpoll_dualshock(int port, unsigned char value, int pos) vibrate(port); } break; + case 7: + if (pads[port].txData[0] == CMD_VIBRATION_TOGGLE) + memcpy(pads[port].cmd4dConfig, pads[port].txData + 2, 6); + break; } } @@ -727,6 +753,7 @@ static unsigned char PADpoll_(int port, unsigned char value, int pos, int *more_ if (pos >= respSize) return 0xff; // no response/HiZ + log_pad(port, pos); return buf[pos]; } @@ -814,6 +841,7 @@ long CALLBACK PAD1__keypressed() { return 0; } static int LoadPAD1plugin(const char *PAD1dll) { void *drv; + size_t p; hPAD1Driver = SysLoadLibrary(PAD1dll); if (hPAD1Driver == NULL) { @@ -835,6 +863,11 @@ static int LoadPAD1plugin(const char *PAD1dll) { LoadPad1Sym0(poll, "PADpoll"); LoadPad1SymN(setSensitive, "PADsetSensitive"); + memset(pads, 0, sizeof(pads)); + for (p = 0; p < sizeof(pads) / sizeof(pads[0]); p++) { + memset(pads[p].cmd4dConfig, 0xff, sizeof(pads[p].cmd4dConfig)); + } + return 0; } From 
a6e034904c2ae8b254b707a17df7de161efbfd6c Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 20 Sep 2023 22:49:40 +0300 Subject: [PATCH 372/597] cdrom: some report mode details trusting mednafen on this one libretro/pcsx_rearmed#762 --- libpcsxcore/cdrom.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 6f48df05f..0ae2c50ba 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -65,7 +65,8 @@ static struct { unsigned char Absolute[3]; } subq; unsigned char TrackChanged; - unsigned char unused3[3]; + unsigned char ReportDelay; + unsigned char unused3[2]; unsigned int freeze_ver; unsigned char Prev[4]; @@ -524,7 +525,9 @@ static void cdrPlayInterrupt_Autopause() StopCdda(); SetPlaySeekRead(cdr.StatP, 0); } - else if (((cdr.Mode & MODE_REPORT) || cdr.FastForward || cdr.FastBackward)) { + else if ((cdr.Mode & MODE_REPORT) && !cdr.ReportDelay && + ((cdr.subq.Absolute[2] & 0x0f) == 0 || cdr.FastForward || cdr.FastBackward)) + { cdr.Result[0] = cdr.StatP; cdr.Result[1] = cdr.subq.Track; cdr.Result[2] = cdr.subq.Index; @@ -560,6 +563,9 @@ static void cdrPlayInterrupt_Autopause() SetResultSize(8); setIrq(0x1001); } + + if (cdr.ReportDelay) + cdr.ReportDelay--; } // LastReadCycles @@ -817,6 +823,7 @@ void cdrInterrupt(void) { cdr.SubqForwardSectors = 1; cdr.TrackChanged = FALSE; cdr.FirstSector = 1; + cdr.ReportDelay = 60; if (!Config.Cdda) CDR_play(cdr.SetSectorPlay); From 3b988ef27435350b7b944016f6387cb189051c45 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 21 Sep 2023 01:17:37 +0300 Subject: [PATCH 373/597] psxbios: some more details --- libpcsxcore/psxbios.c | 45 +++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 83bc684a3..0cb1b8702 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -309,6 +309,12 @@ static u32 floodchk; #define HLEOP(n) SWAPu32((0x3b << 26) | (n)); 
+static u8 loadRam8(u32 addr) +{ + assert(!(addr & 0x5f800000)); + return psxM[addr & 0x1fffff]; +} + static u32 loadRam32(u32 addr) { assert(!(addr & 0x5f800000)); @@ -561,6 +567,11 @@ void psxBios_atoi() { // 0x10 s32 n = 0, f = 0; char *p = (char *)Ra0; + if (p == INVALID_PTR) { + mips_return(0); + return; + } + for (;;p++) { switch (*p) { case ' ': case '\t': continue; @@ -576,6 +587,7 @@ void psxBios_atoi() { // 0x10 v0 = (f ? -n : n); pc0 = ra; + PSXBIOS_LOG("psxBios_%s %s (%x) -> 0x%x\n", biosA0n[0x10], Ra0, a0, v0); } void psxBios_atol() { // 0x11 @@ -625,22 +637,24 @@ void psxBios_longjmp() { // 0x14 } void psxBios_strcat() { // 0x15 - char *p1 = (char *)Ra0, *p2 = (char *)Ra1; + u8 *p2 = (u8 *)Ra1; + u32 p1 = a0; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s: %s, %s\n", biosA0n[0x15], Ra0, Ra1); -#endif - if (a0 == 0 || a1 == 0) + PSXBIOS_LOG("psxBios_%s %s (%x), %s (%x)\n", biosA0n[0x15], Ra0, a0, Ra1, a1); + if (a0 == 0 || a1 == 0 || p2 == INVALID_PTR) { - v0 = 0; - pc0 = ra; + mips_return_c(0, 6); return; } - while (*p1++); - --p1; - while ((*p1++ = *p2++) != '\0'); + while (loadRam8(p1)) { + use_cycles(4); + p1++; + } + for (; *p2; p1++, p2++) + storeRam8(p1, *p2); + storeRam8(p1, 0); - v0 = a0; pc0 = ra; + mips_return_c(a0, 22); } void psxBios_strncat() { // 0x16 @@ -759,6 +773,7 @@ void psxBios_strncmp() { // 0x18 void psxBios_strcpy() { // 0x19 char *p1 = (char *)Ra0, *p2 = (char *)Ra1; + PSXBIOS_LOG("psxBios_%s %x, %s (%x)\n", biosA0n[0x19], a0, p2, a1); if (a0 == 0 || a1 == 0) { v0 = 0; @@ -900,6 +915,7 @@ void psxBios_strtok() { // 0x23 void psxBios_strstr() { // 0x24 char *p = (char *)Ra0, *p1, *p2; + PSXBIOS_LOG("psxBios_%s %s (%x), %s (%x)\n", biosA0n[0x24], p, a0, Ra1, a1); while (*p != '\0') { p1 = p; @@ -912,10 +928,12 @@ void psxBios_strstr() { // 0x24 if (*p2 == '\0') { v0 = a0 + (p - (char *)Ra0); pc0 = ra; + PSXBIOS_LOG(" -> %x\n", v0); return; } - p++; + // bug: skips the whole matched substring + 1 + p = p1 + 1; } v0 = 0; pc0 = ra; 
@@ -4067,6 +4085,7 @@ static void hleA0() { else if (biosA0[call]) biosA0[call](); + //printf("A(%02x) -> %x\n", call, v0); psxBranchTest(); } @@ -4089,6 +4108,7 @@ static void hleB0() { else if (biosB0[call]) biosB0[call](); + //printf("B(%02x) -> %x\n", call, v0); psxBranchTest(); } @@ -4106,6 +4126,7 @@ static void hleC0() { else if (biosC0[call]) biosC0[call](); + //printf("C(%02x) -> %x\n", call, v0); psxBranchTest(); } From 42eb665ec2aa1b0733644aaa4640cf4ddc7350e6 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 21 Sep 2023 20:34:02 +0300 Subject: [PATCH 374/597] log some info about bios and config --- libpcsxcore/new_dynarec/new_dynarec.c | 18 +++++++++++++++--- libpcsxcore/psxbios.c | 10 +++++++++- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index c4be88ff0..d37615e9d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -6191,6 +6191,12 @@ static noinline void new_dynarec_test(void) out = ndrc->translation_cache; } +static int get_cycle_multiplier(void) +{ + return Config.cycle_multiplier_override && Config.cycle_multiplier == CYCLE_MULT_DEFAULT + ? 
Config.cycle_multiplier_override : Config.cycle_multiplier; +} + // clear the state completely, instead of just marking // things invalid like invalidate_all_pages() does void new_dynarec_clear_full(void) @@ -6218,6 +6224,12 @@ void new_dynarec_clear_full(void) stat_clear(stat_blocks); stat_clear(stat_links); + if (cycle_multiplier_old != Config.cycle_multiplier + || new_dynarec_hacks_old != new_dynarec_hacks) + { + SysPrintf("ndrc config: mul=%d, ha=%x, pex=%d\n", + get_cycle_multiplier(), new_dynarec_hacks, Config.PreciseExceptions); + } cycle_multiplier_old = Config.cycle_multiplier; new_dynarec_hacks_old = new_dynarec_hacks; } @@ -8958,13 +8970,13 @@ static int new_recompile_block(u_int addr) return 0; } - cycle_multiplier_active = Config.cycle_multiplier_override && Config.cycle_multiplier == CYCLE_MULT_DEFAULT - ? Config.cycle_multiplier_override : Config.cycle_multiplier; + cycle_multiplier_active = get_cycle_multiplier(); source = get_source_start(start, &pagelimit); if (source == NULL) { if (addr != hack_addr) { - SysPrintf("Compile at bogus memory address: %08x\n", addr); + SysPrintf("Compile at bogus memory address: %08x, ra=%x\n", + addr, psxRegs.GPR.n.ra); hack_addr = addr; } //abort(); diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 0cb1b8702..b370966a1 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -3341,7 +3341,15 @@ void psxBiosInit() { biosA0[0x3e] = biosB0[0x3f] = psxBios_puts_psxout; biosA0[0x3f] = psxBios_printf_psxout; - if (!Config.HLE) return; + if (!Config.HLE) { + char verstr[0x24+1]; + rom32 = (u32 *)psxR; + memcpy(verstr, psxR + 0x12c, 0x24); + verstr[0x24] = 0; + SysPrintf("BIOS: %08x, '%s', '%c'\n", SWAP32(rom32[0x100/4]), + verstr, psxR[0x7ff52]); + return; + } for(i = 0; i < 256; i++) { if (biosA0[i] == NULL) biosA0[i] = psxBios_dummy; From 25427adfbaed8ac93f01b56c4fac9811dd029b51 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 23 Sep 2023 23:57:36 +0300 Subject: [PATCH 375/597] drc: adjust 
ld_use_hazard --- libpcsxcore/new_dynarec/new_dynarec.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index d37615e9d..07cd61913 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -6537,10 +6537,14 @@ static int apply_hacks(void) return 0; } -static int is_ld_use_hazard(int ld_rt, const struct decoded_insn *op) +static int is_ld_use_hazard(const struct decoded_insn *op_ld, + const struct decoded_insn *op) { - return ld_rt != 0 && (ld_rt == op->rs1 || ld_rt == op->rs2) - && op->itype != LOADLR && op->itype != CJUMP && op->itype != SJUMP; + if (op_ld->rt1 == 0 || (op_ld->rt1 != op->rs1 && op_ld->rt1 != op->rs2)) + return 0; + if (op_ld->itype == LOADLR && op->itype == LOADLR) + return op_ld->rt1 == op_ld->rs1; + return op->itype != CJUMP && op->itype != SJUMP; } static void force_intcall(int i) @@ -6932,7 +6936,7 @@ static noinline void pass1_disassemble(u_int pagelimit) else dop = &dops[t]; } - if ((dop && is_ld_use_hazard(dops[i].rt1, dop)) + if ((dop && is_ld_use_hazard(&dops[i], dop)) || (!dop && Config.PreciseExceptions)) { // jump target wants DS result - potential load delay effect SysPrintf("load delay in DS @%08x (%08x)\n", start + i*4, start); @@ -6949,7 +6953,7 @@ static noinline void pass1_disassemble(u_int pagelimit) } } else if (i > 0 && dops[i-1].is_delay_load - && is_ld_use_hazard(dops[i-1].rt1, &dops[i]) + && is_ld_use_hazard(&dops[i-1], &dops[i]) && (i < 2 || !dops[i-2].is_ujump)) { SysPrintf("load delay @%08x (%08x)\n", start + i*4, start); for (j = i - 1; j > 0 && dops[j-1].is_delay_load; j--) From de74f59932e94887debf30e5ec437d7f63591f74 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 23 Sep 2023 22:00:13 +0300 Subject: [PATCH 376/597] psxbios: experimental vsync HLE unsure how useful this is --- frontend/main.c | 1 + libpcsxcore/cdrom.c | 1 + libpcsxcore/cheat.c | 1 + libpcsxcore/misc.c | 
43 +++++++++----- libpcsxcore/new_dynarec/emu_if.c | 1 + libpcsxcore/new_dynarec/events.c | 3 +- libpcsxcore/new_dynarec/events.h | 4 +- libpcsxcore/new_dynarec/linkage_offsets.h | 2 +- libpcsxcore/new_dynarec/new_dynarec.c | 31 +++++++--- libpcsxcore/ppf.c | 1 + libpcsxcore/psxbios.c | 71 +++++++++++++++++++++++ libpcsxcore/psxbios.h | 2 + libpcsxcore/r3000a.c | 3 + libpcsxcore/r3000a.h | 5 +- libpcsxcore/sio.c | 12 +--- libpcsxcore/sio.h | 2 - 16 files changed, 141 insertions(+), 42 deletions(-) diff --git a/frontend/main.c b/frontend/main.c index 092a844a1..e23499038 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -23,6 +23,7 @@ #include "plat.h" #include "../libpcsxcore/misc.h" #include "../libpcsxcore/cheat.h" +#include "../libpcsxcore/sio.h" #include "../libpcsxcore/new_dynarec/new_dynarec.h" #include "../plugins/cdrcimg/cdrcimg.h" #include "../plugins/dfsound/spu_config.h" diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 0ae2c50ba..e232d05a4 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -23,6 +23,7 @@ #include #include "cdrom.h" +#include "misc.h" #include "ppf.h" #include "psxdma.h" #include "arm_features.h" diff --git a/libpcsxcore/cheat.c b/libpcsxcore/cheat.c index 7e9dc240c..e0cf411e0 100644 --- a/libpcsxcore/cheat.c +++ b/libpcsxcore/cheat.c @@ -19,6 +19,7 @@ #include "psxcommon.h" #include "r3000a.h" #include "psxmem.h" +#include "misc.h" #include "cheat.h" diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index feabe15b5..50caad422 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -28,6 +28,7 @@ #include "mdec.h" #include "gpu.h" #include "ppf.h" +#include "psxbios.h" #include "database.h" #include @@ -178,7 +179,10 @@ static void getFromCnf(char *buf, const char *key, u32 *val) } int LoadCdrom() { - EXE_HEADER tmpHead; + union { + EXE_HEADER h; + u32 d[sizeof(EXE_HEADER) / sizeof(u32)]; + } tmpHead; struct iso_directory_record *dir; u8 time[4], *buf; u8 mdir[4096]; @@ -186,8 +190,10 @@ int LoadCdrom() { 
u32 cnf_tcb = 4; u32 cnf_event = 16; u32 cnf_stack = 0; + u32 t_addr; + u32 t_size; u32 sp = 0; - int ret; + int i, ret; if (!Config.HLE) { if (psxRegs.pc != 0x80030000) // BiosBootBypass'ed or custom BIOS? @@ -250,32 +256,34 @@ int LoadCdrom() { } memcpy(&tmpHead, buf + 12, sizeof(EXE_HEADER)); + for (i = 2; i < sizeof(tmpHead.d) / sizeof(tmpHead.d[0]); i++) + tmpHead.d[i] = SWAP32(tmpHead.d[i]); - SysPrintf("manual booting '%s' pc=%x\n", exename, SWAP32(tmpHead.pc0)); - sp = SWAP32(tmpHead.s_addr); + SysPrintf("manual booting '%s' pc=%x\n", exename, tmpHead.h.pc0); + sp = tmpHead.h.s_addr; if (cnf_stack) sp = cnf_stack; - SetBootRegs(SWAP32(tmpHead.pc0), SWAP32(tmpHead.gp0), sp); - - tmpHead.t_size = SWAP32(tmpHead.t_size); - tmpHead.t_addr = SWAP32(tmpHead.t_addr); - - psxCpu->Clear(tmpHead.t_addr, tmpHead.t_size / 4); - //psxCpu->Reset(); + SetBootRegs(tmpHead.h.pc0, tmpHead.h.gp0, sp); // Read the rest of the main executable - while (tmpHead.t_size & ~2047) { - void *ptr = (void *)PSXM(tmpHead.t_addr); + for (t_addr = tmpHead.h.t_addr, t_size = tmpHead.h.t_size; t_size & ~2047; ) { + void *ptr = (void *)PSXM(t_addr); incTime(); READTRACK(); if (ptr != INVALID_PTR) memcpy(ptr, buf+12, 2048); - tmpHead.t_size -= 2048; - tmpHead.t_addr += 2048; + t_addr += 2048; + t_size -= 2048; } + psxCpu->Clear(tmpHead.h.t_addr, tmpHead.h.t_size / 4); + //psxCpu->Reset(); + + if (Config.HLE) + psxBiosCheckExe(tmpHead.h.t_addr, tmpHead.h.t_size); + return 0; } @@ -690,6 +698,7 @@ int SaveState(const char *file) { } int LoadState(const char *file) { + u32 biosBranchCheckOld = psxRegs.biosBranchCheck; void *f; GPUFreeze_t *gpufP = NULL; SPUFreeze_t *spufP = NULL; @@ -721,6 +730,7 @@ int LoadState(const char *file) { SaveFuncs.read(f, psxH, 0x00010000); SaveFuncs.read(f, &psxRegs, offsetof(psxRegisters, gteBusyCycle)); psxRegs.gteBusyCycle = psxRegs.cycle; + psxRegs.biosBranchCheck = ~0; psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); @@ -751,6 +761,9 @@ int LoadState(const 
char *file) { mdecFreeze(f, 0); new_dyna_freeze(f, 0); + if (Config.HLE) + psxBiosCheckExe(biosBranchCheckOld, 0x60); + result = 0; cleanup: SaveFuncs.close(f); diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index e21003c02..9d8df341e 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -12,6 +12,7 @@ #include "events.h" #include "../psxhle.h" #include "../psxinterpreter.h" +#include "../psxcounters.h" #include "../r3000a.h" #include "../gte_arm.h" #include "../gte_neon.h" diff --git a/libpcsxcore/new_dynarec/events.c b/libpcsxcore/new_dynarec/events.c index b1d427c94..2bc93326f 100644 --- a/libpcsxcore/new_dynarec/events.c +++ b/libpcsxcore/new_dynarec/events.c @@ -12,7 +12,7 @@ extern int pending_exception; u32 event_cycles[PSXINT_COUNT]; -void schedule_timeslice(void) +u32 schedule_timeslice(void) { u32 i, c = psxRegs.cycle; u32 irqs = psxRegs.interrupt; @@ -28,6 +28,7 @@ void schedule_timeslice(void) min = dif; } next_interupt = c + min; + return next_interupt; } typedef void (irq_func)(); diff --git a/libpcsxcore/new_dynarec/events.h b/libpcsxcore/new_dynarec/events.h index 919855cc1..eeec289d5 100644 --- a/libpcsxcore/new_dynarec/events.h +++ b/libpcsxcore/new_dynarec/events.h @@ -1,3 +1,5 @@ +#include "../psxcommon.h" + union psxCP0Regs_; -void schedule_timeslice(void); +u32 schedule_timeslice(void); void gen_interupt(union psxCP0Regs_ *cp0); diff --git a/libpcsxcore/new_dynarec/linkage_offsets.h b/libpcsxcore/new_dynarec/linkage_offsets.h index 23935b875..541325acd 100644 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ b/libpcsxcore/new_dynarec/linkage_offsets.h @@ -27,7 +27,7 @@ #define LO_psxRegs_subCycle (LO_muldivBusyCycle + 4) #define LO_psxRegs_biuReg (LO_psxRegs_subCycle + 4*2) #define LO_psxRegs_reserved (LO_psxRegs_biuReg + 4) -#define LO_psxRegs_end (LO_psxRegs_reserved + 4*3) +#define LO_psxRegs_end (LO_psxRegs_reserved + 4*7) #define LO_rcnts (LO_psxRegs_end) #define 
LO_rcnts_end (LO_rcnts + 7*4*4) #define LO_inv_code_start (LO_rcnts_end) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 07cd61913..090165e66 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -37,6 +37,7 @@ static Jit g_jit; #include "new_dynarec_config.h" #include "../psxhle.h" #include "../psxinterpreter.h" +#include "../psxcounters.h" #include "../gte.h" #include "emu_if.h" // emulator interface #include "linkage_offsets.h" @@ -6500,6 +6501,15 @@ void new_dynarec_print_stats(void) #endif } +static void force_intcall(int i) +{ + memset(&dops[i], 0, sizeof(dops[i])); + dops[i].itype = INTCALL; + dops[i].rs1 = CCREG; + dops[i].is_exception = 1; + cinfo[i].ba = -1; +} + static int apply_hacks(void) { int i; @@ -6534,6 +6544,18 @@ static int apply_hacks(void) return 1; } } + if (Config.HLE) + { + if (start <= psxRegs.biosBranchCheck && psxRegs.biosBranchCheck < start + i*4) + { + i = (psxRegs.biosBranchCheck - start) / 4u + 23; + if (dops[i].is_jump && !dops[i+1].bt) + { + force_intcall(i); + dops[i+1].is_ds = 0; + } + } + } return 0; } @@ -6547,15 +6569,6 @@ static int is_ld_use_hazard(const struct decoded_insn *op_ld, return op->itype != CJUMP && op->itype != SJUMP; } -static void force_intcall(int i) -{ - memset(&dops[i], 0, sizeof(dops[i])); - dops[i].itype = INTCALL; - dops[i].rs1 = CCREG; - dops[i].is_exception = 1; - cinfo[i].ba = -1; -} - static void disassemble_one(int i, u_int src) { unsigned int type, op, op2, op3; diff --git a/libpcsxcore/ppf.c b/libpcsxcore/ppf.c index 2ce1a9d9a..a7f6aefd4 100644 --- a/libpcsxcore/ppf.c +++ b/libpcsxcore/ppf.c @@ -21,6 +21,7 @@ #include "psxcommon.h" #include "ppf.h" +#include "misc.h" #include "cdrom.h" typedef struct tagPPF_DATA { diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index b370966a1..bad34578e 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -36,6 +36,7 @@ #include "sio.h" #include 
"psxhle.h" #include "psxinterpreter.h" +#include "new_dynarec/events.h" #include #ifndef PSXBIOS_LOG @@ -3330,6 +3331,8 @@ void psxBiosInit() { int i; uLongf len; + psxRegs.biosBranchCheck = ~0; + memset(psxM, 0, 0x10000); for(i = 0; i < 256; i++) { biosA0[i] = NULL; @@ -4172,6 +4175,74 @@ void (* const psxHLEt[24])() = { hleExcPadCard1, hleExcPadCard2, }; +void psxBiosCheckExe(u32 t_addr, u32 t_size) +{ + // lw $v0, 0x10($sp) + // nop + // addiu $v0, -1 + // sw $v0, 0x10($sp) + // lw $v0, 0x10($sp) + // nop + // bne $v0, $v1, not_timeout + // nop + // lui $a0, ... + static const u8 pattern[] = { + 0x10, 0x00, 0xA2, 0x8F, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0x42, 0x24, 0x10, 0x00, 0xA2, 0xAF, + 0x10, 0x00, 0xA2, 0x8F, 0x00, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x43, 0x14, 0x00, 0x00, 0x00, 0x00, + }; + u32 start = t_addr & 0x1ffffc; + u32 end = (start + t_size) & 0x1ffffc; + u32 buf[sizeof(pattern) / sizeof(u32)]; + const u32 *r32 = (u32 *)(psxM + start); + u32 i, j; + + if (end <= start) + return; + if (!Config.HLE) + return; + + memcpy(buf, pattern, sizeof(buf)); + for (i = 0; i < t_size / 4; i += j + 1) { + for (j = 0; j < sizeof(buf) / sizeof(buf[0]); j++) + if (r32[i + j] != buf[j]) + break; + if (j != sizeof(buf) / sizeof(buf[0])) + continue; + + if ((SWAP32(r32[i + j]) >> 16) != 0x3c04) // lui + continue; + SysPrintf("HLE vsync @%08x\n", start + i * 4); + psxRegs.biosBranchCheck = (t_addr & 0xa01ffffc) + i * 4; + } +} + +void psxBiosCheckBranch(void) +{ +#if 1 + // vsync HLE hack + static u32 cycles_prev, v0_prev; + u32 cycles_passed, waste_cycles; + u32 loops, v0_expect = v0_prev - 1; + if (v0 != 1) + return; + execI(&psxRegs); + cycles_passed = psxRegs.cycle - cycles_prev; + cycles_prev = psxRegs.cycle; + v0_prev = v0; + if (cycles_passed < 10 || cycles_passed > 50 || v0 != v0_expect) + return; + + waste_cycles = schedule_timeslice() - psxRegs.cycle; + loops = waste_cycles / cycles_passed; + if (loops > v0) + loops = v0; + v0 -= loops; + psxRegs.cycle += 
loops * cycles_passed; + //printf("c %4u %d\n", loops, cycles_passed); +#endif +} #define bfreeze(ptr, size) { \ if (Mode == 1) memcpy(&psxR[base], ptr, size); \ diff --git a/libpcsxcore/psxbios.h b/libpcsxcore/psxbios.h index 4ebbd2b69..c1368e67d 100644 --- a/libpcsxcore/psxbios.h +++ b/libpcsxcore/psxbios.h @@ -40,6 +40,8 @@ void psxBiosException(); void psxBiosFreeze(int Mode); void psxBiosCnfLoaded(u32 tcb_cnt, u32 evcb_cnt, u32 sp); void psxBiosSetupBootState(void); +void psxBiosCheckExe(u32 t_addr, u32 t_size); +void psxBiosCheckBranch(void); extern void (*biosA0[256])(); extern void (**biosB0)(); diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 69772d44c..8035dfd13 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -26,6 +26,7 @@ #include "mdec.h" #include "gte.h" #include "psxinterpreter.h" +#include "psxbios.h" #include "../include/compiler_features.h" R3000Acpu *psxCpu = NULL; @@ -210,6 +211,8 @@ void psxBranchTest() { psxRegs.CP0.n.Cause |= 0x400; if (((psxRegs.CP0.n.Cause | 1) & psxRegs.CP0.n.SR & 0x401) == 0x401) psxException(0, 0, &psxRegs.CP0); + else if (unlikely(psxRegs.pc == psxRegs.biosBranchCheck)) + psxBiosCheckBranch(); } void psxJumpTest() { diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 3a903b1a5..a8f39abb3 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -25,9 +25,6 @@ extern "C" { #endif #include "psxcommon.h" -#include "psxmem.h" -#include "psxcounters.h" -#include "psxbios.h" enum R3000Aexception { R3000E_Int = 0, // Interrupt @@ -224,6 +221,8 @@ typedef struct { u8 dloadSel; /* interp. delay load state */ u8 dloadReg[2]; u32 dloadVal[2]; + u32 biosBranchCheck; + u32 reserved[3]; // warning: changing anything in psxRegisters requires update of all // asm in libpcsxcore/new_dynarec/ } psxRegisters; diff --git a/libpcsxcore/sio.c b/libpcsxcore/sio.c index ab6baa12a..5d5019d24 100644 --- a/libpcsxcore/sio.c +++ b/libpcsxcore/sio.c @@ -21,6 +21,8 @@ * SIO functions. 
*/ +#include "misc.h" +#include "psxcounters.h" #include "sio.h" #include @@ -325,16 +327,6 @@ unsigned short sioReadBaud16() { return BaudReg; } -void netError() { - ClosePlugins(); - SysMessage(_("Connection closed!\n")); - - CdromId[0] = '\0'; - CdromLabel[0] = '\0'; - - SysRunGui(); -} - void sioInterrupt() { #ifdef PAD_LOG PAD_LOG("Sio Interrupt (CP0.Status = %x)\n", psxRegs.CP0.n.Status); diff --git a/libpcsxcore/sio.h b/libpcsxcore/sio.h index a554c2bbd..2dccdaea5 100644 --- a/libpcsxcore/sio.h +++ b/libpcsxcore/sio.h @@ -48,8 +48,6 @@ unsigned short sioReadMode16(); unsigned short sioReadCtrl16(); unsigned short sioReadBaud16(); -void netError(); - void sioInterrupt(); int sioFreeze(void *f, int Mode); From 0d87d06249b1c05a52288172d999648d13fab69b Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 23 Sep 2023 22:39:52 +0300 Subject: [PATCH 377/597] log some build info helpful for bug reports --- Makefile | 2 +- frontend/main.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 63ffcb628..c020274a2 100644 --- a/Makefile +++ b/Makefile @@ -266,7 +266,7 @@ endif # misc OBJS += frontend/main.o frontend/plugin.o - +frontend/main.o: CFLAGS += -DBUILTIN_GPU=$(BUILTIN_GPU) frontend/menu.o frontend/main.o: frontend/revision.h frontend/plat_sdl.o frontend/libretro.o: frontend/revision.h diff --git a/frontend/main.c b/frontend/main.c index e23499038..34f68d40d 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -436,6 +436,49 @@ static void log_wrong_cpu(void) #endif // DO_CPU_CHECKS } +#define MKSTR2(x) #x +#define MKSTR(x) MKSTR2(x) +static const char *get_build_info(void) +{ + return " (" +#ifdef __VERSION__ + "cc " __VERSION__ " " +#endif +#if defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__ == 8 + "64bit " +#elif defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__ == 4 + "32bit " +#endif +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + "be " +#endif +#if 
defined(__PIC__) || defined(__pic__) + "pic " +#endif +#if defined(__aarch64__) + "arm64" +#elif defined(__arm__) + "arm" +#endif +#ifdef __ARM_ARCH + "v" MKSTR(__ARM_ARCH) " " +#endif +#if defined(__AVX__) + "avx " +#elif defined(__SSSE3__) + "ssse3 " +#elif defined(__ARM_NEON) || defined(__ARM_NEON__) + "neon " +#endif +#if defined(LIGHTREC) + "lightrec " +#elif !defined(DRC_DISABLE) + "ari64 " +#endif + "gpu=" MKSTR(BUILTIN_GPU) + ")"; +} + int emu_core_preinit(void) { // what is the name of the config file? @@ -465,7 +508,7 @@ int emu_core_preinit(void) int emu_core_init(void) { - SysPrintf("Starting PCSX-ReARMed " REV "\n"); + SysPrintf("Starting PCSX-ReARMed " REV "%s\n", get_build_info()); #ifndef NO_FRONTEND check_profile(); From 3faf5c235830d46b111bfc6459f50071b43f6a0d Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 24 Sep 2023 23:02:12 +0300 Subject: [PATCH 378/597] input changes part4 libretro/pcsx_rearmed#765 --- include/psemu_plugin_defs.h | 27 +++++++++++--------- libpcsxcore/misc.c | 2 ++ libpcsxcore/plugins.c | 49 ++++++++++++++++++++++++++++--------- libpcsxcore/plugins.h | 2 ++ 4 files changed, 58 insertions(+), 22 deletions(-) diff --git a/include/psemu_plugin_defs.h b/include/psemu_plugin_defs.h index d005d080c..c7a0d1c99 100644 --- a/include/psemu_plugin_defs.h +++ b/include/psemu_plugin_defs.h @@ -200,6 +200,9 @@ typedef struct // controller type - fill it withe predefined values above unsigned char controllerType; + unsigned char padding; + unsigned short saveSize; + int portMultitap; int requestPadIndex; @@ -214,22 +217,24 @@ typedef struct // values are in range -128 - 127 unsigned char moveX, moveY; + // Lightgun values + int absoluteX, absoluteY; + unsigned char Vib[2]; unsigned char VibF[2]; - //configuration mode Request 0x43 - int configMode; - - unsigned char txData[34]; - + struct { + unsigned char configMode; + unsigned char padMode; // 0 : digital 1: analog + unsigned char cmd4dConfig[6]; + unsigned int lastUseFrame; + unsigned int 
digitalModeFrames; + } ds; unsigned char multitapLongModeEnabled; - unsigned char PadMode; // 0 : digital 1: analog - unsigned char cmd4dConfig[6]; - unsigned char reserved[46]; - - //Lightgun values - int absoluteX,absoluteY; + unsigned char padding2; + unsigned char txData[34]; + unsigned char reserved[26]; } PadDataS; /* NET PlugIn v2 */ diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 50caad422..da1dbea83 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -690,6 +690,7 @@ int SaveState(const char *file) { psxRcntFreeze(f, 1); mdecFreeze(f, 1); new_dyna_freeze(f, 1); + padFreeze(f, 1); result = 0; cleanup: @@ -760,6 +761,7 @@ int LoadState(const char *file) { psxRcntFreeze(f, 0); mdecFreeze(f, 0); new_dyna_freeze(f, 0); + padFreeze(f, 0); if (Config.HLE) psxBiosCheckExe(biosBranchCheckOld, 0x60); diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index fb0ea614e..6e5cdbfec 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -23,6 +23,7 @@ #include "plugins.h" #include "cdriso.h" +#include "psxcounters.h" static char IsoFile[MAXPATHLEN] = ""; static s64 cdOpenCaseTime = 0; @@ -480,7 +481,7 @@ enum { static void initBufForRequest(int padIndex, char value) { - if (pads[padIndex].configMode) { + if (pads[padIndex].ds.configMode) { buf[0] = 0xf3; buf[1] = 0x5a; respSize = 8; } @@ -489,12 +490,24 @@ static void initBufForRequest(int padIndex, char value) { return; } + // switch to analog mode automatically after the game finishes init + if (value == 0x42 && pads[padIndex].ds.padMode == 0) + pads[padIndex].ds.digitalModeFrames++; + if (pads[padIndex].ds.digitalModeFrames == 60*4) { + pads[padIndex].ds.padMode = 1; + pads[padIndex].ds.digitalModeFrames = 0; + } + + if ((u32)(frame_counter - pads[padIndex].ds.lastUseFrame) > 60u) + pads[padIndex].ds.padMode = 0; // according to nocash + pads[padIndex].ds.lastUseFrame = frame_counter; + switch (value) { // keystate already in buffer, set by PADstartPoll_() //case 
CMD_READ_DATA_AND_VIBRATE : // break; case CMD_CONFIG_MODE : - if (pads[padIndex].configMode) { + if (pads[padIndex].ds.configMode) { memcpy(buf, resp43, 8); break; } @@ -505,7 +518,7 @@ static void initBufForRequest(int padIndex, char value) { break; case CMD_QUERY_MODEL_AND_MODE : memcpy(buf, resp45, 8); - buf[4] = pads[padIndex].PadMode; + buf[4] = pads[padIndex].ds.padMode; break; case CMD_QUERY_ACT : memcpy(buf, resp46_00, 8); @@ -517,7 +530,7 @@ static void initBufForRequest(int padIndex, char value) { memcpy(buf, resp4C_00, 8); break; case CMD_VIBRATION_TOGGLE: // 4d - memcpy(buf + 2, pads[padIndex].cmd4dConfig, 6); + memcpy(buf + 2, pads[padIndex].ds.cmd4dConfig, 6); break; case REQ40 : memcpy(buf, resp40, 8); @@ -548,9 +561,9 @@ static void reqIndex2Treatment(int padIndex, u8 value) { case CMD_CONFIG_MODE : //0x43 if (value == 0) { - pads[padIndex].configMode = 0; + pads[padIndex].ds.configMode = 0; } else { - pads[padIndex].configMode = 1; + pads[padIndex].ds.configMode = 1; } break; case CMD_SET_MODE_AND_LOCK : @@ -558,7 +571,7 @@ static void reqIndex2Treatment(int padIndex, u8 value) { //0x01 analog ON //0x00 analog OFF if ((value & ~1) == 0) - pads[padIndex].PadMode = value; + pads[padIndex].ds.padMode = value; break; case CMD_QUERY_ACT : //0x46 @@ -585,7 +598,7 @@ static void vibrate(int padIndex) { pad->VibF[0] = pad->Vib[0]; pad->VibF[1] = pad->Vib[1]; plat_trigger_vibrate(padIndex, pad->VibF[0], pad->VibF[1]); - //printf("vibration pad %i\n", padIndex); + //printf("vib%i %02x %02x\n", padIndex, pad->VibF[0], pad->VibF[1]); } } @@ -671,7 +684,7 @@ static void PADstartPoll_(PadDataS *pad) { respSize = 4; break; case PSE_PAD_TYPE_ANALOGPAD: // scph1150 - if (pad->PadMode == 0) + if (pad->ds.padMode == 0) goto standard; stdpar[0] = 0x73; stdpar[1] = 0x5a; @@ -730,7 +743,7 @@ static void PADpoll_dualshock(int port, unsigned char value, int pos) break; case 7: if (pads[port].txData[0] == CMD_VIBRATION_TOGGLE) - memcpy(pads[port].cmd4dConfig, 
pads[port].txData + 2, 6); + memcpy(pads[port].ds.cmd4dConfig, pads[port].txData + 2, 6); break; } } @@ -865,7 +878,7 @@ static int LoadPAD1plugin(const char *PAD1dll) { memset(pads, 0, sizeof(pads)); for (p = 0; p < sizeof(pads) / sizeof(pads[0]); p++) { - memset(pads[p].cmd4dConfig, 0xff, sizeof(pads[p].cmd4dConfig)); + memset(pads[p].ds.cmd4dConfig, 0xff, sizeof(pads[p].ds.cmd4dConfig)); } return 0; @@ -940,6 +953,20 @@ static int LoadPAD2plugin(const char *PAD2dll) { return 0; } +int padFreeze(void *f, int Mode) { + size_t i; + + for (i = 0; i < sizeof(pads) / sizeof(pads[0]); i++) { + pads[i].saveSize = sizeof(pads[i]); + gzfreeze(&pads[i], sizeof(pads[i])); + if (Mode == 0 && pads[i].saveSize != sizeof(pads[i])) + SaveFuncs.seek(f, pads[i].saveSize - sizeof(pads[i]), SEEK_CUR); + } + + return 0; +} + + void *hNETDriver = NULL; void CALLBACK NET__setInfo(netInfo *info) {} diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index c563470bf..e43ff9add 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -381,6 +381,8 @@ const char *GetIsoFile(void); boolean UsingIso(void); void SetCdOpenCaseTime(s64 time); +int padFreeze(void *f, int Mode); + extern void pl_gun_byte2(int port, unsigned char byte); extern void plat_trigger_vibrate(int pad, int low, int high); extern void plat_get_psx_resolution(int *xres, int *yres); From ba8d6a2dbb802b215c1657c5b456cb8c65a18520 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 27 Sep 2023 01:09:51 +0300 Subject: [PATCH 379/597] plugin_lib: fix a silly crash --- frontend/plugin_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 8a6b6adb1..917ae1796 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -137,7 +137,7 @@ static __attribute__((noinline)) void draw_active_chans(int vout_w, int vout_h) unsigned short *d, p; int c, x, y; - if (dest == NULL || pl_vout_bpp != 16) + if (pl_vout_buf == NULL || pl_vout_bpp != 16) return; 
spu_get_debug_info(&live_chans, &run_chans, &fmod_chans, &noise_chans); From 0a42e81c36ab5ad2008ece5d9c288291e2f8c6fc Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 27 Sep 2023 01:20:27 +0300 Subject: [PATCH 380/597] spu: avoid relying on signed overflow undefined behavior didn't seem to cause any issue in practice, but who knows with all the different compilers --- plugins/dfsound/adsr.c | 45 ++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/plugins/dfsound/adsr.c b/plugins/dfsound/adsr.c index 9e328620a..cb366dc4e 100644 --- a/plugins/dfsound/adsr.c +++ b/plugins/dfsound/adsr.c @@ -65,7 +65,7 @@ INLINE void StartADSR(int ch) // MIX ADSR static int MixADSR(ADSRInfoEx *adsr, int ns_to) { - int EnvelopeVol = adsr->EnvelopeVol; + unsigned int EnvelopeVol = adsr->EnvelopeVol; int ns = 0, val, rto, level; if (adsr->State == ADSR_RELEASE) @@ -77,10 +77,10 @@ static int MixADSR(ADSRInfoEx *adsr, int ns_to) for (; ns < ns_to; ns++) { EnvelopeVol += ((long long)val * EnvelopeVol) >> (15+16); - if (EnvelopeVol <= 0) + if ((signed int)EnvelopeVol <= 0) break; - ChanBuf[ns] *= EnvelopeVol >> 21; + ChanBuf[ns] *= (signed int)EnvelopeVol >> 21; ChanBuf[ns] >>= 10; } } @@ -89,10 +89,10 @@ static int MixADSR(ADSRInfoEx *adsr, int ns_to) for (; ns < ns_to; ns++) { EnvelopeVol += val; - if (EnvelopeVol <= 0) + if ((signed int)EnvelopeVol <= 0) break; - ChanBuf[ns] *= EnvelopeVol >> 21; + ChanBuf[ns] *= (signed int)EnvelopeVol >> 21; ChanBuf[ns] >>= 10; } } @@ -111,14 +111,14 @@ static int MixADSR(ADSRInfoEx *adsr, int ns_to) for (; ns < ns_to; ns++) { EnvelopeVol += val; - if (EnvelopeVol < 0) + if ((signed int)EnvelopeVol < 0) // overflow break; - ChanBuf[ns] *= EnvelopeVol >> 21; + ChanBuf[ns] *= (signed int)EnvelopeVol >> 21; ChanBuf[ns] >>= 10; } - if (EnvelopeVol < 0) // overflow + if ((signed int)EnvelopeVol < 0) // overflow { EnvelopeVol = 0x7fffffff; adsr->State = ADSR_DECAY; @@ -136,7 +136,7 @@ static int 
MixADSR(ADSRInfoEx *adsr, int ns_to) for (; ns < ns_to; ) { EnvelopeVol += ((long long)val * EnvelopeVol) >> (15+16); - if (EnvelopeVol < 0) + if ((signed int)EnvelopeVol < 0) EnvelopeVol = 0; ChanBuf[ns] *= EnvelopeVol >> 21; @@ -170,14 +170,14 @@ static int MixADSR(ADSRInfoEx *adsr, int ns_to) for (; ns < ns_to; ns++) { EnvelopeVol += val; - if ((unsigned int)EnvelopeVol >= 0x7fe00000) + if (EnvelopeVol >= 0x7fe00000) { EnvelopeVol = 0x7fffffff; ns = ns_to; break; } - ChanBuf[ns] *= EnvelopeVol >> 21; + ChanBuf[ns] *= (signed int)EnvelopeVol >> 21; ChanBuf[ns] >>= 10; } } @@ -189,10 +189,10 @@ static int MixADSR(ADSRInfoEx *adsr, int ns_to) for (; ns < ns_to; ns++) { EnvelopeVol += ((long long)val * EnvelopeVol) >> (15+16); - if (EnvelopeVol < 0) + if ((signed int)EnvelopeVol < 0) break; - ChanBuf[ns] *= EnvelopeVol >> 21; + ChanBuf[ns] *= (signed int)EnvelopeVol >> 21; ChanBuf[ns] >>= 10; } } @@ -201,10 +201,10 @@ static int MixADSR(ADSRInfoEx *adsr, int ns_to) for (; ns < ns_to; ns++) { EnvelopeVol += val; - if (EnvelopeVol < 0) + if ((signed int)EnvelopeVol < 0) break; - ChanBuf[ns] *= EnvelopeVol >> 21; + ChanBuf[ns] *= (signed int)EnvelopeVol >> 21; ChanBuf[ns] >>= 10; } } @@ -219,7 +219,7 @@ static int MixADSR(ADSRInfoEx *adsr, int ns_to) static int SkipADSR(ADSRInfoEx *adsr, int ns_to) { - int EnvelopeVol = adsr->EnvelopeVol; + unsigned int EnvelopeVol = adsr->EnvelopeVol; int ns = 0, val, rto, level; int64_t v64; @@ -231,7 +231,7 @@ static int SkipADSR(ADSRInfoEx *adsr, int ns_to) for (; ns < ns_to; ns++) { EnvelopeVol += ((long long)val * EnvelopeVol) >> (15+16); - if (EnvelopeVol <= 0) + if ((signed int)EnvelopeVol <= 0) break; } } @@ -257,10 +257,10 @@ static int SkipADSR(ADSRInfoEx *adsr, int ns_to) for (; ns < ns_to; ns++) { EnvelopeVol += val; - if (EnvelopeVol < 0) + if ((signed int)EnvelopeVol < 0) break; } - if (EnvelopeVol < 0) // overflow + if ((signed int)EnvelopeVol < 0) // overflow { EnvelopeVol = 0x7fffffff; adsr->State = ADSR_DECAY; @@ 
-278,7 +278,7 @@ static int SkipADSR(ADSRInfoEx *adsr, int ns_to) for (; ns < ns_to; ) { EnvelopeVol += ((long long)val * EnvelopeVol) >> (15+16); - if (EnvelopeVol < 0) + if ((signed int)EnvelopeVol < 0) EnvelopeVol = 0; ns++; @@ -320,7 +320,7 @@ static int SkipADSR(ADSRInfoEx *adsr, int ns_to) for (; ns < ns_to; ns++) { EnvelopeVol += ((long long)val * EnvelopeVol) >> (15+16); - if (EnvelopeVol < 0) + if ((signed int)EnvelopeVol < 0) break; } } @@ -330,10 +330,7 @@ static int SkipADSR(ADSRInfoEx *adsr, int ns_to) v64 += (int64_t)val * (ns_to - ns); EnvelopeVol = (int)v64; if (v64 > 0) - { ns = ns_to; - break; - } } } break; From eb38e4a2fbcdece0afdea63f6f0ad3b2a84cb8a5 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 27 Sep 2023 02:24:32 +0300 Subject: [PATCH 381/597] rm leftover debug code --- libpcsxcore/new_dynarec/linkage_arm64.S | 2 +- libpcsxcore/new_dynarec/pcsxmem.c | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 7b77c62e9..31b7b9f90 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -210,7 +210,7 @@ FUNCTION(new_dyna_leave): .align 2 .macro memhandler_pre - /* w0 = adddr/data, x1 = rhandler, w2 = cycles, x3 = whandler */ + /* w0 = addr/data, x1 = rhandler, w2 = cycles, x3 = whandler */ ldr w4, [rFP, #LO_last_count] add w4, w4, w2 str w4, [rFP, #LO_cycle] diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index 8f79c50ae..da5b67e1f 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -244,9 +244,6 @@ static u32 read_biu(u32 addr) if (addr != 0xfffe0130) return read_mem_dummy(addr); - FILE *f = fopen("/tmp/psxbiu.bin", "wb"); - fwrite(psxM, 1, 0x200000, f); - fclose(f); memprintf("read_biu %08x @%08x %u\n", psxRegs.biuReg, psxRegs.pc, psxRegs.cycle); return psxRegs.biuReg; From d358733b8461f6fa182b33d29f0676c2df505854 Mon Sep 17 
00:00:00 2001 From: notaz Date: Wed, 27 Sep 2023 23:12:48 +0300 Subject: [PATCH 382/597] spu: try to improve timing notaz/pcsx_rearmed#305 --- frontend/plugin.c | 4 +-- libpcsxcore/new_dynarec/pcsxmem.c | 24 ++++++++++++---- libpcsxcore/plugins.h | 2 +- libpcsxcore/psxhw.c | 6 ++-- plugins/dfsound/dma.c | 4 +-- plugins/dfsound/externals.h | 5 ++-- plugins/dfsound/registers.c | 22 +++++++++------ plugins/dfsound/spu.c | 47 ++++++++++++++++++++----------- 8 files changed, 73 insertions(+), 41 deletions(-) diff --git a/frontend/plugin.c b/frontend/plugin.c index 3374141ed..2f8dcc2f9 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -51,7 +51,7 @@ extern long CALLBACK SPUinit(void); extern long CALLBACK SPUshutdown(void); extern long CALLBACK SPUclose(void); extern void CALLBACK SPUwriteRegister(unsigned long, unsigned short, unsigned int); -extern unsigned short CALLBACK SPUreadRegister(unsigned long); +extern unsigned short CALLBACK SPUreadRegister(unsigned long, unsigned int); extern void CALLBACK SPUwriteDMAMem(unsigned short *, int, unsigned int); extern void CALLBACK SPUreadDMAMem(unsigned short *, int, unsigned int); extern void CALLBACK SPUplayADPCMchannel(void *, unsigned int, int); @@ -309,7 +309,7 @@ pc_hook_func_ret(long, GPU_dmaChain, (uint32_t *a0, int32_t a1), (a0, a1), P pc_hook_func (GPU_updateLace, (void), (), PCNT_GPU) pc_hook_func (SPU_writeRegister, (unsigned long a0, unsigned short a1, uint32_t a2), (a0, a1, a2), PCNT_SPU) -pc_hook_func_ret(unsigned short,SPU_readRegister, (unsigned long a0), (a0), PCNT_SPU) +pc_hook_func_ret(unsigned short,SPU_readRegister, (unsigned long a0, , unsigned int a1), (a0, a1), PCNT_SPU) pc_hook_func (SPU_writeDMAMem, (unsigned short *a0, int a1, uint32_t a2), (a0, a1, a2), PCNT_SPU) pc_hook_func (SPU_readDMAMem, (unsigned short *a0, int a1, uint32_t a2), (a0, a1, a2), PCNT_SPU) pc_hook_func (SPU_playADPCMchannel, (void *a0, unsigned int a1, int a2), (a0, a1, a2), PCNT_SPU) diff --git 
a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index da5b67e1f..7c670f8bc 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -180,6 +180,19 @@ make_dma_func(3) make_dma_func(4) make_dma_func(6) +static u32 io_spu_read16(u32 addr) +{ + return SPU_readRegister(addr, psxRegs.cycle); +} + +static u32 io_spu_read32(u32 addr) +{ + u32 ret; + ret = SPU_readRegister(addr, psxRegs.cycle); + ret |= SPU_readRegister(addr + 2, psxRegs.cycle) << 16; + return ret; +} + static void io_spu_write16(u32 value) { // meh @@ -387,6 +400,11 @@ void new_dyna_pcsx_mem_init(void) map_item(&mem_iortab[IOMEM8(0x1802)], cdrRead2, 1); map_item(&mem_iortab[IOMEM8(0x1803)], cdrRead3, 1); + for (i = 0x1c00; i < 0x2000; i += 2) { + map_item(&mem_iortab[IOMEM16(i)], io_spu_read16, 1); + map_item(&mem_iortab[IOMEM32(i)], io_spu_read32, 1); + } + // write(u32 data) map_item(&mem_iowtab[IOMEM32(0x1040)], io_write_sio32, 1); map_item(&mem_iowtab[IOMEM32(0x1070)], psxHwWriteIstat, 1); @@ -456,14 +474,8 @@ void new_dyna_pcsx_mem_init(void) void new_dyna_pcsx_mem_reset(void) { - int i; - // plugins might change so update the pointers map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); - - for (i = 0x1c00; i < 0x2000; i += 2) - map_item(&mem_iortab[IOMEM16(i)], SPU_readRegister, 1); - map_item(&mem_iowtab[IOMEM32(0x1810)], GPU_writeData, 1); } diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index e43ff9add..fbbd44f10 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -166,7 +166,7 @@ typedef long (CALLBACK* SPUinit)(void); typedef long (CALLBACK* SPUshutdown)(void); typedef long (CALLBACK* SPUclose)(void); typedef void (CALLBACK* SPUwriteRegister)(unsigned long, unsigned short, unsigned int); -typedef unsigned short (CALLBACK* SPUreadRegister)(unsigned long); +typedef unsigned short (CALLBACK* SPUreadRegister)(unsigned long, unsigned int); typedef void (CALLBACK* SPUwriteDMAMem)(unsigned short *, int, unsigned 
int); typedef void (CALLBACK* SPUreadDMAMem)(unsigned short *, int, unsigned int); typedef void (CALLBACK* SPUplayADPCMchannel)(xa_decode_t *, unsigned int, int); diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index fb365c07d..ecb8eaf9f 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -253,7 +253,7 @@ u16 psxHwRead16(u32 add) { // falthrough default: if (0x1f801c00 <= add && add < 0x1f802000) - return SPU_readRegister(add); + return SPU_readRegister(add, psxRegs.cycle); hard = psxHu16(add); #ifdef PSXHW_LOG PSXHW_LOG("*Unkwnown 16bit read at address %x\n", add); @@ -411,8 +411,8 @@ u32 psxHwRead32(u32 add) { // falthrough default: if (0x1f801c00 <= add && add < 0x1f802000) { - hard = SPU_readRegister(add); - hard |= SPU_readRegister(add + 2) << 16; + hard = SPU_readRegister(add, psxRegs.cycle); + hard |= SPU_readRegister(add + 2, psxRegs.cycle) << 16; return hard; } hard = psxHu32(add); diff --git a/plugins/dfsound/dma.c b/plugins/dfsound/dma.c index ada007f01..1aebfce5f 100644 --- a/plugins/dfsound/dma.c +++ b/plugins/dfsound/dma.c @@ -41,7 +41,7 @@ void CALLBACK SPUreadDMAMem(unsigned short *pusPSXMem, int iSize, unsigned int addr = spu.spuAddr, irq_addr = regAreaGet(H_SPUirqAddr) << 3; int i, irq; - do_samples_if_needed(cycles, 1); + do_samples_if_needed(cycles, 1, 2); irq = addr <= irq_addr && irq_addr < addr + iSize*2; for(i = 0; i < iSize; i++) @@ -66,7 +66,7 @@ void CALLBACK SPUwriteDMAMem(unsigned short *pusPSXMem, int iSize, unsigned int addr = spu.spuAddr, irq_addr = regAreaGet(H_SPUirqAddr) << 3; int i, irq; - do_samples_if_needed(cycles, 1); + do_samples_if_needed(cycles, 1, 2); spu.bMemDirty = 1; irq = addr <= irq_addr && irq_addr < addr + iSize*2; diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index f638b94fa..f3fbc678c 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -118,6 +118,7 @@ typedef struct unsigned int prevflags:3; // flags from previous block unsigned int 
bIgnoreLoop:1; // Ignore loop unsigned int bNewPitch:1; // pitch changed + unsigned int bStarting:1; // starting after keyon union { struct { int iLeftVolume; // left volume @@ -261,9 +262,9 @@ void do_samples(unsigned int cycles_to, int do_sync); void schedule_next_irq(void); void check_irq_io(unsigned int addr); -#define do_samples_if_needed(c, sync) \ +#define do_samples_if_needed(c, sync, samples) \ do { \ - if (sync || (int)((c) - spu.cycles_played) >= 16 * 768) \ + if (sync || (int)((c) - spu.cycles_played) >= (samples) * 768) \ do_samples(c, sync); \ } while (0) diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index bcac4d9ad..a20538a65 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -60,7 +60,7 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, if (val == 0 && (r & 0xff8) == 0xd88) return; - do_samples_if_needed(cycles, 0); + do_samples_if_needed(cycles, 0, 16); if(r>=0x0c00 && r<0x0d80) // some channel info? 
{ @@ -213,10 +213,12 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, */ //-------------------------------------------------// case H_SPUon1: + do_samples_if_needed(cycles, 0, 2); SoundOn(0,16,val); break; //-------------------------------------------------// - case H_SPUon2: + case H_SPUon2: + do_samples_if_needed(cycles, 0, 2); SoundOn(16,24,val); break; //-------------------------------------------------// @@ -309,7 +311,7 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, // READ REGISTER: called by main emu //////////////////////////////////////////////////////////////////////// -unsigned short CALLBACK SPUreadRegister(unsigned long reg) +unsigned short CALLBACK SPUreadRegister(unsigned long reg, unsigned int cycles) { const unsigned long r = reg & 0xffe; @@ -319,12 +321,13 @@ unsigned short CALLBACK SPUreadRegister(unsigned long reg) { case 12: // get adsr vol { - const int ch=(r>>4)-0xc0; - if(spu.dwNewChannel&(1<>16); + // this used to return 1 immediately after keyon to deal with + // some poor timing, but that causes Rayman 2 to lose track of + // it's channels on busy scenes and start looping some of them forever + const int ch = (r>>4) - 0xc0; + if (spu.s_chan[ch].bStarting) + do_samples_if_needed(cycles, 0, 2); + return (unsigned short)(spu.s_chan[ch].ADSRX.EnvelopeVol >> 16); } case 14: // get loop address @@ -404,6 +407,7 @@ static void SoundOn(int start,int end,unsigned short val) if((val&1) && regAreaGetCh(ch, 6)) // mmm... start has to be set before key on !?! 
{ spu.s_chan[ch].bIgnoreLoop = 0; + spu.s_chan[ch].bStarting = 1; spu.dwNewChannel|=(1<prevflags=2; - s_chan->iSBPos=27; - s_chan->spos=0; + s_chan->prevflags = 2; + s_chan->iSBPos = 27; + s_chan->spos = 0; + s_chan->bStarting = 1; s_chan->pCurr = spu.spuMemC + ((regAreaGetCh(ch, 6) & ~1) << 3); @@ -421,8 +422,11 @@ static int decode_block(void *unused, int ch, int *SB) int ret = 0; start = s_chan->pCurr; // set up the current pos - if (start == spu.spuMemC) // ? + if (start - spu.spuMemC < 0x1000) { // ? + //log_unhandled("ch%02d plays decode bufs @%05lx\n", + // ch, (long)(start - spu.spuMemC)); ret = 1; + } if (s_chan->prevflags & 1) // 1: stop/loop { @@ -448,6 +452,7 @@ static int decode_block(void *unused, int ch, int *SB) s_chan->pCurr = start; // store values for next cycle s_chan->prevflags = flags; + s_chan->bStarting = 0; return ret; } @@ -477,6 +482,7 @@ static int skip_block(int ch) s_chan->pCurr = start; s_chan->prevflags = flags; + s_chan->bStarting = 0; return ret; } @@ -794,12 +800,14 @@ static void do_channels(int ns_to) d = do_samples_default(decode_block, NULL, ch, ns_to, SB, sinc, &s_chan->spos, &s_chan->iSBPos); - d = MixADSR(&s_chan->ADSRX, d); - if (d < ns_to) { - spu.dwChannelsAudible &= ~(1 << ch); - s_chan->ADSRX.State = ADSR_RELEASE; - s_chan->ADSRX.EnvelopeVol = 0; - memset(&ChanBuf[d], 0, (ns_to - d) * sizeof(ChanBuf[0])); + if (!s_chan->bStarting) { + d = MixADSR(&s_chan->ADSRX, d); + if (d < ns_to) { + spu.dwChannelsAudible &= ~(1 << ch); + s_chan->ADSRX.State = ADSR_RELEASE; + s_chan->ADSRX.EnvelopeVol = 0; + memset(&ChanBuf[d], 0, (ns_to - d) * sizeof(ChanBuf[0])); + } } if (ch == 1 || ch == 3) @@ -965,12 +973,14 @@ static void queue_channel_work(int ns_to, unsigned int silentch) d = do_samples_skip(ch, ns_to); work->ch[ch].ns_to = d; - // note: d is not accurate on skip - d = SkipADSR(&s_chan->ADSRX, d); - if (d < ns_to) { - spu.dwChannelsAudible &= ~(1 << ch); - s_chan->ADSRX.State = ADSR_RELEASE; - s_chan->ADSRX.EnvelopeVol = 0; 
+ if (!s_chan->bStarting) { + // note: d is not accurate on skip + d = SkipADSR(&s_chan->ADSRX, d); + if (d < ns_to) { + spu.dwChannelsAudible &= ~(1 << ch); + s_chan->ADSRX.State = ADSR_RELEASE; + s_chan->ADSRX.EnvelopeVol = 0; + } } s_chan->bNewPitch = 0; } @@ -1178,6 +1188,11 @@ void do_samples(unsigned int cycles_to, int do_direct) spu.cycles_played += ns_to * 768; spu.decode_pos = (spu.decode_pos + ns_to) & 0x1ff; +#if 0 + static int ccount; static time_t ctime; ccount++; + if (time(NULL) != ctime) + { printf("%d\n", ccount); ccount = 0; ctime = time(NULL); } +#endif } static void do_samples_finish(int *SSumLR, int ns_to, From 0471495852ef192cd232e97dddfd8b79b5238a1f Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 30 Sep 2023 21:08:47 +0300 Subject: [PATCH 383/597] drc: make hash table issues easier to debug --- libpcsxcore/new_dynarec/new_dynarec.c | 55 +++++++++++++++++++-------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 090165e66..738401374 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -648,6 +648,24 @@ static struct ht_entry *hash_table_get(u_int vaddr) return &hash_table[((vaddr>>16)^vaddr)&0xFFFF]; } +#define HASH_TABLE_BAD 0xbac + +static void hash_table_clear(void) +{ + struct ht_entry *ht_bin; + int i, j; + for (i = 0; i < ARRAY_SIZE(hash_table); i++) { + for (j = 0; j < ARRAY_SIZE(hash_table[i].vaddr); j++) { + hash_table[i].vaddr[j] = ~0; + hash_table[i].tcaddr[j] = (void *)(uintptr_t)HASH_TABLE_BAD; + } + } + // don't allow ~0 to hit + ht_bin = hash_table_get(~0); + for (j = 0; j < ARRAY_SIZE(ht_bin->vaddr); j++) + ht_bin->vaddr[j] = 1; +} + static void hash_table_add(u_int vaddr, void *tcaddr) { struct ht_entry *ht_bin = hash_table_get(vaddr); @@ -663,17 +681,30 @@ static void hash_table_remove(int vaddr) //printf("remove hash: %x\n",vaddr); struct ht_entry *ht_bin = hash_table_get(vaddr); if 
(ht_bin->vaddr[1] == vaddr) { - ht_bin->vaddr[1] = -1; - ht_bin->tcaddr[1] = NULL; + ht_bin->vaddr[1] = ~0; + ht_bin->tcaddr[1] = (void *)(uintptr_t)HASH_TABLE_BAD; } if (ht_bin->vaddr[0] == vaddr) { ht_bin->vaddr[0] = ht_bin->vaddr[1]; ht_bin->tcaddr[0] = ht_bin->tcaddr[1]; - ht_bin->vaddr[1] = -1; - ht_bin->tcaddr[1] = NULL; + ht_bin->vaddr[1] = ~0; + ht_bin->tcaddr[1] = (void *)(uintptr_t)HASH_TABLE_BAD; } } +static void mini_ht_clear(void) +{ +#ifdef USE_MINI_HT + int i; + for (i = 0; i < ARRAY_SIZE(mini_ht) - 1; i++) { + mini_ht[i][0] = ~0; + mini_ht[i][1] = HASH_TABLE_BAD; + } + mini_ht[i][0] = 1; + mini_ht[i][1] = HASH_TABLE_BAD; +#endif +} + static void mark_invalid_code(u_int vaddr, u_int len, char invalid) { u_int vaddr_m = vaddr & 0x1fffffff; @@ -1568,9 +1599,7 @@ static int invalidate_range(u_int start, u_int end, } if (hit) { do_clear_cache(); -#ifdef USE_MINI_HT - memset(mini_ht, -1, sizeof(mini_ht)); -#endif + mini_ht_clear(); } if (inv_start <= (start_m & ~0xfff) && inv_end >= (start_m | 0xfff)) @@ -1627,10 +1656,8 @@ void new_dynarec_invalidate_all_pages(void) } } - #ifdef USE_MINI_HT - memset(mini_ht, -1, sizeof(mini_ht)); - #endif do_clear_cache(); + mini_ht_clear(); } // Add an entry to jump_out after making a link @@ -6205,9 +6232,9 @@ void new_dynarec_clear_full(void) int n; out = ndrc->translation_cache; memset(invalid_code,1,sizeof(invalid_code)); - memset(hash_table,0xff,sizeof(hash_table)); - memset(mini_ht,-1,sizeof(mini_ht)); memset(shadow,0,sizeof(shadow)); + hash_table_clear(); + mini_ht_clear(); copy=shadow; expirep = EXPIRITY_OFFSET; pending_exception=0; @@ -8882,9 +8909,7 @@ static noinline void pass10_expire_blocks(void) hit = blocks_remove_matching_addrs(&blocks[block_i], base_offs, base_shift); if (hit) { do_clear_cache(); - #ifdef USE_MINI_HT - memset(mini_ht, -1, sizeof(mini_ht)); - #endif + mini_ht_clear(); } } else From dc3178e9ced416632e8b5b5486bb35a561a6b2b9 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 30 Sep 2023 
22:43:03 +0300 Subject: [PATCH 384/597] psxbios: more careful cnf parsing --- libpcsxcore/misc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index da1dbea83..d748ac072 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -22,6 +22,7 @@ */ #include +#include #include #include "misc.h" #include "cdrom.h" @@ -174,8 +175,13 @@ static void getFromCnf(char *buf, const char *key, u32 *val) buf = strstr(buf, key); if (buf) buf = strchr(buf, '='); - if (buf) - *val = strtol(buf + 1, NULL, 16); + if (buf) { + unsigned long v; + errno = 0; + v = strtoul(buf + 1, NULL, 16); + if (errno == 0) + *val = v; + } } int LoadCdrom() { From b34d6a805a50ee4a897b0a53bbc0b89e3eb7f72e Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 2 Oct 2023 00:02:39 +0300 Subject: [PATCH 385/597] patch up some savestate issues exposed by RetroArch's run-ahead/rewind functionality --- frontend/libretro.c | 1 - libpcsxcore/mdec.c | 13 +++-- libpcsxcore/misc.c | 4 +- libpcsxcore/new_dynarec/events.h | 2 + libpcsxcore/psxbios.c | 5 +- libpcsxcore/psxbios.h | 2 +- libpcsxcore/psxcounters.c | 8 +--- plugins/dfsound/externals.h | 2 +- plugins/dfsound/freeze.c | 81 +++++++++++++++++++++++--------- plugins/dfsound/spu.c | 8 ++-- plugins/dfsound/spu.h | 2 + plugins/dfsound/spu_config.h | 1 - plugins/dfsound/xa.c | 9 ++-- 13 files changed, 86 insertions(+), 52 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index a26d46626..b8b11665f 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -3120,7 +3120,6 @@ void retro_init(void) #endif pl_rearmed_cbs.gpu_peops.iUseDither = 1; pl_rearmed_cbs.gpu_peops.dwActFixes = GPU_PEOPS_OLD_FRAME_SKIP; - spu_config.iUseFixedUpdates = 1; SaveFuncs.open = save_open; SaveFuncs.read = save_read; diff --git a/libpcsxcore/mdec.c b/libpcsxcore/mdec.c index 167a1cff1..612fe974a 100644 --- a/libpcsxcore/mdec.c +++ b/libpcsxcore/mdec.c @@ -674,27 +674,26 @@ void mdec1Interrupt() { } 
int mdecFreeze(void *f, int Mode) { - u8 *base = (u8 *)&psxM[0x100000]; + u8 *base = (u8 *)psxM; u32 v; gzfreeze(&mdec.reg0, sizeof(mdec.reg0)); gzfreeze(&mdec.reg1, sizeof(mdec.reg1)); - // old code used to save raw pointers.. v = (u8 *)mdec.rl - base; gzfreeze(&v, sizeof(v)); - mdec.rl = (u16 *)(base + (v & 0xffffe)); + mdec.rl = (u16 *)(base + (v & 0x1ffffe)); v = (u8 *)mdec.rl_end - base; gzfreeze(&v, sizeof(v)); - mdec.rl_end = (u16 *)(base + (v & 0xffffe)); + mdec.rl_end = (u16 *)(base + (v & 0x1ffffe)); v = 0; if (mdec.block_buffer_pos) - v = mdec.block_buffer_pos - base; + v = mdec.block_buffer_pos - mdec.block_buffer; gzfreeze(&v, sizeof(v)); mdec.block_buffer_pos = 0; - if (v) - mdec.block_buffer_pos = base + (v & 0xfffff); + if (v && v < sizeof(mdec.block_buffer)) + mdec.block_buffer_pos = mdec.block_buffer; gzfreeze(&mdec.block_buffer, sizeof(mdec.block_buffer)); gzfreeze(&mdec.pending_dma1, sizeof(mdec.pending_dma1)); diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index d748ac072..8997c0b5a 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -288,7 +288,7 @@ int LoadCdrom() { //psxCpu->Reset(); if (Config.HLE) - psxBiosCheckExe(tmpHead.h.t_addr, tmpHead.h.t_size); + psxBiosCheckExe(tmpHead.h.t_addr, tmpHead.h.t_size, 0); return 0; } @@ -770,7 +770,7 @@ int LoadState(const char *file) { padFreeze(f, 0); if (Config.HLE) - psxBiosCheckExe(biosBranchCheckOld, 0x60); + psxBiosCheckExe(biosBranchCheckOld, 0x60, 1); result = 0; cleanup: diff --git a/libpcsxcore/new_dynarec/events.h b/libpcsxcore/new_dynarec/events.h index eeec289d5..5f57f3748 100644 --- a/libpcsxcore/new_dynarec/events.h +++ b/libpcsxcore/new_dynarec/events.h @@ -1,5 +1,7 @@ #include "../psxcommon.h" +extern int stop; + union psxCP0Regs_; u32 schedule_timeslice(void); void gen_interupt(union psxCP0Regs_ *cp0); diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index bad34578e..73f277a0c 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -4175,7 +4175,7 @@ 
void (* const psxHLEt[24])() = { hleExcPadCard1, hleExcPadCard2, }; -void psxBiosCheckExe(u32 t_addr, u32 t_size) +void psxBiosCheckExe(u32 t_addr, u32 t_size, int loading_state) { // lw $v0, 0x10($sp) // nop @@ -4213,7 +4213,8 @@ void psxBiosCheckExe(u32 t_addr, u32 t_size) if ((SWAP32(r32[i + j]) >> 16) != 0x3c04) // lui continue; - SysPrintf("HLE vsync @%08x\n", start + i * 4); + if (!loading_state) + SysPrintf("HLE vsync @%08x\n", start + i * 4); psxRegs.biosBranchCheck = (t_addr & 0xa01ffffc) + i * 4; } } diff --git a/libpcsxcore/psxbios.h b/libpcsxcore/psxbios.h index c1368e67d..c8c07ff7c 100644 --- a/libpcsxcore/psxbios.h +++ b/libpcsxcore/psxbios.h @@ -40,7 +40,7 @@ void psxBiosException(); void psxBiosFreeze(int Mode); void psxBiosCnfLoaded(u32 tcb_cnt, u32 evcb_cnt, u32 sp); void psxBiosSetupBootState(void); -void psxBiosCheckExe(u32 t_addr, u32 t_size); +void psxBiosCheckExe(u32 t_addr, u32 t_size, int loading_state); void psxBiosCheckBranch(void); extern void (*biosA0[256])(); diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index 9ff679e20..ab8beeea4 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -561,7 +561,6 @@ void psxRcntInit() s32 psxRcntFreeze( void *f, s32 Mode ) { u32 spuSyncCount = 0; - u32 count; s32 i; gzfreeze( &rcnts, sizeof(Rcnt) * CounterQuantity ); @@ -572,14 +571,9 @@ s32 psxRcntFreeze( void *f, s32 Mode ) if (Mode == 0) { - // don't trust things from a savestate rcnts[3].rate = 1; - for( i = 0; i < CounterQuantity; ++i ) - { + for( i = 0; i < CounterQuantity - 1; ++i ) _psxRcntWmode( i, rcnts[i].mode ); - count = (psxRegs.cycle - rcnts[i].cycleStart) / rcnts[i].rate; - _psxRcntWcount( i, count ); - } scheduleRcntBase(); psxRcntSet(); } diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index f3fbc678c..4407177a9 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -218,7 +218,7 @@ typedef struct void (CALLBACK *cddavCallback)(short, short); void 
(CALLBACK *scheduleCallback)(unsigned int); - xa_decode_t * xapGlobal; + const xa_decode_t * xapGlobal; unsigned int * XAFeed; unsigned int * XAPlay; unsigned int * XAStart; diff --git a/plugins/dfsound/freeze.c b/plugins/dfsound/freeze.c index 51e9fd775..d4898b46a 100644 --- a/plugins/dfsound/freeze.c +++ b/plugins/dfsound/freeze.c @@ -122,9 +122,10 @@ typedef struct unsigned short decode_pos; uint32_t pSpuIrq; uint32_t spuAddr; - uint32_t dummy1; - uint32_t dummy2; - uint32_t dummy3; + uint32_t rvb_cur; + uint16_t xa_left; + uint16_t cdda_left; + uint32_t cycles_played; SPUCHAN_orig s_chan[MAXCHAN]; @@ -132,8 +133,8 @@ typedef struct //////////////////////////////////////////////////////////////////////// -void LoadStateV5(SPUFreeze_t * pF); // newest version -void LoadStateUnknown(SPUFreeze_t * pF, uint32_t cycles); // unknown format +static SPUOSSFreeze_t * LoadStateV5(SPUFreeze_t * pF, uint32_t cycles); +static void LoadStateUnknown(SPUFreeze_t * pF, uint32_t cycles); // unknown format // we want to retain compatibility between versions, // so use original channel struct @@ -228,14 +229,16 @@ static void load_register(unsigned long reg, unsigned int cycles) long CALLBACK SPUfreeze(uint32_t ulFreezeMode, SPUFreeze_t * pF, uint32_t cycles) { - int i;SPUOSSFreeze_t * pFO; + SPUOSSFreeze_t * pFO = NULL; + int i; if(!pF) return 0; // first check - do_samples(cycles, 1); - if(ulFreezeMode) // info or save? 
{//--------------------------------------------------// + int xa_left = 0, cdda_left = 0; + do_samples(cycles, 1); + if(ulFreezeMode==1) memset(pF,0,sizeof(SPUFreeze_t)+sizeof(SPUOSSFreeze_t)); @@ -250,10 +253,31 @@ long CALLBACK SPUfreeze(uint32_t ulFreezeMode, SPUFreeze_t * pF, if(spu.xapGlobal && spu.XAPlay!=spu.XAFeed) // some xa { - pF->xaS=*spu.xapGlobal; + xa_left = spu.XAFeed - spu.XAPlay; + if (xa_left < 0) + xa_left = spu.XAEnd - spu.XAPlay + spu.XAFeed - spu.XAStart; + pF->xaS = *spu.xapGlobal; + } + else if (spu.CDDAPlay != spu.CDDAFeed) + { + // abuse the xa struct to store leftover cdda samples + unsigned int *p = spu.CDDAPlay; + cdda_left = spu.CDDAFeed - spu.CDDAPlay; + if (cdda_left < 0) + cdda_left = spu.CDDAEnd - spu.CDDAPlay + spu.CDDAFeed - spu.CDDAStart; + if (cdda_left > sizeof(pF->xaS.pcm) / 4) + cdda_left = sizeof(pF->xaS.pcm) / 4; + if (p + cdda_left <= spu.CDDAEnd) + memcpy(pF->xaS.pcm, p, cdda_left * 4); + else { + memcpy(pF->xaS.pcm, p, (spu.CDDAEnd - p) * 4); + memcpy((char *)pF->xaS.pcm + (spu.CDDAEnd - p) * 4, spu.CDDAStart, + (cdda_left - (spu.CDDAEnd - p)) * 4); + } + pF->xaS.nsamples = 0; } - else - memset(&pF->xaS,0,sizeof(xa_decode_t)); // or clean xa + else + memset(&pF->xaS,0,sizeof(xa_decode_t)); // or clean xa pFO=(SPUOSSFreeze_t *)(pF+1); // store special stuff @@ -263,6 +287,10 @@ long CALLBACK SPUfreeze(uint32_t ulFreezeMode, SPUFreeze_t * pF, pFO->spuAddr=spu.spuAddr; if(pFO->spuAddr==0) pFO->spuAddr=0xbaadf00d; pFO->decode_pos = spu.decode_pos; + pFO->rvb_cur = spu.rvb->CurrAddr; + pFO->xa_left = xa_left; + pFO->cdda_left = cdda_left; + pFO->cycles_played = spu.cycles_played; for(i=0;icSPUPort,0x200); spu.bMemDirty = 1; - if(pF->xaS.nsamples<=4032) // start xa again - SPUplayADPCMchannel(&pF->xaS, spu.cycles_played, 0); - - spu.xapGlobal=0; - - if(!strcmp(pF->szSPUName,"PBOSS") && pF->ulFreezeVersion==5) - LoadStateV5(pF); + if (!strcmp(pF->szSPUName,"PBOSS") && pF->ulFreezeVersion==5) + pFO = LoadStateV5(pF, cycles); 
else LoadStateUnknown(pF, cycles); + spu.XAPlay = spu.XAFeed = spu.XAStart; + spu.CDDAPlay = spu.CDDAFeed = spu.CDDAStart; + if (pFO && pFO->xa_left && pF->xaS.nsamples) { // start xa again + FeedXA(&pF->xaS); + spu.XAPlay = spu.XAFeed - pFO->xa_left; + if (spu.XAPlay < spu.XAStart) + spu.XAPlay = spu.XAStart; + } + else if (pFO && pFO->cdda_left) { // start cdda again + FeedCDDA((void *)pF->xaS.pcm, pFO->cdda_left * 4); + } + // repair some globals for(i=0;i<=62;i+=2) load_register(H_Reverb+i, cycles); @@ -308,7 +343,6 @@ long CALLBACK SPUfreeze(uint32_t ulFreezeMode, SPUFreeze_t * pF, for(i=0;ispuAddr & 0x7fffe; } spu.decode_pos = pFO->decode_pos & 0x1ff; + spu.rvb->CurrAddr = pFO->rvb_cur; + spu.cycles_played = pFO->cycles_played ? pFO->cycles_played : cycles; spu.dwNewChannel=0; spu.dwChannelsAudible=0; @@ -343,11 +379,12 @@ void LoadStateV5(SPUFreeze_t * pF) spu.s_chan[i].pCurr+=(uintptr_t)spu.spuMemC; spu.s_chan[i].pLoop+=(uintptr_t)spu.spuMemC; } + return pFO; } //////////////////////////////////////////////////////////////////////// -void LoadStateUnknown(SPUFreeze_t * pF, uint32_t cycles) +static void LoadStateUnknown(SPUFreeze_t * pF, uint32_t cycles) { int i; @@ -360,6 +397,7 @@ void LoadStateUnknown(SPUFreeze_t * pF, uint32_t cycles) spu.dwChannelsAudible=0; spu.dwChannelDead=0; spu.pSpuIrq=spu.spuMemC; + spu.cycles_played = cycles; for(i=0;i<0xc0;i++) { @@ -368,3 +406,4 @@ void LoadStateUnknown(SPUFreeze_t * pF, uint32_t cycles) } //////////////////////////////////////////////////////////////////////// +// vim:shiftwidth=1:expandtab diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 283d59886..9300a16cc 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1286,7 +1286,7 @@ void schedule_next_irq(void) void CALLBACK SPUasync(unsigned int cycle, unsigned int flags) { - do_samples(cycle, spu_config.iUseFixedUpdates); + do_samples(cycle, 0); if (spu.spuCtrl & CTRL_IRQ) schedule_next_irq(); @@ -1328,6 +1328,7 @@ void CALLBACK 
SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycle, int is_s do_samples(cycle, 1); // catch up to prevent source underflows later FeedXA(xap); // call main XA feeder + spu.xapGlobal = xap; // store info for save states } // CDDA AUDIO @@ -1339,7 +1340,8 @@ int CALLBACK SPUplayCDDAchannel(short *pcm, int nbytes, unsigned int cycle, int if (is_start) do_samples(cycle, 1); // catch up to prevent source underflows later - return FeedCDDA((unsigned char *)pcm, nbytes); + FeedCDDA((unsigned char *)pcm, nbytes); + return 0; } // to be called after state load @@ -1361,7 +1363,7 @@ static void SetupStreams(void) spu.XAFeed = spu.XAStart; spu.CDDAStart = malloc(CDDA_BUFFER_SIZE); // alloc cdda buffer - spu.CDDAEnd = spu.CDDAStart + 16384; + spu.CDDAEnd = spu.CDDAStart + CDDA_BUFFER_SIZE / sizeof(uint32_t); spu.CDDAPlay = spu.CDDAStart; spu.CDDAFeed = spu.CDDAStart; diff --git a/plugins/dfsound/spu.h b/plugins/dfsound/spu.h index 334c68099..810ec07de 100644 --- a/plugins/dfsound/spu.h +++ b/plugins/dfsound/spu.h @@ -29,5 +29,7 @@ void ClearWorkingState(void); void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycle, int is_start); int CALLBACK SPUplayCDDAchannel(short *pcm, int bytes, unsigned int cycle, int is_start); +void FeedXA(const xa_decode_t *xap); +void FeedCDDA(unsigned char *pcm, int nBytes); #endif /* __P_SPU_H__ */ diff --git a/plugins/dfsound/spu_config.h b/plugins/dfsound/spu_config.h index 95c894867..b830142ed 100644 --- a/plugins/dfsound/spu_config.h +++ b/plugins/dfsound/spu_config.h @@ -11,7 +11,6 @@ typedef struct int iUseInterpolation; int iTempo; int iUseThread; - int iUseFixedUpdates; // output fixed number of samples/frame // status int iThreadAvail; diff --git a/plugins/dfsound/xa.c b/plugins/dfsound/xa.c index 23924d3b1..08afc0099 100644 --- a/plugins/dfsound/xa.c +++ b/plugins/dfsound/xa.c @@ -130,13 +130,12 @@ static unsigned long timeGetTime_spu() // FEED XA 
//////////////////////////////////////////////////////////////////////// -INLINE void FeedXA(xa_decode_t *xap) +void FeedXA(const xa_decode_t *xap) { int sinc,spos,i,iSize,iPlace,vl,vr; if(!spu.bSPUIsOpen) return; - spu.xapGlobal = xap; // store info for save states spu.XARepeat = 3; // set up repeat #if 0//def XA_HACK @@ -410,12 +409,12 @@ INLINE void FeedXA(xa_decode_t *xap) // FEED CDDA //////////////////////////////////////////////////////////////////////// -INLINE int FeedCDDA(unsigned char *pcm, int nBytes) +void FeedCDDA(unsigned char *pcm, int nBytes) { int space; space=(spu.CDDAPlay-spu.CDDAFeed-1)*4 & (CDDA_BUFFER_SIZE - 1); if(space0) { @@ -431,8 +430,6 @@ INLINE int FeedCDDA(unsigned char *pcm, int nBytes) nBytes-=space; pcm+=space; } - - return 0x676f; // rearmed_go } #endif From d3f0cb2bbea928854c0e05fcc43f9447a01c71c6 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 2 Oct 2023 01:24:50 +0300 Subject: [PATCH 386/597] add a thp-based huge page alloc fallback --- frontend/libpicofe | 2 +- libpcsxcore/psxmem.c | 21 +++++++++++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/frontend/libpicofe b/frontend/libpicofe index 5dd225ecd..b0ce6fa8b 160000 --- a/frontend/libpicofe +++ b/frontend/libpicofe @@ -1 +1 @@ -Subproject commit 5dd225ecd6d5a04fd8e6f16c8f8ee65ee88c6fed +Subproject commit b0ce6fa8bd3c171debd5589f3ee8a95e26b1d61b diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 42755e529..389bdba51 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -44,16 +44,29 @@ static void * psxMapDefault(unsigned long addr, size_t size, int is_fixed, enum psxMapTag tag) { -#if !P_HAVE_MMAP void *ptr; - - ptr = malloc(size); +#if !P_HAVE_MMAP + ptr = calloc(1, size); return ptr ? 
ptr : MAP_FAILED; #else int flags = MAP_PRIVATE | MAP_ANONYMOUS; - return mmap((void *)(uintptr_t)addr, size, + ptr = mmap((void *)(uintptr_t)addr, size, PROT_READ | PROT_WRITE, flags, -1, 0); +#ifdef MADV_HUGEPAGE + if (size >= 2*1024*1024) { + if (ptr != MAP_FAILED && ((uintptr_t)ptr & (2*1024*1024 - 1))) { + // try to manually realign assuming bottom-to-top alloc + munmap(ptr, size); + addr = (uintptr_t)ptr & ~(2*1024*1024 - 1); + ptr = mmap((void *)(uintptr_t)addr, size, + PROT_READ | PROT_WRITE, flags, -1, 0); + } + if (ptr != MAP_FAILED) + madvise(ptr, size, MADV_HUGEPAGE); + } +#endif + return ptr; #endif } From 65722e0455a2a42eece5e67ce2e92bfac03e1368 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 4 Oct 2023 23:51:55 +0300 Subject: [PATCH 387/597] drc: avoid excessive recursion in hle mode --- libpcsxcore/new_dynarec/linkage_arm.S | 1 - libpcsxcore/new_dynarec/new_dynarec.c | 25 +++++++++++++++---------- libpcsxcore/psxbios.c | 14 ++++++++++---- libpcsxcore/r3000a.h | 3 ++- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index d2d6d8740..5d9318094 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -318,7 +318,6 @@ FUNCTION(jump_to_new_pc): .align 2 FUNCTION(new_dyna_leave): ldr r0, [fp, #LO_last_count] - add r12, fp, #28 add r10, r0, r10 str r10, [fp, #LO_cycle] ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 738401374..74f32ee35 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -9086,17 +9086,22 @@ static int new_recompile_block(u_int addr) void *instr_addr0_override = NULL; int ds = 0; - if (start == 0x80030000) { - // nasty hack for the fastbios thing - // override block entry to this code + if ((Config.HLE && start == 0x80000080) || start == 0x80030000) { 
instr_addr0_override = out; - emit_movimm(start,0); - // abuse io address var as a flag that we - // have already returned here once - emit_readword(&address,1); - emit_writeword(0,&pcaddr); - emit_writeword(0,&address); - emit_cmp(0,1); + emit_movimm(start, 0); + if (start == 0x80030000) { + // for BiosBootBypass() to work + // io address var abused as a "already been here" flag + emit_readword(&address, 1); + emit_writeword(0, &pcaddr); + emit_writeword(0, &address); + emit_cmp(0, 1); + } + else { + emit_readword(&psxRegs.cpuInRecursion, 1); + emit_writeword(0, &pcaddr); + emit_test(1, 1); + } #ifdef __aarch64__ emit_jeq(out + 4*2); emit_far_jump(new_dyna_leave); diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 73f277a0c..432b0e2c1 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -398,14 +398,17 @@ static inline void softCall(u32 pc) { ra = 0x80001000; psxRegs.CP0.n.SR &= ~0x404; // disable interrupts + assert(psxRegs.cpuInRecursion <= 1); + psxRegs.cpuInRecursion++; psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, PTR_1); - while (pc0 != 0x80001000 && ++lim < 1000000) + while (pc0 != 0x80001000 && ++lim < 0x100000) psxCpu->ExecuteBlock(EXEC_CALLER_HLE); psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, PTR_1); + psxRegs.cpuInRecursion--; - if (lim == 1000000) + if (lim == 0x100000) PSXBIOS_LOG("softCall @%x hit lim\n", pc); ra = sra; psxRegs.CP0.n.SR |= ssr & 0x404; @@ -421,14 +424,16 @@ static inline void softCallInException(u32 pc) { return; ra = 0x80001000; + psxRegs.cpuInRecursion++; psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, PTR_1); - while (!returned_from_exception() && pc0 != 0x80001000 && ++lim < 1000000) + while (!returned_from_exception() && pc0 != 0x80001000 && ++lim < 0x100000) psxCpu->ExecuteBlock(EXEC_CALLER_HLE); psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, PTR_1); + psxRegs.cpuInRecursion--; - if (lim == 1000000) + if (lim == 0x100000) PSXBIOS_LOG("softCallInException @%x hit lim\n", pc); if (pc0 == 0x80001000) ra = sra; 
@@ -4035,6 +4040,7 @@ void psxBiosException() { sp = fp = loadRam32(A_EXC_SP); gp = A_EXC_GP; use_cycles(46); + assert(!psxRegs.cpuInRecursion); // do the chains (always 4) for (c = lim = 0; c < 4; c++) { diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index a8f39abb3..157d31b98 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -222,7 +222,8 @@ typedef struct { u8 dloadReg[2]; u32 dloadVal[2]; u32 biosBranchCheck; - u32 reserved[3]; + u32 cpuInRecursion; + u32 reserved[2]; // warning: changing anything in psxRegisters requires update of all // asm in libpcsxcore/new_dynarec/ } psxRegisters; From 49e9602dceff7aa9f771c93a0bc3c21cbb9ee79a Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 5 Oct 2023 00:54:29 +0300 Subject: [PATCH 388/597] standalone: load per-game config from cmd line too notaz/pcsx_rearmed#163 --- frontend/main.c | 2 ++ frontend/menu.c | 2 +- frontend/menu.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/frontend/main.c b/frontend/main.c index 34f68d40d..05e4d55e5 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -710,6 +710,8 @@ int main(int argc, char *argv[]) } if (ready_to_go) { + if (menu_load_config(1) != 0) + menu_load_config(0); menu_prepare_emu(); // If a state has been specified, then load that diff --git a/frontend/menu.c b/frontend/menu.c index f33ac33f2..6bc20c504 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -589,7 +589,7 @@ static void parse_str_val(char *cval, const char *src) static void keys_load_all(const char *cfg); -static int menu_load_config(int is_game) +int menu_load_config(int is_game) { char cfgfile[MAXPATHLEN]; int i, ret = -1; diff --git a/frontend/menu.h b/frontend/menu.h index 8f5acda7a..9d60e8822 100644 --- a/frontend/menu.h +++ b/frontend/menu.h @@ -7,6 +7,7 @@ void menu_loop(void); void menu_finish(void); void menu_notify_mode_change(int w, int h, int bpp); +int menu_load_config(int is_game); enum g_opts_opts { OPT_SHOWFPS = 1 << 0, From 
5aa9f158c9f135b5933d34857faa6e593fa9ed9b Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 6 Oct 2023 01:12:50 +0300 Subject: [PATCH 389/597] spu: adjust fmod to match nocash description mednafen doesn't quite agree though, so this may need to be revisited --- plugins/dfsound/spu.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 9300a16cc..04997baac 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -274,19 +274,16 @@ static void StartSound(int ch) INLINE int FModChangeFrequency(int *SB, int pitch, int ns) { - unsigned int NP=pitch; - int sinc; + pitch = (signed short)pitch; + pitch = ((32768 + iFMod[ns]) * pitch) >> 15; + pitch &= 0xffff; + if (pitch > 0x3fff) + pitch = 0x3fff; - NP=((32768L+iFMod[ns])*NP)>>15; + iFMod[ns] = 0; + SB[32] = 1; // reset interpolation - if(NP>0x3fff) NP=0x3fff; - if(NP<0x1) NP=0x1; - - sinc=NP<<4; // calc frequency - iFMod[ns]=0; - SB[32]=1; // reset interpolation - - return sinc; + return pitch << 4; } //////////////////////////////////////////////////////////////////////// @@ -399,16 +396,18 @@ static void decode_block_data(int *dest, const unsigned char *src, int predict_n d = (int)*src; s = (int)(signed short)((d & 0x0f) << 12); - fa = s >> shift_factor; + fa = s >> shift_factor; fa += ((s_1 * f[predict_nr][0])>>6) + ((s_2 * f[predict_nr][1])>>6); - s_2=s_1;s_1=fa; + ssat32_to_16(fa); + s_2 = s_1; s_1 = fa; dest[nSample++] = fa; s = (int)(signed short)((d & 0xf0) << 8); - fa = s >> shift_factor; + fa = s >> shift_factor; fa += ((s_1 * f[predict_nr][0])>>6) + ((s_2 * f[predict_nr][1])>>6); - s_2=s_1;s_1=fa; + ssat32_to_16(fa); + s_2 = s_1; s_1 = fa; dest[nSample++] = fa; } From 893f780e3ec8eb0b83fa4fc374c361ffffab0cff Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 7 Oct 2023 01:01:44 +0300 Subject: [PATCH 390/597] gpulib: forgot to mark fb dirty --- plugins/gpulib/gpu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 
deletion(-) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index c84414418..fec468824 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -447,9 +447,11 @@ static void finish_vram_transfer(int is_read) { if (is_read) gpu.status &= ~PSX_GPU_STATUS_IMG; - else + else { + gpu.state.fb_dirty = 1; renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y, gpu.dma_start.w, gpu.dma_start.h, 0); + } } static void do_vram_copy(const uint32_t *params) @@ -598,6 +600,7 @@ static noinline int do_cmd_buffer(uint32_t *data, int count) break; } do_vram_copy(data + pos + 1); + vram_dirty = 1; pos += 4; continue; } From 38e4048faeaccf7fdc6084f64866f2ea52bb97f1 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 7 Oct 2023 22:56:09 +0300 Subject: [PATCH 391/597] spu: try to clean up the interpolation mess It's still full of magic offsets and constants, but maybe not as much as before. --- libpcsxcore/plugins.h | 2 +- plugins/dfsound/externals.h | 22 ++- plugins/dfsound/freeze.c | 12 +- plugins/dfsound/registers.c | 1 - plugins/dfsound/spu.c | 284 ++++++++++++++++-------------------- 5 files changed, 152 insertions(+), 169 deletions(-) diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index fbbd44f10..cb9b88a2c 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -184,7 +184,7 @@ typedef struct { unsigned char SPUPorts[0x200]; unsigned char SPURam[0x80000]; xa_decode_t xa; - unsigned char *SPUInfo; + unsigned char *unused; } SPUFreeze_t; typedef long (CALLBACK* SPUfreeze)(uint32_t, SPUFreeze_t *, uint32_t); typedef void (CALLBACK* SPUasync)(uint32_t, uint32_t); diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index 4407177a9..9fb58ad24 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -117,7 +117,6 @@ typedef struct unsigned int bFMod:2; // freq mod (0=off, 1=sound channel, 2=freq channel) unsigned int prevflags:3; // flags from previous block unsigned int bIgnoreLoop:1; // Ignore loop - unsigned 
int bNewPitch:1; // pitch changed unsigned int bStarting:1; // starting after keyon union { struct { @@ -183,7 +182,21 @@ typedef struct // psx buffers / addresses -#define SB_SIZE (32 + 4) +typedef union +{ + int SB[28 + 4 + 4]; + struct { + int sample[28]; + union { + struct { + int pos; + signed short val[4]; + } gauss; + int simple[5]; // 28-32 + } interp; + int sinc_old; + }; +} sample_buf; typedef struct { @@ -239,10 +252,13 @@ typedef struct REVERBInfo * rvb; // buffers - int * SB; + void * unused; int * SSumLR; unsigned short regArea[0x400]; + + sample_buf sb[MAXCHAN]; + int interpolation; } SPUInfo; #define regAreaGet(offset) \ diff --git a/plugins/dfsound/freeze.c b/plugins/dfsound/freeze.c index d4898b46a..c4afad830 100644 --- a/plugins/dfsound/freeze.c +++ b/plugins/dfsound/freeze.c @@ -15,6 +15,7 @@ * * ***************************************************************************/ +#include #include "stdafx.h" #define _IN_FREEZE @@ -145,7 +146,8 @@ static void save_channel(SPUCHAN_orig *d, const SPUCHAN *s, int ch) d->iSBPos = s->iSBPos; d->spos = s->spos; d->sinc = s->sinc; - memcpy(d->SB, spu.SB + ch * SB_SIZE, sizeof(d->SB[0]) * SB_SIZE); + assert(sizeof(d->SB) >= sizeof(spu.sb[ch])); + memcpy(d->SB, &spu.sb[ch], sizeof(spu.sb[ch])); d->iStart = (regAreaGetCh(ch, 6) & ~1) << 3; d->iCurr = 0; // set by the caller d->iLoop = 0; // set by the caller @@ -159,8 +161,8 @@ static void save_channel(SPUCHAN_orig *d, const SPUCHAN *s, int ch) d->bIgnoreLoop = (s->prevflags ^ 2) << 1; d->iRightVolume = s->iRightVolume; d->iRawPitch = s->iRawPitch; - d->s_1 = spu.SB[ch * SB_SIZE + 27]; // yes it's reversed - d->s_2 = spu.SB[ch * SB_SIZE + 26]; + d->s_1 = spu.sb[ch].SB[27]; // yes it's reversed + d->s_2 = spu.sb[ch].SB[26]; d->bRVBActive = s->bRVBActive; d->bNoise = s->bNoise; d->bFMod = s->bFMod; @@ -187,7 +189,7 @@ static void load_channel(SPUCHAN *d, const SPUCHAN_orig *s, int ch) d->spos = s->spos; d->sinc = s->sinc; d->sinc_inv = 0; - memcpy(spu.SB + ch * 
SB_SIZE, s->SB, sizeof(spu.SB[0]) * SB_SIZE); + memcpy(&spu.sb[ch], s->SB, sizeof(spu.sb[ch])); d->pCurr = (void *)((uintptr_t)s->iCurr & 0x7fff0); d->pLoop = (void *)((uintptr_t)s->iLoop & 0x7fff0); d->bReverb = s->bReverb; @@ -340,7 +342,7 @@ long CALLBACK SPUfreeze(uint32_t ulFreezeMode, SPUFreeze_t * pF, load_register(H_CDRight, cycles); // fix to prevent new interpolations from crashing - for(i=0;iSB; + if (sb->sinc_old != sinc) + { + sb->sinc_old = sinc; + SB[32] = 1; + } if(SB[32]==1) // flag == 1? calc step and set flag... and don't change the value in this pass { const int id1=SB[30]-SB[29]; // curr delta to next val @@ -175,8 +181,9 @@ static void InterpolateUp(int *SB, int sinc) // even easier interpolation on downsampling, also no special filter, again just "Pete's common sense" tm // -static void InterpolateDown(int *SB, int sinc) +static void InterpolateDown(sample_buf *sb, int sinc) { + int *SB = sb->SB; if(sinc>=0x20000L) // we would skip at least one val? { SB[29]+=(SB[30]-SB[29])/2; // add easy weight @@ -186,15 +193,8 @@ static void InterpolateDown(int *SB, int sinc) } //////////////////////////////////////////////////////////////////////// -// helpers for gauss interpolation - -#define gval0 (((short*)(&SB[29]))[gpos&3]) -#define gval(x) ((int)((short*)(&SB[29]))[(gpos+x)&3]) #include "gauss_i.h" - -//////////////////////////////////////////////////////////////////////// - #include "xa.c" static void do_irq(void) @@ -232,15 +232,17 @@ void check_irq_io(unsigned int addr) // START SOUND... 
called by main thread to setup a new sound on a channel //////////////////////////////////////////////////////////////////////// -static void StartSoundSB(int *SB) +static void ResetInterpolation(sample_buf *sb) { - SB[26]=0; // init mixing vars - SB[27]=0; + memset(&sb->interp, 0, sizeof(sb->interp)); + sb->sinc_old = -1; +} - SB[28]=0; - SB[29]=0; // init our interpolation helpers - SB[30]=0; - SB[31]=0; +static void StartSoundSB(sample_buf *sb) +{ + sb->SB[26] = 0; // init mixing vars + sb->SB[27] = 0; + ResetInterpolation(sb); } static void StartSoundMain(int ch) @@ -265,14 +267,14 @@ static void StartSoundMain(int ch) static void StartSound(int ch) { StartSoundMain(ch); - StartSoundSB(spu.SB + ch * SB_SIZE); + StartSoundSB(&spu.sb[ch]); } //////////////////////////////////////////////////////////////////////// // ALL KIND OF HELPERS //////////////////////////////////////////////////////////////////////// -INLINE int FModChangeFrequency(int *SB, int pitch, int ns) +INLINE int FModChangeFrequency(int pitch, int ns) { pitch = (signed short)pitch; pitch = ((32768 + iFMod[ns]) * pitch) >> 15; @@ -281,101 +283,50 @@ INLINE int FModChangeFrequency(int *SB, int pitch, int ns) pitch = 0x3fff; iFMod[ns] = 0; - SB[32] = 1; // reset interpolation return pitch << 4; } -//////////////////////////////////////////////////////////////////////// - -INLINE void StoreInterpolationVal(int *SB, int sinc, int fa, int fmod_freq) +INLINE void StoreInterpolationGaussCubic(sample_buf *sb, int fa) { - if(fmod_freq) // fmod freq channel - SB[29]=fa; - else - { - ssat32_to_16(fa); - - if(spu_config.iUseInterpolation>=2) // gauss/cubic interpolation - { - int gpos = SB[28]; - gval0 = fa; - gpos = (gpos+1) & 3; - SB[28] = gpos; - } - else - if(spu_config.iUseInterpolation==1) // simple interpolation - { - SB[28] = 0; - SB[29] = SB[30]; // -> helpers for simple linear interpolation: delay real val for two slots, and calc the two deltas, for a 'look at the future behaviour' - SB[30] = SB[31]; 
- SB[31] = fa; - SB[32] = 1; // -> flag: calc new interolation - } - else SB[29]=fa; // no interpolation - } + int gpos = sb->interp.gauss.pos & 3; + sb->interp.gauss.val[gpos++] = fa; + sb->interp.gauss.pos = gpos & 3; } -//////////////////////////////////////////////////////////////////////// +#define gval(x) (int)sb->interp.gauss.val[(gpos + x) & 3] -INLINE int iGetInterpolationVal(int *SB, int sinc, int spos, int fmod_freq) +INLINE int GetInterpolationCubic(const sample_buf *sb, int spos) { + int gpos = sb->interp.gauss.pos; + int xd = (spos >> 1) + 1; int fa; - if(fmod_freq) return SB[29]; - - switch(spu_config.iUseInterpolation) - { - //--------------------------------------------------// - case 3: // cubic interpolation - { - long xd;int gpos; - xd = (spos >> 1)+1; - gpos = SB[28]; - - fa = gval(3) - 3*gval(2) + 3*gval(1) - gval0; - fa *= (xd - (2<<15)) / 6; - fa >>= 15; - fa += gval(2) - gval(1) - gval(1) + gval0; - fa *= (xd - (1<<15)) >> 1; - fa >>= 15; - fa += gval(1) - gval0; - fa *= xd; - fa >>= 15; - fa = fa + gval0; - - } break; - //--------------------------------------------------// - case 2: // gauss interpolation - { - int vl, vr;int gpos; - vl = (spos >> 6) & ~3; - gpos = SB[28]; - vr=(gauss[vl]*(int)gval0) >> 15; - vr+=(gauss[vl+1]*gval(1)) >> 15; - vr+=(gauss[vl+2]*gval(2)) >> 15; - vr+=(gauss[vl+3]*gval(3)) >> 15; - fa = vr; - } break; - //--------------------------------------------------// - case 1: // simple interpolation - { - if(sinc<0x10000L) // -> upsampling? 
- InterpolateUp(SB, sinc); // --> interpolate up - else InterpolateDown(SB, sinc); // --> else down - fa=SB[29]; - } break; - //--------------------------------------------------// - default: // no interpolation - { - fa=SB[29]; - } break; - //--------------------------------------------------// - } - + fa = gval(3) - 3*gval(2) + 3*gval(1) - gval(0); + fa *= (xd - (2<<15)) / 6; + fa >>= 15; + fa += gval(2) - gval(1) - gval(1) + gval(0); + fa *= (xd - (1<<15)) >> 1; + fa >>= 15; + fa += gval(1) - gval(0); + fa *= xd; + fa >>= 15; + fa = fa + gval(0); return fa; } +INLINE int GetInterpolationGauss(const sample_buf *sb, int spos) +{ + int gpos = sb->interp.gauss.pos; + int vl = (spos >> 6) & ~3; + int vr; + vr = (gauss[vl+0] * gval(0)) >> 15; + vr += (gauss[vl+1] * gval(1)) >> 15; + vr += (gauss[vl+2] * gval(2)) >> 15; + vr += (gauss[vl+3] * gval(3)) >> 15; + return vr; +} + static void decode_block_data(int *dest, const unsigned char *src, int predict_nr, int shift_factor) { static const int f[16][2] = { @@ -527,10 +478,10 @@ static void scan_for_irq(int ch, unsigned int *upd_samples) } } -#define make_do_samples(name, fmod_code, interp_start, interp1_code, interp2_code, interp_end) \ -static noinline int do_samples_##name( \ +#define make_do_samples(name, fmod_code, interp_start, interp_store, interp_get, interp_end) \ +static noinline int name( \ int (*decode_f)(void *context, int ch, int *SB), void *ctx, \ - int ch, int ns_to, int *SB, int sinc, int *spos, int *sbpos) \ + int ch, int ns_to, sample_buf *sb, int sinc, int *spos, int *sbpos) \ { \ int ns, d, fa; \ int ret = ns_to; \ @@ -543,20 +494,20 @@ static noinline int do_samples_##name( \ *spos += sinc; \ while (*spos >= 0x10000) \ { \ - fa = SB[(*sbpos)++]; \ + fa = sb->SB[(*sbpos)++]; \ if (*sbpos >= 28) \ { \ *sbpos = 0; \ - d = decode_f(ctx, ch, SB); \ + d = decode_f(ctx, ch, sb->SB); \ if (d && ns < ret) \ ret = ns; \ } \ \ - interp1_code; \ + interp_store; \ *spos -= 0x10000; \ } \ \ - interp2_code; \ + 
interp_get; \ } \ \ interp_end; \ @@ -564,30 +515,56 @@ static noinline int do_samples_##name( \ return ret; \ } -#define fmod_recv_check \ - if(spu.s_chan[ch].bFMod==1 && iFMod[ns]) \ - sinc = FModChangeFrequency(SB, spu.s_chan[ch].iRawPitch, ns) - -make_do_samples(default, fmod_recv_check, , - StoreInterpolationVal(SB, sinc, fa, spu.s_chan[ch].bFMod==2), - ChanBuf[ns] = iGetInterpolationVal(SB, sinc, *spos, spu.s_chan[ch].bFMod==2), ) -make_do_samples(noint, , fa = SB[29], , ChanBuf[ns] = fa, SB[29] = fa) - +// helpers for simple linear interpolation: delay real val for two slots, +// and calc the two deltas, for a 'look at the future behaviour' #define simple_interp_store \ - SB[28] = 0; \ - SB[29] = SB[30]; \ - SB[30] = SB[31]; \ - SB[31] = fa; \ - SB[32] = 1 + sb->SB[28] = 0; \ + sb->SB[29] = sb->SB[30]; \ + sb->SB[30] = sb->SB[31]; \ + sb->SB[31] = fa; \ + sb->SB[32] = 1 #define simple_interp_get \ if(sinc<0x10000) /* -> upsampling? */ \ - InterpolateUp(SB, sinc); /* --> interpolate up */ \ - else InterpolateDown(SB, sinc); /* --> else down */ \ - ChanBuf[ns] = SB[29] + InterpolateUp(sb, sinc); /* --> interpolate up */ \ + else InterpolateDown(sb, sinc); /* --> else down */ \ + ChanBuf[ns] = sb->SB[29] -make_do_samples(simple, , , +make_do_samples(do_samples_nointerp, , fa = sb->SB[29], + , ChanBuf[ns] = fa, sb->SB[29] = fa) +make_do_samples(do_samples_simple, , , simple_interp_store, simple_interp_get, ) +make_do_samples(do_samples_gauss, , , + StoreInterpolationGaussCubic(sb, fa), + ChanBuf[ns] = GetInterpolationGauss(sb, *spos), ) +make_do_samples(do_samples_cubic, , , + StoreInterpolationGaussCubic(sb, fa), + ChanBuf[ns] = GetInterpolationCubic(sb, *spos), ) +make_do_samples(do_samples_fmod, + sinc = FModChangeFrequency(spu.s_chan[ch].iRawPitch, ns), , + StoreInterpolationGaussCubic(sb, fa), + ChanBuf[ns] = GetInterpolationGauss(sb, *spos), ) + +INLINE int do_samples_adpcm( + int (*decode_f)(void *context, int ch, int *SB), void *ctx, + int ch, int ns_to, 
int fmod, sample_buf *sb, int sinc, int *spos, int *sbpos) +{ + int interp = spu.interpolation; + if (fmod == 1) + return do_samples_fmod(decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); + if (fmod) + interp = 2; + switch (interp) { + case 0: + return do_samples_nointerp(decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); + case 1: + return do_samples_simple (decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); + default: + return do_samples_gauss (decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); + case 3: + return do_samples_cubic (decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); + } +} static int do_samples_skip(int ch, int ns_to) { @@ -762,7 +739,15 @@ static void do_channels(int ns_to) unsigned int mask; int do_rvb, ch, d; SPUCHAN *s_chan; - int *SB, sinc; + + if (unlikely(spu.interpolation != spu_config.iUseInterpolation)) + { + spu.interpolation = spu_config.iUseInterpolation; + mask = spu.dwChannelsAudible & 0xffffff; + for (ch = 0; mask != 0; ch++, mask >>= 1) + if (mask & 1) + ResetInterpolation(&spu.sb[ch]); + } do_rvb = spu.rvb->StartAddr && spu_config.iUseReverb; if (do_rvb) @@ -780,24 +765,11 @@ static void do_channels(int ns_to) if (!(mask & 1)) continue; // channel not playing? 
next s_chan = &spu.s_chan[ch]; - SB = spu.SB + ch * SB_SIZE; - sinc = s_chan->sinc; - if (spu.s_chan[ch].bNewPitch) - SB[32] = 1; // reset interpolation - spu.s_chan[ch].bNewPitch = 0; - if (s_chan->bNoise) d = do_samples_noise(ch, ns_to); - else if (s_chan->bFMod == 2 - || (s_chan->bFMod == 0 && spu_config.iUseInterpolation == 0)) - d = do_samples_noint(decode_block, NULL, ch, ns_to, - SB, sinc, &s_chan->spos, &s_chan->iSBPos); - else if (s_chan->bFMod == 0 && spu_config.iUseInterpolation == 1) - d = do_samples_simple(decode_block, NULL, ch, ns_to, - SB, sinc, &s_chan->spos, &s_chan->iSBPos); else - d = do_samples_default(decode_block, NULL, ch, ns_to, - SB, sinc, &s_chan->spos, &s_chan->iSBPos); + d = do_samples_adpcm(decode_block, NULL, ch, ns_to, s_chan->bFMod, + &spu.sb[ch], s_chan->sinc, &s_chan->spos, &s_chan->iSBPos); if (!s_chan->bStarting) { d = MixADSR(&s_chan->ADSRX, d); @@ -883,7 +855,6 @@ static struct spu_worker { unsigned short bNoise:1; unsigned short bFMod:2; unsigned short bRVBActive:1; - unsigned short bNewPitch:1; ADSRInfoEx adsr; } ch[24]; int SSumLR[NSSIZE * 2]; @@ -965,7 +936,6 @@ static void queue_channel_work(int ns_to, unsigned int silentch) work->ch[ch].bNoise = s_chan->bNoise; work->ch[ch].bFMod = s_chan->bFMod; work->ch[ch].bRVBActive = s_chan->bRVBActive; - work->ch[ch].bNewPitch = s_chan->bNewPitch; if (s_chan->prevflags & 1) work->ch[ch].start = work->ch[ch].loop; @@ -981,7 +951,6 @@ static void queue_channel_work(int ns_to, unsigned int silentch) s_chan->ADSRX.EnvelopeVol = 0; } } - s_chan->bNewPitch = 0; } work->rvb_addr = 0; @@ -1001,18 +970,27 @@ static void queue_channel_work(int ns_to, unsigned int silentch) static void do_channel_work(struct work_item *work) { unsigned int mask; - int *SB, sinc, spos, sbpos; + int spos, sbpos; int d, ch, ns_to; ns_to = work->ns_to; + if (unlikely(spu.interpolation != spu_config.iUseInterpolation)) + { + spu.interpolation = spu_config.iUseInterpolation; + mask = spu.dwChannelsAudible & 
0xffffff; + for (ch = 0; mask != 0; ch++, mask >>= 1) + if (mask & 1) + ResetInterpolation(&spu.sb[ch]); + } + if (work->rvb_addr) memset(RVB, 0, ns_to * sizeof(RVB[0]) * 2); mask = work->channels_new; for (ch = 0; mask != 0; ch++, mask >>= 1) { if (mask & 1) - StartSoundSB(spu.SB + ch * SB_SIZE); + StartSoundSB(&spu.sb[ch]); } mask = work->channels_on; @@ -1023,21 +1001,12 @@ static void do_channel_work(struct work_item *work) d = work->ch[ch].ns_to; spos = work->ch[ch].spos; sbpos = work->ch[ch].sbpos; - sinc = work->ch[ch].sinc; - - SB = spu.SB + ch * SB_SIZE; - if (work->ch[ch].bNewPitch) - SB[32] = 1; // reset interpolation if (work->ch[ch].bNoise) do_lsfr_samples(d, work->ctrl, &spu.dwNoiseCount, &spu.dwNoiseVal); - else if (work->ch[ch].bFMod == 2 - || (work->ch[ch].bFMod == 0 && spu_config.iUseInterpolation == 0)) - do_samples_noint(decode_block_work, work, ch, d, SB, sinc, &spos, &sbpos); - else if (work->ch[ch].bFMod == 0 && spu_config.iUseInterpolation == 1) - do_samples_simple(decode_block_work, work, ch, d, SB, sinc, &spos, &sbpos); else - do_samples_default(decode_block_work, work, ch, d, SB, sinc, &spos, &sbpos); + do_samples_adpcm(decode_block_work, work, ch, d, work->ch[ch].bFMod, + &spu.sb[ch], work->ch[ch].sinc, &spos, &sbpos); d = MixADSR(&work->ch[ch].adsr, d); if (d < ns_to) { @@ -1509,7 +1478,6 @@ long CALLBACK SPUinit(void) spu.s_chan = calloc(MAXCHAN+1, sizeof(spu.s_chan[0])); // channel + 1 infos (1 is security for fmod handling) spu.rvb = calloc(1, sizeof(REVERBInfo)); - spu.SB = calloc(MAXCHAN, sizeof(spu.SB[0]) * SB_SIZE); spu.spuAddr = 0; spu.decode_pos = 0; @@ -1569,8 +1537,6 @@ long CALLBACK SPUshutdown(void) free(spu.spuMemC); spu.spuMemC = NULL; - free(spu.SB); - spu.SB = NULL; free(spu.s_chan); spu.s_chan = NULL; free(spu.rvb); From 0de2ae236e4f4cd7c9fb0ee648037f93753c02ff Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 8 Oct 2023 01:43:25 +0300 Subject: [PATCH 392/597] spu: make fmod work in threaded mode having to do such work 
makes me want to drop this dreaded threaded stuff altogether --- plugins/dfsound/adsr.c | 30 ++++----- plugins/dfsound/externals.h | 1 + plugins/dfsound/spu.c | 117 ++++++++++++++++++++++++++---------- 3 files changed, 100 insertions(+), 48 deletions(-) diff --git a/plugins/dfsound/adsr.c b/plugins/dfsound/adsr.c index cb366dc4e..23ff3df62 100644 --- a/plugins/dfsound/adsr.c +++ b/plugins/dfsound/adsr.c @@ -63,7 +63,7 @@ INLINE void StartADSR(int ch) // MIX ADSR //////////////////////////////////////////////////////////////////////// -static int MixADSR(ADSRInfoEx *adsr, int ns_to) +static int MixADSR(int *samples, ADSRInfoEx *adsr, int ns_to) { unsigned int EnvelopeVol = adsr->EnvelopeVol; int ns = 0, val, rto, level; @@ -80,8 +80,8 @@ static int MixADSR(ADSRInfoEx *adsr, int ns_to) if ((signed int)EnvelopeVol <= 0) break; - ChanBuf[ns] *= (signed int)EnvelopeVol >> 21; - ChanBuf[ns] >>= 10; + samples[ns] *= (signed int)EnvelopeVol >> 21; + samples[ns] >>= 10; } } else @@ -92,8 +92,8 @@ static int MixADSR(ADSRInfoEx *adsr, int ns_to) if ((signed int)EnvelopeVol <= 0) break; - ChanBuf[ns] *= (signed int)EnvelopeVol >> 21; - ChanBuf[ns] >>= 10; + samples[ns] *= (signed int)EnvelopeVol >> 21; + samples[ns] >>= 10; } } @@ -114,8 +114,8 @@ static int MixADSR(ADSRInfoEx *adsr, int ns_to) if ((signed int)EnvelopeVol < 0) // overflow break; - ChanBuf[ns] *= (signed int)EnvelopeVol >> 21; - ChanBuf[ns] >>= 10; + samples[ns] *= (signed int)EnvelopeVol >> 21; + samples[ns] >>= 10; } if ((signed int)EnvelopeVol < 0) // overflow @@ -139,8 +139,8 @@ static int MixADSR(ADSRInfoEx *adsr, int ns_to) if ((signed int)EnvelopeVol < 0) EnvelopeVol = 0; - ChanBuf[ns] *= EnvelopeVol >> 21; - ChanBuf[ns] >>= 10; + samples[ns] *= EnvelopeVol >> 21; + samples[ns] >>= 10; ns++; if (((EnvelopeVol >> 27) & 0xf) <= level) @@ -177,8 +177,8 @@ static int MixADSR(ADSRInfoEx *adsr, int ns_to) break; } - ChanBuf[ns] *= (signed int)EnvelopeVol >> 21; - ChanBuf[ns] >>= 10; + samples[ns] *= (signed 
int)EnvelopeVol >> 21; + samples[ns] >>= 10; } } else @@ -192,8 +192,8 @@ static int MixADSR(ADSRInfoEx *adsr, int ns_to) if ((signed int)EnvelopeVol < 0) break; - ChanBuf[ns] *= (signed int)EnvelopeVol >> 21; - ChanBuf[ns] >>= 10; + samples[ns] *= (signed int)EnvelopeVol >> 21; + samples[ns] >>= 10; } } else @@ -204,8 +204,8 @@ static int MixADSR(ADSRInfoEx *adsr, int ns_to) if ((signed int)EnvelopeVol < 0) break; - ChanBuf[ns] *= (signed int)EnvelopeVol >> 21; - ChanBuf[ns] >>= 10; + samples[ns] *= (signed int)EnvelopeVol >> 21; + samples[ns] >>= 10; } } } diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index 9fb58ad24..297c2c39c 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -259,6 +259,7 @@ typedef struct sample_buf sb[MAXCHAN]; int interpolation; + sample_buf sb_thread[MAXCHAN]; } SPUInfo; #define regAreaGet(offset) \ diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 9e1e83f20..8edcd4db9 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -18,6 +18,7 @@ * * ***************************************************************************/ +#include #include "stdafx.h" #define _IN_SPU @@ -274,15 +275,15 @@ static void StartSound(int ch) // ALL KIND OF HELPERS //////////////////////////////////////////////////////////////////////// -INLINE int FModChangeFrequency(int pitch, int ns) +INLINE int FModChangeFrequency(int pitch, int ns, int *fmod_buf) { pitch = (signed short)pitch; - pitch = ((32768 + iFMod[ns]) * pitch) >> 15; + pitch = ((32768 + fmod_buf[ns]) * pitch) >> 15; pitch &= 0xffff; if (pitch > 0x3fff) pitch = 0x3fff; - iFMod[ns] = 0; + fmod_buf[ns] = 0; return pitch << 4; } @@ -479,7 +480,7 @@ static void scan_for_irq(int ch, unsigned int *upd_samples) } #define make_do_samples(name, fmod_code, interp_start, interp_store, interp_get, interp_end) \ -static noinline int name( \ +static noinline int name(int *dst, \ int (*decode_f)(void *context, int ch, int *SB), void *ctx, \ int 
ch, int ns_to, sample_buf *sb, int sinc, int *spos, int *sbpos) \ { \ @@ -528,41 +529,41 @@ static noinline int name( \ if(sinc<0x10000) /* -> upsampling? */ \ InterpolateUp(sb, sinc); /* --> interpolate up */ \ else InterpolateDown(sb, sinc); /* --> else down */ \ - ChanBuf[ns] = sb->SB[29] + dst[ns] = sb->SB[29] make_do_samples(do_samples_nointerp, , fa = sb->SB[29], - , ChanBuf[ns] = fa, sb->SB[29] = fa) + , dst[ns] = fa, sb->SB[29] = fa) make_do_samples(do_samples_simple, , , simple_interp_store, simple_interp_get, ) make_do_samples(do_samples_gauss, , , StoreInterpolationGaussCubic(sb, fa), - ChanBuf[ns] = GetInterpolationGauss(sb, *spos), ) + dst[ns] = GetInterpolationGauss(sb, *spos), ) make_do_samples(do_samples_cubic, , , StoreInterpolationGaussCubic(sb, fa), - ChanBuf[ns] = GetInterpolationCubic(sb, *spos), ) + dst[ns] = GetInterpolationCubic(sb, *spos), ) make_do_samples(do_samples_fmod, - sinc = FModChangeFrequency(spu.s_chan[ch].iRawPitch, ns), , + sinc = FModChangeFrequency(spu.s_chan[ch].iRawPitch, ns, iFMod), , StoreInterpolationGaussCubic(sb, fa), - ChanBuf[ns] = GetInterpolationGauss(sb, *spos), ) + dst[ns] = GetInterpolationGauss(sb, *spos), ) -INLINE int do_samples_adpcm( +INLINE int do_samples_adpcm(int *dst, int (*decode_f)(void *context, int ch, int *SB), void *ctx, int ch, int ns_to, int fmod, sample_buf *sb, int sinc, int *spos, int *sbpos) { int interp = spu.interpolation; if (fmod == 1) - return do_samples_fmod(decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); + return do_samples_fmod(dst, decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); if (fmod) interp = 2; switch (interp) { case 0: - return do_samples_nointerp(decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); + return do_samples_nointerp(dst, decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); case 1: - return do_samples_simple (decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); + return do_samples_simple (dst, decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); default: - return 
do_samples_gauss (decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); + return do_samples_gauss (dst, decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); case 3: - return do_samples_cubic (decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); + return do_samples_cubic (dst, decode_f, ctx, ch, ns_to, sb, sinc, spos, sbpos); } } @@ -593,7 +594,33 @@ static int do_samples_skip(int ch, int ns_to) return ret; } -static void do_lsfr_samples(int ns_to, int ctrl, +static int do_samples_skip_fmod(int ch, int ns_to, int *fmod_buf) +{ + SPUCHAN *s_chan = &spu.s_chan[ch]; + int spos = s_chan->spos; + int ret = ns_to, ns, d; + + spos += s_chan->iSBPos << 16; + + for (ns = 0; ns < ns_to; ns++) + { + spos += FModChangeFrequency(s_chan->iRawPitch, ns, fmod_buf); + while (spos >= 28*0x10000) + { + d = skip_block(ch); + if (d && ns < ret) + ret = ns; + spos -= 28*0x10000; + } + } + + s_chan->iSBPos = spos >> 16; + s_chan->spos = spos & 0xffff; + + return ret; +} + +static void do_lsfr_samples(int *dst, int ns_to, int ctrl, unsigned int *dwNoiseCount, unsigned int *dwNoiseVal) { unsigned int counter = *dwNoiseCount; @@ -617,20 +644,20 @@ static void do_lsfr_samples(int ns_to, int ctrl, val = (val << 1) | bit; } - ChanBuf[ns] = (signed short)val; + dst[ns] = (signed short)val; } *dwNoiseCount = counter; *dwNoiseVal = val; } -static int do_samples_noise(int ch, int ns_to) +static int do_samples_noise(int *dst, int ch, int ns_to) { int ret; ret = do_samples_skip(ch, ns_to); - do_lsfr_samples(ns_to, spu.spuCtrl, &spu.dwNoiseCount, &spu.dwNoiseVal); + do_lsfr_samples(dst, ns_to, spu.spuCtrl, &spu.dwNoiseCount, &spu.dwNoiseVal); return ret; } @@ -766,13 +793,13 @@ static void do_channels(int ns_to) s_chan = &spu.s_chan[ch]; if (s_chan->bNoise) - d = do_samples_noise(ch, ns_to); + d = do_samples_noise(ChanBuf, ch, ns_to); else - d = do_samples_adpcm(decode_block, NULL, ch, ns_to, s_chan->bFMod, + d = do_samples_adpcm(ChanBuf, decode_block, NULL, ch, ns_to, s_chan->bFMod, &spu.sb[ch], s_chan->sinc, 
&s_chan->spos, &s_chan->iSBPos); if (!s_chan->bStarting) { - d = MixADSR(&s_chan->ADSRX, d); + d = MixADSR(ChanBuf, &s_chan->ADSRX, d); if (d < ns_to) { spu.dwChannelsAudible &= ~(1 << ch); s_chan->ADSRX.State = ADSR_RELEASE; @@ -855,6 +882,7 @@ static struct spu_worker { unsigned short bNoise:1; unsigned short bFMod:2; unsigned short bRVBActive:1; + unsigned short bStarting:1; ADSRInfoEx adsr; } ch[24]; int SSumLR[NSSIZE * 2]; @@ -900,6 +928,7 @@ static int decode_block_work(void *context, int ch, int *SB) static void queue_channel_work(int ns_to, unsigned int silentch) { + int tmpFMod[NSSIZE]; struct work_item *work; SPUCHAN *s_chan; unsigned int mask; @@ -914,7 +943,7 @@ static void queue_channel_work(int ns_to, unsigned int silentch) mask = work->channels_new = spu.dwNewChannel & 0xffffff; for (ch = 0; mask != 0; ch++, mask >>= 1) { if (mask & 1) - StartSoundMain(ch); + StartSound(ch); } mask = work->channels_on = spu.dwChannelsAudible & 0xffffff; @@ -936,10 +965,32 @@ static void queue_channel_work(int ns_to, unsigned int silentch) work->ch[ch].bNoise = s_chan->bNoise; work->ch[ch].bFMod = s_chan->bFMod; work->ch[ch].bRVBActive = s_chan->bRVBActive; + work->ch[ch].bStarting = s_chan->bStarting; if (s_chan->prevflags & 1) work->ch[ch].start = work->ch[ch].loop; - d = do_samples_skip(ch, ns_to); + if (unlikely(s_chan->bFMod == 2)) + { + // sucks, have to do double work + assert(!s_chan->bNoise); + d = do_samples_gauss(tmpFMod, decode_block, NULL, ch, ns_to, + &spu.sb[ch], s_chan->sinc, &s_chan->spos, &s_chan->iSBPos); + if (!s_chan->bStarting) { + d = MixADSR(tmpFMod, &s_chan->ADSRX, d); + if (d < ns_to) { + spu.dwChannelsAudible &= ~(1 << ch); + s_chan->ADSRX.State = ADSR_RELEASE; + s_chan->ADSRX.EnvelopeVol = 0; + } + } + memset(&tmpFMod[d], 0, (ns_to - d) * sizeof(tmpFMod[d])); + work->ch[ch].ns_to = d; + continue; + } + if (unlikely(s_chan->bFMod)) + d = do_samples_skip_fmod(ch, ns_to, tmpFMod); + else + d = do_samples_skip(ch, ns_to); work->ch[ch].ns_to = 
d; if (!s_chan->bStarting) { @@ -951,7 +1002,7 @@ static void queue_channel_work(int ns_to, unsigned int silentch) s_chan->ADSRX.EnvelopeVol = 0; } } - } + } // for (ch;;) work->rvb_addr = 0; if (spu.rvb->StartAddr) { @@ -978,10 +1029,10 @@ static void do_channel_work(struct work_item *work) if (unlikely(spu.interpolation != spu_config.iUseInterpolation)) { spu.interpolation = spu_config.iUseInterpolation; - mask = spu.dwChannelsAudible & 0xffffff; + mask = work->channels_on; for (ch = 0; mask != 0; ch++, mask >>= 1) if (mask & 1) - ResetInterpolation(&spu.sb[ch]); + ResetInterpolation(&spu.sb_thread[ch]); } if (work->rvb_addr) @@ -990,7 +1041,7 @@ static void do_channel_work(struct work_item *work) mask = work->channels_new; for (ch = 0; mask != 0; ch++, mask >>= 1) { if (mask & 1) - StartSoundSB(&spu.sb[ch]); + StartSoundSB(&spu.sb_thread[ch]); } mask = work->channels_on; @@ -1003,12 +1054,12 @@ static void do_channel_work(struct work_item *work) sbpos = work->ch[ch].sbpos; if (work->ch[ch].bNoise) - do_lsfr_samples(d, work->ctrl, &spu.dwNoiseCount, &spu.dwNoiseVal); + do_lsfr_samples(ChanBuf, d, work->ctrl, &spu.dwNoiseCount, &spu.dwNoiseVal); else - do_samples_adpcm(decode_block_work, work, ch, d, work->ch[ch].bFMod, - &spu.sb[ch], work->ch[ch].sinc, &spos, &sbpos); + do_samples_adpcm(ChanBuf, decode_block_work, work, ch, d, work->ch[ch].bFMod, + &spu.sb_thread[ch], work->ch[ch].sinc, &spos, &sbpos); - d = MixADSR(&work->ch[ch].adsr, d); + d = MixADSR(ChanBuf, &work->ch[ch].adsr, d); if (d < ns_to) { work->ch[ch].adsr.EnvelopeVol = 0; memset(&ChanBuf[d], 0, (ns_to - d) * sizeof(ChanBuf[0])); From 63b05f75121cffb317e0ef68fa90a00c7a9aabdb Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 8 Oct 2023 01:55:58 +0300 Subject: [PATCH 393/597] setup spu r8 handlers used by Xenogears, unknown what for --- libpcsxcore/new_dynarec/pcsxmem.c | 12 ++++++++++++ libpcsxcore/psxhw.c | 7 +++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git 
a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index 7c670f8bc..fc19494eb 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -180,6 +180,16 @@ make_dma_func(3) make_dma_func(4) make_dma_func(6) +static u32 io_spu_read8_even(u32 addr) +{ + return SPU_readRegister(addr, psxRegs.cycle) & 0xff; +} + +static u32 io_spu_read8_odd(u32 addr) +{ + return SPU_readRegister(addr, psxRegs.cycle) >> 8; +} + static u32 io_spu_read16(u32 addr) { return SPU_readRegister(addr, psxRegs.cycle); @@ -401,6 +411,8 @@ void new_dyna_pcsx_mem_init(void) map_item(&mem_iortab[IOMEM8(0x1803)], cdrRead3, 1); for (i = 0x1c00; i < 0x2000; i += 2) { + map_item(&mem_iortab[IOMEM8(i)], io_spu_read8_even, 1); + map_item(&mem_iortab[IOMEM8(i+1)], io_spu_read8_odd, 1); map_item(&mem_iortab[IOMEM16(i)], io_spu_read16, 1); map_item(&mem_iortab[IOMEM32(i)], io_spu_read32, 1); } diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index ecb8eaf9f..60ff6c4ca 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -120,8 +120,11 @@ u8 psxHwRead8(u32 add) { log_unhandled("unhandled r8 %08x @%08x\n", add, psxRegs.pc); // falthrough default: - if (0x1f801c00 <= add && add < 0x1f802000) - log_unhandled("spu r8 %02x @%08x\n", add, psxRegs.pc); + if (0x1f801c00 <= add && add < 0x1f802000) { + u16 val = SPU_readRegister(add & ~1, psxRegs.cycle); + hard = (add & 1) ? 
val >> 8 : val; + break; + } hard = psxHu8(add); #ifdef PSXHW_LOG PSXHW_LOG("*Unkwnown 8bit read at address %x\n", add); From abf094858889145af1fde9840429dfb8a0e70a39 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 9 Oct 2023 01:20:07 +0300 Subject: [PATCH 394/597] gpu: a bit better idle bit handling notaz/pcsx_rearmed#217 --- Makefile | 2 +- frontend/libretro.c | 2 ++ frontend/plugin_lib.c | 2 ++ frontend/plugin_lib.h | 3 +++ libpcsxcore/gpu.c | 32 ++++++++++++++++++++++++++++++++ libpcsxcore/gpu.h | 8 ++++++++ libpcsxcore/psxdma.c | 6 ++++++ plugins/gpulib/gpu.c | 14 ++++++++++++-- plugins/gpulib/gpu.h | 1 + 9 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 libpcsxcore/gpu.c diff --git a/Makefile b/Makefile index c020274a2..7c16e4a7c 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,7 @@ OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cheat.o libpcsxcore libpcsxcore/misc.o libpcsxcore/plugins.o libpcsxcore/ppf.o libpcsxcore/psxbios.o \ libpcsxcore/psxcommon.o libpcsxcore/psxcounters.o libpcsxcore/psxdma.o \ libpcsxcore/psxhw.o libpcsxcore/psxinterpreter.o libpcsxcore/psxmem.o libpcsxcore/r3000a.o \ - libpcsxcore/sio.o libpcsxcore/spu.o + libpcsxcore/sio.o libpcsxcore/spu.o libpcsxcore/gpu.o OBJS += libpcsxcore/gte.o libpcsxcore/gte_nf.o libpcsxcore/gte_divider.o #OBJS += libpcsxcore/debug.o libpcsxcore/socket.o libpcsxcore/disr3000a.o ifeq "$(ARCH)" "arm" diff --git a/frontend/libretro.c b/frontend/libretro.c index b8b11665f..d21b1a2b9 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -27,6 +27,7 @@ #include "../libpcsxcore/cdriso.h" #include "../libpcsxcore/cheat.h" #include "../libpcsxcore/r3000a.h" +#include "../libpcsxcore/gpu.h" #include "../plugins/dfsound/out.h" #include "../plugins/dfsound/spu_config.h" #include "cspace.h" @@ -488,6 +489,7 @@ struct rearmed_cbs pl_rearmed_cbs = { .pl_vout_close = vout_close, .mmap = pl_mmap, .munmap = pl_munmap, + .gpu_state_change = gpu_state_change, /* from psxcounters */ 
.gpu_hcnt = &hSyncCount, .gpu_frame_count = &frame_counter, diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 917ae1796..2339028e7 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -33,6 +33,7 @@ #include "psemu_plugin_defs.h" #include "../libpcsxcore/new_dynarec/new_dynarec.h" #include "../libpcsxcore/psxmem_map.h" +#include "../libpcsxcore/gpu.h" #define HUD_HEIGHT 10 @@ -770,6 +771,7 @@ struct rearmed_cbs pl_rearmed_cbs = { .mmap = pl_mmap, .munmap = pl_munmap, .pl_set_gpu_caps = pl_set_gpu_caps, + .gpu_state_change = gpu_state_change, }; /* watchdog */ diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index 4984d3054..efd7d1e2b 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -49,6 +49,7 @@ void pl_switch_dispmode(void); void pl_timing_prepare(int is_pal); void pl_frame_limit(void); +// for communication with gpulib struct rearmed_cbs { void (*pl_get_layer_pos)(int *x, int *y, int *w, int *h); int (*pl_vout_open)(void); @@ -61,6 +62,8 @@ struct rearmed_cbs { // only used by some frontends void (*pl_vout_set_raw_vram)(void *vram); void (*pl_set_gpu_caps)(int caps); + // emulation related + void (*gpu_state_change)(int what); // some stats, for display by some plugins int flips_per_sec, cpu_usage; float vsps_cur; // currect vsync/s diff --git a/libpcsxcore/gpu.c b/libpcsxcore/gpu.c new file mode 100644 index 000000000..1eadf59c6 --- /dev/null +++ b/libpcsxcore/gpu.c @@ -0,0 +1,32 @@ +/*************************************************************************** + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + ***************************************************************************/ + +#include "gpu.h" +#include "psxdma.h" + +void gpu_state_change(int what) +{ + enum psx_gpu_state state = what; + switch (state) + { + case PGS_VRAM_TRANSFER_START: + HW_GPU_STATUS &= ~SWAP32(PSXGPU_nBUSY); + break; + case PGS_VRAM_TRANSFER_END: + HW_GPU_STATUS |= SWAP32(PSXGPU_nBUSY); + break; + case PGS_PRIMITIVE_START: + HW_GPU_STATUS &= ~SWAP32(PSXGPU_nBUSY); + GPUDMA_INT(200); // see gpuInterrupt + break; + } +} diff --git a/libpcsxcore/gpu.h b/libpcsxcore/gpu.h index 21384e582..ef9c71838 100644 --- a/libpcsxcore/gpu.h +++ b/libpcsxcore/gpu.h @@ -43,4 +43,12 @@ HW_GPU_STATUS |= SWAP32(GPU_readStatus() & ~PSXGPU_TIMING_BITS); \ } +enum psx_gpu_state { + PGS_VRAM_TRANSFER_START, + PGS_VRAM_TRANSFER_END, + PGS_PRIMITIVE_START, // for non-dma only +}; + +void gpu_state_change(int what); + #endif /* __GPU_H__ */ diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index c0aee7ed4..0ffec81c2 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -156,6 +156,8 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU HW_DMA2_MADR = SWAPu32(madr + words_copy * 4); + // careful: gpu_state_change() also messes with this + HW_GPU_STATUS &= SWAP32(~PSXGPU_nBUSY); // already 32-bit word size ((size * 4) / 4) GPUDMA_INT(words / 4); return; @@ -177,6 +179,8 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU HW_DMA2_MADR = SWAPu32(madr); + // careful: gpu_state_change() also messes with this + HW_GPU_STATUS &= SWAP32(~PSXGPU_nBUSY); // already 32-bit word size ((size * 4) / 4) GPUDMA_INT(words / 4); return; @@ -214,6 +218,8 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU DMA_INTERRUPT(2); } +// note: this is also 
(ab)used for non-dma prim command +// to delay gpu returning to idle state, see gpu_state_change() void gpuInterrupt() { if (HW_DMA2_CHCR == SWAP32(0x01000401) && !(HW_DMA2_MADR & SWAP32(0x800000))) { diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index fec468824..15810b82d 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -12,6 +12,7 @@ #include #include #include "gpu.h" +#include "../../libpcsxcore/gpu.h" // meh #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #ifdef __GNUC__ @@ -441,6 +442,8 @@ static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_re log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w', gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h); + if (gpu.gpu_state_change) + gpu.gpu_state_change(PGS_VRAM_TRANSFER_START); } static void finish_vram_transfer(int is_read) @@ -452,6 +455,8 @@ static void finish_vram_transfer(int is_read) renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y, gpu.dma_start.w, gpu.dma_start.h, 0); } + if (gpu.gpu_state_change) + gpu.gpu_state_change(PGS_VRAM_TRANSFER_END); } static void do_vram_copy(const uint32_t *params) @@ -630,12 +635,16 @@ static noinline int do_cmd_buffer(uint32_t *data, int count) return count - pos; } -static void flush_cmd_buffer(void) +static noinline void flush_cmd_buffer(void) { int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len); if (left > 0) memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4); - gpu.cmd_len = left; + if (left != gpu.cmd_len) { + if (!gpu.dma.h && gpu.gpu_state_change) + gpu.gpu_state_change(PGS_PRIMITIVE_START); + gpu.cmd_len = left; + } } void GPUwriteDataMem(uint32_t *mem, int count) @@ -914,6 +923,7 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs) gpu.mmap = cbs->mmap; gpu.munmap = cbs->munmap; + gpu.gpu_state_change = cbs->gpu_state_change; // delayed vram mmap if (gpu.vram == NULL) diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index dbca8081a..4abc36b7b 100644 --- 
a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -107,6 +107,7 @@ struct psx_gpu { (int *x, int *y, int *w, int *h, int *vram_h); void *(*mmap)(unsigned int size); void (*munmap)(void *ptr, unsigned int size); + void (*gpu_state_change)(int what); // psx_gpu_state }; extern struct psx_gpu gpu; From 4bb8d7e16bad9322d862d6d0dcaa048d75064c1f Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 6 Oct 2023 22:53:47 +0200 Subject: [PATCH 395/597] Add compile-time option to drop psxMemRLUT, psxMemWLUT Add compile-time option which, if enabled, will replace accesses to psxMemRLUT and psxMemWLUT with a small inline function psxm(). The reasons behind this change are: - These were some BIG LUTs. On 32-bit, they would account for 512 KiB of RAM. On 64-bit, they would account for 1 MiB of RAM. This sounds tiny by today's standards, but it still is huge for some of the platforms that PCSX supports. - Computing the pointer isn't that resource-intensive. Still slower than reading from a LUT (as long as the LUT entry is in the cache, which it should be, as the few valid entries are grouped together), but I doubt that it slows down the interpreter by a lot. - Even if it does slow down the interpreter a bit, it shouldn't be a huge deal, given that the interpreter isn't really used nowadays as the JITs support all the major CPU architectures, and the interpreter is used mostly for debugging purposes. Besides, the two JITs do not use these LUTs. 
Signed-off-by: Paul Cercueil --- Makefile | 3 +- libpcsxcore/psxinterpreter.c | 18 ++++-------- libpcsxcore/psxmem.c | 57 +++++++++++++++++++++--------------- libpcsxcore/psxmem.h | 42 +++++++++++++++++++++++++- 4 files changed, 82 insertions(+), 38 deletions(-) diff --git a/Makefile b/Makefile index 7c16e4a7c..103cbb791 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,8 @@ CFLAGS += -O2 -DNDEBUG endif CFLAGS += -DP_HAVE_MMAP=$(if $(NO_MMAP),0,1) \ -DP_HAVE_PTHREAD=$(if $(NO_PTHREAD),0,1) \ - -DP_HAVE_POSIX_MEMALIGN=$(if $(NO_POSIX_MEMALIGN),0,1) + -DP_HAVE_POSIX_MEMALIGN=$(if $(NO_POSIX_MEMALIGN),0,1) \ + -DDISABLE_MEM_LUTS=0 CXXFLAGS += $(CFLAGS) #DRC_DBG = 1 #PCNT = 1 diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index 2ffab69a2..f6ff2e8b2 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -176,25 +176,21 @@ static int execBreakCheck(psxRegisters *regs, u32 pc) // get an opcode without triggering exceptions or affecting cache u32 intFakeFetch(u32 pc) { - u8 *base = psxMemRLUT[pc >> 16]; - u32 *code; - if (unlikely(base == INVALID_PTR)) + u32 *code = (u32 *)psxm(pc & ~0x3, 0); + if (unlikely(code == INVALID_PTR)) return 0; // nop - code = (u32 *)(base + (pc & 0xfffc)); return SWAP32(*code); } static u32 INT_ATTR fetchNoCache(psxRegisters *regs, u8 **memRLUT, u32 pc) { - u8 *base = memRLUT[pc >> 16]; - u32 *code; - if (unlikely(base == INVALID_PTR)) { + u32 *code = (u32 *)psxm_lut(pc & ~0x3, 0, memRLUT); + if (unlikely(code == INVALID_PTR)) { SysPrintf("game crash @%08x, ra=%08x\n", pc, regs->GPR.n.ra); intException(regs, pc, R3000E_IBE << 2); return 0; // execute as nop } - code = (u32 *)(base + (pc & 0xfffc)); return SWAP32(*code); } @@ -217,14 +213,12 @@ static u32 INT_ATTR fetchICache(psxRegisters *regs, u8 **memRLUT, u32 pc) if (((entry->tag ^ pc) & 0xfffffff0) != 0 || pc < entry->tag) { - const u8 *base = memRLUT[pc >> 16]; - const u32 *code; - if (unlikely(base == INVALID_PTR)) { + const u32 *code = 
(u32 *)psxm_lut(pc & ~0xf, 0, memRLUT); + if (unlikely(code == INVALID_PTR)) { SysPrintf("game crash @%08x, ra=%08x\n", pc, regs->GPR.n.ra); intException(regs, pc, R3000E_IBE << 2); return 0; // execute as nop } - code = (u32 *)(base + (pc & 0xfff0)); entry->tag = pc; // treat as 4 words, although other configurations are said to be possible diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 389bdba51..2196fa7b7 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -206,6 +206,9 @@ int psxMemInit(void) return -1; } + if (DISABLE_MEM_LUTS) + return 0; + psxMemRLUT = (u8 **)malloc(0x10000 * sizeof(void *)); psxMemWLUT = (u8 **)malloc(0x10000 * sizeof(void *)); @@ -285,19 +288,25 @@ void psxMemShutdown() { free(psxMemWLUT); psxMemWLUT = NULL; } +int cache_isolated; + void psxMemOnIsolate(int enable) { - if (enable) { - memset(psxMemWLUT + 0x0000, (int)(uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); - memset(psxMemWLUT + 0x8000, (int)(uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); - //memset(psxMemWLUT + 0xa000, (int)(uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); - } else { - int i; - for (i = 0; i < 0x80; i++) - psxMemWLUT[i + 0x0000] = (void *)&psxM[(i & 0x1f) << 16]; - memcpy(psxMemWLUT + 0x8000, psxMemWLUT, 0x80 * sizeof(void *)); - memcpy(psxMemWLUT + 0xa000, psxMemWLUT, 0x80 * sizeof(void *)); + if (!DISABLE_MEM_LUTS) { + if (enable) { + memset(psxMemWLUT + 0x0000, (int)(uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); + memset(psxMemWLUT + 0x8000, (int)(uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); + //memset(psxMemWLUT + 0xa000, (int)(uintptr_t)INVALID_PTR, 0x80 * sizeof(void *)); + } else { + int i; + for (i = 0; i < 0x80; i++) + psxMemWLUT[i + 0x0000] = (void *)&psxM[(i & 0x1f) << 16]; + memcpy(psxMemWLUT + 0x8000, psxMemWLUT, 0x80 * sizeof(void *)); + memcpy(psxMemWLUT + 0xa000, psxMemWLUT, 0x80 * sizeof(void *)); + } } + + cache_isolated = enable; psxCpu->Notify(enable ? 
R3000ACPU_NOTIFY_CACHE_ISOLATED : R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL); } @@ -313,11 +322,11 @@ u8 psxMemRead8(u32 mem) { else return psxHwRead8(mem); } else { - p = (char *)(psxMemRLUT[t]); + p = psxm(mem, 0); if (p != INVALID_PTR) { if (Config.Debug) DebugCheckBP((mem & 0xffffff) | 0x80000000, R1); - return *(u8 *)(p + (mem & 0xffff)); + return *(u8 *)p; } else { #ifdef PSXMEM_LOG PSXMEM_LOG("err lb %8.8lx\n", mem); @@ -338,11 +347,11 @@ u16 psxMemRead16(u32 mem) { else return psxHwRead16(mem); } else { - p = (char *)(psxMemRLUT[t]); + p = psxm(mem, 0); if (p != INVALID_PTR) { if (Config.Debug) DebugCheckBP((mem & 0xffffff) | 0x80000000, R2); - return SWAPu16(*(u16 *)(p + (mem & 0xffff))); + return SWAPu16(*(u16 *)p); } else { #ifdef PSXMEM_LOG PSXMEM_LOG("err lh %8.8lx\n", mem); @@ -363,11 +372,11 @@ u32 psxMemRead32(u32 mem) { else return psxHwRead32(mem); } else { - p = (char *)(psxMemRLUT[t]); + p = psxm(mem, 0); if (p != INVALID_PTR) { if (Config.Debug) DebugCheckBP((mem & 0xffffff) | 0x80000000, R4); - return SWAPu32(*(u32 *)(p + (mem & 0xffff))); + return SWAPu32(*(u32 *)p); } else { if (mem == 0xfffe0130) return psxRegs.biuReg; @@ -390,11 +399,11 @@ void psxMemWrite8(u32 mem, u8 value) { else psxHwWrite8(mem, value); } else { - p = (char *)(psxMemWLUT[t]); + p = psxm(mem, 1); if (p != INVALID_PTR) { if (Config.Debug) DebugCheckBP((mem & 0xffffff) | 0x80000000, W1); - *(u8 *)(p + (mem & 0xffff)) = value; + *(u8 *)p = value; #ifndef DRC_DISABLE psxCpu->Clear((mem & (~3)), 1); #endif @@ -417,11 +426,11 @@ void psxMemWrite16(u32 mem, u16 value) { else psxHwWrite16(mem, value); } else { - p = (char *)(psxMemWLUT[t]); + p = psxm(mem, 1); if (p != INVALID_PTR) { if (Config.Debug) DebugCheckBP((mem & 0xffffff) | 0x80000000, W2); - *(u16 *)(p + (mem & 0xffff)) = SWAPu16(value); + *(u16 *)p = SWAPu16(value); #ifndef DRC_DISABLE psxCpu->Clear((mem & (~3)), 1); #endif @@ -445,11 +454,11 @@ void psxMemWrite32(u32 mem, u32 value) { else psxHwWrite32(mem, value); } 
else { - p = (char *)(psxMemWLUT[t]); + p = psxm(mem, 1); if (p != INVALID_PTR) { if (Config.Debug) DebugCheckBP((mem & 0xffffff) | 0x80000000, W4); - *(u32 *)(p + (mem & 0xffff)) = SWAPu32(value); + *(u32 *)p = SWAPu32(value); #ifndef DRC_DISABLE psxCpu->Clear(mem, 1); #endif @@ -476,9 +485,9 @@ void *psxMemPointer(u32 mem) { else return NULL; } else { - p = (char *)(psxMemWLUT[t]); + p = psxm(mem, 1); if (p != INVALID_PTR) { - return (void *)(p + (mem & 0xffff)); + return (void *)p; } return NULL; } diff --git a/libpcsxcore/psxmem.h b/libpcsxcore/psxmem.h index 129973cf9..a52472c6d 100644 --- a/libpcsxcore/psxmem.h +++ b/libpcsxcore/psxmem.h @@ -114,8 +114,48 @@ extern s8 *psxH; extern u8 **psxMemWLUT; extern u8 **psxMemRLUT; +extern int cache_isolated; -#define PSXM(mem) (psxMemRLUT[(mem) >> 16] == INVALID_PTR ? INVALID_PTR : (u8*)(psxMemRLUT[(mem) >> 16] + ((mem) & 0xffff))) +static inline void * psxm_lut(u32 mem, int write, u8 **lut) +{ + if (!DISABLE_MEM_LUTS) { + void *ptr = lut[mem >> 16]; + + return ptr == INVALID_PTR ? INVALID_PTR : ptr + (u16)mem; + } + + if (mem >= 0xa0000000) + mem -= 0xa0000000; + else + mem &= ~0x80000000; + + if (mem < 0x800000) { + if (cache_isolated) + return INVALID_PTR; + + return &psxM[mem & 0x1fffff]; + } + + if (mem > 0x1f800000 && mem <= 0x1f810000) + return &psxH[mem - 0x1f800000]; + + if (!write) { + if (mem > 0x1fc00000 && mem <= 0x1fc80000) + return &psxR[mem - 0x1fc00000]; + + if (mem > 0x1f000000 && mem <= 0x1f010000) + return &psxP[mem - 0x1f000000]; + } + + return INVALID_PTR; +} + +static inline void * psxm(u32 mem, int write) +{ + return psxm_lut(mem, write, write ? 
psxMemWLUT : psxMemRLUT); +} + +#define PSXM(mem) psxm(mem, 0) #define PSXMs8(mem) (*(s8 *)PSXM(mem)) #define PSXMs16(mem) (SWAP16(*(s16 *)PSXM(mem))) #define PSXMs32(mem) (SWAP32(*(s32 *)PSXM(mem))) From 9ece32be0b943bfc3cb0bf23dc08730431c01ec7 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 9 Oct 2023 21:10:37 +0300 Subject: [PATCH 396/597] psxbios: don't limit pointers to ram scratchpad is sometimes used --- libpcsxcore/psxbios.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 432b0e2c1..11011d3dc 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -1479,13 +1479,15 @@ void psxBios_printf() { // 0x3f } static void psxBios_cd() { // 0x40 - const char *p, *dir = castRam8ptr(a0); + const char *p, *dir = Ra0; PSXBIOS_LOG("psxBios_%s %x(%s)\n", biosB0n[0x40], a0, dir); - if ((p = strchr(dir, ':'))) - dir = ++p; - if (*dir == '\\') - dir++; - snprintf(cdir, sizeof(cdir), "%s", dir); + if (dir != INVALID_PTR) { + if ((p = strchr(dir, ':'))) + dir = ++p; + if (*dir == '\\') + dir++; + snprintf(cdir, sizeof(cdir), "%s", dir); + } mips_return_c(1, 100); } @@ -2566,7 +2568,7 @@ void psxBios_puts() { // 3e/3f } static void bufile(const u8 *mcd_data, u32 dir_) { - struct DIRENTRY *dir = (struct DIRENTRY *)castRam8ptr(dir_); + struct DIRENTRY *dir = (struct DIRENTRY *)PSXM(dir_); const char *pfile = ffile + 5; const u8 *data = mcd_data; int i = 0, match = 0; @@ -2574,6 +2576,9 @@ static void bufile(const u8 *mcd_data, u32 dir_) { u32 head = 0; v0 = 0; + if (dir == INVALID_PTR) + return; + for (; nfile <= 15 && !match; nfile++) { const char *name; @@ -2627,11 +2632,12 @@ static void bufile(const u8 *mcd_data, u32 dir_) { */ static void psxBios_firstfile() { // 42 - char *pa0 = castRam8ptr(a0); + char *pa0 = Ra0; PSXBIOS_LOG("psxBios_%s %s %x\n", biosB0n[0x42], pa0, a1); v0 = 0; + if (pa0 != INVALID_PTR) { snprintf(ffile, sizeof(ffile), "%s", pa0); if (ffile[5] == 0) From 
78a6da0cc3511f4159bf2c648e7e72dc13204492 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 9 Oct 2023 21:54:54 +0300 Subject: [PATCH 397/597] provide DISABLE_MEM_LUTS default --- libpcsxcore/psxmem.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libpcsxcore/psxmem.h b/libpcsxcore/psxmem.h index a52472c6d..f3c2051b8 100644 --- a/libpcsxcore/psxmem.h +++ b/libpcsxcore/psxmem.h @@ -116,6 +116,10 @@ extern u8 **psxMemWLUT; extern u8 **psxMemRLUT; extern int cache_isolated; +#ifndef DISABLE_MEM_LUTS +#define DISABLE_MEM_LUTS 0 +#endif + static inline void * psxm_lut(u32 mem, int write, u8 **lut) { if (!DISABLE_MEM_LUTS) { From 979b861b31ef1f5033db5bd4433b842944300a3e Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 10 Oct 2023 00:39:57 +0300 Subject: [PATCH 398/597] add a nasty hack for gpu busy timing with a hope to remove it someday notaz/pcsx_rearmed#225 --- libpcsxcore/database.c | 7 +++++ libpcsxcore/new_dynarec/pcsxmem.c | 29 +++++--------------- libpcsxcore/psxcommon.h | 1 + libpcsxcore/psxhw.c | 45 ++++++++++++++++++++++++++----- libpcsxcore/psxhw.h | 3 +++ 5 files changed, 56 insertions(+), 29 deletions(-) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 9db351c31..5276b92cf 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -30,6 +30,12 @@ static const char * const gpu_slow_llist_db[] = "SLES02731", "SLPS02477", "SLPS03198", "SLUS01138", }; +static const char * const gpu_busy_hack_db[] = +{ + /* ToHeart (Japan) */ + "SLPS01919", "SLPS01920", +}; + #define HACK_ENTRY(var, list) \ { #var, &Config.hacks.var, list, ARRAY_SIZE(list) } @@ -44,6 +50,7 @@ hack_db[] = { HACK_ENTRY(cdr_read_timing, cdr_read_hack_db), HACK_ENTRY(gpu_slow_list_walking, gpu_slow_llist_db), + HACK_ENTRY(gpu_busy_hack, gpu_busy_hack_db), }; static const struct diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index fc19494eb..f4b1d90e8 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ 
b/libpcsxcore/new_dynarec/pcsxmem.c @@ -218,27 +218,6 @@ static void io_spu_write32(u32 value) wfunc(a + 2, value >> 16, psxRegs.cycle); } -static u32 io_gpu_read_status(void) -{ - u32 v; - - // meh2, syncing for img bit, might want to avoid it.. - gpuSyncPluginSR(); - v = HW_GPU_STATUS; - - // XXX: because of large timeslices can't use hSyncCount, using rough - // approximization instead. Perhaps better use hcounter code here or something. - if (hSyncCount < 240 && (HW_GPU_STATUS & PSXGPU_ILACE_BITS) != PSXGPU_ILACE_BITS) - v |= PSXGPU_LCF & (psxRegs.cycle << 20); - return v; -} - -static void io_gpu_write_status(u32 value) -{ - GPU_writeStatus(value); - gpuSyncPluginSR(); -} - void new_dyna_pcsx_mem_isolate(int enable) { int i; @@ -385,7 +364,7 @@ void new_dyna_pcsx_mem_init(void) map_item(&mem_iortab[IOMEM32(0x1124)], io_rcnt_read_mode2, 1); map_item(&mem_iortab[IOMEM32(0x1128)], io_rcnt_read_target2, 1); // map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); - map_item(&mem_iortab[IOMEM32(0x1814)], io_gpu_read_status, 1); + map_item(&mem_iortab[IOMEM32(0x1814)], psxHwReadGpuSR, 1); map_item(&mem_iortab[IOMEM32(0x1820)], mdecRead0, 1); map_item(&mem_iortab[IOMEM32(0x1824)], mdecRead1, 1); @@ -438,7 +417,7 @@ void new_dyna_pcsx_mem_init(void) map_item(&mem_iowtab[IOMEM32(0x1124)], io_rcnt_write_mode2, 1); map_item(&mem_iowtab[IOMEM32(0x1128)], io_rcnt_write_target2, 1); // map_item(&mem_iowtab[IOMEM32(0x1810)], GPU_writeData, 1); - map_item(&mem_iowtab[IOMEM32(0x1814)], io_gpu_write_status, 1); + map_item(&mem_iowtab[IOMEM32(0x1814)], psxHwWriteGpuSR, 1); map_item(&mem_iowtab[IOMEM32(0x1820)], mdecWrite0, 1); map_item(&mem_iowtab[IOMEM32(0x1824)], mdecWrite1, 1); @@ -489,6 +468,10 @@ void new_dyna_pcsx_mem_reset(void) // plugins might change so update the pointers map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); map_item(&mem_iowtab[IOMEM32(0x1810)], GPU_writeData, 1); + if (Config.hacks.gpu_busy_hack) + map_item(&mem_iortab[IOMEM32(0x1814)], 
psxHwReadGpuSRbusyHack, 1); + else + map_item(&mem_iortab[IOMEM32(0x1814)], psxHwReadGpuSR, 1); } void new_dyna_pcsx_mem_shutdown(void) diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index 4c78255e9..f978a5836 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -150,6 +150,7 @@ typedef struct { struct { boolean cdr_read_timing; boolean gpu_slow_list_walking; + boolean gpu_busy_hack; } hacks; } PcsxConfig; diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index 60ff6c4ca..254693e19 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -32,6 +32,8 @@ #define PAD_LOG(...) #endif +static u32 (*psxHwReadGpuSRptr)(void) = psxHwReadGpuSR; + void psxHwReset() { memset(psxH, 0, 0x10000); @@ -39,6 +41,8 @@ void psxHwReset() { cdrReset(); psxRcntInit(); HW_GPU_STATUS = SWAP32(0x14802000); + psxHwReadGpuSRptr = Config.hacks.gpu_busy_hack + ? psxHwReadGpuSRbusyHack : psxHwReadGpuSR; } void psxHwWriteIstat(u32 value) @@ -78,6 +82,39 @@ void psxHwWriteDmaIcr32(u32 value) HW_DMA_ICR = SWAPu32(tmp); } +void psxHwWriteGpuSR(u32 value) +{ + GPU_writeStatus(value); + gpuSyncPluginSR(); +} + +u32 psxHwReadGpuSR(void) +{ + u32 v; + + // meh2, syncing for img bit, might want to avoid it.. + gpuSyncPluginSR(); + v = HW_GPU_STATUS; + + // XXX: because of large timeslices can't use hSyncCount, using rough + // approximization instead. Perhaps better use hcounter code here or something. 
+ if (hSyncCount < 240 && (HW_GPU_STATUS & PSXGPU_ILACE_BITS) != PSXGPU_ILACE_BITS) + v |= PSXGPU_LCF & (psxRegs.cycle << 20); + return v; +} + +// a hack due to poor timing of gpu idle bit +// to get rid of this, GPU draw times, DMAs, cpu timing has to fall within +// certain timing window or else games like "ToHeart" softlock +u32 psxHwReadGpuSRbusyHack(void) +{ + u32 v = psxHwReadGpuSR(); + static u32 hack; + if (!(hack++ & 3)) + v &= ~PSXGPU_nBUSY; + return v; +} + u8 psxHwRead8(u32 add) { unsigned char hard; @@ -302,10 +339,7 @@ u32 psxHwRead32(u32 add) { #endif return hard; case 0x1f801814: - gpuSyncPluginSR(); - hard = SWAP32(HW_GPU_STATUS); - if (hSyncCount < 240 && (hard & PSXGPU_ILACE_BITS) != PSXGPU_ILACE_BITS) - hard |= PSXGPU_LCF & (psxRegs.cycle << 20); + hard = psxHwReadGpuSRptr(); #ifdef PSXHW_LOG PSXHW_LOG("GPU STATUS 32bit read %x\n", hard); #endif @@ -771,8 +805,7 @@ void psxHwWrite32(u32 add, u32 value) { #ifdef PSXHW_LOG PSXHW_LOG("GPU STATUS 32bit write %x\n", value); #endif - GPU_writeStatus(value); - gpuSyncPluginSR(); + psxHwWriteGpuSR(value); return; case 0x1f801820: diff --git a/libpcsxcore/psxhw.h b/libpcsxcore/psxhw.h index 2bde9edcb..91256ce31 100644 --- a/libpcsxcore/psxhw.h +++ b/libpcsxcore/psxhw.h @@ -85,6 +85,9 @@ int psxHwFreeze(void *f, int Mode); void psxHwWriteIstat(u32 value); void psxHwWriteImask(u32 value); void psxHwWriteDmaIcr32(u32 value); +void psxHwWriteGpuSR(u32 value); +u32 psxHwReadGpuSR(void); +u32 psxHwReadGpuSRbusyHack(void); #ifdef __cplusplus } From 7285d7ad81bd55b5be49c6e7ee71d0583e84327d Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 10 Oct 2023 23:48:42 +0300 Subject: [PATCH 399/597] spu: rearrange struct to reduce padding --- plugins/dfsound/externals.h | 37 ++++++++++++++++++------------------- plugins/dfsound/registers.c | 4 ++-- plugins/dfsound/spu.c | 2 +- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index 297c2c39c..4e1a40bcd 
100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -204,11 +204,6 @@ typedef struct unsigned short spuStat; unsigned int spuAddr; - union { - unsigned char *spuMemC; - unsigned short *spuMem; - }; - unsigned char * pSpuIrq; unsigned int cycles_played; unsigned int cycles_dma_end; @@ -224,11 +219,28 @@ typedef struct unsigned int dwChannelsAudible; // not silent channels unsigned int dwChannelDead; // silent+not useful channels + unsigned int XARepeat; + unsigned int XALastVal; + + int iLeftXAVol; + int iRightXAVol; + + union { + unsigned char *spuMemC; + unsigned short *spuMem; + }; + unsigned char * pSpuIrq; + unsigned char * pSpuBuffer; short * pS; + SPUCHAN * s_chan; + REVERBInfo * rvb; + + int * SSumLR; + void (CALLBACK *irqCallback)(void); // func of main emu, called on spu irq - void (CALLBACK *cddavCallback)(short, short); + //void (CALLBACK *cddavCallback)(short, short); void (CALLBACK *scheduleCallback)(unsigned int); const xa_decode_t * xapGlobal; @@ -242,19 +254,6 @@ typedef struct unsigned int * CDDAStart; unsigned int * CDDAEnd; - unsigned int XARepeat; - unsigned int XALastVal; - - int iLeftXAVol; - int iRightXAVol; - - SPUCHAN * s_chan; - REVERBInfo * rvb; - - // buffers - void * unused; - int * SSumLR; - unsigned short regArea[0x400]; sample_buf sb[MAXCHAN]; diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index 2796f9c3f..1e3767ad3 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -232,11 +232,11 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, //-------------------------------------------------// case H_CDLeft: spu.iLeftXAVol=(int16_t)val; - if(spu.cddavCallback) spu.cddavCallback(0,(int16_t)val); + //if(spu.cddavCallback) spu.cddavCallback(0,(int16_t)val); break; case H_CDRight: spu.iRightXAVol=(int16_t)val; - if(spu.cddavCallback) spu.cddavCallback(1,(int16_t)val); + //if(spu.cddavCallback) spu.cddavCallback(1,(int16_t)val); break; 
//-------------------------------------------------// case H_FMod1: diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 8edcd4db9..f29ca4fa7 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1609,7 +1609,7 @@ void CALLBACK SPUregisterCallback(void (CALLBACK *callback)(void)) void CALLBACK SPUregisterCDDAVolume(void (CALLBACK *CDDAVcallback)(short, short)) { - spu.cddavCallback = CDDAVcallback; + //spu.cddavCallback = CDDAVcallback; } void CALLBACK SPUregisterScheduleCb(void (CALLBACK *callback)(unsigned int)) From 561aa7a9b6794c18eb1e0611854fc5edfc03a58f Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 11 Oct 2023 02:11:51 +0300 Subject: [PATCH 400/597] spu: try keyoff ignore unclear if that's what's supposed to happen notaz/pcsx_rearmed#315 --- plugins/dfsound/externals.h | 2 ++ plugins/dfsound/registers.c | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index 4e1a40bcd..fca387d14 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -225,6 +225,8 @@ typedef struct int iLeftXAVol; int iRightXAVol; + unsigned int last_keyon_cycles; + union { unsigned char *spuMemC; unsigned short *spuMem; diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index 1e3767ad3..6f06ffcea 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -213,20 +213,32 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, */ //-------------------------------------------------// case H_SPUon1: + spu.last_keyon_cycles = cycles; do_samples_if_needed(cycles, 0, 2); SoundOn(0,16,val); break; //-------------------------------------------------// case H_SPUon2: + spu.last_keyon_cycles = cycles; do_samples_if_needed(cycles, 0, 2); SoundOn(16,24,val); break; //-------------------------------------------------// case H_SPUoff1: + if (cycles - spu.last_keyon_cycles < 786u) { + if (val & 
regAreaGet(H_SPUon1)) + log_unhandled("koff1 %04x %d\n", val, cycles - spu.last_keyon_cycles); + val &= ~regAreaGet(H_SPUon1); + } SoundOff(0,16,val); break; //-------------------------------------------------// case H_SPUoff2: + if (cycles - spu.last_keyon_cycles < 786u) { + if (val & regAreaGet(H_SPUon1)) + log_unhandled("koff2 %04x %d\n", val, cycles - spu.last_keyon_cycles); + val &= ~regAreaGet(H_SPUon2); + } SoundOff(16,24,val); break; //-------------------------------------------------// @@ -420,7 +432,7 @@ static void SoundOn(int start,int end,unsigned short val) static void SoundOff(int start,int end,unsigned short val) { int ch; - for(ch=start;ch>=1) // loop channels + for (ch = start; val && ch < end; ch++, val >>= 1) // loop channels { if(val&1) { @@ -563,3 +575,5 @@ static void ReverbOn(int start,int end,unsigned short val) spu.s_chan[ch].bReverb=val&1; // -> reverb on/off } } + +// vim:shiftwidth=1:expandtab From d0af6d75e0f944f31c8c88d053bf88b990fbcfb8 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 11 Oct 2023 02:14:08 +0300 Subject: [PATCH 401/597] unbreak some old savestates I have too many of them --- libpcsxcore/psxcounters.c | 6 ++++++ plugins/dfsound/externals.h | 2 +- plugins/dfsound/freeze.c | 2 ++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index ab8beeea4..02191c712 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -561,6 +561,7 @@ void psxRcntInit() s32 psxRcntFreeze( void *f, s32 Mode ) { u32 spuSyncCount = 0; + u32 count; s32 i; gzfreeze( &rcnts, sizeof(Rcnt) * CounterQuantity ); @@ -573,7 +574,12 @@ s32 psxRcntFreeze( void *f, s32 Mode ) { rcnts[3].rate = 1; for( i = 0; i < CounterQuantity - 1; ++i ) + { _psxRcntWmode( i, rcnts[i].mode ); + count = (psxRegs.cycle - rcnts[i].cycleStart) / rcnts[i].rate; + if (count > 0x1000) + _psxRcntWcount( i, count & 0xffff ); + } scheduleRcntBase(); psxRcntSet(); } diff --git a/plugins/dfsound/externals.h 
b/plugins/dfsound/externals.h index fca387d14..d752acf2e 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -190,7 +190,7 @@ typedef union union { struct { int pos; - signed short val[4]; + int val[4]; } gauss; int simple[5]; // 28-32 } interp; diff --git a/plugins/dfsound/freeze.c b/plugins/dfsound/freeze.c index c4afad830..8816a51ce 100644 --- a/plugins/dfsound/freeze.c +++ b/plugins/dfsound/freeze.c @@ -341,6 +341,8 @@ long CALLBACK SPUfreeze(uint32_t ulFreezeMode, SPUFreeze_t * pF, load_register(H_CDLeft, cycles); load_register(H_CDRight, cycles); + if (spu.rvb->CurrAddr < spu.rvb->StartAddr) + spu.rvb->CurrAddr = spu.rvb->StartAddr; // fix to prevent new interpolations from crashing spu.interpolation = -1; From 9b84c4f7c6edcd29dc0a38f3d68263813366b8d3 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 12 Oct 2023 01:36:37 +0300 Subject: [PATCH 402/597] cdrom: adjust the resume timing hack otherwise some load times become excessive, like in ff7 --- libpcsxcore/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index e232d05a4..e23660ce4 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -577,7 +577,7 @@ static int cdrSeekTime(unsigned char *target) seekTime = MAX_VALUE(seekTime, 20000); // need this stupidly long penalty or else Spyro2 intro desyncs - pausePenalty = (s32)(psxRegs.cycle - cdr.LastReadCycles) > cdReadTime * 4 ? cdReadTime * 25 : 0; + pausePenalty = (s32)(psxRegs.cycle - cdr.LastReadCycles) > cdReadTime * 8 ? 
cdReadTime * 25 : 0; seekTime += pausePenalty; seekTime = MIN_VALUE(seekTime, PSXCLK * 2 / 3); From b9a092f587202b35fd32440f48899515e3924109 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 12 Oct 2023 01:38:04 +0300 Subject: [PATCH 403/597] cdrom: adjust pause behavior follows mednafen now notaz/pcsx_rearmed#288 --- libpcsxcore/cdrom.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index e23660ce4..5404c469e 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -67,7 +67,8 @@ static struct { } subq; unsigned char TrackChanged; unsigned char ReportDelay; - unsigned char unused3[2]; + unsigned char unused3; + unsigned short sectorsRead; unsigned int freeze_ver; unsigned char Prev[4]; @@ -631,6 +632,20 @@ static void msfiAdd(u8 *msfi, u32 count) } } +static void msfiSub(u8 *msfi, u32 count) +{ + assert(count < 75); + msfi[2] -= count; + if ((s8)msfi[2] < 0) { + msfi[2] += 75; + msfi[1]--; + if ((s8)msfi[1] < 0) { + msfi[1] = 60; + msfi[0]--; + } + } +} + void cdrPlayReadInterrupt(void) { cdr.LastReadCycles = psxRegs.cycle; @@ -825,6 +840,7 @@ void cdrInterrupt(void) { cdr.TrackChanged = FALSE; cdr.FirstSector = 1; cdr.ReportDelay = 60; + cdr.sectorsRead = 0; if (!Config.Cdda) CDR_play(cdr.SetSectorPlay); @@ -898,6 +914,12 @@ void cdrInterrupt(void) { case CdlPause: StopCdda(); StopReading(); + + // how the drive maintains the position while paused is quite + // complicated, this is the minimum to make "Bedlam" happy + msfiSub(cdr.SetSectorPlay, MIN_VALUE(cdr.sectorsRead, 4)); + cdr.sectorsRead = 0; + /* Gundam Battle Assault 2: much slower (*) - Fixes boot, gameplay @@ -1165,6 +1187,7 @@ void cdrInterrupt(void) { UpdateSubq(cdr.SetSectorPlay); cdr.LocL[0] = LOCL_INVALID; cdr.SubqForwardSectors = 1; + cdr.sectorsRead = 0; cycles = (cdr.Mode & MODE_SPEED) ? 
cdReadTime : cdReadTime * 2; cycles += seekTime; @@ -1287,7 +1310,8 @@ static void cdrUpdateTransferBuf(const u8 *buf) return; memcpy(cdr.Transfer, buf, DATA_SIZE); CheckPPFCache(cdr.Transfer, cdr.Prev[0], cdr.Prev[1], cdr.Prev[2]); - CDR_LOG("cdr.Transfer %x:%x:%x\n", cdr.Transfer[0], cdr.Transfer[1], cdr.Transfer[2]); + CDR_LOG("cdr.Transfer %02x:%02x:%02x\n", + cdr.Transfer[0], cdr.Transfer[1], cdr.Transfer[2]); if (cdr.FifoOffset < 2048 + 12) CDR_LOG("FifoOffset(1) %d/%d\n", cdr.FifoOffset, cdr.FifoSize); } @@ -1309,6 +1333,7 @@ static void cdrReadInterrupt(void) // note: CdlGetlocL should work as soon as STATUS_READ is indicated SetPlaySeekRead(cdr.StatP, STATUS_READ | STATUS_ROTATING); + cdr.sectorsRead++; read_ok = ReadTrack(cdr.SetSectorPlay); if (read_ok) From c57af5e60d8a9cb3eb4a0f4513b211f930a4558c Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 13 Oct 2023 01:15:13 +0300 Subject: [PATCH 404/597] patch up some clang/apple issues --- include/arm_features.h | 21 ++++++++++++-- libpcsxcore/decode_xa.c | 8 +++--- libpcsxcore/new_dynarec/linkage_arm.S | 2 +- libpcsxcore/new_dynarec/linkage_arm64.S | 38 ++++++++++++++++--------- plugins/gpu_neon/psx_gpu/psx_gpu.c | 1 + plugins/gpu_neon/psx_gpu/psx_gpu_simd.c | 3 -- 6 files changed, 48 insertions(+), 25 deletions(-) diff --git a/include/arm_features.h b/include/arm_features.h index 4f216a3c2..9f51ab811 100644 --- a/include/arm_features.h +++ b/include/arm_features.h @@ -52,8 +52,14 @@ #define HAVE_NEON32 #endif +#if defined(__APPLE__) && defined(__aarch64__) +#define ASM_SEPARATOR %% +#else +#define ASM_SEPARATOR ; +#endif + /* global function/external symbol */ -#ifndef __MACH__ +#ifndef __APPLE__ #define ESYM(name) name #define FUNCTION(name) \ @@ -61,16 +67,25 @@ .type name, %function; \ name +#define ESIZE(name_, size_) \ + .size name_, size_ + +#define EOBJECT(name_) \ + .type name_, %object + #define EXTRA_UNSAVED_REGS #else #define ESYM(name) _##name #define FUNCTION(name) \ - .globl ESYM(name); \ - 
name: \ + name: ASM_SEPARATOR \ + .globl ESYM(name) ASM_SEPARATOR \ ESYM(name) +#define ESIZE(name_, size_) +#define EOBJECT(name_) + // r7 is preserved, but add it for EABI alignment.. #define EXTRA_UNSAVED_REGS r7, r9, diff --git a/libpcsxcore/decode_xa.c b/libpcsxcore/decode_xa.c index ee1dd6f3e..17df65f1e 100644 --- a/libpcsxcore/decode_xa.c +++ b/libpcsxcore/decode_xa.c @@ -110,10 +110,10 @@ static __inline void ADPCM_DecodeBlock16( ADPCM_Decode_t *decp, u8 filter_range, x2 -= (IK0(filterid) * fy0 + (IK1(filterid) * fy1)) >> SHC; fy1 = fy0; fy0 = x2; x3 -= (IK0(filterid) * fy0 + (IK1(filterid) * fy1)) >> SHC; fy1 = fy0; fy0 = x3; - XACLAMP( x0, -32768<> SH; destp += inc; - XACLAMP( x1, -32768<> SH; destp += inc; - XACLAMP( x2, -32768<> SH; destp += inc; - XACLAMP( x3, -32768<> SH; destp += inc; + XACLAMP( x0, (int)(-32768u<> SH; destp += inc; + XACLAMP( x1, (int)(-32768u<> SH; destp += inc; + XACLAMP( x2, (int)(-32768u<> SH; destp += inc; + XACLAMP( x3, (int)(-32768u<> SH; destp += inc; } decp->y0 = fy0; decp->y1 = fy1; diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 5d9318094..baac17657 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -27,13 +27,13 @@ #ifdef __MACH__ #define dynarec_local ESYM(dynarec_local) #define ndrc_add_jump_out ESYM(ndrc_add_jump_out) -#define ndrc_try_restore_block ESYM(ndrc_try_restore_block) #define ndrc_get_addr_ht ESYM(ndrc_get_addr_ht) #define ndrc_get_addr_ht_param ESYM(ndrc_get_addr_ht_param) #define ndrc_write_invalidate_one ESYM(ndrc_write_invalidate_one) #define gen_interupt ESYM(gen_interupt) #define gteCheckStallRaw ESYM(gteCheckStallRaw) #define psxException ESYM(psxException) +#define execI ESYM(execI) #endif .bss diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 31b7b9f90..3519dffb4 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ 
b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -24,6 +24,16 @@ #include "assem_arm64.h" #include "linkage_offsets.h" +#ifdef __MACH__ +#define dynarec_local ESYM(dynarec_local) +#define ndrc_add_jump_out ESYM(ndrc_add_jump_out) +#define ndrc_get_addr_ht ESYM(ndrc_get_addr_ht) +#define gen_interupt ESYM(gen_interupt) +#define gteCheckStallRaw ESYM(gteCheckStallRaw) +#define psxException ESYM(psxException) +#define execI ESYM(execI) +#endif + #if (LO_mem_wtab & 7) #error misligned pointers #endif @@ -31,16 +41,16 @@ .bss .align 4 .global dynarec_local - .type dynarec_local, %object - .size dynarec_local, LO_dynarec_local_size + EOBJECT(dynarec_local) + ESIZE(dynarec_local, LO_dynarec_local_size) dynarec_local: .space LO_dynarec_local_size #define DRC_VAR_(name, vname, size_) \ - vname = dynarec_local + LO_##name; \ - .global vname; \ - .type vname, %object; \ - .size vname, size_ + vname = dynarec_local + LO_##name ASM_SEPARATOR \ + .globl vname; \ + EOBJECT(vname); \ + ESIZE(vname, LO_dynarec_local_size) #define DRC_VAR(name, size_) \ DRC_VAR_(name, ESYM(name), size_) @@ -89,7 +99,7 @@ FUNCTION(dyna_linker): /* r1 = instruction to patch */ bl ndrc_get_addr_ht br x0 - .size dyna_linker, .-dyna_linker + ESIZE(dyna_linker, .-dyna_linker) .align 2 FUNCTION(cc_interrupt): @@ -115,7 +125,7 @@ FUNCTION(cc_interrupt): ldr w0, [rFP, #LO_pcaddr] bl ndrc_get_addr_ht br x0 - .size cc_interrupt, .-cc_interrupt + ESIZE(cc_interrupt, .-cc_interrupt) .align 2 FUNCTION(jump_addrerror_ds): /* R3000E_AdEL / R3000E_AdES in w0 */ @@ -170,7 +180,7 @@ FUNCTION(jump_to_new_pc): cbnz w2, new_dyna_leave bl ndrc_get_addr_ht br x0 - .size jump_to_new_pc, .-jump_to_new_pc + ESIZE(jump_to_new_pc, .-jump_to_new_pc) /* stack must be aligned by 16, and include space for save_regs() use */ .align 2 @@ -189,7 +199,7 @@ FUNCTION(new_dyna_start): sub rCC, w2, w1 bl ndrc_get_addr_ht br x0 - .size new_dyna_start, .-new_dyna_start + ESIZE(new_dyna_start, .-new_dyna_start) .align 2 FUNCTION(new_dyna_leave): 
@@ -203,7 +213,7 @@ FUNCTION(new_dyna_leave): ldp x27, x28, [sp, #16*5] ldp x29, x30, [sp], #SSP_ALL ret - .size new_dyna_leave, .-new_dyna_leave + ESIZE(new_dyna_leave, .-new_dyna_leave) /* --------------------------------------- */ @@ -281,16 +291,16 @@ handler_read_end: FUNCTION(jump_handler_write8): add x3, x3, #0x1000/4*8 + 0x1000/2*8 /* shift to r8 part */ - pcsx_write_mem strb uxtb 0 + pcsx_write_mem strb, uxtb, 0 b handler_write_end FUNCTION(jump_handler_write16): add x3, x3, #0x1000/4*8 /* shift to r16 part */ - pcsx_write_mem strh uxth 1 + pcsx_write_mem strh, uxth, 1 b handler_write_end FUNCTION(jump_handler_write32): - pcsx_write_mem str mov 2 + pcsx_write_mem str, mov, 2 handler_write_end: memhandler_post diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index fbacbd5f0..ea3641f83 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -4582,6 +4582,7 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, if(vertex_a->x >= vertex_b->x) { vertex_swap(vertex_a, vertex_b); + (void)triangle_winding; } x_a = vertex_a->x; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c index ac4af9daa..b5274362a 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c @@ -2196,9 +2196,6 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, \ setup_blocks_add_blocks_##target(); \ \ - s32 pixel_span = span_num_blocks * 8; \ - pixel_span -= __builtin_popcount(span_edge_data->right_mask & 0xFF); \ - \ span_num_blocks--; \ while(span_num_blocks) \ { \ From d6a231b7c91ec99ddacfb2538ae8dc1286b10b82 Mon Sep 17 00:00:00 2001 From: StormedBubbles <80055191+StormedBubbles@users.noreply.github.com> Date: Tue, 30 May 2023 16:30:16 -0400 Subject: [PATCH 405/597] Optional lightgun crosshairs + add "Konami Gun" device --- frontend/libretro.c | 204 ++++++++++++++++++++-- 
frontend/libretro_core_options.h | 280 +++++++++++++++++++++++++++++++ frontend/plugin.c | 4 +- 3 files changed, 468 insertions(+), 20 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index d21b1a2b9..8a2447aa9 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -143,6 +143,7 @@ int in_mouse[8][2]; int multitap1 = 0; int multitap2 = 0; int in_enable_vibration = 1; +int in_enable_crosshair[2] = { 0, 0 }; // NegCon adjustment parameters // > The NegCon 'twist' action is somewhat awkward when mapped @@ -270,14 +271,54 @@ static void convert(void *buf, size_t bytes) } #endif +// Function to add crosshairs +static void addCrosshair(int port, int crosshair_color, unsigned short *buffer, int bufferStride, int pos_x, int pos_y, int thickness, int size_x, int size_y) { + for (port = 0; port < 2; port++) { + // Draw the horizontal line of the crosshair + for (int i = pos_y - thickness / 2; i <= pos_y + thickness / 2; i++) { + for (int j = pos_x - size_x / 2; j <= pos_x + size_x / 2; j++) { + if ((i + vout_height) >= 0 && (i + vout_height) < bufferStride && j >= 0 && j < bufferStride && in_enable_crosshair[port] > 0) + buffer[i * bufferStride + j] = crosshair_color; + } + } + + // Draw the vertical line of the crosshair + for (int i = pos_x - thickness / 2; i <= pos_x + thickness / 2; i++) { + for (int j = pos_y - size_y / 2; j <= pos_y + size_y / 2; j++) { + if (i >= 0 && i < bufferStride && (j + vout_height) >= 0 && (j + vout_height) < bufferStride && in_enable_crosshair[port] > 0) + buffer[j * bufferStride + i] = crosshair_color; + } + } + } +} + +struct CrosshairInfo { + int pos_x, pos_y, thickness, size_x, size_y; +}; + +// Calculate size and position of crosshairs +static void CrosshairDimensions(int port, struct CrosshairInfo *info) { + int gunx = input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_X); + int guny = input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_Y); + if (gunx == 
32767) // Prevent crosshairs from wrapping around right side of screen to left + info->pos_x = (gunx + 32767.0f) * vout_width / 65534.0f - 0.5f; + else + info->pos_x = (gunx + 32767.0f) * vout_width / 65534.0f; + info->pos_y = (guny + 32767.0f) * vout_height / 65534.0f - vout_height; + info->thickness = pl_rearmed_cbs.gpu_neon.enhancement_enable ? 4 : 2; + info->size_x = psx_w * (pl_rearmed_cbs.gpu_neon.enhancement_enable ? 2 : 1) / 40.0f; + info->size_y = psx_h * (pl_rearmed_cbs.gpu_neon.enhancement_enable ? 2 : 1) * (4.0f / 3.0f) / 40.0f; +} + static void vout_flip(const void *vram, int stride, int bgr24, int x, int y, int w, int h, int dims_changed) { unsigned short *dest = vout_buf_ptr; const unsigned short *src = vram; int dstride = vout_width, h1 = h; + int port = 0; - if (vram == NULL || dims_changed) + if (vram == NULL || dims_changed || (in_enable_crosshair[0] + in_enable_crosshair[1]) > 0) { memset(vout_buf_ptr, 0, dstride * vout_height * 2); // blanking @@ -303,6 +344,15 @@ static void vout_flip(const void *vram, int stride, int bgr24, } } + for (port = 0; port < 2; port++) { + if (in_enable_crosshair[port] > 0 && (in_type[port] == PSE_PAD_TYPE_GUNCON || in_type[port] == PSE_PAD_TYPE_GUN)) + { + struct CrosshairInfo crosshairInfo; + CrosshairDimensions(port, &crosshairInfo); + addCrosshair(port, in_enable_crosshair[port], dest, dstride, crosshairInfo.pos_x, crosshairInfo.pos_y, crosshairInfo.thickness, crosshairInfo.size_x, crosshairInfo.size_y); + } + } + out: #ifndef FRONTEND_SUPPORTS_RGB565 convert(vout_buf_ptr, vout_width * vout_height * 2); @@ -518,8 +568,41 @@ void plat_trigger_vibrate(int pad, int low, int high) } } +//Percentage distance of screen to adjust for Konami Gun +static float KonamiGunAdjustX = 0; +static float KonamiGunAdjustY = 0; + void pl_gun_byte2(int port, unsigned char byte) { + int irq_count = 4; + float justifier_multiplier = 0; + int justifier_width = psx_w; + int justifier_height = psx_h; + int justifier_offscreen = 
input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_IS_OFFSCREEN); + int justifier_reload = input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_RELOAD); + + if (justifier_width == 256) + justifier_multiplier = is_pal_mode ? .157086f : .158532f; + else if (justifier_width == 320) + justifier_multiplier = is_pal_mode ? .196358f : .198166f; + else if (justifier_width == 384) + justifier_multiplier = is_pal_mode ? .224409f : .226475f; + else if (justifier_width == 512) + justifier_multiplier = is_pal_mode ? .314173f : .317065f; + else // (justifier_width == 640) + justifier_multiplier = is_pal_mode ? .392717f : .396332f; + + int gunx = input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_X); + int guny = input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_Y); + + //Default offset of +105 for X and -12 for Y is chosen to obtain alignment in Die Hard Trilogy, which has no calibration feature + int gunx_scaled = ((gunx + 32767.0f) / 65534.0f + KonamiGunAdjustX) * justifier_width / justifier_multiplier + 105.0f; + int guny_scaled = ((guny + 32767.0f) / 65534.0f + KonamiGunAdjustY) * justifier_height - 12.0f; + + if ((byte & 0x10) && !justifier_offscreen && !justifier_reload) + { + psxScheduleIrq10(irq_count, gunx_scaled, guny_scaled); + } } /* sound calls */ @@ -538,25 +621,27 @@ void out_register_libretro(struct out_driver *drv) drv->feed = snd_feed; } -#define RETRO_DEVICE_PSE_STANDARD RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_JOYPAD, 0) -#define RETRO_DEVICE_PSE_ANALOG RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_ANALOG, 0) -#define RETRO_DEVICE_PSE_DUALSHOCK RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_ANALOG, 1) -#define RETRO_DEVICE_PSE_NEGCON RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_ANALOG, 2) -#define RETRO_DEVICE_PSE_GUNCON RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_LIGHTGUN, 0) -#define RETRO_DEVICE_PSE_MOUSE RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_MOUSE, 0) +#define RETRO_DEVICE_PSE_STANDARD 
RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_JOYPAD, 0) +#define RETRO_DEVICE_PSE_ANALOG RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_ANALOG, 0) +#define RETRO_DEVICE_PSE_DUALSHOCK RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_ANALOG, 1) +#define RETRO_DEVICE_PSE_NEGCON RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_ANALOG, 2) +#define RETRO_DEVICE_PSE_GUNCON RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_LIGHTGUN, 0) +#define RETRO_DEVICE_PSE_JUSTIFIER RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_LIGHTGUN, 1) +#define RETRO_DEVICE_PSE_MOUSE RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_MOUSE, 0) static char *get_pse_pad_label[] = { "none", "mouse", "negcon", "konami gun", "standard", "analog", "guncon", "dualshock" }; -static const struct retro_controller_description pads[7] = +static const struct retro_controller_description pads[8] = { - { "standard", RETRO_DEVICE_JOYPAD }, - { "analog", RETRO_DEVICE_PSE_ANALOG }, - { "dualshock", RETRO_DEVICE_PSE_DUALSHOCK }, - { "negcon", RETRO_DEVICE_PSE_NEGCON }, - { "guncon", RETRO_DEVICE_PSE_GUNCON }, - { "mouse", RETRO_DEVICE_PSE_MOUSE }, + { "standard", RETRO_DEVICE_JOYPAD }, + { "analog", RETRO_DEVICE_PSE_ANALOG }, + { "dualshock", RETRO_DEVICE_PSE_DUALSHOCK }, + { "negcon", RETRO_DEVICE_PSE_NEGCON }, + { "guncon", RETRO_DEVICE_PSE_GUNCON }, + { "konami gun", RETRO_DEVICE_PSE_JUSTIFIER }, + { "mouse", RETRO_DEVICE_PSE_MOUSE }, { NULL, 0 }, }; @@ -610,6 +695,10 @@ static bool update_option_visibility(void) "pcsx_rearmed_negcon_deadzone", "pcsx_rearmed_negcon_response", "pcsx_rearmed_input_sensitivity", + "pcsx_rearmed_crosshair1", + "pcsx_rearmed_crosshair2", + "pcsx_rearmed_konamigunadjustx", + "pcsx_rearmed_konamigunadjusty", "pcsx_rearmed_gunconadjustx", "pcsx_rearmed_gunconadjusty", "pcsx_rearmed_gunconadjustratiox", @@ -850,6 +939,9 @@ void retro_set_controller_port_device(unsigned port, unsigned device) case RETRO_DEVICE_PSE_GUNCON: in_type[port] = PSE_PAD_TYPE_GUNCON; break; + case RETRO_DEVICE_PSE_JUSTIFIER: + in_type[port] = PSE_PAD_TYPE_GUN; + break; case RETRO_DEVICE_NONE: default: 
in_type[port] = PSE_PAD_TYPE_NONE; @@ -1499,7 +1591,8 @@ bool retro_load_game(const struct retro_game_info *info) { port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_TRIGGER, "Gun Trigger" }, \ { port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_RELOAD, "Gun Reload" }, \ { port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_AUX_A, "Gun Aux A" }, \ - { port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_AUX_B, "Gun Aux B" }, + { port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_AUX_B, "Gun Aux B" }, \ + { port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_START, "Gun Start" }, JOYP(0) JOYP(1) @@ -1743,11 +1836,11 @@ static const unsigned short retro_psx_map[] = { }; #define RETRO_PSX_MAP_LEN (sizeof(retro_psx_map) / sizeof(retro_psx_map[0])) -//Percentage distance of screen to adjust +//Percentage distance of screen to adjust for Guncon static int GunconAdjustX = 0; static int GunconAdjustY = 0; -//Used when out by a percentage +//Used when out by a percentage with Guncon static float GunconAdjustRatioX = 1; static float GunconAdjustRatioY = 1; @@ -2328,9 +2421,59 @@ static void update_variables(bool in_flight) } #endif // GPU_UNAI + var.value = NULL; + var.key = "pcsx_rearmed_crosshair1"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "disabled") == 0) + in_enable_crosshair[0] = 0; + else if (strcmp(var.value, "blue") == 0) + in_enable_crosshair[0] = 0x1F; + else if (strcmp(var.value, "green") == 0) + in_enable_crosshair[0] = 0x7E0; + else if (strcmp(var.value, "red") == 0) + in_enable_crosshair[0] = 0xF800; + else if (strcmp(var.value, "white") == 0) + in_enable_crosshair[0] = 0xFFFF; + } + + var.value = NULL; + var.key = "pcsx_rearmed_crosshair2"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "disabled") == 0) + in_enable_crosshair[1] = 0; + else if (strcmp(var.value, "blue") == 0) + in_enable_crosshair[1] = 0x1F; + else if 
(strcmp(var.value, "green") == 0) + in_enable_crosshair[1] = 0x7E0; + else if (strcmp(var.value, "red") == 0) + in_enable_crosshair[1] = 0xF800; + else if (strcmp(var.value, "white") == 0) + in_enable_crosshair[1] = 0xFFFF; + } + //This adjustment process gives the user the ability to manually align the mouse up better //with where the shots are in the emulator. + var.value = NULL; + var.key = "pcsx_rearmed_konamigunadjustx"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + KonamiGunAdjustX = atof(var.value) / 100.0f; + } + + var.value = NULL; + var.key = "pcsx_rearmed_konamigunadjusty"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + KonamiGunAdjustY = atof(var.value) / 100.0f; + } + var.value = NULL; var.key = "pcsx_rearmed_gunconadjustx"; @@ -2466,7 +2609,6 @@ unsigned char axis_range_modifier(int16_t axis_value, bool is_square) static void update_input_guncon(int port, int ret) { //ToDo: - //Core option for cursors for both players //Separate pointer and lightgun control types //Mouse range is -32767 -> 32767 @@ -2505,6 +2647,29 @@ static void update_input_guncon(int port, int ret) } +static void update_input_justifier(int port, int ret) +{ + //ToDo: + //Separate pointer and lightgun control types + + //RetroArch lightgun range is -32767 -> 32767 on both axes (positive Y is down) + + //JUSTIFIER has 3 controls, Trigger,Special,Start which equal Square,Cross,Start + + // Trigger + if (input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_TRIGGER) || input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_RELOAD)) + in_keystate[port] |= (1 << DKEY_SQUARE); + + // Special + if (input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_AUX_A)) + in_keystate[port] |= (1 << DKEY_CROSS); + + // Start + if (input_state_cb(port, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_START)) + in_keystate[port] |= (1 << DKEY_START); + +} + static void 
update_input_negcon(int port, int ret) { int lsx; @@ -2673,6 +2838,9 @@ static void update_input(void) case PSE_PAD_TYPE_GUNCON: update_input_guncon(i, ret); break; + case PSE_PAD_TYPE_GUN: + update_input_justifier(i, ret); + break; case PSE_PAD_TYPE_NEGCON: update_input_negcon(i, ret); break; diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 8e746805e..60832742b 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -959,6 +959,226 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "1.00", }, + { + "pcsx_rearmed_crosshair1", + "Player 1 Lightgun Crosshair", + NULL, + "Toggle player 1's crosshair for the Guncon or Konami Gun", + NULL, + "input", + { + { "disabled", NULL }, + { "blue", NULL }, + { "green", NULL }, + { "red", NULL }, + { "white", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_crosshair2", + "Player 2 Lightgun Crosshair", + NULL, + "Toggle player 2's crosshair for the Guncon or Konami Gun", + NULL, + "input", + { + { "disabled", NULL }, + { "blue", NULL }, + { "green", NULL }, + { "red", NULL }, + { "white", NULL }, + { NULL, NULL }, + }, + "disabled", + }, + { + "pcsx_rearmed_konamigunadjustx", + "Konami Gun X Axis Offset", + NULL, + "Apply an X axis offset to light gun input when emulating a Konami Gun (Hyper Blaster / Justifier) device. 
Can be used to correct aiming misalignments.", + NULL, + "input", + { + { "-40", NULL }, + { "-39", NULL }, + { "-38", NULL }, + { "-37", NULL }, + { "-36", NULL }, + { "-35", NULL }, + { "-34", NULL }, + { "-33", NULL }, + { "-32", NULL }, + { "-31", NULL }, + { "-30", NULL }, + { "-29", NULL }, + { "-28", NULL }, + { "-27", NULL }, + { "-26", NULL }, + { "-25", NULL }, + { "-24", NULL }, + { "-23", NULL }, + { "-22", NULL }, + { "-21", NULL }, + { "-20", NULL }, + { "-19", NULL }, + { "-18", NULL }, + { "-17", NULL }, + { "-16", NULL }, + { "-15", NULL }, + { "-14", NULL }, + { "-13", NULL }, + { "-12", NULL }, + { "-11", NULL }, + { "-10", NULL }, + { "-9", NULL }, + { "-8", NULL }, + { "-7", NULL }, + { "-6", NULL }, + { "-5", NULL }, + { "-4", NULL }, + { "-3", NULL }, + { "-2", NULL }, + { "-1", NULL }, + { "0", NULL }, + { "1", NULL }, + { "2", NULL }, + { "3", NULL }, + { "4", NULL }, + { "5", NULL }, + { "6", NULL }, + { "7", NULL }, + { "8", NULL }, + { "9", NULL }, + { "10", NULL }, + { "11", NULL }, + { "12", NULL }, + { "13", NULL }, + { "14", NULL }, + { "15", NULL }, + { "16", NULL }, + { "17", NULL }, + { "18", NULL }, + { "19", NULL }, + { "20", NULL }, + { "21", NULL }, + { "22", NULL }, + { "23", NULL }, + { "24", NULL }, + { "25", NULL }, + { "26", NULL }, + { "27", NULL }, + { "28", NULL }, + { "29", NULL }, + { "30", NULL }, + { "31", NULL }, + { "32", NULL }, + { "33", NULL }, + { "34", NULL }, + { "35", NULL }, + { "36", NULL }, + { "37", NULL }, + { "38", NULL }, + { "39", NULL }, + { "40", NULL }, + { NULL, NULL }, + }, + "0", + }, + { + "pcsx_rearmed_konamigunadjusty", + "Konami Gun Y Axis Offset", + NULL, + "Apply a Y axis offset to light gun input when emulating a Konami Gun (Hyper Blaster / Justifier) device. 
Can be used to correct aiming misalignments.", + NULL, + "input", + { + { "-40", NULL }, + { "-39", NULL }, + { "-38", NULL }, + { "-37", NULL }, + { "-36", NULL }, + { "-35", NULL }, + { "-34", NULL }, + { "-33", NULL }, + { "-32", NULL }, + { "-31", NULL }, + { "-30", NULL }, + { "-29", NULL }, + { "-28", NULL }, + { "-27", NULL }, + { "-26", NULL }, + { "-25", NULL }, + { "-24", NULL }, + { "-23", NULL }, + { "-22", NULL }, + { "-21", NULL }, + { "-20", NULL }, + { "-19", NULL }, + { "-18", NULL }, + { "-17", NULL }, + { "-16", NULL }, + { "-15", NULL }, + { "-14", NULL }, + { "-13", NULL }, + { "-12", NULL }, + { "-11", NULL }, + { "-10", NULL }, + { "-9", NULL }, + { "-8", NULL }, + { "-7", NULL }, + { "-6", NULL }, + { "-5", NULL }, + { "-4", NULL }, + { "-3", NULL }, + { "-2", NULL }, + { "-1", NULL }, + { "0", NULL }, + { "1", NULL }, + { "2", NULL }, + { "3", NULL }, + { "4", NULL }, + { "5", NULL }, + { "6", NULL }, + { "7", NULL }, + { "8", NULL }, + { "9", NULL }, + { "10", NULL }, + { "11", NULL }, + { "12", NULL }, + { "13", NULL }, + { "14", NULL }, + { "15", NULL }, + { "16", NULL }, + { "17", NULL }, + { "18", NULL }, + { "19", NULL }, + { "20", NULL }, + { "21", NULL }, + { "22", NULL }, + { "23", NULL }, + { "24", NULL }, + { "25", NULL }, + { "26", NULL }, + { "27", NULL }, + { "28", NULL }, + { "29", NULL }, + { "30", NULL }, + { "31", NULL }, + { "32", NULL }, + { "33", NULL }, + { "34", NULL }, + { "35", NULL }, + { "36", NULL }, + { "37", NULL }, + { "38", NULL }, + { "39", NULL }, + { "40", NULL }, + { NULL, NULL }, + }, + "0", + }, { "pcsx_rearmed_gunconadjustx", "Guncon X Axis Offset", @@ -967,6 +1187,21 @@ struct retro_core_option_v2_definition option_defs_us[] = { NULL, "input", { + { "-40", NULL }, + { "-39", NULL }, + { "-38", NULL }, + { "-37", NULL }, + { "-36", NULL }, + { "-35", NULL }, + { "-34", NULL }, + { "-33", NULL }, + { "-32", NULL }, + { "-31", NULL }, + { "-30", NULL }, + { "-29", NULL }, + { "-28", NULL }, + { "-27", 
NULL }, + { "-26", NULL }, { "-25", NULL }, { "-24", NULL }, { "-23", NULL }, @@ -1018,6 +1253,21 @@ struct retro_core_option_v2_definition option_defs_us[] = { { "23", NULL }, { "24", NULL }, { "25", NULL }, + { "26", NULL }, + { "27", NULL }, + { "28", NULL }, + { "29", NULL }, + { "30", NULL }, + { "31", NULL }, + { "32", NULL }, + { "33", NULL }, + { "34", NULL }, + { "35", NULL }, + { "36", NULL }, + { "37", NULL }, + { "38", NULL }, + { "39", NULL }, + { "40", NULL }, { NULL, NULL }, }, "0", @@ -1030,6 +1280,21 @@ struct retro_core_option_v2_definition option_defs_us[] = { NULL, "input", { + { "-40", NULL }, + { "-39", NULL }, + { "-38", NULL }, + { "-37", NULL }, + { "-36", NULL }, + { "-35", NULL }, + { "-34", NULL }, + { "-33", NULL }, + { "-32", NULL }, + { "-31", NULL }, + { "-30", NULL }, + { "-29", NULL }, + { "-28", NULL }, + { "-27", NULL }, + { "-26", NULL }, { "-25", NULL }, { "-24", NULL }, { "-23", NULL }, @@ -1081,6 +1346,21 @@ struct retro_core_option_v2_definition option_defs_us[] = { { "23", NULL }, { "24", NULL }, { "25", NULL }, + { "26", NULL }, + { "27", NULL }, + { "28", NULL }, + { "29", NULL }, + { "30", NULL }, + { "31", NULL }, + { "32", NULL }, + { "33", NULL }, + { "34", NULL }, + { "35", NULL }, + { "36", NULL }, + { "37", NULL }, + { "38", NULL }, + { "39", NULL }, + { "40", NULL }, { NULL, NULL }, }, "0", diff --git a/frontend/plugin.c b/frontend/plugin.c index 2f8dcc2f9..a0942df4f 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -76,7 +76,7 @@ static long CALLBACK PADreadPort1(PadDataS *pad) { pad->portMultitap = multitap1; - if (in_type[pad_index] == PSE_PAD_TYPE_ANALOGJOY || in_type[pad_index] == PSE_PAD_TYPE_ANALOGPAD || in_type[pad_index] == PSE_PAD_TYPE_NEGCON || in_type[pad_index] == PSE_PAD_TYPE_GUNCON) + if (in_type[pad_index] == PSE_PAD_TYPE_ANALOGJOY || in_type[pad_index] == PSE_PAD_TYPE_ANALOGPAD || in_type[pad_index] == PSE_PAD_TYPE_NEGCON || in_type[pad_index] == PSE_PAD_TYPE_GUNCON || in_type[pad_index] 
== PSE_PAD_TYPE_GUN) { pad->leftJoyX = in_analog_left[pad_index][0]; pad->leftJoyY = in_analog_left[pad_index][1]; @@ -104,7 +104,7 @@ static long CALLBACK PADreadPort2(PadDataS *pad) { pad->portMultitap = multitap2; - if (in_type[pad_index] == PSE_PAD_TYPE_ANALOGJOY || in_type[pad_index] == PSE_PAD_TYPE_ANALOGPAD || in_type[pad_index] == PSE_PAD_TYPE_NEGCON || in_type[pad_index] == PSE_PAD_TYPE_GUNCON) + if (in_type[pad_index] == PSE_PAD_TYPE_ANALOGJOY || in_type[pad_index] == PSE_PAD_TYPE_ANALOGPAD || in_type[pad_index] == PSE_PAD_TYPE_NEGCON || in_type[pad_index] == PSE_PAD_TYPE_GUNCON || in_type[pad_index] == PSE_PAD_TYPE_GUN) { pad->leftJoyX = in_analog_left[pad_index][0]; pad->leftJoyY = in_analog_left[pad_index][1]; From bf58ac4cd633bbd3c40bd7f79f9e00b98e156572 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 13 Oct 2023 18:56:41 +0300 Subject: [PATCH 406/597] bigendian again libretro/pcsx_rearmed#777 --- libpcsxcore/psxhw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index 254693e19..f889a53b3 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -94,11 +94,11 @@ u32 psxHwReadGpuSR(void) // meh2, syncing for img bit, might want to avoid it.. gpuSyncPluginSR(); - v = HW_GPU_STATUS; + v = SWAP32(HW_GPU_STATUS); // XXX: because of large timeslices can't use hSyncCount, using rough // approximization instead. Perhaps better use hcounter code here or something. 
- if (hSyncCount < 240 && (HW_GPU_STATUS & PSXGPU_ILACE_BITS) != PSXGPU_ILACE_BITS) + if (hSyncCount < 240 && (v & PSXGPU_ILACE_BITS) != PSXGPU_ILACE_BITS) v |= PSXGPU_LCF & (psxRegs.cycle << 20); return v; } From 9a0a61d27586bfb93aa443cc59d9588d2b9cf992 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 13 Oct 2023 23:19:24 +0300 Subject: [PATCH 407/597] eliminate event code duplication --- Makefile | 5 +- libpcsxcore/cdrom.c | 41 +++------ libpcsxcore/gpu.c | 2 +- libpcsxcore/mdec.c | 4 +- libpcsxcore/misc.c | 1 + libpcsxcore/new_dynarec/emu_if.c | 19 +---- libpcsxcore/new_dynarec/events.h | 7 -- libpcsxcore/psxbios.c | 2 +- libpcsxcore/psxcounters.c | 4 +- libpcsxcore/psxdma.c | 15 ++-- libpcsxcore/psxdma.h | 43 +--------- .../{new_dynarec/events.c => psxevents.c} | 25 ++++-- libpcsxcore/psxevents.h | 52 ++++++++++++ libpcsxcore/psxhw.c | 3 +- libpcsxcore/r3000a.c | 85 +------------------ libpcsxcore/r3000a.h | 35 -------- libpcsxcore/sio.c | 26 +++--- libpcsxcore/spu.c | 6 +- 18 files changed, 123 insertions(+), 252 deletions(-) delete mode 100644 libpcsxcore/new_dynarec/events.h rename libpcsxcore/{new_dynarec/events.c => psxevents.c} (79%) create mode 100644 libpcsxcore/psxevents.h diff --git a/Makefile b/Makefile index 103cbb791..1d57f2cb8 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,8 @@ OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cheat.o libpcsxcore libpcsxcore/decode_xa.o libpcsxcore/mdec.o \ libpcsxcore/misc.o libpcsxcore/plugins.o libpcsxcore/ppf.o libpcsxcore/psxbios.o \ libpcsxcore/psxcommon.o libpcsxcore/psxcounters.o libpcsxcore/psxdma.o \ - libpcsxcore/psxhw.o libpcsxcore/psxinterpreter.o libpcsxcore/psxmem.o libpcsxcore/r3000a.o \ + libpcsxcore/psxhw.o libpcsxcore/psxinterpreter.o libpcsxcore/psxmem.o \ + libpcsxcore/psxevents.o libpcsxcore/r3000a.o \ libpcsxcore/sio.o libpcsxcore/spu.o libpcsxcore/gpu.o OBJS += libpcsxcore/gte.o libpcsxcore/gte_nf.o libpcsxcore/gte_divider.o #OBJS += libpcsxcore/debug.o libpcsxcore/socket.o 
libpcsxcore/disr3000a.o @@ -75,7 +76,7 @@ OBJS += libpcsxcore/new_dynarec/pcsxmem.o else CFLAGS += -DDRC_DISABLE endif -OBJS += libpcsxcore/new_dynarec/emu_if.o libpcsxcore/new_dynarec/events.o +OBJS += libpcsxcore/new_dynarec/emu_if.o libpcsxcore/new_dynarec/new_dynarec.o: libpcsxcore/new_dynarec/pcsxmem_inline.c ifdef DRC_DBG libpcsxcore/new_dynarec/emu_if.o: CFLAGS += -D_FILE_OFFSET_BITS=64 diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 5404c469e..047172ce6 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -26,6 +26,7 @@ #include "misc.h" #include "ppf.h" #include "psxdma.h" +#include "psxevents.h" #include "arm_features.h" /* logging */ @@ -242,14 +243,6 @@ static void sec2msf(unsigned int s, u8 *msf) { msf[2] = s; } -// cdrInterrupt -#define CDR_INT(eCycle) { \ - psxRegs.interrupt |= (1 << PSXINT_CDR); \ - psxRegs.intCycle[PSXINT_CDR].cycle = eCycle; \ - psxRegs.intCycle[PSXINT_CDR].sCycle = psxRegs.cycle; \ - new_dyna_set_event(PSXINT_CDR, eCycle); \ -} - // cdrPlayReadInterrupt #define CDRPLAYREAD_INT(eCycle, isFirst) { \ u32 e_ = eCycle; \ @@ -259,15 +252,7 @@ static void sec2msf(unsigned int s, u8 *msf) { else \ psxRegs.intCycle[PSXINT_CDREAD].sCycle += psxRegs.intCycle[PSXINT_CDREAD].cycle; \ psxRegs.intCycle[PSXINT_CDREAD].cycle = e_; \ - new_dyna_set_event_abs(PSXINT_CDREAD, psxRegs.intCycle[PSXINT_CDREAD].sCycle + e_); \ -} - -// cdrLidSeekInterrupt -#define CDRLID_INT(eCycle) { \ - psxRegs.interrupt |= (1 << PSXINT_CDRLID); \ - psxRegs.intCycle[PSXINT_CDRLID].cycle = eCycle; \ - psxRegs.intCycle[PSXINT_CDRLID].sCycle = psxRegs.cycle; \ - new_dyna_set_event(PSXINT_CDRLID, eCycle); \ + set_event_raw_abs(PSXINT_CDREAD, psxRegs.intCycle[PSXINT_CDREAD].sCycle + e_); \ } #define StopReading() { \ @@ -331,7 +316,7 @@ void cdrLidSeekInterrupt(void) { memset(cdr.Prev, 0xff, sizeof(cdr.Prev)); cdr.DriveState = DRIVESTATE_LID_OPEN; - CDRLID_INT(0x800); + set_event(PSXINT_CDRLID, 0x800); } break; @@ -347,7 +332,7 @@ void 
cdrLidSeekInterrupt(void) // only sometimes does that // (not done when lots of commands are sent?) - CDRLID_INT(cdReadTime * 30); + set_event(PSXINT_CDRLID, cdReadTime * 30); break; } else if (cdr.StatP & STATUS_ROTATING) { @@ -361,12 +346,12 @@ void cdrLidSeekInterrupt(void) // and is only cleared by CdlNop cdr.DriveState = DRIVESTATE_RESCAN_CD; - CDRLID_INT(cdReadTime * 105); + set_event(PSXINT_CDRLID, cdReadTime * 105); break; } // recheck for close - CDRLID_INT(cdReadTime * 3); + set_event(PSXINT_CDRLID, cdReadTime * 3); break; case DRIVESTATE_RESCAN_CD: @@ -375,7 +360,7 @@ void cdrLidSeekInterrupt(void) // this is very long on real hardware, over 6 seconds // make it a bit faster here... - CDRLID_INT(cdReadTime * 150); + set_event(PSXINT_CDRLID, cdReadTime * 150); break; case DRIVESTATE_PREPARE_CD: @@ -385,7 +370,7 @@ void cdrLidSeekInterrupt(void) } else { SetPlaySeekRead(cdr.StatP, STATUS_SEEK); - CDRLID_INT(cdReadTime * 26); + set_event(PSXINT_CDRLID, cdReadTime * 26); } break; } @@ -1142,7 +1127,7 @@ void cdrInterrupt(void) { // yes, it really sets STATUS_SHELLOPEN cdr.StatP |= STATUS_SHELLOPEN; cdr.DriveState = DRIVESTATE_RESCAN_CD; - CDRLID_INT(20480); + set_event(PSXINT_CDRLID, 20480); start_rotating = 1; break; @@ -1220,7 +1205,7 @@ void cdrInterrupt(void) { if (second_resp_time) { cdr.CmdInProgress = Cmd | 0x100; - CDR_INT(second_resp_time); + set_event(PSXINT_CDR, second_resp_time); } else if (cdr.Cmd && cdr.Cmd != (Cmd & 0xff)) { cdr.CmdInProgress = cdr.Cmd; @@ -1469,7 +1454,7 @@ void cdrWrite1(unsigned char rt) { if (!cdr.CmdInProgress) { cdr.CmdInProgress = rt; // should be something like 12k + controller delays - CDR_INT(5000); + set_event(PSXINT_CDR, 5000); } else { CDR_LOG_I("cmd while busy: %02x, prev %02x, busy %02x\n", @@ -1550,7 +1535,7 @@ void cdrWrite3(unsigned char rt) { c = 2048 - (psxRegs.cycle - nextCycle); c = MAX_VALUE(c, 512); } - CDR_INT(c); + set_event(PSXINT_CDR, c); } } cdr.Stat &= ~rt; @@ -1640,7 +1625,7 @@ void psxDma3(u32 
madr, u32 bcr, u32 chcr) { } psxCpu->Clear(madr, cdsize / 4); - CDRDMA_INT((cdsize/4) * 24); + set_event(PSXINT_CDRDMA, (cdsize / 4) * 24); HW_DMA3_CHCR &= SWAPu32(~0x10000000); if (chcr & 0x100) { diff --git a/libpcsxcore/gpu.c b/libpcsxcore/gpu.c index 1eadf59c6..2416405e5 100644 --- a/libpcsxcore/gpu.c +++ b/libpcsxcore/gpu.c @@ -26,7 +26,7 @@ void gpu_state_change(int what) break; case PGS_PRIMITIVE_START: HW_GPU_STATUS &= ~SWAP32(PSXGPU_nBUSY); - GPUDMA_INT(200); // see gpuInterrupt + set_event(PSXINT_GPUDMA, 200); // see gpuInterrupt break; } } diff --git a/libpcsxcore/mdec.c b/libpcsxcore/mdec.c index 612fe974a..c0f2cfd70 100644 --- a/libpcsxcore/mdec.c +++ b/libpcsxcore/mdec.c @@ -525,7 +525,7 @@ void psxDma0(u32 adr, u32 bcr, u32 chcr) { break; } - MDECINDMA_INT(size); + set_event(PSXINT_MDECINDMA, size); } void mdec0Interrupt() @@ -625,7 +625,7 @@ void psxDma1(u32 adr, u32 bcr, u32 chcr) { } /* define the power of mdec */ - MDECOUTDMA_INT(words * MDEC_BIAS); + set_event(PSXINT_MDECOUTDMA, words * MDEC_BIAS); /* some CPU stalling */ psxRegs.cycle += words; } diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 8997c0b5a..f332f4377 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -769,6 +769,7 @@ int LoadState(const char *file) { new_dyna_freeze(f, 0); padFreeze(f, 0); + events_restore(); if (Config.HLE) psxBiosCheckExe(biosBranchCheckOld, 0x60, 1); diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 9d8df341e..6c1b48c59 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -9,10 +9,10 @@ #include "emu_if.h" #include "pcsxmem.h" -#include "events.h" #include "../psxhle.h" #include "../psxinterpreter.h" #include "../psxcounters.h" +#include "../psxevents.h" #include "../r3000a.h" #include "../gte_arm.h" #include "../gte_neon.h" @@ -41,19 +41,6 @@ void pcsx_mtc0_ds(u32 reg, u32 val) MTC0(&psxRegs, reg, val); } -static void new_dyna_restore(void) -{ - int i; - for (i = 0; i < 
PSXINT_COUNT; i++) - event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle; - - event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter; - psxRegs.interrupt |= 1 << PSXINT_RCNT; - psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1; - - new_dyna_pcsx_mem_load_state(); -} - void new_dyna_freeze(void *f, int mode) { const char header_save[8] = "ariblks"; @@ -72,7 +59,7 @@ void new_dyna_freeze(void *f, int mode) SaveFuncs.write(f, addrs, size); } else { - new_dyna_restore(); + new_dyna_pcsx_mem_load_state(); bytes = SaveFuncs.read(f, header, sizeof(header)); if (bytes != sizeof(header) || strcmp(header, header_save)) { @@ -241,7 +228,7 @@ static void ari64_reset() { new_dyna_pcsx_mem_reset(); new_dynarec_invalidate_all_pages(); - new_dyna_restore(); + new_dyna_pcsx_mem_load_state(); pending_exception = 1; } diff --git a/libpcsxcore/new_dynarec/events.h b/libpcsxcore/new_dynarec/events.h deleted file mode 100644 index 5f57f3748..000000000 --- a/libpcsxcore/new_dynarec/events.h +++ /dev/null @@ -1,7 +0,0 @@ -#include "../psxcommon.h" - -extern int stop; - -union psxCP0Regs_; -u32 schedule_timeslice(void); -void gen_interupt(union psxCP0Regs_ *cp0); diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 11011d3dc..af3c55fb0 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -36,7 +36,7 @@ #include "sio.h" #include "psxhle.h" #include "psxinterpreter.h" -#include "new_dynarec/events.h" +#include "psxevents.h" #include #ifndef PSXBIOS_LOG diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index 02191c712..d0d45ec5c 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -22,6 +22,7 @@ */ #include "psxcounters.h" +#include "psxevents.h" #include "gpu.h" //#include "debug.h" #define DebugVSync() @@ -232,8 +233,7 @@ void psxRcntSet() } } - psxRegs.interrupt |= (1 << PSXINT_RCNT); - new_dyna_set_event(PSXINT_RCNT, psxNextCounter); + set_event(PSXINT_RCNT, psxNextCounter); } 
/******************************************************************************/ diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index 0ffec81c2..24570968d 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -67,7 +67,7 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU // This should be much slower, like 12+ cycles/byte, it's like // that because the CPU runs too fast and fifo is not emulated. // See also set_dma_end(). - SPUDMA_INT(words * 4); + set_event(PSXINT_SPUDMA, words * 4); return; case 0x01000200: //spu to cpu transfer @@ -78,7 +78,7 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU psxCpu->Clear(madr, words_copy); HW_DMA4_MADR = SWAPu32(madr + words_copy * 4); - SPUDMA_INT(words * 4); + set_event(PSXINT_SPUDMA, words * 4); return; default: @@ -159,7 +159,7 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU // careful: gpu_state_change() also messes with this HW_GPU_STATUS &= SWAP32(~PSXGPU_nBUSY); // already 32-bit word size ((size * 4) / 4) - GPUDMA_INT(words / 4); + set_event(PSXINT_GPUDMA, words / 4); return; case 0x01000201: // mem2vram @@ -182,7 +182,7 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU // careful: gpu_state_change() also messes with this HW_GPU_STATUS &= SWAP32(~PSXGPU_nBUSY); // already 32-bit word size ((size * 4) / 4) - GPUDMA_INT(words / 4); + set_event(PSXINT_GPUDMA, words / 4); return; case 0x01000401: // dma chain @@ -206,7 +206,7 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU // Einhander = parse linked list in pieces (todo) // Rebel Assault 2 = parse linked list in pieces (todo) - GPUDMA_INT(size); + set_event(PSXINT_GPUDMA, size); return; default: @@ -226,7 +226,7 @@ void gpuInterrupt() { u32 size, madr_next = 0xffffff; size = GPU_dmaChain((u32 *)psxM, HW_DMA2_MADR & 0x1fffff, &madr_next); HW_DMA2_MADR = SWAPu32(madr_next); - GPUDMA_INT(size); + set_event(PSXINT_GPUDMA, size); return; } if (HW_DMA2_CHCR & SWAP32(0x01000000)) @@ -261,10 +261,9 @@ void psxDma6(u32 madr, u32 bcr, u32 
chcr) { } *++mem = SWAP32(0xffffff); - //GPUOTCDMA_INT(size); // halted psxRegs.cycle += words; - GPUOTCDMA_INT(16); + set_event(PSXINT_GPUOTCDMA, 16); return; } else { diff --git a/libpcsxcore/psxdma.h b/libpcsxcore/psxdma.h index eaddb3889..5c0ab4e6b 100644 --- a/libpcsxcore/psxdma.h +++ b/libpcsxcore/psxdma.h @@ -28,48 +28,7 @@ extern "C" { #include "r3000a.h" #include "psxhw.h" #include "psxmem.h" - -#define GPUDMA_INT(eCycle) { \ - psxRegs.interrupt |= (1 << PSXINT_GPUDMA); \ - psxRegs.intCycle[PSXINT_GPUDMA].cycle = eCycle; \ - psxRegs.intCycle[PSXINT_GPUDMA].sCycle = psxRegs.cycle; \ - new_dyna_set_event(PSXINT_GPUDMA, eCycle); \ -} - -#define SPUDMA_INT(eCycle) { \ - psxRegs.interrupt |= (1 << PSXINT_SPUDMA); \ - psxRegs.intCycle[PSXINT_SPUDMA].cycle = eCycle; \ - psxRegs.intCycle[PSXINT_SPUDMA].sCycle = psxRegs.cycle; \ - new_dyna_set_event(PSXINT_SPUDMA, eCycle); \ -} - -#define MDECOUTDMA_INT(eCycle) { \ - psxRegs.interrupt |= (1 << PSXINT_MDECOUTDMA); \ - psxRegs.intCycle[PSXINT_MDECOUTDMA].cycle = eCycle; \ - psxRegs.intCycle[PSXINT_MDECOUTDMA].sCycle = psxRegs.cycle; \ - new_dyna_set_event(PSXINT_MDECOUTDMA, eCycle); \ -} - -#define MDECINDMA_INT(eCycle) { \ - psxRegs.interrupt |= (1 << PSXINT_MDECINDMA); \ - psxRegs.intCycle[PSXINT_MDECINDMA].cycle = eCycle; \ - psxRegs.intCycle[PSXINT_MDECINDMA].sCycle = psxRegs.cycle; \ - new_dyna_set_event(PSXINT_MDECINDMA, eCycle); \ -} - -#define GPUOTCDMA_INT(eCycle) { \ - psxRegs.interrupt |= (1 << PSXINT_GPUOTCDMA); \ - psxRegs.intCycle[PSXINT_GPUOTCDMA].cycle = eCycle; \ - psxRegs.intCycle[PSXINT_GPUOTCDMA].sCycle = psxRegs.cycle; \ - new_dyna_set_event(PSXINT_GPUOTCDMA, eCycle); \ -} - -#define CDRDMA_INT(eCycle) { \ - psxRegs.interrupt |= (1 << PSXINT_CDRDMA); \ - psxRegs.intCycle[PSXINT_CDRDMA].cycle = eCycle; \ - psxRegs.intCycle[PSXINT_CDRDMA].sCycle = psxRegs.cycle; \ - new_dyna_set_event(PSXINT_CDRDMA, eCycle); \ -} +#include "psxevents.h" void psxDma2(u32 madr, u32 bcr, u32 chcr); void psxDma3(u32 
madr, u32 bcr, u32 chcr); diff --git a/libpcsxcore/new_dynarec/events.c b/libpcsxcore/psxevents.c similarity index 79% rename from libpcsxcore/new_dynarec/events.c rename to libpcsxcore/psxevents.c index 2bc93326f..06089f2ea 100644 --- a/libpcsxcore/new_dynarec/events.c +++ b/libpcsxcore/psxevents.c @@ -1,9 +1,9 @@ #include -#include "../r3000a.h" -#include "../cdrom.h" -#include "../psxdma.h" -#include "../mdec.h" -#include "events.h" +#include "r3000a.h" +#include "cdrom.h" +#include "psxdma.h" +#include "mdec.h" +#include "psxevents.h" extern int pending_exception; @@ -31,6 +31,9 @@ u32 schedule_timeslice(void) return next_interupt; } +static void irqNoOp() { +} + typedef void (irq_func)(); static irq_func * const irq_funcs[] = { @@ -43,6 +46,7 @@ static irq_func * const irq_funcs[] = { [PSXINT_MDECINDMA] = mdec0Interrupt, [PSXINT_GPUOTCDMA] = gpuotcInterrupt, [PSXINT_CDRDMA] = cdrDmaInterrupt, + [PSXINT_NEWDRC_CHECK] = irqNoOp, [PSXINT_CDRLID] = cdrLidSeekInterrupt, [PSXINT_IRQ10] = irq10Interrupt, [PSXINT_SPU_UPDATE] = spuUpdate, @@ -50,7 +54,7 @@ static irq_func * const irq_funcs[] = { }; /* local dupe of psxBranchTest, using event_cycles */ -static void irq_test(psxCP0Regs *cp0) +void irq_test(psxCP0Regs *cp0) { u32 cycle = psxRegs.cycle; u32 irq, irq_bits; @@ -88,4 +92,13 @@ void gen_interupt(psxCP0Regs *cp0) next_interupt, next_interupt - psxRegs.cycle); } +void events_restore(void) +{ + int i; + for (i = 0; i < PSXINT_COUNT; i++) + event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle; + event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter; + psxRegs.interrupt |= 1 << PSXINT_RCNT; + psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1; +} diff --git a/libpcsxcore/psxevents.h b/libpcsxcore/psxevents.h new file mode 100644 index 000000000..2f84a4a5a --- /dev/null +++ b/libpcsxcore/psxevents.h @@ -0,0 +1,52 @@ +#ifndef __PSXEVENTS_H__ +#define __PSXEVENTS_H__ + +#include "psxcommon.h" + +enum { + PSXINT_SIO = 0, // sioInterrupt + 
PSXINT_CDR, // cdrInterrupt + PSXINT_CDREAD, // cdrPlayReadInterrupt + PSXINT_GPUDMA, // gpuInterrupt + PSXINT_MDECOUTDMA, // mdec1Interrupt + PSXINT_SPUDMA, // spuInterrupt + PSXINT_UNUSED, // + PSXINT_MDECINDMA, // mdec0Interrupt + PSXINT_GPUOTCDMA, // gpuotcInterrupt + PSXINT_CDRDMA, // cdrDmaInterrupt + PSXINT_NEWDRC_CHECK, // (none) + PSXINT_RCNT, // psxRcntUpdate + PSXINT_CDRLID, // cdrLidSeekInterrupt + PSXINT_IRQ10, // irq10Interrupt + PSXINT_SPU_UPDATE, // spuUpdate + PSXINT_COUNT +}; + +extern u32 event_cycles[PSXINT_COUNT]; +extern u32 next_interupt; +extern int stop; + +#define set_event_raw_abs(e, abs) { \ + u32 abs_ = abs; \ + s32 di_ = next_interupt - abs_; \ + event_cycles[e] = abs_; \ + if (di_ > 0) { \ + /*printf("%u: next_interupt %u -> %u\n", psxRegs.cycle, next_interupt, abs_);*/ \ + next_interupt = abs_; \ + } \ +} + +#define set_event(e, c) do { \ + psxRegs.interrupt |= (1 << (e)); \ + psxRegs.intCycle[e].cycle = c; \ + psxRegs.intCycle[e].sCycle = psxRegs.cycle; \ + set_event_raw_abs(e, psxRegs.cycle + (c)) \ +} while (0) + +union psxCP0Regs_; +u32 schedule_timeslice(void); +void irq_test(union psxCP0Regs_ *cp0); +void gen_interupt(union psxCP0Regs_ *cp0); +void events_restore(void); + +#endif // __PSXEVENTS_H__ diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index f889a53b3..8be775bca 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -22,6 +22,7 @@ */ #include "psxhw.h" +#include "psxevents.h" #include "mdec.h" #include "cdrom.h" #include "gpu.h" @@ -62,7 +63,7 @@ void psxHwWriteImask(u32 value) if (stat & value) { //if ((psxRegs.CP0.n.SR & 0x401) == 0x401) // log_unhandled("irq on unmask @%08x\n", psxRegs.pc); - new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); + set_event(PSXINT_NEWDRC_CHECK, 1); } psxRegs.CP0.n.Cause &= ~0x400; if (stat & value) diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 8035dfd13..f0a0ddce1 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -27,6 +27,7 @@ #include "gte.h" 
#include "psxinterpreter.h" #include "psxbios.h" +#include "psxevents.h" #include "../include/compiler_features.h" R3000Acpu *psxCpu = NULL; @@ -131,87 +132,9 @@ void psxBranchTest() { if ((psxRegs.cycle - psxNextsCounter) >= psxNextCounter) psxRcntUpdate(); - if (psxRegs.interrupt) { - if ((psxRegs.interrupt & (1 << PSXINT_SIO))) { // sio - if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_SIO].sCycle) >= psxRegs.intCycle[PSXINT_SIO].cycle) { - psxRegs.interrupt &= ~(1 << PSXINT_SIO); - sioInterrupt(); - } - } - if (psxRegs.interrupt & (1 << PSXINT_CDR)) { // cdr - if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_CDR].sCycle) >= psxRegs.intCycle[PSXINT_CDR].cycle) { - psxRegs.interrupt &= ~(1 << PSXINT_CDR); - cdrInterrupt(); - } - } - if (psxRegs.interrupt & (1 << PSXINT_CDREAD)) { // cdr read - if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_CDREAD].sCycle) >= psxRegs.intCycle[PSXINT_CDREAD].cycle) { - psxRegs.interrupt &= ~(1 << PSXINT_CDREAD); - cdrPlayReadInterrupt(); - } - } - if (psxRegs.interrupt & (1 << PSXINT_GPUDMA)) { // gpu dma - if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_GPUDMA].sCycle) >= psxRegs.intCycle[PSXINT_GPUDMA].cycle) { - psxRegs.interrupt &= ~(1 << PSXINT_GPUDMA); - gpuInterrupt(); - } - } - if (psxRegs.interrupt & (1 << PSXINT_MDECOUTDMA)) { // mdec out dma - if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_MDECOUTDMA].sCycle) >= psxRegs.intCycle[PSXINT_MDECOUTDMA].cycle) { - psxRegs.interrupt &= ~(1 << PSXINT_MDECOUTDMA); - mdec1Interrupt(); - } - } - if (psxRegs.interrupt & (1 << PSXINT_SPUDMA)) { // spu dma - if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_SPUDMA].sCycle) >= psxRegs.intCycle[PSXINT_SPUDMA].cycle) { - psxRegs.interrupt &= ~(1 << PSXINT_SPUDMA); - spuInterrupt(); - } - } - if (psxRegs.interrupt & (1 << PSXINT_MDECINDMA)) { // mdec in - if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_MDECINDMA].sCycle) >= psxRegs.intCycle[PSXINT_MDECINDMA].cycle) { - psxRegs.interrupt &= ~(1 << PSXINT_MDECINDMA); - mdec0Interrupt(); - } - } - if 
(psxRegs.interrupt & (1 << PSXINT_GPUOTCDMA)) { // gpu otc - if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_GPUOTCDMA].sCycle) >= psxRegs.intCycle[PSXINT_GPUOTCDMA].cycle) { - psxRegs.interrupt &= ~(1 << PSXINT_GPUOTCDMA); - gpuotcInterrupt(); - } - } - if (psxRegs.interrupt & (1 << PSXINT_CDRDMA)) { // cdrom - if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_CDRDMA].sCycle) >= psxRegs.intCycle[PSXINT_CDRDMA].cycle) { - psxRegs.interrupt &= ~(1 << PSXINT_CDRDMA); - cdrDmaInterrupt(); - } - } - if (psxRegs.interrupt & (1 << PSXINT_CDRLID)) { // cdr lid states - if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_CDRLID].sCycle) >= psxRegs.intCycle[PSXINT_CDRLID].cycle) { - psxRegs.interrupt &= ~(1 << PSXINT_CDRLID); - cdrLidSeekInterrupt(); - } - } - if (psxRegs.interrupt & (1 << PSXINT_IRQ10)) { // irq10 - controller port pin8 - if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_IRQ10].sCycle) >= psxRegs.intCycle[PSXINT_IRQ10].cycle) { - psxRegs.interrupt &= ~(1 << PSXINT_IRQ10); - irq10Interrupt(); - } - } - if (psxRegs.interrupt & (1 << PSXINT_SPU_UPDATE)) { // scheduled spu update - if ((psxRegs.cycle - psxRegs.intCycle[PSXINT_SPU_UPDATE].sCycle) >= psxRegs.intCycle[PSXINT_SPU_UPDATE].cycle) { - psxRegs.interrupt &= ~(1 << PSXINT_SPU_UPDATE); - spuUpdate(); - } - } - } + irq_test(&psxRegs.CP0); - psxRegs.CP0.n.Cause &= ~0x400; - if (psxHu32(0x1070) & psxHu32(0x1074)) - psxRegs.CP0.n.Cause |= 0x400; - if (((psxRegs.CP0.n.Cause | 1) & psxRegs.CP0.n.SR & 0x401) == 0x401) - psxException(0, 0, &psxRegs.CP0); - else if (unlikely(psxRegs.pc == psxRegs.biosBranchCheck)) + if (unlikely(psxRegs.pc == psxRegs.biosBranchCheck)) psxBiosCheckBranch(); } @@ -267,7 +190,7 @@ static void psxScheduleIrq10One(u32 cycles_abs) { psxRegs.interrupt |= 1 << PSXINT_IRQ10; psxRegs.intCycle[PSXINT_IRQ10].cycle = c; psxRegs.intCycle[PSXINT_IRQ10].sCycle = rcnts[3].cycleStart; - new_dyna_set_event_abs(PSXINT_IRQ10, cycles_abs); + set_event_raw_abs(PSXINT_IRQ10, cycles_abs); } void irq10Interrupt() { diff 
--git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 157d31b98..4d2cfbd2f 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -163,25 +163,6 @@ typedef union { PAIR p[32]; } psxCP2Ctrl; -enum { - PSXINT_SIO = 0, - PSXINT_CDR, - PSXINT_CDREAD, - PSXINT_GPUDMA, - PSXINT_MDECOUTDMA, - PSXINT_SPUDMA, - PSXINT_GPUBUSY, - PSXINT_MDECINDMA, - PSXINT_GPUOTCDMA, - PSXINT_CDRDMA, - PSXINT_NEWDRC_CHECK, - PSXINT_RCNT, - PSXINT_CDRLID, - PSXINT_IRQ10, - PSXINT_SPU_UPDATE, - PSXINT_COUNT -}; - enum R3000Abdt { // corresponds to bits 31,30 of Cause reg R3000A_BRANCH_TAKEN = 3, @@ -231,24 +212,8 @@ typedef struct { extern psxRegisters psxRegs; /* new_dynarec stuff */ -extern u32 event_cycles[PSXINT_COUNT]; -extern u32 next_interupt; - void new_dyna_freeze(void *f, int mode); -#define new_dyna_set_event_abs(e, abs) { \ - u32 abs_ = abs; \ - s32 di_ = next_interupt - abs_; \ - event_cycles[e] = abs_; \ - if (di_ > 0) { \ - /*printf("%u: next_interupt %u -> %u\n", psxRegs.cycle, next_interupt, abs_);*/ \ - next_interupt = abs_; \ - } \ -} - -#define new_dyna_set_event(e, c) \ - new_dyna_set_event_abs(e, psxRegs.cycle + (c)) - int psxInit(); void psxReset(); void psxShutdown(); diff --git a/libpcsxcore/sio.c b/libpcsxcore/sio.c index 5d5019d24..0bc763ca6 100644 --- a/libpcsxcore/sio.c +++ b/libpcsxcore/sio.c @@ -23,6 +23,7 @@ #include "misc.h" #include "psxcounters.h" +#include "psxevents.h" #include "sio.h" #include @@ -73,13 +74,6 @@ static unsigned int padst; char Mcd1Data[MCD_SIZE], Mcd2Data[MCD_SIZE]; char McdDisable[2]; -#define SIO_INT(eCycle) { \ - psxRegs.interrupt |= (1 << PSXINT_SIO); \ - psxRegs.intCycle[PSXINT_SIO].cycle = eCycle; \ - psxRegs.intCycle[PSXINT_SIO].sCycle = psxRegs.cycle; \ - new_dyna_set_event(PSXINT_SIO, eCycle); \ -} - // clk cycle byte // 4us * 8bits = (PSXCLK / 1000000) * 32; (linuzappz) // TODO: add SioModePrescaler and BaudReg @@ -107,7 +101,7 @@ void sioWrite8(unsigned char value) { if (more_data) { bufcount = parp + 1; - 
SIO_INT(SIO_CYCLES); + set_event(PSXINT_SIO, SIO_CYCLES); } } else padst = 0; @@ -121,14 +115,14 @@ void sioWrite8(unsigned char value) { if (more_data) { bufcount = parp + 1; - SIO_INT(SIO_CYCLES); + set_event(PSXINT_SIO, SIO_CYCLES); } return; } switch (mcdst) { case 1: - SIO_INT(SIO_CYCLES); + set_event(PSXINT_SIO, SIO_CYCLES); if (rdwr) { parp++; return; } parp = 1; switch (value) { @@ -138,7 +132,7 @@ void sioWrite8(unsigned char value) { } return; case 2: // address H - SIO_INT(SIO_CYCLES); + set_event(PSXINT_SIO, SIO_CYCLES); adrH = value; *buf = 0; parp = 0; @@ -146,7 +140,7 @@ void sioWrite8(unsigned char value) { mcdst = 3; return; case 3: // address L - SIO_INT(SIO_CYCLES); + set_event(PSXINT_SIO, SIO_CYCLES); adrL = value; *buf = adrH; parp = 0; @@ -154,7 +148,7 @@ void sioWrite8(unsigned char value) { mcdst = 4; return; case 4: - SIO_INT(SIO_CYCLES); + set_event(PSXINT_SIO, SIO_CYCLES); parp = 0; switch (rdwr) { case 1: // read @@ -204,7 +198,7 @@ void sioWrite8(unsigned char value) { if (rdwr == 2) { if (parp < 128) buf[parp + 1] = value; } - SIO_INT(SIO_CYCLES); + set_event(PSXINT_SIO, SIO_CYCLES); return; } @@ -219,7 +213,7 @@ void sioWrite8(unsigned char value) { bufcount = 1; parp = 0; padst = 1; - SIO_INT(SIO_CYCLES); + set_event(PSXINT_SIO, SIO_CYCLES); return; case 0x81: // start memcard if (CtrlReg & 0x2000) @@ -239,7 +233,7 @@ void sioWrite8(unsigned char value) { bufcount = 3; mcdst = 1; rdwr = 0; - SIO_INT(SIO_CYCLES); + set_event(PSXINT_SIO, SIO_CYCLES); return; default: no_device: diff --git a/libpcsxcore/spu.c b/libpcsxcore/spu.c index 90d2f4dbd..69d65bef7 100644 --- a/libpcsxcore/spu.c +++ b/libpcsxcore/spu.c @@ -22,6 +22,7 @@ */ #include "spu.h" +#include "psxevents.h" void CALLBACK SPUirq(void) { psxHu32ref(0x1070) |= SWAPu32(0x200); @@ -29,10 +30,7 @@ void CALLBACK SPUirq(void) { // spuUpdate void CALLBACK SPUschedule(unsigned int cycles_after) { - psxRegs.interrupt |= (1 << PSXINT_SPU_UPDATE); - 
psxRegs.intCycle[PSXINT_SPU_UPDATE].cycle = cycles_after; - psxRegs.intCycle[PSXINT_SPU_UPDATE].sCycle = psxRegs.cycle; - new_dyna_set_event(PSXINT_SPU_UPDATE, cycles_after); + set_event(PSXINT_SPU_UPDATE, cycles_after); } void spuUpdate() { From c2eee46bfb8a3fde297735a8b115330498d442b4 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 14 Oct 2023 19:18:01 +0300 Subject: [PATCH 408/597] spu: implement irq on dma notaz/pcsx_rearmed#295 --- frontend/plugin.c | 2 +- libpcsxcore/plugins.h | 2 +- libpcsxcore/psxevents.c | 1 + libpcsxcore/psxevents.h | 2 +- libpcsxcore/spu.c | 11 ++++++++++- libpcsxcore/spu.h | 3 ++- plugins/dfsound/dma.c | 23 +++++++++++++++-------- plugins/dfsound/externals.h | 2 +- plugins/dfsound/spu.c | 4 ++-- 9 files changed, 34 insertions(+), 16 deletions(-) diff --git a/frontend/plugin.c b/frontend/plugin.c index a0942df4f..e9dbcacb2 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -55,7 +55,7 @@ extern unsigned short CALLBACK SPUreadRegister(unsigned long, unsigned int); extern void CALLBACK SPUwriteDMAMem(unsigned short *, int, unsigned int); extern void CALLBACK SPUreadDMAMem(unsigned short *, int, unsigned int); extern void CALLBACK SPUplayADPCMchannel(void *, unsigned int, int); -extern void CALLBACK SPUregisterCallback(void (*cb)(void)); +extern void CALLBACK SPUregisterCallback(void (*cb)(int)); extern void CALLBACK SPUregisterScheduleCb(void (*cb)(unsigned int)); extern long CALLBACK SPUfreeze(unsigned int, void *, unsigned int); extern void CALLBACK SPUasync(unsigned int, unsigned int); diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index cb9b88a2c..d20866568 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -170,7 +170,7 @@ typedef unsigned short (CALLBACK* SPUreadRegister)(unsigned long, unsigned int); typedef void (CALLBACK* SPUwriteDMAMem)(unsigned short *, int, unsigned int); typedef void (CALLBACK* SPUreadDMAMem)(unsigned short *, int, unsigned int); typedef void (CALLBACK* 
SPUplayADPCMchannel)(xa_decode_t *, unsigned int, int); -typedef void (CALLBACK* SPUregisterCallback)(void (CALLBACK *callback)(void)); +typedef void (CALLBACK* SPUregisterCallback)(void (CALLBACK *callback)(int)); typedef void (CALLBACK* SPUregisterScheduleCb)(void (CALLBACK *callback)(unsigned int cycles_after)); typedef struct { unsigned char PluginName[8]; diff --git a/libpcsxcore/psxevents.c b/libpcsxcore/psxevents.c index 06089f2ea..28c1b5dfd 100644 --- a/libpcsxcore/psxevents.c +++ b/libpcsxcore/psxevents.c @@ -50,6 +50,7 @@ static irq_func * const irq_funcs[] = { [PSXINT_CDRLID] = cdrLidSeekInterrupt, [PSXINT_IRQ10] = irq10Interrupt, [PSXINT_SPU_UPDATE] = spuUpdate, + [PSXINT_SPU_IRQ] = spuDelayedIrq, [PSXINT_RCNT] = psxRcntUpdate, }; diff --git a/libpcsxcore/psxevents.h b/libpcsxcore/psxevents.h index 2f84a4a5a..1f1067efa 100644 --- a/libpcsxcore/psxevents.h +++ b/libpcsxcore/psxevents.h @@ -10,7 +10,7 @@ enum { PSXINT_GPUDMA, // gpuInterrupt PSXINT_MDECOUTDMA, // mdec1Interrupt PSXINT_SPUDMA, // spuInterrupt - PSXINT_UNUSED, // + PSXINT_SPU_IRQ, // spuDelayedIrq PSXINT_MDECINDMA, // mdec0Interrupt PSXINT_GPUOTCDMA, // gpuotcInterrupt PSXINT_CDRDMA, // cdrDmaInterrupt diff --git a/libpcsxcore/spu.c b/libpcsxcore/spu.c index 69d65bef7..56a1de3bb 100644 --- a/libpcsxcore/spu.c +++ b/libpcsxcore/spu.c @@ -24,7 +24,16 @@ #include "spu.h" #include "psxevents.h" -void CALLBACK SPUirq(void) { +void CALLBACK SPUirq(int cycles_after) { + if (cycles_after > 0) { + set_event(PSXINT_SPU_IRQ, cycles_after); + return; + } + + psxHu32ref(0x1070) |= SWAPu32(0x200); +} + +void spuDelayedIrq() { psxHu32ref(0x1070) |= SWAPu32(0x200); } diff --git a/libpcsxcore/spu.h b/libpcsxcore/spu.h index 44a35d5f6..6b8699b97 100644 --- a/libpcsxcore/spu.h +++ b/libpcsxcore/spu.h @@ -39,8 +39,9 @@ extern "C" { #define H_SPUoff1 0x0d8c #define H_SPUoff2 0x0d8e -void CALLBACK SPUirq(void); +void CALLBACK SPUirq(int cycles_after); void CALLBACK SPUschedule(unsigned int cycles_after); +void 
spuDelayedIrq(); void spuUpdate(); #ifdef __cplusplus diff --git a/plugins/dfsound/dma.c b/plugins/dfsound/dma.c index 1aebfce5f..13f9c269f 100644 --- a/plugins/dfsound/dma.c +++ b/plugins/dfsound/dma.c @@ -39,10 +39,10 @@ void CALLBACK SPUreadDMAMem(unsigned short *pusPSXMem, int iSize, unsigned int cycles) { unsigned int addr = spu.spuAddr, irq_addr = regAreaGet(H_SPUirqAddr) << 3; - int i, irq; + int i, irq_after; do_samples_if_needed(cycles, 1, 2); - irq = addr <= irq_addr && irq_addr < addr + iSize*2; + irq_after = (irq_addr - addr) & 0x7ffff; for(i = 0; i < iSize; i++) { @@ -50,8 +50,10 @@ void CALLBACK SPUreadDMAMem(unsigned short *pusPSXMem, int iSize, addr += 2; addr &= 0x7fffe; } - if (irq && (spu.spuCtrl & CTRL_IRQ)) + if ((spu.spuCtrl & CTRL_IRQ) && irq_after < iSize * 2) { log_unhandled("rdma spu irq: %x/%x+%x\n", irq_addr, spu.spuAddr, iSize * 2); + spu.irqCallback(irq_after); + } spu.spuAddr = addr; set_dma_end(iSize, cycles); } @@ -64,11 +66,11 @@ void CALLBACK SPUwriteDMAMem(unsigned short *pusPSXMem, int iSize, unsigned int cycles) { unsigned int addr = spu.spuAddr, irq_addr = regAreaGet(H_SPUirqAddr) << 3; - int i, irq; + int i, irq_after; do_samples_if_needed(cycles, 1, 2); + irq_after = (irq_addr - addr) & 0x7ffff; spu.bMemDirty = 1; - irq = addr <= irq_addr && irq_addr < addr + iSize*2; if (addr + iSize*2 < 0x80000) { @@ -77,7 +79,6 @@ void CALLBACK SPUwriteDMAMem(unsigned short *pusPSXMem, int iSize, } else { - irq |= irq_addr < ((addr + iSize*2) & 0x7ffff); for (i = 0; i < iSize; i++) { *(unsigned short *)(spu.spuMemC + addr) = *pusPSXMem++; @@ -85,10 +86,16 @@ void CALLBACK SPUwriteDMAMem(unsigned short *pusPSXMem, int iSize, addr &= 0x7fffe; } } - if (irq && (spu.spuCtrl & CTRL_IRQ)) // unhandled because need to implement delay - log_unhandled("wdma spu irq: %x/%x+%x\n", irq_addr, spu.spuAddr, iSize * 2); + if ((spu.spuCtrl & CTRL_IRQ) && irq_after < iSize * 2) { + log_unhandled("wdma spu irq: %x/%x+%x (%u)\n", + irq_addr, spu.spuAddr, 
iSize * 2, irq_after); + // this should be consistent with psxdma.c timing + // might also need more delay like in set_dma_end() + spu.irqCallback(irq_after); + } spu.spuAddr = addr; set_dma_end(iSize, cycles); } //////////////////////////////////////////////////////////////////////// +// vim:shiftwidth=1:expandtab diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index d752acf2e..4f48c65da 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -241,7 +241,7 @@ typedef struct int * SSumLR; - void (CALLBACK *irqCallback)(void); // func of main emu, called on spu irq + void (CALLBACK *irqCallback)(int); //void (CALLBACK *cddavCallback)(short, short); void (CALLBACK *scheduleCallback)(unsigned int); diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index f29ca4fa7..057502e43 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -203,7 +203,7 @@ static void do_irq(void) //if(!(spu.spuStat & STAT_IRQ)) { spu.spuStat |= STAT_IRQ; // asserted status? 
- if(spu.irqCallback) spu.irqCallback(); + if(spu.irqCallback) spu.irqCallback(0); } } @@ -1602,7 +1602,7 @@ long CALLBACK SPUshutdown(void) // SETUP CALLBACKS // this functions will be called once, // passes a callback that should be called on SPU-IRQ/cdda volume change -void CALLBACK SPUregisterCallback(void (CALLBACK *callback)(void)) +void CALLBACK SPUregisterCallback(void (CALLBACK *callback)(int)) { spu.irqCallback = callback; } From 86459dfc29c5c5f78c79d649d297a98d7212cc7f Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 14 Oct 2023 23:42:11 +0300 Subject: [PATCH 409/597] update vibration handling following the nocash description notaz/pcsx_rearmed#314 --- include/psemu_plugin_defs.h | 4 +++- libpcsxcore/plugins.c | 42 ++++++++++++++++++++++++------------- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/include/psemu_plugin_defs.h b/include/psemu_plugin_defs.h index c7a0d1c99..3f4d21b2a 100644 --- a/include/psemu_plugin_defs.h +++ b/include/psemu_plugin_defs.h @@ -229,12 +229,14 @@ typedef struct unsigned char cmd4dConfig[6]; unsigned int lastUseFrame; unsigned int digitalModeFrames; + unsigned char configModeUsed; + unsigned char padding[3]; } ds; unsigned char multitapLongModeEnabled; unsigned char padding2; unsigned char txData[34]; - unsigned char reserved[26]; + unsigned char reserved[22]; } PadDataS; /* NET PlugIn v2 */ diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index 6e5cdbfec..868493bb6 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -562,8 +562,9 @@ static void reqIndex2Treatment(int padIndex, u8 value) { //0x43 if (value == 0) { pads[padIndex].ds.configMode = 0; - } else { + } else if (value == 1) { pads[padIndex].ds.configMode = 1; + pads[padIndex].ds.configModeUsed = 1; } break; case CMD_SET_MODE_AND_LOCK : @@ -584,15 +585,19 @@ static void reqIndex2Treatment(int padIndex, u8 value) { memcpy(buf, resp4C_01, 8); } break; - case CMD_READ_DATA_AND_VIBRATE: - //mem the vibration value for small motor; 
- pads[padIndex].Vib[0] = value; - break; } } -static void vibrate(int padIndex) { +static void ds_update_vibrate(int padIndex) { PadDataS *pad = &pads[padIndex]; + if (pad->ds.configModeUsed) { + pad->Vib[0] = (pad->Vib[0] == 1) ? 1 : 0; + } + else { + // compat mode + pad->Vib[0] = (pad->Vib[0] & 0xc0) == 0x40 && (pad->Vib[1] & 1); + pad->Vib[1] = 0; + } if (pad->Vib[0] != pad->VibF[0] || pad->Vib[1] != pad->VibF[1]) { //value is different update Value and call libretro for vibration pad->VibF[0] = pad->Vib[0]; @@ -733,19 +738,28 @@ static void PADpoll_dualshock(int port, unsigned char value, int pos) case 2: reqIndex2Treatment(port, value); break; - case 3: - if (pads[port].txData[0] == CMD_READ_DATA_AND_VIBRATE) { - // vibration value for the Large motor - pads[port].Vib[1] = value; - - vibrate(port); - } - break; case 7: if (pads[port].txData[0] == CMD_VIBRATION_TOGGLE) memcpy(pads[port].ds.cmd4dConfig, pads[port].txData + 2, 6); break; } + + if (pads[port].txData[0] == CMD_READ_DATA_AND_VIBRATE + && !pads[port].ds.configModeUsed && 2 <= pos && pos < 4) + { + // "compat" single motor mode + pads[port].Vib[pos - 2] = value; + } + else if (pads[port].txData[0] == CMD_READ_DATA_AND_VIBRATE + && 2 <= pos && pos < 8) + { + // 0 - weak motor, 1 - strong motor + int dev = pads[port].ds.cmd4dConfig[pos - 2]; + if (dev < 2) + pads[port].Vib[dev] = value; + } + if (pos == respSize - 1) + ds_update_vibrate(port); } static unsigned char PADpoll_(int port, unsigned char value, int pos, int *more_data) { From e7606d0edc6178f862d94164617e8c5ea4bbbe3b Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 15 Oct 2023 00:48:20 +0300 Subject: [PATCH 410/597] cdrom: adjust timing notaz/pcsx_rearmed#317 --- libpcsxcore/cdrom.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 047172ce6..2ce7fe97b 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -106,7 +106,7 @@ static struct { u16 CmdInProgress; u8 
Irq1Pending; u8 unused5; - u32 LastReadCycles; + u32 LastReadSeekCycles; u8 unused7; @@ -555,16 +555,15 @@ static void cdrPlayInterrupt_Autopause() cdr.ReportDelay--; } -// LastReadCycles static int cdrSeekTime(unsigned char *target) { int diff = msf2sec(cdr.SetSectorPlay) - msf2sec(target); - int pausePenalty, seekTime = abs(diff) * (cdReadTime / 2000); + int seekTime = abs(diff) * (cdReadTime / 2000); seekTime = MAX_VALUE(seekTime, 20000); // need this stupidly long penalty or else Spyro2 intro desyncs - pausePenalty = (s32)(psxRegs.cycle - cdr.LastReadCycles) > cdReadTime * 8 ? cdReadTime * 25 : 0; - seekTime += pausePenalty; + if ((s32)(psxRegs.cycle - cdr.LastReadSeekCycles) > cdReadTime * 8) + seekTime += cdReadTime * 25; seekTime = MIN_VALUE(seekTime, PSXCLK * 2 / 3); CDR_LOG("seek: %.2f %.2f\n", (float)seekTime / PSXCLK, (float)seekTime / cdReadTime); @@ -633,7 +632,7 @@ static void msfiSub(u8 *msfi, u32 count) void cdrPlayReadInterrupt(void) { - cdr.LastReadCycles = psxRegs.cycle; + cdr.LastReadSeekCycles = psxRegs.cycle; if (cdr.Reading) { cdrReadInterrupt(); @@ -1071,6 +1070,7 @@ void cdrInterrupt(void) { memcpy(cdr.LocL, buf, 8); UpdateSubq(cdr.SetSectorPlay); cdr.TrackChanged = FALSE; + cdr.LastReadSeekCycles = psxRegs.cycle; break; case CdlTest: From e189515fc16246867490af2b73e31293077d75db Mon Sep 17 00:00:00 2001 From: saulfabreg Wii VC Project Date: Sat, 14 Oct 2023 19:48:43 -0500 Subject: [PATCH 411/597] add Bomberman: Fantasy Race to database for hack "(GPU) slow linked list walking" Without it, the Retry/Quit menu when you lose a race is invisible. 
Fixes https://github.com/libretro/pcsx_rearmed/issues/776 --- libpcsxcore/database.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 5276b92cf..568bba276 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -20,6 +20,8 @@ static const char * const cdr_read_hack_db[] = static const char * const gpu_slow_llist_db[] = { + /* Bomberman Fantasy Race */ + "SLES01712", "SLPS01525", "SLPS91138", "SLPM87102", "SLUS00823", /* Crash Bash */ "SCES02834", "SCUS94570", "SCUS94616", "SCUS94654", /* Final Fantasy IV */ From de41998a390c2dc03bdc8db2a3060bf5a5e24b06 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 15 Oct 2023 23:10:16 +0300 Subject: [PATCH 412/597] spu: rvb volume is signed notaz/pcsx_rearmed#316 --- plugins/dfsound/registers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index 6f06ffcea..b8620268d 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -167,11 +167,11 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, goto upd_irq; //-------------------------------------------------// case H_SPUrvolL: - spu.rvb->VolLeft=val; + spu.rvb->VolLeft = (int16_t)val; break; //-------------------------------------------------// case H_SPUrvolR: - spu.rvb->VolRight=val; + spu.rvb->VolRight = (int16_t)val; break; //-------------------------------------------------// From 7da5c7ad699ce52d0ef8361709b420751f4e42e0 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 16 Oct 2023 21:00:32 +0300 Subject: [PATCH 413/597] drc: implement cycle reload on read ... but decided to not enable it yet (or ever?) 
--- libpcsxcore/new_dynarec/assem_arm.c | 24 +++++-- libpcsxcore/new_dynarec/assem_arm64.c | 65 +++++++++++++------ libpcsxcore/new_dynarec/linkage_arm.S | 43 +++++++----- libpcsxcore/new_dynarec/linkage_arm64.S | 17 ++--- libpcsxcore/new_dynarec/patches/trace_drc_chk | 41 ++++++------ 5 files changed, 121 insertions(+), 69 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index bdb81b4d5..70798effe 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -1734,6 +1734,14 @@ static void do_readstub(int n) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); emit_far_call(handler); +#if 0 + if (type == LOADW_STUB) { + // new cycle_count returned in r2 + emit_addimm(2, -(int)stubs[n].d, cc<0?2:cc); + if (cc < 0) + emit_storereg(CCREG, 2); + } +#endif if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { mov_loadtype_adj(type,0,rt); } @@ -1804,6 +1812,14 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, emit_far_call(handler); +#if 0 + if (type == LOADW_STUB) { + // new cycle_count returned in r2 + emit_addimm(2, -adj, cc<0?2:cc); + if (cc < 0) + emit_storereg(CCREG, 2); + } +#endif if(rt>=0&&dops[i].rt1!=0) { switch(type) { case LOADB_STUB: emit_signextend8(0,rt); break; @@ -1887,9 +1903,9 @@ static void do_writestub(int n) if(cc<0) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); - // returns new cycle_count emit_far_call(handler); - emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc); + // new cycle_count returned in r2 + emit_addimm(2,-(int)stubs[n].d,cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); if(restore_jump) @@ -1927,9 +1943,9 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,adj,2); emit_movimm((u_int)handler,3); - // returns new cycle_count emit_far_call(jump_handler_write_h); - emit_addimm(0,-adj,cc<0?2:cc); + // new cycle_count returned in r2 + 
emit_addimm(2,-adj,cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); restore_regs(reglist); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 97e1fb148..bad2854cf 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1465,6 +1465,7 @@ static void do_readstub(int n) int i = stubs[n].a; int rs = stubs[n].b; const struct regstat *i_regs = (void *)stubs[n].c; + int adj = (int)stubs[n].d; u_int reglist = stubs[n].e; const signed char *i_regmap = i_regs->regmap; int rt; @@ -1527,12 +1528,22 @@ static void do_readstub(int n) handler=jump_handler_read32; assert(handler); pass_args64(rs,temp2); - int cc=get_reg(i_regmap,CCREG); - if(cc<0) - emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); + int cc, cc_use; + cc = cc_use = get_reg(i_regmap, CCREG); + if (cc < 0) + emit_loadreg(CCREG, (cc_use = 2)); + emit_addimm(cc_use, adj, 2); + emit_far_call(handler); - // (no cycle reload after read) + +#if 0 + // cycle reload for read32 only (value in w2 both in and out) + if (type == LOADW_STUB) { + emit_addimm(2, -adj, cc_use); + if (cc < 0) + emit_storereg(CCREG, cc_use); + } +#endif if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { loadstore_extend(type,0,rt); } @@ -1551,7 +1562,8 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, u_int is_dynamic=0; uintptr_t host_addr = 0; void *handler; - int cc=get_reg(regmap,CCREG); + int cc, cc_use; + cc = cc_use = get_reg(regmap, CCREG); //if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt)) // return; handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr); @@ -1588,9 +1600,9 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, emit_movimm(addr,0); else if(ra!=0) emit_mov(ra,0); - if(cc<0) - emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,adj,2); + if (cc < 0) + emit_loadreg(CCREG, (cc_use = 2)); + emit_addimm(cc_use, adj, 2); if(is_dynamic) { uintptr_t l1 = ((uintptr_t 
*)mem_rtab)[addr>>12] << 1; intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl); @@ -1606,7 +1618,16 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, emit_far_call(handler); - // (no cycle reload after read) +#if 0 + // cycle reload for read32 only (value in w2 both in and out) + if (type == LOADW_STUB) { + if (!is_dynamic) + emit_far_call(do_memhandler_post); + emit_addimm(2, -adj, cc_use); + if (cc < 0) + emit_storereg(CCREG, cc_use); + } +#endif if(rt>=0&&dops[i].rt1!=0) loadstore_extend(type, 0, rt); restore_regs(reglist); @@ -1620,6 +1641,7 @@ static void do_writestub(int n) int i=stubs[n].a; int rs=stubs[n].b; struct regstat *i_regs=(struct regstat *)stubs[n].c; + int adj = (int)stubs[n].d; u_int reglist=stubs[n].e; signed char *i_regmap=i_regs->regmap; int rt,r; @@ -1687,16 +1709,19 @@ static void do_writestub(int n) emit_mov64(temp2,3); host_tempreg_release(); } - int cc=get_reg(i_regmap,CCREG); - if(cc<0) - emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); - // returns new cycle_count + int cc, cc_use; + cc = cc_use = get_reg(i_regmap, CCREG); + if (cc < 0) + emit_loadreg(CCREG, (cc_use = 2)); + emit_addimm(cc_use, adj, 2); + emit_far_call(handler); - emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc); - if(cc<0) - emit_storereg(CCREG,2); - if(restore_jump) + + // new cycle_count returned in x2 + emit_addimm(2, -adj, cc_use); + if (cc < 0) + emit_storereg(CCREG, cc_use); + if (restore_jump) set_jump_target(restore_jump, out); restore_regs(reglist); emit_jmp(stubs[n].retaddr); @@ -1736,7 +1761,7 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, emit_far_call(do_memhandler_pre); emit_far_call(handler); emit_far_call(do_memhandler_post); - emit_addimm(0, -adj, cc_use); + emit_addimm(2, -adj, cc_use); if (cc < 0) emit_storereg(CCREG, cc_use); restore_regs(reglist); diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index baac17657..2bcf66549 100644 --- 
a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -419,15 +419,23 @@ FUNCTION(new_dyna_start): /* --------------------------------------- */ -.align 2 +.macro memhandler_post + /* r2 = cycles_out, r3 = tmp */ + ldr r3, [fp, #LO_next_interupt] + ldr r2, [fp, #LO_cycle] @ memhandlers can modify cc, like dma + str r3, [fp, #LO_last_count] + sub r2, r2, r3 +.endm + +.align 2 -.macro pcsx_read_mem readop tab_shift +.macro pcsx_read_mem_part readop tab_shift /* r0 = address, r1 = handler_tab, r2 = cycles */ lsl r3, r0, #20 lsr r3, #(20+\tab_shift) ldr r12, [fp, #LO_last_count] ldr r1, [r1, r3, lsl #2] - add r2, r2, r12 + add r12, r2, r12 lsls r1, #1 .if \tab_shift == 1 lsl r3, #1 @@ -436,28 +444,30 @@ FUNCTION(new_dyna_start): \readop r0, [r1, r3, lsl #\tab_shift] .endif movcc pc, lr - str r2, [fp, #LO_cycle] - bx r1 + mov r2, r12 + str r12, [fp, #LO_cycle] .endm FUNCTION(jump_handler_read8): add r1, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part - pcsx_read_mem ldrbcc, 0 + pcsx_read_mem_part ldrbcc, 0 + bx r1 @ addr, unused, cycles FUNCTION(jump_handler_read16): add r1, #0x1000/4*4 @ shift to r16 part - pcsx_read_mem ldrhcc, 1 + pcsx_read_mem_part ldrhcc, 1 + bx r1 @ addr, unused, cycles FUNCTION(jump_handler_read32): - pcsx_read_mem ldrcc, 2 - - -.macro memhandler_post - ldr r0, [fp, #LO_next_interupt] - ldr r2, [fp, #LO_cycle] @ memhandlers can modify cc, like dma - str r0, [fp, #LO_last_count] - sub r0, r2, r0 -.endm + pcsx_read_mem_part ldrcc, 2 + bx r1 @ addr, unused, cycles +#if 0 + str lr, [fp, #LO_saved_lr] + blx r1 + ldr lr, [fp, #LO_saved_lr] + memhandler_post + bx lr +#endif .macro pcsx_write_mem wrtop tab_shift /* r0 = address, r1 = data, r2 = cycles, r3 = handler_tab */ @@ -466,7 +476,6 @@ FUNCTION(jump_handler_read32): ldr r3, [r3, r12, lsl #2] str r0, [fp, #LO_address] @ some handlers still need it.. 
lsls r3, #1 - mov r0, r2 @ cycle return in case of direct store .if \tab_shift == 1 lsl r12, #1 \wrtop r1, [r3, r12] diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 3519dffb4..fa8a41176 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -227,10 +227,11 @@ FUNCTION(new_dyna_leave): .endm .macro memhandler_post - ldr w0, [rFP, #LO_next_interupt] + /* w2 = cycles_out, x3 = tmp */ + ldr w3, [rFP, #LO_next_interupt] ldr w2, [rFP, #LO_cycle] // memhandlers can modify cc, like dma - str w0, [rFP, #LO_last_count] - sub w0, w2, w0 + str w3, [rFP, #LO_last_count] + sub w2, w2, w3 .endm FUNCTION(do_memhandler_pre): @@ -258,17 +259,18 @@ FUNCTION(do_memhandler_post): FUNCTION(jump_handler_read8): add x1, x1, #0x1000/4*8 + 0x1000/2*8 /* shift to r8 part */ pcsx_read_mem ldrb, 0 - b handler_read_end + ldp xzr, x30, [sp], #16 + ret FUNCTION(jump_handler_read16): add x1, x1, #0x1000/4*8 /* shift to r16 part */ pcsx_read_mem ldrh, 1 - b handler_read_end + ldp xzr, x30, [sp], #16 + ret FUNCTION(jump_handler_read32): pcsx_read_mem ldr, 2 - -handler_read_end: + /* memhandler_post */ ldp xzr, x30, [sp], #16 ret @@ -278,7 +280,6 @@ handler_read_end: ldr x3, [x3, w4, uxtw #3] adds x3, x3, x3 bcs 0f - mov w0, w2 /* cycle return */ \wrtop w1, [x3, w4, uxtw #\tab_shift] ret 0: diff --git a/libpcsxcore/new_dynarec/patches/trace_drc_chk b/libpcsxcore/new_dynarec/patches/trace_drc_chk index 5dab31754..da8616988 100644 --- a/libpcsxcore/new_dynarec/patches/trace_drc_chk +++ b/libpcsxcore/new_dynarec/patches/trace_drc_chk @@ -1,8 +1,8 @@ diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c -index ede1f93c..1c8965f0 100644 +index 74f32ee3..4eec8a83 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c -@@ -324,7 +324,7 @@ static struct compile_info +@@ -325,7 +325,7 @@ static struct compile_info int 
new_dynarec_hacks_old; int new_dynarec_did_compile; @@ -11,7 +11,7 @@ index ede1f93c..1c8965f0 100644 extern int cycle_count; // ... until end of the timeslice, counts -N -> 0 (CCREG) extern int last_count; // last absolute target, often = next_interupt -@@ -602,6 +602,7 @@ static int cycle_multiplier_active; +@@ -603,6 +603,7 @@ static int cycle_multiplier_active; static int CLOCK_ADJUST(int x) { @@ -19,7 +19,7 @@ index ede1f93c..1c8965f0 100644 int m = cycle_multiplier_active; int s = (x >> 31) | 1; return (x * m + s * 50) / 100; -@@ -776,6 +777,9 @@ static noinline u_int generate_exception(u_int pc) +@@ -808,6 +809,9 @@ static noinline u_int generate_exception(u_int pc) // This is called from the recompiled JR/JALR instructions static void noinline *get_addr(u_int vaddr, int can_compile) { @@ -29,7 +29,7 @@ index ede1f93c..1c8965f0 100644 u_int start_page = get_page_prev(vaddr); u_int i, page, end_page = get_page(vaddr); void *found_clean = NULL; -@@ -7157,7 +7161,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) +@@ -7213,7 +7217,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) // R0 is always unneeded u|=1; // Save it @@ -38,7 +38,7 @@ index ede1f93c..1c8965f0 100644 gte_unneeded[i]=gte_u; /* printf("ur (%d,%d) %x: ",istart,iend,start+i*4); -@@ -8299,6 +8303,7 @@ static noinline void pass5a_preallocate1(void) +@@ -8355,6 +8359,7 @@ static noinline void pass5a_preallocate1(void) static noinline void pass5b_preallocate2(void) { int i, hr; @@ -46,7 +46,7 @@ index ede1f93c..1c8965f0 100644 for(i=0;isubCycleStep >= 0x10000); regs->subCycle += regs->subCycleStep; @@ -124,11 +124,12 @@ index 5756bee5..4fe98b1b 100644 regs->subCycle &= 0xffff; } -@@ -1344,8 +1344,14 @@ static void intShutdown() { +@@ -1348,8 +1348,15 @@ static void intShutdown() { // single step (may do several ops in case of a branch or load delay) // called by asm/dynarec void execI(psxRegisters *regs) { + extern int last_count; ++ extern u32 
next_interupt; + void do_insn_cmp(void); + printf("execI %08x c %u, ni %u\n", regs->pc, regs->cycle, next_interupt); + last_count = 0; From f8896d181bd26ef6fd18af17f01c77b73ae17a68 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 17 Oct 2023 00:16:46 +0300 Subject: [PATCH 414/597] rework gpu busy timing previous implementation caused complications with dynarecs --- libpcsxcore/gpu.c | 7 +++---- libpcsxcore/misc.c | 5 +++-- libpcsxcore/psxbios.c | 1 - libpcsxcore/psxcounters.c | 2 ++ libpcsxcore/psxdma.c | 10 ++++------ libpcsxcore/psxhw.c | 7 ++++--- libpcsxcore/r3000a.h | 3 ++- 7 files changed, 18 insertions(+), 17 deletions(-) diff --git a/libpcsxcore/gpu.c b/libpcsxcore/gpu.c index 2416405e5..425537d46 100644 --- a/libpcsxcore/gpu.c +++ b/libpcsxcore/gpu.c @@ -19,14 +19,13 @@ void gpu_state_change(int what) switch (state) { case PGS_VRAM_TRANSFER_START: - HW_GPU_STATUS &= ~SWAP32(PSXGPU_nBUSY); + psxRegs.gpuIdleAfter = psxRegs.cycle + PSXCLK / 50; break; case PGS_VRAM_TRANSFER_END: - HW_GPU_STATUS |= SWAP32(PSXGPU_nBUSY); + psxRegs.gpuIdleAfter = psxRegs.cycle; break; case PGS_PRIMITIVE_START: - HW_GPU_STATUS &= ~SWAP32(PSXGPU_nBUSY); - set_event(PSXINT_GPUDMA, 200); // see gpuInterrupt + psxRegs.gpuIdleAfter = psxRegs.cycle + 200; break; } } diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index f332f4377..9486d23bb 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -738,6 +738,8 @@ int LoadState(const char *file) { SaveFuncs.read(f, &psxRegs, offsetof(psxRegisters, gteBusyCycle)); psxRegs.gteBusyCycle = psxRegs.cycle; psxRegs.biosBranchCheck = ~0; + psxRegs.gpuIdleAfter = psxRegs.cycle - 1; + HW_GPU_STATUS &= SWAP32(~PSXGPU_nBUSY); psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); @@ -750,8 +752,7 @@ int LoadState(const char *file) { SaveFuncs.read(f, gpufP, sizeof(GPUFreeze_t)); GPU_freeze(0, gpufP); free(gpufP); - if (HW_GPU_STATUS == 0) - HW_GPU_STATUS = SWAP32(GPU_readStatus()); + gpuSyncPluginSR(); // spu SaveFuncs.read(f, &Size, 4); diff 
--git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index af3c55fb0..3671af5a0 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -3322,7 +3322,6 @@ void psxBiosSetupBootState(void) GPU_writeStatus(gpu_ctl_def[i]); for (i = 0; i < sizeof(gpu_data_def) / sizeof(gpu_data_def[0]); i++) GPU_writeData(gpu_data_def[i]); - HW_GPU_STATUS |= SWAP32(PSXGPU_nBUSY); // spu for (i = 0x1f801d80; i < sizeof(spu_config) / sizeof(spu_config[0]); i++) diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index d0d45ec5c..f7491b3d5 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -397,6 +397,8 @@ void psxRcntUpdate() } HW_GPU_STATUS = SWAP32(status); GPU_vBlank(0, field); + if ((s32)(psxRegs.gpuIdleAfter - psxRegs.cycle) < 0) + psxRegs.gpuIdleAfter = psxRegs.cycle - 1; // prevent overflow if ((rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset) || (rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset2)) diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index 24570968d..4db99ab1e 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -157,7 +157,7 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU HW_DMA2_MADR = SWAPu32(madr + words_copy * 4); // careful: gpu_state_change() also messes with this - HW_GPU_STATUS &= SWAP32(~PSXGPU_nBUSY); + psxRegs.gpuIdleAfter = psxRegs.cycle + words / 4 + 16; // already 32-bit word size ((size * 4) / 4) set_event(PSXINT_GPUDMA, words / 4); return; @@ -180,7 +180,7 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU HW_DMA2_MADR = SWAPu32(madr); // careful: gpu_state_change() also messes with this - HW_GPU_STATUS &= SWAP32(~PSXGPU_nBUSY); + psxRegs.gpuIdleAfter = psxRegs.cycle + words / 4 + 16; // already 32-bit word size ((size * 4) / 4) set_event(PSXINT_GPUDMA, words / 4); return; @@ -199,13 +199,13 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU if ((int)size <= 0) size = gpuDmaChainSize(madr); - HW_GPU_STATUS &= SWAP32(~PSXGPU_nBUSY); HW_DMA2_MADR = 
SWAPu32(madr_next); // Tekken 3 = use 1.0 only (not 1.5x) // Einhander = parse linked list in pieces (todo) // Rebel Assault 2 = parse linked list in pieces (todo) + psxRegs.gpuIdleAfter = psxRegs.cycle + size + 16; set_event(PSXINT_GPUDMA, size); return; @@ -218,14 +218,13 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU DMA_INTERRUPT(2); } -// note: this is also (ab)used for non-dma prim command -// to delay gpu returning to idle state, see gpu_state_change() void gpuInterrupt() { if (HW_DMA2_CHCR == SWAP32(0x01000401) && !(HW_DMA2_MADR & SWAP32(0x800000))) { u32 size, madr_next = 0xffffff; size = GPU_dmaChain((u32 *)psxM, HW_DMA2_MADR & 0x1fffff, &madr_next); HW_DMA2_MADR = SWAPu32(madr_next); + psxRegs.gpuIdleAfter = psxRegs.cycle + size + 64; set_event(PSXINT_GPUDMA, size); return; } @@ -234,7 +233,6 @@ void gpuInterrupt() { HW_DMA2_CHCR &= SWAP32(~0x01000000); DMA_INTERRUPT(2); } - HW_GPU_STATUS |= SWAP32(PSXGPU_nBUSY); // GPU no longer busy } void psxDma6(u32 madr, u32 bcr, u32 chcr) { diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index 8be775bca..4811f99ba 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -41,7 +41,7 @@ void psxHwReset() { mdecInit(); // initialize mdec decoder cdrReset(); psxRcntInit(); - HW_GPU_STATUS = SWAP32(0x14802000); + HW_GPU_STATUS = SWAP32(0x10802000); psxHwReadGpuSRptr = Config.hacks.gpu_busy_hack ? psxHwReadGpuSRbusyHack : psxHwReadGpuSR; } @@ -91,16 +91,17 @@ void psxHwWriteGpuSR(u32 value) u32 psxHwReadGpuSR(void) { - u32 v; + u32 v, c = psxRegs.cycle; // meh2, syncing for img bit, might want to avoid it.. gpuSyncPluginSR(); v = SWAP32(HW_GPU_STATUS); + v |= ((s32)(psxRegs.gpuIdleAfter - c) >> 31) & PSXGPU_nBUSY; // XXX: because of large timeslices can't use hSyncCount, using rough // approximization instead. Perhaps better use hcounter code here or something. 
if (hSyncCount < 240 && (v & PSXGPU_ILACE_BITS) != PSXGPU_ILACE_BITS) - v |= PSXGPU_LCF & (psxRegs.cycle << 20); + v |= PSXGPU_LCF & (c << 20); return v; } diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 4d2cfbd2f..03aeee196 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -204,7 +204,8 @@ typedef struct { u32 dloadVal[2]; u32 biosBranchCheck; u32 cpuInRecursion; - u32 reserved[2]; + u32 gpuIdleAfter; + u32 reserved[1]; // warning: changing anything in psxRegisters requires update of all // asm in libpcsxcore/new_dynarec/ } psxRegisters; From 07f64ac7fd01986bd5cc501ffe7394b09fea0089 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 18 Oct 2023 01:44:46 +0300 Subject: [PATCH 415/597] must specify cflags to link important for -mfpu when using lto --- Makefile | 2 +- frontend/plugin.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 1d57f2cb8..bf528b14c 100644 --- a/Makefile +++ b/Makefile @@ -293,7 +293,7 @@ frontend/revision.h: FORCE target_: $(TARGET) $(TARGET): $(OBJS) - $(CC_LINK) -o $@ $^ $(LDFLAGS) $(LDLIBS) $(EXTRA_LDFLAGS) + $(CC_LINK) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LDLIBS) $(EXTRA_LDFLAGS) clean: $(PLAT_CLEAN) clean_plugins $(RM) $(TARGET) $(OBJS) $(TARGET).map frontend/revision.h diff --git a/frontend/plugin.c b/frontend/plugin.c index e9dbcacb2..3683b5002 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -135,7 +135,7 @@ extern void GPUwriteDataMem(uint32_t *, int); extern uint32_t GPUreadStatus(void); extern uint32_t GPUreadData(void); extern void GPUreadDataMem(uint32_t *, int); -extern long GPUdmaChain(uint32_t *,uint32_t); +extern long GPUdmaChain(uint32_t *, uint32_t, uint32_t *); extern void GPUupdateLace(void); extern long GPUfreeze(uint32_t, void *); extern void GPUvBlank(int, int); From 34ee348026b79e9bc0bc20450bf99ee987970a06 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 20 Oct 2023 00:20:19 +0300 Subject: [PATCH 416/597] reduce seeking and open handles to maybe 
help smb libretro/pcsx_rearmed#342 --- libpcsxcore/cdriso.c | 159 +++++++++++++++++++++++++------------------ 1 file changed, 93 insertions(+), 66 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index 081a8f2b9..db57b0c54 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -26,14 +26,15 @@ #include "ppf.h" #ifdef _WIN32 -#define WIN32_LEAN_AND_MEAN -#include -#include #define strcasecmp _stricmp -#elif P_HAVE_PTHREAD +#else +#include +#include +#include +#if P_HAVE_PTHREAD #include #include -#include +#endif #endif #include #include @@ -90,6 +91,8 @@ static struct { unsigned int current_buffer; unsigned int sector_in_hunk; } *chd_img; +#else +#define chd_img 0 #endif static int (*cdimg_read_func)(FILE *f, unsigned int base, void *dest, int sector); @@ -159,6 +162,21 @@ static void tok2msf(char *time, char *msf) { } } +static off_t get_size(FILE *f) +{ + off_t old, size; +#if !defined(USE_LIBRETRO_VFS) + struct stat st; + if (fstat(fileno(f), &st) == 0) + return st.st_size; +#endif + old = ftello(f); + fseeko(f, 0, SEEK_END); + size = ftello(f); + fseeko(f, old, SEEK_SET); + return size; +} + // this function tries to get the .toc file of the given .bin // the necessary data is put into the ti (trackinformation)-array static int parsetoc(const char *isofile) { @@ -312,6 +330,7 @@ static int parsecue(const char *isofile) { char filepath[MAXPATHLEN]; char *incue_fname; FILE *fi; + FILE *ftmp = NULL; char *token; char time[20]; char *tmp; @@ -325,18 +344,21 @@ static int parsecue(const char *isofile) { // copy name of the iso and change extension from .bin to .cue strncpy(cuename, isofile, sizeof(cuename)); cuename[MAXPATHLEN - 1] = '\0'; - if (strlen(cuename) >= 4) { + if (strlen(cuename) < 4) + return -1; + if (strcasecmp(cuename + strlen(cuename) - 4, ".cue") == 0) { + // it's already open as cdHandle + fi = cdHandle; + } + else { // If 'isofile' is a '.cd' file, use it as a .cue file // and don't try to search the additional .cue 
file if (strncasecmp(cuename + strlen(cuename) - 4, ".cd", 3) != 0 ) - strcpy(cuename + strlen(cuename) - 4, ".cue"); - } - else { - return -1; - } + strcpy(cuename + strlen(cuename) - 4, ".cue"); - if ((fi = fopen(cuename, "r")) == NULL) { - return -1; + if ((ftmp = fopen(cuename, "r")) == NULL) + return -1; + fi = ftmp; } // Some stupid tutorials wrongly tell users to use cdrdao to rip a @@ -346,10 +368,11 @@ static int parsecue(const char *isofile) { if (!strncmp(linebuf, "CD_ROM_XA", 9)) { // Don't proceed further, as this is actually a .toc file rather // than a .cue file. - fclose(fi); + if (ftmp) + fclose(ftmp); return parsetoc(isofile); } - fseek(fi, 0, SEEK_SET); + rewind(fi); } // build a path for files referenced in .cue @@ -457,25 +480,23 @@ static int parsecue(const char *isofile) { SysPrintf(_("\ncould not open: %s\n"), filepath); continue; } - fseek(ti[numtracks + 1].handle, 0, SEEK_END); - file_len = ftell(ti[numtracks + 1].handle) / 2352; - - if (numtracks == 0 && strlen(isofile) >= 4 && - (strcmp(isofile + strlen(isofile) - 4, ".cue") == 0 || - strncasecmp(isofile + strlen(isofile) - 4, ".cd", 3) == 0)) { - // user selected .cue/.cdX as image file, use it's data track instead - fclose(cdHandle); - cdHandle = fopen(filepath, "rb"); - } + file_len = get_size(ti[numtracks + 1].handle) / 2352; } } - fclose(fi); + if (ftmp) + fclose(ftmp); // if there are no tracks detected, then it's not a cue file if (!numtracks) return -1; + // the data track handle is always in cdHandle + if (ti[1].handle) { + fclose(cdHandle); + cdHandle = ti[1].handle; + ti[1].handle = NULL; + } return 0; } @@ -530,8 +551,7 @@ static int parseccd(const char *isofile) { // Fill out the last track's end based on size if (numtracks >= 1) { - fseek(cdHandle, 0, SEEK_END); - t = ftell(cdHandle) / 2352 - msf2sec(ti[numtracks].start) + 2 * 75; + t = get_size(cdHandle) / 2352 - msf2sec(ti[numtracks].start) + 2 * 75; sec2msf(t, ti[numtracks].length); } @@ -686,11 +706,9 @@ static int 
handlepbp(const char *isofile) { if (strlen(isofile) >= 4) ext = isofile + strlen(isofile) - 4; - if (ext == NULL || (strcmp(ext, ".pbp") != 0 && strcmp(ext, ".PBP") != 0)) + if (ext == NULL || strcasecmp(ext, ".pbp") != 0) return -1; - fseeko(cdHandle, 0, SEEK_SET); - numtracks = 0; ret = fread(&pbp_hdr, 1, sizeof(pbp_hdr), cdHandle); @@ -839,9 +857,8 @@ static int handlepbp(const char *isofile) { goto done; fail_io: -#ifndef NDEBUG SysPrintf(_("File IO error in <%s:%s>.\n"), __FILE__, __func__); -#endif + rewind(cdHandle); done: if (compr_img != NULL) { @@ -872,23 +889,21 @@ static int handlecbin(const char *isofile) { if (ext == NULL || (strcasecmp(ext + 1, ".cbn") != 0 && strcasecmp(ext, ".cbin") != 0)) return -1; - fseek(cdHandle, 0, SEEK_SET); - ret = fread(&ciso_hdr, 1, sizeof(ciso_hdr), cdHandle); if (ret != sizeof(ciso_hdr)) { SysPrintf("failed to read ciso header\n"); - return -1; + goto fail_io; } if (strncmp(ciso_hdr.magic, "CISO", 4) != 0 || ciso_hdr.total_bytes <= 0 || ciso_hdr.block_size <= 0) { SysPrintf("bad ciso header\n"); - return -1; + goto fail_io; } if (ciso_hdr.header_size != 0 && ciso_hdr.header_size != sizeof(ciso_hdr)) { ret = fseeko(cdHandle, ciso_hdr.header_size, SEEK_SET); if (ret != 0) { SysPrintf("failed to seek to %x\n", ciso_hdr.header_size); - return -1; + goto fail_io; } } @@ -932,6 +947,7 @@ static int handlecbin(const char *isofile) { free(compr_img); compr_img = NULL; } + rewind(cdHandle); return -1; } @@ -1063,8 +1079,7 @@ static int opensbifile(const char *isoname) { return -1; } - fseek(cdHandle, 0, SEEK_END); - s = ftell(cdHandle) / 2352; + s = msf2sec(ti[1].length); return LoadSBI(sbiname, s); } @@ -1072,7 +1087,9 @@ static int opensbifile(const char *isoname) { static int cdread_normal(FILE *f, unsigned int base, void *dest, int sector) { int ret; - if (fseek(f, base + sector * CD_FRAMESIZE_RAW, SEEK_SET)) + if (!f) + return -1; + if (fseeko(f, base + sector * CD_FRAMESIZE_RAW, SEEK_SET)) goto fail_io; ret = fread(dest, 
1, CD_FRAMESIZE_RAW, f); if (ret <= 0) @@ -1089,7 +1106,9 @@ static int cdread_sub_mixed(FILE *f, unsigned int base, void *dest, int sector) { int ret; - if (fseek(f, base + sector * (CD_FRAMESIZE_RAW + SUB_FRAMESIZE), SEEK_SET)) + if (!f) + return -1; + if (fseeko(f, base + sector * (CD_FRAMESIZE_RAW + SUB_FRAMESIZE), SEEK_SET)) goto fail_io; ret = fread(dest, 1, CD_FRAMESIZE_RAW, f); if (ret <= 0) @@ -1103,7 +1122,9 @@ static int cdread_sub_mixed(FILE *f, unsigned int base, void *dest, int sector) static int cdread_sub_sub_mixed(FILE *f, int sector) { - if (fseek(f, sector * (CD_FRAMESIZE_RAW + SUB_FRAMESIZE) + CD_FRAMESIZE_RAW, SEEK_SET)) + if (!f) + return -1; + if (fseeko(f, sector * (CD_FRAMESIZE_RAW + SUB_FRAMESIZE) + CD_FRAMESIZE_RAW, SEEK_SET)) goto fail_io; if (fread(subbuffer, 1, SUB_FRAMESIZE, f) != SUB_FRAMESIZE) goto fail_io; @@ -1154,6 +1175,8 @@ static int cdread_compressed(FILE *f, unsigned int base, void *dest, int sector) off_t start_byte; int ret, block; + if (!cdHandle) + return -1; if (base) sector += base / 2352; @@ -1283,7 +1306,9 @@ static int cdread_2048(FILE *f, unsigned int base, void *dest, int sector) { int ret; - fseek(f, base + sector * 2048, SEEK_SET); + if (!f) + return -1; + fseeko(f, base + sector * 2048, SEEK_SET); ret = fread((char *)dest + 12 * 2, 1, 2048, f); // not really necessary, fake mode 2 header @@ -1326,8 +1351,9 @@ static long CALLBACK ISOopen(void) { char alt_bin_filename[MAXPATHLEN]; const char *bin_filename; char image_str[1024]; + off_t size_main; - if (cdHandle != NULL) { + if (cdHandle || chd_img) { return 0; // it's already open } @@ -1337,6 +1363,7 @@ static long CALLBACK ISOopen(void) { GetIsoFile(), strerror(errno)); return -1; } + size_main = get_size(cdHandle); snprintf(image_str, sizeof(image_str) - 6*4 - 1, "Loaded CD Image: %s", GetIsoFile()); @@ -1380,6 +1407,8 @@ static long CALLBACK ISOopen(void) { CDR_getBuffer = ISOgetBuffer_chd; cdimg_read_func = cdread_chd; cdimg_read_sub_func = cdread_sub_chd; 
+ fclose(cdHandle); + cdHandle = NULL; } #endif @@ -1390,11 +1419,9 @@ static long CALLBACK ISOopen(void) { strcat(image_str, "[+sbi]"); } - fseeko(cdHandle, 0, SEEK_END); - // maybe user selected metadata file instead of main .bin .. bin_filename = GetIsoFile(); - if (ftello(cdHandle) < 2352 * 0x10) { + if (cdHandle && size_main < 2352 * 0x10) { static const char *exts[] = { ".bin", ".BIN", ".img", ".IMG" }; FILE *tmpf = NULL; size_t i; @@ -1415,14 +1442,13 @@ static long CALLBACK ISOopen(void) { bin_filename = alt_bin_filename; fclose(cdHandle); cdHandle = tmpf; - fseeko(cdHandle, 0, SEEK_END); + size_main = get_size(cdHandle); } } // guess whether it is mode1/2048 - if (cdimg_read_func == cdread_normal && ftello(cdHandle) % 2048 == 0) { + if (cdHandle && cdimg_read_func == cdread_normal && size_main % 2048 == 0) { unsigned int modeTest = 0; - fseek(cdHandle, 0, SEEK_SET); if (!fread(&modeTest, sizeof(modeTest), 1, cdHandle)) { SysPrintf(_("File IO error in <%s:%s>.\n"), __FILE__, __func__); } @@ -1431,7 +1457,6 @@ static long CALLBACK ISOopen(void) { isMode1ISO = TRUE; } } - fseek(cdHandle, 0, SEEK_SET); SysPrintf("%s.\n", image_str); @@ -1446,11 +1471,6 @@ static long CALLBACK ISOopen(void) { cdimg_read_sub_func = NULL; } - // make sure we have another handle open for cdda - if (numtracks > 1 && ti[1].handle == NULL) { - ti[1].handle = fopen(bin_filename, "rb"); - } - return 0; } @@ -1580,9 +1600,8 @@ static boolean CALLBACK ISOreadTrack(unsigned char *time) { int sector = MSF2SECT(btoi(time[0]), btoi(time[1]), btoi(time[2])); long ret; - if (cdHandle == NULL) { + if (!cdHandle && !chd_img) return 0; - } if (pregapOffset && sector >= pregapOffset) sector -= 2 * 75; @@ -1621,7 +1640,7 @@ static unsigned char* CALLBACK ISOgetBufferSub(int sector) { return NULL; } else if (subHandle != NULL) { - if (fseek(subHandle, sector * SUB_FRAMESIZE, SEEK_SET)) + if (fseeko(subHandle, sector * SUB_FRAMESIZE, SEEK_SET)) return NULL; if (fread(subbuffer, 1, SUB_FRAMESIZE, 
subHandle) != SUB_FRAMESIZE) return NULL; @@ -1658,7 +1677,8 @@ static long CALLBACK ISOgetStatus(struct CdrStat *stat) { // read CDDA sector into buffer long CALLBACK ISOreadCDDA(unsigned char m, unsigned char s, unsigned char f, unsigned char *buffer) { unsigned char msf[3] = {m, s, f}; - unsigned int file, track, track_start = 0; + unsigned int track, track_start = 0; + FILE *handle = cdHandle; int ret; cddaCurPos = msf2sec((char *)msf); @@ -1678,15 +1698,22 @@ long CALLBACK ISOreadCDDA(unsigned char m, unsigned char s, unsigned char f, uns return 0; } - file = 1; if (multifile) { // find the file that contains this track - for (file = track; file > 1; file--) - if (ti[file].handle != NULL) + unsigned int file; + for (file = track; file > 1; file--) { + if (ti[file].handle != NULL) { + handle = ti[file].handle; break; + } + } + } + if (!handle) { + memset(buffer, 0, CD_FRAMESIZE_RAW); + return -1; } - ret = cdimg_read_func(ti[file].handle, ti[track].start_offset, + ret = cdimg_read_func(handle, ti[track].start_offset, buffer, cddaCurPos - track_start); if (ret != CD_FRAMESIZE_RAW) { memset(buffer, 0, CD_FRAMESIZE_RAW); @@ -1732,5 +1759,5 @@ void cdrIsoInit(void) { } int cdrIsoActive(void) { - return (cdHandle != NULL); + return (cdHandle || chd_img); } From c91bf7dbe935b1a2f043c40e3409c4f7146707aa Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 20 Oct 2023 22:58:58 +0300 Subject: [PATCH 417/597] don't print some old PCSX version confuses some users which version to report --- libpcsxcore/r3000a.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index f0a0ddce1..fb043ae04 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -36,8 +36,6 @@ psxRegisters psxRegs; #endif int psxInit() { - SysPrintf(_("Running PCSX Version %s (%s).\n"), PCSX_VERSION, __DATE__); - #ifndef DRC_DISABLE if (Config.Cpu == CPU_INTERPRETER) { psxCpu = &psxInt; From 68362e7a7b0d3c6915e0e5c54ec889f6c0ca11b2 Mon Sep 17 00:00:00 2001 From: 
notaz Date: Fri, 20 Oct 2023 23:14:17 +0300 Subject: [PATCH 418/597] try a new github issue template --- .github/ISSUE_TEMPLATE/report.yml | 57 +++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/report.yml diff --git a/.github/ISSUE_TEMPLATE/report.yml b/.github/ISSUE_TEMPLATE/report.yml new file mode 100644 index 000000000..f68f236b4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/report.yml @@ -0,0 +1,57 @@ +name: Bug report +description: Report a PCSX-ReARMed issue +body: + - type: input + id: pcsx_version + attributes: + label: PCSX-ReARMed Version + description: Version number of the emulator as shown in the menus and printed in logs. + placeholder: r23l a4e249a1 + validations: + required: true + - type: input + id: device + attributes: + label: Your device + description: "Examples: Raspberry Pi4, PC, PS Vita, PS Classic, etc." + placeholder: PC + validations: + required: true + - type: dropdown + id: os + attributes: + label: Operating System of your device + description: What OS are you using? + options: + - Android + - Apple (iOS, tvOS, macOS etc.) + - Linux (or Linux-based like RetroPie etc.) + - Windows + - Other (consoles, etc.) + - type: dropdown + id: arch + attributes: + label: CPU architecture + description: Select the type of the CPU in your device. In case you're using 32bit software on a 64bit device, please select 32bit. + options: + - ARM 64bit (AArch64) + - ARM 32bit + - x86-64 (64bit Intel, AMD, etc.) + - x86 (32bit Intel, AMD, etc.) + - Other + - type: textarea + id: issue_description + attributes: + label: Issue description + description: What issue are you having? + placeholder: Crash Bandicoot crashes after pressing start. + validations: + required: true + - type: textarea + id: step_by_step + attributes: + label: Step-by-step reproduction and logs + description: Type here how your issue can be reproduced and attach any logs. 
+ placeholder: Wait for the game's title screen to appear and then press start. + validations: + required: false From 4cccc4d2e91e45563a3f77526a5d72fb636d7642 Mon Sep 17 00:00:00 2001 From: Matheus Garcia <37114863+Krush206@users.noreply.github.com> Date: Sat, 21 Oct 2023 15:40:07 +0000 Subject: [PATCH 419/597] Compatibility with FreeBSD --- Makefile | 4 ++-- configure | 20 ++++++++++++++++---- frontend/menu.c | 13 +++++++++---- plugins/gpulib/gpulib.mak | 2 +- 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index bf528b14c..af1216faa 100644 --- a/Makefile +++ b/Makefile @@ -302,10 +302,10 @@ ifneq ($(PLUGINS),) plugins_: $(PLUGINS) $(PLUGINS): - make -C $(dir $@) + $(MAKE) -C $(dir $@) clean_plugins: - make -C plugins/gpulib/ clean + $(MAKE) -C plugins/gpulib/ clean for dir in $(PLUGINS) ; do \ $(MAKE) -C $$(dirname $$dir) clean; done else diff --git a/configure b/configure index 39587aa4d..420a07c5f 100755 --- a/configure +++ b/configure @@ -66,15 +66,27 @@ optimize_cortexa8="no" optimize_arm926ej="no" # hardcoded stuff -CC="${CC-${CROSS_COMPILE}gcc}" -CXX="${CXX-${CROSS_COMPILE}g++}" +if [ ${OSTYPE} = "FreeBSD" ]; then + CC="clang" + CXX="clang++" + CFLAGS="-I/usr/local/include -L/usr/local/lib" + MAKE=gmake +else + CC="${CC-${CROSS_COMPILE}gcc}" + CXX="${CXX-${CROSS_COMPILE}g++}" +fi AS="${AS-${CROSS_COMPILE}as}" AR="${AS-${CROSS_COMPILE}ar}" MAIN_LDLIBS="$LDLIBS -ldl -lm -lpthread" config_mak="config.mak" -SYSROOT="$(${CC} --print-sysroot)" -[ "x${SDL_CONFIG}" = "x" ] && SDL_CONFIG="${SYSROOT}/usr/bin/sdl-config" +if [ ${OSTYPE} = "FreeBSD" ]; then + SYSROOT="$sysroot" + [ "x${SDL_CONFIG}" = "x" ] && SDL_CONFIG="${SYSROOT}/usr/local/bin/sdl-config" +else + SYSROOT="$(${CC} --print-sysroot)" + [ "x${SDL_CONFIG}" = "x" ] && SDL_CONFIG="${SYSROOT}/usr/bin/sdl-config" +fi fail() { diff --git a/frontend/menu.c b/frontend/menu.c index 6bc20c504..6516e343b 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -9,6 +9,11 @@ */ 
#define _GNU_SOURCE 1 +#ifdef __FreeBSD__ +#define STAT stat +#else +#define STAT stat64 +#endif #include #include #include @@ -214,7 +219,7 @@ static int optional_cdimg_filter(struct dirent **namelist, int count, const char *ext, *p; char buf[256], buf2[256]; int i, d, ret, good_cue; - struct stat64 statf; + struct STAT statf; FILE *f; if (count <= 1) @@ -263,7 +268,7 @@ static int optional_cdimg_filter(struct dirent **namelist, int count, p = buf2; snprintf(buf, sizeof(buf), "%s/%s", basedir, p); - ret = stat64(buf, &statf); + ret = STAT(buf, &statf); if (ret == 0) { rm_namelist_entry(namelist, count, p); good_cue = 1; @@ -541,7 +546,7 @@ static int menu_do_last_cd_img(int is_get) { static const char *defaults[] = { "/media", "/mnt/sd", "/mnt" }; char path[256]; - struct stat64 st; + struct STAT st; FILE *f; int i, ret = -1; @@ -564,7 +569,7 @@ static int menu_do_last_cd_img(int is_get) out: if (is_get) { for (i = 0; last_selected_fname[0] == 0 - || stat64(last_selected_fname, &st) != 0; i++) + || STAT(last_selected_fname, &st) != 0; i++) { if (i >= ARRAY_SIZE(defaults)) break; diff --git a/plugins/gpulib/gpulib.mak b/plugins/gpulib/gpulib.mak index 6377274ec..4e1c6597d 100644 --- a/plugins/gpulib/gpulib.mak +++ b/plugins/gpulib/gpulib.mak @@ -49,7 +49,7 @@ $(BIN_GPULIB): $(SRC) $(SRC_GPULIB) $(GPULIB_A) endif $(GPULIB_A): - make -C ../gpulib/ all + $(MAKE) -C ../gpulib/ all clean: $(RM) $(TARGETS) From 4426590d5e20e054247a65334547e5a25037b36c Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 21 Oct 2023 02:45:06 +0300 Subject: [PATCH 420/597] try a workaround for ff8 analogs notaz/pcsx_rearmed#54 --- libpcsxcore/plugins.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index 868493bb6..a6171879e 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -622,6 +622,14 @@ static void log_pad(int port, int pos) #endif } +static void adjust_analog(unsigned char *b) +{ + // ff8 hates 0x80 for whatever 
reason (broken in 2d area menus), + // or is this caused by something else we do wrong?? + if (b[6] == 0x80) + b[6] = 0x7f; +} + // Build response for 0x42 request Pad in port static void PADstartPoll_(PadDataS *pad) { switch (pad->controllerType) { @@ -699,6 +707,7 @@ static void PADstartPoll_(PadDataS *pad) { stdpar[5] = pad->rightJoyY; stdpar[6] = pad->leftJoyX; stdpar[7] = pad->leftJoyY; + adjust_analog(stdpar); memcpy(buf, stdpar, 8); respSize = 8; break; @@ -711,6 +720,7 @@ static void PADstartPoll_(PadDataS *pad) { stdpar[5] = pad->rightJoyY; stdpar[6] = pad->leftJoyX; stdpar[7] = pad->leftJoyY; + adjust_analog(stdpar); memcpy(buf, stdpar, 8); respSize = 8; break; From 76c06a1a8a2c359341a086c87c9a12c9926c440f Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 21 Oct 2023 23:33:34 +0300 Subject: [PATCH 421/597] update libpicofe --- configure | 4 ++-- frontend/libpicofe | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/configure b/configure index 420a07c5f..a72c059c1 100755 --- a/configure +++ b/configure @@ -66,7 +66,7 @@ optimize_cortexa8="no" optimize_arm926ej="no" # hardcoded stuff -if [ ${OSTYPE} = "FreeBSD" ]; then +if [ "${OSTYPE}" = "FreeBSD" ]; then CC="clang" CXX="clang++" CFLAGS="-I/usr/local/include -L/usr/local/lib" @@ -80,7 +80,7 @@ AR="${AS-${CROSS_COMPILE}ar}" MAIN_LDLIBS="$LDLIBS -ldl -lm -lpthread" config_mak="config.mak" -if [ ${OSTYPE} = "FreeBSD" ]; then +if [ "${OSTYPE}" = "FreeBSD" ]; then SYSROOT="$sysroot" [ "x${SDL_CONFIG}" = "x" ] && SDL_CONFIG="${SYSROOT}/usr/local/bin/sdl-config" else diff --git a/frontend/libpicofe b/frontend/libpicofe index b0ce6fa8b..740c6f25f 160000 --- a/frontend/libpicofe +++ b/frontend/libpicofe @@ -1 +1 @@ -Subproject commit b0ce6fa8bd3c171debd5589f3ee8a95e26b1d61b +Subproject commit 740c6f25f8240deeb732a0a999f2a57cc2f6f6d6 From f3746eea2d69d08948522600b99388618ec46f1b Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 22 Oct 2023 00:09:16 +0300 Subject: [PATCH 422/597] add a libcrypt warning 
gives a hint instead of silently not working --- frontend/libretro.c | 59 +++++++++++++++++----------- frontend/main.c | 7 +++- libpcsxcore/cdrom.c | 5 ++- libpcsxcore/database.c | 74 +++++++++++++++++++++++++++++++++++- libpcsxcore/database.h | 3 +- libpcsxcore/ppf.c | 9 +++-- libpcsxcore/ppf.h | 7 ++-- libpcsxcore/psxinterpreter.c | 8 +++- 8 files changed, 136 insertions(+), 36 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 8a2447aa9..a30a1ab2e 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -28,6 +28,7 @@ #include "../libpcsxcore/cheat.h" #include "../libpcsxcore/r3000a.h" #include "../libpcsxcore/gpu.h" +#include "../libpcsxcore/database.h" #include "../plugins/dfsound/out.h" #include "../plugins/dfsound/spu_config.h" #include "cspace.h" @@ -1502,6 +1503,32 @@ static void set_retro_memmap(void) #endif } +static void show_notification(const char *msg_str, + unsigned duration_ms, unsigned priority) +{ + if (msg_interface_version >= 1) + { + struct retro_message_ext msg = { + msg_str, + duration_ms, + 3, + RETRO_LOG_WARN, + RETRO_MESSAGE_TARGET_ALL, + RETRO_MESSAGE_TYPE_NOTIFICATION, + -1 + }; + environ_cb(RETRO_ENVIRONMENT_SET_MESSAGE_EXT, &msg); + } + else + { + struct retro_message msg = { + msg_str, + 180 + }; + environ_cb(RETRO_ENVIRONMENT_SET_MESSAGE, &msg); + } +} + static void retro_audio_buff_status_cb( bool active, unsigned occupancy, bool underrun_likely) { @@ -1781,6 +1808,9 @@ bool retro_load_game(const struct retro_game_info *info) set_retro_memmap(); retro_set_audio_buff_status_cb(); + if (check_unsatisfied_libcrypt()) + show_notification("LibCrypt protected game with missing SBI detected", 3000, 3); + return true; } @@ -3181,38 +3211,21 @@ static void loadPSXBios(void) if (!found_bios) { const char *msg_str; + unsigned duration; if (useHLE) { - msg_str = "BIOS set to \'hle\' in core options - real BIOS will be ignored"; + msg_str = "BIOS set to \'hle\'"; SysPrintf("Using HLE BIOS.\n"); + // shorter as the 
user probably intentionally wants to use HLE + duration = 700; } else { msg_str = "No PlayStation BIOS file found - add for better compatibility"; SysPrintf("No BIOS files found.\n"); + duration = 3000; } - - if (msg_interface_version >= 1) - { - struct retro_message_ext msg = { - msg_str, - 3000, - 3, - RETRO_LOG_WARN, - RETRO_MESSAGE_TARGET_ALL, - RETRO_MESSAGE_TYPE_NOTIFICATION, - -1 - }; - environ_cb(RETRO_ENVIRONMENT_SET_MESSAGE_EXT, &msg); - } - else - { - struct retro_message msg = { - msg_str, - 180 - }; - environ_cb(RETRO_ENVIRONMENT_SET_MESSAGE, &msg); - } + show_notification(msg_str, duration, 2); } } diff --git a/frontend/main.c b/frontend/main.c index 05e4d55e5..18ca6e57c 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -24,6 +24,7 @@ #include "../libpcsxcore/misc.h" #include "../libpcsxcore/cheat.h" #include "../libpcsxcore/sio.h" +#include "../libpcsxcore/database.h" #include "../libpcsxcore/new_dynarec/new_dynarec.h" #include "../plugins/cdrcimg/cdrcimg.h" #include "../plugins/dfsound/spu_config.h" @@ -413,7 +414,11 @@ void emu_on_new_cd(int show_hud_msg) } if (show_hud_msg) { - snprintf(hud_msg, sizeof(hud_msg), BOOT_MSG); + if (check_unsatisfied_libcrypt()) + snprintf(hud_msg, sizeof(hud_msg), + "LibCrypt protected game with missing SBI detected"); + else + snprintf(hud_msg, sizeof(hud_msg), BOOT_MSG); hud_new_msg = 3; } } diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 2ce7fe97b..bc973e904 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -462,12 +462,13 @@ static int ReadTrack(const u8 *time) static void UpdateSubq(const u8 *time) { const struct SubQ *subq; + int s = MSF2SECT(time[0], time[1], time[2]); u16 crc; - if (CheckSBI(time)) + if (CheckSBI(s)) return; - subq = (struct SubQ *)CDR_getBufferSub(MSF2SECT(time[0], time[1], time[2])); + subq = (struct SubQ *)CDR_getBufferSub(s); if (subq != NULL && cdr.CurTrack == 1) { crc = calcCrc((u8 *)subq + 12, 10); if (crc == (((u16)subq->CRC[0] << 8) | subq->CRC[1])) { 
diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 568bba276..2d9a2ea04 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -1,5 +1,6 @@ #include "misc.h" #include "sio.h" +#include "ppf.h" #include "new_dynarec/new_dynarec.h" /* It's duplicated from emu_if.c */ @@ -92,7 +93,7 @@ cycle_multiplier_overrides[] = }; /* Function for automatic patching according to GameID. */ -void Apply_Hacks_Cdrom() +void Apply_Hacks_Cdrom(void) { size_t i, j; @@ -139,3 +140,74 @@ void Apply_Hacks_Cdrom() } } } + +// from duckstation's gamedb.json +static const u16 libcrypt_ids[] = { + 17, 311, 995, 1041, 1226, 1241, 1301, 1362, 1431, 1444, + 1492, 1493, 1494, 1495, 1516, 1517, 1518, 1519, 1545, 1564, + 1695, 1700, 1701, 1702, 1703, 1704, 1715, 1733, 1763, 1882, + 1906, 1907, 1909, 1943, 1979, 2004, 2005, 2006, 2007, 2024, + 2025, 2026, 2027, 2028, 2029, 2030, 2031, 2061, 2071, 2080, + 2081, 2082, 2083, 2084, 2086, 2104, 2105, 2112, 2113, 2118, + 2181, 2182, 2184, 2185, 2207, 2208, 2209, 2210, 2211, 2222, + 2264, 2290, 2292, 2293, 2328, 2329, 2330, 2354, 2355, 2365, + 2366, 2367, 2368, 2369, 2395, 2396, 2402, 2430, 2431, 2432, + 2433, 2487, 2488, 2489, 2490, 2491, 2529, 2530, 2531, 2532, + 2533, 2538, 2544, 2545, 2546, 2558, 2559, 2560, 2561, 2562, + 2563, 2572, 2573, 2681, 2688, 2689, 2698, 2700, 2704, 2705, + 2706, 2707, 2708, 2722, 2723, 2724, 2733, 2754, 2755, 2756, + 2763, 2766, 2767, 2768, 2769, 2824, 2830, 2831, 2834, 2835, + 2839, 2857, 2858, 2859, 2860, 2861, 2862, 2965, 2966, 2967, + 2968, 2969, 2975, 2976, 2977, 2978, 2979, 3061, 3062, 3189, + 3190, 3191, 3241, 3242, 3243, 3244, 3245, 3324, 3489, 3519, + 3520, 3521, 3522, 3523, 3530, 3603, 3604, 3605, 3606, 3607, + 3626, 3648, 12080, 12081, 12082, 12083, 12084, 12328, 12329, 12330, + 12558, 12559, 12560, 12561, 12562, 12965, 12966, 12967, 12968, 12969, + 22080, 22081, 22082, 22083, 22084, 22328, 22329, 22330, 22965, 22966, + 22967, 22968, 22969, 32080, 32081, 32082, 32083, 32084, 32965, 
32966, + 32967, 32968, 32969 +}; + +// as documented by nocash +static const u16 libcrypt_sectors[16] = { + 14105, 14231, 14485, 14579, 14649, 14899, 15056, 15130, + 15242, 15312, 15378, 15628, 15919, 16031, 16101, 16167 +}; + +int check_unsatisfied_libcrypt(void) +{ + const char *p = CdromId + 4; + u16 id, key = 0; + size_t i; + + if (strncmp(CdromId, "SCE", 3) && strncmp(CdromId, "SLE", 3)) + return 0; + while (*p == '0') + p++; + id = (u16)atoi(p); + for (i = 0; i < ARRAY_SIZE(libcrypt_ids); i++) + if (id == libcrypt_ids[i]) + break; + if (i == ARRAY_SIZE(libcrypt_ids)) + return 0; + + // detected a protected game + if (!CDR_getBufferSub(libcrypt_sectors[0]) && !sbi_sectors) { + SysPrintf("==================================================\n"); + SysPrintf("LibCrypt game detected with missing SBI/subchannel\n"); + SysPrintf("==================================================\n"); + return 1; + } + + if (sbi_sectors) { + // calculate key just for fun (we don't really need it) + for (i = 0; i < 16; i++) + if (CheckSBI(libcrypt_sectors[i] - 2*75)) + key |= 1u << (15 - i); + } + if (key) + SysPrintf("%s, possible key=%04X\n", "LibCrypt detected", key); + else + SysPrintf("%s\n", "LibCrypt detected"); + return 0; +} diff --git a/libpcsxcore/database.h b/libpcsxcore/database.h index fbb564dd7..1ec8875e5 100644 --- a/libpcsxcore/database.h +++ b/libpcsxcore/database.h @@ -1,6 +1,7 @@ #ifndef DATABASE_H #define DATABASE_H -extern void Apply_Hacks_Cdrom(); +void Apply_Hacks_Cdrom(void); +int check_unsatisfied_libcrypt(void); #endif diff --git a/libpcsxcore/ppf.c b/libpcsxcore/ppf.c index a7f6aefd4..f37687cc0 100644 --- a/libpcsxcore/ppf.c +++ b/libpcsxcore/ppf.c @@ -357,6 +357,7 @@ void BuildPPFCache() { // redump.org SBI files, slightly different handling from PCSX-Reloaded unsigned char *sbi_sectors; +int sbi_len; int LoadSBI(const char *fname, int sector_count) { int good_sectors = 0; @@ -370,7 +371,8 @@ int LoadSBI(const char *fname, int sector_count) { if (sbihandle 
== NULL) return -1; - sbi_sectors = calloc(1, sector_count / 8); + sbi_len = (sector_count + 7) / 8; + sbi_sectors = calloc(1, sbi_len); if (sbi_sectors == NULL) goto end; @@ -414,15 +416,13 @@ int LoadSBI(const char *fname, int sector_count) { break; } - fclose(sbihandle); - return 0; - end: if (!clean_eof) SysPrintf(_("SBI: parse failure at 0x%lx\n"), ftell(sbihandle)); if (!good_sectors) { free(sbi_sectors); sbi_sectors = NULL; + sbi_len = 0; } fclose(sbihandle); return sbi_sectors ? 0 : -1; @@ -432,5 +432,6 @@ void UnloadSBI(void) { if (sbi_sectors) { free(sbi_sectors); sbi_sectors = NULL; + sbi_len = 0; } } diff --git a/libpcsxcore/ppf.h b/libpcsxcore/ppf.h index fb0a3777b..a1b147510 100644 --- a/libpcsxcore/ppf.h +++ b/libpcsxcore/ppf.h @@ -31,16 +31,17 @@ int LoadSBI(const char *fname, int sector_count); void UnloadSBI(void); extern unsigned char *sbi_sectors; +extern int sbi_len; #include "cdrom.h" -static inline int CheckSBI(const u8 *t) +static inline int CheckSBI(int s) { - int s; if (sbi_sectors == NULL) return 0; + if ((unsigned int)(s >> 3) >= (unsigned int)sbi_len) + return 0; - s = MSF2SECT(t[0], t[1], t[2]); return (sbi_sectors[s >> 3] >> (s & 7)) & 1; } diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index f6ff2e8b2..5f6971df7 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -958,8 +958,14 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) { case 7: if ((regs_->CP0.n.DCIC ^ val) & 0xff800000) log_unhandled("DCIC: %08x->%08x\n", regs_->CP0.n.DCIC, val); - // fallthrough + goto default_; + case 3: + if (regs_->CP0.n.BPC != val) + log_unhandled("BPC: %08x->%08x\n", regs_->CP0.n.BPC, val); + goto default_; + default: + default_: regs_->CP0.r[reg] = val; break; } From 2da2fc7676c1fc40d26226a7a4c43728d9a2eedf Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 23 Oct 2023 21:00:35 +0300 Subject: [PATCH 423/597] gpu_neon: rework buffering to reduce flickering ... 
maybe notaz/pcsx_rearmed#324 --- frontend/plat_sdl.c | 7 + frontend/plugin_lib.c | 18 +- frontend/plugin_lib.h | 1 + plugins/dfxvideo/gpulib_if.c | 2 +- plugins/gpu-gles/gpulib_if.c | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu.c | 5 + plugins/gpu_neon/psx_gpu/psx_gpu.h | 16 +- plugins/gpu_neon/psx_gpu/psx_gpu_4x.c | 22 +- plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h | 31 +-- .../gpu_neon/psx_gpu/psx_gpu_offsets_update.c | 46 ++-- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 228 ++++++++++-------- plugins/gpu_neon/psx_gpu_if.c | 112 +++------ plugins/gpu_senquack/gpulib_if.cpp | 2 +- plugins/gpu_unai/gpulib_if.cpp | 2 +- plugins/gpulib/gpu.c | 22 +- plugins/gpulib/gpu.h | 2 +- plugins/gpulib/vout_pl.c | 2 + 17 files changed, 252 insertions(+), 268 deletions(-) diff --git a/frontend/plat_sdl.c b/frontend/plat_sdl.c index 5f29b90c8..c5570253e 100644 --- a/frontend/plat_sdl.c +++ b/frontend/plat_sdl.c @@ -328,6 +328,13 @@ void plat_video_menu_end(void) void plat_video_menu_leave(void) { + void *fb = NULL; + if (plat_sdl_overlay != NULL || plat_sdl_gl_active) + fb = shadow_fb; + else if (plat_sdl_screen) + fb = plat_sdl_screen->pixels; + if (fb) + memset(fb, 0, g_menuscreen_w * g_menuscreen_h * 2); in_menu = 0; } diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 2339028e7..50aba227c 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -134,7 +134,7 @@ static __attribute__((noinline)) void draw_active_chans(int vout_w, int vout_h) static const unsigned short colors[2] = { 0x1fe3, 0x0700 }; unsigned short *dest = (unsigned short *)pl_vout_buf + - vout_w * (vout_h - HUD_HEIGHT) + vout_w / 2 - 192/2; + pl_vout_w * (vout_h - HUD_HEIGHT) + pl_vout_w / 2 - 192/2; unsigned short *d, p; int c, x, y; @@ -149,7 +149,7 @@ static __attribute__((noinline)) void draw_active_chans(int vout_w, int vout_h) (fmod_chans & (1< 0) { + if (flip_clear_counter > 0) { if (pl_plat_clear) pl_plat_clear(); else memset(pl_vout_buf, 0, dstride * h_full * pl_vout_bpp / 8); - 
clear_counter--; + flip_clear_counter--; } if (pl_plat_blit) diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index efd7d1e2b..97d44f258 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -45,6 +45,7 @@ void pl_start_watchdog(void); void *pl_prepare_screenshot(int *w, int *h, int *bpp); void pl_init(void); void pl_switch_dispmode(void); +void pl_force_clear(void); void pl_timing_prepare(int is_pal); void pl_frame_limit(void); diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index 978e7d84c..d08ca67e2 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -299,7 +299,7 @@ void renderer_notify_res_change(void) { } -void renderer_notify_scanout_x_change(int x, int w) +void renderer_notify_scanout_change(int x, int y) { } diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c index 923f652e3..a3a0c43b1 100644 --- a/plugins/gpu-gles/gpulib_if.c +++ b/plugins/gpu-gles/gpulib_if.c @@ -514,7 +514,7 @@ void renderer_notify_res_change(void) { } -void renderer_notify_scanout_x_change(int x, int w) +void renderer_notify_scanout_change(int x, int y) { } diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index ea3641f83..62080f3f8 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -23,6 +24,7 @@ #include "vector_ops.h" #endif #include "psx_gpu_simd.h" +#include "psx_gpu_offsets.h" #if 0 void dump_r_d(const char *name, void *dump); @@ -5012,6 +5014,9 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN; psx_gpu->saved_hres = 256; + + // check some offset + psx_gpu->reserved_a[(offsetof(psx_gpu_struct, blocks) == psx_gpu_blocks_offset) - 1] = 0; } u64 get_us(void) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 4eb622dfb..da9e34266 100644 --- 
a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -141,8 +141,6 @@ typedef struct u32 triangle_color; u32 dither_table[4]; - u32 uvrgb_phase; - struct render_block_handler_struct *render_block_handler; void *texture_page_ptr; void *texture_page_base; @@ -150,6 +148,8 @@ typedef struct u16 *vram_ptr; u16 *vram_out_ptr; + u32 uvrgb_phase; + u16 render_state_base; u16 render_state; @@ -194,15 +194,15 @@ typedef struct s16 saved_viewport_start_y; s16 saved_viewport_end_x; s16 saved_viewport_end_y; - u8 enhancement_buf_by_x16[64]; // 0-3 specifying which buf - u16 enhancement_buf_start[4]; // x pos where buf[n] begins - - u16 enhancement_scanout_x[4]; - u16 enhancement_scanout_select; + struct psx_gpu_scanout { + u16 x, y, w, h; + } enhancement_scanouts[4]; // 0-3 specifying which buf to use + u16 enhancement_scanout_eselect; // eviction selector + u16 enhancement_current_buf; // Align up to 64 byte boundary to keep the upcoming buffers cache line // aligned, also make reachable with single immediate addition - u8 reserved_a[142]; + u8 reserved_a[188 + 9*4 - 9*sizeof(void *)]; // 8KB block_struct blocks[MAX_BLOCKS_PER_ROW]; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c index bd6c7a1f7..7b3ee85a1 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c @@ -1,11 +1,3 @@ -#define select_enhancement_buf_index(psx_gpu, x) \ - ((psx_gpu)->enhancement_buf_by_x16[(u32)(x) / \ - (1024u / sizeof((psx_gpu)->enhancement_buf_by_x16))]) - -#define select_enhancement_buf_ptr(psx_gpu, x) \ - ((psx_gpu)->enhancement_buf_ptr + \ - (select_enhancement_buf_index(psx_gpu, x) << 20)) - #if !defined(NEON_BUILD) || defined(SIMD_BUILD) #ifndef zip_4x32b @@ -325,12 +317,12 @@ render_block_handler_struct render_sprite_block_handlers_4x[] = render_sprite_blocks_switch_block_4x() }; - void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, s32 width, s32 height, u32 
flags, u32 color) { s32 x_right = x + width - 1; s32 y_bottom = y + height - 1; + s16 end_x; #ifdef PROFILE sprites++; @@ -352,8 +344,12 @@ void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, height -= clip; } - if(x_right > psx_gpu->viewport_end_x) - width -= x_right - psx_gpu->viewport_end_x; + end_x = psx_gpu->viewport_end_x; + if (end_x - psx_gpu->viewport_start_x + 1 > 512) + end_x = psx_gpu->viewport_start_x + 511; + + if(x_right > end_x) + width -= x_right - end_x; if(y_bottom > psx_gpu->viewport_end_y) height -= y_bottom - psx_gpu->viewport_end_y; @@ -361,7 +357,9 @@ void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, if((width <= 0) || (height <= 0)) return; - psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x); + if (!psx_gpu->enhancement_current_buf_ptr) + return; + psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr; x *= 2; y *= 2; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h index 161384e90..2f8a64635 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h @@ -1,7 +1,6 @@ #ifndef __P_PSX_GPU_OFFSETS_H__ #define __P_PSX_GPU_OFFSETS_H__ -#define psx_gpu_test_mask_offset 0x0 #define psx_gpu_uvrg_offset 0x10 #define psx_gpu_uvrg_dx_offset 0x20 #define psx_gpu_uvrg_dy_offset 0x30 @@ -13,23 +12,18 @@ #define psx_gpu_b_offset 0x90 #define psx_gpu_b_dy_offset 0x94 #define psx_gpu_triangle_area_offset 0x98 -#define psx_gpu_texture_window_settings_offset 0x9c #define psx_gpu_current_texture_mask_offset 0xa0 -#define psx_gpu_viewport_mask_offset 0xa4 #define psx_gpu_dirty_textures_4bpp_mask_offset 0xa8 #define psx_gpu_dirty_textures_8bpp_mask_offset 0xac #define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 0xb0 #define psx_gpu_triangle_color_offset 0xb4 #define psx_gpu_dither_table_offset 0xb8 -#define psx_gpu_uvrgb_phase_offset 0xc8 -#define psx_gpu_render_block_handler_offset 0xcc 
-#define psx_gpu_texture_page_ptr_offset 0xd0 -#define psx_gpu_texture_page_base_offset 0xd4 -#define psx_gpu_clut_ptr_offset 0xd8 -#define psx_gpu_vram_ptr_offset 0xdc -#define psx_gpu_vram_out_ptr_offset 0xe0 -#define psx_gpu_render_state_base_offset 0xe4 -#define psx_gpu_render_state_offset 0xe6 +#define psx_gpu_texture_page_ptr_offset 0xcc +#define psx_gpu_texture_page_base_offset 0xd0 +#define psx_gpu_clut_ptr_offset 0xd4 +#define psx_gpu_vram_ptr_offset 0xd8 +#define psx_gpu_vram_out_ptr_offset 0xdc +#define psx_gpu_uvrgb_phase_offset 0xe0 #define psx_gpu_num_spans_offset 0xe8 #define psx_gpu_num_blocks_offset 0xea #define psx_gpu_viewport_start_x_offset 0xec @@ -38,26 +32,13 @@ #define psx_gpu_viewport_end_y_offset 0xf2 #define psx_gpu_mask_msb_offset 0xf4 #define psx_gpu_triangle_winding_offset 0xf6 -#define psx_gpu_display_area_draw_enable_offset 0xf7 #define psx_gpu_current_texture_page_offset 0xf8 -#define psx_gpu_last_8bpp_texture_page_offset 0xf9 #define psx_gpu_texture_mask_width_offset 0xfa #define psx_gpu_texture_mask_height_offset 0xfb -#define psx_gpu_texture_window_x_offset 0xfc -#define psx_gpu_texture_window_y_offset 0xfd -#define psx_gpu_primitive_type_offset 0xfe -#define psx_gpu_render_mode_offset 0xff -#define psx_gpu_offset_x_offset 0x100 -#define psx_gpu_offset_y_offset 0x102 -#define psx_gpu_clut_settings_offset 0x104 -#define psx_gpu_texture_settings_offset 0x106 #define psx_gpu_reciprocal_table_ptr_offset 0x108 #define psx_gpu_blocks_offset 0x200 #define psx_gpu_span_uvrg_offset_offset 0x2200 #define psx_gpu_span_edge_data_offset 0x4200 #define psx_gpu_span_b_offset_offset 0x5200 -#define psx_gpu_texture_4bpp_cache_offset 0x5a00 -#define psx_gpu_texture_8bpp_even_cache_offset 0x205a00 -#define psx_gpu_texture_8bpp_odd_cache_offset 0x305a00 #endif /* __P_PSX_GPU_OFFSETS_H__ */ diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c index b1de121ee..9b3784827 100644 --- 
a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c @@ -4,7 +4,7 @@ #include "common.h" #define WRITE_OFFSET(f, member) \ - fprintf(f, "#define %-50s0x%x\n", \ + fprintf(f, "#define %-50s0x%zx\n", \ "psx_gpu_" #member "_offset", \ offsetof(psx_gpu_struct, member)); @@ -22,8 +22,10 @@ int main() perror("fopen"); return 1; } + fputs("#ifndef __P_PSX_GPU_OFFSETS_H__\n", f); + fputs("#define __P_PSX_GPU_OFFSETS_H__\n\n", f); - WRITE_OFFSET(f, test_mask); + //WRITE_OFFSET(f, test_mask); WRITE_OFFSET(f, uvrg); WRITE_OFFSET(f, uvrg_dx); WRITE_OFFSET(f, uvrg_dy); @@ -35,23 +37,23 @@ int main() WRITE_OFFSET(f, b); WRITE_OFFSET(f, b_dy); WRITE_OFFSET(f, triangle_area); - WRITE_OFFSET(f, texture_window_settings); + //WRITE_OFFSET(f, texture_window_settings); WRITE_OFFSET(f, current_texture_mask); - WRITE_OFFSET(f, viewport_mask); + //WRITE_OFFSET(f, viewport_mask); WRITE_OFFSET(f, dirty_textures_4bpp_mask); WRITE_OFFSET(f, dirty_textures_8bpp_mask); WRITE_OFFSET(f, dirty_textures_8bpp_alternate_mask); WRITE_OFFSET(f, triangle_color); WRITE_OFFSET(f, dither_table); - WRITE_OFFSET(f, uvrgb_phase); - WRITE_OFFSET(f, render_block_handler); + //WRITE_OFFSET(f, render_block_handler); WRITE_OFFSET(f, texture_page_ptr); WRITE_OFFSET(f, texture_page_base); WRITE_OFFSET(f, clut_ptr); WRITE_OFFSET(f, vram_ptr); WRITE_OFFSET(f, vram_out_ptr); - WRITE_OFFSET(f, render_state_base); - WRITE_OFFSET(f, render_state); + WRITE_OFFSET(f, uvrgb_phase); + //WRITE_OFFSET(f, render_state_base); + //WRITE_OFFSET(f, render_state); WRITE_OFFSET(f, num_spans); WRITE_OFFSET(f, num_blocks); WRITE_OFFSET(f, viewport_start_x); @@ -60,27 +62,29 @@ int main() WRITE_OFFSET(f, viewport_end_y); WRITE_OFFSET(f, mask_msb); WRITE_OFFSET(f, triangle_winding); - WRITE_OFFSET(f, display_area_draw_enable); + //WRITE_OFFSET(f, display_area_draw_enable); WRITE_OFFSET(f, current_texture_page); - WRITE_OFFSET(f, last_8bpp_texture_page); + //WRITE_OFFSET(f, 
last_8bpp_texture_page); WRITE_OFFSET(f, texture_mask_width); WRITE_OFFSET(f, texture_mask_height); - WRITE_OFFSET(f, texture_window_x); - WRITE_OFFSET(f, texture_window_y); - WRITE_OFFSET(f, primitive_type); - WRITE_OFFSET(f, render_mode); - WRITE_OFFSET(f, offset_x); - WRITE_OFFSET(f, offset_y); - WRITE_OFFSET(f, clut_settings); - WRITE_OFFSET(f, texture_settings); + //WRITE_OFFSET(f, texture_window_x); + //WRITE_OFFSET(f, texture_window_y); + //WRITE_OFFSET(f, primitive_type); + //WRITE_OFFSET(f, render_mode); + //WRITE_OFFSET(f, offset_x); + //WRITE_OFFSET(f, offset_y); + //WRITE_OFFSET(f, clut_settings); + //WRITE_OFFSET(f, texture_settings); WRITE_OFFSET(f, reciprocal_table_ptr); WRITE_OFFSET(f, blocks); WRITE_OFFSET(f, span_uvrg_offset); WRITE_OFFSET(f, span_edge_data); WRITE_OFFSET(f, span_b_offset); - WRITE_OFFSET(f, texture_4bpp_cache); - WRITE_OFFSET(f, texture_8bpp_even_cache); - WRITE_OFFSET(f, texture_8bpp_odd_cache); + //WRITE_OFFSET(f, texture_4bpp_cache); + //WRITE_OFFSET(f, texture_8bpp_even_cache); + //WRITE_OFFSET(f, texture_8bpp_odd_cache); + + fputs("\n#endif /* __P_PSX_GPU_OFFSETS_H__ */\n", f); fclose(f); return 0; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 5badf6b9a..de227d5b9 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -788,78 +788,111 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) #ifdef PCSX -#define ENH_BUF_TABLE_STEP (1024 / sizeof(psx_gpu->enhancement_buf_by_x16)) - -static int is_new_scanout(psx_gpu_struct *psx_gpu, int x) +// this thing has become such a PITA, should just handle the 2048 width really +static void update_enhancement_buf_scanouts(psx_gpu_struct *psx_gpu, + int x, int y, int w, int h) { - int i, scanout_x; - for (i = 0; i < ARRAY_SIZE(psx_gpu->enhancement_scanout_x); i++) - { - scanout_x = psx_gpu->enhancement_scanout_x[i]; - if (x <= scanout_x && scanout_x < x + 
ENH_BUF_TABLE_STEP) - { - if (x != scanout_x) - log_anomaly("unaligned scanout x: %d,%d\n", scanout_x, x); - return 1; - } + int max_bufs = ARRAY_SIZE(psx_gpu->enhancement_scanouts); + struct psx_gpu_scanout *s; + int i, sel, right, bottom; + u32 tol_x = 48, tol_y = 16; + u32 intersection; + + //w = (w + 15) & ~15; + psx_gpu->saved_hres = w; + assert(!(max_bufs & (max_bufs - 1))); + for (i = 0; i < max_bufs; i++) { + s = &psx_gpu->enhancement_scanouts[i]; + if (s->x == x && s->y == y && w - s->w <= tol_x && h - s->h <= tol_y) + return; } - return 0; -} - -static void update_enhancement_buf_table_from_hres(psx_gpu_struct *psx_gpu) -{ - u32 b, x; - b = 0; - psx_gpu->enhancement_buf_by_x16[0] = b; - psx_gpu->enhancement_buf_start[0] = 0; - for (x = 1; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) - { - if (b < 3 && is_new_scanout(psx_gpu, x * ENH_BUF_TABLE_STEP)) { - b++; - psx_gpu->enhancement_buf_start[b] = x * ENH_BUF_TABLE_STEP; + // evict any scanout that intersects + right = x + w; + bottom = y + h; + for (i = 0, sel = -1; i < max_bufs; i++) { + s = &psx_gpu->enhancement_scanouts[i]; + if (s->x >= right) continue; + if (s->x + s->w <= x) continue; + if (s->y >= bottom) continue; + if (s->y + s->h <= y) continue; + // ... 
but allow upto 16 pixels intersection that some games do + if ((intersection = s->x + s->w - x) - 1u <= tol_x) { + s->w -= intersection; + continue; } - - psx_gpu->enhancement_buf_by_x16[x] = b; + if ((intersection = s->y + s->h - y) - 1u <= tol_y) { + s->h -= intersection; + continue; + } + //printf("%4d%4d%4dx%d evicted\n", s->x, s->y, s->w, s->h); + s->w = 0; + sel = i; + break; } -#if 0 - printf("buf_by_x16:\n"); - for (b = 0; b < 3; b++) { - int first = -1, count = 0; - for (x = 0; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) { - if (psx_gpu->enhancement_buf_by_x16[x] == b) { - if (first < 0) first = x; - count++; + if (sel >= 0) { + // 2nd intersection check + for (i = 0; i < max_bufs; i++) { + s = &psx_gpu->enhancement_scanouts[i]; + if (!s->w) + continue; + if ((intersection = right - s->x) - 1u <= tol_x) { + w -= intersection; + break; + } + if ((intersection = bottom - s->y) - 1u <= tol_y) { + h -= intersection; + break; } - } - if (count) { - assert(first * ENH_BUF_TABLE_STEP == psx_gpu->enhancement_buf_start[b]); - printf("%d: %3zd-%zd\n", b, first * ENH_BUF_TABLE_STEP, - (first + count) * ENH_BUF_TABLE_STEP); } } + else + sel = psx_gpu->enhancement_scanout_eselect++; + psx_gpu->enhancement_scanout_eselect &= max_bufs - 1; + s = &psx_gpu->enhancement_scanouts[sel]; + s->x = x; + s->y = y; + s->w = w; + s->h = h; + + sync_enhancement_buffers(x, y, w, h); +#if 0 + printf("scanouts:\n"); + for (i = 0; i < ARRAY_SIZE(psx_gpu->enhancement_scanouts); i++) { + s = &psx_gpu->enhancement_scanouts[i]; + if (s->w) + printf("%4d%4d%4dx%d\n", s->x, s->y, s->w, s->h); + } #endif } -static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu, - u32 x0, u32 len) +static int select_enhancement_buf_index(psx_gpu_struct *psx_gpu, s32 x, s32 y) { -#if 0 - u32 x, b; + int i; + for (i = 0; i < ARRAY_SIZE(psx_gpu->enhancement_scanouts); i++) { + const struct psx_gpu_scanout *s = &psx_gpu->enhancement_scanouts[i]; + if (s->x <= x && x < s->x + s->w && + s->y 
<= y && y < s->y + s->h) + return i; + } + return -1; +} - for (x = x0, b = 0; x >= len; b++) - x -= len; - if (b > 3) - b = 3; +#define select_enhancement_buf_by_index(psx_gpu_, i_) \ + ((psx_gpu_)->enhancement_buf_ptr + ((i_) << 20)) - memset(psx_gpu->enhancement_buf_by_x16 + x0 / ENH_BUF_TABLE_STEP, - b, (len + ENH_BUF_TABLE_STEP - 1) / ENH_BUF_TABLE_STEP); -#endif +static void *select_enhancement_buf_ptr(psx_gpu_struct *psx_gpu, s32 x, s32 y) +{ + int i = select_enhancement_buf_index(psx_gpu, x, y); + return i >= 0 ? select_enhancement_buf_by_index(psx_gpu, i) : NULL; } -#define select_enhancement_buf(psx_gpu) \ - psx_gpu->enhancement_current_buf_ptr = \ - select_enhancement_buf_ptr(psx_gpu, psx_gpu->saved_viewport_start_x) +static void select_enhancement_buf(psx_gpu_struct *psx_gpu) +{ + s32 x = psx_gpu->saved_viewport_start_x; + s32 y = psx_gpu->saved_viewport_start_y; + psx_gpu->enhancement_current_buf_ptr = select_enhancement_buf_ptr(psx_gpu, x, y); +} #define enhancement_disable() { \ psx_gpu->vram_out_ptr = psx_gpu->vram_ptr; \ @@ -870,13 +903,19 @@ static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu, psx_gpu->uvrgb_phase = 0x8000; \ } -#define enhancement_enable() { \ - psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr; \ - psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x * 2; \ - psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y * 2; \ - psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2 + 1; \ - psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2 + 1; \ - psx_gpu->uvrgb_phase = 0x7fff; \ +static int enhancement_enable(psx_gpu_struct *psx_gpu) +{ + if (!psx_gpu->enhancement_current_buf_ptr) + return 0; + psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr; + psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x * 2; + psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y * 2; + psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2 + 1; + 
psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2 + 1; + if (psx_gpu->viewport_end_x - psx_gpu->viewport_start_x + 1 > 1024) + psx_gpu->viewport_end_x = psx_gpu->viewport_start_x + 1023; + psx_gpu->uvrgb_phase = 0x7fff; + return 1; } #define shift_vertices3(v) { \ @@ -971,17 +1010,10 @@ void scale2x_tiles8(void *dst, const void *src, int w8, int h) static int disable_main_render; -static int check_enhanced_range(psx_gpu_struct *psx_gpu, int x, int x_end) +// simple check for a case where no clipping is used +// - now handled by adjusting the viewport +static int check_enhanced_range(psx_gpu_struct *psx_gpu, int x, int y) { - // reject to avoid oveflowing the 1024 width - // (assume some offscreen render-to-texture thing) - int fb_index; - if (x < 0) - return 1; - fb_index = select_enhancement_buf_index(psx_gpu, x); - if (x >= psx_gpu->enhancement_buf_start[fb_index] + 512) - return 0; - return 1; } @@ -1067,7 +1099,9 @@ static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, if (!check_enhanced_range(psx_gpu, vertex_ptrs[0]->x, vertex_ptrs[2]->x)) return; - enhancement_enable(); + if (!enhancement_enable(psx_gpu)) + return; + shift_vertices3(vertex_ptrs); shift_triangle_area(); render_triangle_p(psx_gpu, vertex_ptrs, current_command); @@ -1198,7 +1232,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 width = list_s16[4] & 0x3FF; u32 height = list_s16[5] & 0x1FF; u32 color = list[0] & 0xFFFFFF; - u32 i1, i2; + s32 i1, i2; x &= ~0xF; width = ((width + 0xF) & ~0xF); @@ -1207,16 +1241,14 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, do_fill(psx_gpu, x, y, width, height, color); - i1 = select_enhancement_buf_index(psx_gpu, x); - i2 = select_enhancement_buf_index(psx_gpu, x + width - 1); - if (i1 != i2) { + i1 = select_enhancement_buf_index(psx_gpu, x, y); + i2 = select_enhancement_buf_index(psx_gpu, x + width - 1, y + height - 1); + if (i1 < 0 || i1 != i2) { sync_enhancement_buffers(x, y, width, height); 
break; } - if (x >= psx_gpu->enhancement_buf_start[i1] + psx_gpu->saved_hres) - break; - psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x); + psx_gpu->vram_out_ptr = select_enhancement_buf_by_index(psx_gpu, i1); x *= 2; y *= 2; width *= 2; @@ -1346,8 +1378,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, vertexes[1].y = list_s16[5] + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, list[0], 0); - enhancement_enable(); - render_line(psx_gpu, vertexes, current_command, list[0], 1); + if (enhancement_enable(psx_gpu)) + render_line(psx_gpu, vertexes, current_command, list[0], 1); break; } @@ -1370,8 +1402,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, enhancement_disable(); render_line(psx_gpu, vertexes, current_command, list[0], 0); - enhancement_enable(); - render_line(psx_gpu, vertexes, current_command, list[0], 1); + if (enhancement_enable(psx_gpu)) + render_line(psx_gpu, vertexes, current_command, list[0], 1); list_position++; num_vertexes++; @@ -1406,8 +1438,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, vertexes[1].y = list_s16[7] + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, 0, 0); - enhancement_enable(); - render_line(psx_gpu, vertexes, current_command, 0, 1); + if (enhancement_enable(psx_gpu)) + render_line(psx_gpu, vertexes, current_command, 0, 1); break; } @@ -1439,8 +1471,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, enhancement_disable(); render_line(psx_gpu, vertexes, current_command, 0, 0); - enhancement_enable(); - render_line(psx_gpu, vertexes, current_command, 0, 1); + if (enhancement_enable(psx_gpu)) + render_line(psx_gpu, vertexes, current_command, 0, 1); list_position += 2; num_vertexes++; @@ -1632,8 +1664,6 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s16 viewport_start_x = list[0] & 0x3FF; s16 viewport_start_y = (list[0] >> 10) & 0x1FF; - u32 w; - s32 d; 
if(viewport_start_x == psx_gpu->viewport_start_x && viewport_start_y == psx_gpu->viewport_start_y) @@ -1645,13 +1675,6 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, psx_gpu->saved_viewport_start_x = viewport_start_x; psx_gpu->saved_viewport_start_y = viewport_start_y; - w = (u32)psx_gpu->viewport_end_x - (u32)viewport_start_x + 1; - d = psx_gpu->saved_hres - w; - if(-16 <= d && d <= 16) - { - update_enhancement_buf_table_from_x(psx_gpu, - viewport_start_x, w); - } select_enhancement_buf(psx_gpu); #ifdef TEXTURE_CACHE_4BPP @@ -1668,8 +1691,6 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s16 viewport_end_x = list[0] & 0x3FF; s16 viewport_end_y = (list[0] >> 10) & 0x1FF; - u32 w; - s32 d; if(viewport_end_x == psx_gpu->viewport_end_x && viewport_end_y == psx_gpu->viewport_end_y) @@ -1682,13 +1703,6 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, psx_gpu->saved_viewport_end_x = viewport_end_x; psx_gpu->saved_viewport_end_y = viewport_end_y; - w = (u32)viewport_end_x - (u32)psx_gpu->viewport_start_x + 1; - d = psx_gpu->saved_hres - w; - if(-16 <= d && d <= 16) - { - update_enhancement_buf_table_from_x(psx_gpu, - psx_gpu->viewport_start_x, w); - } select_enhancement_buf(psx_gpu); #ifdef TEXTURE_CACHE_4BPP diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index 4a8b76fcc..04a15eb2e 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -13,6 +13,12 @@ #include #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif +#ifndef max +#define max(a, b) ((a) > (b) ? 
(a) : (b)) +#endif extern const unsigned char cmd_lengths[256]; #define command_lengths cmd_lengths @@ -61,7 +67,9 @@ int do_cmd_list(uint32_t *list, int count, int *last_cmd) static void *get_enhancement_bufer(int *x, int *y, int *w, int *h, int *vram_h) { - uint16_t *ret = select_enhancement_buf_ptr(&egpu, *x); + uint16_t *ret = select_enhancement_buf_ptr(&egpu, *x, *y); + if (ret == NULL) + return NULL; *x *= 2; *y *= 2; @@ -115,44 +123,28 @@ void renderer_finish(void) static __attribute__((noinline)) void sync_enhancement_buffers(int x, int y, int w, int h) { - const int step_x = 1024 / sizeof(egpu.enhancement_buf_by_x16); - int hres = egpu.saved_hres; - int x_buf, w1, s, fb_index; - u16 *src, *dst; - - if (egpu.enhancement_buf_ptr == NULL) - return; - - w += x & (step_x - 1); - x &= ~(step_x - 1); - w = (w + step_x - 1) & ~(step_x - 1); - if (y + h > 512) - h = 512 - y; - - // find x_buf which is an offset into this enhancement_buf - fb_index = egpu.enhancement_buf_by_x16[x / step_x]; - x_buf = x - egpu.enhancement_buf_start[fb_index]; - - while (w > 0) { - fb_index = egpu.enhancement_buf_by_x16[x / step_x]; - for (w1 = 0; w > 0 && x_buf < hres; x_buf += step_x, w1++, w -= step_x) - if (fb_index != egpu.enhancement_buf_by_x16[x / step_x + w1]) - break; - // skip further unneeded data, if any - for (s = 0; w > 0; s++, w -= step_x) - if (fb_index != egpu.enhancement_buf_by_x16[x / step_x + w1 + s]) - break; - - if (w1 > 0) { - src = gpu.vram + y * 1024 + x; - dst = select_enhancement_buf_ptr(&egpu, x); - dst += (y * 1024 + x) * 2; - scale2x_tiles8(dst, src, w1 * step_x / 8, h); - } - - x += (w1 + s) * step_x; - x &= 0x3ff; - x_buf = 0; + int i, right = x + w, bottom = y + h; + const u16 *src = gpu.vram; + // use these because the scanout struct may hold reduced w, h + // due to intersection stuff, see the update_enhancement_buf_scanouts() mess + int s_w = max(gpu.screen.hres, gpu.screen.w); + int s_h = gpu.screen.vres; + s_w = min(s_w, 512); + for (i = 0; i < 
ARRAY_SIZE(egpu.enhancement_scanouts); i++) { + const struct psx_gpu_scanout *s = &egpu.enhancement_scanouts[i]; + u16 *dst = select_enhancement_buf_by_index(&egpu, i); + int x1, x2, y1, y2; + if (s->w == 0) continue; + if (s->x >= right) continue; + if (s->x + s_w <= x) continue; + if (s->y >= bottom) continue; + if (s->y + s_h <= y) continue; + x1 = max(x, s->x); + x2 = min(right, s->x + s_w); + y1 = max(y, s->y); + y2 = min(bottom, s->y + s_h); + scale2x_tiles8(dst + y1 * 1024*2 + x1 * 2, + src + y1 * 1024 + x1, (x2 - x1 + 7) / 8u, y2 - y1); } } @@ -167,8 +159,8 @@ void renderer_update_caches(int x, int y, int w, int h, int state_changed) if (gpu.state.enhancement_active) { if (state_changed) { - egpu.saved_hres = 0; - renderer_notify_res_change(); + memset(egpu.enhancement_scanouts, 0, sizeof(egpu.enhancement_scanouts)); + egpu.enhancement_scanout_eselect = 0; return; } sync_enhancement_buffers(x, y, w, h); @@ -191,45 +183,15 @@ void renderer_set_interlace(int enable, int is_odd) void renderer_notify_res_change(void) { - renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres); + renderer_notify_scanout_change(gpu.screen.src_x, gpu.screen.src_y); } -void renderer_notify_scanout_x_change(int x, int w) +void renderer_notify_scanout_change(int x, int y) { - int hres = (w + 15) & ~15; - int max_bufs = ARRAY_SIZE(egpu.enhancement_scanout_x); - int need_update = 0; - int i; - - if (!gpu.state.enhancement_active) + if (!gpu.state.enhancement_active || !egpu.enhancement_buf_ptr) return; - assert(!(max_bufs & (max_bufs - 1))); - if (egpu.saved_hres != hres) { - for (i = 0; i < max_bufs; i++) - egpu.enhancement_scanout_x[i] = x; - need_update = 1; - } - - if (egpu.enhancement_scanout_x[egpu.enhancement_scanout_select] != x) - { - // maybe triple buffering? 
- for (i = 0; i < max_bufs; i++) - if (egpu.enhancement_scanout_x[i] == x) - break; - if (i == max_bufs) - need_update = 1; - - egpu.enhancement_scanout_x[egpu.enhancement_scanout_select] = x; - } - egpu.enhancement_scanout_select++; - egpu.enhancement_scanout_select &= max_bufs - 1; - if (need_update) - { - egpu.saved_hres = hres; - update_enhancement_buf_table_from_hres(&egpu); - sync_enhancement_buffers(0, 0, 1024, 512); - } + update_enhancement_buf_scanouts(&egpu, x, y, gpu.screen.hres, gpu.screen.vres); } #include "../../frontend/plugin_lib.h" diff --git a/plugins/gpu_senquack/gpulib_if.cpp b/plugins/gpu_senquack/gpulib_if.cpp index 72dcc6d67..e5a51aa50 100644 --- a/plugins/gpu_senquack/gpulib_if.cpp +++ b/plugins/gpu_senquack/gpulib_if.cpp @@ -144,7 +144,7 @@ void renderer_notify_res_change(void) */ } -void renderer_notify_scanout_x_change(int x, int w) +void renderer_notify_scanout_change(int x, int y) { } diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 1c461421a..02f6b9221 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -163,7 +163,7 @@ void renderer_notify_res_change(void) { } -void renderer_notify_scanout_x_change(int x, int w) +void renderer_notify_scanout_change(int x, int y) { } diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 15810b82d..fdb109ce0 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -260,6 +260,7 @@ long GPUshutdown(void) void GPUwriteStatus(uint32_t data) { uint32_t cmd = data >> 24; + int src_x, src_y; if (cmd < ARRAY_SIZE(gpu.regs)) { if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data) @@ -289,14 +290,17 @@ void GPUwriteStatus(uint32_t data) gpu.status |= PSX_GPU_STATUS_DMA(data & 3); break; case 0x05: - gpu.screen.src_x = data & 0x3ff; - gpu.screen.src_y = (data >> 10) & 0x1ff; - renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres); - if (gpu.frameskip.set) { - decide_frameskip_allow(gpu.ex_regs[3]); - if 
(gpu.frameskip.last_flip_frame != *gpu.state.frame_count) { - decide_frameskip(); - gpu.frameskip.last_flip_frame = *gpu.state.frame_count; + src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff; + if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) { + gpu.screen.src_x = src_x; + gpu.screen.src_y = src_y; + renderer_notify_scanout_change(src_x, src_y); + if (gpu.frameskip.set) { + decide_frameskip_allow(gpu.ex_regs[3]); + if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) { + decide_frameskip(); + gpu.frameskip.last_flip_frame = *gpu.state.frame_count; + } } } break; @@ -825,7 +829,7 @@ long GPUfreeze(uint32_t type, struct GPUFreeze *freeze) GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1)); } renderer_sync_ecmds(gpu.ex_regs); - renderer_update_caches(0, 0, 1024, 512, 1); + renderer_update_caches(0, 0, 1024, 512, 0); break; } diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 4abc36b7b..bf3d28a92 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -126,7 +126,7 @@ void renderer_flush_queues(void); void renderer_set_interlace(int enable, int is_odd); void renderer_set_config(const struct rearmed_cbs *config); void renderer_notify_res_change(void); -void renderer_notify_scanout_x_change(int x, int w); +void renderer_notify_scanout_change(int x, int y); int vout_init(void); int vout_finish(void); diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index f9ac0f30f..958468c90 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -84,6 +84,8 @@ void vout_update(void) if (!gpu.state.enhancement_was_active) return; // buffer not ready yet vram = gpu.get_enhancement_bufer(&src_x, &src_y, &w, &h, &vram_h); + if (vram == NULL) + return; x *= 2; y *= 2; src_x2 *= 2; } From 8392bff9a4b9daa48f5199051bbe8eff372279b4 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 23 Oct 2023 21:20:51 +0300 Subject: [PATCH 424/597] more timing hacks --- libpcsxcore/database.c | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/libpcsxcore/database.c b/libpcsxcore/database.c index 2d9a2ea04..a68fe4f38 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -90,6 +90,8 @@ cycle_multiplier_overrides[] = /* Digimon World */ { "SLUS01032", 153 }, { "SLES02914", 153 }, + /* Syphon Filter - reportedly hangs under unknown conditions */ + { "SCUS94240", 169 }, }; /* Function for automatic patching according to GameID. */ From c9f729d05b31bba1988b9336233f080421c16de5 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 24 Oct 2023 01:00:55 +0300 Subject: [PATCH 425/597] improve field status after enable sort of a hack (as usual) but ohwell... --- libpcsxcore/gpu.h | 2 +- libpcsxcore/psxhw.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/gpu.h b/libpcsxcore/gpu.h index ef9c71838..ec5019c9c 100644 --- a/libpcsxcore/gpu.h +++ b/libpcsxcore/gpu.h @@ -31,7 +31,7 @@ #define PSXGPU_FIELD (1u<<13) // both must be set for interlace to work -#define PSXGPU_ILACE_BITS (PSXGPU_ILACE | PSXGPU_DHEIGHT | PSXGPU_FIELD) +#define PSXGPU_ILACE_BITS (PSXGPU_ILACE | PSXGPU_DHEIGHT) #define HW_GPU_STATUS psxHu32ref(0x1814) diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index 4811f99ba..832e6d7d3 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -85,8 +85,13 @@ void psxHwWriteDmaIcr32(u32 value) void psxHwWriteGpuSR(u32 value) { + u32 old_sr = HW_GPU_STATUS, new_sr; GPU_writeStatus(value); gpuSyncPluginSR(); + new_sr = HW_GPU_STATUS; + // "The Next Tetris" seems to rely on the field order after enable + if ((old_sr ^ new_sr) & new_sr & SWAP32(PSXGPU_ILACE)) + frame_counter |= 1; } u32 psxHwReadGpuSR(void) From 9454d33991e921f526c0c6fddb6c1be057ba8ab3 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 23 Oct 2023 00:38:45 +0200 Subject: [PATCH 426/597] psxmem.h: Fix annoying warning Avoid doing arithmetic on pointers to keep compilers happy. 
Signed-off-by: Paul Cercueil --- libpcsxcore/psxmem.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/psxmem.h b/libpcsxcore/psxmem.h index f3c2051b8..4e95c4dd1 100644 --- a/libpcsxcore/psxmem.h +++ b/libpcsxcore/psxmem.h @@ -125,7 +125,8 @@ static inline void * psxm_lut(u32 mem, int write, u8 **lut) if (!DISABLE_MEM_LUTS) { void *ptr = lut[mem >> 16]; - return ptr == INVALID_PTR ? INVALID_PTR : ptr + (u16)mem; + return ptr == INVALID_PTR ? INVALID_PTR + : (void *)((uintptr_t)ptr + (u16)mem); } if (mem >= 0xa0000000) From 6a131b037d9133d6bffcb8e4e67940fcf069e539 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 24 Oct 2023 01:22:27 +0300 Subject: [PATCH 427/597] save some things that weren't saved but should have been libretro/pcsx_rearmed#274 --- frontend/libretro.c | 18 ++++++++++++++---- libpcsxcore/misc.c | 41 +++++++++++++++++++++++++++++++++++++++++ libpcsxcore/misc.h | 1 + 3 files changed, 56 insertions(+), 4 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index a30a1ab2e..d74a7b415 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -98,6 +98,7 @@ static bool show_advanced_gpu_peops_settings = true; static bool show_advanced_gpu_unai_settings = true; #endif static float mouse_sensitivity = 1.0f; +static unsigned int disk_current_index; typedef enum { @@ -1070,14 +1071,24 @@ static void save_close(void *file) bool retro_serialize(void *data, size_t size) { - int ret = SaveState(data); + int ret; + CdromFrontendId = disk_current_index; + ret = SaveState(data); return ret == 0 ? true : false; } +static bool disk_set_image_index(unsigned int index); + bool retro_unserialize(const void *data, size_t size) { - int ret = LoadState(data); - return ret == 0 ? 
true : false; + int ret; + CdromFrontendId = -1; + ret = LoadState(data); + if (ret) + return false; + if (CdromFrontendId != -1 && CdromFrontendId != disk_current_index) + disk_set_image_index(CdromFrontendId); + return true; } /* cheats */ @@ -1143,7 +1154,6 @@ void retro_cheat_set(unsigned index, bool enabled, const char *code) static unsigned int disk_initial_index; static char disk_initial_path[PATH_MAX]; static bool disk_ejected; -static unsigned int disk_current_index; static unsigned int disk_count; static struct disks_state { diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 9486d23bb..526ebd434 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -35,6 +35,7 @@ char CdromId[10] = ""; char CdromLabel[33] = ""; +int CdromFrontendId; // for frontend use // PSX Executable types #define PSX_EXE 1 @@ -640,7 +641,21 @@ static const char PcsxHeader[32] = "STv4 PCSX v" PCSX_VERSION; // If you make changes to the savestate version, please increment the value below. static const u32 SaveVersion = 0x8b410006; +#define MISC_MAGIC 0x4353494d +struct misc_save_data { + u32 magic; + u32 gteBusyCycle; + u32 muldivBusyCycle; + u32 biuReg; + u32 biosBranchCheck; + u32 gpuIdleAfter; + u32 gpuSr; + u32 frame_counter; + int CdromFrontendId; +}; + int SaveState(const char *file) { + struct misc_save_data *misc = (void *)(psxH + 0xf000); void *f; GPUFreeze_t *gpufP = NULL; SPUFreezeHdr_t spufH; @@ -649,6 +664,19 @@ int SaveState(const char *file) { int result = -1; int Size; + assert(!psxRegs.branching); + assert(!psxRegs.cpuInRecursion); + assert(!misc->magic); + misc->magic = MISC_MAGIC; + misc->gteBusyCycle = psxRegs.gteBusyCycle; + misc->muldivBusyCycle = psxRegs.muldivBusyCycle; + misc->biuReg = psxRegs.biuReg; + misc->biosBranchCheck = psxRegs.biosBranchCheck; + misc->gpuIdleAfter = psxRegs.gpuIdleAfter; + misc->gpuSr = HW_GPU_STATUS; + misc->frame_counter = frame_counter; + misc->CdromFrontendId = CdromFrontendId; + f = SaveFuncs.open(file, "wb"); if (f == 
NULL) return -1; @@ -700,11 +728,13 @@ int SaveState(const char *file) { result = 0; cleanup: + memset(misc, 0, sizeof(*misc)); SaveFuncs.close(f); return result; } int LoadState(const char *file) { + struct misc_save_data *misc = (void *)(psxH + 0xf000); u32 biosBranchCheckOld = psxRegs.biosBranchCheck; void *f; GPUFreeze_t *gpufP = NULL; @@ -740,6 +770,16 @@ int LoadState(const char *file) { psxRegs.biosBranchCheck = ~0; psxRegs.gpuIdleAfter = psxRegs.cycle - 1; HW_GPU_STATUS &= SWAP32(~PSXGPU_nBUSY); + if (misc->magic == MISC_MAGIC) { + psxRegs.gteBusyCycle = misc->gteBusyCycle; + psxRegs.muldivBusyCycle = misc->muldivBusyCycle; + psxRegs.biuReg = misc->biuReg; + psxRegs.biosBranchCheck = misc->biosBranchCheck; + psxRegs.gpuIdleAfter = misc->gpuIdleAfter; + HW_GPU_STATUS = misc->gpuSr; + frame_counter = misc->frame_counter; + CdromFrontendId = misc->CdromFrontendId; + } psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); @@ -776,6 +816,7 @@ int LoadState(const char *file) { result = 0; cleanup: + memset(misc, 0, sizeof(*misc)); SaveFuncs.close(f); return result; } diff --git a/libpcsxcore/misc.h b/libpcsxcore/misc.h index c5eb327a8..a1c36bb98 100644 --- a/libpcsxcore/misc.h +++ b/libpcsxcore/misc.h @@ -55,6 +55,7 @@ typedef struct { extern char CdromId[10]; extern char CdromLabel[33]; +extern int CdromFrontendId; // for frontend use void BiosBootBypass(); From db6e56b6bfb5bcdacc86150061861a74365331e0 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 24 Oct 2023 19:08:40 +0200 Subject: [PATCH 428/597] psxdma: Fix endian issue in gpuInterrupt() The HW_DMA2_MADR register contains little-endian data, therefore it must always be accessed through one of the swap macros. This fixes the slow DMA linked-list walking on big-endian systems. 
Signed-off-by: Paul Cercueil --- libpcsxcore/psxdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index 4db99ab1e..fa8f33940 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -221,8 +221,8 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU void gpuInterrupt() { if (HW_DMA2_CHCR == SWAP32(0x01000401) && !(HW_DMA2_MADR & SWAP32(0x800000))) { - u32 size, madr_next = 0xffffff; - size = GPU_dmaChain((u32 *)psxM, HW_DMA2_MADR & 0x1fffff, &madr_next); + u32 size, madr_next = 0xffffff, madr = SWAPu32(HW_DMA2_MADR); + size = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, &madr_next); HW_DMA2_MADR = SWAPu32(madr_next); psxRegs.gpuIdleAfter = psxRegs.cycle + size + 64; set_event(PSXINT_GPUDMA, size); From 44e76f8ad4944acfc109baf89beda7b723f8a209 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 24 Oct 2023 21:42:04 +0300 Subject: [PATCH 429/597] gpulib: add a "borderless" option to restore old behavior --- frontend/libretro.c | 4 +++- frontend/libretro_core_options.h | 3 ++- frontend/menu.c | 2 +- frontend/plugin_lib.h | 4 +++- plugins/gpulib/gpu.c | 16 ++++++++++------ plugins/gpulib/vout_pl.c | 6 ++++-- 6 files changed, 23 insertions(+), 12 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index d74a7b415..5a44e43e0 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2293,8 +2293,10 @@ static void update_variables(bool in_flight) { if (strcmp(var.value, "game") == 0) pl_rearmed_cbs.screen_centering_type = 1; - else if (strcmp(var.value, "manual") == 0) + else if (strcmp(var.value, "borderless") == 0) pl_rearmed_cbs.screen_centering_type = 2; + else if (strcmp(var.value, "manual") == 0) + pl_rearmed_cbs.screen_centering_type = 3; else // auto pl_rearmed_cbs.screen_centering_type = 0; } diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 60832742b..8379ade95 100644 --- a/frontend/libretro_core_options.h +++ 
b/frontend/libretro_core_options.h @@ -439,12 +439,13 @@ struct retro_core_option_v2_definition option_defs_us[] = { "pcsx_rearmed_screen_centering", "(GPU) Screen centering", NULL, - "The PSX has a feature allowing it to shift the image position on screen. Some (mostly PAL) games used this feature in a strange way making the image miscentered and causing borders to appear. With 'Auto' the emulator tries to correct this miscentering automatically. 'Game-controlled' uses the settings supplied by the game. 'Manual' allows to override those values with the settings below.", + "The PSX has a feature allowing it to shift the image position on screen. Some (mostly PAL) games used this feature in a strange way making the image miscentered and causing uneven borders to appear. With 'Auto' the emulator tries to correct this miscentering automatically. 'Game-controlled' uses the settings supplied by the game. 'Manual' allows to override those values with the settings below.", NULL, "video", { { "auto", "Auto" }, { "game", "Game-controlled" }, + { "borderless", "Borderless" }, { "manual", "Manual" }, { NULL, NULL }, }, diff --git a/frontend/menu.c b/frontend/menu.c index 6516e343b..868a486a1 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1264,7 +1264,7 @@ static const char *men_soft_filter[] = { "None", #endif NULL }; static const char *men_dummy[] = { NULL }; -static const char *men_centering[] = { "Auto", "Ingame", "Force", NULL }; +static const char *men_centering[] = { "Auto", "Ingame", "Borderless", "Force", NULL }; static const char h_scaler[] = "int. 2x - scales w. or h. 2x if it fits on screen\n" "int. 
4:3 - uses integer if possible, else fractional"; static const char h_cscaler[] = "Displays the scaler layer, you can resize it\n" diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index 97d44f258..76220978d 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -111,13 +111,15 @@ struct rearmed_cbs { } gpu_peopsgl; // misc int gpu_caps; - int screen_centering_type; // 0 - auto, 1 - game conrolled, 2 - manual + int screen_centering_type; int screen_centering_x; int screen_centering_y; }; extern struct rearmed_cbs pl_rearmed_cbs; +enum centering_type { C_AUTO = 0, C_INGAME, C_BORDERLESS, C_MANUAL }; + enum gpu_plugin_caps { GPU_CAP_OWNS_DISPLAY = (1 << 0), GPU_CAP_SUPPORTS_2X = (1 << 1), diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index fdb109ce0..f07677451 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -13,8 +13,11 @@ #include #include "gpu.h" #include "../../libpcsxcore/gpu.h" // meh +#include "../../frontend/plugin_lib.h" +#ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif #ifdef __GNUC__ #define unlikely(x) __builtin_expect((x), 0) #define preload __builtin_prefetch @@ -81,9 +84,9 @@ static noinline void update_width(void) sw /= hdiv; sw = (sw + 2) & ~3; // according to nocash switch (gpu.state.screen_centering_type) { - case 1: + case C_INGAME: break; - case 2: + case C_MANUAL: x = gpu.state.screen_centering_x; break; default: @@ -125,9 +128,12 @@ static noinline void update_height(void) /* nothing displayed? 
*/; else { switch (gpu.state.screen_centering_type) { - case 1: + case C_INGAME: + break; + case C_BORDERLESS: + y = 0; break; - case 2: + case C_MANUAL: y = gpu.state.screen_centering_y; break; default: @@ -903,8 +909,6 @@ void GPUgetScreenInfo(int *y, int *base_hres) *base_hres >>= 1; } -#include "../../frontend/plugin_lib.h" - void GPUrearmedCallbacks(const struct rearmed_cbs *cbs) { gpu.frameskip.set = cbs->frameskip; diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index 958468c90..ab56cad2d 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -29,9 +29,11 @@ static void check_mode_change(int force) { int w = gpu.screen.hres; int h = gpu.screen.vres; - int w_out = w; - int h_out = h; + int w_out, h_out; + if (gpu.state.screen_centering_type == C_BORDERLESS) + h = gpu.screen.h; + w_out = w, h_out = h; #ifdef RAW_FB_DISPLAY w = w_out = 1024, h = h_out = 512; #endif From 9ed80467762a5024f7ba04e9fb384faceca35c29 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 25 Oct 2023 01:36:42 +0300 Subject: [PATCH 430/597] gpulib: trust game's centering based on a database libretro/pcsx_rearmed#160 --- frontend/plugin.c | 3 +++ frontend/plugin_lib.h | 1 + libpcsxcore/database.c | 11 ++++++++++- libpcsxcore/psxcommon.h | 3 ++- libpcsxcore/psxhw.c | 2 +- plugins/gpulib/gpu.c | 6 +++++- plugins/gpulib/gpu.h | 3 ++- 7 files changed, 24 insertions(+), 5 deletions(-) diff --git a/frontend/plugin.c b/frontend/plugin.c index 3683b5002..88d756ebd 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -264,6 +264,9 @@ void plugin_call_rearmed_cbs(void) extern void *hGPUDriver; void (*rearmed_set_cbs)(const struct rearmed_cbs *cbs); + pl_rearmed_cbs.screen_centering_type_default = + Config.hacks.gpu_centering ? 
C_INGAME : C_AUTO; + rearmed_set_cbs = SysLoadSym(hGPUDriver, "GPUrearmedCallbacks"); if (rearmed_set_cbs != NULL) rearmed_set_cbs(&pl_rearmed_cbs); diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index 76220978d..2ac49f2ce 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -112,6 +112,7 @@ struct rearmed_cbs { // misc int gpu_caps; int screen_centering_type; + int screen_centering_type_default; int screen_centering_x; int screen_centering_y; }; diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index a68fe4f38..2acd67541 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -39,6 +39,14 @@ static const char * const gpu_busy_hack_db[] = "SLPS01919", "SLPS01920", }; +static const char * const gpu_centering_hack_db[] = +{ + /* Gradius Gaiden */ + "SLPM86042", "SLPM86103", "SLPM87323", + /* Sexy Parodius */ + "SLPM86009", +}; + #define HACK_ENTRY(var, list) \ { #var, &Config.hacks.var, list, ARRAY_SIZE(list) } @@ -53,7 +61,8 @@ hack_db[] = { HACK_ENTRY(cdr_read_timing, cdr_read_hack_db), HACK_ENTRY(gpu_slow_list_walking, gpu_slow_llist_db), - HACK_ENTRY(gpu_busy_hack, gpu_busy_hack_db), + HACK_ENTRY(gpu_busy, gpu_busy_hack_db), + HACK_ENTRY(gpu_centering, gpu_centering_hack_db), }; static const struct diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index f978a5836..09fb39a4e 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -150,7 +150,8 @@ typedef struct { struct { boolean cdr_read_timing; boolean gpu_slow_list_walking; - boolean gpu_busy_hack; + boolean gpu_busy; + boolean gpu_centering; } hacks; } PcsxConfig; diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index 832e6d7d3..b8ca1996a 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -42,7 +42,7 @@ void psxHwReset() { cdrReset(); psxRcntInit(); HW_GPU_STATUS = SWAP32(0x10802000); - psxHwReadGpuSRptr = Config.hacks.gpu_busy_hack + psxHwReadGpuSRptr = Config.hacks.gpu_busy ? 
psxHwReadGpuSRbusyHack : psxHwReadGpuSR; } diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index f07677451..2ac36c1b0 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -74,7 +74,10 @@ static noinline void update_width(void) int hres = hres_all[(gpu.status >> 16) & 7]; int pal = gpu.status & PSX_GPU_STATUS_PAL; int sw = gpu.screen.x2 - gpu.screen.x1; + int type = gpu.state.screen_centering_type; int x = 0, x_auto; + if (type == C_AUTO) + type = gpu.state.screen_centering_type_default; if (sw <= 0) /* nothing displayed? */; else { @@ -83,7 +86,7 @@ static noinline void update_width(void) x = (x + 1) & ~1; // blitter limitation sw /= hdiv; sw = (sw + 2) & ~3; // according to nocash - switch (gpu.state.screen_centering_type) { + switch (type) { case C_INGAME: break; case C_MANUAL: @@ -919,6 +922,7 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs) gpu.state.frame_count = cbs->gpu_frame_count; gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace; gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable; + gpu.state.screen_centering_type_default = cbs->screen_centering_type_default; if (gpu.state.screen_centering_type != cbs->screen_centering_type || gpu.state.screen_centering_x != cbs->screen_centering_x || gpu.state.screen_centering_y != cbs->screen_centering_y) { diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index bf3d28a92..ab1d23a7a 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -89,7 +89,8 @@ struct psx_gpu { } last_list; uint32_t last_vram_read_frame; uint32_t w_out_old, h_out_old, status_vo_old; - int screen_centering_type; // 0 - auto, 1 - game conrolled, 2 - manual + short screen_centering_type; + short screen_centering_type_default; int screen_centering_x; int screen_centering_y; } state; From b560436b844ccc088f7e72aff7e246b0d285744c Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 25 Oct 2023 02:07:00 +0300 Subject: [PATCH 431/597] fix build --- libpcsxcore/new_dynarec/pcsxmem.c | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index f4b1d90e8..e16f5ee77 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -468,7 +468,7 @@ void new_dyna_pcsx_mem_reset(void) // plugins might change so update the pointers map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); map_item(&mem_iowtab[IOMEM32(0x1810)], GPU_writeData, 1); - if (Config.hacks.gpu_busy_hack) + if (Config.hacks.gpu_busy) map_item(&mem_iortab[IOMEM32(0x1814)], psxHwReadGpuSRbusyHack, 1); else map_item(&mem_iortab[IOMEM32(0x1814)], psxHwReadGpuSR, 1); From f0d80aa4838a05c8e86d99bb1379ba0579794e10 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 25 Oct 2023 23:06:59 +0300 Subject: [PATCH 432/597] cdriso: unbreak chd cdda fixes notaz/pcsx_rearmed#328 --- libpcsxcore/cdriso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index db57b0c54..660d109cc 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -1708,7 +1708,7 @@ long CALLBACK ISOreadCDDA(unsigned char m, unsigned char s, unsigned char f, uns } } } - if (!handle) { + if (!handle && !chd_img) { memset(buffer, 0, CD_FRAMESIZE_RAW); return -1; } From 1587b08ecb80dd257ac255f76e716a6053c2785b Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 25 Oct 2023 23:08:48 +0300 Subject: [PATCH 433/597] gpu_neon: fix some missing ebuf updates --- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 11 ++++++++--- plugins/gpu_neon/psx_gpu_if.c | 2 ++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index de227d5b9..5f69919e2 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -889,8 +889,8 @@ static void *select_enhancement_buf_ptr(psx_gpu_struct *psx_gpu, s32 x, s32 y) static void select_enhancement_buf(psx_gpu_struct *psx_gpu) 
{ - s32 x = psx_gpu->saved_viewport_start_x; - s32 y = psx_gpu->saved_viewport_start_y; + s32 x = psx_gpu->saved_viewport_start_x + 16; + s32 y = psx_gpu->saved_viewport_start_y + 16; psx_gpu->enhancement_current_buf_ptr = select_enhancement_buf_ptr(psx_gpu, x, y); } @@ -1704,7 +1704,12 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, psx_gpu->saved_viewport_end_y = viewport_end_y; select_enhancement_buf(psx_gpu); - +#if 0 + if (!psx_gpu->enhancement_current_buf_ptr) + log_anomaly("vp %3d,%3d %3d,%d - no buf\n", + psx_gpu->viewport_start_x, psx_gpu->viewport_start_y, + viewport_end_x, viewport_end_y); +#endif #ifdef TEXTURE_CACHE_4BPP psx_gpu->viewport_mask = texture_region_mask(psx_gpu->viewport_start_x, diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index 04a15eb2e..ea98ade85 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -161,6 +161,8 @@ void renderer_update_caches(int x, int y, int w, int h, int state_changed) if (state_changed) { memset(egpu.enhancement_scanouts, 0, sizeof(egpu.enhancement_scanouts)); egpu.enhancement_scanout_eselect = 0; + update_enhancement_buf_scanouts(&egpu, + gpu.screen.src_x, gpu.screen.src_y, gpu.screen.hres, gpu.screen.vres); return; } sync_enhancement_buffers(x, y, w, h); From 6ca445e297c87f83b1e7af0a880a5a599861f066 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 26 Oct 2023 00:47:50 +0300 Subject: [PATCH 434/597] cdrom: try some different seek times libretro/pcsx_rearmed#781 libretro/pcsx_rearmed#150 --- libpcsxcore/cdrom.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index bc973e904..fd09c7efc 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -448,11 +448,11 @@ static int ReadTrack(const u8 *time) tmp[1] = itob(time[1]); tmp[2] = itob(time[2]); + CDR_LOG("ReadTrack *** %02x:%02x:%02x\n", tmp[0], tmp[1], tmp[2]); + if (memcmp(cdr.Prev, tmp, 3) == 0) return 
1; - CDR_LOG("ReadTrack *** %02x:%02x:%02x\n", tmp[0], tmp[1], tmp[2]); - read_ok = CDR_readTrack(tmp); if (read_ok) memcpy(cdr.Prev, tmp, 3); @@ -560,14 +560,21 @@ static int cdrSeekTime(unsigned char *target) { int diff = msf2sec(cdr.SetSectorPlay) - msf2sec(target); int seekTime = abs(diff) * (cdReadTime / 2000); + int cyclesSinceRS = psxRegs.cycle - cdr.LastReadSeekCycles; seekTime = MAX_VALUE(seekTime, 20000); // need this stupidly long penalty or else Spyro2 intro desyncs - if ((s32)(psxRegs.cycle - cdr.LastReadSeekCycles) > cdReadTime * 8) + // note: if misapplied this breaks MGS cutscenes among other things + if (cyclesSinceRS > cdReadTime * 50) seekTime += cdReadTime * 25; + // Transformers Beast Wars Transmetals does Setloc(x),SeekL,Setloc(x),ReadN + // and then wants some slack time + else if (cyclesSinceRS < cdReadTime *3/2) + seekTime += cdReadTime; seekTime = MIN_VALUE(seekTime, PSXCLK * 2 / 3); - CDR_LOG("seek: %.2f %.2f\n", (float)seekTime / PSXCLK, (float)seekTime / cdReadTime); + CDR_LOG("seek: %.2f %.2f (%.2f)\n", (float)seekTime / PSXCLK, + (float)seekTime / cdReadTime, (float)cyclesSinceRS / cdReadTime); return seekTime; } @@ -1191,11 +1198,11 @@ void cdrInterrupt(void) { // FALLTHROUGH set_error: - CDR_LOG_I("cmd %02x error %02x\n", Cmd, error); SetResultSize(2); cdr.Result[0] = cdr.StatP | STATUS_ERROR; cdr.Result[1] = not_ready ? 
ERROR_NOTREADY : error; cdr.Stat = DiskError; + CDR_LOG_I("cmd %02x error %02x\n", Cmd, cdr.Result[1]); break; } From 4a6b26b86e4303d2ff3d242ac9fbc6d8cd4f8f8b Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 26 Oct 2023 00:56:20 +0300 Subject: [PATCH 435/597] cdrom: maybe more accurate lid behavior libretro/pcsx_rearmed#779 --- libpcsxcore/cdrom.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index fd09c7efc..cc506c96d 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -199,7 +199,7 @@ unsigned char Test23[] = { 0x43, 0x58, 0x44, 0x32, 0x39 ,0x34, 0x30, 0x51 }; #define STATUS_READ (1<<5) // 0x20 #define STATUS_SHELLOPEN (1<<4) // 0x10 #define STATUS_UNKNOWN3 (1<<3) // 0x08 -#define STATUS_UNKNOWN2 (1<<2) // 0x04 +#define STATUS_SEEKERROR (1<<2) // 0x04 #define STATUS_ROTATING (1<<1) // 0x02 #define STATUS_ERROR (1<<0) // 0x01 @@ -207,6 +207,7 @@ unsigned char Test23[] = { 0x43, 0x58, 0x44, 0x32, 0x39 ,0x34, 0x30, 0x51 }; #define ERROR_NOTREADY (1<<7) // 0x80 #define ERROR_INVALIDCMD (1<<6) // 0x40 #define ERROR_INVALIDARG (1<<5) // 0x20 +#define ERROR_SHELLOPEN (1<<3) // 0x08 // 1x = 75 sectors per second // PSXCLK = 1 sec in the ps @@ -306,7 +307,7 @@ void cdrLidSeekInterrupt(void) default: case DRIVESTATE_STANDBY: StopCdda(); - StopReading(); + //StopReading(); SetPlaySeekRead(cdr.StatP, 0); if (CDR_getStatus(&stat) == -1) @@ -326,11 +327,28 @@ void cdrLidSeekInterrupt(void) // 02, 12, 10 if (!(cdr.StatP & STATUS_SHELLOPEN)) { + SetPlaySeekRead(cdr.StatP, 0); cdr.StatP |= STATUS_SHELLOPEN; - // could generate error irq here, but real hardware - // only sometimes does that - // (not done when lots of commands are sent?) + // IIRC this sometimes doesn't happen on real hw + // (when lots of commands are sent?) 
+ if (cdr.Reading) { + StopReading(); + SetResultSize(2); + cdr.Result[0] = cdr.StatP | STATUS_SEEKERROR; + cdr.Result[1] = ERROR_SHELLOPEN; + cdr.Stat = DiskError; + setIrq(0x1006); + } + if (cdr.CmdInProgress) { + psxRegs.interrupt &= ~(1 << PSXINT_CDR); + cdr.CmdInProgress = 0; + SetResultSize(2); + cdr.Result[0] = cdr.StatP | STATUS_ERROR; + cdr.Result[1] = ERROR_NOTREADY; + cdr.Stat = DiskError; + setIrq(0x1007); + } set_event(PSXINT_CDRLID, cdReadTime * 30); break; @@ -665,7 +683,7 @@ void cdrPlayReadInterrupt(void) if (!cdr.Stat && (cdr.Mode & (MODE_AUTOPAUSE|MODE_REPORT))) cdrPlayInterrupt_Autopause(); - if (!cdr.Muted && !Config.Cdda) { + if (!cdr.Muted && cdr.Play && !Config.Cdda) { cdrPrepCdda(read_buf, CD_FRAMESIZE_RAW / 4); cdrAttenuate(read_buf, CD_FRAMESIZE_RAW / 4, 1); SPU_playCDDAchannel(read_buf, CD_FRAMESIZE_RAW, psxRegs.cycle, cdr.FirstSector); @@ -756,7 +774,7 @@ void cdrInterrupt(void) { break; case CdlSetloc: - case CdlSetloc + CMD_WHILE_NOT_READY: + // case CdlSetloc + CMD_WHILE_NOT_READY: // or is it? CDR_LOG("CDROM setloc command (%02X, %02X, %02X)\n", cdr.Param[0], cdr.Param[1], cdr.Param[2]); // MM must be BCD, SS must be BCD and <0x60, FF must be BCD and <0x75 From 718b363f51678f354fead84ef98e4c28dcae3689 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 26 Oct 2023 01:00:29 +0300 Subject: [PATCH 436/597] psxhw: debug-log more weird writes they'd likely need special handling --- libpcsxcore/psxhw.c | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index b8ca1996a..f5efa22bd 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -473,33 +473,15 @@ u32 psxHwRead32(u32 add) { void psxHwWrite8(u32 add, u8 value) { switch (add & 0x1fffffff) { - case 0x1f801040: sioWrite8(value); break; + case 0x1f801040: sioWrite8(value); break; + case 0x1f8010f6: + // nocash documents it as forced w32, but still games use this? 
+ break; case 0x1f801800: cdrWrite0(value); break; case 0x1f801801: cdrWrite1(value); break; case 0x1f801802: cdrWrite2(value); break; case 0x1f801803: cdrWrite3(value); break; - case 0x1f801041: case 0x1f801042: case 0x1f801043: - case 0x1f801044: case 0x1f801045: - case 0x1f801046: case 0x1f801047: - case 0x1f801048: case 0x1f801049: - case 0x1f80104a: case 0x1f80104b: - case 0x1f80104c: case 0x1f80104d: - case 0x1f80104e: case 0x1f80104f: - case 0x1f801050: case 0x1f801051: - case 0x1f801054: case 0x1f801055: - case 0x1f801058: case 0x1f801059: - case 0x1f80105a: case 0x1f80105b: - case 0x1f80105c: case 0x1f80105d: - case 0x1f801100: case 0x1f801101: - case 0x1f801104: case 0x1f801105: - case 0x1f801108: case 0x1f801109: - case 0x1f801110: case 0x1f801111: - case 0x1f801114: case 0x1f801115: - case 0x1f801118: case 0x1f801119: - case 0x1f801120: case 0x1f801121: - case 0x1f801124: case 0x1f801125: - case 0x1f801128: case 0x1f801129: case 0x1f801810: case 0x1f801811: case 0x1f801812: case 0x1f801813: case 0x1f801814: case 0x1f801815: @@ -517,6 +499,8 @@ void psxHwWrite8(u32 add, u8 value) { SPU_writeRegister(add, value, psxRegs.cycle); return; } + else if (0x1f801000 <= add && add < 0x1f801800) + log_unhandled("unhandled w8 %08x @%08x\n", add, psxRegs.pc); psxHu8(add) = value; #ifdef PSXHW_LOG From 9930d23dc9c698c804b20b6eb6936c3bf4e0868e Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 27 Oct 2023 00:24:47 +0300 Subject: [PATCH 437/597] cdrom: proper autopause int libretro/pcsx_rearmed#396 --- libpcsxcore/cdrom.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index cc506c96d..99e8dcd6f 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -517,14 +517,10 @@ static void cdrPlayInterrupt_Autopause() u32 i; if ((cdr.Mode & MODE_AUTOPAUSE) && cdr.TrackChanged) { - CDR_LOG( "CDDA STOP\n" ); + CDR_LOG_I("autopause\n"); - // Magic the Gathering - // - looping territory cdda - - 
// ...? - //cdr.ResultReady = 1; - //cdr.Stat = DataReady; + SetResultSize(1); + cdr.Result[0] = cdr.StatP; cdr.Stat = DataEnd; setIrq(0x1000); // 0x1000 just for logging purposes @@ -534,6 +530,7 @@ static void cdrPlayInterrupt_Autopause() else if ((cdr.Mode & MODE_REPORT) && !cdr.ReportDelay && ((cdr.subq.Absolute[2] & 0x0f) == 0 || cdr.FastForward || cdr.FastBackward)) { + SetResultSize(8); cdr.Result[0] = cdr.StatP; cdr.Result[1] = cdr.subq.Track; cdr.Result[2] = cdr.subq.Index; @@ -558,15 +555,10 @@ static void cdrPlayInterrupt_Autopause() cdr.Result[4] = cdr.subq.Absolute[1]; cdr.Result[5] = cdr.subq.Absolute[2]; } - cdr.Result[6] = abs_lev_max >> 0; cdr.Result[7] = abs_lev_max >> 8; - // Rayman: Logo freeze (resultready + dataready) - cdr.ResultReady = 1; cdr.Stat = DataReady; - - SetResultSize(8); setIrq(0x1001); } @@ -667,11 +659,12 @@ void cdrPlayReadInterrupt(void) if (!cdr.Play) return; - CDR_LOG( "CDDA - %d:%d:%d\n", - cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2] ); + CDR_LOG("CDDA - %02d:%02d:%02d m %02x\n", + cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2], cdr.Mode); SetPlaySeekRead(cdr.StatP, STATUS_PLAY); if (memcmp(cdr.SetSectorPlay, cdr.SetSectorEnd, 3) == 0) { + CDR_LOG_I("end stop\n"); StopCdda(); SetPlaySeekRead(cdr.StatP, 0); cdr.TrackChanged = TRUE; From 00274bf7d753021f93f7c29593084bed62b093c9 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 27 Oct 2023 23:56:06 +0300 Subject: [PATCH 438/597] adjust vita's mappings to more resemble other platforms --- frontend/libretro.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 5a44e43e0..26f36d428 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -377,7 +377,7 @@ psx_map_t custom_psx_maps[] = { { NULL, 0x12800000, 0x010000, MAP_TAG_OTHER }, // 0x1f800000 { NULL, 0x12c00000, 0x080000, MAP_TAG_OTHER }, // 0x1fc00000 { NULL, 0x11000000, 0x800000, 
MAP_TAG_LUTS }, // 0x08000000 - { NULL, 0x12000000, 0x200000, MAP_TAG_VRAM }, // 0x00000000 + { NULL, 0x12000000, 0x201000, MAP_TAG_VRAM }, // 0x00000000 }; void *pl_3ds_mmap(unsigned long addr, size_t size, int is_fixed, @@ -446,19 +446,20 @@ void pl_3ds_munmap(void *ptr, size_t size, enum psxMapTag tag) typedef struct { void *buffer; - uint32_t target_map; size_t size; enum psxMapTag tag; + int used; } psx_map_t; -void *addr = NULL; +static void *addr = NULL; psx_map_t custom_psx_maps[] = { - { NULL, NULL, 0x210000, MAP_TAG_RAM }, // 0x80000000 - { NULL, NULL, 0x010000, MAP_TAG_OTHER }, // 0x1f800000 - { NULL, NULL, 0x080000, MAP_TAG_OTHER }, // 0x1fc00000 - { NULL, NULL, 0x800000, MAP_TAG_LUTS }, // 0x08000000 - { NULL, NULL, 0x200000, MAP_TAG_VRAM }, // 0x00000000 + { NULL, 0x800000, MAP_TAG_LUTS }, + { NULL, 0x080000, MAP_TAG_OTHER }, + { NULL, 0x010000, MAP_TAG_OTHER }, + { NULL, 0x201000, MAP_TAG_VRAM }, + { NULL, 0x802000, MAP_TAG_VRAM }, // enhanced renderer + { NULL, 0x210000, MAP_TAG_RAM }, }; int init_vita_mmap() @@ -468,12 +469,13 @@ int init_vita_mmap() addr = malloc(64 * 1024 * 1024); if (addr == NULL) return -1; - tmpaddr = ((u32)(addr + 0xFFFFFF)) & ~0xFFFFFF; - custom_psx_maps[0].buffer = tmpaddr + 0x2000000; - custom_psx_maps[1].buffer = tmpaddr + 0x1800000; - custom_psx_maps[2].buffer = tmpaddr + 0x1c00000; - custom_psx_maps[3].buffer = tmpaddr + 0x0000000; + tmpaddr = (void *)(((size_t)addr + 0xFFFFFF) & ~0xFFFFFF); + custom_psx_maps[0].buffer = tmpaddr + 0x0000000; + custom_psx_maps[1].buffer = tmpaddr + 0x0800000; + custom_psx_maps[2].buffer = tmpaddr + 0x0880000; + custom_psx_maps[3].buffer = tmpaddr + 0x0900000; custom_psx_maps[4].buffer = tmpaddr + 0x1000000; + custom_psx_maps[5].buffer = tmpaddr + 0x2000000; #if 0 for(n = 0; n < 5; n++){ sceClibPrintf("addr reserved %x\n",custom_psx_maps[n].buffer); @@ -484,6 +486,11 @@ int init_vita_mmap() void deinit_vita_mmap() { + size_t i; + for (i = 0; i < sizeof(custom_psx_maps) / 
sizeof(custom_psx_maps[0]); i++) { + custom_psx_maps[i].buffer = NULL; + custom_psx_maps[i].used = 0; + } free(addr); } @@ -497,8 +504,9 @@ void *pl_vita_mmap(unsigned long addr, size_t size, int is_fixed, for (; custom_map->size; custom_map++) { - if ((custom_map->size == size) && (custom_map->tag == tag)) + if (custom_map->size == size && custom_map->tag == tag && !custom_map->used) { + custom_map->used = 1; return custom_map->buffer; } } @@ -516,6 +524,7 @@ void pl_vita_munmap(void *ptr, size_t size, enum psxMapTag tag) { if ((custom_map->buffer == ptr)) { + custom_map->used = 0; return; } } From 5d9e675ab0d2e2e548264843d4f3a25d1f02f129 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 28 Oct 2023 22:53:21 +0300 Subject: [PATCH 439/597] libretro: try to support different pitches untested as GET_CURRENT_SOFTWARE_FRAMEBUFFER returns false in my setup --- frontend/libretro.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 26f36d428..0642c8e35 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -80,7 +80,7 @@ static unsigned msg_interface_version = 0; static void *vout_buf; static void *vout_buf_ptr; -static int vout_width, vout_height; +static int vout_width = 256, vout_height = 240, vout_pitch = 256; static int vout_fb_dirty; static int psx_w, psx_h; static bool vout_can_dupe; @@ -234,8 +234,13 @@ static void set_vout_fb() fb.height = vout_height; fb.access_flags = RETRO_MEMORY_ACCESS_WRITE; - if (environ_cb(RETRO_ENVIRONMENT_GET_CURRENT_SOFTWARE_FRAMEBUFFER, &fb) && fb.format == RETRO_PIXEL_FORMAT_RGB565) - vout_buf_ptr = (uint16_t *)fb.data; + vout_pitch = vout_width; + if (environ_cb(RETRO_ENVIRONMENT_GET_CURRENT_SOFTWARE_FRAMEBUFFER, &fb) && fb.format == RETRO_PIXEL_FORMAT_RGB565) { + vout_buf_ptr = fb.data; + if (fb.pitch / 2 != vout_pitch && fb.pitch != vout_width * 2) + SysPrintf("got unusual pitch %zd for resolution %dx%d\n", fb.pitch, vout_width, 
vout_height); + vout_pitch = fb.pitch / 2; + } else vout_buf_ptr = vout_buf; } @@ -317,7 +322,7 @@ static void vout_flip(const void *vram, int stride, int bgr24, { unsigned short *dest = vout_buf_ptr; const unsigned short *src = vram; - int dstride = vout_width, h1 = h; + int dstride = vout_pitch, h1 = h; int port = 0; if (vram == NULL || dims_changed || (in_enable_crosshair[0] + in_enable_crosshair[1]) > 0) @@ -357,7 +362,7 @@ static void vout_flip(const void *vram, int stride, int bgr24, out: #ifndef FRONTEND_SUPPORTS_RGB565 - convert(vout_buf_ptr, vout_width * vout_height * 2); + convert(vout_buf_ptr, vout_pitch * vout_height * 2); #endif vout_fb_dirty = 1; pl_rearmed_cbs.flip_cnt++; @@ -976,8 +981,8 @@ void retro_get_system_info(struct retro_system_info *info) void retro_get_system_av_info(struct retro_system_av_info *info) { - unsigned geom_height = vout_height > 0 ? vout_height : 240; - unsigned geom_width = vout_width > 0 ? vout_width : 320; + unsigned geom_height = vout_height; + unsigned geom_width = vout_width; memset(info, 0, sizeof(*info)); info->timing.fps = is_pal_mode ? 50.0 : 60.0; @@ -2972,6 +2977,7 @@ void retro_run(void) LoadCdrom(); } + set_vout_fb(); print_internal_fps(); /* Check whether current frame should @@ -3033,10 +3039,8 @@ void retro_run(void) } video_cb((vout_fb_dirty || !vout_can_dupe || !duping_enable) ? 
vout_buf_ptr : NULL, - vout_width, vout_height, vout_width * 2); + vout_width, vout_height, vout_pitch * 2); vout_fb_dirty = 0; - - set_vout_fb(); } static bool try_use_bios(const char *path, bool preferred_only) From 91da8e32287f181a870140fdcc8fcdac7bafdebe Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 28 Oct 2023 23:23:30 +0300 Subject: [PATCH 440/597] libretro: clear the allocated buffers there is code that relies on that memory to be cleared because linux mmap() clears it --- frontend/libretro.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 0642c8e35..cf875beb4 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -400,6 +400,7 @@ void *pl_3ds_mmap(unsigned long addr, size_t size, int is_fixed, if ((custom_map->size == size) && (custom_map->tag == tag)) { uint32_t ptr_aligned, tmp; + void *ret; custom_map->buffer = malloc(size + 0x1000); ptr_aligned = (((u32)custom_map->buffer) + 0xFFF) & ~0xFFF; @@ -410,12 +411,14 @@ void *pl_3ds_mmap(unsigned long addr, size_t size, int is_fixed, exit(1); } - return (void *)custom_map->target_map; + ret = (void *)custom_map->target_map; + memset(ret, 0, size); + return ret; } } } - return malloc(size); + return calloc(size, 1); } void pl_3ds_munmap(void *ptr, size_t size, enum psxMapTag tag) @@ -481,6 +484,7 @@ int init_vita_mmap() custom_psx_maps[3].buffer = tmpaddr + 0x0900000; custom_psx_maps[4].buffer = tmpaddr + 0x1000000; custom_psx_maps[5].buffer = tmpaddr + 0x2000000; + memset(tmpaddr, 0, 0x2210000); #if 0 for(n = 0; n < 5; n++){ sceClibPrintf("addr reserved %x\n",custom_psx_maps[n].buffer); @@ -516,7 +520,7 @@ void *pl_vita_mmap(unsigned long addr, size_t size, int is_fixed, } } - return malloc(size); + return calloc(size, 1); } void pl_vita_munmap(void *ptr, size_t size, enum psxMapTag tag) @@ -3298,8 +3302,10 @@ void retro_init(void) #elif defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) && P_HAVE_POSIX_MEMALIGN if 
(posix_memalign(&vout_buf, 16, VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2) != 0) vout_buf = (void *) 0; + else + memset(vout_buf, 0, VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2); #else - vout_buf = malloc(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2); + vout_buf = calloc(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT, 2); #endif vout_buf_ptr = vout_buf; From 606bece1f0e8aaf037a2abc0247059dcf9ad382c Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 28 Oct 2023 23:40:01 +0300 Subject: [PATCH 441/597] libretro: fix option mismatch --- frontend/libretro.c | 35 +++++++++++++------------------- frontend/libretro_core_options.h | 12 +++++++---- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index cf875beb4..c7439dd44 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2250,6 +2250,7 @@ static void update_variables(bool in_flight) spu_config.iUseThread = 0; } +#if 0 // currently disabled, see USE_READ_THREAD in libpcsxcore/cdriso.c if (P_HAVE_PTHREAD) { var.value = NULL; var.key = "pcsx_rearmed_async_cd"; @@ -2272,6 +2273,7 @@ static void update_variables(bool in_flight) } } } +#endif var.value = NULL; var.key = "pcsx_rearmed_noxadecoding"; @@ -2573,6 +2575,18 @@ static void update_variables(bool in_flight) mouse_sensitivity = atof(var.value); } + if (found_bios) + { + var.value = NULL; + var.key = "pcsx_rearmed_show_bios_bootlogo"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + Config.SlowBoot = 0; + if (strcmp(var.value, "enabled") == 0) + Config.SlowBoot = 1; + } + } + if (in_flight) { // inform core things about possible config changes @@ -2590,27 +2604,6 @@ static void update_variables(bool in_flight) /* dfinput_activate(); */ } - else - { - //not yet running - - //bootlogo display hack - if (found_bios) - { - var.value = NULL; - var.key = "pcsx_rearmed_show_bios_bootlogo"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) - { - Config.SlowBoot = 0; - rebootemu = 0; - if (strcmp(var.value, 
"enabled") == 0) - { - Config.SlowBoot = 1; - rebootemu = 1; - } - } - } - } update_option_visibility(); } diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 8379ade95..5ec62f4b9 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -1577,13 +1577,18 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled", }, +#endif /* !DRC_DISABLE && !LIGHTREC */ { "pcsx_rearmed_nostalls", - "(Speed Hack) Disable CPU/GTE Stalls", "Disable CPU/GTE Stalls", - "Will cause some games to run too quickly.", NULL, - "speed_hack", + "Will cause some games to run too quickly." +#if defined(LIGHTREC) + " Interpreter only." +#endif + , + NULL, + "compat_hack", { { "disabled", NULL }, { "enabled", NULL }, @@ -1591,7 +1596,6 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled", }, -#endif /* !DRC_DISABLE && !LIGHTREC */ { NULL, NULL, NULL, NULL, NULL, NULL, {{0}}, NULL }, }; From 02b1a085fcb5bbabc4a9148daba91c3dc0a798f8 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 29 Oct 2023 01:19:32 +0300 Subject: [PATCH 442/597] cdrom: allow resetting with lid open libretro/pcsx_rearmed#679 --- libpcsxcore/cdriso.c | 1 - libpcsxcore/cdrom.c | 8 +++++++- libpcsxcore/misc.c | 9 ++++++++- libpcsxcore/misc.h | 2 +- libpcsxcore/plugins.h | 2 ++ libpcsxcore/r3000a.c | 3 +-- 6 files changed, 19 insertions(+), 6 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index 660d109cc..ecf424096 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -103,7 +103,6 @@ long CALLBACK CDR__configure(void); long CALLBACK CDR__test(void); void CALLBACK CDR__about(void); long CALLBACK CDR__setfilename(char *filename); -long CALLBACK CDR__getStatus(struct CdrStat *stat); static void DecodeRawSubData(void); diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 99e8dcd6f..6f4e3eb42 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1694,7 +1694,13 @@ void cdrReset() { 
cdr.Reg2 = 0x1f; cdr.Stat = NoIntr; cdr.FifoOffset = DATA_SIZE; // fifo empty - if (CdromId[0] == '\0') { + + CDR_getStatus(&stat); + if (stat.Status & STATUS_SHELLOPEN) { + cdr.DriveState = DRIVESTATE_LID_OPEN; + cdr.StatP = STATUS_SHELLOPEN; + } + else if (CdromId[0] == '\0') { cdr.DriveState = DRIVESTATE_STOPPED; cdr.StatP = 0; } diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 526ebd434..f175e2a36 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -163,12 +163,19 @@ static void SetBootRegs(u32 pc, u32 gp, u32 sp) psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); } -void BiosBootBypass() { +int BiosBootBypass() { + struct CdrStat stat = { 0, 0, }; assert(psxRegs.pc == 0x80030000); + // no bypass if the lid is open + CDR__getStatus(&stat); + if (stat.Status & 0x10) + return 0; + // skip BIOS logos and region check psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); psxRegs.pc = psxRegs.GPR.n.ra; + return 1; } static void getFromCnf(char *buf, const char *key, u32 *val) diff --git a/libpcsxcore/misc.h b/libpcsxcore/misc.h index a1c36bb98..539acc7b0 100644 --- a/libpcsxcore/misc.h +++ b/libpcsxcore/misc.h @@ -57,7 +57,7 @@ extern char CdromId[10]; extern char CdromLabel[33]; extern int CdromFrontendId; // for frontend use -void BiosBootBypass(); +int BiosBootBypass(); int LoadCdrom(); int LoadCdromFile(const char *filename, EXE_HEADER *head); diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index d20866568..b106028c2 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -161,6 +161,8 @@ extern CDRsetfilename CDR_setfilename; extern CDRreadCDDA CDR_readCDDA; extern CDRgetTE CDR_getTE; +long CALLBACK CDR__getStatus(struct CdrStat *stat); + // SPU Functions typedef long (CALLBACK* SPUinit)(void); typedef long (CALLBACK* SPUshutdown)(void); diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index fb043ae04..0c29dba73 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -76,8 +76,7 @@ void psxReset() { if 
(!Config.HLE) { psxExecuteBios(); if (psxRegs.pc == 0x80030000 && !Config.SlowBoot) { - BiosBootBypass(); - introBypassed = TRUE; + introBypassed = BiosBootBypass(); } } if (Config.HLE || introBypassed) From 2f59faba23ff29fd7679d217947d1574d271b5a6 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 29 Oct 2023 23:54:58 +0200 Subject: [PATCH 443/597] cdrom: simplify getStatus --- libpcsxcore/cdriso.c | 29 +++++++---------------------- libpcsxcore/cdrom.c | 2 ++ libpcsxcore/plugins.h | 6 +++--- 3 files changed, 12 insertions(+), 25 deletions(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index ecf424096..c352b168b 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -60,7 +60,6 @@ static boolean multifile = FALSE; static unsigned char cdbuffer[CD_FRAMESIZE_RAW]; static unsigned char subbuffer[SUB_FRAMESIZE]; -static boolean playing = FALSE; static boolean cddaBigEndian = FALSE; /* Frame offset into CD image where pregap data would be found if it was there. * If a game seeks there we must *not* return subchannel data since it's @@ -68,8 +67,6 @@ static boolean cddaBigEndian = FALSE; * XXX: there could be multiple pregaps but PSX dumps only have one? */ static unsigned int pregapOffset; -static unsigned int cddaCurPos; - // compressed image stuff static struct { unsigned char buff_raw[16][CD_FRAMESIZE_RAW]; @@ -219,7 +216,8 @@ static int parsetoc(const char *isofile) { // check if it's really a TOC named as a .cue if (fgets(linebuf, sizeof(linebuf), fi) != NULL) { token = strtok(linebuf, " "); - if (token && strncmp(token, "CD", 2) != 0 && strcmp(token, "CATALOG") != 0) { + if (token && strncmp(token, "CD", 2) != 0) { + // && strcmp(token, "CATALOG") != 0) - valid for a real .cue fclose(fi); return -1; } @@ -1337,7 +1335,8 @@ static void PrintTracks(void) { for (i = 1; i <= numtracks; i++) { SysPrintf(_("Track %.2d (%s) - Start %.2d:%.2d:%.2d, Length %.2d:%.2d:%.2d\n"), - i, (ti[i].type == DATA ? "DATA" : "AUDIO"), + i, (ti[i].type == DATA ? 
"DATA" : + (ti[i].type == CDDA ? "AUDIO" : "UNKNOWN")), ti[i].start[0], ti[i].start[1], ti[i].start[2], ti[i].length[0], ti[i].length[1], ti[i].length[2]); } @@ -1484,7 +1483,6 @@ static long CALLBACK ISOclose(void) { fclose(subHandle); subHandle = NULL; } - playing = FALSE; cddaHandle = NULL; if (compr_img != NULL) { @@ -1616,13 +1614,11 @@ static boolean CALLBACK ISOreadTrack(unsigned char *time) { // sector: byte 0 - minute; byte 1 - second; byte 2 - frame // does NOT uses bcd format static long CALLBACK ISOplay(unsigned char *time) { - playing = TRUE; return 0; } // stops cdda audio static long CALLBACK ISOstop(void) { - playing = FALSE; return 0; } @@ -1653,22 +1649,10 @@ static unsigned char* CALLBACK ISOgetBufferSub(int sector) { } static long CALLBACK ISOgetStatus(struct CdrStat *stat) { - u32 sect; - CDR__getStatus(stat); - if (playing) { - stat->Type = 0x02; - stat->Status |= 0x80; - } - else { - // BIOS - boot ID (CD type) - stat->Type = ti[1].type; - } - - // relative -> absolute time - sect = cddaCurPos; - sec2msf(sect, (char *)stat->Time); + // BIOS - boot ID (CD type) + stat->Type = ti[1].type; return 0; } @@ -1678,6 +1662,7 @@ long CALLBACK ISOreadCDDA(unsigned char m, unsigned char s, unsigned char f, uns unsigned char msf[3] = {m, s, f}; unsigned int track, track_start = 0; FILE *handle = cdHandle; + unsigned int cddaCurPos; int ret; cddaCurPos = msf2sec((char *)msf); diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 6f4e3eb42..83f8c1c14 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1132,6 +1132,8 @@ void cdrInterrupt(void) { cdr.Result[1] |= 0x80; } cdr.Result[0] |= (cdr.Result[1] >> 4) & 0x08; + CDR_LOG_I("CdlID: %02x %02x %02x %02x\n", cdr.Result[0], + cdr.Result[1], cdr.Result[2], cdr.Result[3]); /* This adds the string "PCSX" in Playstation bios boot screen */ memcpy((char *)&cdr.Result[4], "PCSX", 4); diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index b106028c2..269ef18a5 100644 --- 
a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -120,9 +120,9 @@ typedef long (CALLBACK* CDRplay)(unsigned char *); typedef long (CALLBACK* CDRstop)(void); typedef long (CALLBACK* CDRsetfilename)(char *); struct CdrStat { - uint32_t Type; - uint32_t Status; - unsigned char Time[3]; + uint32_t Type; // DATA, CDDA + uint32_t Status; // same as cdr.StatP + unsigned char Time_[3]; // unused }; typedef long (CALLBACK* CDRgetStatus)(struct CdrStat *); typedef char* (CALLBACK* CDRgetDriveLetter)(void); From 20bfbac0014bac12af9aa63e49eeedfac725f812 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 29 Oct 2023 23:58:03 +0200 Subject: [PATCH 444/597] rework memhandlers to deal with some bus details notaz/pcsx_rearmed#327 --- libpcsxcore/new_dynarec/pcsxmem.c | 90 ++- libpcsxcore/psxcounters.c | 6 +- libpcsxcore/psxhw.c | 1009 ++++++++--------------------- libpcsxcore/psxhw.h | 10 +- libpcsxcore/psxinterpreter.c | 8 +- libpcsxcore/psxmem.c | 4 +- libpcsxcore/psxmem.h | 4 +- 7 files changed, 363 insertions(+), 768 deletions(-) diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index e16f5ee77..e61e8a352 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -75,12 +75,12 @@ static void write_mem_dummy(u32 data) /* IO handlers */ static u32 io_read_sio16() { - return sioRead8() | (sioRead8() << 8); + return sioRead8(); } static u32 io_read_sio32() { - return sioRead8() | (sioRead8() << 8) | (sioRead8() << 16) | (sioRead8() << 24); + return sioRead8(); } static void io_write_sio16(u32 value) @@ -97,6 +97,11 @@ static void io_write_sio32(u32 value) sioWrite8((unsigned char)(value >> 24)); } +static u32 io_read_sio2_status() +{ + return 0x80; +} + #if !defined(DRC_DBG) && defined(__arm__) static void map_rcnt_rcount0(u32 mode) @@ -164,22 +169,6 @@ make_rcnt_funcs(0) make_rcnt_funcs(1) make_rcnt_funcs(2) -#define make_dma_func(n) \ -static void io_write_chcr##n(u32 value) \ -{ \ - HW_DMA##n##_CHCR = value; \ - 
if (value & 0x01000000 && HW_DMA_PCR & (8 << (n * 4))) { \ - psxDma##n(HW_DMA##n##_MADR, HW_DMA##n##_BCR, value); \ - } \ -} - -make_dma_func(0) -make_dma_func(1) -make_dma_func(2) -make_dma_func(3) -make_dma_func(4) -make_dma_func(6) - static u32 io_spu_read8_even(u32 addr) { return SPU_readRegister(addr, psxRegs.cycle) & 0xff; @@ -262,6 +251,29 @@ static void write_biu(u32 value) psxRegs.biuReg = value; } +/* scph7001 (pc = 8003de60, v1 = 1f8010f0): + lhu $t9, 0($v1) + li $at, 0xFFF0FFFF + and $t0, $t9, $at + lui $at, 8 + or $t1, $t0, $at + sh $t1, 0($v1) +*/ +#define make_forcew32_func(addr) \ +static void io_write_force32_##addr(u32 value) \ +{ \ + psxHu32ref(0x##addr) = SWAPu32(value); \ +} +make_forcew32_func(1014) +make_forcew32_func(1060) +make_forcew32_func(1080) +make_forcew32_func(1090) +make_forcew32_func(10a0) +make_forcew32_func(10b0) +make_forcew32_func(10c0) +make_forcew32_func(10e0) +make_forcew32_func(10f0) + void new_dyna_pcsx_mem_load_state(void) { map_rcnt_rcount0(rcnts[0].mode); @@ -354,6 +366,7 @@ void new_dyna_pcsx_mem_init(void) } map_item(&mem_iortab[IOMEM32(0x1040)], io_read_sio32, 1); + map_item(&mem_iortab[IOMEM32(0x1044)], sioReadStat16, 1); map_item(&mem_iortab[IOMEM32(0x1100)], psxRcntRcount0, 1); map_item(&mem_iortab[IOMEM32(0x1104)], io_rcnt_read_mode0, 1); map_item(&mem_iortab[IOMEM32(0x1108)], io_rcnt_read_target0, 1); @@ -373,6 +386,7 @@ void new_dyna_pcsx_mem_init(void) map_item(&mem_iortab[IOMEM16(0x1048)], sioReadMode16, 1); map_item(&mem_iortab[IOMEM16(0x104a)], sioReadCtrl16, 1); map_item(&mem_iortab[IOMEM16(0x104e)], sioReadBaud16, 1); + map_item(&mem_iortab[IOMEM16(0x1054)], io_read_sio2_status, 1); map_item(&mem_iortab[IOMEM16(0x1100)], psxRcntRcount0, 1); map_item(&mem_iortab[IOMEM16(0x1104)], io_rcnt_read_mode0, 1); map_item(&mem_iortab[IOMEM16(0x1108)], io_rcnt_read_target0, 1); @@ -400,12 +414,18 @@ void new_dyna_pcsx_mem_init(void) map_item(&mem_iowtab[IOMEM32(0x1040)], io_write_sio32, 1); 
map_item(&mem_iowtab[IOMEM32(0x1070)], psxHwWriteIstat, 1); map_item(&mem_iowtab[IOMEM32(0x1074)], psxHwWriteImask, 1); - map_item(&mem_iowtab[IOMEM32(0x1088)], io_write_chcr0, 1); - map_item(&mem_iowtab[IOMEM32(0x1098)], io_write_chcr1, 1); - map_item(&mem_iowtab[IOMEM32(0x10a8)], io_write_chcr2, 1); - map_item(&mem_iowtab[IOMEM32(0x10b8)], io_write_chcr3, 1); - map_item(&mem_iowtab[IOMEM32(0x10c8)], io_write_chcr4, 1); - map_item(&mem_iowtab[IOMEM32(0x10e8)], io_write_chcr6, 1); + map_item(&mem_iowtab[IOMEM32(0x1088)], psxHwWriteChcr0, 1); + map_item(&mem_iowtab[IOMEM32(0x108c)], psxHwWriteChcr0, 1); + map_item(&mem_iowtab[IOMEM32(0x1098)], psxHwWriteChcr1, 1); + map_item(&mem_iowtab[IOMEM32(0x109c)], psxHwWriteChcr1, 1); + map_item(&mem_iowtab[IOMEM32(0x10a8)], psxHwWriteChcr2, 1); + map_item(&mem_iowtab[IOMEM32(0x10ac)], psxHwWriteChcr2, 1); + map_item(&mem_iowtab[IOMEM32(0x10b8)], psxHwWriteChcr3, 1); + map_item(&mem_iowtab[IOMEM32(0x10bc)], psxHwWriteChcr3, 1); + map_item(&mem_iowtab[IOMEM32(0x10c8)], psxHwWriteChcr4, 1); + map_item(&mem_iowtab[IOMEM32(0x10cc)], psxHwWriteChcr4, 1); + map_item(&mem_iowtab[IOMEM32(0x10e8)], psxHwWriteChcr6, 1); + map_item(&mem_iowtab[IOMEM32(0x10ec)], psxHwWriteChcr6, 1); map_item(&mem_iowtab[IOMEM32(0x10f4)], psxHwWriteDmaIcr32, 1); map_item(&mem_iowtab[IOMEM32(0x1100)], io_rcnt_write_count0, 1); map_item(&mem_iowtab[IOMEM32(0x1104)], io_rcnt_write_mode0, 1); @@ -421,13 +441,35 @@ void new_dyna_pcsx_mem_init(void) map_item(&mem_iowtab[IOMEM32(0x1820)], mdecWrite0, 1); map_item(&mem_iowtab[IOMEM32(0x1824)], mdecWrite1, 1); + map_item(&mem_iowtab[IOMEM16(0x1014)], io_write_force32_1014, 1); map_item(&mem_iowtab[IOMEM16(0x1040)], io_write_sio16, 1); map_item(&mem_iowtab[IOMEM16(0x1044)], sioWriteStat16, 1); map_item(&mem_iowtab[IOMEM16(0x1048)], sioWriteMode16, 1); map_item(&mem_iowtab[IOMEM16(0x104a)], sioWriteCtrl16, 1); map_item(&mem_iowtab[IOMEM16(0x104e)], sioWriteBaud16, 1); + map_item(&mem_iowtab[IOMEM16(0x1060)], 
io_write_force32_1060, 1); map_item(&mem_iowtab[IOMEM16(0x1070)], psxHwWriteIstat, 1); map_item(&mem_iowtab[IOMEM16(0x1074)], psxHwWriteImask, 1); + map_item(&mem_iowtab[IOMEM16(0x1080)], io_write_force32_1080, 1); + map_item(&mem_iowtab[IOMEM16(0x1088)], psxHwWriteChcr0, 1); + map_item(&mem_iowtab[IOMEM16(0x108c)], psxHwWriteChcr0, 1); + map_item(&mem_iowtab[IOMEM16(0x1090)], io_write_force32_1090, 1); + map_item(&mem_iowtab[IOMEM16(0x1098)], psxHwWriteChcr1, 1); + map_item(&mem_iowtab[IOMEM16(0x109c)], psxHwWriteChcr1, 1); + map_item(&mem_iowtab[IOMEM16(0x10a0)], io_write_force32_10a0, 1); + map_item(&mem_iowtab[IOMEM16(0x10a8)], psxHwWriteChcr2, 1); + map_item(&mem_iowtab[IOMEM16(0x10ac)], psxHwWriteChcr2, 1); + map_item(&mem_iowtab[IOMEM16(0x10b0)], io_write_force32_10b0, 1); + map_item(&mem_iowtab[IOMEM16(0x10b8)], psxHwWriteChcr3, 1); + map_item(&mem_iowtab[IOMEM16(0x10bc)], psxHwWriteChcr3, 1); + map_item(&mem_iowtab[IOMEM16(0x10c0)], io_write_force32_10c0, 1); + map_item(&mem_iowtab[IOMEM16(0x10c8)], psxHwWriteChcr4, 1); + map_item(&mem_iowtab[IOMEM16(0x10cc)], psxHwWriteChcr4, 1); + map_item(&mem_iowtab[IOMEM16(0x10e0)], io_write_force32_10e0, 1); + map_item(&mem_iowtab[IOMEM16(0x10e8)], psxHwWriteChcr6, 1); + map_item(&mem_iowtab[IOMEM16(0x10ec)], psxHwWriteChcr6, 1); + map_item(&mem_iowtab[IOMEM16(0x10f0)], io_write_force32_10f0, 1); + map_item(&mem_iowtab[IOMEM16(0x10f4)], psxHwWriteDmaIcr32, 1); map_item(&mem_iowtab[IOMEM16(0x1100)], io_rcnt_write_count0, 1); map_item(&mem_iowtab[IOMEM16(0x1104)], io_rcnt_write_mode0, 1); map_item(&mem_iowtab[IOMEM16(0x1108)], io_rcnt_write_target0, 1); diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index f7491b3d5..c62712034 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -118,11 +118,7 @@ void verboseLog( u32 level, const char *str, ... 
) static inline void _psxRcntWcount( u32 index, u32 value ) { - if( value > 0xffff ) - { - verboseLog( 1, "[RCNT %i] wcount > 0xffff: %x\n", index, value ); - value &= 0xffff; - } + value &= 0xffff; rcnts[index].cycleStart = psxRegs.cycle; rcnts[index].cycleStart -= value * rcnts[index].rate; diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index f5efa22bd..8179d9597 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -27,12 +27,6 @@ #include "cdrom.h" #include "gpu.h" -//#undef PSXHW_LOG -//#define PSXHW_LOG printf -#ifndef PAD_LOG -#define PAD_LOG(...) -#endif - static u32 (*psxHwReadGpuSRptr)(void) = psxHwReadGpuSR; void psxHwReset() { @@ -70,6 +64,23 @@ void psxHwWriteImask(u32 value) psxRegs.CP0.n.Cause |= 0x400; } +#define make_dma_func(n) \ +void psxHwWriteChcr##n(u32 value) \ +{ \ + if (value & SWAPu32(HW_DMA##n##_CHCR) & 0x01000000) \ + log_unhandled("dma" #n " %08x -> %08x\n", HW_DMA##n##_CHCR, value); \ + HW_DMA##n##_CHCR = SWAPu32(value); \ + if (value & 0x01000000 && SWAPu32(HW_DMA_PCR) & (8u << (n * 4))) \ + psxDma##n(SWAPu32(HW_DMA##n##_MADR), SWAPu32(HW_DMA##n##_BCR), value); \ +} + +make_dma_func(0) +make_dma_func(1) +make_dma_func(2) +make_dma_func(3) +make_dma_func(4) +make_dma_func(6) + void psxHwWriteDmaIcr32(u32 value) { u32 tmp = value & 0x00ff803f; @@ -123,763 +134,303 @@ u32 psxHwReadGpuSRbusyHack(void) } u8 psxHwRead8(u32 add) { - unsigned char hard; - - switch (add & 0x1fffffff) { - case 0x1f801040: hard = sioRead8(); break; - case 0x1f801800: hard = cdrRead0(); break; - case 0x1f801801: hard = cdrRead1(); break; - case 0x1f801802: hard = cdrRead2(); break; - case 0x1f801803: hard = cdrRead3(); break; - - case 0x1f801041: case 0x1f801042: case 0x1f801043: - case 0x1f801044: case 0x1f801045: - case 0x1f801046: case 0x1f801047: - case 0x1f801048: case 0x1f801049: - case 0x1f80104a: case 0x1f80104b: - case 0x1f80104c: case 0x1f80104d: - case 0x1f80104e: case 0x1f80104f: - case 0x1f801050: case 0x1f801051: - case 0x1f801054: 
case 0x1f801055: - case 0x1f801058: case 0x1f801059: - case 0x1f80105a: case 0x1f80105b: - case 0x1f80105c: case 0x1f80105d: - case 0x1f801100: case 0x1f801101: - case 0x1f801104: case 0x1f801105: - case 0x1f801108: case 0x1f801109: - case 0x1f801110: case 0x1f801111: - case 0x1f801114: case 0x1f801115: - case 0x1f801118: case 0x1f801119: - case 0x1f801120: case 0x1f801121: - case 0x1f801124: case 0x1f801125: - case 0x1f801128: case 0x1f801129: - case 0x1f801810: case 0x1f801811: - case 0x1f801812: case 0x1f801813: - case 0x1f801814: case 0x1f801815: - case 0x1f801816: case 0x1f801817: - case 0x1f801820: case 0x1f801821: - case 0x1f801822: case 0x1f801823: - case 0x1f801824: case 0x1f801825: - case 0x1f801826: case 0x1f801827: - log_unhandled("unhandled r8 %08x @%08x\n", add, psxRegs.pc); - // falthrough - default: - if (0x1f801c00 <= add && add < 0x1f802000) { - u16 val = SPU_readRegister(add & ~1, psxRegs.cycle); - hard = (add & 1) ? val >> 8 : val; - break; - } - hard = psxHu8(add); -#ifdef PSXHW_LOG - PSXHW_LOG("*Unkwnown 8bit read at address %x\n", add); -#endif - return hard; + u8 hard; + + switch (add & 0xffff) { + case 0x1040: hard = sioRead8(); break; + case 0x1800: hard = cdrRead0(); break; + case 0x1801: hard = cdrRead1(); break; + case 0x1802: hard = cdrRead2(); break; + case 0x1803: hard = cdrRead3(); break; + + case 0x1041: case 0x1042: case 0x1043: + case 0x1044: case 0x1045: + case 0x1046: case 0x1047: + case 0x1048: case 0x1049: + case 0x104a: case 0x104b: + case 0x104c: case 0x104d: + case 0x104e: case 0x104f: + case 0x1050: case 0x1051: + case 0x1054: case 0x1055: + case 0x1058: case 0x1059: + case 0x105a: case 0x105b: + case 0x105c: case 0x105d: + case 0x1100: case 0x1101: + case 0x1104: case 0x1105: + case 0x1108: case 0x1109: + case 0x1110: case 0x1111: + case 0x1114: case 0x1115: + case 0x1118: case 0x1119: + case 0x1120: case 0x1121: + case 0x1124: case 0x1125: + case 0x1128: case 0x1129: + case 0x1810: case 0x1811: + case 0x1812: case 
0x1813: + case 0x1814: case 0x1815: + case 0x1816: case 0x1817: + case 0x1820: case 0x1821: + case 0x1822: case 0x1823: + case 0x1824: case 0x1825: + case 0x1826: case 0x1827: + log_unhandled("unhandled r8 %08x @%08x\n", add, psxRegs.pc); + // fallthrough + default: + if (0x1f801c00 <= add && add < 0x1f802000) { + u16 val = SPU_readRegister(add & ~1, psxRegs.cycle); + hard = (add & 1) ? val >> 8 : val; + break; + } + hard = psxHu8(add); } -#ifdef PSXHW_LOG - PSXHW_LOG("*Known 8bit read at address %x value %x\n", add, hard); -#endif + //printf("r8 %08x %02x @%08x\n", add, hard, psxRegs.pc); return hard; } u16 psxHwRead16(u32 add) { unsigned short hard; - switch (add & 0x1fffffff) { -#ifdef PSXHW_LOG - case 0x1f801070: PSXHW_LOG("IREG 16bit read %x\n", psxHu16(0x1070)); - return psxHu16(0x1070); - case 0x1f801074: PSXHW_LOG("IMASK 16bit read %x\n", psxHu16(0x1074)); - return psxHu16(0x1074); -#endif - case 0x1f801040: - hard = sioRead8(); - hard|= sioRead8() << 8; - PAD_LOG("sio read16 %x; ret = %x\n", add&0xf, hard); - return hard; - case 0x1f801044: - hard = sioReadStat16(); - PAD_LOG("sio read16 %x; ret = %x\n", add&0xf, hard); - return hard; - case 0x1f801048: - hard = sioReadMode16(); - PAD_LOG("sio read16 %x; ret = %x\n", add&0xf, hard); - return hard; - case 0x1f80104a: - hard = sioReadCtrl16(); - PAD_LOG("sio read16 %x; ret = %x\n", add&0xf, hard); - return hard; - case 0x1f80104e: - hard = sioReadBaud16(); - PAD_LOG("sio read16 %x; ret = %x\n", add&0xf, hard); - return hard; - - /* Fixes Armored Core misdetecting the Link cable being detected. - * We want to turn that thing off and force it to do local multiplayer instead. - * Thanks Sony for the fix, they fixed it in their PS Classic fork.
- */ - case 0x1f801054: - return 0x80; - - case 0x1f801100: - hard = psxRcntRcount0(); -#ifdef PSXHW_LOG - PSXHW_LOG("T0 count read16: %x\n", hard); -#endif - return hard; - case 0x1f801104: - hard = psxRcntRmode(0); -#ifdef PSXHW_LOG - PSXHW_LOG("T0 mode read16: %x\n", hard); -#endif - return hard; - case 0x1f801108: - hard = psxRcntRtarget(0); -#ifdef PSXHW_LOG - PSXHW_LOG("T0 target read16: %x\n", hard); -#endif - return hard; - case 0x1f801110: - hard = psxRcntRcount1(); -#ifdef PSXHW_LOG - PSXHW_LOG("T1 count read16: %x\n", hard); -#endif - return hard; - case 0x1f801114: - hard = psxRcntRmode(1); -#ifdef PSXHW_LOG - PSXHW_LOG("T1 mode read16: %x\n", hard); -#endif - return hard; - case 0x1f801118: - hard = psxRcntRtarget(1); -#ifdef PSXHW_LOG - PSXHW_LOG("T1 target read16: %x\n", hard); -#endif - return hard; - case 0x1f801120: - hard = psxRcntRcount2(); -#ifdef PSXHW_LOG - PSXHW_LOG("T2 count read16: %x\n", hard); -#endif - return hard; - case 0x1f801124: - hard = psxRcntRmode(2); -#ifdef PSXHW_LOG - PSXHW_LOG("T2 mode read16: %x\n", hard); -#endif - return hard; - case 0x1f801128: - hard = psxRcntRtarget(2); -#ifdef PSXHW_LOG - PSXHW_LOG("T2 target read16: %x\n", hard); -#endif - return hard; - - //case 0x1f802030: hard = //int_2000???? - //case 0x1f802040: hard =//dip switches...?? 
- - case 0x1f801042: - case 0x1f801046: - case 0x1f80104c: - case 0x1f801050: - case 0x1f801058: - case 0x1f80105a: - case 0x1f80105c: - case 0x1f801800: - case 0x1f801802: - case 0x1f801810: - case 0x1f801812: - case 0x1f801814: - case 0x1f801816: - case 0x1f801820: - case 0x1f801822: - case 0x1f801824: - case 0x1f801826: - log_unhandled("unhandled r16 %08x @%08x\n", add, psxRegs.pc); - // falthrough - default: - if (0x1f801c00 <= add && add < 0x1f802000) - return SPU_readRegister(add, psxRegs.cycle); - hard = psxHu16(add); -#ifdef PSXHW_LOG - PSXHW_LOG("*Unkwnown 16bit read at address %x\n", add); -#endif - return hard; + switch (add & 0xffff) { + case 0x1040: hard = sioRead8(); break; + case 0x1044: hard = sioReadStat16(); break; + case 0x1048: hard = sioReadMode16(); break; + case 0x104a: hard = sioReadCtrl16(); break; + case 0x104e: hard = sioReadBaud16(); break; + case 0x1054: hard = 0x80; break; // Armored Core Link cable misdetection + case 0x1100: hard = psxRcntRcount0(); break; + case 0x1104: hard = psxRcntRmode(0); break; + case 0x1108: hard = psxRcntRtarget(0); break; + case 0x1110: hard = psxRcntRcount1(); break; + case 0x1114: hard = psxRcntRmode(1); break; + case 0x1118: hard = psxRcntRtarget(1); break; + case 0x1120: hard = psxRcntRcount2(); break; + case 0x1124: hard = psxRcntRmode(2); break; + case 0x1128: hard = psxRcntRtarget(2); break; + + case 0x1042: + case 0x1046: + case 0x104c: + case 0x1050: + case 0x1058: + case 0x105a: + case 0x105c: + case 0x1800: + case 0x1802: + case 0x1810: + case 0x1812: + case 0x1814: + case 0x1816: + case 0x1820: + case 0x1822: + case 0x1824: + case 0x1826: + log_unhandled("unhandled r16 %08x @%08x\n", add, psxRegs.pc); + // fallthrough + default: + if (0x1f801c00 <= add && add < 0x1f802000) { + hard = SPU_readRegister(add, psxRegs.cycle); + break; + } + hard = psxHu16(add); } -#ifdef PSXHW_LOG - PSXHW_LOG("*Known 16bit read at address %x value %x\n", add,
hard, psxRegs.pc); return hard; } u32 psxHwRead32(u32 add) { u32 hard; - switch (add & 0x1fffffff) { - case 0x1f801040: - hard = sioRead8(); - hard |= sioRead8() << 8; - hard |= sioRead8() << 16; - hard |= sioRead8() << 24; - PAD_LOG("sio read32 ;ret = %x\n", hard); - return hard; - case 0x1f801044: - hard = sioReadStat16(); - PAD_LOG("sio read32 %x; ret = %x\n", add&0xf, hard); - return hard; -#ifdef PSXHW_LOG - case 0x1f801060: - PSXHW_LOG("RAM size read %x\n", psxHu32(0x1060)); - return psxHu32(0x1060); - case 0x1f801070: PSXHW_LOG("IREG 32bit read %x\n", psxHu32(0x1070)); - return psxHu32(0x1070); - case 0x1f801074: PSXHW_LOG("IMASK 32bit read %x\n", psxHu32(0x1074)); - return psxHu32(0x1074); -#endif - - case 0x1f801810: - hard = GPU_readData(); -#ifdef PSXHW_LOG - PSXHW_LOG("GPU DATA 32bit read %x\n", hard); -#endif - return hard; - case 0x1f801814: - hard = psxHwReadGpuSRptr(); -#ifdef PSXHW_LOG - PSXHW_LOG("GPU STATUS 32bit read %x\n", hard); -#endif - return hard; - - case 0x1f801820: hard = mdecRead0(); break; - case 0x1f801824: hard = mdecRead1(); break; - -#ifdef PSXHW_LOG - case 0x1f8010a0: - PSXHW_LOG("DMA2 MADR 32bit read %x\n", psxHu32(0x10a0)); - return SWAPu32(HW_DMA2_MADR); - case 0x1f8010a4: - PSXHW_LOG("DMA2 BCR 32bit read %x\n", psxHu32(0x10a4)); - return SWAPu32(HW_DMA2_BCR); - case 0x1f8010a8: - PSXHW_LOG("DMA2 CHCR 32bit read %x\n", psxHu32(0x10a8)); - return SWAPu32(HW_DMA2_CHCR); -#endif - -#ifdef PSXHW_LOG - case 0x1f8010b0: - PSXHW_LOG("DMA3 MADR 32bit read %x\n", psxHu32(0x10b0)); - return SWAPu32(HW_DMA3_MADR); - case 0x1f8010b4: - PSXHW_LOG("DMA3 BCR 32bit read %x\n", psxHu32(0x10b4)); - return SWAPu32(HW_DMA3_BCR); - case 0x1f8010b8: - PSXHW_LOG("DMA3 CHCR 32bit read %x\n", psxHu32(0x10b8)); - return SWAPu32(HW_DMA3_CHCR); -#endif - -#ifdef PSXHW_LOG -/* case 0x1f8010f0: - PSXHW_LOG("DMA PCR 32bit read %x\n", psxHu32(0x10f0)); - return SWAPu32(HW_DMA_PCR); // dma rest channel - case 0x1f8010f4: - PSXHW_LOG("DMA ICR 32bit read %x\n", 
psxHu32(0x10f4)); - return SWAPu32(HW_DMA_ICR); // interrupt enabler?*/ -#endif - - // time for rootcounters :) - case 0x1f801100: - hard = psxRcntRcount0(); -#ifdef PSXHW_LOG - PSXHW_LOG("T0 count read32: %x\n", hard); -#endif - return hard; - case 0x1f801104: - hard = psxRcntRmode(0); -#ifdef PSXHW_LOG - PSXHW_LOG("T0 mode read32: %x\n", hard); -#endif - return hard; - case 0x1f801108: - hard = psxRcntRtarget(0); -#ifdef PSXHW_LOG - PSXHW_LOG("T0 target read32: %x\n", hard); -#endif - return hard; - case 0x1f801110: - hard = psxRcntRcount1(); -#ifdef PSXHW_LOG - PSXHW_LOG("T1 count read32: %x\n", hard); -#endif - return hard; - case 0x1f801114: - hard = psxRcntRmode(1); -#ifdef PSXHW_LOG - PSXHW_LOG("T1 mode read32: %x\n", hard); -#endif - return hard; - case 0x1f801118: - hard = psxRcntRtarget(1); -#ifdef PSXHW_LOG - PSXHW_LOG("T1 target read32: %x\n", hard); -#endif - return hard; - case 0x1f801120: - hard = psxRcntRcount2(); -#ifdef PSXHW_LOG - PSXHW_LOG("T2 count read32: %x\n", hard); -#endif - return hard; - case 0x1f801124: - hard = psxRcntRmode(2); -#ifdef PSXHW_LOG - PSXHW_LOG("T2 mode read32: %x\n", hard); -#endif - return hard; - case 0x1f801128: - hard = psxRcntRtarget(2); -#ifdef PSXHW_LOG - PSXHW_LOG("T2 target read32: %x\n", hard); -#endif - return hard; - - case 0x1f801048: - case 0x1f80104c: - case 0x1f801050: - case 0x1f801054: - case 0x1f801058: - case 0x1f80105c: - case 0x1f801800: - log_unhandled("unhandled r32 %08x @%08x\n", add, psxRegs.pc); - // falthrough - default: - if (0x1f801c00 <= add && add < 0x1f802000) { - hard = SPU_readRegister(add, psxRegs.cycle); - hard |= SPU_readRegister(add + 2, psxRegs.cycle) << 16; - return hard; - } - hard = psxHu32(add); -#ifdef PSXHW_LOG - PSXHW_LOG("*Unkwnown 32bit read at address %x\n", add); -#endif - return hard; + switch (add & 0xffff) { + case 0x1040: hard = sioRead8(); break; + case 0x1044: hard = sioReadStat16(); break; + case 0x1100: hard = psxRcntRcount0(); break; + case 0x1104: hard = 
psxRcntRmode(0); break; + case 0x1108: hard = psxRcntRtarget(0); break; + case 0x1110: hard = psxRcntRcount1(); break; + case 0x1114: hard = psxRcntRmode(1); break; + case 0x1118: hard = psxRcntRtarget(1); break; + case 0x1120: hard = psxRcntRcount2(); break; + case 0x1124: hard = psxRcntRmode(2); break; + case 0x1128: hard = psxRcntRtarget(2); break; + case 0x1810: hard = GPU_readData(); break; + case 0x1814: hard = psxHwReadGpuSRptr(); break; + case 0x1820: hard = mdecRead0(); break; + case 0x1824: hard = mdecRead1(); break; + + case 0x1048: + case 0x104c: + case 0x1050: + case 0x1054: + case 0x1058: + case 0x105c: + case 0x1800: + log_unhandled("unhandled r32 %08x @%08x\n", add, psxRegs.pc); + // fallthrough + default: + if (0x1f801c00 <= add && add < 0x1f802000) { + hard = SPU_readRegister(add, psxRegs.cycle); + hard |= SPU_readRegister(add + 2, psxRegs.cycle) << 16; + break; + } + hard = psxHu32(add); } -#ifdef PSXHW_LOG - PSXHW_LOG("*Known 32bit read at address %x\n", add); -#endif + //printf("r32 %08x %08x @%08x\n", add, hard, psxRegs.pc); return hard; } -void psxHwWrite8(u32 add, u8 value) { - switch (add & 0x1fffffff) { - case 0x1f801040: sioWrite8(value); break; - case 0x1f8010f6: - // nocash documents it as forced w32, but still games use this?
- break; - case 0x1f801800: cdrWrite0(value); break; - case 0x1f801801: cdrWrite1(value); break; - case 0x1f801802: cdrWrite2(value); break; - case 0x1f801803: cdrWrite3(value); break; - - case 0x1f801810: case 0x1f801811: - case 0x1f801812: case 0x1f801813: - case 0x1f801814: case 0x1f801815: - case 0x1f801816: case 0x1f801817: - case 0x1f801820: case 0x1f801821: - case 0x1f801822: case 0x1f801823: - case 0x1f801824: case 0x1f801825: - case 0x1f801826: case 0x1f801827: - log_unhandled("unhandled w8 %08x @%08x\n", add, psxRegs.pc); - // falthrough - default: - if (0x1f801c00 <= add && add < 0x1f802000) { - log_unhandled("spu w8 %02x @%08x\n", value, psxRegs.pc); - if (!(add & 1)) - SPU_writeRegister(add, value, psxRegs.cycle); - return; - } - else if (0x1f801000 <= add && add < 0x1f801800) - log_unhandled("unhandled w8 %08x @%08x\n", add, psxRegs.pc); - - psxHu8(add) = value; -#ifdef PSXHW_LOG - PSXHW_LOG("*Unknown 8bit write at address %x value %x\n", add, value); -#endif +void psxHwWrite8(u32 add, u32 value) { + switch (add & 0xffff) { + case 0x1040: sioWrite8(value); return; + case 0x10f6: + // nocash documents it as forced w32, but still games use this? 
+ break; + case 0x1800: cdrWrite0(value); return; + case 0x1801: cdrWrite1(value); return; + case 0x1802: cdrWrite2(value); return; + case 0x1803: cdrWrite3(value); return; + case 0x2041: break; // "POST (external 7 segment display)" + + default: + if (0x1f801c00 <= add && add < 0x1f802000) { + log_unhandled("spu w8 %02x @%08x\n", value, psxRegs.pc); + if (!(add & 1)) + SPU_writeRegister(add, value, psxRegs.cycle); return; + } + else + log_unhandled("unhandled w8 %08x %08x @%08x\n", + add, value, psxRegs.pc); } psxHu8(add) = value; -#ifdef PSXHW_LOG - PSXHW_LOG("*Known 8bit write at address %x value %x\n", add, value); -#endif } -void psxHwWrite16(u32 add, u16 value) { - switch (add & 0x1fffffff) { - case 0x1f801040: - sioWrite8((unsigned char)value); - sioWrite8((unsigned char)(value>>8)); - PAD_LOG ("sio write16 %x, %x\n", add&0xf, value); - return; - case 0x1f801044: - sioWriteStat16(value); - PAD_LOG ("sio write16 %x, %x\n", add&0xf, value); - return; - case 0x1f801048: - sioWriteMode16(value); - PAD_LOG ("sio write16 %x, %x\n", add&0xf, value); - return; - case 0x1f80104a: // control register - sioWriteCtrl16(value); - PAD_LOG ("sio write16 %x, %x\n", add&0xf, value); - return; - case 0x1f80104e: // baudrate register - sioWriteBaud16(value); - PAD_LOG ("sio write16 %x, %x\n", add&0xf, value); - return; - case 0x1f801070: -#ifdef PSXHW_LOG - PSXHW_LOG("IREG 16bit write %x\n", value); -#endif - psxHwWriteIstat(value); - return; - - case 0x1f801074: -#ifdef PSXHW_LOG - PSXHW_LOG("IMASK 16bit write %x\n", value); -#endif - psxHwWriteImask(value); - return; - - case 0x1f801100: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 0 COUNT 16bit write %x\n", value); -#endif - psxRcntWcount(0, value); return; - case 0x1f801104: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 0 MODE 16bit write %x\n", value); -#endif - psxRcntWmode(0, value); return; - case 0x1f801108: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 0 TARGET 16bit write %x\n", value); -#endif - psxRcntWtarget(0, value); return; - - 
case 0x1f801110: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 1 COUNT 16bit write %x\n", value); -#endif - psxRcntWcount(1, value); return; - case 0x1f801114: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 1 MODE 16bit write %x\n", value); -#endif - psxRcntWmode(1, value); return; - case 0x1f801118: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 1 TARGET 16bit write %x\n", value); -#endif - psxRcntWtarget(1, value); return; - - case 0x1f801120: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 2 COUNT 16bit write %x\n", value); -#endif - psxRcntWcount(2, value); return; - case 0x1f801124: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 2 MODE 16bit write %x\n", value); -#endif - psxRcntWmode(2, value); return; - case 0x1f801128: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 2 TARGET 16bit write %x\n", value); -#endif - psxRcntWtarget(2, value); return; - - case 0x1f801042: - case 0x1f801046: - case 0x1f80104c: - case 0x1f801050: - case 0x1f801054: - case 0x1f801058: - case 0x1f80105a: - case 0x1f80105c: - case 0x1f801800: - case 0x1f801802: - case 0x1f801810: - case 0x1f801812: - case 0x1f801814: - case 0x1f801816: - case 0x1f801820: - case 0x1f801822: - case 0x1f801824: - case 0x1f801826: - log_unhandled("unhandled w16 %08x @%08x\n", add, psxRegs.pc); - // falthrough - default: - if (0x1f801c00 <= add && add < 0x1f802000) { - SPU_writeRegister(add, value, psxRegs.cycle); - return; - } - - psxHu16ref(add) = SWAPu16(value); -#ifdef PSXHW_LOG - PSXHW_LOG("*Unknown 16bit write at address %x value %x\n", add, value); -#endif +void psxHwWrite16(u32 add, u32 value) { + switch (add & 0xffff) { + case 0x1040: sioWrite8(value); return; + case 0x1044: sioWriteStat16(value); return; + case 0x1048: sioWriteMode16(value); return; + case 0x104a: sioWriteCtrl16(value); return; + case 0x104e: sioWriteBaud16(value); return; + case 0x1070: psxHwWriteIstat(value); return; + case 0x1074: psxHwWriteImask(value); return; + case 0x1100: psxRcntWcount(0, value); return; + case 0x1104: psxRcntWmode(0, value); return; + case 0x1108: 
psxRcntWtarget(0, value); return; + case 0x1110: psxRcntWcount(1, value); return; + case 0x1114: psxRcntWmode(1, value); return; + case 0x1118: psxRcntWtarget(1, value); return; + case 0x1120: psxRcntWcount(2, value); return; + case 0x1124: psxRcntWmode(2, value); return; + case 0x1128: psxRcntWtarget(2, value); return; + + // forced write32: + case 0x1088: // DMA0 chcr (MDEC in DMA) + case 0x108c: psxHwWriteChcr0(value); return; + case 0x1098: // DMA1 chcr (MDEC out DMA) + case 0x109c: psxHwWriteChcr1(value); return; + case 0x10a8: // DMA2 chcr (GPU DMA) + case 0x10ac: psxHwWriteChcr2(value); return; + case 0x10b8: // DMA3 chcr (CDROM DMA) + case 0x10bc: psxHwWriteChcr3(value); return; + case 0x10c8: // DMA4 chcr (SPU DMA) + case 0x10cc: psxHwWriteChcr4(value); return; + case 0x10e8: // DMA6 chcr (OT clear) + case 0x10ec: psxHwWriteChcr6(value); return; + case 0x10f4: psxHwWriteDmaIcr32(value); return; + + // forced write32 with no immediate effect: + case 0x1014: + case 0x1060: + case 0x1080: + case 0x1090: + case 0x10a0: + case 0x10b0: + case 0x10c0: + case 0x10d0: + case 0x10e0: + case 0x10f0: + psxHu32ref(add) = SWAPu32(value); + return; + + case 0x1800: + case 0x1802: + case 0x1810: + case 0x1812: + case 0x1814: + case 0x1816: + case 0x1820: + case 0x1822: + case 0x1824: + case 0x1826: + log_unhandled("unhandled w16 %08x @%08x\n", add, psxRegs.pc); + break; + + default: + if (0x1f801c00 <= add && add < 0x1f802000) { + SPU_writeRegister(add, value, psxRegs.cycle); return; + } + else if (0x1f801000 <= add && add < 0x1f801800) + log_unhandled("unhandled w16 %08x %08x @%08x\n", + add, value, psxRegs.pc); } psxHu16ref(add) = SWAPu16(value); -#ifdef PSXHW_LOG - PSXHW_LOG("*Known 16bit write at address %x value %x\n", add, value); -#endif -} - -#define DmaExec(n) { \ - if (value & SWAPu32(HW_DMA##n##_CHCR) & 0x01000000) \ - log_unhandled("dma" #n " %08x -> %08x\n", HW_DMA##n##_CHCR, value); \ - HW_DMA##n##_CHCR = SWAPu32(value); \ -\ - if (SWAPu32(HW_DMA##n##_CHCR) 
& 0x01000000 && SWAPu32(HW_DMA_PCR) & (8 << (n * 4))) { \ - psxDma##n(SWAPu32(HW_DMA##n##_MADR), SWAPu32(HW_DMA##n##_BCR), SWAPu32(HW_DMA##n##_CHCR)); \ - } \ } void psxHwWrite32(u32 add, u32 value) { - switch (add & 0x1fffffff) { - case 0x1f801040: - sioWrite8((unsigned char)value); - sioWrite8((unsigned char)((value&0xff) >> 8)); - sioWrite8((unsigned char)((value&0xff) >> 16)); - sioWrite8((unsigned char)((value&0xff) >> 24)); - PAD_LOG("sio write32 %x\n", value); - return; -#ifdef PSXHW_LOG - case 0x1f801060: - PSXHW_LOG("RAM size write %x\n", value); - psxHu32ref(add) = SWAPu32(value); - return; // Ram size -#endif - - case 0x1f801070: -#ifdef PSXHW_LOG - PSXHW_LOG("IREG 32bit write %x\n", value); -#endif - psxHwWriteIstat(value); - return; - case 0x1f801074: -#ifdef PSXHW_LOG - PSXHW_LOG("IMASK 32bit write %x\n", value); -#endif - psxHwWriteImask(value); - return; - -#ifdef PSXHW_LOG - case 0x1f801080: - PSXHW_LOG("DMA0 MADR 32bit write %x\n", value); - HW_DMA0_MADR = SWAPu32(value); return; // DMA0 madr - case 0x1f801084: - PSXHW_LOG("DMA0 BCR 32bit write %x\n", value); - HW_DMA0_BCR = SWAPu32(value); return; // DMA0 bcr -#endif - case 0x1f801088: -#ifdef PSXHW_LOG - PSXHW_LOG("DMA0 CHCR 32bit write %x\n", value); -#endif - DmaExec(0); // DMA0 chcr (MDEC in DMA) - return; - -#ifdef PSXHW_LOG - case 0x1f801090: - PSXHW_LOG("DMA1 MADR 32bit write %x\n", value); - HW_DMA1_MADR = SWAPu32(value); return; // DMA1 madr - case 0x1f801094: - PSXHW_LOG("DMA1 BCR 32bit write %x\n", value); - HW_DMA1_BCR = SWAPu32(value); return; // DMA1 bcr -#endif - case 0x1f801098: -#ifdef PSXHW_LOG - PSXHW_LOG("DMA1 CHCR 32bit write %x\n", value); -#endif - DmaExec(1); // DMA1 chcr (MDEC out DMA) - return; - -#ifdef PSXHW_LOG - case 0x1f8010a0: - PSXHW_LOG("DMA2 MADR 32bit write %x\n", value); - HW_DMA2_MADR = SWAPu32(value); return; // DMA2 madr - case 0x1f8010a4: - PSXHW_LOG("DMA2 BCR 32bit write %x\n", value); - HW_DMA2_BCR = SWAPu32(value); return; // DMA2 bcr -#endif - case 
0x1f8010a8: -#ifdef PSXHW_LOG - PSXHW_LOG("DMA2 CHCR 32bit write %x\n", value); -#endif - DmaExec(2); // DMA2 chcr (GPU DMA) - return; - -#ifdef PSXHW_LOG - case 0x1f8010b0: - PSXHW_LOG("DMA3 MADR 32bit write %x\n", value); - HW_DMA3_MADR = SWAPu32(value); return; // DMA3 madr - case 0x1f8010b4: - PSXHW_LOG("DMA3 BCR 32bit write %x\n", value); - HW_DMA3_BCR = SWAPu32(value); return; // DMA3 bcr -#endif - case 0x1f8010b8: -#ifdef PSXHW_LOG - PSXHW_LOG("DMA3 CHCR 32bit write %x\n", value); -#endif - DmaExec(3); // DMA3 chcr (CDROM DMA) - - return; - -#ifdef PSXHW_LOG - case 0x1f8010c0: - PSXHW_LOG("DMA4 MADR 32bit write %x\n", value); - HW_DMA4_MADR = SWAPu32(value); return; // DMA4 madr - case 0x1f8010c4: - PSXHW_LOG("DMA4 BCR 32bit write %x\n", value); - HW_DMA4_BCR = SWAPu32(value); return; // DMA4 bcr -#endif - case 0x1f8010c8: -#ifdef PSXHW_LOG - PSXHW_LOG("DMA4 CHCR 32bit write %x\n", value); -#endif - DmaExec(4); // DMA4 chcr (SPU DMA) - return; - -#if 0 - case 0x1f8010d0: break; //DMA5write_madr(); - case 0x1f8010d4: break; //DMA5write_bcr(); - case 0x1f8010d8: break; //DMA5write_chcr(); // Not needed -#endif - -#ifdef PSXHW_LOG - case 0x1f8010e0: - PSXHW_LOG("DMA6 MADR 32bit write %x\n", value); - HW_DMA6_MADR = SWAPu32(value); return; // DMA6 bcr - case 0x1f8010e4: - PSXHW_LOG("DMA6 BCR 32bit write %x\n", value); - HW_DMA6_BCR = SWAPu32(value); return; // DMA6 bcr -#endif - case 0x1f8010e8: -#ifdef PSXHW_LOG - PSXHW_LOG("DMA6 CHCR 32bit write %x\n", value); -#endif - DmaExec(6); // DMA6 chcr (OT clear) - return; - -#ifdef PSXHW_LOG - case 0x1f8010f0: - PSXHW_LOG("DMA PCR 32bit write %x\n", value); - HW_DMA_PCR = SWAPu32(value); - return; -#endif - - case 0x1f8010f4: -#ifdef PSXHW_LOG - PSXHW_LOG("DMA ICR 32bit write %x\n", value); -#endif - psxHwWriteDmaIcr32(value); - return; - - case 0x1f801810: -#ifdef PSXHW_LOG - PSXHW_LOG("GPU DATA 32bit write %x\n", value); -#endif - GPU_writeData(value); return; - case 0x1f801814: -#ifdef PSXHW_LOG - PSXHW_LOG("GPU 
STATUS 32bit write %x\n", value); -#endif - psxHwWriteGpuSR(value); - return; - - case 0x1f801820: - mdecWrite0(value); break; - case 0x1f801824: - mdecWrite1(value); break; - - case 0x1f801100: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 0 COUNT 32bit write %x\n", value); -#endif - psxRcntWcount(0, value & 0xffff); return; - case 0x1f801104: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 0 MODE 32bit write %x\n", value); -#endif - psxRcntWmode(0, value); return; - case 0x1f801108: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 0 TARGET 32bit write %x\n", value); -#endif - psxRcntWtarget(0, value & 0xffff); return; // HW_DMA_ICR&= SWAP32((~value)&0xff000000); - - case 0x1f801110: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 1 COUNT 32bit write %x\n", value); -#endif - psxRcntWcount(1, value & 0xffff); return; - case 0x1f801114: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 1 MODE 32bit write %x\n", value); -#endif - psxRcntWmode(1, value); return; - case 0x1f801118: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 1 TARGET 32bit write %x\n", value); -#endif - psxRcntWtarget(1, value & 0xffff); return; - - case 0x1f801120: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 2 COUNT 32bit write %x\n", value); -#endif - psxRcntWcount(2, value & 0xffff); return; - case 0x1f801124: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 2 MODE 32bit write %x\n", value); -#endif - psxRcntWmode(2, value); return; - case 0x1f801128: -#ifdef PSXHW_LOG - PSXHW_LOG("COUNTER 2 TARGET 32bit write %x\n", value); -#endif - psxRcntWtarget(2, value & 0xffff); return; - - case 0x1f801044: - case 0x1f801048: - case 0x1f80104c: - case 0x1f801050: - case 0x1f801054: - case 0x1f801058: - case 0x1f80105c: - case 0x1f801800: - log_unhandled("unhandled w32 %08x @%08x\n", add, psxRegs.pc); - // falthrough - default: - // Dukes of Hazard 2 - car engine noise - if (0x1f801c00 <= add && add < 0x1f802000) { - SPU_writeRegister(add, value&0xffff, psxRegs.cycle); - SPU_writeRegister(add + 2, value>>16, psxRegs.cycle); - return; - } - - psxHu32ref(add) = SWAPu32(value); 
-#ifdef PSXHW_LOG - PSXHW_LOG("*Unknown 32bit write at address %x value %x\n", add, value); -#endif + switch (add & 0xffff) { + case 0x1040: sioWrite8(value); return; + case 0x1070: psxHwWriteIstat(value); return; + case 0x1074: psxHwWriteImask(value); return; + case 0x1088: // DMA0 chcr (MDEC in DMA) + case 0x108c: psxHwWriteChcr0(value); return; + case 0x1098: // DMA1 chcr (MDEC out DMA) + case 0x109c: psxHwWriteChcr1(value); return; + case 0x10a8: // DMA2 chcr (GPU DMA) + case 0x10ac: psxHwWriteChcr2(value); return; + case 0x10b8: // DMA3 chcr (CDROM DMA) + case 0x10bc: psxHwWriteChcr3(value); return; + case 0x10c8: // DMA4 chcr (SPU DMA) + case 0x10cc: psxHwWriteChcr4(value); return; + case 0x10e8: // DMA6 chcr (OT clear) + case 0x10ec: psxHwWriteChcr6(value); return; + case 0x10f4: psxHwWriteDmaIcr32(value); return; + + case 0x1810: GPU_writeData(value); return; + case 0x1814: psxHwWriteGpuSR(value); return; + case 0x1820: mdecWrite0(value); break; + case 0x1824: mdecWrite1(value); break; + + case 0x1100: psxRcntWcount(0, value & 0xffff); return; + case 0x1104: psxRcntWmode(0, value); return; + case 0x1108: psxRcntWtarget(0, value & 0xffff); return; + case 0x1110: psxRcntWcount(1, value & 0xffff); return; + case 0x1114: psxRcntWmode(1, value); return; + case 0x1118: psxRcntWtarget(1, value & 0xffff); return; + case 0x1120: psxRcntWcount(2, value & 0xffff); return; + case 0x1124: psxRcntWmode(2, value); return; + case 0x1128: psxRcntWtarget(2, value & 0xffff); return; + + case 0x1044: + case 0x1048: + case 0x104c: + case 0x1050: + case 0x1054: + case 0x1058: + case 0x105c: + case 0x1800: + log_unhandled("unhandled w32 %08x %08x @%08x\n", add, value, psxRegs.pc); + break; + + default: + if (0x1f801c00 <= add && add < 0x1f802000) { + SPU_writeRegister(add, value&0xffff, psxRegs.cycle); + SPU_writeRegister(add + 2, value>>16, psxRegs.cycle); return; + } } psxHu32ref(add) = SWAPu32(value); -#ifdef PSXHW_LOG - PSXHW_LOG("*Known 32bit write at address %x value %x\n", 
add, value); -#endif } int psxHwFreeze(void *f, int Mode) { diff --git a/libpcsxcore/psxhw.h b/libpcsxcore/psxhw.h index 91256ce31..574ee3337 100644 --- a/libpcsxcore/psxhw.h +++ b/libpcsxcore/psxhw.h @@ -77,13 +77,19 @@ void psxHwReset(); u8 psxHwRead8(u32 add); u16 psxHwRead16(u32 add); u32 psxHwRead32(u32 add); -void psxHwWrite8(u32 add, u8 value); -void psxHwWrite16(u32 add, u16 value); +void psxHwWrite8(u32 add, u32 value); +void psxHwWrite16(u32 add, u32 value); void psxHwWrite32(u32 add, u32 value); int psxHwFreeze(void *f, int Mode); void psxHwWriteIstat(u32 value); void psxHwWriteImask(u32 value); +void psxHwWriteChcr0(u32 value); +void psxHwWriteChcr1(u32 value); +void psxHwWriteChcr2(u32 value); +void psxHwWriteChcr3(u32 value); +void psxHwWriteChcr4(u32 value); +void psxHwWriteChcr6(u32 value); void psxHwWriteDmaIcr32(u32 value); void psxHwWriteGpuSR(u32 value); u32 psxHwReadGpuSR(void); diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index 5f6971df7..306085351 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -869,12 +869,12 @@ OP(psxLWR) { doLWR(regs_, _Rt_, _oB_); } OP(psxLWLe) { if (checkLD(regs_, _oB_ & ~3, 0)) doLWL(regs_, _Rt_, _oB_); } OP(psxLWRe) { if (checkLD(regs_, _oB_ , 0)) doLWR(regs_, _Rt_, _oB_); } -OP(psxSB) { psxMemWrite8 (_oB_, _rRt_ & 0xff); } -OP(psxSH) { psxMemWrite16(_oB_, _rRt_ & 0xffff); } +OP(psxSB) { psxMemWrite8 (_oB_, _rRt_); } +OP(psxSH) { psxMemWrite16(_oB_, _rRt_); } OP(psxSW) { psxMemWrite32(_oB_, _rRt_); } -OP(psxSBe) { if (checkST(regs_, _oB_, 0)) psxMemWrite8 (_oB_, _rRt_ & 0xff); } -OP(psxSHe) { if (checkST(regs_, _oB_, 1)) psxMemWrite16(_oB_, _rRt_ & 0xffff); } +OP(psxSBe) { if (checkST(regs_, _oB_, 0)) psxMemWrite8 (_oB_, _rRt_); } +OP(psxSHe) { if (checkST(regs_, _oB_, 1)) psxMemWrite16(_oB_, _rRt_); } OP(psxSWe) { if (checkST(regs_, _oB_, 3)) psxMemWrite32(_oB_, _rRt_); } static void doSWL(psxRegisters *regs, u32 rt, u32 addr) { diff --git 
a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 2196fa7b7..4e03b24bb 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -388,7 +388,7 @@ u32 psxMemRead32(u32 mem) { } } -void psxMemWrite8(u32 mem, u8 value) { +void psxMemWrite8(u32 mem, u32 value) { char *p; u32 t; @@ -415,7 +415,7 @@ void psxMemWrite8(u32 mem, u8 value) { } } -void psxMemWrite16(u32 mem, u16 value) { +void psxMemWrite16(u32 mem, u32 value) { char *p; u32 t; diff --git a/libpcsxcore/psxmem.h b/libpcsxcore/psxmem.h index 4e95c4dd1..f9cc8f0c4 100644 --- a/libpcsxcore/psxmem.h +++ b/libpcsxcore/psxmem.h @@ -178,8 +178,8 @@ void psxMemShutdown(); u8 psxMemRead8 (u32 mem); u16 psxMemRead16(u32 mem); u32 psxMemRead32(u32 mem); -void psxMemWrite8 (u32 mem, u8 value); -void psxMemWrite16(u32 mem, u16 value); +void psxMemWrite8 (u32 mem, u32 value); +void psxMemWrite16(u32 mem, u32 value); void psxMemWrite32(u32 mem, u32 value); void *psxMemPointer(u32 mem); From eaa38b6df0c4b7a654f8aaa6fab21a5fd0c79af6 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 30 Oct 2023 00:33:46 +0200 Subject: [PATCH 445/597] gpulib: don't set_mode for 0 can happen with the new borderless setting --- plugins/gpulib/vout_pl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index ab56cad2d..7f31e151a 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -54,7 +54,8 @@ static void check_mode_change(int force) gpu.state.w_out_old = w_out; gpu.state.h_out_old = h_out; - cbs->pl_vout_set_mode(w_out, h_out, w, h, + if (w_out != 0 && h_out != 0) + cbs->pl_vout_set_mode(w_out, h_out, w, h, (gpu.status & PSX_GPU_STATUS_RGB24) ? 
24 : 16); } } From 32fbd56b83ec74d8bcb54e1f732b6140cb90da7e Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 31 Oct 2023 01:43:12 +0200 Subject: [PATCH 446/597] spu: sync on koff otherwise problems with FIFA99 --- plugins/dfsound/registers.c | 2 ++ plugins/dfsound/spu.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index b8620268d..05968b617 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -230,6 +230,7 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, log_unhandled("koff1 %04x %d\n", val, cycles - spu.last_keyon_cycles); val &= ~regAreaGet(H_SPUon1); } + do_samples_if_needed(cycles, 0, 2); SoundOff(0,16,val); break; //-------------------------------------------------// @@ -239,6 +240,7 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, log_unhandled("koff2 %04x %d\n", val, cycles - spu.last_keyon_cycles); val &= ~regAreaGet(H_SPUon2); } + do_samples_if_needed(cycles, 0, 2); SoundOff(16,24,val); break; //-------------------------------------------------// diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 057502e43..8e4ae1775 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -211,7 +211,7 @@ static int check_irq(int ch, unsigned char *pos) { if((spu.spuCtrl & (CTRL_ON|CTRL_IRQ)) == (CTRL_ON|CTRL_IRQ) && pos == spu.pSpuIrq) { - //printf("ch%d irq %04x\n", ch, pos - spu.spuMemC); + //printf("ch%d irq %04zx\n", ch, pos - spu.spuMemC); do_irq(); return 1; } From 5c5e6c0c5f739de80b7f8f9d6c36dda0c2fa579d Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 31 Oct 2023 01:44:38 +0200 Subject: [PATCH 447/597] cdrom: implement xa buffering somewhat libretro/pcsx_rearmed#784 --- libpcsxcore/cdrom.c | 105 ++++++++++++++++++++++++---------------- libpcsxcore/decode_xa.c | 9 ++-- libpcsxcore/decode_xa.h | 4 +- plugins/dfsound/spu.c | 8 +-- 4 files changed, 72 insertions(+), 54 deletions(-) diff --git 
a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 83f8c1c14..950b6484b 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -94,9 +94,12 @@ static struct { unsigned char Track; boolean Play, Muted; int CurTrack; - int Mode, File, Channel; + unsigned char Mode; + unsigned char FileChannelSelected; + unsigned char CurFile, CurChannel; + int FilterFile, FilterChannel; unsigned char LocL[8]; - int FirstSector; + int unused4; xa_decode_t Xa; @@ -105,7 +108,7 @@ static struct { u16 CmdInProgress; u8 Irq1Pending; - u8 unused5; + u8 AdpcmActive; u32 LastReadSeekCycles; u8 unused7; @@ -679,8 +682,7 @@ void cdrPlayReadInterrupt(void) if (!cdr.Muted && cdr.Play && !Config.Cdda) { cdrPrepCdda(read_buf, CD_FRAMESIZE_RAW / 4); cdrAttenuate(read_buf, CD_FRAMESIZE_RAW / 4, 1); - SPU_playCDDAchannel(read_buf, CD_FRAMESIZE_RAW, psxRegs.cycle, cdr.FirstSector); - cdr.FirstSector = 0; + SPU_playCDDAchannel(read_buf, CD_FRAMESIZE_RAW, psxRegs.cycle, 0); } msfiAdd(cdr.SetSectorPlay, 1); @@ -841,7 +843,8 @@ void cdrInterrupt(void) { cdr.LocL[0] = LOCL_INVALID; cdr.SubqForwardSectors = 1; cdr.TrackChanged = FALSE; - cdr.FirstSector = 1; + cdr.FileChannelSelected = 0; + cdr.AdpcmActive = 0; cdr.ReportDelay = 60; cdr.sectorsRead = 0; @@ -979,8 +982,8 @@ void cdrInterrupt(void) { break; case CdlSetfilter: - cdr.File = cdr.Param[0]; - cdr.Channel = cdr.Param[1]; + cdr.FilterFile = cdr.Param[0]; + cdr.FilterChannel = cdr.Param[1]; break; case CdlSetmode: @@ -995,8 +998,8 @@ void cdrInterrupt(void) { SetResultSize(5); cdr.Result[1] = cdr.Mode; cdr.Result[2] = 0; - cdr.Result[3] = cdr.File; - cdr.Result[4] = cdr.Channel; + cdr.Result[3] = cdr.FilterFile; + cdr.Result[4] = cdr.FilterChannel; break; case CdlGetlocL: @@ -1186,7 +1189,8 @@ void cdrInterrupt(void) { cdr.SetlocPending = 0; } cdr.Reading = 1; - cdr.FirstSector = 1; + cdr.FileChannelSelected = 0; + cdr.AdpcmActive = 0; // Fighting Force 2 - update subq time immediately // - fixes new game @@ -1324,7 +1328,9 @@ static void 
cdrUpdateTransferBuf(const u8 *buf) static void cdrReadInterrupt(void) { - u8 *buf = NULL, *hdr; + const struct { u8 file, chan, mode, coding; } *subhdr; + const u8 *buf = NULL; + int deliver_data = 1; u8 subqPos[3]; int read_ok; @@ -1357,29 +1363,45 @@ static void cdrReadInterrupt(void) if (!cdr.Stat && !cdr.Irq1Pending) cdrUpdateTransferBuf(buf); - if ((!cdr.Muted) && (cdr.Mode & MODE_STRSND) && (!Config.Xa) && (cdr.FirstSector != -1)) { // CD-XA - hdr = buf + 4; - // Firemen 2: Multi-XA files - briefings, cutscenes - if( cdr.FirstSector == 1 && (cdr.Mode & MODE_SF)==0 ) { - cdr.File = hdr[0]; - cdr.Channel = hdr[1]; + subhdr = (void *)(buf + 4); + do { + // try to process as adpcm + if (!(cdr.Mode & MODE_STRSND)) + break; + if (buf[3] != 2 || (subhdr->mode & 0x44) != 0x44) // or 0x64? + break; + CDR_LOG("f=%d m=%d %d,%3d | %d,%2d | %d,%2d\n", !!(cdr.Mode & MODE_SF), cdr.Muted, + subhdr->file, subhdr->chan, cdr.CurFile, cdr.CurChannel, cdr.FilterFile, cdr.FilterChannel); + if ((cdr.Mode & MODE_SF) && (subhdr->file != cdr.FilterFile || subhdr->chan != cdr.FilterChannel)) + break; + if (subhdr->chan & 0xe0) { // ? + if (subhdr->chan != 0xff) + log_unhandled("adpcm %d:%d\n", subhdr->file, subhdr->chan); + break; + } + if (!cdr.FileChannelSelected) { + cdr.CurFile = subhdr->file; + cdr.CurChannel = subhdr->chan; + cdr.FileChannelSelected = 1; } + else if (subhdr->file != cdr.CurFile || subhdr->chan != cdr.CurChannel) + break; - /* Gameblabla - * Skips playing on channel 255. - * Fixes missing audio in Blue's Clues : Blue's Big Musical. (Should also fix Taxi 2) - * TODO : Check if this is the proper behaviour. 
- * */ - if ((hdr[2] & 0x4) && hdr[0] == cdr.File && hdr[1] == cdr.Channel && cdr.Channel != 255) { - int ret = xa_decode_sector(&cdr.Xa, buf + 4, cdr.FirstSector); - if (!ret) { - cdrAttenuate(cdr.Xa.pcm, cdr.Xa.nsamples, cdr.Xa.stereo); - SPU_playADPCMchannel(&cdr.Xa, psxRegs.cycle, cdr.FirstSector); - cdr.FirstSector = 0; - } - else cdr.FirstSector = -1; + // accepted as adpcm + deliver_data = 0; + + if (Config.Xa) + break; + if (!cdr.Muted && cdr.AdpcmActive) { + cdrAttenuate(cdr.Xa.pcm, cdr.Xa.nsamples, cdr.Xa.stereo); + SPU_playADPCMchannel(&cdr.Xa, psxRegs.cycle, 0); } - } + // decode next + cdr.AdpcmActive = !xa_decode_sector(&cdr.Xa, buf + 4, !cdr.AdpcmActive); + } while (0); + + if ((cdr.Mode & MODE_SF) && (subhdr->mode & 0x44) == 0x44) // according to nocash + deliver_data = 0; /* Croc 2: $40 - only FORM1 (*) @@ -1387,7 +1409,7 @@ static void cdrReadInterrupt(void) Sim Theme Park - no adpcm at all (zero) */ - if (!(cdr.Mode & MODE_STRSND) || !(buf[4+2] & 0x4)) + if (deliver_data) cdrReadInterruptSetResult(cdr.StatP); msfiAdd(cdr.SetSectorPlay, 1); @@ -1397,20 +1419,17 @@ static void cdrReadInterrupt(void) /* cdrRead0: - bit 0,1 - mode - bit 2 - unknown - bit 3 - unknown - bit 4 - unknown + bit 0,1 - reg index + bit 2 - adpcm active bit 5 - 1 result ready bit 6 - 1 dma ready bit 7 - 1 command being processed */ unsigned char cdrRead0(void) { - if (cdr.ResultReady) - cdr.Ctrl |= 0x20; - else - cdr.Ctrl &= ~0x20; + cdr.Ctrl &= ~0x24; + cdr.Ctrl |= cdr.AdpcmActive << 2; + cdr.Ctrl |= cdr.ResultReady << 5; cdr.Ctrl |= 0x40; // data fifo not empty @@ -1691,8 +1710,8 @@ static void getCdInfo(void) void cdrReset() { memset(&cdr, 0, sizeof(cdr)); cdr.CurTrack = 1; - cdr.File = 1; - cdr.Channel = 1; + cdr.FilterFile = 0; + cdr.FilterChannel = 0; cdr.Reg2 = 0x1f; cdr.Stat = NoIntr; cdr.FifoOffset = DATA_SIZE; // fifo empty diff --git a/libpcsxcore/decode_xa.c b/libpcsxcore/decode_xa.c index 17df65f1e..fb6fd1356 100644 --- a/libpcsxcore/decode_xa.c +++ 
b/libpcsxcore/decode_xa.c @@ -122,7 +122,7 @@ static __inline void ADPCM_DecodeBlock16( ADPCM_Decode_t *decp, u8 filter_range, static int headtable[4] = {0,2,8,10}; //=========================================== -static void xa_decode_data( xa_decode_t *xdp, unsigned char *srcp ) { +static void xa_decode_data( xa_decode_t *xdp, const unsigned char *srcp ) { const u8 *sound_groupsp; const u8 *sound_datap, *sound_datap2; int i, j, k, nbits; @@ -297,8 +297,8 @@ u8 coding2; //============================================ static int parse_xa_audio_sector( xa_decode_t *xdp, - xa_subheader_t *subheadp, - unsigned char *sectorp, + const xa_subheader_t *subheadp, + const unsigned char *sectorp, int is_first_sector ) { if ( is_first_sector ) { switch ( AUDIO_CODING_GET_FREQ(subheadp->coding) ) { @@ -340,8 +340,7 @@ static int parse_xa_audio_sector( xa_decode_t *xdp, //=== - 0 for any other successive sector //=== return -1 if error //================================================================ -s32 xa_decode_sector( xa_decode_t *xdp, - unsigned char *sectorp, int is_first_sector ) { +s32 xa_decode_sector( xa_decode_t *xdp, const unsigned char *sectorp, int is_first_sector ) { if (parse_xa_audio_sector(xdp, (xa_subheader_t *)sectorp, sectorp + sizeof(xa_subheader_t), is_first_sector)) return -1; diff --git a/libpcsxcore/decode_xa.h b/libpcsxcore/decode_xa.h index a5994e12e..54065356a 100644 --- a/libpcsxcore/decode_xa.h +++ b/libpcsxcore/decode_xa.h @@ -40,8 +40,8 @@ typedef struct { } xa_decode_t; s32 xa_decode_sector( xa_decode_t *xdp, - unsigned char *sectorp, - int is_first_sector ); + const unsigned char *sectorp, + int is_first_sector ); #ifdef __cplusplus } diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 8e4ae1775..f5dc40627 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1338,12 +1338,12 @@ void CALLBACK SPUupdate(void) // XA AUDIO -void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycle, int is_start) +void 
CALLBACK SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycle, int unused) { if(!xap) return; if(!xap->freq) return; // no xa freq ? bye - if (is_start) + if (spu.XAPlay == spu.XAFeed) do_samples(cycle, 1); // catch up to prevent source underflows later FeedXA(xap); // call main XA feeder @@ -1351,12 +1351,12 @@ void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycle, int is_s } // CDDA AUDIO -int CALLBACK SPUplayCDDAchannel(short *pcm, int nbytes, unsigned int cycle, int is_start) +int CALLBACK SPUplayCDDAchannel(short *pcm, int nbytes, unsigned int cycle, int unused) { if (!pcm) return -1; if (nbytes<=0) return -1; - if (is_start) + if (spu.CDDAPlay == spu.CDDAFeed) do_samples(cycle, 1); // catch up to prevent source underflows later FeedCDDA((unsigned char *)pcm, nbytes); From 9bcccb919c8a2b112b5449e910ecf87bce686ec2 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 1 Nov 2023 01:47:14 +0200 Subject: [PATCH 448/597] spu: maybe better mute Blue's Clues --- plugins/dfsound/spu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index f5dc40627..502567bd4 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -816,7 +816,9 @@ static void do_channels(int ns_to) if (s_chan->bFMod == 2) // fmod freq channel memcpy(iFMod, &ChanBuf, ns_to * sizeof(iFMod[0])); - if (s_chan->bRVBActive && do_rvb) + if (!(spu.spuCtrl & CTRL_MUTE)) + ; + else if (s_chan->bRVBActive && do_rvb) mix_chan_rvb(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume, RVB); else mix_chan(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume); @@ -1237,7 +1239,7 @@ static void do_samples_finish(int *SSumLR, int ns_to, vol_l = vol_l * spu_config.iVolume >> 10; vol_r = vol_r * spu_config.iVolume >> 10; - if (!(spu.spuCtrl & CTRL_MUTE) || !(vol_l | vol_r)) + if (!(vol_l | vol_r)) { // muted? 
(rare) memset(spu.pS, 0, ns_to * 2 * sizeof(spu.pS[0])); From c8d69ce8e9b4995fa43ba722d0532eb5c53225b4 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 1 Nov 2023 01:48:23 +0200 Subject: [PATCH 449/597] frontend: drop an outdated sanity check --- frontend/menu.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/frontend/menu.c b/frontend/menu.c index 868a486a1..5a21e9676 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -2023,10 +2023,6 @@ static const char credits_text[] = static int reset_game(void) { - // sanity check - if (bios_sel == 0 && !Config.HLE) - return -1; - ClosePlugins(); OpenPlugins(); SysReset(); From 5a920d326f0508f941d6cfc6c42500e961f82a72 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 1 Nov 2023 18:28:03 +0200 Subject: [PATCH 450/597] rename gpu_unai to gpu_unai_old Because libretro's gpu_unai is different, making syncing the branches a PITA. --- Makefile | 8 ++--- configure | 6 ++-- frontend/main.c | 6 ++-- frontend/menu.c | 30 +++++++++---------- frontend/plugin_lib.h | 2 +- plugins/gpu_neon/Makefile | 1 + plugins/{gpu_unai => gpu_unai_old}/Makefile | 2 +- plugins/{gpu_unai => gpu_unai_old}/debug.h | 0 plugins/{gpu_unai => gpu_unai_old}/gpu.cpp | 0 plugins/{gpu_unai => gpu_unai_old}/gpu.h | 0 plugins/{gpu_unai => gpu_unai_old}/gpu_arm.h | 0 plugins/{gpu_unai => gpu_unai_old}/gpu_arm.s | 0 plugins/{gpu_unai => gpu_unai_old}/gpu_blit.h | 0 .../{gpu_unai => gpu_unai_old}/gpu_command.h | 0 .../gpu_fixedpoint.h | 0 .../{gpu_unai => gpu_unai_old}/gpu_inner.h | 0 .../gpu_inner_blend.h | 0 .../gpu_inner_light.h | 0 .../gpu_raster_image.h | 0 .../gpu_raster_line.h | 0 .../gpu_raster_polygon.h | 0 .../gpu_raster_sprite.h | 0 .../{gpu_unai => gpu_unai_old}/gpulib_if.cpp | 22 +++++++------- plugins/{gpu_unai => gpu_unai_old}/port.h | 0 plugins/{gpu_unai => gpu_unai_old}/profiler.h | 0 readme.txt | 4 +-- 26 files changed, 41 insertions(+), 40 deletions(-) rename plugins/{gpu_unai => gpu_unai_old}/Makefile (90%) rename plugins/{gpu_unai => 
gpu_unai_old}/debug.h (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpu.cpp (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpu.h (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpu_arm.h (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpu_arm.s (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpu_blit.h (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpu_command.h (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpu_fixedpoint.h (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpu_inner.h (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpu_inner_blend.h (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpu_inner_light.h (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpu_raster_image.h (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpu_raster_line.h (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpu_raster_polygon.h (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpu_raster_sprite.h (100%) rename plugins/{gpu_unai => gpu_unai_old}/gpulib_if.cpp (97%) rename plugins/{gpu_unai => gpu_unai_old}/port.h (100%) rename plugins/{gpu_unai => gpu_unai_old}/profiler.h (100%) diff --git a/Makefile b/Makefile index af1216faa..85886d0e0 100644 --- a/Makefile +++ b/Makefile @@ -142,12 +142,12 @@ plugins/dfxvideo/gpulib_if.o: CFLAGS += -fno-strict-aliasing plugins/dfxvideo/gpulib_if.o: plugins/dfxvideo/prim.c plugins/dfxvideo/soft.c OBJS += plugins/dfxvideo/gpulib_if.o endif -ifeq "$(BUILTIN_GPU)" "unai" -OBJS += plugins/gpu_unai/gpulib_if.o +ifeq "$(BUILTIN_GPU)" "unai_old" +OBJS += plugins/gpu_unai_old/gpulib_if.o ifeq "$(ARCH)" "arm" -OBJS += plugins/gpu_unai/gpu_arm.o +OBJS += plugins/gpu_unai_old/gpu_arm.o endif -plugins/gpu_unai/gpulib_if.o: CFLAGS += -DREARMED -O3 +plugins/gpu_unai_old/gpulib_if.o: CFLAGS += -DREARMED -O3 CC_LINK = $(CXX) endif diff --git a/configure b/configure index a72c059c1..01f26b447 100755 --- a/configure +++ b/configure @@ -39,12 +39,12 @@ check_define_val() platform_list="generic pandora maemo caanoo libretro" platform="generic" 
-builtin_gpu_list="peops unai neon senquack" +builtin_gpu_list="neon peops senquack unai_old" builtin_gpu="" sound_driver_list="oss alsa pulseaudio sdl libretro" sound_drivers="" plugins="plugins/spunull/spunull.so \ -plugins/dfxvideo/gpu_peops.so plugins/gpu_unai/gpu_unai.so plugins/gpu_senquack/gpu_senquack.so" +plugins/dfxvideo/gpu_peops.so plugins/gpu_unai_old/gpu_unai_old.so plugins/gpu_senquack/gpu_senquack.so" drc_cache_base="no" have_armv5="" have_armv6="" @@ -250,7 +250,7 @@ arm*) builtin_gpu="neon" elif [ "$have_armv7" != "yes" ]; then # pre-ARMv7 hardware is usually not fast enough for peops - builtin_gpu="unai" + builtin_gpu="unai_old" else builtin_gpu="peops" fi diff --git a/frontend/main.c b/frontend/main.c index 18ca6e57c..45e25815b 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -148,9 +148,9 @@ void emu_set_default_config(void) pl_rearmed_cbs.gpu_senquack.fast_lighting = 0; pl_rearmed_cbs.gpu_senquack.blending = 1; pl_rearmed_cbs.gpu_senquack.dithering = 0; - pl_rearmed_cbs.gpu_unai.abe_hack = - pl_rearmed_cbs.gpu_unai.no_light = - pl_rearmed_cbs.gpu_unai.no_blend = 0; + pl_rearmed_cbs.gpu_unai_old.abe_hack = + pl_rearmed_cbs.gpu_unai_old.no_light = + pl_rearmed_cbs.gpu_unai_old.no_blend = 0; memset(&pl_rearmed_cbs.gpu_peopsgl, 0, sizeof(pl_rearmed_cbs.gpu_peopsgl)); pl_rearmed_cbs.gpu_peopsgl.iVRamSize = 64; pl_rearmed_cbs.gpu_peopsgl.iTexGarbageCollection = 1; diff --git a/frontend/menu.c b/frontend/menu.c index 5a21e9676..b2d50aad7 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -434,10 +434,10 @@ static const struct { CE_INTVAL_V(frameskip, 4), CE_INTVAL_P(gpu_peops.iUseDither), CE_INTVAL_P(gpu_peops.dwActFixes), - CE_INTVAL_P(gpu_unai.lineskip), - CE_INTVAL_P(gpu_unai.abe_hack), - CE_INTVAL_P(gpu_unai.no_light), - CE_INTVAL_P(gpu_unai.no_blend), + CE_INTVAL_P(gpu_unai_old.lineskip), + CE_INTVAL_P(gpu_unai_old.abe_hack), + CE_INTVAL_P(gpu_unai_old.no_light), + CE_INTVAL_P(gpu_unai_old.no_blend), 
CE_INTVAL_P(gpu_senquack.ilace_force), CE_INTVAL_P(gpu_senquack.pixel_skip), CE_INTVAL_P(gpu_senquack.lighting), @@ -1384,19 +1384,19 @@ static int menu_loop_plugin_gpu_neon(int id, int keys) #endif -static menu_entry e_menu_plugin_gpu_unai[] = +static menu_entry e_menu_plugin_gpu_unai_old[] = { - mee_onoff ("Skip every 2nd line", 0, pl_rearmed_cbs.gpu_unai.lineskip, 1), - mee_onoff ("Abe's Odyssey hack", 0, pl_rearmed_cbs.gpu_unai.abe_hack, 1), - mee_onoff ("Disable lighting", 0, pl_rearmed_cbs.gpu_unai.no_light, 1), - mee_onoff ("Disable blending", 0, pl_rearmed_cbs.gpu_unai.no_blend, 1), + mee_onoff ("Skip every 2nd line", 0, pl_rearmed_cbs.gpu_unai_old.lineskip, 1), + mee_onoff ("Abe's Odyssey hack", 0, pl_rearmed_cbs.gpu_unai_old.abe_hack, 1), + mee_onoff ("Disable lighting", 0, pl_rearmed_cbs.gpu_unai_old.no_light, 1), + mee_onoff ("Disable blending", 0, pl_rearmed_cbs.gpu_unai_old.no_blend, 1), mee_end, }; -static int menu_loop_plugin_gpu_unai(int id, int keys) +static int menu_loop_plugin_gpu_unai_old(int id, int keys) { int sel = 0; - me_loop(e_menu_plugin_gpu_unai, &sel); + me_loop(e_menu_plugin_gpu_unai_old, &sel); return 0; } @@ -1519,7 +1519,7 @@ static const char h_plugin_gpu[] = "builtin_gpu is the NEON GPU, very fast and accurate\n" #endif "gpu_peops is Pete's soft GPU, slow but accurate\n" - "gpu_unai is GPU from PCSX4ALL, fast but glitchy\n" + "gpu_unai_old is from old PCSX4ALL, fast but glitchy\n" "gpu_senquack is more accurate but slower\n" "gpu_gles Pete's hw GPU, uses 3D chip but is glitchy\n" "must save config and reload the game if changed"; @@ -1527,8 +1527,8 @@ static const char h_plugin_spu[] = "spunull effectively disables sound\n" "must save config and reload the game if changed"; static const char h_gpu_peops[] = "Configure P.E.Op.S. SoftGL Driver V1.17"; static const char h_gpu_peopsgl[]= "Configure P.E.Op.S. 
MesaGL Driver V1.78"; -static const char h_gpu_unai[] = "Configure Unai/PCSX4ALL Team GPU plugin"; -static const char h_gpu_senquack[] = "Configure Unai/PCSX4ALL Senquack plugin"; +static const char h_gpu_unai_old[] = "Configure Unai/PCSX4ALL Team GPU plugin (old)"; +static const char h_gpu_senquack[] = "Configure Unai/PCSX4ALL Senquack plugin"; static const char h_spu[] = "Configure built-in P.E.Op.S. Sound Driver V1.7"; static menu_entry e_menu_plugin_options[] = @@ -1540,7 +1540,7 @@ static menu_entry e_menu_plugin_options[] = mee_handler_h ("Configure built-in GPU plugin", menu_loop_plugin_gpu_neon, h_gpu_neon), #endif mee_handler_h ("Configure gpu_peops plugin", menu_loop_plugin_gpu_peops, h_gpu_peops), - mee_handler_h ("Configure gpu_unai GPU plugin", menu_loop_plugin_gpu_unai, h_gpu_unai), + mee_handler_h ("Configure gpu_unai_old GPU plugin", menu_loop_plugin_gpu_unai_old, h_gpu_unai_old), mee_handler_h ("Configure gpu_senquack GPU plugin", menu_loop_plugin_gpu_senquack, h_gpu_senquack), mee_handler_h ("Configure gpu_gles GPU plugin", menu_loop_plugin_gpu_peopsgl, h_gpu_peopsgl), mee_handler_h ("Configure built-in SPU plugin", menu_loop_plugin_spu, h_spu), diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index 2ac49f2ce..cb1f4f77f 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -93,7 +93,7 @@ struct rearmed_cbs { int abe_hack; int no_light, no_blend; int lineskip; - } gpu_unai; + } gpu_unai_old; struct { int ilace_force; int pixel_skip; diff --git a/plugins/gpu_neon/Makefile b/plugins/gpu_neon/Makefile index 955feab6a..810809971 100644 --- a/plugins/gpu_neon/Makefile +++ b/plugins/gpu_neon/Makefile @@ -4,6 +4,7 @@ include ../../config.mak SRC += psx_gpu_if.c +CFLAGS += -I../../include CFLAGS += -DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP CFLAGS += -DNEON_BUILD ifeq "$(HAVE_NEON)" "1" diff --git a/plugins/gpu_unai/Makefile b/plugins/gpu_unai_old/Makefile similarity index 90% rename from plugins/gpu_unai/Makefile rename to 
plugins/gpu_unai_old/Makefile index 1075ee522..ed3eff0ad 100644 --- a/plugins/gpu_unai/Makefile +++ b/plugins/gpu_unai_old/Makefile @@ -12,5 +12,5 @@ SRC += gpu_arm.s endif #BIN_STANDALONE = gpuPCSX4ALL.so -BIN_GPULIB = gpu_unai.so +BIN_GPULIB = gpu_unai_old.so include ../gpulib/gpulib.mak diff --git a/plugins/gpu_unai/debug.h b/plugins/gpu_unai_old/debug.h similarity index 100% rename from plugins/gpu_unai/debug.h rename to plugins/gpu_unai_old/debug.h diff --git a/plugins/gpu_unai/gpu.cpp b/plugins/gpu_unai_old/gpu.cpp similarity index 100% rename from plugins/gpu_unai/gpu.cpp rename to plugins/gpu_unai_old/gpu.cpp diff --git a/plugins/gpu_unai/gpu.h b/plugins/gpu_unai_old/gpu.h similarity index 100% rename from plugins/gpu_unai/gpu.h rename to plugins/gpu_unai_old/gpu.h diff --git a/plugins/gpu_unai/gpu_arm.h b/plugins/gpu_unai_old/gpu_arm.h similarity index 100% rename from plugins/gpu_unai/gpu_arm.h rename to plugins/gpu_unai_old/gpu_arm.h diff --git a/plugins/gpu_unai/gpu_arm.s b/plugins/gpu_unai_old/gpu_arm.s similarity index 100% rename from plugins/gpu_unai/gpu_arm.s rename to plugins/gpu_unai_old/gpu_arm.s diff --git a/plugins/gpu_unai/gpu_blit.h b/plugins/gpu_unai_old/gpu_blit.h similarity index 100% rename from plugins/gpu_unai/gpu_blit.h rename to plugins/gpu_unai_old/gpu_blit.h diff --git a/plugins/gpu_unai/gpu_command.h b/plugins/gpu_unai_old/gpu_command.h similarity index 100% rename from plugins/gpu_unai/gpu_command.h rename to plugins/gpu_unai_old/gpu_command.h diff --git a/plugins/gpu_unai/gpu_fixedpoint.h b/plugins/gpu_unai_old/gpu_fixedpoint.h similarity index 100% rename from plugins/gpu_unai/gpu_fixedpoint.h rename to plugins/gpu_unai_old/gpu_fixedpoint.h diff --git a/plugins/gpu_unai/gpu_inner.h b/plugins/gpu_unai_old/gpu_inner.h similarity index 100% rename from plugins/gpu_unai/gpu_inner.h rename to plugins/gpu_unai_old/gpu_inner.h diff --git a/plugins/gpu_unai/gpu_inner_blend.h b/plugins/gpu_unai_old/gpu_inner_blend.h similarity index 
100% rename from plugins/gpu_unai/gpu_inner_blend.h rename to plugins/gpu_unai_old/gpu_inner_blend.h diff --git a/plugins/gpu_unai/gpu_inner_light.h b/plugins/gpu_unai_old/gpu_inner_light.h similarity index 100% rename from plugins/gpu_unai/gpu_inner_light.h rename to plugins/gpu_unai_old/gpu_inner_light.h diff --git a/plugins/gpu_unai/gpu_raster_image.h b/plugins/gpu_unai_old/gpu_raster_image.h similarity index 100% rename from plugins/gpu_unai/gpu_raster_image.h rename to plugins/gpu_unai_old/gpu_raster_image.h diff --git a/plugins/gpu_unai/gpu_raster_line.h b/plugins/gpu_unai_old/gpu_raster_line.h similarity index 100% rename from plugins/gpu_unai/gpu_raster_line.h rename to plugins/gpu_unai_old/gpu_raster_line.h diff --git a/plugins/gpu_unai/gpu_raster_polygon.h b/plugins/gpu_unai_old/gpu_raster_polygon.h similarity index 100% rename from plugins/gpu_unai/gpu_raster_polygon.h rename to plugins/gpu_unai_old/gpu_raster_polygon.h diff --git a/plugins/gpu_unai/gpu_raster_sprite.h b/plugins/gpu_unai_old/gpu_raster_sprite.h similarity index 100% rename from plugins/gpu_unai/gpu_raster_sprite.h rename to plugins/gpu_unai_old/gpu_raster_sprite.h diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai_old/gpulib_if.cpp similarity index 97% rename from plugins/gpu_unai/gpulib_if.cpp rename to plugins/gpu_unai_old/gpulib_if.cpp index 02f6b9221..cc3280299 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai_old/gpulib_if.cpp @@ -104,10 +104,10 @@ static u32 GPU_GP1; /////////////////////////////////////////////////////////////////////////////// -#include "../gpu_unai/gpu_fixedpoint.h" +#include "gpu_fixedpoint.h" // Inner loop driver instanciation file -#include "../gpu_unai/gpu_inner.h" +#include "gpu_inner.h" // GPU Raster Macros #define GPU_RGB16(rgb) ((((rgb)&0xF80000)>>9)|(((rgb)&0xF800)>>6)|(((rgb)&0xF8)>>3)) @@ -120,19 +120,19 @@ static u32 GPU_GP1; #define GPU_SWAP(a,b,t) {(t)=(a);(a)=(b);(b)=(t);} // GPU internal image drawing functions 
-#include "../gpu_unai/gpu_raster_image.h" +#include "gpu_raster_image.h" // GPU internal line drawing functions -#include "../gpu_unai/gpu_raster_line.h" +#include "gpu_raster_line.h" // GPU internal polygon drawing functions -#include "../gpu_unai/gpu_raster_polygon.h" +#include "gpu_raster_polygon.h" // GPU internal sprite drawing functions -#include "../gpu_unai/gpu_raster_sprite.h" +#include "gpu_raster_sprite.h" // GPU command buffer execution/store -#include "../gpu_unai/gpu_command.h" +#include "gpu_command.h" ///////////////////////////////////////////////////////////////////////////// @@ -544,10 +544,10 @@ void renderer_set_interlace(int enable, int is_odd) void renderer_set_config(const struct rearmed_cbs *cbs) { - force_interlace = cbs->gpu_unai.lineskip; - enableAbbeyHack = cbs->gpu_unai.abe_hack; - light = !cbs->gpu_unai.no_light; - blend = !cbs->gpu_unai.no_blend; + force_interlace = cbs->gpu_unai_old.lineskip; + enableAbbeyHack = cbs->gpu_unai_old.abe_hack; + light = !cbs->gpu_unai_old.no_light; + blend = !cbs->gpu_unai_old.no_blend; GPU_FrameBuffer = (u16 *)gpu.vram; } diff --git a/plugins/gpu_unai/port.h b/plugins/gpu_unai_old/port.h similarity index 100% rename from plugins/gpu_unai/port.h rename to plugins/gpu_unai_old/port.h diff --git a/plugins/gpu_unai/profiler.h b/plugins/gpu_unai_old/profiler.h similarity index 100% rename from plugins/gpu_unai/profiler.h rename to plugins/gpu_unai_old/profiler.h diff --git a/readme.txt b/readme.txt index 5e3f3e620..1125bf94f 100644 --- a/readme.txt +++ b/readme.txt @@ -89,8 +89,8 @@ builtin_gpu - this is either Exophase's ARM NEON GPU (accurate and fast, gpu_peops or gpu_unai (depends on compile options). gpu_peops.so - P.E.Op.S. soft GPU, reasonably accurate but slow (also found with older emulators on PC) -gpu_unai.so - Unai's plugin from PCSX4ALL project. Faster than P.E.Op.S. - but has some glitches. +gpu_unai_old.so- Unai's plugin from PCSX4ALL project. Faster than P.E.Op.S. 
+ but has some glitches (old version). gpu_gles.so - experimental port of P.E.Op.S. MesaGL plugin to OpenGL ES. Occasionally faster but has lots of glitches and seems to be rather unstable (may crash the driver/system). From 4cfc568dce10c4ff9a476e229c56b17f8f1ed678 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 1 Nov 2023 18:47:16 +0200 Subject: [PATCH 451/597] rename gpu_senquack to gpu_unai to reduce the pain of syncing with libretro --- Makefile | 8 ++-- configure | 4 +- frontend/main.c | 12 +++--- frontend/menu.c | 38 +++++++++---------- frontend/plugin_lib.h | 2 +- plugins/{gpu_senquack => gpu_unai}/Makefile | 2 +- .../README_senquack.txt | 0 plugins/{gpu_senquack => gpu_unai}/debug.h | 0 plugins/{gpu_senquack => gpu_unai}/gpu.cpp | 0 plugins/{gpu_senquack => gpu_unai}/gpu.h | 0 plugins/{gpu_senquack => gpu_unai}/gpu_arm.S | 0 plugins/{gpu_senquack => gpu_unai}/gpu_arm.h | 0 plugins/{gpu_senquack => gpu_unai}/gpu_blit.h | 0 .../{gpu_senquack => gpu_unai}/gpu_command.h | 0 .../gpu_fixedpoint.h | 0 .../{gpu_senquack => gpu_unai}/gpu_inner.h | 0 .../gpu_inner_blend.h | 0 .../gpu_inner_blend_arm.h | 0 .../gpu_inner_blend_arm5.h | 0 .../gpu_inner_blend_arm7.h | 0 .../gpu_inner_light.h | 0 .../gpu_inner_light_arm.h | 0 .../gpu_inner_quantization.h | 0 .../gpu_raster_image.h | 0 .../gpu_raster_line.h | 0 .../gpu_raster_polygon.h | 0 .../gpu_raster_sprite.h | 0 .../{gpu_senquack => gpu_unai}/gpu_senquack.h | 0 .../{gpu_senquack => gpu_unai}/gpulib_if.cpp | 14 +++---- plugins/{gpu_senquack => gpu_unai}/port.h | 0 plugins/{gpu_senquack => gpu_unai}/profiler.h | 0 31 files changed, 40 insertions(+), 40 deletions(-) rename plugins/{gpu_senquack => gpu_unai}/Makefile (92%) rename plugins/{gpu_senquack => gpu_unai}/README_senquack.txt (100%) rename plugins/{gpu_senquack => gpu_unai}/debug.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu.cpp (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_arm.S (100%) rename 
plugins/{gpu_senquack => gpu_unai}/gpu_arm.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_blit.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_command.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_fixedpoint.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_inner.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_inner_blend.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_inner_blend_arm.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_inner_blend_arm5.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_inner_blend_arm7.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_inner_light.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_inner_light_arm.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_inner_quantization.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_raster_image.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_raster_line.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_raster_polygon.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_raster_sprite.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpu_senquack.h (100%) rename plugins/{gpu_senquack => gpu_unai}/gpulib_if.cpp (97%) rename plugins/{gpu_senquack => gpu_unai}/port.h (100%) rename plugins/{gpu_senquack => gpu_unai}/profiler.h (100%) diff --git a/Makefile b/Makefile index 85886d0e0..8cbf5c66b 100644 --- a/Makefile +++ b/Makefile @@ -151,12 +151,12 @@ plugins/gpu_unai_old/gpulib_if.o: CFLAGS += -DREARMED -O3 CC_LINK = $(CXX) endif -ifeq "$(BUILTIN_GPU)" "senquack" -OBJS += plugins/gpu_senquack/gpulib_if.o +ifeq "$(BUILTIN_GPU)" "unai" +OBJS += plugins/gpu_unai/gpulib_if.o ifeq "$(ARCH)" "arm" -OBJS += plugins/gpu_senquack/gpu_arm.o +OBJS += plugins/gpu_unai/gpu_arm.o endif -plugins/gpu_senquack/gpulib_if.o: CFLAGS += -DREARMED -O3 +plugins/gpu_unai/gpulib_if.o: CFLAGS += -DREARMED -O3 CC_LINK = $(CXX) endif diff --git a/configure b/configure index 01f26b447..ac7af0e2e 100755 --- a/configure +++ b/configure @@ 
-39,12 +39,12 @@ check_define_val() platform_list="generic pandora maemo caanoo libretro" platform="generic" -builtin_gpu_list="neon peops senquack unai_old" +builtin_gpu_list="neon peops unai unai_old" builtin_gpu="" sound_driver_list="oss alsa pulseaudio sdl libretro" sound_drivers="" plugins="plugins/spunull/spunull.so \ -plugins/dfxvideo/gpu_peops.so plugins/gpu_unai_old/gpu_unai_old.so plugins/gpu_senquack/gpu_senquack.so" +plugins/dfxvideo/gpu_peops.so plugins/gpu_unai_old/gpu_unai_old.so plugins/gpu_unai/gpu_unai.so" drc_cache_base="no" have_armv5="" have_armv6="" diff --git a/frontend/main.c b/frontend/main.c index 45e25815b..cec1fbbf3 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -142,12 +142,12 @@ void emu_set_default_config(void) pl_rearmed_cbs.gpu_neon.enhancement_no_main = 0; pl_rearmed_cbs.gpu_peops.iUseDither = 0; pl_rearmed_cbs.gpu_peops.dwActFixes = 1<<7; - pl_rearmed_cbs.gpu_senquack.ilace_force = 0; - pl_rearmed_cbs.gpu_senquack.pixel_skip = 0; - pl_rearmed_cbs.gpu_senquack.lighting = 1; - pl_rearmed_cbs.gpu_senquack.fast_lighting = 0; - pl_rearmed_cbs.gpu_senquack.blending = 1; - pl_rearmed_cbs.gpu_senquack.dithering = 0; + pl_rearmed_cbs.gpu_unai.ilace_force = 0; + pl_rearmed_cbs.gpu_unai.pixel_skip = 0; + pl_rearmed_cbs.gpu_unai.lighting = 1; + pl_rearmed_cbs.gpu_unai.fast_lighting = 0; + pl_rearmed_cbs.gpu_unai.blending = 1; + pl_rearmed_cbs.gpu_unai.dithering = 0; pl_rearmed_cbs.gpu_unai_old.abe_hack = pl_rearmed_cbs.gpu_unai_old.no_light = pl_rearmed_cbs.gpu_unai_old.no_blend = 0; diff --git a/frontend/menu.c b/frontend/menu.c index b2d50aad7..9200e10de 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -438,13 +438,13 @@ static const struct { CE_INTVAL_P(gpu_unai_old.abe_hack), CE_INTVAL_P(gpu_unai_old.no_light), CE_INTVAL_P(gpu_unai_old.no_blend), - CE_INTVAL_P(gpu_senquack.ilace_force), - CE_INTVAL_P(gpu_senquack.pixel_skip), - CE_INTVAL_P(gpu_senquack.lighting), - CE_INTVAL_P(gpu_senquack.fast_lighting), - 
CE_INTVAL_P(gpu_senquack.blending), - CE_INTVAL_P(gpu_senquack.dithering), - CE_INTVAL_P(gpu_senquack.scale_hires), + CE_INTVAL_P(gpu_unai.ilace_force), + CE_INTVAL_P(gpu_unai.pixel_skip), + CE_INTVAL_P(gpu_unai.lighting), + CE_INTVAL_P(gpu_unai.fast_lighting), + CE_INTVAL_P(gpu_unai.blending), + CE_INTVAL_P(gpu_unai.dithering), + CE_INTVAL_P(gpu_unai.scale_hires), CE_INTVAL_P(gpu_neon.allow_interlace), CE_INTVAL_P(gpu_neon.enhancement_enable), CE_INTVAL_P(gpu_neon.enhancement_no_main), @@ -1400,21 +1400,21 @@ static int menu_loop_plugin_gpu_unai_old(int id, int keys) return 0; } -static menu_entry e_menu_plugin_gpu_senquack[] = +static menu_entry e_menu_plugin_gpu_unai[] = { - mee_onoff ("Interlace", 0, pl_rearmed_cbs.gpu_senquack.ilace_force, 1), - mee_onoff ("Dithering", 0, pl_rearmed_cbs.gpu_senquack.dithering, 1), - mee_onoff ("Lighting", 0, pl_rearmed_cbs.gpu_senquack.lighting, 1), - mee_onoff ("Fast lighting", 0, pl_rearmed_cbs.gpu_senquack.fast_lighting, 1), - mee_onoff ("Blending", 0, pl_rearmed_cbs.gpu_senquack.blending, 1), - mee_onoff ("Pixel skip", 0, pl_rearmed_cbs.gpu_senquack.pixel_skip, 1), + mee_onoff ("Interlace", 0, pl_rearmed_cbs.gpu_unai.ilace_force, 1), + mee_onoff ("Dithering", 0, pl_rearmed_cbs.gpu_unai.dithering, 1), + mee_onoff ("Lighting", 0, pl_rearmed_cbs.gpu_unai.lighting, 1), + mee_onoff ("Fast lighting", 0, pl_rearmed_cbs.gpu_unai.fast_lighting, 1), + mee_onoff ("Blending", 0, pl_rearmed_cbs.gpu_unai.blending, 1), + mee_onoff ("Pixel skip", 0, pl_rearmed_cbs.gpu_unai.pixel_skip, 1), mee_end, }; -static int menu_loop_plugin_gpu_senquack(int id, int keys) +static int menu_loop_plugin_gpu_unai(int id, int keys) { int sel = 0; - me_loop(e_menu_plugin_gpu_senquack, &sel); + me_loop(e_menu_plugin_gpu_unai, &sel); return 0; } @@ -1520,7 +1520,7 @@ static const char h_plugin_gpu[] = #endif "gpu_peops is Pete's soft GPU, slow but accurate\n" "gpu_unai_old is from old PCSX4ALL, fast but glitchy\n" - "gpu_senquack is more accurate but 
slower\n" + "gpu_unai is newer, more accurate but slower\n" "gpu_gles Pete's hw GPU, uses 3D chip but is glitchy\n" "must save config and reload the game if changed"; static const char h_plugin_spu[] = "spunull effectively disables sound\n" @@ -1528,7 +1528,7 @@ static const char h_plugin_spu[] = "spunull effectively disables sound\n" static const char h_gpu_peops[] = "Configure P.E.Op.S. SoftGL Driver V1.17"; static const char h_gpu_peopsgl[]= "Configure P.E.Op.S. MesaGL Driver V1.78"; static const char h_gpu_unai_old[] = "Configure Unai/PCSX4ALL Team GPU plugin (old)"; -static const char h_gpu_senquack[] = "Configure Unai/PCSX4ALL Senquack plugin"; +static const char h_gpu_unai[] = "Configure Unai/PCSX4ALL Team plugin (new)"; static const char h_spu[] = "Configure built-in P.E.Op.S. Sound Driver V1.7"; static menu_entry e_menu_plugin_options[] = @@ -1541,7 +1541,7 @@ static menu_entry e_menu_plugin_options[] = #endif mee_handler_h ("Configure gpu_peops plugin", menu_loop_plugin_gpu_peops, h_gpu_peops), mee_handler_h ("Configure gpu_unai_old GPU plugin", menu_loop_plugin_gpu_unai_old, h_gpu_unai_old), - mee_handler_h ("Configure gpu_senquack GPU plugin", menu_loop_plugin_gpu_senquack, h_gpu_senquack), + mee_handler_h ("Configure gpu_unai GPU plugin", menu_loop_plugin_gpu_unai, h_gpu_unai), mee_handler_h ("Configure gpu_gles GPU plugin", menu_loop_plugin_gpu_peopsgl, h_gpu_peopsgl), mee_handler_h ("Configure built-in SPU plugin", menu_loop_plugin_spu, h_spu), mee_end, diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index cb1f4f77f..6cfe59649 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -102,7 +102,7 @@ struct rearmed_cbs { int blending; int dithering; int scale_hires; - } gpu_senquack; + } gpu_unai; struct { int dwActFixes; int bDrawDither, iFilterType, iFrameTexType; diff --git a/plugins/gpu_senquack/Makefile b/plugins/gpu_unai/Makefile similarity index 92% rename from plugins/gpu_senquack/Makefile rename to 
plugins/gpu_unai/Makefile index c3be35b4d..12c688e9e 100644 --- a/plugins/gpu_senquack/Makefile +++ b/plugins/gpu_unai/Makefile @@ -15,5 +15,5 @@ SRC += gpu_arm.S endif #BIN_STANDALONE = gpuPCSX4ALL.so -BIN_GPULIB = gpu_senquack.so +BIN_GPULIB = gpu_unai.so include ../gpulib/gpulib.mak diff --git a/plugins/gpu_senquack/README_senquack.txt b/plugins/gpu_unai/README_senquack.txt similarity index 100% rename from plugins/gpu_senquack/README_senquack.txt rename to plugins/gpu_unai/README_senquack.txt diff --git a/plugins/gpu_senquack/debug.h b/plugins/gpu_unai/debug.h similarity index 100% rename from plugins/gpu_senquack/debug.h rename to plugins/gpu_unai/debug.h diff --git a/plugins/gpu_senquack/gpu.cpp b/plugins/gpu_unai/gpu.cpp similarity index 100% rename from plugins/gpu_senquack/gpu.cpp rename to plugins/gpu_unai/gpu.cpp diff --git a/plugins/gpu_senquack/gpu.h b/plugins/gpu_unai/gpu.h similarity index 100% rename from plugins/gpu_senquack/gpu.h rename to plugins/gpu_unai/gpu.h diff --git a/plugins/gpu_senquack/gpu_arm.S b/plugins/gpu_unai/gpu_arm.S similarity index 100% rename from plugins/gpu_senquack/gpu_arm.S rename to plugins/gpu_unai/gpu_arm.S diff --git a/plugins/gpu_senquack/gpu_arm.h b/plugins/gpu_unai/gpu_arm.h similarity index 100% rename from plugins/gpu_senquack/gpu_arm.h rename to plugins/gpu_unai/gpu_arm.h diff --git a/plugins/gpu_senquack/gpu_blit.h b/plugins/gpu_unai/gpu_blit.h similarity index 100% rename from plugins/gpu_senquack/gpu_blit.h rename to plugins/gpu_unai/gpu_blit.h diff --git a/plugins/gpu_senquack/gpu_command.h b/plugins/gpu_unai/gpu_command.h similarity index 100% rename from plugins/gpu_senquack/gpu_command.h rename to plugins/gpu_unai/gpu_command.h diff --git a/plugins/gpu_senquack/gpu_fixedpoint.h b/plugins/gpu_unai/gpu_fixedpoint.h similarity index 100% rename from plugins/gpu_senquack/gpu_fixedpoint.h rename to plugins/gpu_unai/gpu_fixedpoint.h diff --git a/plugins/gpu_senquack/gpu_inner.h b/plugins/gpu_unai/gpu_inner.h 
similarity index 100% rename from plugins/gpu_senquack/gpu_inner.h rename to plugins/gpu_unai/gpu_inner.h diff --git a/plugins/gpu_senquack/gpu_inner_blend.h b/plugins/gpu_unai/gpu_inner_blend.h similarity index 100% rename from plugins/gpu_senquack/gpu_inner_blend.h rename to plugins/gpu_unai/gpu_inner_blend.h diff --git a/plugins/gpu_senquack/gpu_inner_blend_arm.h b/plugins/gpu_unai/gpu_inner_blend_arm.h similarity index 100% rename from plugins/gpu_senquack/gpu_inner_blend_arm.h rename to plugins/gpu_unai/gpu_inner_blend_arm.h diff --git a/plugins/gpu_senquack/gpu_inner_blend_arm5.h b/plugins/gpu_unai/gpu_inner_blend_arm5.h similarity index 100% rename from plugins/gpu_senquack/gpu_inner_blend_arm5.h rename to plugins/gpu_unai/gpu_inner_blend_arm5.h diff --git a/plugins/gpu_senquack/gpu_inner_blend_arm7.h b/plugins/gpu_unai/gpu_inner_blend_arm7.h similarity index 100% rename from plugins/gpu_senquack/gpu_inner_blend_arm7.h rename to plugins/gpu_unai/gpu_inner_blend_arm7.h diff --git a/plugins/gpu_senquack/gpu_inner_light.h b/plugins/gpu_unai/gpu_inner_light.h similarity index 100% rename from plugins/gpu_senquack/gpu_inner_light.h rename to plugins/gpu_unai/gpu_inner_light.h diff --git a/plugins/gpu_senquack/gpu_inner_light_arm.h b/plugins/gpu_unai/gpu_inner_light_arm.h similarity index 100% rename from plugins/gpu_senquack/gpu_inner_light_arm.h rename to plugins/gpu_unai/gpu_inner_light_arm.h diff --git a/plugins/gpu_senquack/gpu_inner_quantization.h b/plugins/gpu_unai/gpu_inner_quantization.h similarity index 100% rename from plugins/gpu_senquack/gpu_inner_quantization.h rename to plugins/gpu_unai/gpu_inner_quantization.h diff --git a/plugins/gpu_senquack/gpu_raster_image.h b/plugins/gpu_unai/gpu_raster_image.h similarity index 100% rename from plugins/gpu_senquack/gpu_raster_image.h rename to plugins/gpu_unai/gpu_raster_image.h diff --git a/plugins/gpu_senquack/gpu_raster_line.h b/plugins/gpu_unai/gpu_raster_line.h similarity index 100% rename from 
plugins/gpu_senquack/gpu_raster_line.h rename to plugins/gpu_unai/gpu_raster_line.h diff --git a/plugins/gpu_senquack/gpu_raster_polygon.h b/plugins/gpu_unai/gpu_raster_polygon.h similarity index 100% rename from plugins/gpu_senquack/gpu_raster_polygon.h rename to plugins/gpu_unai/gpu_raster_polygon.h diff --git a/plugins/gpu_senquack/gpu_raster_sprite.h b/plugins/gpu_unai/gpu_raster_sprite.h similarity index 100% rename from plugins/gpu_senquack/gpu_raster_sprite.h rename to plugins/gpu_unai/gpu_raster_sprite.h diff --git a/plugins/gpu_senquack/gpu_senquack.h b/plugins/gpu_unai/gpu_senquack.h similarity index 100% rename from plugins/gpu_senquack/gpu_senquack.h rename to plugins/gpu_unai/gpu_senquack.h diff --git a/plugins/gpu_senquack/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp similarity index 97% rename from plugins/gpu_senquack/gpulib_if.cpp rename to plugins/gpu_unai/gpulib_if.cpp index e5a51aa50..7b148d4f8 100644 --- a/plugins/gpu_senquack/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -635,13 +635,13 @@ void renderer_set_interlace(int enable, int is_odd) void renderer_set_config(const struct rearmed_cbs *cbs) { gpu_senquack.vram = (u16*)gpu.vram; - gpu_senquack.config.ilace_force = cbs->gpu_senquack.ilace_force; - gpu_senquack.config.pixel_skip = cbs->gpu_senquack.pixel_skip; - gpu_senquack.config.lighting = cbs->gpu_senquack.lighting; - gpu_senquack.config.fast_lighting = cbs->gpu_senquack.fast_lighting; - gpu_senquack.config.blending = cbs->gpu_senquack.blending; - gpu_senquack.config.dithering = cbs->gpu_senquack.dithering; - gpu_senquack.config.scale_hires = cbs->gpu_senquack.scale_hires; + gpu_senquack.config.ilace_force = cbs->gpu_unai.ilace_force; + gpu_senquack.config.pixel_skip = cbs->gpu_unai.pixel_skip; + gpu_senquack.config.lighting = cbs->gpu_unai.lighting; + gpu_senquack.config.fast_lighting = cbs->gpu_unai.fast_lighting; + gpu_senquack.config.blending = cbs->gpu_unai.blending; + gpu_senquack.config.dithering = 
cbs->gpu_unai.dithering; + gpu_senquack.config.scale_hires = cbs->gpu_unai.scale_hires; } // vim:shiftwidth=2:expandtab diff --git a/plugins/gpu_senquack/port.h b/plugins/gpu_unai/port.h similarity index 100% rename from plugins/gpu_senquack/port.h rename to plugins/gpu_unai/port.h diff --git a/plugins/gpu_senquack/profiler.h b/plugins/gpu_unai/profiler.h similarity index 100% rename from plugins/gpu_senquack/profiler.h rename to plugins/gpu_unai/profiler.h From e223fa15d18a141a26d73683d036d130cc5c6e8d Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 1 Nov 2023 18:56:36 +0200 Subject: [PATCH 452/597] gpu_unai: sync with libretro --- Makefile | 2 +- plugins/gpu_unai/Makefile | 2 +- plugins/gpu_unai/gpu.cpp | 418 ++++++++-------- plugins/gpu_unai/gpu.h | 10 +- plugins/gpu_unai/gpu_arm.h | 2 +- plugins/gpu_unai/gpu_command.h | 302 ++++++------ plugins/gpu_unai/gpu_inner.h | 177 +++---- plugins/gpu_unai/gpu_inner_light.h | 14 +- plugins/gpu_unai/gpu_inner_light_arm.h | 4 +- plugins/gpu_unai/gpu_inner_quantization.h | 8 +- plugins/gpu_unai/gpu_raster_image.h | 94 ++-- plugins/gpu_unai/gpu_raster_line.h | 58 +-- plugins/gpu_unai/gpu_raster_polygon.h | 122 ++--- plugins/gpu_unai/gpu_raster_sprite.h | 68 +-- .../gpu_unai/{gpu_senquack.h => gpu_unai.h} | 105 ++-- plugins/gpu_unai/gpulib_if.cpp | 465 ++++++++++++------ plugins/gpulib/gpu.h | 4 + 17 files changed, 1054 insertions(+), 801 deletions(-) rename plugins/gpu_unai/{gpu_senquack.h => gpu_unai.h} (84%) diff --git a/Makefile b/Makefile index 8cbf5c66b..af3e1ab60 100644 --- a/Makefile +++ b/Makefile @@ -156,7 +156,7 @@ OBJS += plugins/gpu_unai/gpulib_if.o ifeq "$(ARCH)" "arm" OBJS += plugins/gpu_unai/gpu_arm.o endif -plugins/gpu_unai/gpulib_if.o: CFLAGS += -DREARMED -O3 +plugins/gpu_unai/gpulib_if.o: CFLAGS += -DREARMED -DUSE_GPULIB=1 -O3 CC_LINK = $(CXX) endif diff --git a/plugins/gpu_unai/Makefile b/plugins/gpu_unai/Makefile index 12c688e9e..756d19aa8 100644 --- a/plugins/gpu_unai/Makefile +++ 
b/plugins/gpu_unai/Makefile @@ -3,7 +3,7 @@ CFLAGS += -DREARMED CFLAGS += -I../../include #CFLAGS += -DINLINE="static __inline__" #CFLAGS += -Dasm="__asm__ __volatile__" -#CFLAGS += -DUSE_GPULIB=1 +CFLAGS += -DUSE_GPULIB=1 include ../../config.mak diff --git a/plugins/gpu_unai/gpu.cpp b/plugins/gpu_unai/gpu.cpp index 5f2929fca..c3f709542 100644 --- a/plugins/gpu_unai/gpu.cpp +++ b/plugins/gpu_unai/gpu.cpp @@ -23,7 +23,7 @@ #include "plugins.h" #include "psxcommon.h" //#include "port.h" -#include "gpu_senquack.h" +#include "gpu_unai.h" #define VIDEO_WIDTH 320 @@ -33,7 +33,7 @@ #define TPS 1000000 #endif -#define IS_PAL (gpu_senquack.GPU_GP1&(0x08<<17)) +#define IS_PAL (gpu_unai.GPU_GP1&(0x08<<17)) //senquack - Original 512KB of guard space seems not to be enough, as Xenogears // accesses outside this range and crashes in town intro fight sequence. @@ -78,28 +78,28 @@ static u16 GPU_FrameBuffer[(FRAME_BUFFER_SIZE*2 + 4096)/2] __attribute__((aligne /////////////////////////////////////////////////////////////////////////////// static void gpuReset(void) { - memset((void*)&gpu_senquack, 0, sizeof(gpu_senquack)); - gpu_senquack.vram = (u16*)GPU_FrameBuffer + (4096/2); //4kb guard room in front - gpu_senquack.GPU_GP1 = 0x14802000; - gpu_senquack.DrawingArea[2] = 256; - gpu_senquack.DrawingArea[3] = 240; - gpu_senquack.DisplayArea[2] = 256; - gpu_senquack.DisplayArea[3] = 240; - gpu_senquack.DisplayArea[5] = 240; - gpu_senquack.TextureWindow[0] = 0; - gpu_senquack.TextureWindow[1] = 0; - gpu_senquack.TextureWindow[2] = 255; - gpu_senquack.TextureWindow[3] = 255; + memset((void*)&gpu_unai, 0, sizeof(gpu_unai)); + gpu_unai.vram = (u16*)GPU_FrameBuffer + (4096/2); //4kb guard room in front + gpu_unai.GPU_GP1 = 0x14802000; + gpu_unai.DrawingArea[2] = 256; + gpu_unai.DrawingArea[3] = 240; + gpu_unai.DisplayArea[2] = 256; + gpu_unai.DisplayArea[3] = 240; + gpu_unai.DisplayArea[5] = 240; + gpu_unai.TextureWindow[0] = 0; + gpu_unai.TextureWindow[1] = 0; + 
gpu_unai.TextureWindow[2] = 255; + gpu_unai.TextureWindow[3] = 255; //senquack - new vars must be updated whenever texture window is changed: // (used for polygon-drawing in gpu_inner.h, gpu_raster_polygon.h) const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 - gpu_senquack.u_msk = (((u32)gpu_senquack.TextureWindow[2]) << fb) | ((1 << fb) - 1); - gpu_senquack.v_msk = (((u32)gpu_senquack.TextureWindow[3]) << fb) | ((1 << fb) - 1); + gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1); // Configuration options - gpu_senquack.config = gpu_senquack_config_ext; - gpu_senquack.ilace_mask = gpu_senquack.config.ilace_force; - gpu_senquack.frameskip.skipCount = gpu_senquack.config.frameskip_count; + gpu_unai.config = gpu_unai_config_ext; + gpu_unai.ilace_mask = gpu_unai.config.ilace_force; + gpu_unai.frameskip.skipCount = gpu_unai.config.frameskip_count; SetupLightLUT(); SetupDitheringConstants(); @@ -118,8 +118,8 @@ long GPU_init(void) } #endif - gpu_senquack.fb_dirty = true; - gpu_senquack.dma.last_dma = NULL; + gpu_unai.fb_dirty = true; + gpu_unai.dma.last_dma = NULL; return (0); } @@ -137,27 +137,27 @@ long GPU_freeze(u32 bWrite, GPUFreeze_t* p2) if (bWrite) { - p2->ulStatus = gpu_senquack.GPU_GP1; + p2->ulStatus = gpu_unai.GPU_GP1; memset(p2->ulControl, 0, sizeof(p2->ulControl)); // save resolution and registers for P.E.Op.S. 
compatibility - p2->ulControl[3] = (3 << 24) | ((gpu_senquack.GPU_GP1 >> 23) & 1); - p2->ulControl[4] = (4 << 24) | ((gpu_senquack.GPU_GP1 >> 29) & 3); - p2->ulControl[5] = (5 << 24) | (gpu_senquack.DisplayArea[0] | (gpu_senquack.DisplayArea[1] << 10)); + p2->ulControl[3] = (3 << 24) | ((gpu_unai.GPU_GP1 >> 23) & 1); + p2->ulControl[4] = (4 << 24) | ((gpu_unai.GPU_GP1 >> 29) & 3); + p2->ulControl[5] = (5 << 24) | (gpu_unai.DisplayArea[0] | (gpu_unai.DisplayArea[1] << 10)); p2->ulControl[6] = (6 << 24) | (2560 << 12); - p2->ulControl[7] = (7 << 24) | (gpu_senquack.DisplayArea[4] | (gpu_senquack.DisplayArea[5] << 10)); - p2->ulControl[8] = (8 << 24) | ((gpu_senquack.GPU_GP1 >> 17) & 0x3f) | ((gpu_senquack.GPU_GP1 >> 10) & 0x40); - memcpy((void*)p2->psxVRam, (void*)gpu_senquack.vram, FRAME_BUFFER_SIZE); + p2->ulControl[7] = (7 << 24) | (gpu_unai.DisplayArea[4] | (gpu_unai.DisplayArea[5] << 10)); + p2->ulControl[8] = (8 << 24) | ((gpu_unai.GPU_GP1 >> 17) & 0x3f) | ((gpu_unai.GPU_GP1 >> 10) & 0x40); + memcpy((void*)p2->psxVRam, (void*)gpu_unai.vram, FRAME_BUFFER_SIZE); return (1); } else { extern void GPU_writeStatus(u32 data); - gpu_senquack.GPU_GP1 = p2->ulStatus; - memcpy((void*)gpu_senquack.vram, (void*)p2->psxVRam, FRAME_BUFFER_SIZE); + gpu_unai.GPU_GP1 = p2->ulStatus; + memcpy((void*)gpu_unai.vram, (void*)p2->psxVRam, FRAME_BUFFER_SIZE); GPU_writeStatus((5 << 24) | p2->ulControl[5]); GPU_writeStatus((7 << 24) | p2->ulControl[7]); GPU_writeStatus((8 << 24) | p2->ulControl[8]); - gpuSetTexture(gpu_senquack.GPU_GP1); + gpuSetTexture(gpu_unai.GPU_GP1); return (1); } return (0); @@ -190,24 +190,24 @@ u8 PacketSize[256] = /////////////////////////////////////////////////////////////////////////////// INLINE void gpuSendPacket() { - gpuSendPacketFunction(gpu_senquack.PacketBuffer.U4[0]>>24); + gpuSendPacketFunction(gpu_unai.PacketBuffer.U4[0]>>24); } /////////////////////////////////////////////////////////////////////////////// INLINE void gpuCheckPacket(u32 uData) { - 
if (gpu_senquack.PacketCount) + if (gpu_unai.PacketCount) { - gpu_senquack.PacketBuffer.U4[gpu_senquack.PacketIndex++] = uData; - --gpu_senquack.PacketCount; + gpu_unai.PacketBuffer.U4[gpu_unai.PacketIndex++] = uData; + --gpu_unai.PacketCount; } else { - gpu_senquack.PacketBuffer.U4[0] = uData; - gpu_senquack.PacketCount = PacketSize[uData >> 24]; - gpu_senquack.PacketIndex = 1; + gpu_unai.PacketBuffer.U4[0] = uData; + gpu_unai.PacketCount = PacketSize[uData >> 24]; + gpu_unai.PacketIndex = 1; } - if (!gpu_senquack.PacketCount) gpuSendPacket(); + if (!gpu_unai.PacketCount) gpuSendPacket(); } /////////////////////////////////////////////////////////////////////////////// @@ -217,42 +217,42 @@ void GPU_writeDataMem(u32* dmaAddress, int dmaCount) fprintf(stdout,"GPU_writeDataMem(%d)\n",dmaCount); #endif u32 data; - const u16 *VIDEO_END = (u16*)gpu_senquack.vram+(FRAME_BUFFER_SIZE/2)-1; - gpu_senquack.GPU_GP1 &= ~0x14000000; + const u16 *VIDEO_END = (u16*)gpu_unai.vram+(FRAME_BUFFER_SIZE/2)-1; + gpu_unai.GPU_GP1 &= ~0x14000000; while (dmaCount) { - if (gpu_senquack.dma.FrameToWrite) + if (gpu_unai.dma.FrameToWrite) { while (dmaCount) { dmaCount--; data = *dmaAddress++; - if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; - gpu_senquack.dma.pvram[gpu_senquack.dma.px] = data; - if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; + gpu_unai.dma.pvram[gpu_unai.dma.px] = data; + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - gpu_senquack.dma.px = 0; - gpu_senquack.dma.pvram += 1024; - if (++gpu_senquack.dma.py >= gpu_senquack.dma.y_end) + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; + if (++gpu_unai.dma.py >= gpu_unai.dma.y_end) { - gpu_senquack.dma.FrameToWrite = false; - gpu_senquack.GPU_GP1 &= ~0x08000000; - gpu_senquack.fb_dirty = true; + gpu_unai.dma.FrameToWrite = false; + gpu_unai.GPU_GP1 &= ~0x08000000; + gpu_unai.fb_dirty = 
true; break; } } - if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; - gpu_senquack.dma.pvram[gpu_senquack.dma.px] = data>>16; - if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; + gpu_unai.dma.pvram[gpu_unai.dma.px] = data>>16; + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - gpu_senquack.dma.px = 0; - gpu_senquack.dma.pvram += 1024; - if (++gpu_senquack.dma.py >= gpu_senquack.dma.y_end) + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; + if (++gpu_unai.dma.py >= gpu_unai.dma.y_end) { - gpu_senquack.dma.FrameToWrite = false; - gpu_senquack.GPU_GP1 &= ~0x08000000; - gpu_senquack.fb_dirty = true; + gpu_unai.dma.FrameToWrite = false; + gpu_unai.GPU_GP1 &= ~0x08000000; + gpu_unai.fb_dirty = true; break; } } @@ -266,7 +266,7 @@ void GPU_writeDataMem(u32* dmaAddress, int dmaCount) } } - gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 | 0x14000000) & ~0x60000000; + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 | 0x14000000) & ~0x60000000; } long GPU_dmaChain(u32 *rambase, u32 start_addr) @@ -279,9 +279,9 @@ long GPU_dmaChain(u32 *rambase, u32 start_addr) u32 len, count; long dma_words = 0; - if (gpu_senquack.dma.last_dma) *gpu_senquack.dma.last_dma |= 0x800000; + if (gpu_unai.dma.last_dma) *gpu_unai.dma.last_dma |= 0x800000; - gpu_senquack.GPU_GP1 &= ~0x14000000; + gpu_unai.GPU_GP1 &= ~0x14000000; addr = start_addr & 0xffffff; for (count = 0; addr != 0xffffff; count++) @@ -315,10 +315,10 @@ long GPU_dmaChain(u32 *rambase, u32 start_addr) list[0] &= ~0x800000; } - if (gpu_senquack.dma.last_dma) *gpu_senquack.dma.last_dma &= ~0x800000; - gpu_senquack.dma.last_dma = rambase + (start_addr & 0x1fffff) / 4; + if (gpu_unai.dma.last_dma) *gpu_unai.dma.last_dma &= ~0x800000; + gpu_unai.dma.last_dma = rambase + (start_addr & 0x1fffff) / 4; - gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 | 0x14000000) & ~0x60000000; + gpu_unai.GPU_GP1 = 
(gpu_unai.GPU_GP1 | 0x14000000) & ~0x60000000; return dma_words; } @@ -326,40 +326,40 @@ long GPU_dmaChain(u32 *rambase, u32 start_addr) /////////////////////////////////////////////////////////////////////////////// void GPU_writeData(u32 data) { - const u16 *VIDEO_END = (u16*)gpu_senquack.vram+(FRAME_BUFFER_SIZE/2)-1; + const u16 *VIDEO_END = (u16*)gpu_unai.vram+(FRAME_BUFFER_SIZE/2)-1; #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"GPU_writeData()\n"); #endif - gpu_senquack.GPU_GP1 &= ~0x14000000; + gpu_unai.GPU_GP1 &= ~0x14000000; - if (gpu_senquack.dma.FrameToWrite) + if (gpu_unai.dma.FrameToWrite) { - if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; - gpu_senquack.dma.pvram[gpu_senquack.dma.px]=(u16)data; - if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; + gpu_unai.dma.pvram[gpu_unai.dma.px]=(u16)data; + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - gpu_senquack.dma.px = 0; - gpu_senquack.dma.pvram += 1024; - if (++gpu_senquack.dma.py >= gpu_senquack.dma.y_end) + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; + if (++gpu_unai.dma.py >= gpu_unai.dma.y_end) { - gpu_senquack.dma.FrameToWrite = false; - gpu_senquack.GPU_GP1 &= ~0x08000000; - gpu_senquack.fb_dirty = true; + gpu_unai.dma.FrameToWrite = false; + gpu_unai.GPU_GP1 &= ~0x08000000; + gpu_unai.fb_dirty = true; } } - if (gpu_senquack.dma.FrameToWrite) + if (gpu_unai.dma.FrameToWrite) { - if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; - gpu_senquack.dma.pvram[gpu_senquack.dma.px]=data>>16; - if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; + gpu_unai.dma.pvram[gpu_unai.dma.px]=data>>16; + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - gpu_senquack.dma.px = 0; - gpu_senquack.dma.pvram += 1024; - if 
(++gpu_senquack.dma.py >= gpu_senquack.dma.y_end) + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; + if (++gpu_unai.dma.py >= gpu_unai.dma.y_end) { - gpu_senquack.dma.FrameToWrite = false; - gpu_senquack.GPU_GP1 &= ~0x08000000; - gpu_senquack.fb_dirty = true; + gpu_unai.dma.FrameToWrite = false; + gpu_unai.GPU_GP1 &= ~0x08000000; + gpu_unai.fb_dirty = true; } } } @@ -368,56 +368,56 @@ void GPU_writeData(u32 data) { gpuCheckPacket(data); } - gpu_senquack.GPU_GP1 |= 0x14000000; + gpu_unai.GPU_GP1 |= 0x14000000; } /////////////////////////////////////////////////////////////////////////////// void GPU_readDataMem(u32* dmaAddress, int dmaCount) { - const u16 *VIDEO_END = (u16*)gpu_senquack.vram+(FRAME_BUFFER_SIZE/2)-1; + const u16 *VIDEO_END = (u16*)gpu_unai.vram+(FRAME_BUFFER_SIZE/2)-1; #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"GPU_readDataMem(%d)\n",dmaCount); #endif - if(!gpu_senquack.dma.FrameToRead) return; + if(!gpu_unai.dma.FrameToRead) return; - gpu_senquack.GPU_GP1 &= ~0x14000000; + gpu_unai.GPU_GP1 &= ~0x14000000; do { - if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; // lower 16 bit //senquack - 64-bit fix (from notaz) - //u32 data = (unsigned long)gpu_senquack.dma.pvram[gpu_senquack.dma.px]; - u32 data = (u32)gpu_senquack.dma.pvram[gpu_senquack.dma.px]; + //u32 data = (unsigned long)gpu_unai.dma.pvram[gpu_unai.dma.px]; + u32 data = (u32)gpu_unai.dma.pvram[gpu_unai.dma.px]; - if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - gpu_senquack.dma.px = 0; - gpu_senquack.dma.pvram += 1024; + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; } - if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; // higher 16 bit (always, even if it's an 
odd width) //senquack - 64-bit fix (from notaz) - //data |= (unsigned long)(gpu_senquack.dma.pvram[gpu_senquack.dma.px])<<16; - data |= (u32)(gpu_senquack.dma.pvram[gpu_senquack.dma.px])<<16; + //data |= (unsigned long)(gpu_unai.dma.pvram[gpu_unai.dma.px])<<16; + data |= (u32)(gpu_unai.dma.pvram[gpu_unai.dma.px])<<16; *dmaAddress++ = data; - if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - gpu_senquack.dma.px = 0; - gpu_senquack.dma.pvram += 1024; - if (++gpu_senquack.dma.py >= gpu_senquack.dma.y_end) + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; + if (++gpu_unai.dma.py >= gpu_unai.dma.y_end) { - gpu_senquack.dma.FrameToRead = false; - gpu_senquack.GPU_GP1 &= ~0x08000000; + gpu_unai.dma.FrameToRead = false; + gpu_unai.GPU_GP1 &= ~0x08000000; break; } } } while (--dmaCount); - gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 | 0x14000000) & ~0x60000000; + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 | 0x14000000) & ~0x60000000; } @@ -425,48 +425,48 @@ void GPU_readDataMem(u32* dmaAddress, int dmaCount) /////////////////////////////////////////////////////////////////////////////// u32 GPU_readData(void) { - const u16 *VIDEO_END = (u16*)gpu_senquack.vram+(FRAME_BUFFER_SIZE/2)-1; + const u16 *VIDEO_END = (u16*)gpu_unai.vram+(FRAME_BUFFER_SIZE/2)-1; #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"GPU_readData()\n"); #endif - gpu_senquack.GPU_GP1 &= ~0x14000000; - if (gpu_senquack.dma.FrameToRead) + gpu_unai.GPU_GP1 &= ~0x14000000; + if (gpu_unai.dma.FrameToRead) { - if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; - gpu_senquack.GPU_GP0 = gpu_senquack.dma.pvram[gpu_senquack.dma.px]; - if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; + gpu_unai.GPU_GP0 = gpu_unai.dma.pvram[gpu_unai.dma.px]; + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - gpu_senquack.dma.px = 0; - 
gpu_senquack.dma.pvram += 1024; - if (++gpu_senquack.dma.py >= gpu_senquack.dma.y_end) + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; + if (++gpu_unai.dma.py >= gpu_unai.dma.y_end) { - gpu_senquack.dma.FrameToRead = false; - gpu_senquack.GPU_GP1 &= ~0x08000000; + gpu_unai.dma.FrameToRead = false; + gpu_unai.GPU_GP1 &= ~0x08000000; } } - if ((&gpu_senquack.dma.pvram[gpu_senquack.dma.px])>(VIDEO_END)) gpu_senquack.dma.pvram-=512*1024; - gpu_senquack.GPU_GP0 |= gpu_senquack.dma.pvram[gpu_senquack.dma.px]<<16; - if (++gpu_senquack.dma.px >= gpu_senquack.dma.x_end) + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; + gpu_unai.GPU_GP0 |= gpu_unai.dma.pvram[gpu_unai.dma.px]<<16; + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - gpu_senquack.dma.px = 0; - gpu_senquack.dma.pvram += 1024; - if (++gpu_senquack.dma.py >= gpu_senquack.dma.y_end) + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; + if (++gpu_unai.dma.py >= gpu_unai.dma.y_end) { - gpu_senquack.dma.FrameToRead = false; - gpu_senquack.GPU_GP1 &= ~0x08000000; + gpu_unai.dma.FrameToRead = false; + gpu_unai.GPU_GP1 &= ~0x08000000; } } } - gpu_senquack.GPU_GP1 |= 0x14000000; + gpu_unai.GPU_GP1 |= 0x14000000; - return (gpu_senquack.GPU_GP0); + return (gpu_unai.GPU_GP0); } /////////////////////////////////////////////////////////////////////////////// u32 GPU_readStatus(void) { - return gpu_senquack.GPU_GP1; + return gpu_unai.GPU_GP1; } INLINE void GPU_NoSkip(void) @@ -474,16 +474,16 @@ INLINE void GPU_NoSkip(void) #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"GPU_NoSkip()\n"); #endif - gpu_senquack.frameskip.wasSkip = gpu_senquack.frameskip.isSkip; - if (gpu_senquack.frameskip.isSkip) + gpu_unai.frameskip.wasSkip = gpu_unai.frameskip.isSkip; + if (gpu_unai.frameskip.isSkip) { - gpu_senquack.frameskip.isSkip = false; - gpu_senquack.frameskip.skipGPU = false; + gpu_unai.frameskip.isSkip = false; + gpu_unai.frameskip.skipGPU = false; } else { - gpu_senquack.frameskip.isSkip = 
gpu_senquack.frameskip.skipFrame; - gpu_senquack.frameskip.skipGPU = gpu_senquack.frameskip.skipFrame; + gpu_unai.frameskip.isSkip = gpu_unai.frameskip.skipFrame; + gpu_unai.frameskip.skipGPU = gpu_unai.frameskip.skipFrame; } } @@ -498,26 +498,26 @@ void GPU_writeStatus(u32 data) gpuReset(); break; case 0x01: - gpu_senquack.GPU_GP1 &= ~0x08000000; - gpu_senquack.PacketCount = 0; - gpu_senquack.dma.FrameToRead = gpu_senquack.dma.FrameToWrite = false; + gpu_unai.GPU_GP1 &= ~0x08000000; + gpu_unai.PacketCount = 0; + gpu_unai.dma.FrameToRead = gpu_unai.dma.FrameToWrite = false; break; case 0x02: - gpu_senquack.GPU_GP1 &= ~0x08000000; - gpu_senquack.PacketCount = 0; - gpu_senquack.dma.FrameToRead = gpu_senquack.dma.FrameToWrite = false; + gpu_unai.GPU_GP1 &= ~0x08000000; + gpu_unai.PacketCount = 0; + gpu_unai.dma.FrameToRead = gpu_unai.dma.FrameToWrite = false; break; case 0x03: - gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 & ~0x00800000) | ((data & 1) << 23); + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x00800000) | ((data & 1) << 23); break; case 0x04: - if (data == 0x04000000) gpu_senquack.PacketCount = 0; - gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 & ~0x60000000) | ((data & 3) << 29); + if (data == 0x04000000) gpu_unai.PacketCount = 0; + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x60000000) | ((data & 3) << 29); break; case 0x05: // Start of Display Area in VRAM - gpu_senquack.DisplayArea[0] = data & 0x3ff; // X (0..1023) - gpu_senquack.DisplayArea[1] = (data >> 10) & 0x1ff; // Y (0..511) + gpu_unai.DisplayArea[0] = data & 0x3ff; // X (0..1023) + gpu_unai.DisplayArea[1] = (data >> 10) & 0x1ff; // Y (0..511) GPU_NoSkip(); break; case 0x06: @@ -525,7 +525,7 @@ void GPU_writeStatus(u32 data) // 0-11 X1 (260h+0) ;12bit ;\counted in 53.222400MHz units, // 12-23 X2 (260h+320*8) ;12bit ;/relative to HSYNC - // senquack - gpu_senquack completely ignores GP1(0x06) command and + // senquack - gpu_unai completely ignores GP1(0x06) command and // lacks even a place in 
DisplayArea[] array to store the values. // It seems to have been concerned only with vertical display range // and centering top/bottom. I will not add support here, and @@ -540,10 +540,10 @@ void GPU_writeStatus(u32 data) { u32 v1=data & 0x000003FF; //(short)(data & 0x3ff); u32 v2=(data & 0x000FFC00) >> 10; //(short)((data>>10) & 0x3ff); - if ((gpu_senquack.DisplayArea[4]!=v1)||(gpu_senquack.DisplayArea[5]!=v2)) + if ((gpu_unai.DisplayArea[4]!=v1)||(gpu_unai.DisplayArea[5]!=v2)) { - gpu_senquack.DisplayArea[4] = v1; - gpu_senquack.DisplayArea[5] = v2; + gpu_unai.DisplayArea[4] = v1; + gpu_unai.DisplayArea[5] = v2; #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"video_clear(CHANGE_Y)\n"); #endif @@ -555,53 +555,53 @@ void GPU_writeStatus(u32 data) { static const u32 HorizontalResolution[8] = { 256, 368, 320, 384, 512, 512, 640, 640 }; static const u32 VerticalResolution[4] = { 240, 480, 256, 480 }; - gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 & ~0x007F0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10); + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x007F0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10); #ifdef ENABLE_GPU_LOG_SUPPORT - fprintf(stdout,"GPU_writeStatus(RES=%dx%d,BITS=%d,PAL=%d)\n",HorizontalResolution[(gpu_senquack.GPU_GP1 >> 16) & 7], - VerticalResolution[(gpu_senquack.GPU_GP1 >> 19) & 3],(gpu_senquack.GPU_GP1&0x00200000?24:15),(IS_PAL?1:0)); + fprintf(stdout,"GPU_writeStatus(RES=%dx%d,BITS=%d,PAL=%d)\n",HorizontalResolution[(gpu_unai.GPU_GP1 >> 16) & 7], + VerticalResolution[(gpu_unai.GPU_GP1 >> 19) & 3],(gpu_unai.GPU_GP1&0x00200000?24:15),(IS_PAL?1:0)); #endif // Video mode change - u32 new_width = HorizontalResolution[(gpu_senquack.GPU_GP1 >> 16) & 7]; - u32 new_height = VerticalResolution[(gpu_senquack.GPU_GP1 >> 19) & 3]; + u32 new_width = HorizontalResolution[(gpu_unai.GPU_GP1 >> 16) & 7]; + u32 new_height = VerticalResolution[(gpu_unai.GPU_GP1 >> 19) & 3]; - if (gpu_senquack.DisplayArea[2] != new_width || gpu_senquack.DisplayArea[3] != 
new_height) + if (gpu_unai.DisplayArea[2] != new_width || gpu_unai.DisplayArea[3] != new_height) { // Update width - gpu_senquack.DisplayArea[2] = new_width; + gpu_unai.DisplayArea[2] = new_width; if (PixelSkipEnabled()) { // Set blit_mask for high horizontal resolutions. This allows skipping // rendering pixels that would never get displayed on low-resolution // platforms that use simple pixel-dropping scaler. - switch (gpu_senquack.DisplayArea[2]) + switch (gpu_unai.DisplayArea[2]) { - case 512: gpu_senquack.blit_mask = 0xa4; break; // GPU_BlitWWSWWSWS - case 640: gpu_senquack.blit_mask = 0xaa; break; // GPU_BlitWS - default: gpu_senquack.blit_mask = 0; break; + case 512: gpu_unai.blit_mask = 0xa4; break; // GPU_BlitWWSWWSWS + case 640: gpu_unai.blit_mask = 0xaa; break; // GPU_BlitWS + default: gpu_unai.blit_mask = 0; break; } } else { - gpu_senquack.blit_mask = 0; + gpu_unai.blit_mask = 0; } // Update height - gpu_senquack.DisplayArea[3] = new_height; + gpu_unai.DisplayArea[3] = new_height; if (LineSkipEnabled()) { // Set rendering line-skip (only render every other line in high-res // 480 vertical mode, or, optionally, force it for all video modes) - if (gpu_senquack.DisplayArea[3] == 480) { - if (gpu_senquack.config.ilace_force) { - gpu_senquack.ilace_mask = 3; // Only need 1/4 of lines + if (gpu_unai.DisplayArea[3] == 480) { + if (gpu_unai.config.ilace_force) { + gpu_unai.ilace_mask = 3; // Only need 1/4 of lines } else { - gpu_senquack.ilace_mask = 1; // Only need 1/2 of lines + gpu_unai.ilace_mask = 1; // Only need 1/2 of lines } } else { // Vert resolution changed from 480 to lower one - gpu_senquack.ilace_mask = gpu_senquack.config.ilace_force; + gpu_unai.ilace_mask = gpu_unai.config.ilace_force; } } else { - gpu_senquack.ilace_mask = 0; + gpu_unai.ilace_mask = 0; } #ifdef ENABLE_GPU_LOG_SUPPORT @@ -614,12 +614,12 @@ void GPU_writeStatus(u32 data) break; case 0x10: switch (data & 0xff) { - case 2: gpu_senquack.GPU_GP0 = gpu_senquack.tex_window; break; - 
case 3: gpu_senquack.GPU_GP0 = (gpu_senquack.DrawingArea[1] << 10) | gpu_senquack.DrawingArea[0]; break; - case 4: gpu_senquack.GPU_GP0 = ((gpu_senquack.DrawingArea[3]-1) << 10) | (gpu_senquack.DrawingArea[2]-1); break; - case 5: case 6: gpu_senquack.GPU_GP0 = (((u32)gpu_senquack.DrawingOffset[1] & 0x7ff) << 11) | ((u32)gpu_senquack.DrawingOffset[0] & 0x7ff); break; - case 7: gpu_senquack.GPU_GP0 = 2; break; - case 8: case 15: gpu_senquack.GPU_GP0 = 0xBFC03720; break; + case 2: gpu_unai.GPU_GP0 = gpu_unai.tex_window; break; + case 3: gpu_unai.GPU_GP0 = (gpu_unai.DrawingArea[1] << 10) | gpu_unai.DrawingArea[0]; break; + case 4: gpu_unai.GPU_GP0 = ((gpu_unai.DrawingArea[3]-1) << 10) | (gpu_unai.DrawingArea[2]-1); break; + case 5: case 6: gpu_unai.GPU_GP0 = (((u32)gpu_unai.DrawingOffset[1] & 0x7ff) << 11) | ((u32)gpu_unai.DrawingOffset[0] & 0x7ff); break; + case 7: gpu_unai.GPU_GP0 = 2; break; + case 8: case 15: gpu_unai.GPU_GP0 = 0xBFC03720; break; } break; } @@ -632,18 +632,18 @@ static void gpuVideoOutput(void) { int h0, x0, y0, w0, h1; - x0 = gpu_senquack.DisplayArea[0]; - y0 = gpu_senquack.DisplayArea[1]; + x0 = gpu_unai.DisplayArea[0]; + y0 = gpu_unai.DisplayArea[1]; - w0 = gpu_senquack.DisplayArea[2]; - h0 = gpu_senquack.DisplayArea[3]; // video mode + w0 = gpu_unai.DisplayArea[2]; + h0 = gpu_unai.DisplayArea[3]; // video mode - h1 = gpu_senquack.DisplayArea[5] - gpu_senquack.DisplayArea[4]; // display needed + h1 = gpu_unai.DisplayArea[5] - gpu_unai.DisplayArea[4]; // display needed if (h0 == 480) h1 = Min2(h1*2,480); - bool isRGB24 = (gpu_senquack.GPU_GP1 & 0x00200000 ? true : false); + bool isRGB24 = (gpu_unai.GPU_GP1 & 0x00200000 ? true : false); u16* dst16 = SCREEN; - u16* src16 = (u16*)gpu_senquack.vram; + u16* src16 = (u16*)gpu_unai.vram; // PS1 fb read wraps around (fixes black screen in 'Tobal no. 1') unsigned int src16_offs_msk = 1024*512-1; @@ -669,9 +669,9 @@ static void gpuVideoOutput(void) h0=(h0==480 ? 
2048 : 1024); { - const int li=gpu_senquack.ilace_mask; + const int li=gpu_unai.ilace_mask; bool pi = ProgressiveInterlaceEnabled(); - bool pif = gpu_senquack.prog_ilace_flag; + bool pif = gpu_unai.prog_ilace_flag; switch ( w0 ) { case 256: @@ -731,7 +731,7 @@ static void gpuVideoOutput(void) } break; } - gpu_senquack.prog_ilace_flag = !gpu_senquack.prog_ilace_flag; + gpu_unai.prog_ilace_flag = !gpu_unai.prog_ilace_flag; } video_flip(); } @@ -744,9 +744,9 @@ static void GPU_frameskip (bool show) u32 now=get_ticks(); // current frame // Update frameskip - if (gpu_senquack.frameskip.skipCount==0) gpu_senquack.frameskip.skipFrame=false; // frameskip off - else if (gpu_senquack.frameskip.skipCount==7) { if (show) gpu_senquack.frameskip.skipFrame=!gpu_senquack.frameskip.skipFrame; } // frameskip medium - else if (gpu_senquack.frameskip.skipCount==8) gpu_senquack.frameskip.skipFrame=true; // frameskip maximum + if (gpu_unai.frameskip.skipCount==0) gpu_unai.frameskip.skipFrame=false; // frameskip off + else if (gpu_unai.frameskip.skipCount==7) { if (show) gpu_unai.frameskip.skipFrame=!gpu_unai.frameskip.skipFrame; } // frameskip medium + else if (gpu_unai.frameskip.skipCount==8) gpu_unai.frameskip.skipFrame=true; // frameskip maximum else { static u32 spd=100; // speed % @@ -761,13 +761,13 @@ static void GPU_frameskip (bool show) frames=0; prev=now; } - switch(gpu_senquack.frameskip.skipCount) + switch(gpu_unai.frameskip.skipCount) { - case 1: if (spd<50) gpu_senquack.frameskip.skipFrame=true; else gpu_senquack.frameskip.skipFrame=false; break; // frameskip on (spd<50%) - case 2: if (spd<60) gpu_senquack.frameskip.skipFrame=true; else gpu_senquack.frameskip.skipFrame=false; break; // frameskip on (spd<60%) - case 3: if (spd<70) gpu_senquack.frameskip.skipFrame=true; else gpu_senquack.frameskip.skipFrame=false; break; // frameskip on (spd<70%) - case 4: if (spd<80) gpu_senquack.frameskip.skipFrame=true; else gpu_senquack.frameskip.skipFrame=false; break; // frameskip on 
(spd<80%) - case 5: if (spd<90) gpu_senquack.frameskip.skipFrame=true; else gpu_senquack.frameskip.skipFrame=false; break; // frameskip on (spd<90%) + case 1: if (spd<50) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<50%) + case 2: if (spd<60) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<60%) + case 3: if (spd<70) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<70%) + case 4: if (spd<80) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<80%) + case 5: if (spd<90) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<90%) } } } @@ -776,10 +776,10 @@ static void GPU_frameskip (bool show) void GPU_updateLace(void) { // Interlace bit toggle - gpu_senquack.GPU_GP1 ^= 0x80000000; + gpu_unai.GPU_GP1 ^= 0x80000000; // Update display? - if ((gpu_senquack.fb_dirty) && (!gpu_senquack.frameskip.wasSkip) && (!(gpu_senquack.GPU_GP1&0x00800000))) + if ((gpu_unai.fb_dirty) && (!gpu_unai.frameskip.wasSkip) && (!(gpu_unai.GPU_GP1&0x00800000))) { // Display updated gpuVideoOutput(); @@ -794,33 +794,33 @@ void GPU_updateLace(void) #endif } - if ((!gpu_senquack.frameskip.skipCount) && (gpu_senquack.DisplayArea[3] == 480)) gpu_senquack.frameskip.skipGPU=true; // Tekken 3 hack + if ((!gpu_unai.frameskip.skipCount) && (gpu_unai.DisplayArea[3] == 480)) gpu_unai.frameskip.skipGPU=true; // Tekken 3 hack - gpu_senquack.fb_dirty=false; - gpu_senquack.dma.last_dma = NULL; + gpu_unai.fb_dirty=false; + gpu_unai.dma.last_dma = NULL; } // Allows frontend to signal plugin to redraw screen after returning to emu void GPU_requestScreenRedraw() { - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; } void GPU_getScreenInfo(GPUScreenInfo_t *sinfo) { - bool depth24 = (gpu_senquack.GPU_GP1 & 0x00200000 ? 
true : false); - int16_t hres = (uint16_t)gpu_senquack.DisplayArea[2]; - int16_t vres = (uint16_t)gpu_senquack.DisplayArea[3]; - int16_t w = hres; // Original gpu_senquack doesn't support width < 100% - int16_t h = gpu_senquack.DisplayArea[5] - gpu_senquack.DisplayArea[4]; + bool depth24 = (gpu_unai.GPU_GP1 & 0x00200000 ? true : false); + int16_t hres = (uint16_t)gpu_unai.DisplayArea[2]; + int16_t vres = (uint16_t)gpu_unai.DisplayArea[3]; + int16_t w = hres; // Original gpu_unai doesn't support width < 100% + int16_t h = gpu_unai.DisplayArea[5] - gpu_unai.DisplayArea[4]; if (vres == 480) h *= 2; if (h <= 0 || h > vres) h = vres; - sinfo->vram = (uint8_t*)gpu_senquack.vram; - sinfo->x = (uint16_t)gpu_senquack.DisplayArea[0]; - sinfo->y = (uint16_t)gpu_senquack.DisplayArea[1]; + sinfo->vram = (uint8_t*)gpu_unai.vram; + sinfo->x = (uint16_t)gpu_unai.DisplayArea[0]; + sinfo->y = (uint16_t)gpu_unai.DisplayArea[1]; sinfo->w = w; sinfo->h = h; sinfo->hres = hres; diff --git a/plugins/gpu_unai/gpu.h b/plugins/gpu_unai/gpu.h index 7a467511c..f5eb69b4b 100644 --- a/plugins/gpu_unai/gpu.h +++ b/plugins/gpu_unai/gpu.h @@ -22,7 +22,7 @@ #ifndef GPU_UNAI_GPU_H #define GPU_UNAI_GPU_H -struct gpu_senquack_config_t { +struct gpu_unai_config_t { uint8_t pixel_skip:1; // If 1, allows skipping rendering pixels that // would not be visible when a high horizontal // resolution PS1 video mode is set. @@ -34,7 +34,7 @@ struct gpu_senquack_config_t { uint8_t ilace_force:3; // Option to force skipping rendering of lines, // for very slow platforms. Value will be - // assigned to 'ilace_mask' in gpu_senquack struct. + // assigned to 'ilace_mask' in gpu_unai struct. // Normally 0. Value '1' will skip rendering // odd lines. @@ -47,13 +47,13 @@ struct gpu_senquack_config_t { uint8_t blending:1; uint8_t dithering:1; - //senquack Only PCSX Rearmed's version of gpu_senquack had this, and I + //senquack Only PCSX Rearmed's version of gpu_unai had this, and I // don't think it's necessary. 
It would require adding 'AH' flag to // gpuSpriteSpanFn() increasing size of sprite span function array. //uint8_t enableAbbeyHack:1; // Abe's Odyssey hack //////////////////////////////////////////////////////////////////////////// - // Variables used only by older standalone version of gpu_senquack (gpu.cpp) + // Variables used only by older standalone version of gpu_unai (gpu.cpp) #ifndef USE_GPULIB uint8_t prog_ilace:1; // Progressive interlace option (old option) // This option was somewhat oddly named: @@ -66,7 +66,7 @@ struct gpu_senquack_config_t { #endif }; -extern gpu_senquack_config_t gpu_senquack_config_ext; +extern gpu_unai_config_t gpu_unai_config_ext; // TODO: clean up show_fps frontend option extern bool show_fps; diff --git a/plugins/gpu_unai/gpu_arm.h b/plugins/gpu_unai/gpu_arm.h index b9f8f97cc..0f8ed6b5f 100644 --- a/plugins/gpu_unai/gpu_arm.h +++ b/plugins/gpu_unai/gpu_arm.h @@ -5,7 +5,7 @@ extern "C" { #endif -void draw_spr16_full(u16 *d, void *s, u16 *pal, int lines); +void draw_spr16_full(void *d, void *s, void *pal, int lines); #ifdef __cplusplus } diff --git a/plugins/gpu_unai/gpu_command.h b/plugins/gpu_unai/gpu_command.h index d052ae8ce..cf6b62b46 100644 --- a/plugins/gpu_unai/gpu_command.h +++ b/plugins/gpu_unai/gpu_command.h @@ -26,9 +26,9 @@ void gpuSetTexture(u16 tpage) { u32 tmode, tx, ty; - gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 & ~0x1FF) | (tpage & 0x1FF); - gpu_senquack.TextureWindow[0]&= ~gpu_senquack.TextureWindow[2]; - gpu_senquack.TextureWindow[1]&= ~gpu_senquack.TextureWindow[3]; + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x1FF) | (tpage & 0x1FF); + gpu_unai.TextureWindow[0]&= ~gpu_unai.TextureWindow[2]; + gpu_unai.TextureWindow[1]&= ~gpu_unai.TextureWindow[3]; tmode = (tpage >> 7) & 3; // 16bpp, 8bpp, or 4bpp texture colors? 
// 0: 4bpp 1: 8bpp 2/3: 16bpp @@ -40,18 +40,18 @@ void gpuSetTexture(u16 tpage) tx = (tpage & 0x0F) << 6; ty = (tpage & 0x10) << 4; - tx += (gpu_senquack.TextureWindow[0] >> (2 - tmode)); - ty += gpu_senquack.TextureWindow[1]; + tx += (gpu_unai.TextureWindow[0] >> (2 - tmode)); + ty += gpu_unai.TextureWindow[1]; - gpu_senquack.BLEND_MODE = ((tpage>>5) & 3) << 3; - gpu_senquack.TEXT_MODE = (tmode + 1) << 5; // gpu_senquack.TEXT_MODE should be values 1..3, so add one - gpu_senquack.TBA = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(tx, ty)]; + gpu_unai.BLEND_MODE = ((tpage>>5) & 3) << 3; + gpu_unai.TEXT_MODE = (tmode + 1) << 5; // gpu_unai.TEXT_MODE should be values 1..3, so add one + gpu_unai.TBA = &gpu_unai.vram[FRAME_OFFSET(tx, ty)]; } /////////////////////////////////////////////////////////////////////////////// INLINE void gpuSetCLUT(u16 clut) { - gpu_senquack.CBA = &((u16*)gpu_senquack.vram)[(clut & 0x7FFF) << 4]; + gpu_unai.CBA = &gpu_unai.vram[(clut & 0x7FFF) << 4]; } #ifdef ENABLE_GPU_NULL_SUPPORT @@ -67,20 +67,20 @@ INLINE void gpuSetCLUT(u16 clut) #endif #define Blending (((PRIM&0x2) && BlendingEnabled()) ? (PRIM&0x2) : 0) -#define Blending_Mode (((PRIM&0x2) && BlendingEnabled()) ? gpu_senquack.BLEND_MODE : 0) +#define Blending_Mode (((PRIM&0x2) && BlendingEnabled()) ? gpu_unai.BLEND_MODE : 0) #define Lighting (((~PRIM)&0x1) && LightingEnabled()) // Dithering applies only to Gouraud-shaded polys or texture-blended polys: #define Dithering (((((~PRIM)&0x1) || (PRIM&0x10)) && DitheringEnabled()) ? \ - (ForcedDitheringEnabled() ? (1<<9) : (gpu_senquack.GPU_GP1 & (1 << 9))) \ + (ForcedDitheringEnabled() ? 
(1<<9) : (gpu_unai.GPU_GP1 & (1 << 9))) \ : 0) /////////////////////////////////////////////////////////////////////////////// -//Now handled by Rearmed's gpulib and gpu_senquack/gpulib_if.cpp: +//Now handled by Rearmed's gpulib and gpu_unai/gpulib_if.cpp: /////////////////////////////////////////////////////////////////////////////// #ifndef USE_GPULIB // Handles GP0 draw settings commands 0xE1...0xE6 -static void gpuGP0Cmd_0xEx(gpu_senquack_t &gpu_senquack, u32 cmd_word) +static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word) { // Assume incoming GP0 command is 0xE1..0xE6, convert to 1..6 u8 num = (cmd_word >> 24) & 7; @@ -88,65 +88,65 @@ static void gpuGP0Cmd_0xEx(gpu_senquack_t &gpu_senquack, u32 cmd_word) case 1: { // GP0(E1h) - Draw Mode setting (aka "Texpage") DO_LOG(("GP0(0xE1) DrawMode TexPage(0x%x)\n", cmd_word)); - u32 cur_texpage = gpu_senquack.GPU_GP1 & 0x7FF; + u32 cur_texpage = gpu_unai.GPU_GP1 & 0x7FF; u32 new_texpage = cmd_word & 0x7FF; if (cur_texpage != new_texpage) { - gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 & ~0x7FF) | new_texpage; - gpuSetTexture(gpu_senquack.GPU_GP1); + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x7FF) | new_texpage; + gpuSetTexture(gpu_unai.GPU_GP1); } } break; case 2: { // GP0(E2h) - Texture Window setting DO_LOG(("GP0(0xE2) TextureWindow(0x%x)\n", cmd_word)); - if (cmd_word != gpu_senquack.TextureWindowCur) { + if (cmd_word != gpu_unai.TextureWindowCur) { static const u8 TextureMask[32] = { 255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7, 127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7 }; - gpu_senquack.TextureWindowCur = cmd_word; - gpu_senquack.TextureWindow[0] = ((cmd_word >> 10) & 0x1F) << 3; - gpu_senquack.TextureWindow[1] = ((cmd_word >> 15) & 0x1F) << 3; - gpu_senquack.TextureWindow[2] = TextureMask[(cmd_word >> 0) & 0x1F]; - gpu_senquack.TextureWindow[3] = TextureMask[(cmd_word >> 5) & 0x1F]; - gpu_senquack.TextureWindow[0] &= ~gpu_senquack.TextureWindow[2]; - gpu_senquack.TextureWindow[1] 
&= ~gpu_senquack.TextureWindow[3]; + gpu_unai.TextureWindowCur = cmd_word; + gpu_unai.TextureWindow[0] = ((cmd_word >> 10) & 0x1F) << 3; + gpu_unai.TextureWindow[1] = ((cmd_word >> 15) & 0x1F) << 3; + gpu_unai.TextureWindow[2] = TextureMask[(cmd_word >> 0) & 0x1F]; + gpu_unai.TextureWindow[3] = TextureMask[(cmd_word >> 5) & 0x1F]; + gpu_unai.TextureWindow[0] &= ~gpu_unai.TextureWindow[2]; + gpu_unai.TextureWindow[1] &= ~gpu_unai.TextureWindow[3]; // Inner loop vars must be updated whenever texture window is changed: const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 - gpu_senquack.u_msk = (((u32)gpu_senquack.TextureWindow[2]) << fb) | ((1 << fb) - 1); - gpu_senquack.v_msk = (((u32)gpu_senquack.TextureWindow[3]) << fb) | ((1 << fb) - 1); + gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1); - gpuSetTexture(gpu_senquack.GPU_GP1); + gpuSetTexture(gpu_unai.GPU_GP1); } } break; case 3: { // GP0(E3h) - Set Drawing Area top left (X1,Y1) DO_LOG(("GP0(0xE3) DrawingArea Pos(0x%x)\n", cmd_word)); - gpu_senquack.DrawingArea[0] = cmd_word & 0x3FF; - gpu_senquack.DrawingArea[1] = (cmd_word >> 10) & 0x3FF; + gpu_unai.DrawingArea[0] = cmd_word & 0x3FF; + gpu_unai.DrawingArea[1] = (cmd_word >> 10) & 0x3FF; } break; case 4: { // GP0(E4h) - Set Drawing Area bottom right (X2,Y2) DO_LOG(("GP0(0xE4) DrawingArea Size(0x%x)\n", cmd_word)); - gpu_senquack.DrawingArea[2] = (cmd_word & 0x3FF) + 1; - gpu_senquack.DrawingArea[3] = ((cmd_word >> 10) & 0x3FF) + 1; + gpu_unai.DrawingArea[2] = (cmd_word & 0x3FF) + 1; + gpu_unai.DrawingArea[3] = ((cmd_word >> 10) & 0x3FF) + 1; } break; case 5: { // GP0(E5h) - Set Drawing Offset (X,Y) DO_LOG(("GP0(0xE5) DrawingOffset(0x%x)\n", cmd_word)); - gpu_senquack.DrawingOffset[0] = ((s32)cmd_word<<(32-11))>>(32-11); - gpu_senquack.DrawingOffset[1] = ((s32)cmd_word<<(32-22))>>(32-11); + gpu_unai.DrawingOffset[0] = 
((s32)cmd_word<<(32-11))>>(32-11); + gpu_unai.DrawingOffset[1] = ((s32)cmd_word<<(32-22))>>(32-11); } break; case 6: { // GP0(E6h) - Mask Bit Setting DO_LOG(("GP0(0xE6) SetMask(0x%x)\n", cmd_word)); - gpu_senquack.Masking = (cmd_word & 0x2) << 1; - gpu_senquack.PixelMSB = (cmd_word & 0x1) << 8; + gpu_unai.Masking = (cmd_word & 0x2) << 1; + gpu_unai.PixelMSB = (cmd_word & 0x1) << 8; } break; } } @@ -157,14 +157,14 @@ void gpuSendPacketFunction(const int PRIM) //senquack - TODO: optimize this (packet pointer union as prim draw parameter // introduced as optimization for gpulib command-list processing) - PtrUnion packet = { .ptr = (void*)&gpu_senquack.PacketBuffer }; + PtrUnion packet = { .ptr = (void*)&gpu_unai.PacketBuffer }; switch (PRIM) { case 0x02: { NULL_GPU(); gpuClearImage(packet); // prim handles updateLace && skip - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuClearImage(0x%x)\n",PRIM)); } break; @@ -172,16 +172,16 @@ void gpuSendPacketFunction(const int PRIM) case 0x21: case 0x22: case 0x23: { // Monochrome 3-pt poly - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); PP driver = gpuPolySpanDrivers[ - (gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Blending_Mode | - gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB + gpu_unai.Masking | Blending | gpu_unai.PixelMSB ]; gpuDrawPolyF(packet, driver, false); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawPolyF(0x%x)\n",PRIM)); } } break; @@ -190,28 +190,28 @@ void gpuSendPacketFunction(const int PRIM) case 0x25: case 0x26: case 0x27: { // Textured 3-pt poly - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); - gpuSetTexture (gpu_senquack.PacketBuffer.U4[4] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16); u32 driver_idx = - 
(gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Dithering | - Blending_Mode | gpu_senquack.TEXT_MODE | - gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB; + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB; if (!FastLightingEnabled()) { driver_idx |= Lighting; } else { - if (!((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F))) + if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))) driver_idx |= Lighting; } PP driver = gpuPolySpanDrivers[driver_idx]; gpuDrawPolyFT(packet, driver, false); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawPolyFT(0x%x)\n",PRIM)); } } break; @@ -220,16 +220,16 @@ void gpuSendPacketFunction(const int PRIM) case 0x29: case 0x2A: case 0x2B: { // Monochrome 4-pt poly - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); PP driver = gpuPolySpanDrivers[ - (gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Blending_Mode | - gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB + gpu_unai.Masking | Blending | gpu_unai.PixelMSB ]; gpuDrawPolyF(packet, driver, true); // is_quad = true - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawPolyF(0x%x) (4-pt QUAD)\n",PRIM)); } } break; @@ -238,28 +238,28 @@ void gpuSendPacketFunction(const int PRIM) case 0x2D: case 0x2E: case 0x2F: { // Textured 4-pt poly - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); - gpuSetTexture (gpu_senquack.PacketBuffer.U4[4] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16); u32 driver_idx = - (gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Dithering | - Blending_Mode | 
gpu_senquack.TEXT_MODE | - gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB; + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB; if (!FastLightingEnabled()) { driver_idx |= Lighting; } else { - if (!((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F))) + if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))) driver_idx |= Lighting; } PP driver = gpuPolySpanDrivers[driver_idx]; gpuDrawPolyFT(packet, driver, true); // is_quad = true - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawPolyFT(0x%x) (4-pt QUAD)\n",PRIM)); } } break; @@ -268,7 +268,7 @@ void gpuSendPacketFunction(const int PRIM) case 0x31: case 0x32: case 0x33: { // Gouraud-shaded 3-pt poly - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); //NOTE: The '129' here is CF_GOURAUD | CF_LIGHT, however @@ -276,13 +276,13 @@ void gpuSendPacketFunction(const int PRIM) // shouldn't apply. Until the original array of template // instantiation ptrs is fixed, we're stuck with this. 
(TODO) PP driver = gpuPolySpanDrivers[ - (gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Dithering | Blending_Mode | - gpu_senquack.Masking | Blending | 129 | gpu_senquack.PixelMSB + gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB ]; gpuDrawPolyG(packet, driver, false); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawPolyG(0x%x)\n",PRIM)); } } break; @@ -291,19 +291,19 @@ void gpuSendPacketFunction(const int PRIM) case 0x35: case 0x36: case 0x37: { // Gouraud-shaded, textured 3-pt poly - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); - gpuSetTexture (gpu_senquack.PacketBuffer.U4[5] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16); PP driver = gpuPolySpanDrivers[ - (gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Dithering | - Blending_Mode | gpu_senquack.TEXT_MODE | - gpu_senquack.Masking | Blending | ((Lighting)?129:0) | gpu_senquack.PixelMSB + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB ]; gpuDrawPolyGT(packet, driver, false); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawPolyGT(0x%x)\n",PRIM)); } } break; @@ -312,18 +312,18 @@ void gpuSendPacketFunction(const int PRIM) case 0x39: case 0x3A: case 0x3B: { // Gouraud-shaded 4-pt poly - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); // See notes regarding '129' for 0x30..0x33 further above -senquack PP driver = gpuPolySpanDrivers[ - (gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Dithering | Blending_Mode | - gpu_senquack.Masking | Blending | 129 | gpu_senquack.PixelMSB + gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB ]; gpuDrawPolyG(packet, driver, true); // is_quad = true - gpu_senquack.fb_dirty = true; + 
gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawPolyG(0x%x) (4-pt QUAD)\n",PRIM)); } } break; @@ -332,19 +332,19 @@ void gpuSendPacketFunction(const int PRIM) case 0x3D: case 0x3E: case 0x3F: { // Gouraud-shaded, textured 4-pt poly - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); - gpuSetTexture (gpu_senquack.PacketBuffer.U4[5] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16); PP driver = gpuPolySpanDrivers[ - (gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Dithering | - Blending_Mode | gpu_senquack.TEXT_MODE | - gpu_senquack.Masking | Blending | ((Lighting)?129:0) | gpu_senquack.PixelMSB + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB ]; gpuDrawPolyGT(packet, driver, true); // is_quad = true - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawPolyGT(0x%x) (4-pt QUAD)\n",PRIM)); } } break; @@ -353,14 +353,14 @@ void gpuSendPacketFunction(const int PRIM) case 0x41: case 0x42: case 0x43: { // Monochrome line - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); // Shift index right by one, as untextured prims don't use lighting - u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; PSD driver = gpuPixelSpanDrivers[driver_idx]; gpuDrawLineF(packet, driver); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawLineF(0x%x)\n",PRIM)); } } break; @@ -373,22 +373,22 @@ void gpuSendPacketFunction(const int PRIM) case 0x4D: case 0x4E: case 0x4F: { // Monochrome line strip - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); // Shift index right by one, as untextured 
prims don't use lighting - u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; PSD driver = gpuPixelSpanDrivers[driver_idx]; gpuDrawLineF(packet, driver); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawLineF(0x%x)\n",PRIM)); } - if ((gpu_senquack.PacketBuffer.U4[3] & 0xF000F000) != 0x50005000) + if ((le32_raw(gpu_unai.PacketBuffer.U4[3]) & HTOLE32(0xF000F000)) != HTOLE32(0x50005000)) { - gpu_senquack.PacketBuffer.U4[1] = gpu_senquack.PacketBuffer.U4[2]; - gpu_senquack.PacketBuffer.U4[2] = gpu_senquack.PacketBuffer.U4[3]; - gpu_senquack.PacketCount = 1; - gpu_senquack.PacketIndex = 3; + gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2]; + gpu_unai.PacketBuffer.U4[2] = gpu_unai.PacketBuffer.U4[3]; + gpu_unai.PacketCount = 1; + gpu_unai.PacketIndex = 3; } } break; @@ -396,16 +396,16 @@ void gpuSendPacketFunction(const int PRIM) case 0x51: case 0x52: case 0x53: { // Gouraud-shaded line - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); // Shift index right by one, as untextured prims don't use lighting - u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; // Index MSB selects Gouraud-shaded PixelSpanDriver: driver_idx |= (1 << 5); PSD driver = gpuPixelSpanDrivers[driver_idx]; gpuDrawLineG(packet, driver); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawLineG(0x%x)\n",PRIM)); } } break; @@ -418,26 +418,26 @@ void gpuSendPacketFunction(const int PRIM) case 0x5D: case 0x5E: case 0x5F: { // Gouraud-shaded line strip - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); // Shift index right by one, as untextured prims don't use lighting - u32 driver_idx 
= (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; // Index MSB selects Gouraud-shaded PixelSpanDriver: driver_idx |= (1 << 5); PSD driver = gpuPixelSpanDrivers[driver_idx]; gpuDrawLineG(packet, driver); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawLineG(0x%x)\n",PRIM)); } - if ((gpu_senquack.PacketBuffer.U4[4] & 0xF000F000) != 0x50005000) + if ((le32_raw(gpu_unai.PacketBuffer.U4[4]) & HTOLE32(0xF000F000)) != HTOLE32(0x50005000)) { - gpu_senquack.PacketBuffer.U1[3 + (2 * 4)] = gpu_senquack.PacketBuffer.U1[3 + (0 * 4)]; - gpu_senquack.PacketBuffer.U4[0] = gpu_senquack.PacketBuffer.U4[2]; - gpu_senquack.PacketBuffer.U4[1] = gpu_senquack.PacketBuffer.U4[3]; - gpu_senquack.PacketBuffer.U4[2] = gpu_senquack.PacketBuffer.U4[4]; - gpu_senquack.PacketCount = 2; - gpu_senquack.PacketIndex = 3; + gpu_unai.PacketBuffer.U1[3 + (2 * 4)] = gpu_unai.PacketBuffer.U1[3 + (0 * 4)]; + gpu_unai.PacketBuffer.U4[0] = gpu_unai.PacketBuffer.U4[2]; + gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[3]; + gpu_unai.PacketBuffer.U4[2] = gpu_unai.PacketBuffer.U4[4]; + gpu_unai.PacketCount = 2; + gpu_unai.PacketIndex = 3; } } break; @@ -445,12 +445,12 @@ void gpuSendPacketFunction(const int PRIM) case 0x61: case 0x62: case 0x63: { // Monochrome rectangle (variable size) - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawT(0x%x)\n",PRIM)); } } break; @@ -459,30 +459,30 @@ void gpuSendPacketFunction(const int PRIM) case 0x65: case 0x66: case 0x67: { // Textured 
rectangle (variable size) - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); - u32 driver_idx = Blending_Mode | gpu_senquack.TEXT_MODE | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>1); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); // This fixes Silent Hill running animation on loading screens: // (On PSX, color values 0x00-0x7F darken the source texture's color, // 0x81-FF lighten textures (ultimately clamped to 0x1F), // 0x80 leaves source texture color unchanged, HOWEVER, - // gpu_senquack uses a simple lighting LUT whereby only the upper + // gpu_unai uses a simple lighting LUT whereby only the upper // 5 bits of an 8-bit color are used, so 0x80-0x87 all behave as // 0x80. // // NOTE: I've changed all textured sprite draw commands here and // elsewhere to use proper behavior, but left poly commands // alone, I don't want to slow rendering down too much. 
(TODO) - //if ((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F)) + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) // Strip lower 3 bits of each color and determine if lighting should be used: - if ((gpu_senquack.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawS(0x%x)\n",PRIM)); } } break; @@ -491,13 +491,13 @@ void gpuSendPacketFunction(const int PRIM) case 0x69: case 0x6A: case 0x6B: { // Monochrome rectangle (1x1 dot) - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpu_senquack.PacketBuffer.U4[2] = 0x00010001; - PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001); + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawT(0x%x)\n",PRIM)); } } break; @@ -506,13 +506,13 @@ void gpuSendPacketFunction(const int PRIM) case 0x71: case 0x72: case 0x73: { // Monochrome rectangle (8x8) - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpu_senquack.PacketBuffer.U4[2] = 0x00080008; - PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008); + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); - gpu_senquack.fb_dirty = true; + 
gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawT(0x%x)\n",PRIM)); } } break; @@ -521,21 +521,21 @@ void gpuSendPacketFunction(const int PRIM) case 0x75: case 0x76: case 0x77: { // Textured rectangle (8x8) - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpu_senquack.PacketBuffer.U4[3] = 0x00080008; - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); - u32 driver_idx = Blending_Mode | gpu_senquack.TEXT_MODE | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>1); + gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00080008); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); //senquack - Only color 808080h-878787h allows skipping lighting calculation: - //if ((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F)) + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) // Strip lower 3 bits of each color and determine if lighting should be used: - if ((gpu_senquack.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawS(0x%x)\n",PRIM)); } } break; @@ -544,13 +544,13 @@ void gpuSendPacketFunction(const int PRIM) case 0x79: case 0x7A: case 0x7B: { // Monochrome rectangle (16x16) - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpu_senquack.PacketBuffer.U4[2] = 0x00100010; - PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010); + PT driver = gpuTileSpanDrivers[(Blending_Mode | 
gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawT(0x%x)\n",PRIM)); } } break; @@ -559,44 +559,44 @@ void gpuSendPacketFunction(const int PRIM) case 0x7D: #ifdef __arm__ /* Notaz 4bit sprites optimization */ - if ((!gpu_senquack.frameskip.skipGPU) && (!(gpu_senquack.GPU_GP1&0x180)) && (!(gpu_senquack.Masking|gpu_senquack.PixelMSB))) + if ((!gpu_unai.frameskip.skipGPU) && (!(gpu_unai.GPU_GP1&0x180)) && (!(gpu_unai.Masking|gpu_unai.PixelMSB))) { - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); gpuDrawS16(packet); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; break; } #endif case 0x7E: case 0x7F: { // Textured rectangle (16x16) - if (!gpu_senquack.frameskip.skipGPU) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpu_senquack.PacketBuffer.U4[3] = 0x00100010; - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); - u32 driver_idx = Blending_Mode | gpu_senquack.TEXT_MODE | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>1); + gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00100010); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); //senquack - Only color 808080h-878787h allows skipping lighting calculation: - //if ((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F)) + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) // Strip lower 3 bits of each color and determine if lighting should be used: - if ((gpu_senquack.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = 
gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawS(0x%x)\n",PRIM)); } } break; case 0x80: // vid -> vid gpuMoveImage(packet); // prim handles updateLace && skip - if ((!gpu_senquack.frameskip.skipCount) && (gpu_senquack.DisplayArea[3] == 480)) // Tekken 3 hack + if ((!gpu_unai.frameskip.skipCount) && (gpu_unai.DisplayArea[3] == 480)) // Tekken 3 hack { - if (!gpu_senquack.frameskip.skipGPU) gpu_senquack.fb_dirty = true; + if (!gpu_unai.frameskip.skipGPU) gpu_unai.fb_dirty = true; } else { - gpu_senquack.fb_dirty = true; + gpu_unai.fb_dirty = true; } DO_LOG(("gpuMoveImage(0x%x)\n",PRIM)); break; @@ -609,13 +609,13 @@ void gpuSendPacketFunction(const int PRIM) DO_LOG(("gpuStoreImage(0x%x)\n",PRIM)); break; case 0xE1 ... 0xE6: { // Draw settings - gpuGP0Cmd_0xEx(gpu_senquack, gpu_senquack.PacketBuffer.U4[0]); + gpuGP0Cmd_0xEx(gpu_unai, le32_to_u32(gpu_unai.PacketBuffer.U4[0])); } break; } } #endif //!USE_GPULIB /////////////////////////////////////////////////////////////////////////////// -// End of code specific to non-gpulib standalone version of gpu_senquack +// End of code specific to non-gpulib standalone version of gpu_unai /////////////////////////////////////////////////////////////////////////////// #endif /* __GPU_UNAI_GPU_COMMAND_H__ */ diff --git a/plugins/gpu_unai/gpu_inner.h b/plugins/gpu_unai/gpu_inner.h index 8cb4bd534..eb209ef4d 100644 --- a/plugins/gpu_unai/gpu_inner.h +++ b/plugins/gpu_unai/gpu_inner.h @@ -123,12 +123,8 @@ static inline u16 gpuGouraudColor15bpp(u32 r, u32 g, u32 b) // rectangles) to use the same set of functions. Since tiles are always // monochrome, they simply wouldn't use the extra set of 32 gouraud-shaded // gpuPixelSpanFn functions (TODO?). -// -// NOTE: While the PS1 framebuffer is 16 bit, we use 8-bit pointers here, -// so that pDst can be incremented directly by 'incr' parameter -// without having to shift it before use. 
template -static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) +static le16_t* gpuPixelSpanFn(le16_t* pDst, uintptr_t data, ptrdiff_t incr, size_t len) { // Blend func can save an operation if it knows uSrc MSB is // unset. For untextured prims, this is always true. @@ -139,6 +135,9 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) u32 r, g, b; s32 r_incr, g_incr, b_incr; + // Caller counts in bytes, we count in pixels + incr /= 2; + if (CF_GOURAUD) { gcPtr = (GouraudColor*)data; r = gcPtr->r; r_incr = gcPtr->r_incr; @@ -152,15 +151,15 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) if (!CF_GOURAUD) { // NO GOURAUD if (!CF_MASKCHECK && !CF_BLEND) { - if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } - else { *(u16*)pDst = col; } + if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); } + else { *pDst = u16_to_le16(col); } } else if (CF_MASKCHECK && !CF_BLEND) { - if (!(*(u16*)pDst & 0x8000)) { - if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } - else { *(u16*)pDst = col; } + if (!(le16_raw(*pDst) & HTOLE16(0x8000))) { + if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); } + else { *pDst = u16_to_le16(col); } } } else { - uint_fast16_t uDst = *(u16*)pDst; + uint_fast16_t uDst = le16_to_u16(*pDst); if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; } uint_fast16_t uSrc = col; @@ -168,8 +167,8 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) if (CF_BLEND) uSrc = gpuBlending(uSrc, uDst); - if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; } - else { *(u16*)pDst = uSrc; } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else { *pDst = u16_to_le16(uSrc); } } } else @@ -177,16 +176,16 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) if (!CF_MASKCHECK && !CF_BLEND) { col = gpuGouraudColor15bpp(r, g, b); - if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } - else { *(u16*)pDst = col; } + if (CF_MASKSET) { 
*pDst = u16_to_le16(col | 0x8000); } + else { *pDst = u16_to_le16(col); } } else if (CF_MASKCHECK && !CF_BLEND) { col = gpuGouraudColor15bpp(r, g, b); - if (!(*(u16*)pDst & 0x8000)) { - if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } - else { *(u16*)pDst = col; } + if (!(le16_raw(*pDst) & HTOLE16(0x8000))) { + if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); } + else { *pDst = u16_to_le16(col); } } } else { - uint_fast16_t uDst = *(u16*)pDst; + uint_fast16_t uDst = le16_to_u16(*pDst); if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; } col = gpuGouraudColor15bpp(r, g, b); @@ -199,8 +198,8 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) if (CF_BLEND) uSrc = gpuBlending(uSrc, uDst); - if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; } - else { *(u16*)pDst = uSrc; } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else { *pDst = u16_to_le16(uSrc); } } } @@ -228,7 +227,7 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) return pDst; } -static u8* PixelSpanNULL(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) +static le16_t* PixelSpanNULL(le16_t* pDst, uintptr_t data, ptrdiff_t incr, size_t len) { #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"PixelSpanNULL()\n"); @@ -238,7 +237,7 @@ static u8* PixelSpanNULL(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) /////////////////////////////////////////////////////////////////////////////// // PixelSpan (lines) innerloops driver -typedef u8* (*PSD)(u8* dst, uintptr_t data, ptrdiff_t incr, size_t len); +typedef le16_t* (*PSD)(le16_t* dst, uintptr_t data, ptrdiff_t incr, size_t len); const PSD gpuPixelSpanDrivers[64] = { @@ -282,14 +281,26 @@ const PSD gpuPixelSpanDrivers[64] = // GPU Tiles innerloops generator template -static void gpuTileSpanFn(u16 *pDst, u32 count, u16 data) +static void gpuTileSpanFn(le16_t *pDst, u32 count, u16 data) { + le16_t ldata; + if (!CF_MASKCHECK && !CF_BLEND) { - if (CF_MASKSET) { data = data | 0x8000; } 
- do { *pDst++ = data; } while (--count); + if (CF_MASKSET) + ldata = u16_to_le16(data | 0x8000); + else + ldata = u16_to_le16(data); + do { *pDst++ = ldata; } while (--count); } else if (CF_MASKCHECK && !CF_BLEND) { - if (CF_MASKSET) { data = data | 0x8000; } - do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count); + if (CF_MASKSET) + ldata = u16_to_le16(data | 0x8000); + else + ldata = u16_to_le16(data); + do { + if (!(le16_raw(*pDst) & HTOLE16(0x8000))) + *pDst = ldata; + pDst++; + } while (--count); } else { // Blend func can save an operation if it knows uSrc MSB is @@ -299,16 +310,16 @@ static void gpuTileSpanFn(u16 *pDst, u32 count, u16 data) uint_fast16_t uSrc, uDst; do { - if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; } - if (CF_MASKCHECK) { if (uDst&0x8000) goto endtile; } + if (CF_MASKCHECK || CF_BLEND) { uDst = le16_to_u16(*pDst); } + if (CF_MASKCHECK) if (uDst&0x8000) { goto endtile; } uSrc = data; if (CF_BLEND) uSrc = gpuBlending(uSrc, uDst); - if (CF_MASKSET) { *pDst = uSrc | 0x8000; } - else { *pDst = uSrc; } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else { *pDst = u16_to_le16(uSrc); } //senquack - Did not apply "Silent Hill" mask-bit fix to here. 
// It is hard to tell from scarce documentation available and @@ -322,7 +333,7 @@ static void gpuTileSpanFn(u16 *pDst, u32 count, u16 data) } } -static void TileNULL(u16 *pDst, u32 count, u16 data) +static void TileNULL(le16_t *pDst, u32 count, u16 data) { #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"TileNULL()\n"); @@ -331,7 +342,7 @@ static void TileNULL(u16 *pDst, u32 count, u16 data) /////////////////////////////////////////////////////////////////////////////// // Tiles innerloops driver -typedef void (*PT)(u16 *pDst, u32 count, u16 data); +typedef void (*PT)(le16_t *pDst, u32 count, u16 data); // Template instantiation helper macros #define TI(cf) gpuTileSpanFn<(cf)> @@ -355,7 +366,7 @@ const PT gpuTileSpanDrivers[32] = { // GPU Sprites innerloops generator template -static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0) +static void gpuSpriteSpanFn(le16_t *pDst, u32 count, u8* pTxt, u32 u0) { // Blend func can save an operation if it knows uSrc MSB is unset. // Untextured prims can always skip (source color always comes with MSB=0). 
@@ -364,13 +375,13 @@ static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0) uint_fast16_t uSrc, uDst, srcMSB; bool should_blend; - u32 u0_mask = gpu_senquack.TextureWindow[2]; + u32 u0_mask = gpu_unai.TextureWindow[2]; u8 r5, g5, b5; if (CF_LIGHT) { - r5 = gpu_senquack.r5; - g5 = gpu_senquack.g5; - b5 = gpu_senquack.b5; + r5 = gpu_unai.r5; + g5 = gpu_unai.g5; + b5 = gpu_unai.b5; } if (CF_TEXTMODE==3) { @@ -378,22 +389,22 @@ static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0) u0_mask <<= 1; } - const u16 *CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_senquack.CBA; + const le16_t *CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA; do { - if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; } + if (CF_MASKCHECK || CF_BLEND) { uDst = le16_to_u16(*pDst); } if (CF_MASKCHECK) if (uDst&0x8000) { goto endsprite; } if (CF_TEXTMODE==1) { // 4bpp (CLUT) u8 rgb = pTxt[(u0 & u0_mask)>>1]; - uSrc = CBA_[(rgb>>((u0&1)<<2))&0xf]; + uSrc = le16_to_u16(CBA_[(rgb>>((u0&1)<<2))&0xf]); } if (CF_TEXTMODE==2) { // 8bpp (CLUT) - uSrc = CBA_[pTxt[u0 & u0_mask]]; + uSrc = le16_to_u16(CBA_[pTxt[u0 & u0_mask]]); } if (CF_TEXTMODE==3) { // 16bpp - uSrc = *(u16*)(&pTxt[u0 & u0_mask]); + uSrc = le16_to_u16(*(le16_t*)(&pTxt[u0 & u0_mask])); } if (!uSrc) goto endsprite; @@ -410,9 +421,9 @@ static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0) if (CF_BLEND && should_blend) uSrc = gpuBlending(uSrc, uDst); - if (CF_MASKSET) { *pDst = uSrc | 0x8000; } - else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = uSrc | srcMSB; } - else { *pDst = uSrc; } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = u16_to_le16(uSrc | srcMSB); } + else { *pDst = u16_to_le16(uSrc); } endsprite: u0 += (CF_TEXTMODE==3) ? 
2 : 1; @@ -421,7 +432,7 @@ static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0) while (--count); } -static void SpriteNULL(u16 *pDst, u32 count, u8* pTxt, u32 u0) +static void SpriteNULL(le16_t *pDst, u32 count, u8* pTxt, u32 u0) { #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"SpriteNULL()\n"); @@ -432,7 +443,7 @@ static void SpriteNULL(u16 *pDst, u32 count, u8* pTxt, u32 u0) /////////////////////////////////////////////////////////////////////////////// // Sprite innerloops driver -typedef void (*PS)(u16 *pDst, u32 count, u8* pTxt, u32 u0); +typedef void (*PS)(le16_t *pDst, u32 count, u8* pTxt, u32 u0); // Template instantiation helper macros #define TI(cf) gpuSpriteSpanFn<(cf)> @@ -485,7 +496,7 @@ const PS gpuSpriteSpanDrivers[256] = { // relevant blend/light headers. // (see README_senquack.txt) template -static void gpuPolySpanFn(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 count) +static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) { // Blend func can save an operation if it knows uSrc MSB is unset. // Untextured prims can always skip this (src color MSB is always 0). @@ -493,14 +504,14 @@ static void gpuPolySpanFn(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 cou const bool skip_uSrc_mask = MSB_PRESERVED ? (!CF_TEXTMODE) : (!CF_TEXTMODE) || CF_LIGHT; bool should_blend; - u32 bMsk; if (CF_BLITMASK) bMsk = gpu_senquack.blit_mask; + u32 bMsk; if (CF_BLITMASK) bMsk = gpu_unai.blit_mask; if (!CF_TEXTMODE) { if (!CF_GOURAUD) { // UNTEXTURED, NO GOURAUD - const u16 pix15 = gpu_senquack.PixelData; + const u16 pix15 = gpu_unai.PixelData; do { uint_fast16_t uSrc, uDst; @@ -509,7 +520,7 @@ static void gpuPolySpanFn(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 cou // gravestone text at end of Medieval intro sequence. 
-senquack //if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) { goto endpolynotextnogou; } } - if (CF_BLEND || CF_MASKCHECK) uDst = *pDst; + if (CF_BLEND || CF_MASKCHECK) uDst = le16_to_u16(*pDst); if (CF_MASKCHECK) { if (uDst&0x8000) { goto endpolynotextnogou; } } uSrc = pix15; @@ -517,8 +528,8 @@ static void gpuPolySpanFn(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 cou if (CF_BLEND) uSrc = gpuBlending(uSrc, uDst); - if (CF_MASKSET) { *pDst = uSrc | 0x8000; } - else { *pDst = uSrc; } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else { *pDst = u16_to_le16(uSrc); } endpolynotextnogou: pDst++; @@ -527,8 +538,8 @@ static void gpuPolySpanFn(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 cou else { // UNTEXTURED, GOURAUD - u32 l_gCol = gpu_senquack.gCol; - u32 l_gInc = gpu_senquack.gInc; + u32 l_gCol = gpu_unai.gCol; + u32 l_gInc = gpu_unai.gInc; do { uint_fast16_t uDst, uSrc; @@ -536,7 +547,7 @@ static void gpuPolySpanFn(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 cou // See note in above loop regarding CF_BLITMASK //if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolynotextgou; } - if (CF_BLEND || CF_MASKCHECK) uDst = *pDst; + if (CF_BLEND || CF_MASKCHECK) uDst = le16_to_u16(*pDst); if (CF_MASKCHECK) { if (uDst&0x8000) goto endpolynotextgou; } if (CF_DITHER) { @@ -555,8 +566,8 @@ static void gpuPolySpanFn(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 cou uSrc = gpuBlending(uSrc, uDst); } - if (CF_MASKSET) { *pDst = uSrc | 0x8000; } - else { *pDst = uSrc; } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else { *pDst = u16_to_le16(uSrc); } endpolynotextgou: pDst++; @@ -571,15 +582,15 @@ static void gpuPolySpanFn(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 cou uint_fast16_t uDst, uSrc, srcMSB; - //senquack - note: original UNAI code had gpu_senquack.{u4/v4} packed into + //senquack - note: original UNAI code had gpu_unai.{u4/v4} packed into // one 32-bit unsigned int, but this 
proved to lose too much accuracy // (pixel drouputs noticeable in NFS3 sky), so now are separate vars. - u32 l_u_msk = gpu_senquack.u_msk; u32 l_v_msk = gpu_senquack.v_msk; - u32 l_u = gpu_senquack.u & l_u_msk; u32 l_v = gpu_senquack.v & l_v_msk; - s32 l_u_inc = gpu_senquack.u_inc; s32 l_v_inc = gpu_senquack.v_inc; + u32 l_u_msk = gpu_unai.u_msk; u32 l_v_msk = gpu_unai.v_msk; + u32 l_u = gpu_unai.u & l_u_msk; u32 l_v = gpu_unai.v & l_v_msk; + s32 l_u_inc = gpu_unai.u_inc; s32 l_v_inc = gpu_unai.v_inc; - const u16* TBA_ = gpu_senquack.TBA; - const u16* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_senquack.CBA; + const le16_t* TBA_ = gpu_unai.TBA; + const le16_t* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA; u8 r5, g5, b5; u8 r8, g8, b8; @@ -588,17 +599,17 @@ static void gpuPolySpanFn(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 cou if (CF_LIGHT) { if (CF_GOURAUD) { - l_gInc = gpu_senquack.gInc; - l_gCol = gpu_senquack.gCol; + l_gInc = gpu_unai.gInc; + l_gCol = gpu_unai.gCol; } else { if (CF_DITHER) { - r8 = gpu_senquack.r8; - g8 = gpu_senquack.g8; - b8 = gpu_senquack.b8; + r8 = gpu_unai.r8; + g8 = gpu_unai.g8; + b8 = gpu_unai.b8; } else { - r5 = gpu_senquack.r5; - g5 = gpu_senquack.g5; - b5 = gpu_senquack.b5; + r5 = gpu_unai.r5; + g5 = gpu_unai.g5; + b5 = gpu_unai.b5; } } } @@ -606,7 +617,7 @@ static void gpuPolySpanFn(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 cou do { if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolytext; } - if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; } + if (CF_MASKCHECK || CF_BLEND) { uDst = le16_to_u16(*pDst); } if (CF_MASKCHECK) if (uDst&0x8000) { goto endpolytext; } //senquack - adapted to work with new 22.10 fixed point routines: @@ -615,15 +626,15 @@ static void gpuPolySpanFn(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 cou u32 tu=(l_u>>10); u32 tv=(l_v<<1)&(0xff<<11); u8 rgb=((u8*)TBA_)[tv+(tu>>1)]; - uSrc=CBA_[(rgb>>((tu&1)<<2))&0xf]; + uSrc=le16_to_u16(CBA_[(rgb>>((tu&1)<<2))&0xf]); if (!uSrc) goto 
endpolytext; } if (CF_TEXTMODE==2) { // 8bpp (CLUT) - uSrc = CBA_[(((u8*)TBA_)[(l_u>>10)+((l_v<<1)&(0xff<<11))])]; + uSrc = le16_to_u16(CBA_[(((u8*)TBA_)[(l_u>>10)+((l_v<<1)&(0xff<<11))])]); if (!uSrc) goto endpolytext; } if (CF_TEXTMODE==3) { // 16bpp - uSrc = TBA_[(l_u>>10)+((l_v)&(0xff<<10))]; + uSrc = le16_to_u16(TBA_[(l_u>>10)+((l_v)&(0xff<<10))]); if (!uSrc) goto endpolytext; } @@ -661,9 +672,9 @@ static void gpuPolySpanFn(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 cou uSrc = gpuBlending(uSrc, uDst); } - if (CF_MASKSET) { *pDst = uSrc | 0x8000; } - else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = uSrc | srcMSB; } - else { *pDst = uSrc; } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = u16_to_le16(uSrc | srcMSB); } + else { *pDst = u16_to_le16(uSrc); } endpolytext: pDst++; l_u = (l_u + l_u_inc) & l_u_msk; @@ -674,7 +685,7 @@ static void gpuPolySpanFn(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 cou } } -static void PolyNULL(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 count) +static void PolyNULL(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) { #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"PolyNULL()\n"); @@ -683,7 +694,7 @@ static void PolyNULL(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 count) /////////////////////////////////////////////////////////////////////////////// // Polygon innerloops driver -typedef void (*PP)(const gpu_senquack_t &gpu_senquack, u16 *pDst, u32 count); +typedef void (*PP)(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count); // Template instantiation helper macros #define TI(cf) gpuPolySpanFn<(cf)> diff --git a/plugins/gpu_unai/gpu_inner_light.h b/plugins/gpu_unai/gpu_inner_light.h index b5d89338a..f90e8ec15 100644 --- a/plugins/gpu_unai/gpu_inner_light.h +++ b/plugins/gpu_unai/gpu_inner_light.h @@ -65,7 +65,7 @@ static void SetupLightLUT() for (int i=0; i < 32; ++i) { int val = i * j / 16; if (val > 31) val = 31; - 
gpu_senquack.LightLUT[(j*32) + i] = val; + gpu_unai.LightLUT[(j*32) + i] = val; } } } @@ -170,9 +170,9 @@ GPU_INLINE u32 gpuLightingRGB24(u32 gCol) //////////////////////////////////////////////////////////////////////////////// GPU_INLINE uint_fast16_t gpuLightingTXTGeneric(uint_fast16_t uSrc, u8 r5, u8 g5, u8 b5) { - return (gpu_senquack.LightLUT[((uSrc&0x7C00)>>5) | b5] << 10) | - (gpu_senquack.LightLUT[ (uSrc&0x03E0) | g5] << 5) | - (gpu_senquack.LightLUT[((uSrc&0x001F)<<5) | r5] ); + return (gpu_unai.LightLUT[((uSrc&0x7C00)>>5) | b5] << 10) | + (gpu_unai.LightLUT[ (uSrc&0x03E0) | g5] << 5) | + (gpu_unai.LightLUT[((uSrc&0x001F)<<5) | r5] ); } @@ -192,9 +192,9 @@ GPU_INLINE uint_fast16_t gpuLightingTXTGeneric(uint_fast16_t uSrc, u8 r5, u8 g5, //////////////////////////////////////////////////////////////////////////////// GPU_INLINE uint_fast16_t gpuLightingTXTGouraudGeneric(uint_fast16_t uSrc, u32 gCol) { - return (gpu_senquack.LightLUT[((uSrc&0x7C00)>>5) | ((gCol>> 5)&0x1F)]<<10) | - (gpu_senquack.LightLUT[ (uSrc&0x03E0) | ((gCol>>16)&0x1F)]<< 5) | - (gpu_senquack.LightLUT[((uSrc&0x001F)<<5) | (gCol>>27) ] ); + return (gpu_unai.LightLUT[((uSrc&0x7C00)>>5) | ((gCol>> 5)&0x1F)]<<10) | + (gpu_unai.LightLUT[ (uSrc&0x03E0) | ((gCol>>16)&0x1F)]<< 5) | + (gpu_unai.LightLUT[((uSrc&0x001F)<<5) | (gCol>>27) ] ); } diff --git a/plugins/gpu_unai/gpu_inner_light_arm.h b/plugins/gpu_unai/gpu_inner_light_arm.h index 550f6b1e4..7bd589088 100644 --- a/plugins/gpu_unai/gpu_inner_light_arm.h +++ b/plugins/gpu_unai/gpu_inner_light_arm.h @@ -61,7 +61,7 @@ GPU_INLINE uint_fast16_t gpuLightingTXTARM(uint_fast16_t uSrc, u8 r5, u8 g5, u8 "orr %[out], %[out], %[db], lsl #0x0A \n\t" // out holds 0xmbbbbbgggggrrrrr : [out] "=&r" (out), [db] "=&r" (db), [dg] "=&r" (dg) : [r5] "r" (r5), [g5] "r" (g5), [b5] "r" (b5), - [lut] "r" (gpu_senquack.LightLUT), [src] "r" (uSrc), "0" (out) + [lut] "r" (gpu_unai.LightLUT), [src] "r" (uSrc), "0" (out) : "cc"); return out; } @@ -103,7 +103,7 @@ 
GPU_INLINE uint_fast16_t gpuLightingTXTGouraudARM(uint_fast16_t uSrc, u32 gCol) "orr %[out], %[out], %[db], lsl #0x0A \n\t" // out holds 0xmbbbbbgggggrrrrr : [out] "=&r" (out), [db] "=&r" (db), [dg] "=&r" (dg), [gtmp] "=&r" (gtmp) \ - : [gCol] "r" (gCol), [lut] "r" (gpu_senquack.LightLUT), "0" (out), [src] "r" (uSrc) + : [gCol] "r" (gCol), [lut] "r" (gpu_unai.LightLUT), "0" (out), [src] "r" (uSrc) : "cc"); return out; diff --git a/plugins/gpu_unai/gpu_inner_quantization.h b/plugins/gpu_unai/gpu_inner_quantization.h index 6432d0313..8a4e93548 100644 --- a/plugins/gpu_unai/gpu_inner_quantization.h +++ b/plugins/gpu_unai/gpu_inner_quantization.h @@ -63,7 +63,7 @@ static void SetupDitheringConstants() // Is 8x8 matrix overkill as a result, can we use 4x4? component &= ~1; - gpu_senquack.DitherMatrix[offset] = (component) + gpu_unai.DitherMatrix[offset] = (component) | (component << 10) | (component << 20); } @@ -85,15 +85,15 @@ static void SetupDitheringConstants() // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care //////////////////////////////////////////////////////////////////////////////// template -GPU_INLINE u16 gpuColorQuantization24(u32 uSrc24, const u16 *pDst) +GPU_INLINE u16 gpuColorQuantization24(u32 uSrc24, const le16_t *pDst) { if (DITHER) { - u16 fbpos = (u32)(pDst - gpu_senquack.vram); + uintptr_t fbpos = pDst - gpu_unai.vram; u16 offset = ((fbpos & (0x7 << 10)) >> 7) | (fbpos & 0x7); //clean overflow flags and add - uSrc24 = (uSrc24 & 0x1FF7FDFF) + gpu_senquack.DitherMatrix[offset]; + uSrc24 = (uSrc24 & 0x1FF7FDFF) + gpu_unai.DitherMatrix[offset]; if (uSrc24 & (1<< 9)) uSrc24 |= (0x1FF ); if (uSrc24 & (1<<19)) uSrc24 |= (0x1FF<<10); diff --git a/plugins/gpu_unai/gpu_raster_image.h b/plugins/gpu_unai/gpu_raster_image.h index 8e8064c46..909ca3901 100644 --- a/plugins/gpu_unai/gpu_raster_image.h +++ b/plugins/gpu_unai/gpu_raster_image.h @@ -26,25 +26,25 @@ void gpuLoadImage(PtrUnion packet) { u16 x0, y0, w0, h0; - x0 = packet.U2[2] 
& 1023; - y0 = packet.U2[3] & 511; - w0 = packet.U2[4]; - h0 = packet.U2[5]; + x0 = le16_to_u16(packet.U2[2]) & 1023; + y0 = le16_to_u16(packet.U2[3]) & 511; + w0 = le16_to_u16(packet.U2[4]); + h0 = le16_to_u16(packet.U2[5]); if ((y0 + h0) > FRAME_HEIGHT) { h0 = FRAME_HEIGHT - y0; } - gpu_senquack.dma.FrameToWrite = ((w0)&&(h0)); + gpu_unai.dma.FrameToWrite = ((w0)&&(h0)); - gpu_senquack.dma.px = 0; - gpu_senquack.dma.py = 0; - gpu_senquack.dma.x_end = w0; - gpu_senquack.dma.y_end = h0; - gpu_senquack.dma.pvram = &((u16*)gpu_senquack.vram)[x0+(y0*1024)]; + gpu_unai.dma.px = 0; + gpu_unai.dma.py = 0; + gpu_unai.dma.x_end = w0; + gpu_unai.dma.y_end = h0; + gpu_unai.dma.pvram = &gpu_unai.vram[x0+(y0*1024)]; - gpu_senquack.GPU_GP1 |= 0x08000000; + gpu_unai.GPU_GP1 |= 0x08000000; } #endif // !USE_GPULIB @@ -53,24 +53,24 @@ void gpuLoadImage(PtrUnion packet) void gpuStoreImage(PtrUnion packet) { u16 x0, y0, w0, h0; - x0 = packet.U2[2] & 1023; - y0 = packet.U2[3] & 511; - w0 = packet.U2[4]; - h0 = packet.U2[5]; + x0 = le16_to_u16(packet.U2[2]) & 1023; + y0 = le16_to_u16(packet.U2[3]) & 511; + w0 = le16_to_u16(packet.U2[4]); + h0 = le16_to_u16(packet.U2[5]); if ((y0 + h0) > FRAME_HEIGHT) { h0 = FRAME_HEIGHT - y0; } - gpu_senquack.dma.FrameToRead = ((w0)&&(h0)); + gpu_unai.dma.FrameToRead = ((w0)&&(h0)); - gpu_senquack.dma.px = 0; - gpu_senquack.dma.py = 0; - gpu_senquack.dma.x_end = w0; - gpu_senquack.dma.y_end = h0; - gpu_senquack.dma.pvram = &((u16*)gpu_senquack.vram)[x0+(y0*1024)]; + gpu_unai.dma.px = 0; + gpu_unai.dma.py = 0; + gpu_unai.dma.x_end = w0; + gpu_unai.dma.y_end = h0; + gpu_unai.dma.pvram = &gpu_unai.vram[x0+(y0*1024)]; - gpu_senquack.GPU_GP1 |= 0x08000000; + gpu_unai.GPU_GP1 |= 0x08000000; } #endif // !USE_GPULIB @@ -78,12 +78,12 @@ void gpuMoveImage(PtrUnion packet) { u32 x0, y0, x1, y1; s32 w0, h0; - x0 = packet.U2[2] & 1023; - y0 = packet.U2[3] & 511; - x1 = packet.U2[4] & 1023; - y1 = packet.U2[5] & 511; - w0 = packet.U2[6]; - h0 = packet.U2[7]; + x0 = 
le16_to_u16(packet.U2[2]) & 1023; + y0 = le16_to_u16(packet.U2[3]) & 511; + x1 = le16_to_u16(packet.U2[4]) & 1023; + y1 = le16_to_u16(packet.U2[5]) & 511; + w0 = le16_to_u16(packet.U2[6]); + h0 = le16_to_u16(packet.U2[7]); if( (x0==x1) && (y0==y1) ) return; if ((w0<=0) || (h0<=0)) return; @@ -94,7 +94,7 @@ void gpuMoveImage(PtrUnion packet) if (((y0+h0)>512)||((x0+w0)>1024)||((y1+h0)>512)||((x1+w0)>1024)) { - u16 *psxVuw=gpu_senquack.vram; + le16_t *psxVuw=gpu_unai.vram; s32 i,j; for(j=0;j>1); lpDst += ((FRAME_OFFSET(x1, y1))>>1); if (w0&1) @@ -127,7 +127,7 @@ void gpuMoveImage(PtrUnion packet) w0>>=1; if (!w0) { do { - *((u16*)lpDst) = *((u16*)lpSrc); + *((le16_t*)lpDst) = *((le16_t*)lpSrc); lpDst += x1; lpSrc += x1; } while (--h0); @@ -135,7 +135,7 @@ void gpuMoveImage(PtrUnion packet) do { x0=w0; do { *lpDst++ = *lpSrc++; } while (--x0); - *((u16*)lpDst) = *((u16*)lpSrc); + *((le16_t*)lpDst) = *((le16_t*)lpSrc); lpDst += x1; lpSrc += x1; } while (--h0); @@ -157,11 +157,11 @@ void gpuMoveImage(PtrUnion packet) void gpuClearImage(PtrUnion packet) { s32 x0, y0, w0, h0; - x0 = packet.S2[2]; - y0 = packet.S2[3]; - w0 = packet.S2[4] & 0x3ff; - h0 = packet.S2[5] & 0x3ff; - + x0 = le16_to_s16(packet.U2[2]); + y0 = le16_to_s16(packet.U2[3]); + w0 = le16_to_s16(packet.U2[4]) & 0x3ff; + h0 = le16_to_s16(packet.U2[5]) & 0x3ff; + w0 += x0; if (x0 < 0) x0 = 0; if (w0 > FRAME_WIDTH) w0 = FRAME_WIDTH; @@ -176,11 +176,11 @@ void gpuClearImage(PtrUnion packet) #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"gpuClearImage(x0=%d,y0=%d,w0=%d,h0=%d)\n",x0,y0,w0,h0); #endif - + if (x0&1) { - u16* pixel = (u16*)gpu_senquack.vram + FRAME_OFFSET(x0, y0); - u16 rgb = GPU_RGB16(packet.U4[0]); + le16_t* pixel = gpu_unai.vram + FRAME_OFFSET(x0, y0); + le16_t rgb = u16_to_le16(GPU_RGB16(le32_to_u32(packet.U4[0]))); y0 = FRAME_WIDTH - w0; do { x0=w0; @@ -190,9 +190,9 @@ void gpuClearImage(PtrUnion packet) } else { - u32* pixel = (u32*)gpu_senquack.vram + ((FRAME_OFFSET(x0, y0))>>1); - u32 rgb = 
GPU_RGB16(packet.U4[0]); - rgb |= (rgb<<16); + le32_t* pixel = (le32_t*)gpu_unai.vram + ((FRAME_OFFSET(x0, y0))>>1); + u32 _rgb = GPU_RGB16(le32_to_u32(packet.U4[0])); + le32_t rgb = u32_to_le32(_rgb | (_rgb << 16)); if (w0&1) { y0 = (FRAME_WIDTH - w0 +1)>>1; @@ -200,7 +200,7 @@ void gpuClearImage(PtrUnion packet) do { x0=w0; do { *pixel++ = rgb; } while (--x0); - *((u16*)pixel) = (u16)rgb; + *((u16*)pixel) = (u16)le32_raw(rgb); pixel += y0; } while (--h0); } diff --git a/plugins/gpu_unai/gpu_raster_line.h b/plugins/gpu_unai/gpu_raster_line.h index 4dd99a6dd..a338f974e 100644 --- a/plugins/gpu_unai/gpu_raster_line.h +++ b/plugins/gpu_unai/gpu_raster_line.h @@ -69,15 +69,15 @@ void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver) // Clip region: xmax/ymax seem to normally be one *past* the rightmost/ // bottommost pixels of the draw area. Since we render every pixel between // and including both line endpoints, subtract one from xmax/ymax. - const int xmin = gpu_senquack.DrawingArea[0]; - const int ymin = gpu_senquack.DrawingArea[1]; - const int xmax = gpu_senquack.DrawingArea[2] - 1; - const int ymax = gpu_senquack.DrawingArea[3] - 1; + const int xmin = gpu_unai.DrawingArea[0]; + const int ymin = gpu_unai.DrawingArea[1]; + const int xmax = gpu_unai.DrawingArea[2] - 1; + const int ymax = gpu_unai.DrawingArea[3] - 1; - x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_senquack.DrawingOffset[0]; - y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_senquack.DrawingOffset[1]; - x1 = GPU_EXPANDSIGN(packet.S2[4]) + gpu_senquack.DrawingOffset[0]; - y1 = GPU_EXPANDSIGN(packet.S2[5]) + gpu_senquack.DrawingOffset[1]; + x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2])) + gpu_unai.DrawingOffset[0]; + y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3])) + gpu_unai.DrawingOffset[1]; + x1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[4])) + gpu_unai.DrawingOffset[0]; + y1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[5])) + gpu_unai.DrawingOffset[1]; // Always draw top to bottom, so ensure y0 <= y1 if (y0 > y1) { 
@@ -177,12 +177,9 @@ void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver) err_adjdown; // Subract this from err_term after drawing longer run // Color to draw with (16 bits, highest of which is unset mask bit) - uintptr_t col16 = GPU_RGB16(packet.U4[0]); + uintptr_t col16 = GPU_RGB16(le32_to_u32(packet.U4[0])); - // We use u8 pointers even though PS1 has u16 framebuffer. - // This allows pixel-drawing functions to increment dst pointer - // directly by the passed 'incr' value, not having to shift it first. - u8 *dst = (u8*)gpu_senquack.vram + y0 * dst_stride + x0 * dst_depth; + le16_t *dst = gpu_unai.vram + (y0 * dst_stride + x0 * dst_depth) / FRAME_BYTES_PER_PIXEL; // SPECIAL CASE: Vertical line if (dx == 0) { @@ -278,7 +275,7 @@ void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver) // First run of pixels dst = gpuPixelSpanDriver(dst, col16, incr_major, start_length); - dst += incr_minor; + dst += incr_minor / 2; // Middle runs of pixels while (--minor > 0) { @@ -292,7 +289,7 @@ void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver) } dst = gpuPixelSpanDriver(dst, col16, incr_major, run_length); - dst += incr_minor; + dst += incr_minor / 2; } // Final run of pixels @@ -316,18 +313,18 @@ void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver) // Clip region: xmax/ymax seem to normally be one *past* the rightmost/ // bottommost pixels of the draw area. We'll render every pixel between // and including both line endpoints, so subtract one from xmax/ymax. 
- const int xmin = gpu_senquack.DrawingArea[0]; - const int ymin = gpu_senquack.DrawingArea[1]; - const int xmax = gpu_senquack.DrawingArea[2] - 1; - const int ymax = gpu_senquack.DrawingArea[3] - 1; + const int xmin = gpu_unai.DrawingArea[0]; + const int ymin = gpu_unai.DrawingArea[1]; + const int xmax = gpu_unai.DrawingArea[2] - 1; + const int ymax = gpu_unai.DrawingArea[3] - 1; - x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_senquack.DrawingOffset[0]; - y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_senquack.DrawingOffset[1]; - x1 = GPU_EXPANDSIGN(packet.S2[6]) + gpu_senquack.DrawingOffset[0]; - y1 = GPU_EXPANDSIGN(packet.S2[7]) + gpu_senquack.DrawingOffset[1]; + x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2])) + gpu_unai.DrawingOffset[0]; + y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3])) + gpu_unai.DrawingOffset[1]; + x1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[6])) + gpu_unai.DrawingOffset[0]; + y1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[7])) + gpu_unai.DrawingOffset[1]; - u32 col0 = packet.U4[0]; - u32 col1 = packet.U4[2]; + u32 col0 = le32_to_u32(packet.U4[0]); + u32 col1 = le32_to_u32(packet.U4[2]); // Always draw top to bottom, so ensure y0 <= y1 if (y0 > y1) { @@ -519,10 +516,7 @@ void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver) gcol.g = g0 << GPU_GOURAUD_FIXED_BITS; gcol.b = b0 << GPU_GOURAUD_FIXED_BITS; - // We use u8 pointers even though PS1 has u16 framebuffer. - // This allows pixel-drawing functions to increment dst pointer - // directly by the passed 'incr' value, not having to shift it first. 
- u8 *dst = (u8*)gpu_senquack.vram + y0 * dst_stride + x0 * dst_depth; + le16_t *dst = gpu_unai.vram + (y0 * dst_stride + x0 * dst_depth) / FRAME_BYTES_PER_PIXEL; // SPECIAL CASE: Vertical line if (dx == 0) { @@ -547,7 +541,7 @@ void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver) if (db) gcol.b_incr /= dy; } #endif - + gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride, dy+1); return; } @@ -696,7 +690,7 @@ void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver) // First run of pixels dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, start_length); - dst += incr_minor; + dst += incr_minor / 2; // Middle runs of pixels while (--minor > 0) { @@ -710,7 +704,7 @@ void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver) } dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, run_length); - dst += incr_minor; + dst += incr_minor / 2; } // Final run of pixels diff --git a/plugins/gpu_unai/gpu_raster_polygon.h b/plugins/gpu_unai/gpu_raster_polygon.h index 8638ac420..ff6dc00d7 100644 --- a/plugins/gpu_unai/gpu_raster_polygon.h +++ b/plugins/gpu_unai/gpu_raster_polygon.h @@ -31,11 +31,19 @@ struct PolyVertex { s32 x, y; // Sign-extended 11-bit X,Y coords union { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + struct { u8 pad[2], v, u; } tex; // Texture coords (if used) +#else struct { u8 u, v, pad[2]; } tex; // Texture coords (if used) +#endif u32 tex_word; }; union { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + struct { u8 pad, b, g, r; } col; // 24-bit RGB color (if used) +#else struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used) +#endif u32 col_word; }; }; @@ -68,30 +76,30 @@ static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyTy vert_stride++; int num_verts = (is_quad) ? 
4 : 3; - u32 *ptr; + le32_t *ptr; // X,Y coords, adjusted by draw offsets - s32 x_off = gpu_senquack.DrawingOffset[0]; - s32 y_off = gpu_senquack.DrawingOffset[1]; + s32 x_off = gpu_unai.DrawingOffset[0]; + s32 y_off = gpu_unai.DrawingOffset[1]; ptr = &packet.U4[1]; for (int i=0; i < num_verts; ++i, ptr += vert_stride) { - s16* coord_ptr = (s16*)ptr; - vbuf[i].x = GPU_EXPANDSIGN(coord_ptr[0]) + x_off; - vbuf[i].y = GPU_EXPANDSIGN(coord_ptr[1]) + y_off; + u32 coords = le32_to_u32(*ptr); + vbuf[i].x = GPU_EXPANDSIGN((s16)coords) + x_off; + vbuf[i].y = GPU_EXPANDSIGN((s16)(coords >> 16)) + y_off; } // U,V texture coords (if applicable) if (texturing) { ptr = &packet.U4[2]; for (int i=0; i < num_verts; ++i, ptr += vert_stride) - vbuf[i].tex_word = *ptr; + vbuf[i].tex_word = le32_to_u32(*ptr); } // Colors (if applicable) if (gouraud) { ptr = &packet.U4[0]; for (int i=0; i < num_verts; ++i, ptr += vert_stride) - vbuf[i].col_word = *ptr; + vbuf[i].col_word = le32_to_u32(*ptr); } } @@ -189,8 +197,8 @@ static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVerte // Determine if triangle is completely outside clipping range int xmin, xmax, ymin, ymax; - xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; - ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; + ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; int clipped_lowest_x = Max2(xmin,lowest_x); int clipped_lowest_y = Max2(ymin,lowest_y); int clipped_highest_x = Min2(xmax,highest_x); @@ -218,7 +226,7 @@ gpuDrawPolyF - Flat-shaded, untextured poly void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) { // Set up bgr555 color to be used across calls in inner driver - gpu_senquack.PixelData = GPU_RGB16(packet.U4[0]); + gpu_unai.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0])); PolyVertex vbuf[4]; polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad); @@ 
-327,8 +335,8 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad } s32 xmin, xmax, ymin, ymax; - xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; - ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; + ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; if ((ymin - ya) > 0) { x3 += (dx3 * (ymin - ya)); @@ -342,10 +350,10 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad if (loop1 <= 0) continue; - u16* PixelBase = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(0, ya)]; - int li=gpu_senquack.ilace_mask; - int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0); - int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1); + le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)]; + int li=gpu_unai.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH, x3 += dx3, x4 += dx4 ) @@ -357,7 +365,7 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad if ((xmin - xa) > 0) xa = xmin; if (xb > xmax) xb = xmax; if ((xb - xa) > 0) - gpuPolySpanDriver(gpu_senquack, PixelBase + xa, (xb - xa)); + gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); } } } while (++cur_pass < total_passes); @@ -369,13 +377,13 @@ gpuDrawPolyFT - Flat-shaded, textured poly void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) { // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light) - gpu_senquack.r8 = packet.U1[0]; - gpu_senquack.g8 = packet.U1[1]; - gpu_senquack.b8 = packet.U1[2]; + gpu_unai.r8 = packet.U1[0]; + gpu_unai.g8 = packet.U1[1]; + gpu_unai.b8 = packet.U1[2]; // r5/g5/b5 used if just texture-blending 
is applied (15-bit light) - gpu_senquack.r5 = packet.U1[0] >> 3; - gpu_senquack.g5 = packet.U1[1] >> 3; - gpu_senquack.b5 = packet.U1[2] >> 3; + gpu_unai.r5 = packet.U1[0] >> 3; + gpu_unai.g5 = packet.U1[1] >> 3; + gpu_unai.b5 = packet.U1[2] >> 3; PolyVertex vbuf[4]; polyInitVertexBuffer(vbuf, packet, POLYTYPE_FT, is_quad); @@ -452,8 +460,8 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua #endif #endif // Set u,v increments for inner driver - gpu_senquack.u_inc = du4; - gpu_senquack.v_inc = dv4; + gpu_unai.u_inc = du4; + gpu_unai.v_inc = dv4; //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here? // (SAME ISSUE ELSEWHERE) @@ -635,8 +643,8 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua } s32 xmin, xmax, ymin, ymax; - xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; - ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; + ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; if ((ymin - ya) > 0) { x3 += dx3 * (ymin - ya); @@ -652,10 +660,10 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua if (loop1 <= 0) continue; - u16* PixelBase = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(0, ya)]; - int li=gpu_senquack.ilace_mask; - int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0); - int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1); + le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)]; + int li=gpu_unai.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH, x3 += dx3, x4 += dx4, @@ -685,12 +693,12 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP 
gpuPolySpanDriver, u32 is_qua } // Set u,v coords for inner driver - gpu_senquack.u = u4; - gpu_senquack.v = v4; + gpu_unai.u = u4; + gpu_unai.v = v4; if (xb > xmax) xb = xmax; if ((xb - xa) > 0) - gpuPolySpanDriver(gpu_senquack, PixelBase + xa, (xb - xa)); + gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); } } } while (++cur_pass < total_passes); @@ -782,7 +790,7 @@ void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad #endif #endif // Setup packed Gouraud increment for inner driver - gpu_senquack.gInc = gpuPackGouraudColInc(dr4, dg4, db4); + gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4); for (s32 loop0 = 2; loop0; loop0--) { if (loop0 == 2) { @@ -979,8 +987,8 @@ void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad } s32 xmin, xmax, ymin, ymax; - xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; - ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; + ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; if ((ymin - ya) > 0) { x3 += (dx3 * (ymin - ya)); @@ -997,10 +1005,10 @@ void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad if (loop1 <= 0) continue; - u16* PixelBase = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(0, ya)]; - int li=gpu_senquack.ilace_mask; - int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0); - int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1); + le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)]; + int li=gpu_unai.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH, x3 += dx3, x4 += dx4, @@ -1034,11 +1042,11 @@ void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 
is_quad } // Setup packed Gouraud color for inner driver - gpu_senquack.gCol = gpuPackGouraudCol(r4, g4, b4); + gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4); if (xb > xmax) xb = xmax; if ((xb - xa) > 0) - gpuPolySpanDriver(gpu_senquack, PixelBase + xa, (xb - xa)); + gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); } } } while (++cur_pass < total_passes); @@ -1148,9 +1156,9 @@ void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua #endif #endif // Set u,v increments and packed Gouraud increment for inner driver - gpu_senquack.u_inc = du4; - gpu_senquack.v_inc = dv4; - gpu_senquack.gInc = gpuPackGouraudColInc(dr4, dg4, db4); + gpu_unai.u_inc = du4; + gpu_unai.v_inc = dv4; + gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4); for (s32 loop0 = 2; loop0; loop0--) { if (loop0 == 2) { @@ -1372,8 +1380,8 @@ void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua } s32 xmin, xmax, ymin, ymax; - xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; - ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; + ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; if ((ymin - ya) > 0) { x3 += (dx3 * (ymin - ya)); @@ -1392,10 +1400,10 @@ void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua if (loop1 <= 0) continue; - u16* PixelBase = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(0, ya)]; - int li=gpu_senquack.ilace_mask; - int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0); - int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1); + le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)]; + int li=gpu_unai.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); for (; loop1; --loop1, ++ya, PixelBase += 
FRAME_WIDTH, x3 += dx3, x4 += dx4, @@ -1438,13 +1446,13 @@ void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua } // Set packed Gouraud color and u,v coords for inner driver - gpu_senquack.u = u4; - gpu_senquack.v = v4; - gpu_senquack.gCol = gpuPackGouraudCol(r4, g4, b4); + gpu_unai.u = u4; + gpu_unai.v = v4; + gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4); if (xb > xmax) xb = xmax; if ((xb - xa) > 0) - gpuPolySpanDriver(gpu_senquack, PixelBase + xa, (xb - xa)); + gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); } } } while (++cur_pass < total_passes); diff --git a/plugins/gpu_unai/gpu_raster_sprite.h b/plugins/gpu_unai/gpu_raster_sprite.h index ddbad67b2..ea4e82f2b 100644 --- a/plugins/gpu_unai/gpu_raster_sprite.h +++ b/plugins/gpu_unai/gpu_raster_sprite.h @@ -32,17 +32,17 @@ void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver) //NOTE: Must 11-bit sign-extend the whole sum here, not just packet X/Y, // or sprites in 1st level of SkullMonkeys disappear when walking right. 
// This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon: - x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_senquack.DrawingOffset[0]); - y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_senquack.DrawingOffset[1]); + x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2]) + gpu_unai.DrawingOffset[0]); + y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3]) + gpu_unai.DrawingOffset[1]); - u32 w = packet.U2[6] & 0x3ff; // Max width is 1023 - u32 h = packet.U2[7] & 0x1ff; // Max height is 511 + u32 w = le16_to_u16(packet.U2[6]) & 0x3ff; // Max width is 1023 + u32 h = le16_to_u16(packet.U2[7]) & 0x1ff; // Max height is 511 x1 = x0 + w; y1 = y0 + h; s32 xmin, xmax, ymin, ymax; - xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; - ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; + ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; u0 = packet.U1[8]; v0 = packet.U1[9]; @@ -59,17 +59,17 @@ void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver) x1 -= x0; if (x1 <= 0) return; - gpu_senquack.r5 = packet.U1[0] >> 3; - gpu_senquack.g5 = packet.U1[1] >> 3; - gpu_senquack.b5 = packet.U1[2] >> 3; + gpu_unai.r5 = packet.U1[0] >> 3; + gpu_unai.g5 = packet.U1[1] >> 3; + gpu_unai.b5 = packet.U1[2] >> 3; - u16 *Pixel = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(x0, y0)]; - const int li=gpu_senquack.ilace_mask; - const int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0); - const int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1); - unsigned int tmode = gpu_senquack.TEXT_MODE >> 5; - const u32 v0_mask = gpu_senquack.TextureWindow[3]; - u8* pTxt_base = (u8*)gpu_senquack.TBA; + le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)]; + const int li=gpu_unai.ilace_mask; + const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); + const int 
pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); + unsigned int tmode = gpu_unai.TEXT_MODE >> 5; + const u32 v0_mask = gpu_unai.TextureWindow[3]; + u8* pTxt_base = (u8*)gpu_unai.TBA; // Texture is accessed byte-wise, so adjust idx if 16bpp if (tmode == 3) u0 <<= 1; @@ -98,18 +98,18 @@ void gpuDrawS16(PtrUnion packet) //NOTE: Must 11-bit sign-extend the whole sum here, not just packet X/Y, // or sprites in 1st level of SkullMonkeys disappear when walking right. // This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon: - x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_senquack.DrawingOffset[0]); - y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_senquack.DrawingOffset[1]); + x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2]) + gpu_unai.DrawingOffset[0]); + y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3]) + gpu_unai.DrawingOffset[1]); - xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; - ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; + ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; u0 = packet.U1[8]; v0 = packet.U1[9]; if (x0 > xmax - 16 || x0 < xmin || - ((u0 | v0) & 15) || !(gpu_senquack.TextureWindow[2] & gpu_senquack.TextureWindow[3] & 8)) { + ((u0 | v0) & 15) || !(gpu_unai.TextureWindow[2] & gpu_unai.TextureWindow[3] & 8)) { // send corner cases to general handler - packet.U4[3] = 0x00100010; + packet.U4[3] = u32_to_le32(0x00100010); gpuDrawS(packet, gpuSpriteSpanFn<0x20>); return; } @@ -124,7 +124,7 @@ void gpuDrawS16(PtrUnion packet) else if (ymax - y0 < 16) h = ymax - y0; - draw_spr16_full(&gpu_senquack.vram[FRAME_OFFSET(x0, y0)], &gpu_senquack.TBA[FRAME_OFFSET(u0/4, v0)], gpu_senquack.CBA, h); + draw_spr16_full(&gpu_unai.vram[FRAME_OFFSET(x0, y0)], &gpu_unai.TBA[FRAME_OFFSET(u0/4, v0)], gpu_unai.CBA, h); } #endif // __arm__ @@ -133,17 +133,17 @@ void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver) 
s32 x0, x1, y0, y1; // This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon: - x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_senquack.DrawingOffset[0]); - y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_senquack.DrawingOffset[1]); + x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2]) + gpu_unai.DrawingOffset[0]); + y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3]) + gpu_unai.DrawingOffset[1]); - u32 w = packet.U2[4] & 0x3ff; // Max width is 1023 - u32 h = packet.U2[5] & 0x1ff; // Max height is 511 + u32 w = le16_to_u16(packet.U2[4]) & 0x3ff; // Max width is 1023 + u32 h = le16_to_u16(packet.U2[5]) & 0x1ff; // Max height is 511 x1 = x0 + w; y1 = y0 + h; s32 xmin, xmax, ymin, ymax; - xmin = gpu_senquack.DrawingArea[0]; xmax = gpu_senquack.DrawingArea[2]; - ymin = gpu_senquack.DrawingArea[1]; ymax = gpu_senquack.DrawingArea[3]; + xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; + ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; if (y0 < ymin) y0 = ymin; if (y1 > ymax) y1 = ymax; @@ -154,11 +154,11 @@ void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver) x1 -= x0; if (x1 <= 0) return; - const u16 Data = GPU_RGB16(packet.U4[0]); - u16 *Pixel = &((u16*)gpu_senquack.vram)[FRAME_OFFSET(x0, y0)]; - const int li=gpu_senquack.ilace_mask; - const int pi=(ProgressiveInterlaceEnabled()?(gpu_senquack.ilace_mask+1):0); - const int pif=(ProgressiveInterlaceEnabled()?(gpu_senquack.prog_ilace_flag?(gpu_senquack.ilace_mask+1):0):1); + const u16 Data = GPU_RGB16(le32_to_u32(packet.U4[0])); + le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)]; + const int li=gpu_unai.ilace_mask; + const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); + const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); for (; y0 Anything here should be for gpu_senquack's private use. 
<- +// Header shared between both standalone gpu_unai (gpu.cpp) and new +// gpulib-compatible gpu_unai (gpulib_if.cpp) +// -> Anything here should be for gpu_unai's private use. <- /////////////////////////////////////////////////////////////////////////////// // Compile Options @@ -54,25 +54,67 @@ #define s32 int32_t #define s64 int64_t +typedef struct { + u32 v; +} le32_t; + +typedef struct { + u16 v; +} le16_t; + +static inline u32 le32_to_u32(le32_t le) +{ + return LE32TOH(le.v); +} + +static inline s32 le32_to_s32(le32_t le) +{ + return (int32_t) LE32TOH(le.v); +} + +static inline u32 le32_raw(le32_t le) +{ + return le.v; +} + +static inline le32_t u32_to_le32(u32 u) +{ + return (le32_t){ .v = HTOLE32(u) }; +} + +static inline u16 le16_to_u16(le16_t le) +{ + return LE16TOH(le.v); +} + +static inline s16 le16_to_s16(le16_t le) +{ + return (int16_t) LE16TOH(le.v); +} + +static inline u16 le16_raw(le16_t le) +{ + return le.v; +} + +static inline le16_t u16_to_le16(u16 u) +{ + return (le16_t){ .v = HTOLE16(u) }; +} + union PtrUnion { - u32 *U4; - s32 *S4; - u16 *U2; - s16 *S2; + le32_t *U4; + le16_t *U2; u8 *U1; - s8 *S1; void *ptr; }; union GPUPacket { - u32 U4[16]; - s32 S4[16]; - u16 U2[32]; - s16 S2[32]; + le32_t U4[16]; + le16_t U2[32]; u8 U1[64]; - s8 S1[64]; }; template static inline void SwapValues(T &x, T &y) @@ -133,11 +175,16 @@ static inline s32 GPU_DIV(s32 rs, s32 rt) // 'Unsafe' version of above that doesn't check for div-by-zero #define GPU_FAST_DIV(rs, rt) ((signed)(rs) / (signed)(rt)) -struct gpu_senquack_t { +struct gpu_unai_t { u32 GPU_GP1; GPUPacket PacketBuffer; - u16 *vram; + le16_t *vram; +#ifdef USE_GPULIB + le16_t *downscale_vram; +#endif + //////////////////////////////////////////////////////////////////////////// + // Variables used only by older standalone version of gpu_unai (gpu.cpp) #ifndef USE_GPULIB u32 GPU_GP0; u32 tex_window; // Current texture window vals (set by GP0(E2h) cmd) @@ -146,7 +193,7 @@ struct gpu_senquack_t { bool 
fb_dirty; // Framebuffer is dirty (according to GPU) // Display status - // NOTE: Standalone older gpu_senquack didn't care about horiz display range + // NOTE: Standalone older gpu_unai didn't care about horiz display range u16 DisplayArea[6]; // [0] : Start of display area (in VRAM) X // [1] : Start of display area (in VRAM) Y // [2] : Display mode resolution HORIZONTAL @@ -159,7 +206,7 @@ struct gpu_senquack_t { struct { s32 px,py; s32 x_end,y_end; - u16* pvram; + le16_t* pvram; u32 *last_dma; // Last dma pointer bool FrameToRead; // Load image in progress bool FrameToWrite; // Store image in progress @@ -175,7 +222,7 @@ struct gpu_senquack_t { bool skipGPU; // Skip GPU primitives } frameskip; #endif - // END of standalone gpu_senquack variables + // END of standalone gpu_unai variables //////////////////////////////////////////////////////////////////////////// u32 TextureWindowCur; // Current setting from last GP0(0xE2) cmd (raw form) @@ -192,8 +239,8 @@ struct gpu_senquack_t { s16 DrawingOffset[2]; // [0] : Drawing offset X (signed) // [1] : Drawing offset Y (signed) - u16* TBA; // Ptr to current texture in VRAM - u16* CBA; // Ptr to current CLUT in VRAM + le16_t* TBA; // Ptr to current texture in VRAM + le16_t* CBA; // Ptr to current CLUT in VRAM //////////////////////////////////////////////////////////////////////////// // Inner Loop parameters @@ -244,39 +291,39 @@ struct gpu_senquack_t { u16 PixelMSB; - gpu_senquack_config_t config; + gpu_unai_config_t config; u8 LightLUT[32*32]; // 5-bit lighting LUT (gpu_inner_light.h) u32 DitherMatrix[64]; // Matrix of dither coefficients }; -static gpu_senquack_t gpu_senquack; +static gpu_unai_t gpu_unai; // Global config that frontend can alter.. Values are read in GPU_init(). // TODO: if frontend menu modifies a setting, add a function that can notify // GPU plugin to use new setting. 
-gpu_senquack_config_t gpu_senquack_config_ext; +gpu_unai_config_t gpu_unai_config_ext; /////////////////////////////////////////////////////////////////////////////// // Internal inline funcs to get option status: (Allows flexibility) static inline bool LightingEnabled() { - return gpu_senquack.config.lighting; + return gpu_unai.config.lighting; } static inline bool FastLightingEnabled() { - return gpu_senquack.config.fast_lighting; + return gpu_unai.config.fast_lighting; } static inline bool BlendingEnabled() { - return gpu_senquack.config.blending; + return gpu_unai.config.blending; } static inline bool DitheringEnabled() { - return gpu_senquack.config.dithering; + return gpu_unai.config.dithering; } // For now, this is just for development/experimentation purposes.. @@ -295,7 +342,7 @@ static inline bool ProgressiveInterlaceEnabled() // for now when using new gpulib, since it also adds more work in loops. return false; #else - return gpu_senquack.config.prog_ilace; + return gpu_unai.config.prog_ilace; #endif } @@ -305,7 +352,7 @@ static inline bool ProgressiveInterlaceEnabled() // running on higher-res device or a resampling downscaler is enabled. 
static inline bool PixelSkipEnabled() { - return gpu_senquack.config.pixel_skip || gpu_senquack.config.scale_hires; + return gpu_unai.config.pixel_skip || gpu_unai.config.scale_hires; } static inline bool LineSkipEnabled() diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 7b148d4f8..20794316b 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -26,8 +26,24 @@ #include #include "../gpulib/gpu.h" +#ifdef THREAD_RENDERING +#include "../gpulib/gpulib_thread_if.h" +#define do_cmd_list real_do_cmd_list +#define renderer_init real_renderer_init +#define renderer_finish real_renderer_finish +#define renderer_sync_ecmds real_renderer_sync_ecmds +#define renderer_update_caches real_renderer_update_caches +#define renderer_flush_queues real_renderer_flush_queues +#define renderer_set_interlace real_renderer_set_interlace +#define renderer_set_config real_renderer_set_config +#define renderer_notify_res_change real_renderer_notify_res_change +#define renderer_notify_update_lace real_renderer_notify_update_lace +#define renderer_sync real_renderer_sync +#define ex_regs scratch_ex_regs +#endif + //#include "port.h" -#include "gpu_senquack.h" +#include "gpu_unai.h" // GPU fixed point math #include "gpu_fixedpoint.h" @@ -52,31 +68,183 @@ ///////////////////////////////////////////////////////////////////////////// +#define DOWNSCALE_VRAM_SIZE (1024 * 512 * 2 * 2 + 4096) + +INLINE void scale_640_to_320(le16_t *dest, const le16_t *src, bool isRGB24) { + size_t uCount = 320; + + if(isRGB24) { + const uint8_t* src8 = (const uint8_t *)src; + uint8_t* dst8 = (uint8_t *)dest; + + do { + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8; + src8 += 4; + } while(--uCount); + } else { + const le16_t* src16 = src; + le16_t* dst16 = dest; + + do { + *dst16++ = *src16; + src16 += 2; + } while(--uCount); + } +} + +INLINE void scale_512_to_320(le16_t *dest, const le16_t *src, bool isRGB24) { + size_t uCount = 64; + + 
if(isRGB24) { + const uint8_t* src8 = (const uint8_t *)src; + uint8_t* dst8 = (uint8_t *)dest; + + do { + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8; + src8 += 4; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8; + src8 += 4; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8; + src8 += 4; + } while(--uCount); + } else { + const le16_t* src16 = src; + le16_t* dst16 = dest; + + do { + *dst16++ = *src16++; + *dst16++ = *src16; + src16 += 2; + *dst16++ = *src16++; + *dst16++ = *src16; + src16 += 2; + *dst16++ = *src16; + src16 += 2; + } while(--uCount); + } +} + +static uint16_t *get_downscale_buffer(int *x, int *y, int *w, int *h, int *vram_h) +{ + le16_t *dest = gpu_unai.downscale_vram; + const le16_t *src = gpu_unai.vram; + bool isRGB24 = (gpu_unai.GPU_GP1 & 0x00200000 ? true : false); + int stride = 1024, dstride = 1024, lines = *h, orig_w = *w; + + // PS1 fb read wraps around (fixes black screen in 'Tobal no. 1') + unsigned int fb_mask = 1024 * 512 - 1; + + if (*h > 240) { + *h /= 2; + stride *= 2; + lines = *h; + + // Ensure start at a non-skipped line + while (*y & gpu_unai.ilace_mask) ++*y; + } + + unsigned int fb_offset_src = (*y * dstride + *x) & fb_mask; + unsigned int fb_offset_dest = fb_offset_src; + + if (*w == 512 || *w == 640) { + *w = 320; + } + + switch(orig_w) { + case 640: + do { + scale_640_to_320(dest + fb_offset_dest, src + fb_offset_src, isRGB24); + fb_offset_src = (fb_offset_src + stride) & fb_mask; + fb_offset_dest = (fb_offset_dest + dstride) & fb_mask; + } while(--lines); + + break; + case 512: + do { + scale_512_to_320(dest + fb_offset_dest, src + fb_offset_src, isRGB24); + fb_offset_src = (fb_offset_src + stride) & fb_mask; + fb_offset_dest = (fb_offset_dest + dstride) & fb_mask; + } while(--lines); + break; + default: + size_t size = isRGB24 ? 
*w * 3 : *w * 2; + + do { + memcpy(dest + fb_offset_dest, src + fb_offset_src, size); + fb_offset_src = (fb_offset_src + stride) & fb_mask; + fb_offset_dest = (fb_offset_dest + dstride) & fb_mask; + } while(--lines); + break; + } + + return (uint16_t *)gpu_unai.downscale_vram; +} + +static void map_downscale_buffer(void) +{ + if (gpu_unai.downscale_vram) + return; + + gpu_unai.downscale_vram = (le16_t*)gpu.mmap(DOWNSCALE_VRAM_SIZE); + + if (gpu_unai.downscale_vram == NULL) { + fprintf(stderr, "failed to map downscale buffer\n"); + gpu.get_downscale_buffer = NULL; + } + else { + gpu.get_downscale_buffer = get_downscale_buffer; + } +} + +static void unmap_downscale_buffer(void) +{ + if (gpu_unai.downscale_vram == NULL) + return; + + gpu.munmap(gpu_unai.downscale_vram, DOWNSCALE_VRAM_SIZE); + gpu_unai.downscale_vram = NULL; + gpu.get_downscale_buffer = NULL; +} + int renderer_init(void) { - memset((void*)&gpu_senquack, 0, sizeof(gpu_senquack)); - gpu_senquack.vram = (u16*)gpu.vram; + memset((void*)&gpu_unai, 0, sizeof(gpu_unai)); + gpu_unai.vram = (le16_t *)gpu.vram; - // Original standalone gpu_senquack initialized TextureWindow[]. I added the + // Original standalone gpu_unai initialized TextureWindow[]. I added the // same behavior here, since it seems unsafe to leave [2],[3] unset when // using HLE and Rearmed gpu_neon sets this similarly on init. 
-senquack - gpu_senquack.TextureWindow[0] = 0; - gpu_senquack.TextureWindow[1] = 0; - gpu_senquack.TextureWindow[2] = 255; - gpu_senquack.TextureWindow[3] = 255; + gpu_unai.TextureWindow[0] = 0; + gpu_unai.TextureWindow[1] = 0; + gpu_unai.TextureWindow[2] = 255; + gpu_unai.TextureWindow[3] = 255; //senquack - new vars must be updated whenever texture window is changed: // (used for polygon-drawing in gpu_inner.h, gpu_raster_polygon.h) const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 - gpu_senquack.u_msk = (((u32)gpu_senquack.TextureWindow[2]) << fb) | ((1 << fb) - 1); - gpu_senquack.v_msk = (((u32)gpu_senquack.TextureWindow[3]) << fb) | ((1 << fb) - 1); + gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1); // Configuration options - gpu_senquack.config = gpu_senquack_config_ext; + gpu_unai.config = gpu_unai_config_ext; //senquack - disabled, not sure this is needed and would require modifying // sprite-span functions, perhaps unnecessarily. No Abe Oddysey hack was // present in latest PCSX4ALL sources we were using. 
- //gpu_senquack.config.enableAbbeyHack = gpu_senquack_config_ext.abe_hack; - gpu_senquack.ilace_mask = gpu_senquack.config.ilace_force; + //gpu_unai.config.enableAbbeyHack = gpu_unai_config_ext.abe_hack; + gpu_unai.ilace_mask = gpu_unai.config.ilace_force; #ifdef GPU_UNAI_USE_INT_DIV_MULTINV // s_invTable @@ -95,11 +263,16 @@ int renderer_init(void) SetupLightLUT(); SetupDitheringConstants(); + if (gpu_unai.config.scale_hires) { + map_downscale_buffer(); + } + return 0; } void renderer_finish(void) { + unmap_downscale_buffer(); } void renderer_notify_res_change(void) @@ -111,12 +284,12 @@ void renderer_notify_res_change(void) switch (gpu.screen.hres) { - case 512: gpu_senquack.blit_mask = 0xa4; break; // GPU_BlitWWSWWSWS - case 640: gpu_senquack.blit_mask = 0xaa; break; // GPU_BlitWS - default: gpu_senquack.blit_mask = 0; break; + case 512: gpu_unai.blit_mask = 0xa4; break; // GPU_BlitWWSWWSWS + case 640: gpu_unai.blit_mask = 0xaa; break; // GPU_BlitWS + default: gpu_unai.blit_mask = 0; break; } } else { - gpu_senquack.blit_mask = 0; + gpu_unai.blit_mask = 0; } if (LineSkipEnabled()) { @@ -124,23 +297,23 @@ void renderer_notify_res_change(void) // 480 vertical mode, or, optionally, force it for all video modes) if (gpu.screen.vres == 480) { - if (gpu_senquack.config.ilace_force) { - gpu_senquack.ilace_mask = 3; // Only need 1/4 of lines + if (gpu_unai.config.ilace_force) { + gpu_unai.ilace_mask = 3; // Only need 1/4 of lines } else { - gpu_senquack.ilace_mask = 1; // Only need 1/2 of lines + gpu_unai.ilace_mask = 1; // Only need 1/2 of lines } } else { // Vert resolution changed from 480 to lower one - gpu_senquack.ilace_mask = gpu_senquack.config.ilace_force; + gpu_unai.ilace_mask = gpu_unai.config.ilace_force; } } else { - gpu_senquack.ilace_mask = 0; + gpu_unai.ilace_mask = 0; } /* printf("res change hres: %d vres: %d depth: %d ilace_mask: %d\n", - gpu.screen.hres, gpu.screen.vres, gpu.status.rgb24 ? 
24 : 15, - gpu_senquack.ilace_mask); + gpu.screen.hres, gpu.screen.vres, (gpu.status & PSX_GPU_STATUS_RGB24) ? 24 : 15, + gpu_unai.ilace_mask); */ } @@ -150,7 +323,7 @@ void renderer_notify_scanout_change(int x, int y) #ifdef USE_GPULIB // Handles GP0 draw settings commands 0xE1...0xE6 -static void gpuGP0Cmd_0xEx(gpu_senquack_t &gpu_senquack, u32 cmd_word) +static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word) { // Assume incoming GP0 command is 0xE1..0xE6, convert to 1..6 u8 num = (cmd_word >> 24) & 7; @@ -158,60 +331,60 @@ static void gpuGP0Cmd_0xEx(gpu_senquack_t &gpu_senquack, u32 cmd_word) switch (num) { case 1: { // GP0(E1h) - Draw Mode setting (aka "Texpage") - u32 cur_texpage = gpu_senquack.GPU_GP1 & 0x7FF; + u32 cur_texpage = gpu_unai.GPU_GP1 & 0x7FF; u32 new_texpage = cmd_word & 0x7FF; if (cur_texpage != new_texpage) { - gpu_senquack.GPU_GP1 = (gpu_senquack.GPU_GP1 & ~0x7FF) | new_texpage; - gpuSetTexture(gpu_senquack.GPU_GP1); + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x7FF) | new_texpage; + gpuSetTexture(gpu_unai.GPU_GP1); } } break; case 2: { // GP0(E2h) - Texture Window setting - if (cmd_word != gpu_senquack.TextureWindowCur) { + if (cmd_word != gpu_unai.TextureWindowCur) { static const u8 TextureMask[32] = { 255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7, 127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7 }; - gpu_senquack.TextureWindowCur = cmd_word; - gpu_senquack.TextureWindow[0] = ((cmd_word >> 10) & 0x1F) << 3; - gpu_senquack.TextureWindow[1] = ((cmd_word >> 15) & 0x1F) << 3; - gpu_senquack.TextureWindow[2] = TextureMask[(cmd_word >> 0) & 0x1F]; - gpu_senquack.TextureWindow[3] = TextureMask[(cmd_word >> 5) & 0x1F]; - gpu_senquack.TextureWindow[0] &= ~gpu_senquack.TextureWindow[2]; - gpu_senquack.TextureWindow[1] &= ~gpu_senquack.TextureWindow[3]; + gpu_unai.TextureWindowCur = cmd_word; + gpu_unai.TextureWindow[0] = ((cmd_word >> 10) & 0x1F) << 3; + gpu_unai.TextureWindow[1] = ((cmd_word >> 15) & 0x1F) << 3; + 
gpu_unai.TextureWindow[2] = TextureMask[(cmd_word >> 0) & 0x1F]; + gpu_unai.TextureWindow[3] = TextureMask[(cmd_word >> 5) & 0x1F]; + gpu_unai.TextureWindow[0] &= ~gpu_unai.TextureWindow[2]; + gpu_unai.TextureWindow[1] &= ~gpu_unai.TextureWindow[3]; // Inner loop vars must be updated whenever texture window is changed: const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 - gpu_senquack.u_msk = (((u32)gpu_senquack.TextureWindow[2]) << fb) | ((1 << fb) - 1); - gpu_senquack.v_msk = (((u32)gpu_senquack.TextureWindow[3]) << fb) | ((1 << fb) - 1); + gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1); - gpuSetTexture(gpu_senquack.GPU_GP1); + gpuSetTexture(gpu_unai.GPU_GP1); } } break; case 3: { // GP0(E3h) - Set Drawing Area top left (X1,Y1) - gpu_senquack.DrawingArea[0] = cmd_word & 0x3FF; - gpu_senquack.DrawingArea[1] = (cmd_word >> 10) & 0x3FF; + gpu_unai.DrawingArea[0] = cmd_word & 0x3FF; + gpu_unai.DrawingArea[1] = (cmd_word >> 10) & 0x3FF; } break; case 4: { // GP0(E4h) - Set Drawing Area bottom right (X2,Y2) - gpu_senquack.DrawingArea[2] = (cmd_word & 0x3FF) + 1; - gpu_senquack.DrawingArea[3] = ((cmd_word >> 10) & 0x3FF) + 1; + gpu_unai.DrawingArea[2] = (cmd_word & 0x3FF) + 1; + gpu_unai.DrawingArea[3] = ((cmd_word >> 10) & 0x3FF) + 1; } break; case 5: { // GP0(E5h) - Set Drawing Offset (X,Y) - gpu_senquack.DrawingOffset[0] = ((s32)cmd_word<<(32-11))>>(32-11); - gpu_senquack.DrawingOffset[1] = ((s32)cmd_word<<(32-22))>>(32-11); + gpu_unai.DrawingOffset[0] = ((s32)cmd_word<<(32-11))>>(32-11); + gpu_unai.DrawingOffset[1] = ((s32)cmd_word<<(32-22))>>(32-11); } break; case 6: { // GP0(E6h) - Mask Bit Setting - gpu_senquack.Masking = (cmd_word & 0x2) << 1; - gpu_senquack.PixelMSB = (cmd_word & 0x1) << 8; + gpu_unai.Masking = (cmd_word & 0x2) << 1; + gpu_unai.PixelMSB = (cmd_word & 0x1) << 8; } break; } } @@ -219,26 +392,27 @@ static void 
gpuGP0Cmd_0xEx(gpu_senquack_t &gpu_senquack, u32 cmd_word) extern const unsigned char cmd_lengths[256]; -int do_cmd_list(u32 *list, int list_len, int *last_cmd) +int do_cmd_list(u32 *_list, int list_len, int *last_cmd) { u32 cmd = 0, len, i; - u32 *list_start = list; - u32 *list_end = list + list_len; + le32_t *list = (le32_t *)_list; + le32_t *list_start = list; + le32_t *list_end = list + list_len; //TODO: set ilace_mask when resolution changes instead of every time, // eliminate #ifdef below. - gpu_senquack.ilace_mask = gpu_senquack.config.ilace_force; + gpu_unai.ilace_mask = gpu_unai.config.ilace_force; #ifdef HAVE_PRE_ARMV7 /* XXX */ - gpu_senquack.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE); + gpu_unai.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE); #endif - if (gpu_senquack.config.scale_hires) { - gpu_senquack.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE); + if (gpu_unai.config.scale_hires) { + gpu_unai.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE); } for (; list < list_end; list += 1 + len) { - cmd = *list >> 24; + cmd = le32_to_u32(*list) >> 24; len = cmd_lengths[cmd]; if (list + 1 + len > list_end) { cmd = -1; @@ -246,11 +420,11 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) } #define PRIM cmd - gpu_senquack.PacketBuffer.U4[0] = list[0]; + gpu_unai.PacketBuffer.U4[0] = list[0]; for (i = 1; i <= len; i++) - gpu_senquack.PacketBuffer.U4[i] = list[i]; + gpu_unai.PacketBuffer.U4[i] = list[i]; - PtrUnion packet = { .ptr = (void*)&gpu_senquack.PacketBuffer }; + PtrUnion packet = { .ptr = (void*)&gpu_unai.PacketBuffer }; switch (cmd) { @@ -263,9 +437,9 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x22: case 0x23: { // Monochrome 3-pt poly PP driver = gpuPolySpanDrivers[ - (gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Blending_Mode | - gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB + gpu_unai.Masking | Blending | gpu_unai.PixelMSB ]; gpuDrawPolyF(packet, driver, 
false); } break; @@ -274,19 +448,19 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x25: case 0x26: case 0x27: { // Textured 3-pt poly - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); - gpuSetTexture(gpu_senquack.PacketBuffer.U4[4] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16); u32 driver_idx = - (gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Dithering | - Blending_Mode | gpu_senquack.TEXT_MODE | - gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB; + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB; if (!FastLightingEnabled()) { driver_idx |= Lighting; } else { - if (!((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F))) + if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))) driver_idx |= Lighting; } @@ -299,9 +473,9 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x2A: case 0x2B: { // Monochrome 4-pt poly PP driver = gpuPolySpanDrivers[ - (gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Blending_Mode | - gpu_senquack.Masking | Blending | gpu_senquack.PixelMSB + gpu_unai.Masking | Blending | gpu_unai.PixelMSB ]; gpuDrawPolyF(packet, driver, true); // is_quad = true } break; @@ -310,19 +484,19 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x2D: case 0x2E: case 0x2F: { // Textured 4-pt poly - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); - gpuSetTexture(gpu_senquack.PacketBuffer.U4[4] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16); u32 driver_idx = - (gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Dithering | - Blending_Mode | gpu_senquack.TEXT_MODE | - gpu_senquack.Masking | Blending | 
gpu_senquack.PixelMSB; + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB; if (!FastLightingEnabled()) { driver_idx |= Lighting; } else { - if (!((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F))) + if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))) driver_idx |= Lighting; } @@ -339,10 +513,10 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) // shouldn't apply. Until the original array of template // instantiation ptrs is fixed, we're stuck with this. (TODO) PP driver = gpuPolySpanDrivers[ - (gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Dithering | Blending_Mode | - gpu_senquack.Masking | Blending | 129 | gpu_senquack.PixelMSB + gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB ]; gpuDrawPolyG(packet, driver, false); } break; @@ -351,13 +525,13 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x35: case 0x36: case 0x37: { // Gouraud-shaded, textured 3-pt poly - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); - gpuSetTexture (gpu_senquack.PacketBuffer.U4[5] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16); PP driver = gpuPolySpanDrivers[ - (gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Dithering | - Blending_Mode | gpu_senquack.TEXT_MODE | - gpu_senquack.Masking | Blending | ((Lighting)?129:0) | gpu_senquack.PixelMSB + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB ]; gpuDrawPolyGT(packet, driver, false); } break; @@ -368,10 +542,10 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x3B: { // Gouraud-shaded 4-pt poly // See notes regarding '129' for 0x30..0x33 further above -senquack PP driver = gpuPolySpanDrivers[ - (gpu_senquack.blit_mask?1024:0) | + 
(gpu_unai.blit_mask?1024:0) | Dithering | Blending_Mode | - gpu_senquack.Masking | Blending | 129 | gpu_senquack.PixelMSB + gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB ]; gpuDrawPolyG(packet, driver, true); // is_quad = true } break; @@ -380,13 +554,13 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x3D: case 0x3E: case 0x3F: { // Gouraud-shaded, textured 4-pt poly - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); - gpuSetTexture (gpu_senquack.PacketBuffer.U4[5] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16); PP driver = gpuPolySpanDrivers[ - (gpu_senquack.blit_mask?1024:0) | + (gpu_unai.blit_mask?1024:0) | Dithering | - Blending_Mode | gpu_senquack.TEXT_MODE | - gpu_senquack.Masking | Blending | ((Lighting)?129:0) | gpu_senquack.PixelMSB + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB ]; gpuDrawPolyGT(packet, driver, true); // is_quad = true } break; @@ -396,24 +570,24 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x42: case 0x43: { // Monochrome line // Shift index right by one, as untextured prims don't use lighting - u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; PSD driver = gpuPixelSpanDrivers[driver_idx]; gpuDrawLineF(packet, driver); } break; case 0x48 ... 
0x4F: { // Monochrome line strip u32 num_vertexes = 1; - u32 *list_position = &(list[2]); + le32_t *list_position = &list[2]; // Shift index right by one, as untextured prims don't use lighting - u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; PSD driver = gpuPixelSpanDrivers[driver_idx]; gpuDrawLineF(packet, driver); while(1) { - gpu_senquack.PacketBuffer.U4[1] = gpu_senquack.PacketBuffer.U4[2]; - gpu_senquack.PacketBuffer.U4[2] = *list_position++; + gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2]; + gpu_unai.PacketBuffer.U4[2] = *list_position++; gpuDrawLineF(packet, driver); num_vertexes++; @@ -421,7 +595,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) cmd = -1; goto breakloop; } - if((*list_position & 0xf000f000) == 0x50005000) + if((le32_raw(*list_position) & HTOLE32(0xf000f000)) == HTOLE32(0x50005000)) break; } @@ -433,7 +607,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x52: case 0x53: { // Gouraud-shaded line // Shift index right by one, as untextured prims don't use lighting - u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; // Index MSB selects Gouraud-shaded PixelSpanDriver: driver_idx |= (1 << 5); PSD driver = gpuPixelSpanDrivers[driver_idx]; @@ -442,10 +616,10 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x58 ... 
0x5F: { // Gouraud-shaded line strip u32 num_vertexes = 1; - u32 *list_position = &(list[2]); + le32_t *list_position = &list[2]; // Shift index right by one, as untextured prims don't use lighting - u32 driver_idx = (Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1; + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; // Index MSB selects Gouraud-shaded PixelSpanDriver: driver_idx |= (1 << 5); PSD driver = gpuPixelSpanDrivers[driver_idx]; @@ -453,10 +627,10 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) while(1) { - gpu_senquack.PacketBuffer.U4[0] = gpu_senquack.PacketBuffer.U4[2]; - gpu_senquack.PacketBuffer.U4[1] = gpu_senquack.PacketBuffer.U4[3]; - gpu_senquack.PacketBuffer.U4[2] = *list_position++; - gpu_senquack.PacketBuffer.U4[3] = *list_position++; + gpu_unai.PacketBuffer.U4[0] = gpu_unai.PacketBuffer.U4[2]; + gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[3]; + gpu_unai.PacketBuffer.U4[2] = *list_position++; + gpu_unai.PacketBuffer.U4[3] = *list_position++; gpuDrawLineG(packet, driver); num_vertexes++; @@ -464,7 +638,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) cmd = -1; goto breakloop; } - if((*list_position & 0xf000f000) == 0x50005000) + if((le32_raw(*list_position) & HTOLE32(0xf000f000)) == HTOLE32(0x50005000)) break; } @@ -475,7 +649,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x61: case 0x62: case 0x63: { // Monochrome rectangle (variable size) - PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); } break; @@ -483,24 +657,24 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x65: case 0x66: case 0x67: { // Textured rectangle (variable size) - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); - u32 
driver_idx = Blending_Mode | gpu_senquack.TEXT_MODE | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>1); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); //senquack - Only color 808080h-878787h allows skipping lighting calculation: // This fixes Silent Hill running animation on loading screens: // (On PSX, color values 0x00-0x7F darken the source texture's color, // 0x81-FF lighten textures (ultimately clamped to 0x1F), // 0x80 leaves source texture color unchanged, HOWEVER, - // gpu_senquack uses a simple lighting LUT whereby only the upper + // gpu_unai uses a simple lighting LUT whereby only the upper // 5 bits of an 8-bit color are used, so 0x80-0x87 all behave as // 0x80. // // NOTE: I've changed all textured sprite draw commands here and // elsewhere to use proper behavior, but left poly commands // alone, I don't want to slow rendering down too much. 
(TODO) - //if ((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F)) + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) // Strip lower 3 bits of each color and determine if lighting should be used: - if ((gpu_senquack.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); @@ -510,8 +684,8 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x69: case 0x6A: case 0x6B: { // Monochrome rectangle (1x1 dot) - gpu_senquack.PacketBuffer.U4[2] = 0x00010001; - PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001); + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); } break; @@ -519,8 +693,8 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x71: case 0x72: case 0x73: { // Monochrome rectangle (8x8) - gpu_senquack.PacketBuffer.U4[2] = 0x00080008; - PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008); + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); } break; @@ -528,14 +702,14 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x75: case 0x76: case 0x77: { // Textured rectangle (8x8) - gpu_senquack.PacketBuffer.U4[3] = 0x00080008; - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); - u32 driver_idx = Blending_Mode | gpu_senquack.TEXT_MODE | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>1); + 
gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00080008); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); //senquack - Only color 808080h-878787h allows skipping lighting calculation: - //if ((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F)) + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) // Strip lower 3 bits of each color and determine if lighting should be used: - if ((gpu_senquack.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); @@ -545,17 +719,17 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x79: case 0x7A: case 0x7B: { // Monochrome rectangle (16x16) - gpu_senquack.PacketBuffer.U4[2] = 0x00100010; - PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>3)) >> 1]; + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010); + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); } break; case 0x7C: case 0x7D: #ifdef __arm__ - if ((gpu_senquack.GPU_GP1 & 0x180) == 0 && (gpu_senquack.Masking | gpu_senquack.PixelMSB) == 0) + if ((gpu_unai.GPU_GP1 & 0x180) == 0 && (gpu_unai.Masking | gpu_unai.PixelMSB) == 0) { - gpuSetCLUT (gpu_senquack.PacketBuffer.U4[2] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); gpuDrawS16(packet); break; } @@ -563,13 +737,13 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) #endif case 0x7E: case 0x7F: { // Textured rectangle (16x16) - gpu_senquack.PacketBuffer.U4[3] = 0x00100010; - gpuSetCLUT 
(gpu_senquack.PacketBuffer.U4[2] >> 16); - u32 driver_idx = Blending_Mode | gpu_senquack.TEXT_MODE | gpu_senquack.Masking | Blending | (gpu_senquack.PixelMSB>>1); + gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00100010); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); //senquack - Only color 808080h-878787h allows skipping lighting calculation: - //if ((gpu_senquack.PacketBuffer.U1[0]>0x5F) && (gpu_senquack.PacketBuffer.U1[1]>0x5F) && (gpu_senquack.PacketBuffer.U1[2]>0x5F)) + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) // Strip lower 3 bits of each color and determine if lighting should be used: - if ((gpu_senquack.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); @@ -599,20 +773,20 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) goto breakloop; #endif case 0xE1 ... 
0xE6: { // Draw settings - gpuGP0Cmd_0xEx(gpu_senquack, gpu_senquack.PacketBuffer.U4[0]); + gpuGP0Cmd_0xEx(gpu_unai, le32_to_u32(gpu_unai.PacketBuffer.U4[0])); } break; } } breakloop: gpu.ex_regs[1] &= ~0x1ff; - gpu.ex_regs[1] |= gpu_senquack.GPU_GP1 & 0x1ff; + gpu.ex_regs[1] |= gpu_unai.GPU_GP1 & 0x1ff; *last_cmd = cmd; return list - list_start; } -void renderer_sync_ecmds(uint32_t *ecmds) +void renderer_sync_ecmds(u32 *ecmds) { int dummy; do_cmd_list(&ecmds[1], 6, &dummy); @@ -631,17 +805,32 @@ void renderer_set_interlace(int enable, int is_odd) } #include "../../frontend/plugin_lib.h" -// Handle any gpulib settings applicable to gpu_senquack: +// Handle any gpulib settings applicable to gpu_unai: void renderer_set_config(const struct rearmed_cbs *cbs) { - gpu_senquack.vram = (u16*)gpu.vram; - gpu_senquack.config.ilace_force = cbs->gpu_unai.ilace_force; - gpu_senquack.config.pixel_skip = cbs->gpu_unai.pixel_skip; - gpu_senquack.config.lighting = cbs->gpu_unai.lighting; - gpu_senquack.config.fast_lighting = cbs->gpu_unai.fast_lighting; - gpu_senquack.config.blending = cbs->gpu_unai.blending; - gpu_senquack.config.dithering = cbs->gpu_unai.dithering; - gpu_senquack.config.scale_hires = cbs->gpu_unai.scale_hires; + gpu_unai.vram = (le16_t *)gpu.vram; + gpu_unai.config.ilace_force = cbs->gpu_unai.ilace_force; + gpu_unai.config.pixel_skip = cbs->gpu_unai.pixel_skip; + gpu_unai.config.lighting = cbs->gpu_unai.lighting; + gpu_unai.config.fast_lighting = cbs->gpu_unai.fast_lighting; + gpu_unai.config.blending = cbs->gpu_unai.blending; + gpu_unai.config.dithering = cbs->gpu_unai.dithering; + gpu_unai.config.scale_hires = cbs->gpu_unai.scale_hires; + + gpu.state.downscale_enable = gpu_unai.config.scale_hires; + if (gpu_unai.config.scale_hires) { + map_downscale_buffer(); + } else { + unmap_downscale_buffer(); + } +} + +void renderer_sync(void) +{ +} + +void renderer_notify_update_lace(int updated) +{ } // vim:shiftwidth=2:expandtab diff --git a/plugins/gpulib/gpu.h 
b/plugins/gpulib/gpu.h index ab1d23a7a..886bb1f5c 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -78,6 +78,8 @@ struct psx_gpu { uint32_t enhancement_enable:1; uint32_t enhancement_active:1; uint32_t enhancement_was_active:1; + uint32_t downscale_enable:1; + uint32_t downscale_active:1; uint32_t dims_changed:1; uint32_t *frame_count; uint32_t *hcnt; /* hsync count */ @@ -106,6 +108,8 @@ struct psx_gpu { } frameskip; void *(*get_enhancement_bufer) (int *x, int *y, int *w, int *h, int *vram_h); + uint16_t *(*get_downscale_buffer) + (int *x, int *y, int *w, int *h, int *vram_h); void *(*mmap)(unsigned int size); void (*munmap)(void *ptr, unsigned int size); void (*gpu_state_change)(int what); // psx_gpu_state From efbe0f77bf8e4d0613435cc02f025b6be0551ae6 Mon Sep 17 00:00:00 2001 From: Bobby Smith <33353403+bslenul@users.noreply.github.com> Date: Wed, 1 Nov 2023 19:21:20 +0100 Subject: [PATCH 453/597] Add missing #if for the "Threaded SPU" core option --- frontend/libretro.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/frontend/libretro.c b/frontend/libretro.c index c7439dd44..c47d260b0 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2240,6 +2240,7 @@ static void update_variables(bool in_flight) spu_config.iUseInterpolation = 0; } +#if P_HAVE_PTHREAD var.value = NULL; var.key = "pcsx_rearmed_spu_thread"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) @@ -2249,6 +2250,7 @@ static void update_variables(bool in_flight) else spu_config.iUseThread = 0; } +#endif #if 0 // currently disabled, see USE_READ_THREAD in libpcsxcore/cdriso.c if (P_HAVE_PTHREAD) { From 846344d40d18b731488e874ea67ef5d66173f62b Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 2 Nov 2023 10:49:09 +0100 Subject: [PATCH 454/597] unai: Preserve MSB in light routines Apply the "MSB_PRESERVED" speed hack to non-ARM architectures. 
Signed-off-by: Paul Cercueil --- plugins/gpu_unai/gpu_inner.h | 7 +++---- plugins/gpu_unai/gpu_inner_light.h | 6 ++++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/plugins/gpu_unai/gpu_inner.h b/plugins/gpu_unai/gpu_inner.h index eb209ef4d..9f18735d4 100644 --- a/plugins/gpu_unai/gpu_inner.h +++ b/plugins/gpu_unai/gpu_inner.h @@ -62,17 +62,16 @@ #define gpuLightingRGB gpuLightingRGBARM #define gpuLightingTXT gpuLightingTXTARM #define gpuLightingTXTGouraud gpuLightingTXTGouraudARM -// Non-dithering lighting and blending functions preserve uSrc -// MSB. This saves a few operations and useless load/stores. -#define MSB_PRESERVED (!CF_DITHER) #else #define gpuBlending gpuBlendingGeneric #define gpuLightingRGB gpuLightingRGBGeneric #define gpuLightingTXT gpuLightingTXTGeneric #define gpuLightingTXTGouraud gpuLightingTXTGouraudGeneric -#define MSB_PRESERVED 0 #endif +// Non-dithering lighting and blending functions preserve uSrc +// MSB. This saves a few operations and useless load/stores. +#define MSB_PRESERVED (!CF_DITHER) // If defined, Gouraud colors are fixed-point 5.11, otherwise they are 8.16 // This is only for debugging/verification of low-precision colors in C. 
diff --git a/plugins/gpu_unai/gpu_inner_light.h b/plugins/gpu_unai/gpu_inner_light.h index f90e8ec15..f4f685bb5 100644 --- a/plugins/gpu_unai/gpu_inner_light.h +++ b/plugins/gpu_unai/gpu_inner_light.h @@ -172,7 +172,8 @@ GPU_INLINE uint_fast16_t gpuLightingTXTGeneric(uint_fast16_t uSrc, u8 r5, u8 g5, { return (gpu_unai.LightLUT[((uSrc&0x7C00)>>5) | b5] << 10) | (gpu_unai.LightLUT[ (uSrc&0x03E0) | g5] << 5) | - (gpu_unai.LightLUT[((uSrc&0x001F)<<5) | r5] ); + (gpu_unai.LightLUT[((uSrc&0x001F)<<5) | r5] ) | + (uSrc & 0x8000); } @@ -194,7 +195,8 @@ GPU_INLINE uint_fast16_t gpuLightingTXTGouraudGeneric(uint_fast16_t uSrc, u32 gC { return (gpu_unai.LightLUT[((uSrc&0x7C00)>>5) | ((gCol>> 5)&0x1F)]<<10) | (gpu_unai.LightLUT[ (uSrc&0x03E0) | ((gCol>>16)&0x1F)]<< 5) | - (gpu_unai.LightLUT[((uSrc&0x001F)<<5) | (gCol>>27) ] ); + (gpu_unai.LightLUT[((uSrc&0x001F)<<5) | (gCol>>27) ]) | + (uSrc & 0x8000); } From b3bce60ef65914650716c6caea21480071e3405c Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 31 Oct 2023 18:11:39 +0100 Subject: [PATCH 455/597] unai: Use 8.8 RGB triplet format for gouraud shading Use a new 'gcol_t' type which fits in 64 bits, and encode each color component into a 8.8 fixed-point format. This boosts the precision of the gouraud shading algorithm, at almost zero cost. Fixes #320. 
Signed-off-by: Paul Cercueil --- plugins/gpu_unai/gpu_inner.h | 15 ++--- plugins/gpu_unai/gpu_inner_light.h | 95 +++++++++++++++--------------- plugins/gpu_unai/gpu_unai.h | 17 ++++-- 3 files changed, 65 insertions(+), 62 deletions(-) diff --git a/plugins/gpu_unai/gpu_inner.h b/plugins/gpu_unai/gpu_inner.h index 9f18735d4..1a93a3920 100644 --- a/plugins/gpu_unai/gpu_inner.h +++ b/plugins/gpu_unai/gpu_inner.h @@ -59,14 +59,10 @@ #include "gpu_inner_blend_arm.h" #include "gpu_inner_light_arm.h" #define gpuBlending gpuBlendingARM -#define gpuLightingRGB gpuLightingRGBARM #define gpuLightingTXT gpuLightingTXTARM -#define gpuLightingTXTGouraud gpuLightingTXTGouraudARM #else #define gpuBlending gpuBlendingGeneric -#define gpuLightingRGB gpuLightingRGBGeneric #define gpuLightingTXT gpuLightingTXTGeneric -#define gpuLightingTXTGouraud gpuLightingTXTGouraudGeneric #endif // Non-dithering lighting and blending functions preserve uSrc @@ -537,8 +533,8 @@ static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) else { // UNTEXTURED, GOURAUD - u32 l_gCol = gpu_unai.gCol; - u32 l_gInc = gpu_unai.gInc; + gcol_t l_gCol = gpu_unai.gCol; + gcol_t l_gInc = gpu_unai.gInc; do { uint_fast16_t uDst, uSrc; @@ -570,7 +566,7 @@ static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) endpolynotextgou: pDst++; - l_gCol += l_gInc; + l_gCol.raw += l_gInc.raw; } while (--count); } @@ -594,7 +590,7 @@ static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) u8 r5, g5, b5; u8 r8, g8, b8; - u32 l_gInc, l_gCol; + gcol_t l_gInc, l_gCol; if (CF_LIGHT) { if (CF_GOURAUD) { @@ -678,7 +674,8 @@ static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) pDst++; l_u = (l_u + l_u_inc) & l_u_msk; l_v = (l_v + l_v_inc) & l_v_msk; - if (CF_LIGHT && CF_GOURAUD) l_gCol += l_gInc; + if (CF_LIGHT && CF_GOURAUD) + l_gCol.raw += l_gInc.raw; } while (--count); } diff --git a/plugins/gpu_unai/gpu_inner_light.h 
b/plugins/gpu_unai/gpu_inner_light.h index f4f685bb5..44fecdc31 100644 --- a/plugins/gpu_unai/gpu_inner_light.h +++ b/plugins/gpu_unai/gpu_inner_light.h @@ -72,90 +72,89 @@ static void SetupLightLUT() //////////////////////////////////////////////////////////////////////////////// -// Create packed Gouraud fixed-pt 8.3:8.3:8.2 rgb triplet +// Create packed Gouraud fixed-pt 8.8 rgb triplet // // INPUT: // 'r','g','b' are 8.10 fixed-pt color components (r shown here) // 'r' input: --------------rrrrrrrrXXXXXXXXXX // ^ bit 31 // RETURNS: -// u32 output: rrrrrrrrXXXggggggggXXXbbbbbbbbXX -// ^ bit 31 +// gcol_t output: ccccccccXXXXXXXX for c in [r, g, b] +// ^ bit 16 // Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '-' don't care //////////////////////////////////////////////////////////////////////////////// -GPU_INLINE u32 gpuPackGouraudCol(u32 r, u32 g, u32 b) +GPU_INLINE gcol_t gpuPackGouraudCol(u32 r, u32 g, u32 b) { - return ((u32)(b>> 8)&(0x03ff )) - | ((u32)(g<< 3)&(0x07ff<<10)) - | ((u32)(r<<14)&(0x07ff<<21)); + return (gcol_t){ + (u16)(r >> 2), + (u16)(g >> 2), + (u16)(b >> 2), + }; } - //////////////////////////////////////////////////////////////////////////////// -// Create packed increment for Gouraud fixed-pt 8.3:8.3:8.2 rgb triplet +// Create packed increment for Gouraud fixed-pt 8.8 rgb triplet // // INPUT: // Sign-extended 8.10 fixed-pt r,g,b color increment values (only dr is shown) // 'dr' input: ssssssssssssssrrrrrrrrXXXXXXXXXX // ^ bit 31 // RETURNS: -// u32 output: rrrrrrrrXXXggggggggXXXbbbbbbbbXX -// ^ bit 31 +// gcol_t output: ccccccccXXXXXXXX for c in [r, g, b] +// ^ bit 16 // Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and 's' sign bits // // NOTE: The correctness of this code/method has not been fully verified, // having been merely factored out from original code in // poly-drawing functions. 
Feel free to check/improve it -senquack //////////////////////////////////////////////////////////////////////////////// -GPU_INLINE u32 gpuPackGouraudColInc(s32 dr, s32 dg, s32 db) +GPU_INLINE gcol_t gpuPackGouraudColInc(s32 dr, s32 dg, s32 db) { - u32 dr_tmp = (u32)(dr << 14)&(0xffffffff<<21); if (dr < 0) dr_tmp += 1<<21; - u32 dg_tmp = (u32)(dg << 3)&(0xffffffff<<10); if (dg < 0) dg_tmp += 1<<10; - u32 db_tmp = (u32)(db >> 8)&(0xffffffff ); if (db < 0) db_tmp += 1<< 0; - return db_tmp + dg_tmp + dr_tmp; + return (gcol_t){ + (u16)((dr >> 2) + (dr < 0)), + (u16)((dg >> 2) + (dg < 0)), + (u16)((db >> 2) + (db < 0)), + }; } - //////////////////////////////////////////////////////////////////////////////// -// Extract bgr555 color from Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet +// Extract bgr555 color from Gouraud u32 fixed-pt 8.8 rgb triplet // // INPUT: -// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX -// ^ bit 31 +// 'gCol' input: ccccccccXXXXXXXX for c in [r, g, b] +// ^ bit 16 // RETURNS: // u16 output: 0bbbbbgggggrrrrr // ^ bit 16 // Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '0' zero //////////////////////////////////////////////////////////////////////////////// -GPU_INLINE uint_fast16_t gpuLightingRGBGeneric(u32 gCol) +GPU_INLINE uint_fast16_t gpuLightingRGB(gcol_t gCol) { - return ((gCol<< 5)&0x7C00) | - ((gCol>>11)&0x03E0) | - (gCol>>27); + return (gCol.c.r >> 11) | + ((gCol.c.g >> 6) & 0x3e0) | + ((gCol.c.b >> 1) & 0x7c00); } - //////////////////////////////////////////////////////////////////////////////// -// Convert packed Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet in 'gCol' -// to padded u32 5.4:5.4:5.4 bgr fixed-pt triplet, suitable for use +// Convert packed Gouraud u32 fixed-pt 8.8 rgb triplet in 'gCol' +// to padded u32 5.4 bgr fixed-pt triplet, suitable for use // with HQ 24-bit lighting/quantization. 
// // INPUT: -// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX -// ^ bit 31 +// 'gCol' input: ccccccccXXXXXXXX for c in [r, g, b] +// ^ bit 16 // RETURNS: // u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX // ^ bit 31 // Where 'X' are fixed-pt bits, '0' zero-padding, and '-' is don't care //////////////////////////////////////////////////////////////////////////////// -GPU_INLINE u32 gpuLightingRGB24(u32 gCol) +GPU_INLINE u32 gpuLightingRGB24(gcol_t gCol) { - return ((gCol<<19) & (0x1FF<<20)) | - ((gCol>> 2) & (0x1FF<<10)) | - (gCol>>23); + return (gCol.c.r >> 7) + | ((gCol.c.g >> 7) << 10) + | ((gCol.c.b >> 7) << 20); } - //////////////////////////////////////////////////////////////////////////////// // Apply fast (low-precision) 5-bit lighting to bgr555 texture color: // @@ -181,25 +180,23 @@ GPU_INLINE uint_fast16_t gpuLightingTXTGeneric(uint_fast16_t uSrc, u8 r5, u8 g5, // Apply fast (low-precision) 5-bit Gouraud lighting to bgr555 texture color: // // INPUT: -// 'gCol' is a packed Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet, value of -// 15.0 is midpoint that does not modify color of texture -// gCol input : rrrrrXXXXXXgggggXXXXXXbbbbbXXXXX -// ^ bit 31 +// 'gCol' is a Gouraud fixed-pt 8.8 rgb triplet +// 'gCol' input: ccccccccXXXXXXXX for c in [r, g, b] +// ^ bit 16 // 'uSrc' input: -bbbbbgggggrrrrr // ^ bit 16 // RETURNS: // u16 output: 0bbbbbgggggrrrrr // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care //////////////////////////////////////////////////////////////////////////////// -GPU_INLINE uint_fast16_t gpuLightingTXTGouraudGeneric(uint_fast16_t uSrc, u32 gCol) +GPU_INLINE uint_fast16_t gpuLightingTXTGouraud(uint_fast16_t uSrc, gcol_t gCol) { - return (gpu_unai.LightLUT[((uSrc&0x7C00)>>5) | ((gCol>> 5)&0x1F)]<<10) | - (gpu_unai.LightLUT[ (uSrc&0x03E0) | ((gCol>>16)&0x1F)]<< 5) | - (gpu_unai.LightLUT[((uSrc&0x001F)<<5) | (gCol>>27) ]) | + return (gpu_unai.LightLUT[((uSrc&0x7C00)>>5) | (gCol.c.b >> 11)] << 10) | + 
(gpu_unai.LightLUT[ (uSrc&0x03E0) | (gCol.c.g >> 11)] << 5) | + (gpu_unai.LightLUT[((uSrc&0x001F)<<5) | (gCol.c.r >> 11)]) | (uSrc & 0x8000); } - //////////////////////////////////////////////////////////////////////////////// // Apply high-precision 8-bit lighting to bgr555 texture color, // returning a padded u32 5.4:5.4:5.4 bgr fixed-pt triplet @@ -244,22 +241,22 @@ GPU_INLINE u32 gpuLightingTXT24(uint_fast16_t uSrc, u8 r8, u8 g8, u8 b8) // INPUT: // 'uSrc' input: -bbbbbgggggrrrrr // ^ bit 16 -// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX -// ^ bit 31 +// 'gCol' input: ccccccccXXXXXXXX for c in [r, g, b] +// ^ bit 16 // RETURNS: // u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX // ^ bit 31 // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care //////////////////////////////////////////////////////////////////////////////// -GPU_INLINE u32 gpuLightingTXT24Gouraud(uint_fast16_t uSrc, u32 gCol) +GPU_INLINE u32 gpuLightingTXT24Gouraud(uint_fast16_t uSrc, gcol_t gCol) { uint_fast16_t r1 = uSrc&0x001F; uint_fast16_t g1 = uSrc&0x03E0; uint_fast16_t b1 = uSrc&0x7C00; - uint_fast16_t r2 = (gCol>>24) & 0xFF; - uint_fast16_t g2 = (gCol>>13) & 0xFF; - uint_fast16_t b2 = (gCol>> 2) & 0xFF; + uint_fast16_t r2 = gCol.c.r >> 8; + uint_fast16_t g2 = gCol.c.g >> 8; + uint_fast16_t b2 = gCol.c.b >> 8; u32 r3 = r1 * r2; if (r3 & 0xFFFFF000) r3 = ~0xFFFFF000; u32 g3 = g1 * g2; if (g3 & 0xFFFE0000) g3 = ~0xFFFE0000; diff --git a/plugins/gpu_unai/gpu_unai.h b/plugins/gpu_unai/gpu_unai.h index 4ab5a52f1..330620228 100644 --- a/plugins/gpu_unai/gpu_unai.h +++ b/plugins/gpu_unai/gpu_unai.h @@ -53,6 +53,14 @@ #define u32 uint32_t #define s32 int32_t #define s64 int64_t +#define u64 uint64_t + +typedef union { + struct { + u16 r, g, b; + } c; + u64 raw; +} gcol_t; typedef struct { u32 v; @@ -253,11 +261,12 @@ struct gpu_unai_t { s32 u_inc, v_inc; // Color for Gouraud-shaded prims + // Fixed-pt 8.8 rgb triplet // Packed fixed-pt 8.3:8.3:8.2 rgb triplet - // layout: 
rrrrrrrrXXXggggggggXXXbbbbbbbbXX - // ^ bit 31 ^ bit 0 - u32 gCol; - u32 gInc; // Increment along scanline for gCol + // layout: ccccccccXXXXXXXX for c in [r, g, b] + // ^ bit 16 + gcol_t gCol; + gcol_t gInc; // Increment along scanline for gCol // Color for flat-shaded, texture-blended prims u8 r5, g5, b5; // 5-bit light for undithered prims From bd77cbb0ad54c0929e52cc9ba650330bf9b66d05 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 2 Nov 2023 18:46:17 +0200 Subject: [PATCH 456/597] update gitignore --- .gitignore | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 250027037..571750009 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,34 @@ *.o *.a +*.swp *.so +*.d tags cscope.out +compile_commands.json +.gdb_history pandora pcsx.map config.mak config.log +cheatpops.db frontend/revision.h tools -.pcsx/ obj/ +.vscode/ +.vscode/ipch/* + +pcsx +pcsx_rearmed_libretro.dll +pcsxr_spu_area3.out +psx_gpu_offsets_update +/frontend/320240/pollux_set + +/out/ +/pcsx_bin/ +/.pcsx/ +/screenshots/ +/skin +/bios/ +/old/ +/a/ From 25f460eca64d55f980ab57851ffbd9fe58a5793c Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 2 Nov 2023 19:24:40 +0200 Subject: [PATCH 457/597] spu: implement some more irq details libretro/pcsx_rearmed#787 --- plugins/dfsound/dma.c | 4 ++-- plugins/dfsound/externals.h | 1 + plugins/dfsound/registers.c | 6 +++--- plugins/dfsound/spu.c | 23 +++++++++++++++++------ 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/plugins/dfsound/dma.c b/plugins/dfsound/dma.c index 13f9c269f..25a0aefd5 100644 --- a/plugins/dfsound/dma.c +++ b/plugins/dfsound/dma.c @@ -52,7 +52,7 @@ void CALLBACK SPUreadDMAMem(unsigned short *pusPSXMem, int iSize, } if ((spu.spuCtrl & CTRL_IRQ) && irq_after < iSize * 2) { log_unhandled("rdma spu irq: %x/%x+%x\n", irq_addr, spu.spuAddr, iSize * 2); - spu.irqCallback(irq_after); + do_irq_io(irq_after); } spu.spuAddr = addr; set_dma_end(iSize, cycles); @@ -91,7 
+91,7 @@ void CALLBACK SPUwriteDMAMem(unsigned short *pusPSXMem, int iSize, irq_addr, spu.spuAddr, iSize * 2, irq_after); // this should be consistent with psxdma.c timing // might also need more delay like in set_dma_end() - spu.irqCallback(irq_after); + do_irq_io(irq_after); } spu.spuAddr = addr; set_dma_end(iSize, cycles); diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index 4f48c65da..fef5f9ccc 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -279,6 +279,7 @@ extern SPUInfo spu; void do_samples(unsigned int cycles_to, int do_sync); void schedule_next_irq(void); void check_irq_io(unsigned int addr); +void do_irq_io(int cycles_after); #define do_samples_if_needed(c, sync, samples) \ do { \ diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index 05968b617..5d79f251f 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -132,7 +132,7 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, { //-------------------------------------------------// case H_SPUaddr: - spu.spuAddr = (unsigned long) val<<3; + spu.spuAddr = (unsigned int)val << 3; //check_irq_io(spu.spuAddr); break; //-------------------------------------------------// @@ -144,8 +144,8 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, break; //-------------------------------------------------// case H_SPUctrl: + spu.spuStat &= ~STAT_IRQ | val; if (!(spu.spuCtrl & CTRL_IRQ)) { - spu.spuStat&=~STAT_IRQ; if (val & CTRL_IRQ) schedule_next_irq(); } @@ -153,7 +153,7 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, break; //-------------------------------------------------// case H_SPUstat: - spu.spuStat=val&0xf800; + //spu.spuStat=val&0xf800; break; //-------------------------------------------------// case H_SPUReverbAddr: diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 502567bd4..f2023881b 100644 --- a/plugins/dfsound/spu.c +++ 
b/plugins/dfsound/spu.c @@ -198,12 +198,15 @@ static void InterpolateDown(sample_buf *sb, int sinc) #include "gauss_i.h" #include "xa.c" -static void do_irq(void) +static void do_irq(int cycles_after) { - //if(!(spu.spuStat & STAT_IRQ)) + if (spu.spuStat & STAT_IRQ) + log_unhandled("spu: missed irq?\n"); + else { spu.spuStat |= STAT_IRQ; // asserted status? - if(spu.irqCallback) spu.irqCallback(0); + if (spu.irqCallback) + spu.irqCallback(cycles_after); } } @@ -212,7 +215,7 @@ static int check_irq(int ch, unsigned char *pos) if((spu.spuCtrl & (CTRL_ON|CTRL_IRQ)) == (CTRL_ON|CTRL_IRQ) && pos == spu.pSpuIrq) { //printf("ch%d irq %04zx\n", ch, pos - spu.spuMemC); - do_irq(); + do_irq(0); return 1; } return 0; @@ -225,7 +228,15 @@ void check_irq_io(unsigned int addr) if((spu.spuCtrl & (CTRL_ON|CTRL_IRQ)) == (CTRL_ON|CTRL_IRQ) && addr == irq_addr) { //printf("io irq %04x\n", irq_addr); - do_irq(); + do_irq(0); + } +} + +void do_irq_io(int cycles_after) +{ + if ((spu.spuCtrl & (CTRL_ON|CTRL_IRQ)) == (CTRL_ON|CTRL_IRQ)) + { + do_irq(cycles_after); } } @@ -1182,7 +1193,7 @@ void do_samples(unsigned int cycles_to, int do_direct) if (0 < left && left <= ns_to) { //xprintf("decoder irq %x\n", spu.decode_pos); - do_irq(); + do_irq(0); } } if (!spu.cycles_dma_end || (int)(spu.cycles_dma_end - cycles_to) < 0) { From 0b988c8ba4cccb1cd52aeb13f10a122dec1cd54f Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 2 Nov 2023 23:17:28 +0200 Subject: [PATCH 458/597] cdrom: implement some more irq details --- libpcsxcore/cdrom.c | 170 ++++++++++++++++++++------------------------ 1 file changed, 77 insertions(+), 93 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 950b6484b..9519d8c53 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -52,10 +52,10 @@ static struct { // unused members maintain savesate compatibility unsigned char unused0; unsigned char unused1; - unsigned char Reg2; + unsigned char IrqMask; unsigned char unused2; unsigned char Ctrl; - 
unsigned char Stat; + unsigned char IrqStat; unsigned char StatP; @@ -177,7 +177,7 @@ unsigned char Test20[] = { 0x98, 0x06, 0x10, 0xC3 }; unsigned char Test22[] = { 0x66, 0x6F, 0x72, 0x20, 0x45, 0x75, 0x72, 0x6F }; unsigned char Test23[] = { 0x43, 0x58, 0x44, 0x32, 0x39 ,0x34, 0x30, 0x51 }; -// cdr.Stat: +// cdr.IrqStat: #define NoIntr 0 #define DataReady 1 #define Complete 2 @@ -209,7 +209,8 @@ unsigned char Test23[] = { 0x43, 0x58, 0x44, 0x32, 0x39 ,0x34, 0x30, 0x51 }; /* Errors */ #define ERROR_NOTREADY (1<<7) // 0x80 #define ERROR_INVALIDCMD (1<<6) // 0x40 -#define ERROR_INVALIDARG (1<<5) // 0x20 +#define ERROR_BAD_ARGNUM (1<<5) // 0x20 +#define ERROR_BAD_ARGVAL (1<<4) // 0x10 #define ERROR_SHELLOPEN (1<<3) // 0x08 // 1x = 75 sectors per second @@ -276,23 +277,33 @@ static void sec2msf(unsigned int s, u8 *msf) { x |= f; \ } -#define SetResultSize(size) { \ +#define SetResultSize_(size) { \ cdr.ResultP = 0; \ cdr.ResultC = size; \ cdr.ResultReady = 1; \ } -static void setIrq(int log_cmd) +#define SetResultSize(size) { \ + if (cdr.ResultP < cdr.ResultC) \ + CDR_LOG_I("overwriting result, len=%u\n", cdr.ResultC); \ + SetResultSize_(size); \ +} + +static void setIrq(u8 irq, int log_cmd) { - if (cdr.Stat & cdr.Reg2) + u8 old = cdr.IrqStat & cdr.IrqMask ? 1 : 0; + u8 new_ = irq & cdr.IrqMask ? 
1 : 0; + + cdr.IrqStat = irq; + if ((old ^ new_) & new_) psxHu32ref(0x1070) |= SWAP32((u32)0x4); #ifdef CDR_LOG_CMD_IRQ - if (cdr.Stat) + if (cdr.IrqStat) { int i; - CDR_LOG_I("CDR IRQ=%d cmd %02x stat %02x: ", - !!(cdr.Stat & cdr.Reg2), log_cmd, cdr.Stat); + CDR_LOG_I("CDR IRQ=%d cmd %02x irqstat %02x: ", + !!(cdr.IrqStat & cdr.IrqMask), log_cmd, cdr.IrqStat); for (i = 0; i < cdr.ResultC; i++) SysPrintf("%02x ", cdr.Result[i]); SysPrintf("\n"); @@ -340,8 +351,7 @@ void cdrLidSeekInterrupt(void) SetResultSize(2); cdr.Result[0] = cdr.StatP | STATUS_SEEKERROR; cdr.Result[1] = ERROR_SHELLOPEN; - cdr.Stat = DiskError; - setIrq(0x1006); + setIrq(DiskError, 0x1006); } if (cdr.CmdInProgress) { psxRegs.interrupt &= ~(1 << PSXINT_CDR); @@ -349,8 +359,7 @@ void cdrLidSeekInterrupt(void) SetResultSize(2); cdr.Result[0] = cdr.StatP | STATUS_ERROR; cdr.Result[1] = ERROR_NOTREADY; - cdr.Stat = DiskError; - setIrq(0x1007); + setIrq(DiskError, 0x1007); } set_event(PSXINT_CDRLID, cdReadTime * 30); @@ -524,8 +533,7 @@ static void cdrPlayInterrupt_Autopause() SetResultSize(1); cdr.Result[0] = cdr.StatP; - cdr.Stat = DataEnd; - setIrq(0x1000); // 0x1000 just for logging purposes + setIrq(DataEnd, 0x1000); // 0x1000 just for logging purposes StopCdda(); SetPlaySeekRead(cdr.StatP, 0); @@ -561,8 +569,7 @@ static void cdrPlayInterrupt_Autopause() cdr.Result[6] = abs_lev_max >> 0; cdr.Result[7] = abs_lev_max >> 8; - cdr.Stat = DataReady; - setIrq(0x1001); + setIrq(DataReady, 0x1001); } if (cdr.ReportDelay) @@ -676,7 +683,7 @@ void cdrPlayReadInterrupt(void) CDR_readCDDA(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2], (u8 *)read_buf); } - if (!cdr.Stat && (cdr.Mode & (MODE_AUTOPAUSE|MODE_REPORT))) + if (!cdr.IrqStat && (cdr.Mode & (MODE_AUTOPAUSE|MODE_REPORT))) cdrPlayInterrupt_Autopause(); if (!cdr.Muted && cdr.Play && !Config.Cdda) { @@ -706,12 +713,13 @@ void cdrInterrupt(void) { u8 set_loc[3]; int read_ok; u16 not_ready = 0; + u8 IrqStat = Acknowledge; u16 Cmd; int i; 
- if (cdr.Stat) { + if (cdr.IrqStat) { CDR_LOG_I("cmd %02x with irqstat %x\n", - cdr.CmdInProgress, cdr.Stat); + cdr.CmdInProgress, cdr.IrqStat); return; } if (cdr.Irq1Pending) { @@ -722,16 +730,14 @@ void cdrInterrupt(void) { cdr.CmdInProgress, cdr.Irq1Pending); SetResultSize(1); cdr.Result[0] = cdr.Irq1Pending; - cdr.Stat = (cdr.Irq1Pending & STATUS_ERROR) ? DiskError : DataReady; cdr.Irq1Pending = 0; - setIrq(0x1003); + setIrq((cdr.Irq1Pending & STATUS_ERROR) ? DiskError : DataReady, 0x1003); return; } // default response SetResultSize(1); cdr.Result[0] = cdr.StatP; - cdr.Stat = Acknowledge; Cmd = cdr.CmdInProgress; cdr.CmdInProgress = 0; @@ -778,7 +784,7 @@ void cdrInterrupt(void) { CDR_LOG_I("Invalid/out of range seek to %02X:%02X:%02X\n", cdr.Param[0], cdr.Param[1], cdr.Param[2]); if (++cdr.errorRetryhack > 100) break; - error = ERROR_INVALIDARG; + error = ERROR_BAD_ARGNUM; goto set_error; } else @@ -862,7 +868,7 @@ void cdrInterrupt(void) { case CdlForward: // TODO: error 80 if stopped - cdr.Stat = Complete; + IrqStat = Complete; // GameShark CD Player: Calls 2x + Play 2x cdr.FastForward = 1; @@ -870,7 +876,7 @@ void cdrInterrupt(void) { break; case CdlBackward: - cdr.Stat = Complete; + IrqStat = Complete; // GameShark CD Player: Calls 2x + Play 2x cdr.FastBackward = 1; @@ -879,7 +885,7 @@ void cdrInterrupt(void) { case CdlStandby: if (cdr.DriveState != DRIVESTATE_STOPPED) { - error = ERROR_INVALIDARG; + error = ERROR_BAD_ARGNUM; goto set_error; } second_resp_time = cdReadTime * 125 / 2; @@ -887,7 +893,7 @@ void cdrInterrupt(void) { break; case CdlStandby + CMD_PART2: - cdr.Stat = Complete; + IrqStat = Complete; break; case CdlStop: @@ -914,7 +920,7 @@ void cdrInterrupt(void) { break; case CdlStop + CMD_PART2: - cdr.Stat = Complete; + IrqStat = Complete; break; case CdlPause: @@ -953,7 +959,7 @@ void cdrInterrupt(void) { break; case CdlPause + CMD_PART2: - cdr.Stat = Complete; + IrqStat = Complete; break; case CdlReset: @@ -970,7 +976,7 @@ void 
cdrInterrupt(void) { case CdlReset + CMD_PART2: case CdlReset + CMD_PART2 + CMD_WHILE_NOT_READY: - cdr.Stat = Complete; + IrqStat = Complete; break; case CdlMute: @@ -995,7 +1001,7 @@ void cdrInterrupt(void) { case CdlGetparam: case CdlGetparam + CMD_WHILE_NOT_READY: /* Gameblabla : According to mednafen, Result size should be 5 and done this way. */ - SetResultSize(5); + SetResultSize_(5); cdr.Result[1] = cdr.Mode; cdr.Result[2] = 0; cdr.Result[3] = cdr.FilterFile; @@ -1007,12 +1013,12 @@ void cdrInterrupt(void) { error = 0x80; goto set_error; } - SetResultSize(8); + SetResultSize_(8); memcpy(cdr.Result, cdr.LocL, 8); break; case CdlGetlocP: - SetResultSize(8); + SetResultSize_(8); memcpy(&cdr.Result, &cdr.subq, 8); break; @@ -1023,36 +1029,29 @@ void cdrInterrupt(void) { break; case CdlReadT + CMD_PART2: - cdr.Stat = Complete; + IrqStat = Complete; break; case CdlGetTN: - SetResultSize(3); if (CDR_getTN(cdr.ResultTN) == -1) { - cdr.Stat = DiskError; - cdr.Result[0] |= STATUS_ERROR; - } else { - cdr.Stat = Acknowledge; - cdr.Result[1] = itob(cdr.ResultTN[0]); - cdr.Result[2] = itob(cdr.ResultTN[1]); + assert(0); } + SetResultSize_(3); + cdr.Result[1] = itob(cdr.ResultTN[0]); + cdr.Result[2] = itob(cdr.ResultTN[1]); break; case CdlGetTD: cdr.Track = btoi(cdr.Param[0]); - SetResultSize(4); if (CDR_getTD(cdr.Track, cdr.ResultTD) == -1) { - cdr.Stat = DiskError; - cdr.Result[0] |= STATUS_ERROR; - } else { - cdr.Stat = Acknowledge; - cdr.Result[0] = cdr.StatP; - cdr.Result[1] = itob(cdr.ResultTD[2]); - cdr.Result[2] = itob(cdr.ResultTD[1]); - /* According to Nocash's documentation, the function doesn't care about ff. - * This can be seen also in Mednafen's implementation. 
*/ - //cdr.Result[3] = itob(cdr.ResultTD[0]); + error = ERROR_BAD_ARGVAL; + goto set_error; } + SetResultSize_(3); + cdr.Result[1] = itob(cdr.ResultTD[2]); + cdr.Result[2] = itob(cdr.ResultTD[1]); + // no sector number + //cdr.Result[3] = itob(cdr.ResultTD[0]); break; case CdlSeekL: @@ -1084,7 +1083,7 @@ void cdrInterrupt(void) { case CdlSeekP + CMD_PART2: SetPlaySeekRead(cdr.StatP, 0); cdr.Result[0] = cdr.StatP; - cdr.Stat = Complete; + IrqStat = Complete; Find_CurTrack(cdr.SetSectorPlay); read_ok = ReadTrack(cdr.SetSectorPlay); @@ -1099,15 +1098,15 @@ void cdrInterrupt(void) { case CdlTest + CMD_WHILE_NOT_READY: switch (cdr.Param[0]) { case 0x20: // System Controller ROM Version - SetResultSize(4); + SetResultSize_(4); memcpy(cdr.Result, Test20, 4); break; case 0x22: - SetResultSize(8); + SetResultSize_(8); memcpy(cdr.Result, Test22, 4); break; case 0x23: case 0x24: - SetResultSize(8); + SetResultSize_(8); memcpy(cdr.Result, Test23, 4); break; } @@ -1118,7 +1117,7 @@ void cdrInterrupt(void) { break; case CdlID + CMD_PART2: - SetResultSize(8); + SetResultSize_(8); cdr.Result[0] = cdr.StatP; cdr.Result[1] = 0; cdr.Result[2] = 0; @@ -1140,7 +1139,7 @@ void cdrInterrupt(void) { /* This adds the string "PCSX" in Playstation bios boot screen */ memcpy((char *)&cdr.Result[4], "PCSX", 4); - cdr.Stat = Complete; + IrqStat = Complete; break; case CdlInit: @@ -1168,7 +1167,7 @@ void cdrInterrupt(void) { case CdlReadToc + CMD_PART2: case CdlReadToc + CMD_PART2 + CMD_WHILE_NOT_READY: - cdr.Stat = Complete; + IrqStat = Complete; break; case CdlReadN: @@ -1215,10 +1214,10 @@ void cdrInterrupt(void) { // FALLTHROUGH set_error: - SetResultSize(2); + SetResultSize_(2); cdr.Result[0] = cdr.StatP | STATUS_ERROR; cdr.Result[1] = not_ready ? 
ERROR_NOTREADY : error; - cdr.Stat = DiskError; + IrqStat = DiskError; CDR_LOG_I("cmd %02x error %02x\n", Cmd, cdr.Result[1]); break; } @@ -1237,7 +1236,7 @@ void cdrInterrupt(void) { CDR_LOG_I("cmd %02x came before %02x finished\n", cdr.Cmd, Cmd); } - setIrq(Cmd); + setIrq(IrqStat, Cmd); } #ifdef HAVE_ARMV7 @@ -1301,17 +1300,16 @@ static void cdrAttenuate(s16 *buf, int samples, int stereo) static void cdrReadInterruptSetResult(unsigned char result) { - if (cdr.Stat) { + if (cdr.IrqStat) { CDR_LOG_I("%d:%02d:%02d irq miss, cmd=%02x irqstat=%02x\n", cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2], - cdr.CmdInProgress, cdr.Stat); + cdr.CmdInProgress, cdr.IrqStat); cdr.Irq1Pending = result; return; } SetResultSize(1); cdr.Result[0] = result; - cdr.Stat = (result & STATUS_ERROR) ? DiskError : DataReady; - setIrq(0x1004); + setIrq((result & STATUS_ERROR) ? DiskError : DataReady, 0x1004); } static void cdrUpdateTransferBuf(const u8 *buf) @@ -1360,7 +1358,7 @@ static void cdrReadInterrupt(void) } memcpy(cdr.LocL, buf, 8); - if (!cdr.Stat && !cdr.Irq1Pending) + if (!cdr.IrqStat && !cdr.Irq1Pending) cdrUpdateTransferBuf(buf); subhdr = (void *)(buf + 4); @@ -1528,8 +1526,8 @@ void cdrWrite2(unsigned char rt) { cdr.Param[cdr.ParamC++] = rt; return; case 1: - cdr.Reg2 = rt; - setIrq(0x1005); + cdr.IrqMask = rt; + setIrq(cdr.IrqStat, 0x1005); return; case 2: cdr.AttenuatorLeftToLeftT = rt; @@ -1542,9 +1540,9 @@ void cdrWrite2(unsigned char rt) { unsigned char cdrRead3(void) { if (cdr.Ctrl & 0x1) - psxHu8(0x1803) = cdr.Stat | 0xE0; + psxHu8(0x1803) = cdr.IrqStat | 0xE0; else - psxHu8(0x1803) = cdr.Reg2 | 0xE0; + psxHu8(0x1803) = cdr.IrqMask | 0xE0; CDR_LOG_IO("cdr r3.%s: %02x\n", (cdr.Ctrl & 1) ? 
"ifl" : "ien", psxHu8(0x1803)); return psxHu8(0x1803); @@ -1558,13 +1556,13 @@ void cdrWrite3(unsigned char rt) { case 0: break; // transfer case 1: - if (cdr.Stat & rt) { + if (cdr.IrqStat & rt) { u32 nextCycle = psxRegs.intCycle[PSXINT_CDR].sCycle + psxRegs.intCycle[PSXINT_CDR].cycle; int pending = psxRegs.interrupt & (1 << PSXINT_CDR); #ifdef CDR_LOG_CMD_IRQ - CDR_LOG_I("ack %02x (w=%02x p=%d,%x,%x,%d)\n", cdr.Stat & rt, rt, - !!pending, cdr.CmdInProgress, + CDR_LOG_I("ack %02x (w=%02x p=%d,%x,%x,%d)\n", + cdr.IrqStat & rt, rt, !!pending, cdr.CmdInProgress, cdr.Irq1Pending, nextCycle - psxRegs.cycle); #endif // note: Croc, Shadow Tower (more) vs Discworld Noir (<993) @@ -1578,7 +1576,7 @@ void cdrWrite3(unsigned char rt) { set_event(PSXINT_CDR, c); } } - cdr.Stat &= ~rt; + cdr.IrqStat &= ~rt; if (rt & 0x40) cdr.ParamC = 0; @@ -1712,8 +1710,8 @@ void cdrReset() { cdr.CurTrack = 1; cdr.FilterFile = 0; cdr.FilterChannel = 0; - cdr.Reg2 = 0x1f; - cdr.Stat = NoIntr; + cdr.IrqMask = 0x1f; + cdr.IrqStat = NoIntr; cdr.FifoOffset = DATA_SIZE; // fifo empty CDR_getStatus(&stat); @@ -1779,20 +1777,6 @@ int cdrFreeze(void *f, int Mode) { if (!Config.Cdda) CDR_play(cdr.SetSectorPlay); } - - if ((cdr.freeze_ver & 0xffffff00) != 0x63647200) { - // old versions did not latch Reg2, have to fixup.. - if (cdr.Reg2 == 0) { - SysPrintf("cdrom: fixing up old savestate\n"); - cdr.Reg2 = 7; - } - // also did not save Attenuator.. 
- if ((cdr.AttenuatorLeftToLeft | cdr.AttenuatorLeftToRight - | cdr.AttenuatorRightToLeft | cdr.AttenuatorRightToRight) == 0) - { - cdr.AttenuatorLeftToLeft = cdr.AttenuatorRightToRight = 0x80; - } - } } return 0; From 54d6fbe750e0219f7d13eedcd05e6274edbbd7fb Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 3 Nov 2023 02:02:26 +0200 Subject: [PATCH 459/597] cdrom: add forgotten reset --- libpcsxcore/cdrom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 9519d8c53..7431a926d 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -990,6 +990,7 @@ void cdrInterrupt(void) { case CdlSetfilter: cdr.FilterFile = cdr.Param[0]; cdr.FilterChannel = cdr.Param[1]; + cdr.FileChannelSelected = 0; break; case CdlSetmode: From ca640f335d4b230140a33f48c314ecaec5dcd901 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 4 Nov 2023 02:06:57 +0200 Subject: [PATCH 460/597] adjust the deadzone hack libretro/pcsx_rearmed#792 --- libpcsxcore/plugins.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index a6171879e..d44442b01 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -626,8 +626,9 @@ static void adjust_analog(unsigned char *b) { // ff8 hates 0x80 for whatever reason (broken in 2d area menus), // or is this caused by something else we do wrong?? - if (b[6] == 0x80) - b[6] = 0x7f; + // Also S.C.A.R.S. treats 0x7f as turning left. 
+ if (b[6] == 0x7f || b[6] == 0x80) + b[6] = 0x81; } // Build response for 0x42 request Pad in port From c2a25f6790029cd0fd4efe1b8aa96e4159f1b1e9 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 13 Nov 2023 21:04:37 +0200 Subject: [PATCH 461/597] gpu_neon: enforce alignment required for asm libretro/pcsx_rearmed#798 --- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 1 + plugins/gpu_neon/psx_gpu_if.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index f0ba39f39..ffbea043c 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -6129,6 +6129,7 @@ function(scale2x_tiles8) mov r14, r2 0: + pld [r1, #1024*2] vld1.u16 { q0 }, [r1, :128]! vld1.u16 { q2 }, [r1, :128]! vmov q1, q0 diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index ea98ade85..a1476f480 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -143,6 +143,9 @@ sync_enhancement_buffers(int x, int y, int w, int h) x2 = min(right, s->x + s_w); y1 = max(y, s->y); y2 = min(bottom, s->y + s_h); + // 16-byte align for the asm version + x2 += x1 & 7; + x1 &= ~7; scale2x_tiles8(dst + y1 * 1024*2 + x1 * 2, src + y1 * 1024 + x1, (x2 - x1 + 7) / 8u, y2 - y1); } From cc1e8bd4d0eb3227ace5a3d9f70778d0ce3fc537 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 14 Nov 2023 00:44:20 +0200 Subject: [PATCH 462/597] cdrom: try more states let's see what else breaks now libretro/pcsx_rearmed#796 --- libpcsxcore/cdrom.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 7431a926d..013be599c 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -113,7 +113,7 @@ static struct { u8 unused7; - u8 DriveState; + u8 DriveState; // enum drive_state u8 FastForward; u8 FastBackward; u8 errorRetryhack; @@ -222,11 +222,14 @@ unsigned char 
Test23[] = { 0x43, 0x58, 0x44, 0x32, 0x39 ,0x34, 0x30, 0x51 }; #define SUBQ_FORWARD_SECTORS 2u enum drive_state { - DRIVESTATE_STANDBY = 0, // pause, play, read + DRIVESTATE_STANDBY = 0, // different from paused DRIVESTATE_LID_OPEN, DRIVESTATE_RESCAN_CD, DRIVESTATE_PREPARE_CD, DRIVESTATE_STOPPED, + DRIVESTATE_PAUSED, + DRIVESTATE_PLAY_READ, + DRIVESTATE_SEEK, }; static struct CdrStat stat; @@ -537,6 +540,7 @@ static void cdrPlayInterrupt_Autopause() StopCdda(); SetPlaySeekRead(cdr.StatP, 0); + cdr.DriveState = DRIVESTATE_PAUSED; } else if ((cdr.Mode & MODE_REPORT) && !cdr.ReportDelay && ((cdr.subq.Absolute[2] & 0x0f) == 0 || cdr.FastForward || cdr.FastBackward)) @@ -585,16 +589,17 @@ static int cdrSeekTime(unsigned char *target) // need this stupidly long penalty or else Spyro2 intro desyncs // note: if misapplied this breaks MGS cutscenes among other things - if (cyclesSinceRS > cdReadTime * 50) + if (cdr.DriveState == DRIVESTATE_PAUSED && cyclesSinceRS > cdReadTime * 50) seekTime += cdReadTime * 25; // Transformers Beast Wars Transmetals does Setloc(x),SeekL,Setloc(x),ReadN // and then wants some slack time - else if (cyclesSinceRS < cdReadTime *3/2) + else if (cdr.DriveState == DRIVESTATE_PAUSED || cyclesSinceRS < cdReadTime *3/2) seekTime += cdReadTime; seekTime = MIN_VALUE(seekTime, PSXCLK * 2 / 3); - CDR_LOG("seek: %.2f %.2f (%.2f)\n", (float)seekTime / PSXCLK, - (float)seekTime / cdReadTime, (float)cyclesSinceRS / cdReadTime); + CDR_LOG("seek: %.2f %.2f (%.2f) st %d\n", (float)seekTime / PSXCLK, + (float)seekTime / cdReadTime, (float)cyclesSinceRS / cdReadTime, + cdr.DriveState); return seekTime; } @@ -672,12 +677,14 @@ void cdrPlayReadInterrupt(void) CDR_LOG("CDDA - %02d:%02d:%02d m %02x\n", cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2], cdr.Mode); + cdr.DriveState = DRIVESTATE_PLAY_READ; SetPlaySeekRead(cdr.StatP, STATUS_PLAY); if (memcmp(cdr.SetSectorPlay, cdr.SetSectorEnd, 3) == 0) { CDR_LOG_I("end stop\n"); StopCdda(); 
SetPlaySeekRead(cdr.StatP, 0); cdr.TrackChanged = TRUE; + cdr.DriveState = DRIVESTATE_PAUSED; } else { CDR_readCDDA(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2], (u8 *)read_buf); @@ -861,6 +868,7 @@ void cdrInterrupt(void) { // BIOS player - set flag again cdr.Play = TRUE; + cdr.DriveState = DRIVESTATE_PLAY_READ; CDRPLAYREAD_INT(cdReadTime + seekTime, 1); start_rotating = 1; @@ -888,6 +896,7 @@ void cdrInterrupt(void) { error = ERROR_BAD_ARGNUM; goto set_error; } + cdr.DriveState = DRIVESTATE_STANDBY; second_resp_time = cdReadTime * 125 / 2; start_rotating = 1; break; @@ -913,7 +922,7 @@ void cdrInterrupt(void) { cdr.LocL[0] = LOCL_INVALID; second_resp_time = 0x800; - if (cdr.DriveState == DRIVESTATE_STANDBY) + if (cdr.DriveState != DRIVESTATE_STOPPED) second_resp_time = cdReadTime * 30 / 2; cdr.DriveState = DRIVESTATE_STOPPED; @@ -956,6 +965,7 @@ void cdrInterrupt(void) { second_resp_time = (((cdr.Mode & MODE_SPEED) ? 1 : 2) * 1097107); } SetPlaySeekRead(cdr.StatP, 0); + cdr.DriveState = DRIVESTATE_PAUSED; break; case CdlPause + CMD_PART2: @@ -970,6 +980,7 @@ void cdrInterrupt(void) { cdr.LocL[0] = LOCL_INVALID; cdr.Muted = FALSE; cdr.Mode = MODE_SIZE_2340; /* This fixes This is Football 2, Pooh's Party lockups */ + cdr.DriveState = DRIVESTATE_PAUSED; second_resp_time = not_ready ? 
70000 : 4100000; start_rotating = 1; break; @@ -1063,6 +1074,7 @@ void cdrInterrupt(void) { seekTime = cdrSeekTime(cdr.SetSector); memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); + cdr.DriveState = DRIVESTATE_SEEK; /* Crusaders of Might and Magic = 0.5x-4x - fix cutscene speech start @@ -1091,6 +1103,7 @@ void cdrInterrupt(void) { if (read_ok && (buf = CDR_getBuffer())) memcpy(cdr.LocL, buf, 8); UpdateSubq(cdr.SetSectorPlay); + cdr.DriveState = DRIVESTATE_STANDBY; cdr.TrackChanged = FALSE; cdr.LastReadSeekCycles = psxRegs.cycle; break; @@ -1198,6 +1211,7 @@ void cdrInterrupt(void) { cdr.LocL[0] = LOCL_INVALID; cdr.SubqForwardSectors = 1; cdr.sectorsRead = 0; + cdr.DriveState = DRIVESTATE_SEEK; cycles = (cdr.Mode & MODE_SPEED) ? cdReadTime : cdReadTime * 2; cycles += seekTime; @@ -1344,6 +1358,7 @@ static void cdrReadInterrupt(void) // note: CdlGetlocL should work as soon as STATUS_READ is indicated SetPlaySeekRead(cdr.StatP, STATUS_READ | STATUS_ROTATING); + cdr.DriveState = DRIVESTATE_PLAY_READ; cdr.sectorsRead++; read_ok = ReadTrack(cdr.SetSectorPlay); @@ -1355,6 +1370,7 @@ static void cdrReadInterrupt(void) if (!read_ok) { CDR_LOG_I("cdrReadInterrupt() Log: err\n"); cdrReadInterruptSetResult(cdr.StatP | STATUS_ERROR); + cdr.DriveState = DRIVESTATE_PAUSED; // ? 
return; } memcpy(cdr.LocL, buf, 8); From 1d94bcebe550102471628f41282c2df6753078a6 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 13 Nov 2023 22:07:12 +0200 Subject: [PATCH 463/597] rearrange cycle hack table --- libpcsxcore/database.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 2acd67541..6183db2c1 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -67,40 +67,31 @@ hack_db[] = static const struct { - const char * const id; int mult; + const char * const id[4]; } cycle_multiplier_overrides[] = { /* note: values are = (10000 / gui_option) */ /* Internal Section - fussy about timings */ - { "SLPS01868", 202 }, + { 202, { "SLPS01868" } }, /* Super Robot Taisen Alpha - on the edge with 175, * changing memcard settings is enough to break/unbreak it */ - { "SLPS02528", 190 }, - { "SLPS02636", 190 }, + { 190, { "SLPS02528", "SLPS02636" } }, /* Brave Fencer Musashi - cd sectors arrive too fast */ - { "SLUS00726", 170 }, - { "SLPS01490", 170 }, + { 170, { "SLUS00726", "SLPS01490" } }, #if defined(DRC_DISABLE) || defined(LIGHTREC) /* new_dynarec has a hack for this game */ /* Parasite Eve II - internal timer checks */ - { "SLUS01042", 125 }, - { "SLUS01055", 125 }, - { "SLES02558", 125 }, - { "SLES12558", 125 }, + { 125, { "SLUS01042", "SLUS01055", "SLES02558", "SLES12558" } }, #endif /* Discworld Noir - audio skips if CPU runs too fast */ - { "SLES01549", 222 }, - { "SLES02063", 222 }, - { "SLES02064", 222 }, + { 222, { "SLES01549", "SLES02063", "SLES02064" } }, /* Judge Dredd - could also be poor MDEC timing */ - { "SLUS00630", 128 }, - { "SLES00755", 128 }, + { 128, { "SLUS00630", "SLES00755" } }, /* Digimon World */ - { "SLUS01032", 153 }, - { "SLES02914", 153 }, + { 153, { "SLUS01032", "SLES02914" } }, /* Syphon Filter - reportedly hangs under unknown conditions */ - { "SCUS94240", 169 }, + { 169, { "SCUS94240" } }, }; /* Function for 
automatic patching according to GameID. */ @@ -141,7 +132,11 @@ void Apply_Hacks_Cdrom(void) for (i = 0; i < ARRAY_SIZE(cycle_multiplier_overrides); i++) { - if (strcmp(CdromId, cycle_multiplier_overrides[i].id) == 0) + const char * const * const ids = cycle_multiplier_overrides[i].id; + for (j = 0; j < ARRAY_SIZE(cycle_multiplier_overrides[i].id); j++) + if (ids[j] && strcmp(ids[j], CdromId) == 0) + break; + if (j < ARRAY_SIZE(cycle_multiplier_overrides[i].id)) { Config.cycle_multiplier_override = cycle_multiplier_overrides[i].mult; new_dynarec_hacks_pergame |= NDHACK_OVERRIDE_CYCLE_M; From 3de08a09ec3cd4d9573b2c5826dc3c05fe7f5852 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 14 Nov 2023 00:46:38 +0200 Subject: [PATCH 464/597] yet more timing hacks libretro/pcsx_rearmed#107 --- libpcsxcore/database.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 6183db2c1..ebdb69b64 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -92,6 +92,9 @@ cycle_multiplier_overrides[] = { 153, { "SLUS01032", "SLES02914" } }, /* Syphon Filter - reportedly hangs under unknown conditions */ { 169, { "SCUS94240" } }, + /* Psychic Detective - some weird race condition in the game's cdrom code */ + { 222, { "SLUS00165", "SLUS00166", "SLUS00167" } }, + { 222, { "SLES00070", "SLES10070", "SLES20070" } }, }; /* Function for automatic patching according to GameID. 
*/ From 20ed712f92d53fa7d51a4b805af997f5770077ae Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 16 Nov 2023 21:07:21 +0200 Subject: [PATCH 465/597] spu: patch up more savestate issues notaz/pcsx_rearmed#329 --- plugins/dfsound/freeze.c | 36 +++++++++++++++++++++++++++++++++--- plugins/dfsound/registers.c | 3 ++- plugins/dfsound/reverb.c | 2 +- plugins/dfsound/spu.c | 4 ++-- plugins/dfsound/xa.c | 4 +++- 5 files changed, 41 insertions(+), 8 deletions(-) diff --git a/plugins/dfsound/freeze.c b/plugins/dfsound/freeze.c index 8816a51ce..32c07dbde 100644 --- a/plugins/dfsound/freeze.c +++ b/plugins/dfsound/freeze.c @@ -130,6 +130,14 @@ typedef struct SPUCHAN_orig s_chan[MAXCHAN]; + uint32_t cycles_dma_end; + uint32_t decode_dirty_ch; + uint32_t dwNoiseVal; + uint32_t dwNoiseCount; + uint32_t XARepeat; + uint32_t XALastVal; + uint32_t last_keyon_cycles; + } SPUOSSFreeze_t; //////////////////////////////////////////////////////////////////////// @@ -250,6 +258,8 @@ long CALLBACK SPUfreeze(uint32_t ulFreezeMode, SPUFreeze_t * pF, if(ulFreezeMode==2) return 1; // info mode? 
ok, bye // save mode: + regAreaGet(H_SPUctrl) = spu.spuCtrl; + regAreaGet(H_SPUstat) = spu.spuStat; memcpy(pF->cSPURam,spu.spuMem,0x80000); // copy common infos memcpy(pF->cSPUPort,spu.regArea,0x200); @@ -312,6 +322,8 @@ long CALLBACK SPUfreeze(uint32_t ulFreezeMode, SPUFreeze_t * pF, memcpy(spu.spuMem,pF->cSPURam,0x80000); // get ram memcpy(spu.regArea,pF->cSPUPort,0x200); spu.bMemDirty = 1; + spu.spuCtrl = regAreaGet(H_SPUctrl); + spu.spuStat = regAreaGet(H_SPUstat); if (!strcmp(pF->szSPUName,"PBOSS") && pF->ulFreezeVersion==5) pFO = LoadStateV5(pF, cycles); @@ -329,18 +341,35 @@ long CALLBACK SPUfreeze(uint32_t ulFreezeMode, SPUFreeze_t * pF, FeedCDDA((void *)pF->xaS.pcm, pFO->cdda_left * 4); } + // not in old savestates + spu.cycles_dma_end = 0; + spu.decode_dirty_ch = spu.dwChannelsAudible & 0x0a; + spu.dwNoiseVal = 0; + spu.dwNoiseCount = 0; + spu.XARepeat = 0; + spu.XALastVal = 0; + spu.last_keyon_cycles = cycles - 16*786u; + if (pFO && pF->ulFreezeSize >= sizeof(*pF) + sizeof(*pFO)) { + spu.cycles_dma_end = pFO->cycles_dma_end; + spu.decode_dirty_ch = pFO->decode_dirty_ch; + spu.dwNoiseVal = pFO->dwNoiseVal; + spu.dwNoiseCount = pFO->dwNoiseCount; + spu.XARepeat = pFO->XARepeat; + spu.XALastVal = pFO->XALastVal; + spu.last_keyon_cycles = pFO->last_keyon_cycles; + } + // repair some globals for(i=0;i<=62;i+=2) load_register(H_Reverb+i, cycles); load_register(H_SPUReverbAddr, cycles); load_register(H_SPUrvolL, cycles); load_register(H_SPUrvolR, cycles); - - load_register(H_SPUctrl, cycles); - load_register(H_SPUstat, cycles); load_register(H_CDLeft, cycles); load_register(H_CDRight, cycles); + // reverb + spu.rvb->StartAddr = regAreaGet(H_SPUReverbAddr) << 2; if (spu.rvb->CurrAddr < spu.rvb->StartAddr) spu.rvb->CurrAddr = spu.rvb->StartAddr; // fix to prevent new interpolations from crashing @@ -383,6 +412,7 @@ static SPUOSSFreeze_t * LoadStateV5(SPUFreeze_t * pF, uint32_t cycles) spu.s_chan[i].pCurr+=(uintptr_t)spu.spuMemC; 
spu.s_chan[i].pLoop+=(uintptr_t)spu.spuMemC; } + return pFO; } diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index 5d79f251f..1e79c0e6b 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -144,6 +144,7 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, break; //-------------------------------------------------// case H_SPUctrl: + spu.spuStat = (spu.spuStat & ~0x3f) | (val & 0x3f); spu.spuStat &= ~STAT_IRQ | val; if (!(spu.spuCtrl & CTRL_IRQ)) { if (val & CTRL_IRQ) @@ -365,7 +366,7 @@ unsigned short CALLBACK SPUreadRegister(unsigned long reg, unsigned int cycles) return spu.spuCtrl; case H_SPUstat: - return (spu.spuStat & ~0x3F) | (spu.spuCtrl & 0x3F); + return spu.spuStat; case H_SPUaddr: return (unsigned short)(spu.spuAddr>>3); diff --git a/plugins/dfsound/reverb.c b/plugins/dfsound/reverb.c index de9b804af..c0ecea1da 100644 --- a/plugins/dfsound/reverb.c +++ b/plugins/dfsound/reverb.c @@ -177,7 +177,7 @@ static void REVERBPrep(void) REVERBInfo *rvb = spu.rvb; int space, t; - t = spu.regArea[(H_SPUReverbAddr - 0xc00) >> 1]; + t = regAreaGet(H_SPUReverbAddr); if (t == 0xFFFF || t <= 0x200) spu.rvb->StartAddr = spu.rvb->CurrAddr = 0; else if (spu.rvb->StartAddr != (t << 2)) diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index f2023881b..7501df8b3 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1147,7 +1147,7 @@ void do_samples(unsigned int cycles_to, int do_direct) cycle_diff = cycles_to - spu.cycles_played; if (cycle_diff < -2*1048576 || cycle_diff > 2*1048576) { - //xprintf("desync %u %d\n", cycles_to, cycle_diff); + log_unhandled("desync %u %d\n", cycles_to, cycle_diff); spu.cycles_played = cycles_to; return; } @@ -1164,7 +1164,7 @@ void do_samples(unsigned int cycles_to, int do_direct) ns_to = (cycle_diff / 768 + 1) & ~1; if (ns_to > NSSIZE) { // should never happen - //xprintf("ns_to oflow %d %d\n", ns_to, NSSIZE); + log_unhandled("ns_to oflow %d %d\n", 
ns_to, NSSIZE); ns_to = NSSIZE; } diff --git a/plugins/dfsound/xa.c b/plugins/dfsound/xa.c index 08afc0099..b3ac01d94 100644 --- a/plugins/dfsound/xa.c +++ b/plugins/dfsound/xa.c @@ -413,8 +413,10 @@ void FeedCDDA(unsigned char *pcm, int nBytes) { int space; space=(spu.CDDAPlay-spu.CDDAFeed-1)*4 & (CDDA_BUFFER_SIZE - 1); - if(space0) { From b71d436a54459688bf637e98eaf371def182ac9c Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 16 Nov 2023 23:23:05 +0200 Subject: [PATCH 466/597] psxbios: handle direct bios table calls libretro/pcsx_rearmed#797 --- libpcsxcore/psxbios.c | 96 +++++++++++++++++++++++++++++++++---------- libpcsxcore/psxhle.h | 4 +- 2 files changed, 78 insertions(+), 22 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 3671af5a0..56239f20c 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -303,10 +303,10 @@ static u32 floodchk; #define A_CD_EVENTS 0xb9b8 #define A_EXC_GP 0xf450 -#define A_A0_DUMMY 0x1010 -#define A_B0_DUMMY 0x2010 -#define A_C0_DUMMY 0x3010 -#define A_B0_5B_DUMMY 0x43d0 +#define A_A0_TRAPS 0x1010 +#define A_B0_TRAPS 0x2010 +#define A_C0_TRAPS 0x3010 +#define A_B0_5B_TRAP 0x43d0 #define HLEOP(n) SWAPu32((0x3b << 26) | (n)); @@ -3752,18 +3752,25 @@ void psxBiosInit() { // fill the api jumptables with fake entries as some games patch them // (or rather the funcs listed there) + // also trap the destination as some "Cheats Edition" thing overrides the + // dispatcher with a wrapper and then jumps to the table entries directly ptr = (u32 *)&psxM[A_A0_TABLE]; - for (i = 0; i < 256; i++) - ptr[i] = SWAP32(A_A0_DUMMY); - + for (i = 0; i < 256; i++) { + ptr[i] = SWAP32(A_A0_TRAPS + i*4); + ram32[A_A0_TRAPS/4 + i] = HLEOP(hleop_a0t); + } ptr = (u32 *)&psxM[A_B0_TABLE]; - for (i = 0; i < 256; i++) - ptr[i] = SWAP32(A_B0_DUMMY); + for (i = 0; i < 256; i++) { + ptr[i] = SWAP32(A_B0_TRAPS + i*4); + ram32[A_B0_TRAPS/4 + i] = HLEOP(hleop_b0t); + } // B(5b) is special because games patch (sometimes even jump 
to) // code at fixed offsets from it, nocash lists offsets: // patch: +3d8, +4dc, +594, +62c, +9c8, +1988 // call: +7a0=4b70, +884=4c54, +894=4c64 - ptr[0x5b] = SWAP32(A_B0_5B_DUMMY); // 0x43d0 + ptr[0x5b] = SWAP32(A_B0_5B_TRAP); // 0x43d0 + ram32[A_B0_5B_TRAP/4] = HLEOP(hleop_b0t); + ram32[0x4b70/4] = SWAP32(0x03e00008); // jr $ra // setPadOutputBuf ram32[0x4c54/4] = SWAP32(0x240e0001); // mov $t6, 1 @@ -3774,15 +3781,17 @@ void psxBiosInit() { ram32[0x4c68/4] = SWAP32(0xac000000 + A_PAD_IRQR_ENA); // sw $0, ... ptr = (u32 *)&psxM[A_C0_TABLE]; - for (i = 0; i < 256/2; i++) - ptr[i] = SWAP32(A_C0_DUMMY); + for (i = 0; i < 256/2; i++) { + ptr[i] = SWAP32(A_C0_TRAPS + i*4); + ram32[A_C0_TRAPS/4 + i] = HLEOP(hleop_c0t); + } ptr[6] = SWAP32(A_EXCEPTION); // more HLE traps - ram32[A_A0_DUMMY/4] = HLEOP(hleop_dummy); - ram32[A_B0_DUMMY/4] = HLEOP(hleop_dummy); - ram32[A_C0_DUMMY/4] = HLEOP(hleop_dummy); - ram32[A_B0_5B_DUMMY/4] = HLEOP(hleop_dummy); + ram32[A_A0_TRAPS/4 - 1] = HLEOP(hleop_dummy); + ram32[A_B0_TRAPS/4 - 1] = HLEOP(hleop_dummy); + ram32[A_C0_TRAPS/4 - 1] = HLEOP(hleop_dummy); + ram32[0x7ffc/4] = HLEOP(hleop_dummy); ram32[0x8000/4] = HLEOP(hleop_execret); ram32[A_EEXIT_PTR/4] = SWAP32(A_EEXIT_DEF); @@ -4097,7 +4106,8 @@ static void hleA0() { u32 call = t1 & 0xff; u32 entry = loadRam32(A_A0_TABLE + call * 4); - if (call < 192 && entry != A_A0_DUMMY) { + use_cycles(4+7); + if (call < 192 && entry != A_A0_TRAPS + call * 4) { PSXBIOS_LOG("custom A%02x %s(0x%x, ) addr=%08x ra=%08x\n", call, biosA0n[call], a0, entry, ra); softCall(entry); @@ -4116,10 +4126,11 @@ static void hleB0() { u32 entry = loadRam32(A_B0_TABLE + call * 4); int is_custom = 0; + use_cycles(4+7); if (call == 0x5b) - is_custom = entry != A_B0_5B_DUMMY; + is_custom = entry != A_B0_5B_TRAP; else - is_custom = entry != A_B0_DUMMY; + is_custom = entry != A_B0_TRAPS + call * 4; if (is_custom) { PSXBIOS_LOG("custom B%02x %s(0x%x, ) addr=%08x ra=%08x\n", call, biosB0n[call], a0, entry, ra); @@ -4138,7 
+4149,8 @@ static void hleC0() { u32 call = t1 & 0xff; u32 entry = loadRam32(A_C0_TABLE + call * 4); - if (call < 128 && entry != A_C0_DUMMY) { + use_cycles(4+7); + if (call < 128 && entry != A_C0_TRAPS + call * 4) { PSXBIOS_LOG("custom C%02x %s(0x%x, ) addr=%08x ra=%08x\n", call, biosC0n[call], a0, entry, ra); softCall(entry); @@ -4152,6 +4164,47 @@ static void hleC0() { psxBranchTest(); } +static void hleA0t() { + u32 call = (pc0 - A_A0_TRAPS) / 4 - 1; + if (call >= 256u || !biosA0[call]) { + log_unhandled("unexpected A trap @%08x ra=%08x\n", pc0 - 4, ra); + mips_return_void_c(1000); + } + else + biosA0[call](); + + //printf("A(%02x) -> %x\n", call, v0); + psxBranchTest(); +} + +static void hleB0t() { + u32 call = (pc0 - A_B0_TRAPS) / 4 - 1; + if (pc0 - 4 == A_B0_5B_TRAP) + call = 0x5b; + if (call >= 256u || !biosB0[call]) { + log_unhandled("unexpected B trap @%08x ra=%08x\n", pc0 - 4, ra); + mips_return_void_c(1000); + } + else + biosB0[call](); + + //printf("B(%02x) -> %x\n", call, v0); + psxBranchTest(); +} + +static void hleC0t() { + u32 call = (pc0 - A_C0_TRAPS) / 4 - 1; + if (call >= 128u || !biosC0[call]) { + log_unhandled("unexpected C trap @%08x ra=%08x\n", pc0 - 4, ra); + mips_return_void_c(1000); + } + else + biosC0[call](); + + //printf("C(%02x) -> %x\n", call, v0); + psxBranchTest(); +} + // currently not used static void hleBootstrap() { CheckCdrom(); @@ -4173,7 +4226,7 @@ static void hleExecRet() { psxRegs.pc = ra; } -void (* const psxHLEt[24])() = { +void (* const psxHLEt[hleop_count_])() = { hleDummy, hleA0, hleB0, hleC0, hleBootstrap, hleExecRet, psxBiosException, hleDummy, hleExc0_0_1, hleExc0_0_2, @@ -4184,6 +4237,7 @@ void (* const psxHLEt[24])() = { hleExc1_3_1, hleExc1_3_2, hleExc3_0_2_defint, hleExcPadCard1, hleExcPadCard2, + hleA0t, hleB0t, hleC0t, }; void psxBiosCheckExe(u32 t_addr, u32 t_size, int loading_state) diff --git a/libpcsxcore/psxhle.h b/libpcsxcore/psxhle.h index e6d2df813..a94d694fe 100644 --- a/libpcsxcore/psxhle.h +++ 
b/libpcsxcore/psxhle.h @@ -39,9 +39,11 @@ enum hle_op { hleop_exc1_3_1, hleop_exc1_3_2, hleop_exc3_0_2, hleop_exc_padcard1, hleop_exc_padcard2, + hleop_a0t, hleop_b0t, hleop_c0t, + hleop_count_ // must be last }; -extern void (* const psxHLEt[24])(); +extern void (* const psxHLEt[hleop_count_])(); #ifdef __cplusplus } From f993417941fb79ae2f22f72e0dc131cd3710a3af Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 17 Nov 2023 01:08:45 +0200 Subject: [PATCH 467/597] psxbios: implement strtol libretro/pcsx_rearmed#288 --- libpcsxcore/psxbios.c | 73 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 9 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 56239f20c..e654f7c9c 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -532,12 +532,8 @@ void psxBios_putc(void) // 0x09, 0x3B pc0 = ra; } -void psxBios_todigit(void) // 0x0a +static u32 do_todigit(u32 c) { - int c = a0; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x0a]); -#endif c &= 0xFF; if (c >= 0x30 && c < 0x3A) { c -= 0x30; @@ -549,14 +545,73 @@ void psxBios_todigit(void) // 0x0a c = c - 0x41 + 10; } else if (c >= 0x80) { + log_unhandled("todigit %02x\n", c); c = -1; } else { c = 0x0098967F; } - v0 = c; - pc0 = ra; + use_cycles(40); + return c; +} + +static void psxBios_todigit(void) // 0x0a +{ + mips_return(do_todigit(a0)); + PSXBIOS_LOG("psxBios_%s '%c' -> %u\n", biosA0n[0x0a], a0, v0); +} + +static void do_strtol(char *p, void *end_, u32 base, int can_neg) { + s32 n = 0, f = 0, t; + u32 *end = end_; + + use_cycles(12); + if (p == INVALID_PTR) { + mips_return(0); + return; + } + + for (; (0x09 <= *p && *p <= '\r') || *p == ' '; p++) + use_cycles(9); + if (can_neg) { + for (; *p == '-'; f = 1, p++) + use_cycles(4); + } + if (base == 0 || base > 36) + base = 10; + if (*p == '0') { + switch (*p++) { + case 'b': case 'B': base = 2; break; + case 'x': case 'X': base = 16; break; + } + } + else if (*p == 'o' || *p == 'O') { + base = 8; + 
p++; + } + + for (; (t = do_todigit(*p)) < base; p++) { + n = n * base + t; + use_cycles(12); + } + + n = (f ? -n : n); + if (end != INVALID_PTR) + *end = SWAP32(a0 + (p - Ra0)); + mips_return_c(n, 100); +} + +static void psxBios_strtoul() { // 0x0c + do_strtol(a0 ? Ra0 : INVALID_PTR, a1 ? Ra1 : INVALID_PTR, a2, 0); + PSXBIOS_LOG("psxBios_%s %s (%x), %x, %x -> 0x%x\n", + biosA0n[0x0c], a0 ? Ra0 : NULL, a0, a1, a2, v0); +} + +static void psxBios_strtol() { // 0x0d + do_strtol(a0 ? Ra0 : INVALID_PTR, a1 ? Ra1 : INVALID_PTR, a2, 1); + PSXBIOS_LOG("psxBios_%s %s (%x), %x, %x -> 0x%x\n", + biosA0n[0x0d], a0 ? Ra0 : NULL, a0, a1, a2, v0); } void psxBios_abs() { // 0x0e @@ -3382,8 +3437,8 @@ void psxBiosInit() { biosA0[0x09] = psxBios_putc; biosA0[0x0a] = psxBios_todigit; //biosA0[0x0b] = psxBios_atof; - //biosA0[0x0c] = psxBios_strtoul; - //biosA0[0x0d] = psxBios_strtol; + biosA0[0x0c] = psxBios_strtoul; + biosA0[0x0d] = psxBios_strtol; biosA0[0x0e] = psxBios_abs; biosA0[0x0f] = psxBios_labs; biosA0[0x10] = psxBios_atoi; From f926a62f27a3a513b8a4594728a8d6ace65920a3 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 19 Nov 2023 23:45:55 +0200 Subject: [PATCH 468/597] spu: adjust dma timing + a hack libretro/pcsx_rearmed#799 --- libpcsxcore/psxdma.c | 4 ++-- plugins/dfsound/dma.c | 27 ++++++++++++++++++++++----- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index fa8f33940..e3655b5e0 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -67,7 +67,7 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU // This should be much slower, like 12+ cycles/byte, it's like // that because the CPU runs too fast and fifo is not emulated. // See also set_dma_end(). 
- set_event(PSXINT_SPUDMA, words * 4); + set_event(PSXINT_SPUDMA, words * 4 * 4); return; case 0x01000200: //spu to cpu transfer @@ -78,7 +78,7 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU psxCpu->Clear(madr, words_copy); HW_DMA4_MADR = SWAPu32(madr + words_copy * 4); - set_event(PSXINT_SPUDMA, words * 4); + set_event(PSXINT_SPUDMA, words * 4 * 4); return; default: diff --git a/plugins/dfsound/dma.c b/plugins/dfsound/dma.c index 25a0aefd5..fde1f835b 100644 --- a/plugins/dfsound/dma.c +++ b/plugins/dfsound/dma.c @@ -51,7 +51,7 @@ void CALLBACK SPUreadDMAMem(unsigned short *pusPSXMem, int iSize, addr &= 0x7fffe; } if ((spu.spuCtrl & CTRL_IRQ) && irq_after < iSize * 2) { - log_unhandled("rdma spu irq: %x/%x+%x\n", irq_addr, spu.spuAddr, iSize * 2); + log_unhandled("rdma spu irq: %x/%x-%x\n", irq_addr, spu.spuAddr, addr); do_irq_io(irq_after); } spu.spuAddr = addr; @@ -68,7 +68,7 @@ void CALLBACK SPUwriteDMAMem(unsigned short *pusPSXMem, int iSize, unsigned int addr = spu.spuAddr, irq_addr = regAreaGet(H_SPUirqAddr) << 3; int i, irq_after; - do_samples_if_needed(cycles, 1, 2); + do_samples_if_needed(cycles + iSize*2 * 4, 1, 2); irq_after = (irq_addr - addr) & 0x7ffff; spu.bMemDirty = 1; @@ -87,11 +87,28 @@ void CALLBACK SPUwriteDMAMem(unsigned short *pusPSXMem, int iSize, } } if ((spu.spuCtrl & CTRL_IRQ) && irq_after < iSize * 2) { - log_unhandled("wdma spu irq: %x/%x+%x (%u)\n", - irq_addr, spu.spuAddr, iSize * 2, irq_after); + log_unhandled("%u wdma spu irq: %x/%x-%x (%u)\n", + cycles, irq_addr, spu.spuAddr, addr, irq_after); // this should be consistent with psxdma.c timing // might also need more delay like in set_dma_end() - do_irq_io(irq_after); + do_irq_io(irq_after * 4); + } + for (i = 0; i < 24; i++) { + size_t ediff, p = spu.s_chan[i].pCurr - spu.spuMemC; + if (spu.s_chan[i].ADSRX.State == ADSR_RELEASE && !spu.s_chan[i].ADSRX.EnvelopeVol) + continue; + ediff = addr - p; + if (spu.spuAddr < p && p < spu.spuAddr + iSize * 2) { + log_unhandled("%u spu 
ch%02d play %zx dma %x-%x (%zd)\n", + cycles, i, p, spu.spuAddr, addr, ediff); + //exit(1); + } + // a hack for the super annoying timing issues in The Emperor's New Groove + // (which is a game bug, but tends to trigger more here) + if (ediff <= 0x20u) { + spu.s_chan[i].pCurr += ediff; + break; + } } spu.spuAddr = addr; set_dma_end(iSize, cycles); From 36c76c861a26d2d2ce6118df1b20a389b8f3afb1 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 19 Nov 2023 12:50:39 +0100 Subject: [PATCH 469/597] Makefile: Enable address sanitizer with DEBUG_ASAN=1 The Address Sanitizer (built in GCC and LLVM) is an extremely powerful tool, moreso than e.g. Valgrind for detecting memory issues. It is not available on every architecture/OS combination (see https://github.com/google/sanitizers/wiki/AddressSanitizer), so it can only be enabled by compiling with DEBUG_ASAN=1. Signed-off-by: Paul Cercueil --- Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index af3e1ab60..6ded0df00 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,9 @@ CFLAGS += -Wall -ggdb -Iinclude -ffast-math ifndef DEBUG CFLAGS += -O2 -DNDEBUG endif +ifeq ($(DEBUG_ASAN), 1) +CFLAGS += -fsanitize=address +endif CFLAGS += -DP_HAVE_MMAP=$(if $(NO_MMAP),0,1) \ -DP_HAVE_PTHREAD=$(if $(NO_PTHREAD),0,1) \ -DP_HAVE_POSIX_MEMALIGN=$(if $(NO_POSIX_MEMALIGN),0,1) \ @@ -36,6 +39,9 @@ endif CC_LINK ?= $(CC) CC_AS ?= $(CC) LDFLAGS += $(MAIN_LDFLAGS) +ifeq ($(DEBUG_ASAN), 1) +LDFLAGS += -static-libasan +endif EXTRA_LDFLAGS ?= -Wl,-Map=$@.map LDLIBS += $(MAIN_LDLIBS) ifdef PCNT From 5338a93079458c4437138830ef711f61c2feabe6 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 19 Nov 2023 12:54:22 +0100 Subject: [PATCH 470/597] frontend: Fix buffer overrun The format passed to sscanf() requested a 256-chars string, but that's excluding the terminating \0 character as the 257th character. 
Signed-off-by: Paul Cercueil --- frontend/menu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/menu.c b/frontend/menu.c index 9200e10de..413f3caf8 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -217,7 +217,7 @@ static int optional_cdimg_filter(struct dirent **namelist, int count, const char *basedir) { const char *ext, *p; - char buf[256], buf2[256]; + char buf[256], buf2[257]; int i, d, ret, good_cue; struct STAT statf; FILE *f; From e5241564f4444496f51f4de9bf533b1d910449ec Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 20 Nov 2023 02:17:24 +0200 Subject: [PATCH 471/597] default dualshock to digital again, unless overridden ... or a key combo is used libretro/pcsx_rearmed#765 --- frontend/libretro.c | 36 +++++++++++++++++++++++++++++--- frontend/libretro_core_options.h | 14 +++++++++++++ include/psemu_plugin_defs.h | 2 +- libpcsxcore/database.c | 13 ++++++++++++ libpcsxcore/plugins.c | 20 ++++++++++-------- libpcsxcore/plugins.h | 1 + libpcsxcore/psxcommon.h | 1 + 7 files changed, 74 insertions(+), 13 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index c47d260b0..7f331574d 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -145,7 +145,9 @@ int in_mouse[8][2]; int multitap1 = 0; int multitap2 = 0; int in_enable_vibration = 1; -int in_enable_crosshair[2] = { 0, 0 }; +static int in_enable_crosshair[2] = { 0, 0 }; +static bool in_dualshock_toggle_enable = 0; +static bool in_dualshock_toggling = 0; // NegCon adjustment parameters // > The NegCon 'twist' action is somewhat awkward when mapped @@ -2011,6 +2013,14 @@ static void update_variables(bool in_flight) in_enable_vibration = 1; } + var.value = NULL; + var.key = "pcsx_rearmed_analog_toggle"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + in_dualshock_toggle_enable = (strcmp(var.value, "enabled") == 0); + } + var.value = NULL; var.key = "pcsx_rearmed_dithering"; @@ -2863,10 +2873,14 @@ static void update_input_mouse(int
port, int ret) static void update_input(void) { - // reset all keystate, query libretro for keystate + int16_t analog_combo = + (1 << RETRO_DEVICE_ID_JOYPAD_L) | + (1 << RETRO_DEVICE_ID_JOYPAD_R) | + (1 << RETRO_DEVICE_ID_JOYPAD_SELECT); int i; int j; + // reset all keystate, query libretro for keystate for (i = 0; i < PORTS_NUMBER; i++) { int16_t ret = 0; @@ -2903,7 +2917,23 @@ static void update_input(void) update_input_mouse(i, ret); break; default: - // Query digital inputs + // dualshock ANALOG toggle? + if (type == PSE_PAD_TYPE_ANALOGPAD && in_dualshock_toggle_enable + && (ret & analog_combo) == analog_combo) + { + if (!in_dualshock_toggling) + { + int state = padToggleAnalog(i); + char msg[32]; + snprintf(msg, sizeof(msg), "ANALOG %s", state ? "ON" : "OFF"); + show_notification(msg, 800, 1); + in_dualshock_toggling = true; + } + return; + } + in_dualshock_toggling = false; + + // Set digital inputs for (j = 0; j < RETRO_PSX_MAP_LEN; j++) if (ret & (1 << j)) in_keystate[i] |= retro_psx_map[j]; diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 5ec62f4b9..85771e2ce 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -853,6 +853,20 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "enabled", }, + { + "pcsx_rearmed_analog_toggle", + "DualShock Analog Mode Toggle", + NULL, + "When the input device type is DualShock, this option allows the emulated DualShock to be toggled between DIGITAL and ANALOG mode like original hardware. 
The button combination is L1 + R1 + Select.", + NULL, + "input", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "enabled", + }, { "pcsx_rearmed_multitap", "Multitap Mode", diff --git a/include/psemu_plugin_defs.h b/include/psemu_plugin_defs.h index 3f4d21b2a..6136ca703 100644 --- a/include/psemu_plugin_defs.h +++ b/include/psemu_plugin_defs.h @@ -228,7 +228,7 @@ typedef struct unsigned char padMode; // 0 : digital 1: analog unsigned char cmd4dConfig[6]; unsigned int lastUseFrame; - unsigned int digitalModeFrames; + unsigned int unused; unsigned char configModeUsed; unsigned char padding[3]; } ds; diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index ebdb69b64..7b860e346 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -47,6 +47,12 @@ static const char * const gpu_centering_hack_db[] = "SLPM86009", }; +static const char * const dualshock_init_analog_hack_db[] = +{ + /* Formula 1 Championship Edition */ + "SLUS00546", +}; + #define HACK_ENTRY(var, list) \ { #var, &Config.hacks.var, list, ARRAY_SIZE(list) } @@ -63,6 +69,7 @@ hack_db[] = HACK_ENTRY(gpu_slow_list_walking, gpu_slow_llist_db), HACK_ENTRY(gpu_busy, gpu_busy_hack_db), HACK_ENTRY(gpu_centering, gpu_centering_hack_db), + HACK_ENTRY(dualshock_init_analog, dualshock_init_analog_hack_db), }; static const struct @@ -116,6 +123,12 @@ void Apply_Hacks_Cdrom(void) } } + if (Config.hacks.dualshock_init_analog) { + // assume the default is off, see LoadPAD1plugin() + for (i = 0; i < 8; i++) + padToggleAnalog(i); + } + /* Apply Memory card hack for Codename Tenka. 
(The game needs one of the memory card slots to be empty) */ for (i = 0; i < ARRAY_SIZE(MemorycardHack_db); i++) { diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index d44442b01..75e1f5fbe 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -490,15 +490,8 @@ static void initBufForRequest(int padIndex, char value) { return; } - // switch to analog mode automatically after the game finishes init - if (value == 0x42 && pads[padIndex].ds.padMode == 0) - pads[padIndex].ds.digitalModeFrames++; - if (pads[padIndex].ds.digitalModeFrames == 60*4) { - pads[padIndex].ds.padMode = 1; - pads[padIndex].ds.digitalModeFrames = 0; - } - - if ((u32)(frame_counter - pads[padIndex].ds.lastUseFrame) > 60u) + if ((u32)(frame_counter - pads[padIndex].ds.lastUseFrame) > 60u + && !Config.hacks.dualshock_init_analog) pads[padIndex].ds.padMode = 0; // according to nocash pads[padIndex].ds.lastUseFrame = frame_counter; @@ -991,6 +984,15 @@ int padFreeze(void *f, int Mode) { return 0; } +int padToggleAnalog(unsigned int index) +{ + int r = -1; + + if (index < sizeof(pads) / sizeof(pads[0])) + r = (pads[index].ds.padMode ^= 1); + return r; +} + void *hNETDriver = NULL; diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index 269ef18a5..772452d09 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -384,6 +384,7 @@ boolean UsingIso(void); void SetCdOpenCaseTime(s64 time); int padFreeze(void *f, int Mode); +int padToggleAnalog(unsigned int index); extern void pl_gun_byte2(int port, unsigned char byte); extern void plat_trigger_vibrate(int pad, int low, int high); diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index 09fb39a4e..01b2a9aa9 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -152,6 +152,7 @@ typedef struct { boolean gpu_slow_list_walking; boolean gpu_busy; boolean gpu_centering; + boolean dualshock_init_analog; } hacks; } PcsxConfig; From 3c98e40014d5808f23fc70cc231cb257ff1d50ea Mon Sep 17 00:00:00 2001 
From: notaz Date: Mon, 20 Nov 2023 02:39:56 +0200 Subject: [PATCH 472/597] update analog default list --- libpcsxcore/database.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 7b860e346..640f57a23 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -51,6 +51,8 @@ static const char * const dualshock_init_analog_hack_db[] = { /* Formula 1 Championship Edition */ "SLUS00546", + /* Gran Turismo 2 */ + "SCUS94455", "SCUS94488", "SCPS10116", "SCPS10117", "SCES02380", "SCES-12380", }; #define HACK_ENTRY(var, list) \ From a3d87cd770ffb9b5bcbf519683ec7f29d67794e6 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 20 Nov 2023 23:06:31 +0200 Subject: [PATCH 473/597] libretro: add a few options for the analog combo like in the other core --- frontend/libretro.c | 38 +++++++++++++++++++++++--------- frontend/libretro_core_options.h | 14 +++++++----- libpcsxcore/database.c | 2 +- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 7f331574d..80dc8a9da 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -146,8 +146,8 @@ int multitap1 = 0; int multitap2 = 0; int in_enable_vibration = 1; static int in_enable_crosshair[2] = { 0, 0 }; -static bool in_dualshock_toggle_enable = 0; -static bool in_dualshock_toggling = 0; +static int in_dualshock_analog_combo = 0; +static bool in_dualshock_toggling = false; // NegCon adjustment parameters // > The NegCon 'twist' action is somewhat awkward when mapped @@ -2014,11 +2014,27 @@ static void update_variables(bool in_flight) } var.value = NULL; - var.key = "pcsx_rearmed_analog_toggle"; + var.key = "pcsx_rearmed_analog_combo"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - in_dualshock_toggle_enable = (strcmp(var.value, "enabled") == 0); + if (strcmp(var.value, "l1+r1+select") == 0) + in_dualshock_analog_combo = (1 << RETRO_DEVICE_ID_JOYPAD_L) | + (1 << RETRO_DEVICE_ID_JOYPAD_R) | 
(1 << RETRO_DEVICE_ID_JOYPAD_SELECT); + else if (strcmp(var.value, "l1+r1+start") == 0) + in_dualshock_analog_combo = (1 << RETRO_DEVICE_ID_JOYPAD_L) | + (1 << RETRO_DEVICE_ID_JOYPAD_R) | (1 << RETRO_DEVICE_ID_JOYPAD_START); + else if (strcmp(var.value, "l1+r1+l3") == 0) + in_dualshock_analog_combo = (1 << RETRO_DEVICE_ID_JOYPAD_L) | + (1 << RETRO_DEVICE_ID_JOYPAD_R) | (1 << RETRO_DEVICE_ID_JOYPAD_L3); + else if (strcmp(var.value, "l1+r1+r3") == 0) + in_dualshock_analog_combo = (1 << RETRO_DEVICE_ID_JOYPAD_L) | + (1 << RETRO_DEVICE_ID_JOYPAD_R) | (1 << RETRO_DEVICE_ID_JOYPAD_R3); + else if (strcmp(var.value, "l3+r3") == 0) + in_dualshock_analog_combo = (1 << RETRO_DEVICE_ID_JOYPAD_L3) | + (1 << RETRO_DEVICE_ID_JOYPAD_R3); + else + in_dualshock_analog_combo = 0; } var.value = NULL; @@ -2873,17 +2889,13 @@ static void update_input_mouse(int port, int ret) static void update_input(void) { - int16_t analog_combo = - (1 << RETRO_DEVICE_ID_JOYPAD_L) | - (1 << RETRO_DEVICE_ID_JOYPAD_R) | - (1 << RETRO_DEVICE_ID_JOYPAD_SELECT); int i; int j; // reset all keystate, query libretro for keystate for (i = 0; i < PORTS_NUMBER; i++) { - int16_t ret = 0; + int32_t ret = 0; int type = in_type[i]; in_keystate[i] = 0; @@ -2892,7 +2904,11 @@ static void update_input(void) continue; if (libretro_supports_bitmasks) + { ret = input_state_cb(i, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_MASK); + // undo int16 sign extension (why input_state_cb returns int16 in the first place?) + ret &= (1 << (RETRO_DEVICE_ID_JOYPAD_R3 + 1)) - 1; + } else { for (j = 0; j < (RETRO_DEVICE_ID_JOYPAD_R3 + 1); j++) @@ -2918,8 +2934,8 @@ static void update_input(void) break; default: // dualshock ANALOG toggle? 
- if (type == PSE_PAD_TYPE_ANALOGPAD && in_dualshock_toggle_enable - && (ret & analog_combo) == analog_combo) + if (type == PSE_PAD_TYPE_ANALOGPAD && in_dualshock_analog_combo != 0 + && ret == in_dualshock_analog_combo) { if (!in_dualshock_toggling) { diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 85771e2ce..69100c373 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -854,18 +854,22 @@ struct retro_core_option_v2_definition option_defs_us[] = { "enabled", }, { - "pcsx_rearmed_analog_toggle", - "DualShock Analog Mode Toggle", + "pcsx_rearmed_analog_combo", + "DualShock Analog Mode Toggle Key Combo", NULL, - "When the input device type is DualShock, this option allows the emulated DualShock to be toggled between DIGITAL and ANALOG mode like original hardware. The button combination is L1 + R1 + Select.", + "When the input device type is DualShock, this option allows the emulated DualShock to be toggled between DIGITAL and ANALOG mode like original hardware. 
You can select the button combination for this.", NULL, "input", { { "disabled", NULL }, - { "enabled", NULL }, + { "l1+r1+select", "L1 + R1 + Select" }, + { "l1+r1+start", "L1 + R1 + Start" }, + { "l1+r1+l3", "L1 + R1 + L3" }, + { "l1+r1+r3", "L1 + R1 + R3" }, + { "l3+r3", "L3 + R3" }, { NULL, NULL }, }, - "enabled", + "l1+r1+select" }, { "pcsx_rearmed_multitap", diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 640f57a23..997b13fbc 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -52,7 +52,7 @@ static const char * const dualshock_init_analog_hack_db[] = /* Formula 1 Championship Edition */ "SLUS00546", /* Gran Turismo 2 */ - "SCUS94455", "SCUS94488", "SCPS10116", "SCPS10117", "SCES02380", "SCES-12380", + "SCUS94455", "SCUS94488", "SCPS10116", "SCPS10117", "SCES02380", "SCES12380", }; #define HACK_ENTRY(var, list) \ From 38b8a211aad8d2c485ccf0c0cbb58d965aac3483 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 22 Nov 2023 00:02:50 +0200 Subject: [PATCH 474/597] apply cdrom volume in spu The previous sector delay thing breaks rhythm games. Also much easier to deal with timing in SPU code. 
--- frontend/plugin.c | 16 +------ libpcsxcore/cdrom.c | 90 ++++++++++++++++--------------------- libpcsxcore/decode_xa.h | 2 +- libpcsxcore/plugins.c | 8 +++- libpcsxcore/plugins.h | 8 ++-- plugins/dfsound/dma.h | 35 --------------- plugins/dfsound/externals.h | 18 +++----- plugins/dfsound/freeze.c | 6 +-- plugins/dfsound/psemuxa.h | 2 +- plugins/dfsound/spu.c | 20 +++++++-- plugins/dfsound/spu.h | 33 +++++++++----- plugins/dfsound/stdafx.h | 8 ++++ plugins/dfsound/xa.c | 60 ++++++++++++++++++++++--- plugins/spunull/spunull.c | 5 +++ 14 files changed, 169 insertions(+), 142 deletions(-) delete mode 100644 plugins/dfsound/dma.h diff --git a/frontend/plugin.c b/frontend/plugin.c index 88d756ebd..c3c104af3 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -46,20 +46,7 @@ static long CALLBACK CDRgetTE(unsigned char _, unsigned char *__, unsigned char static void CALLBACK GPUdisplayText(char *_) { return; } /* SPU */ -extern long CALLBACK SPUopen(void); -extern long CALLBACK SPUinit(void); -extern long CALLBACK SPUshutdown(void); -extern long CALLBACK SPUclose(void); -extern void CALLBACK SPUwriteRegister(unsigned long, unsigned short, unsigned int); -extern unsigned short CALLBACK SPUreadRegister(unsigned long, unsigned int); -extern void CALLBACK SPUwriteDMAMem(unsigned short *, int, unsigned int); -extern void CALLBACK SPUreadDMAMem(unsigned short *, int, unsigned int); -extern void CALLBACK SPUplayADPCMchannel(void *, unsigned int, int); -extern void CALLBACK SPUregisterCallback(void (*cb)(int)); -extern void CALLBACK SPUregisterScheduleCb(void (*cb)(unsigned int)); -extern long CALLBACK SPUfreeze(unsigned int, void *, unsigned int); -extern void CALLBACK SPUasync(unsigned int, unsigned int); -extern int CALLBACK SPUplayCDDAchannel(short *, int, unsigned int, int); +#include "../plugins/dfsound/spu.h" /* PAD */ static long CALLBACK PADinit(long _) { return 0; } @@ -191,6 +178,7 @@ static const struct { DIRECT_SPU(SPUregisterScheduleCb), 
DIRECT_SPU(SPUasync), DIRECT_SPU(SPUplayCDDAchannel), + DIRECT_SPU(SPUsetCDvol), /* PAD */ DIRECT_PAD(PADinit), DIRECT_PAD(PADshutdown), diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 013be599c..fac5fd39a 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -633,7 +633,6 @@ static u32 cdrAlignTimingHack(u32 cycles) static void cdrUpdateTransferBuf(const u8 *buf); static void cdrReadInterrupt(void); static void cdrPrepCdda(s16 *buf, int samples); -static void cdrAttenuate(s16 *buf, int samples, int stereo); static void msfiAdd(u8 *msfi, u32 count) { @@ -693,9 +692,8 @@ void cdrPlayReadInterrupt(void) if (!cdr.IrqStat && (cdr.Mode & (MODE_AUTOPAUSE|MODE_REPORT))) cdrPlayInterrupt_Autopause(); - if (!cdr.Muted && cdr.Play && !Config.Cdda) { + if (cdr.Play && !Config.Cdda) { cdrPrepCdda(read_buf, CD_FRAMESIZE_RAW / 4); - cdrAttenuate(read_buf, CD_FRAMESIZE_RAW / 4, 1); SPU_playCDDAchannel(read_buf, CD_FRAMESIZE_RAW, psxRegs.cycle, 0); } @@ -933,6 +931,11 @@ void cdrInterrupt(void) { break; case CdlPause: + if (cdr.AdpcmActive) { + cdr.AdpcmActive = 0; + cdr.Xa.nsamples = 0; + SPU_playADPCMchannel(&cdr.Xa, psxRegs.cycle, 1); // flush adpcm + } StopCdda(); StopReading(); @@ -978,9 +981,11 @@ void cdrInterrupt(void) { StopReading(); SetPlaySeekRead(cdr.StatP, 0); cdr.LocL[0] = LOCL_INVALID; - cdr.Muted = FALSE; cdr.Mode = MODE_SIZE_2340; /* This fixes This is Football 2, Pooh's Party lockups */ cdr.DriveState = DRIVESTATE_PAUSED; + cdr.Muted = FALSE; + SPU_setCDvol(cdr.AttenuatorLeftToLeft, cdr.AttenuatorLeftToRight, + cdr.AttenuatorRightToLeft, cdr.AttenuatorRightToRight, psxRegs.cycle); second_resp_time = not_ready ? 
70000 : 4100000; start_rotating = 1; break; @@ -992,10 +997,13 @@ void cdrInterrupt(void) { case CdlMute: cdr.Muted = TRUE; + SPU_setCDvol(0, 0, 0, 0, psxRegs.cycle); break; case CdlDemute: cdr.Muted = FALSE; + SPU_setCDvol(cdr.AttenuatorLeftToLeft, cdr.AttenuatorLeftToRight, + cdr.AttenuatorRightToLeft, cdr.AttenuatorRightToRight, psxRegs.cycle); break; case CdlSetfilter: @@ -1275,44 +1283,6 @@ static void cdrPrepCdda(s16 *buf, int samples) #endif } -static void cdrAttenuate(s16 *buf, int samples, int stereo) -{ - int i, l, r; - int ll = cdr.AttenuatorLeftToLeft; - int lr = cdr.AttenuatorLeftToRight; - int rl = cdr.AttenuatorRightToLeft; - int rr = cdr.AttenuatorRightToRight; - - if (lr == 0 && rl == 0 && 0x78 <= ll && ll <= 0x88 && 0x78 <= rr && rr <= 0x88) - return; - - if (!stereo && ll == 0x40 && lr == 0x40 && rl == 0x40 && rr == 0x40) - return; - - if (stereo) { - for (i = 0; i < samples; i++) { - l = buf[i * 2]; - r = buf[i * 2 + 1]; - l = (l * ll + r * rl) >> 7; - r = (r * rr + l * lr) >> 7; - ssat32_to_16(l); - ssat32_to_16(r); - buf[i * 2] = l; - buf[i * 2 + 1] = r; - } - } - else { - for (i = 0; i < samples; i++) { - l = buf[i]; - l = l * (ll + rl) >> 7; - //r = r * (rr + lr) >> 7; - ssat32_to_16(l); - //ssat32_to_16(r); - buf[i] = l; - } - } -} - static void cdrReadInterruptSetResult(unsigned char result) { if (cdr.IrqStat) { @@ -1346,6 +1316,7 @@ static void cdrReadInterrupt(void) int deliver_data = 1; u8 subqPos[3]; int read_ok; + int is_start; memcpy(subqPos, cdr.SetSectorPlay, sizeof(subqPos)); msfiAdd(subqPos, cdr.SubqForwardSectors); @@ -1407,12 +1378,10 @@ static void cdrReadInterrupt(void) if (Config.Xa) break; - if (!cdr.Muted && cdr.AdpcmActive) { - cdrAttenuate(cdr.Xa.pcm, cdr.Xa.nsamples, cdr.Xa.stereo); - SPU_playADPCMchannel(&cdr.Xa, psxRegs.cycle, 0); - } - // decode next - cdr.AdpcmActive = !xa_decode_sector(&cdr.Xa, buf + 4, !cdr.AdpcmActive); + is_start = !cdr.AdpcmActive; + cdr.AdpcmActive = !xa_decode_sector(&cdr.Xa, buf + 4, 
is_start); + if (cdr.AdpcmActive) + SPU_playADPCMchannel(&cdr.Xa, psxRegs.cycle, is_start); } while (0); if ((cdr.Mode & MODE_SF) && (subhdr->mode & 0x44) == 0x44) // according to nocash @@ -1567,6 +1536,7 @@ unsigned char cdrRead3(void) { void cdrWrite3(unsigned char rt) { const char *rnames[] = { "req", "ifl", "alr", "ava" }; (void)rnames; + u8 ll, lr, rl, rr; CDR_LOG_IO("cdr w3.%s: %02x\n", rnames[cdr.Ctrl & 3], rt); switch (cdr.Ctrl & 3) { @@ -1602,11 +1572,20 @@ void cdrWrite3(unsigned char rt) { cdr.AttenuatorLeftToRightT = rt; return; case 3: + if (rt & 0x01) + log_unhandled("Mute ADPCM?\n"); if (rt & 0x20) { - memcpy(&cdr.AttenuatorLeftToLeft, &cdr.AttenuatorLeftToLeftT, 4); - CDR_LOG("CD-XA Volume: %02x %02x | %02x %02x\n", - cdr.AttenuatorLeftToLeft, cdr.AttenuatorLeftToRight, - cdr.AttenuatorRightToLeft, cdr.AttenuatorRightToRight); + ll = cdr.AttenuatorLeftToLeftT; lr = cdr.AttenuatorLeftToRightT; + rl = cdr.AttenuatorRightToLeftT; rr = cdr.AttenuatorRightToRightT; + if (ll == cdr.AttenuatorLeftToLeft && + lr == cdr.AttenuatorLeftToRight && + rl == cdr.AttenuatorRightToLeft && + rr == cdr.AttenuatorRightToRight) + return; + cdr.AttenuatorLeftToLeftT = ll; cdr.AttenuatorLeftToRightT = lr; + cdr.AttenuatorRightToLeftT = rl; cdr.AttenuatorRightToRightT = rr; + CDR_LOG_I("CD-XA Volume: %02x %02x | %02x %02x\n", ll, lr, rl, rr); + SPU_setCDvol(ll, lr, rl, rr, psxRegs.cycle); } return; } @@ -1750,6 +1729,8 @@ void cdrReset() { cdr.AttenuatorLeftToRight = 0x00; cdr.AttenuatorRightToLeft = 0x00; cdr.AttenuatorRightToRight = 0x80; + SPU_setCDvol(cdr.AttenuatorLeftToLeft, cdr.AttenuatorLeftToRight, + cdr.AttenuatorRightToLeft, cdr.AttenuatorRightToRight, psxRegs.cycle); getCdInfo(); } @@ -1772,6 +1753,7 @@ int cdrFreeze(void *f, int Mode) { gzfreeze(&tmp, sizeof(tmp)); if (Mode == 0) { + u8 ll = 0, lr = 0, rl = 0, rr = 0; getCdInfo(); cdr.FifoOffset = tmp < DATA_SIZE ? 
tmp : DATA_SIZE; @@ -1794,6 +1776,10 @@ int cdrFreeze(void *f, int Mode) { if (!Config.Cdda) CDR_play(cdr.SetSectorPlay); } + if (!cdr.Muted) + ll = cdr.AttenuatorLeftToLeft, lr = cdr.AttenuatorLeftToLeft, + rl = cdr.AttenuatorRightToLeft, rr = cdr.AttenuatorRightToRight; + SPU_setCDvol(ll, lr, rl, rr, psxRegs.cycle); } return 0; diff --git a/libpcsxcore/decode_xa.h b/libpcsxcore/decode_xa.h index 54065356a..2d85c9315 100644 --- a/libpcsxcore/decode_xa.h +++ b/libpcsxcore/decode_xa.h @@ -30,7 +30,7 @@ typedef struct { s32 y0, y1; } ADPCM_Decode_t; -typedef struct { +typedef struct xa_decode { int freq; int nbits; int stereo; diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index 75e1f5fbe..d7c2acb6e 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -86,6 +86,7 @@ SPUregisterCallback SPU_registerCallback; SPUregisterScheduleCb SPU_registerScheduleCb; SPUasync SPU_async; SPUplayCDDAchannel SPU_playCDDAchannel; +SPUsetCDvol SPU_setCDvol; PADconfigure PAD1_configure; PADabout PAD1_about; @@ -179,7 +180,7 @@ static const char *err; #define LoadSym(dest, src, name, checkerr) { \ dest = (src)SysLoadSym(drv, name); \ - if (checkerr) { CheckErr(name); } else SysLibError(); \ + if (checkerr) { CheckErr(name); } \ } void *hGPUDriver = NULL; @@ -313,13 +314,15 @@ static int LoadCDRplugin(const char *CDRdll) { static void *hSPUDriver = NULL; static void CALLBACK SPU__registerScheduleCb(void (CALLBACK *cb)(unsigned int)) {} +static void CALLBACK SPU__setCDvol(unsigned char ll, unsigned char lr, + unsigned char rl, unsigned char rr, unsigned int cycle) {} #define LoadSpuSym1(dest, name) \ LoadSym(SPU_##dest, SPU##dest, name, TRUE); #define LoadSpuSym0(dest, name) \ LoadSym(SPU_##dest, SPU##dest, name, FALSE); \ - if (SPU_##dest == NULL) SPU_##dest = (SPU##dest) SPU__##dest; + if (SPU_##dest == NULL) SPU_##dest = SPU__##dest; #define LoadSpuSymN(dest, name) \ LoadSym(SPU_##dest, SPU##dest, name, FALSE); @@ -346,6 +349,7 @@ static int LoadSPUplugin(const 
char *SPUdll) { LoadSpuSym0(registerScheduleCb, "SPUregisterScheduleCb"); LoadSpuSymN(async, "SPUasync"); LoadSpuSymN(playCDDAchannel, "SPUplayCDDAchannel"); + LoadSpuSym0(setCDvol, "SPUsetCDvol"); return 0; } diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index 772452d09..5149d4682 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -179,7 +179,7 @@ typedef struct { uint32_t PluginVersion; uint32_t Size; } SPUFreezeHdr_t; -typedef struct { +typedef struct SPUFreeze { unsigned char PluginName[8]; uint32_t PluginVersion; uint32_t Size; @@ -188,9 +188,10 @@ typedef struct { xa_decode_t xa; unsigned char *unused; } SPUFreeze_t; -typedef long (CALLBACK* SPUfreeze)(uint32_t, SPUFreeze_t *, uint32_t); -typedef void (CALLBACK* SPUasync)(uint32_t, uint32_t); +typedef long (CALLBACK* SPUfreeze)(unsigned int, struct SPUFreeze *, unsigned int); +typedef void (CALLBACK* SPUasync)(unsigned int, unsigned int); typedef int (CALLBACK* SPUplayCDDAchannel)(short *, int, unsigned int, int); +typedef void (CALLBACK* SPUsetCDvol)(unsigned char, unsigned char, unsigned char, unsigned char, unsigned int); // SPU function pointers extern SPUinit SPU_init; @@ -207,6 +208,7 @@ extern SPUregisterCallback SPU_registerCallback; extern SPUregisterScheduleCb SPU_registerScheduleCb; extern SPUasync SPU_async; extern SPUplayCDDAchannel SPU_playCDDAchannel; +extern SPUsetCDvol SPU_setCDvol; // PAD Functions typedef long (CALLBACK* PADconfigure)(void); diff --git a/plugins/dfsound/dma.h b/plugins/dfsound/dma.h deleted file mode 100644 index 4982432b9..000000000 --- a/plugins/dfsound/dma.h +++ /dev/null @@ -1,35 +0,0 @@ -/*************************************************************************** - dma.h - description - ------------------- - begin : Wed May 15 2002 - copyright : (C) 2002 by Pete Bernert - email : BlackDove@addcom.de - ***************************************************************************/ - 
-/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. See also the license.txt file for * - * additional informations. * - * * - ***************************************************************************/ - -//*************************************************************************// -// History of changes: -// -// 2002/05/15 - Pete -// - generic cleanup for the Peops release -// -//*************************************************************************// - -#ifndef __P_DMA_H__ -#define __P_DMA_H__ - -unsigned short CALLBACK SPUreadDMA(void); -void CALLBACK SPUreadDMAMem(unsigned short * pusPSXMem,int iSize); -void CALLBACK SPUwriteDMA(unsigned short val); -void CALLBACK SPUwriteDMAMem(unsigned short * pusPSXMem,int iSize); - -#endif /* __P_DMA_H__ */ diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index fef5f9ccc..e85c191bd 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -89,17 +89,6 @@ typedef struct /////////////////////////////////////////////////////////// -// Tmp Flags - -// used for debug channel muting -#define FLAG_MUTE 1 - -// used for simple interpolation -#define FLAG_IPOL0 2 -#define FLAG_IPOL1 4 - -/////////////////////////////////////////////////////////// - // MAIN CHANNEL STRUCT typedef struct { @@ -225,6 +214,10 @@ typedef struct int iLeftXAVol; int iRightXAVol; + struct { // channel volume in the cd controller + unsigned char ll, lr, rl, rr; // see cdr.Attenuator* in cdrom.c + } cdv; // applied on spu side for easier emulation + unsigned int last_keyon_cycles; union { @@ -289,4 +282,7 @@ void do_irq_io(int cycles_after); #endif +void FeedXA(const xa_decode_t *xap); +void FeedCDDA(unsigned char *pcm, int 
nBytes); + #endif /* __P_SOUND_EXTERNALS_H__ */ diff --git a/plugins/dfsound/freeze.c b/plugins/dfsound/freeze.c index 32c07dbde..f56c88346 100644 --- a/plugins/dfsound/freeze.c +++ b/plugins/dfsound/freeze.c @@ -107,7 +107,7 @@ typedef struct ADSRInfoEx_orig ADSRX; // next ADSR settings (will be moved to active on sample start) } SPUCHAN_orig; -typedef struct +typedef struct SPUFreeze { char szSPUName[8]; uint32_t ulFreezeVersion; @@ -236,8 +236,8 @@ static void load_register(unsigned long reg, unsigned int cycles) // SPUFREEZE: called by main emu on savestate load/save //////////////////////////////////////////////////////////////////////// -long CALLBACK SPUfreeze(uint32_t ulFreezeMode, SPUFreeze_t * pF, - uint32_t cycles) +long CALLBACK SPUfreeze(unsigned int ulFreezeMode, SPUFreeze_t * pF, + unsigned int cycles) { SPUOSSFreeze_t * pFO = NULL; int i; diff --git a/plugins/dfsound/psemuxa.h b/plugins/dfsound/psemuxa.h index 402d27337..11b748dfa 100644 --- a/plugins/dfsound/psemuxa.h +++ b/plugins/dfsound/psemuxa.h @@ -11,7 +11,7 @@ typedef struct int y0, y1; } ADPCM_Decode_t; -typedef struct +typedef struct xa_decode { int freq; int nbits; diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 7501df8b3..1cc1cbf94 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -27,6 +27,7 @@ #include "registers.h" #include "out.h" #include "spu_config.h" +#include "spu.h" #ifdef __arm__ #include "arm_features.h" @@ -835,7 +836,7 @@ static void do_channels(int ns_to) mix_chan(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume); } - MixXA(spu.SSumLR, RVB, ns_to, spu.decode_pos); + MixCD(spu.SSumLR, RVB, ns_to, spu.decode_pos); if (spu.rvb->StartAddr) { if (do_rvb) @@ -1112,7 +1113,7 @@ static void sync_worker_thread(int force) work = &worker->i[worker->i_reaped & WORK_I_MASK]; thread_work_wait_sync(work, force); - MixXA(work->SSumLR, RVB, work->ns_to, work->decode_pos); + MixCD(work->SSumLR, RVB, work->ns_to, work->decode_pos); 
do_samples_finish(work->SSumLR, work->ns_to, work->channels_silent, work->decode_pos); @@ -1351,11 +1352,13 @@ void CALLBACK SPUupdate(void) // XA AUDIO -void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycle, int unused) +void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycle, int is_start) { if(!xap) return; if(!xap->freq) return; // no xa freq ? bye + if (is_start) + spu.XAPlay = spu.XAFeed = spu.XAStart; if (spu.XAPlay == spu.XAFeed) do_samples(cycle, 1); // catch up to prevent source underflows later @@ -1376,6 +1379,17 @@ int CALLBACK SPUplayCDDAchannel(short *pcm, int nbytes, unsigned int cycle, int return 0; } +void CALLBACK SPUsetCDvol(unsigned char ll, unsigned char lr, + unsigned char rl, unsigned char rr, unsigned int cycle) +{ + if (spu.XAPlay != spu.XAFeed || spu.CDDAPlay != spu.CDDAFeed) + do_samples(cycle, 1); + spu.cdv.ll = ll; + spu.cdv.lr = lr; + spu.cdv.rl = rl; + spu.cdv.rr = rr; +} + // to be called after state load void ClearWorkingState(void) { diff --git a/plugins/dfsound/spu.h b/plugins/dfsound/spu.h index 810ec07de..d49d9033e 100644 --- a/plugins/dfsound/spu.h +++ b/plugins/dfsound/spu.h @@ -18,18 +18,29 @@ #ifndef __P_SPU_H__ #define __P_SPU_H__ -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define HTOLE16(x) __builtin_bswap16(x) -#define LE16TOH(x) __builtin_bswap16(x) -#else -#define HTOLE16(x) (x) -#define LE16TOH(x) (x) -#endif +struct SPUFreeze; +struct xa_decode; -void ClearWorkingState(void); -void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycle, int is_start); +long CALLBACK SPUopen(void); +long CALLBACK SPUinit(void); +long CALLBACK SPUshutdown(void); +long CALLBACK SPUclose(void); +void CALLBACK SPUwriteRegister(unsigned long, unsigned short, unsigned int); +unsigned short CALLBACK SPUreadRegister(unsigned long, unsigned int); +void CALLBACK SPUregisterCallback(void (*cb)(int)); +void CALLBACK SPUregisterScheduleCb(void (*cb)(unsigned int)); +long CALLBACK SPUfreeze(unsigned int, 
struct SPUFreeze *, unsigned int); +void CALLBACK SPUasync(unsigned int, unsigned int); + +void CALLBACK SPUreadDMAMem(unsigned short * pusPSXMem,int iSize,unsigned int cycles); +void CALLBACK SPUwriteDMAMem(unsigned short * pusPSXMem,int iSize,unsigned int cycles); + +void CALLBACK SPUplayADPCMchannel(struct xa_decode *xap, unsigned int cycle, int is_start); int CALLBACK SPUplayCDDAchannel(short *pcm, int bytes, unsigned int cycle, int is_start); -void FeedXA(const xa_decode_t *xap); -void FeedCDDA(unsigned char *pcm, int nBytes); +void CALLBACK SPUsetCDvol(unsigned char ll, unsigned char lr, + unsigned char rl, unsigned char rr, unsigned int cycle); + +// internal +void ClearWorkingState(void); #endif /* __P_SPU_H__ */ diff --git a/plugins/dfsound/stdafx.h b/plugins/dfsound/stdafx.h index 96335e38a..ff082bc85 100644 --- a/plugins/dfsound/stdafx.h +++ b/plugins/dfsound/stdafx.h @@ -32,6 +32,14 @@ #define INLINE static inline #endif +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define HTOLE16(x) __builtin_bswap16(x) +#define LE16TOH(x) __builtin_bswap16(x) +#else +#define HTOLE16(x) (x) +#define LE16TOH(x) (x) +#endif + #include "psemuxa.h" #endif /* __P_STDAFX_H__ */ diff --git a/plugins/dfsound/xa.c b/plugins/dfsound/xa.c index b3ac01d94..e3a9fb61a 100644 --- a/plugins/dfsound/xa.c +++ b/plugins/dfsound/xa.c @@ -39,13 +39,55 @@ static int gauss_window[8] = {0, 0, 0, 0, 0, 0, 0, 0}; // MIX XA & CDDA //////////////////////////////////////////////////////////////////////// -INLINE void MixXA(int *SSumLR, int *RVB, int ns_to, int decode_pos) +INLINE void SkipCD(int ns_to, int decode_pos) { int cursor = decode_pos; int ns; - short l, r; + + if(spu.XAPlay != spu.XAFeed) + { + for(ns = 0; ns < ns_to*2; ns += 2) + { + if(spu.XAPlay != spu.XAFeed) spu.XAPlay++; + if(spu.XAPlay == spu.XAEnd) spu.XAPlay=spu.XAStart; + + spu.spuMem[cursor] = 0; + spu.spuMem[cursor + 0x400/2] = 0; + cursor = (cursor + 1) & 0x1ff; + } + } + else if(spu.CDDAPlay != spu.CDDAFeed) + { + for(ns = 
0; ns < ns_to*2; ns += 2) + { + if(spu.CDDAPlay != spu.CDDAFeed) spu.CDDAPlay++; + if(spu.CDDAPlay == spu.CDDAEnd) spu.CDDAPlay=spu.CDDAStart; + + spu.spuMem[cursor] = 0; + spu.spuMem[cursor + 0x400/2] = 0; + cursor = (cursor + 1) & 0x1ff; + } + } + spu.XALastVal = 0; +} + +INLINE void MixCD(int *SSumLR, int *RVB, int ns_to, int decode_pos) +{ + int vll = spu.iLeftXAVol * spu.cdv.ll >> 7; + int vrl = spu.iLeftXAVol * spu.cdv.rl >> 7; + int vlr = spu.iRightXAVol * spu.cdv.lr >> 7; + int vrr = spu.iRightXAVol * spu.cdv.rr >> 7; + int cursor = decode_pos; + int l1, r1, l, r; + int ns; uint32_t v = spu.XALastVal; + if ((vll | vlr | vrl | vrr) == 0) + { + SkipCD(ns_to, decode_pos); + return; + } + if(spu.XAPlay != spu.XAFeed || spu.XARepeat > 0) { if(spu.XAPlay == spu.XAFeed) @@ -56,8 +98,11 @@ INLINE void MixXA(int *SSumLR, int *RVB, int ns_to, int decode_pos) if(spu.XAPlay != spu.XAFeed) v=*spu.XAPlay++; if(spu.XAPlay == spu.XAEnd) spu.XAPlay=spu.XAStart; - l = ((int)(short)v * spu.iLeftXAVol) >> 15; - r = ((int)(short)(v >> 16) * spu.iLeftXAVol) >> 15; + l1 = (short)v, r1 = (short)(v >> 16); + l = (l1 * vll + r1 * vrl) >> 15; + r = (r1 * vrr + l1 * vlr) >> 15; + ssat32_to_16(l); + ssat32_to_16(r); if (spu.spuCtrl & CTRL_CD) { SSumLR[ns+0] += l; @@ -84,8 +129,11 @@ INLINE void MixXA(int *SSumLR, int *RVB, int ns_to, int decode_pos) if(spu.CDDAPlay != spu.CDDAFeed) v=*spu.CDDAPlay++; if(spu.CDDAPlay == spu.CDDAEnd) spu.CDDAPlay=spu.CDDAStart; - l = ((int)(short)v * spu.iLeftXAVol) >> 15; - r = ((int)(short)(v >> 16) * spu.iLeftXAVol) >> 15; + l1 = (short)v, r1 = (short)(v >> 16); + l = (l1 * vll + r1 * vrl) >> 15; + r = (r1 * vrr + l1 * vlr) >> 15; + ssat32_to_16(l); + ssat32_to_16(r); if (spu.spuCtrl & CTRL_CD) { SSumLR[ns+0] += l; diff --git a/plugins/spunull/spunull.c b/plugins/spunull/spunull.c index ece5db934..7f16ed5ef 100644 --- a/plugins/spunull/spunull.c +++ b/plugins/spunull/spunull.c @@ -284,6 +284,11 @@ void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap) { 
} +void CALLBACK SPUsetCDvol(unsigned char ll, unsigned char lr, + unsigned char rl, unsigned char rr, unsigned int cycle) +{ +} + //////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////// From 95a3270ffc4262b75fdca8dc1340481d2354e85a Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 22 Nov 2023 00:06:05 +0200 Subject: [PATCH 475/597] spu: forgot to save some stuff again --- plugins/dfsound/freeze.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/plugins/dfsound/freeze.c b/plugins/dfsound/freeze.c index f56c88346..bddf0adad 100644 --- a/plugins/dfsound/freeze.c +++ b/plugins/dfsound/freeze.c @@ -303,6 +303,13 @@ long CALLBACK SPUfreeze(unsigned int ulFreezeMode, SPUFreeze_t * pF, pFO->xa_left = xa_left; pFO->cdda_left = cdda_left; pFO->cycles_played = spu.cycles_played; + pFO->cycles_dma_end = spu.cycles_dma_end; + pFO->decode_dirty_ch = spu.decode_dirty_ch; + pFO->dwNoiseVal = spu.dwNoiseVal; + pFO->dwNoiseCount = spu.dwNoiseCount; + pFO->XARepeat = spu.XARepeat; + pFO->XALastVal = spu.XALastVal; + pFO->last_keyon_cycles = spu.last_keyon_cycles; for(i=0;i Date: Wed, 22 Nov 2023 00:27:27 +0200 Subject: [PATCH 476/597] cdrom: always error out on shell open Duckstation claims it has been verified on console. libretro/pcsx_rearmed#804 --- libpcsxcore/cdrom.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index fac5fd39a..6d8e631e4 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -344,26 +344,22 @@ void cdrLidSeekInterrupt(void) // 02, 12, 10 if (!(cdr.StatP & STATUS_SHELLOPEN)) { + StopReading(); SetPlaySeekRead(cdr.StatP, 0); cdr.StatP |= STATUS_SHELLOPEN; // IIRC this sometimes doesn't happen on real hw // (when lots of commands are sent?) 
- if (cdr.Reading) { - StopReading(); - SetResultSize(2); - cdr.Result[0] = cdr.StatP | STATUS_SEEKERROR; - cdr.Result[1] = ERROR_SHELLOPEN; - setIrq(DiskError, 0x1006); - } + SetResultSize(2); + cdr.Result[0] = cdr.StatP | STATUS_SEEKERROR; + cdr.Result[1] = ERROR_SHELLOPEN; if (cdr.CmdInProgress) { psxRegs.interrupt &= ~(1 << PSXINT_CDR); cdr.CmdInProgress = 0; - SetResultSize(2); cdr.Result[0] = cdr.StatP | STATUS_ERROR; cdr.Result[1] = ERROR_NOTREADY; - setIrq(DiskError, 0x1007); } + setIrq(DiskError, 0x1006); set_event(PSXINT_CDRLID, cdReadTime * 30); break; From 2ca1e56e16f03f7afb8988474f3a55e164960665 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 22 Nov 2023 00:37:49 +0200 Subject: [PATCH 477/597] don't use a stack var out of scope newer compilers are stricter about this, as well as asan --- frontend/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/frontend/main.c b/frontend/main.c index cec1fbbf3..019835dd9 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -590,6 +590,7 @@ int main(int argc, char *argv[]) { char file[MAXPATHLEN] = ""; char path[MAXPATHLEN]; + char isofilename[MAXPATHLEN]; const char *cdfile = NULL; const char *loadst_f = NULL; int psxout = 0; @@ -608,8 +609,6 @@ int main(int argc, char *argv[]) SysPrintf("Using config file %s.\n", cfgfile_basename); } else if (!strcmp(argv[i], "-cdfile")) { - char isofilename[MAXPATHLEN]; - if (i+1 >= argc) break; strncpy(isofilename, argv[++i], MAXPATHLEN); if (isofilename[0] != '/') { From f05ce78eb3a05c582928ca320267e7a783868264 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 22 Nov 2023 01:06:05 +0200 Subject: [PATCH 478/597] spu: add a guard for "runaway" channels should really mask but it's inconvenient with the raw pointers used there --- plugins/dfsound/spu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 1cc1cbf94..6671e3eda 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ 
-1551,7 +1551,10 @@ long CALLBACK SPUinit(void) int i; memset(&spu, 0, sizeof(spu)); - spu.spuMemC = calloc(1, 512 * 1024); + spu.spuMemC = calloc(1, 512 * 1024 + 16); + // a guard for runaway channels - End+Mute + spu.spuMemC[512 * 1024 + 1] = 1; + InitADSR(); spu.s_chan = calloc(MAXCHAN+1, sizeof(spu.s_chan[0])); // channel + 1 infos (1 is security for fmod handling) From 5c07678c305dbf81aa85f95f900f50863da8d668 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 25 Nov 2023 02:52:47 +0200 Subject: [PATCH 479/597] try different dualshock analog heuristics libretro/pcsx_rearmed#765 --- include/psemu_plugin_defs.h | 4 +++- libpcsxcore/plugins.c | 25 +++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/include/psemu_plugin_defs.h b/include/psemu_plugin_defs.h index 6136ca703..4e69b1670 100644 --- a/include/psemu_plugin_defs.h +++ b/include/psemu_plugin_defs.h @@ -230,7 +230,9 @@ typedef struct unsigned int lastUseFrame; unsigned int unused; unsigned char configModeUsed; - unsigned char padding[3]; + unsigned char autoAnalogTried; + unsigned char userToggled; + unsigned char padding; } ds; unsigned char multitapLongModeEnabled; unsigned char padding2; diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index d7c2acb6e..bab152c68 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -494,9 +494,28 @@ static void initBufForRequest(int padIndex, char value) { return; } - if ((u32)(frame_counter - pads[padIndex].ds.lastUseFrame) > 60u + if ((u32)(frame_counter - pads[padIndex].ds.lastUseFrame) > 2*60u + && pads[padIndex].ds.configModeUsed && !Config.hacks.dualshock_init_analog) + { + //SysPrintf("Pad reset\n"); pads[padIndex].ds.padMode = 0; // according to nocash + pads[padIndex].ds.autoAnalogTried = 0; + } + else if (pads[padIndex].ds.padMode == 0 && value == CMD_READ_DATA_AND_VIBRATE + && pads[padIndex].ds.configModeUsed + && !pads[padIndex].ds.configMode + && !pads[padIndex].ds.userToggled) + { + if 
(pads[padIndex].ds.autoAnalogTried == 16) { + // auto-enable for convenience + SysPrintf("Auto-enabling dualshock analog mode.\n"); + pads[padIndex].ds.padMode = 1; + pads[padIndex].ds.autoAnalogTried = 255; + } + else if (pads[padIndex].ds.autoAnalogTried < 16) + pads[padIndex].ds.autoAnalogTried++; + } pads[padIndex].ds.lastUseFrame = frame_counter; switch (value) { @@ -992,8 +1011,10 @@ int padToggleAnalog(unsigned int index) { int r = -1; - if (index < sizeof(pads) / sizeof(pads[0])) + if (index < sizeof(pads) / sizeof(pads[0])) { r = (pads[index].ds.padMode ^= 1); + pads[index].ds.userToggled = 1; + } return r; } From dcaa32ddfd2c74db639452d6aee456d41d502700 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 25 Nov 2023 22:37:28 +0200 Subject: [PATCH 480/597] cdrom: fail pausing on seek libretro/pcsx_rearmed#790 --- libpcsxcore/cdrom.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 6d8e631e4..3991bad9d 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -715,6 +715,7 @@ void cdrInterrupt(void) { int read_ok; u16 not_ready = 0; u8 IrqStat = Acknowledge; + u8 DriveStateOld; u16 Cmd; int i; @@ -964,7 +965,16 @@ void cdrInterrupt(void) { second_resp_time = (((cdr.Mode & MODE_SPEED) ? 1 : 2) * 1097107); } SetPlaySeekRead(cdr.StatP, 0); + DriveStateOld = cdr.DriveState; cdr.DriveState = DRIVESTATE_PAUSED; + if (DriveStateOld == DRIVESTATE_SEEK) { + // According to Duckstation this fails, but the + // exact conditions and effects are not clear. + // Moto Racer World Tour seems to rely on this. + // For now assume pause works anyway, just errors out. 
+ error = ERROR_NOTREADY; + goto set_error; + } break; case CdlPause + CMD_PART2: From 3e82ffc4389a4699d0001133c7c2bffd9ee40edc Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 25 Nov 2023 23:56:41 +0200 Subject: [PATCH 481/597] libretro: direct fb access requires duping support because video doesn't necessarily update at vsync rate libretro/pcsx_rearmed#805 --- frontend/libretro.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 80dc8a9da..ed4bb5bdf 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -237,7 +237,10 @@ static void set_vout_fb() fb.access_flags = RETRO_MEMORY_ACCESS_WRITE; vout_pitch = vout_width; - if (environ_cb(RETRO_ENVIRONMENT_GET_CURRENT_SOFTWARE_FRAMEBUFFER, &fb) && fb.format == RETRO_PIXEL_FORMAT_RGB565) { + if (environ_cb(RETRO_ENVIRONMENT_GET_CURRENT_SOFTWARE_FRAMEBUFFER, &fb) + && fb.format == RETRO_PIXEL_FORMAT_RGB565 + && vout_can_dupe && duping_enable) + { vout_buf_ptr = fb.data; if (fb.pitch / 2 != vout_pitch && fb.pitch != vout_width * 2) SysPrintf("got unusual pitch %zd for resolution %dx%d\n", fb.pitch, vout_width, vout_height); From 0709d25474f2cd11283933b583c8d6550f62b741 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 25 Nov 2023 23:59:36 +0200 Subject: [PATCH 482/597] libretro: report errors and warnings as such --- frontend/libretro.c | 47 +++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index ed4bb5bdf..0ed853394 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -76,6 +76,13 @@ static retro_set_rumble_state_t rumble_cb; static struct retro_log_callback logging; static retro_log_printf_t log_cb; +#define LogWarn(fmt, ...) do { \ + if (log_cb) log_cb(RETRO_LOG_WARN, fmt, ##__VA_ARGS__); \ +} while (0) +#define LogErr(fmt, ...) 
do { \ + if (log_cb) log_cb(RETRO_LOG_ERROR, fmt, ##__VA_ARGS__); \ +} while (0) + static unsigned msg_interface_version = 0; static void *vout_buf; @@ -243,7 +250,7 @@ static void set_vout_fb() { vout_buf_ptr = fb.data; if (fb.pitch / 2 != vout_pitch && fb.pitch != vout_width * 2) - SysPrintf("got unusual pitch %zd for resolution %dx%d\n", fb.pitch, vout_width, vout_height); + LogWarn("got unusual pitch %zd for resolution %dx%d\n", fb.pitch, vout_width, vout_height); vout_pitch = fb.pitch / 2; } else @@ -412,7 +419,7 @@ void *pl_3ds_mmap(unsigned long addr, size_t size, int is_fixed, if (svcControlMemory(&tmp, (void *)custom_map->target_map, (void *)ptr_aligned, size, MEMOP_MAP, 0x3) < 0) { - SysPrintf("could not map memory @0x%08X\n", custom_map->target_map); + LogErr("could not map memory @0x%08X\n", custom_map->target_map); exit(1); } @@ -1085,7 +1092,7 @@ static void save_close(void *file) return; if (fp->pos > r_size) - SysPrintf("ERROR: save buffer overflow detected\n"); + LogErr("ERROR: save buffer overflow detected\n"); else if (fp->is_write && fp->pos < r_size) // make sure we don't save trash in leftover space memset(fp->buf + fp->pos, 0, r_size - fp->pos); @@ -1156,7 +1163,7 @@ void retro_cheat_set(unsigned index, bool enabled, const char *code) finish: if (ret != 0) - SysPrintf("Failed to set cheat %#u\n", index); + LogErr("Failed to set cheat %#u\n", index); else if (index < NumCheats) Cheats[index].Enabled = enabled; free(buf); @@ -1262,7 +1269,7 @@ static bool disk_set_image_index(unsigned int index) if (disks[index].fname == NULL) { - SysPrintf("missing disk #%u\n", index); + LogErr("missing disk #%u\n", index); CDR_shutdown(); // RetroArch specifies "no disk" with index == count, @@ -1271,19 +1278,19 @@ static bool disk_set_image_index(unsigned int index) return true; } - SysPrintf("switching to disk %u: \"%s\" #%d\n", index, + LogErr("switching to disk %u: \"%s\" #%d\n", index, disks[index].fname, disks[index].internal_index); 
cdrIsoMultidiskSelect = disks[index].internal_index; set_cd_image(disks[index].fname); if (ReloadCdromPlugin() < 0) { - SysPrintf("failed to load cdr plugin\n"); + LogErr("failed to load cdr plugin\n"); return false; } if (CDR_open() < 0) { - SysPrintf("failed to open cdr plugin\n"); + LogErr("failed to open cdr plugin\n"); return false; } @@ -1680,7 +1687,7 @@ bool retro_load_game(const struct retro_game_info *info) if (info == NULL || info->path == NULL) { - SysPrintf("info->path required\n"); + LogErr("info->path required\n"); return false; } @@ -1700,7 +1707,7 @@ bool retro_load_game(const struct retro_game_info *info) { if (!read_m3u(info->path)) { - log_cb(RETRO_LOG_INFO, "failed to read m3u file\n"); + LogErr("failed to read m3u file\n"); return false; } } @@ -1733,7 +1740,7 @@ bool retro_load_game(const struct retro_game_info *info) /* have to reload after set_cd_image for correct cdr plugin */ if (LoadPlugins() == -1) { - log_cb(RETRO_LOG_INFO, "failed to load plugins\n"); + LogErr("failed to load plugins\n"); return false; } @@ -1742,7 +1749,7 @@ bool retro_load_game(const struct retro_game_info *info) if (OpenPlugins() == -1) { - log_cb(RETRO_LOG_INFO, "failed to open plugins\n"); + LogErr("failed to open plugins\n"); return false; } @@ -1801,12 +1808,12 @@ bool retro_load_game(const struct retro_game_info *info) if (ReloadCdromPlugin() < 0) { - log_cb(RETRO_LOG_INFO, "failed to reload cdr plugins\n"); + LogErr("failed to reload cdr plugins\n"); return false; } if (CDR_open() < 0) { - log_cb(RETRO_LOG_INFO, "failed to open cdr plugin\n"); + LogErr("failed to open cdr plugin\n"); return false; } } @@ -1821,7 +1828,7 @@ bool retro_load_game(const struct retro_game_info *info) if (!is_exe && CheckCdrom() == -1) { - log_cb(RETRO_LOG_INFO, "unsupported/invalid CD image: %s\n", info->path); + LogErr("unsupported/invalid CD image: %s\n", info->path); return false; } @@ -1833,7 +1840,7 @@ bool retro_load_game(const struct retro_game_info *info) ret = 
LoadCdrom(); if (ret != 0) { - log_cb(RETRO_LOG_INFO, "could not load %s (%d)\n", is_exe ? "exe" : "CD", ret); + LogErr("could not load %s (%d)\n", is_exe ? "exe" : "CD", ret); return false; } emu_on_new_cd(0); @@ -3210,7 +3217,7 @@ static int init_memcards(void) { if (strlen(dir) + strlen(CARD2_FILE) + 2 > sizeof(Config.Mcd2)) { - SysPrintf("Path '%s' is too long. Cannot use memcard 2. Use a shorter path.\n", dir); + LogErr("Path '%s' is too long. Cannot use memcard 2. Use a shorter path.\n", dir); ret = -1; } else @@ -3222,7 +3229,7 @@ static int init_memcards(void) } else { - SysPrintf("Could not get save directory! Could not create memcard 2."); + LogErr("Could not get save directory! Could not create memcard 2."); ret = -1; } } @@ -3337,7 +3344,7 @@ void retro_init(void) ret |= emu_core_init(); if (ret != 0) { - SysPrintf("PCSX init failed.\n"); + LogErr("PCSX init failed.\n"); exit(1); } @@ -3357,6 +3364,8 @@ void retro_init(void) loadPSXBios(); environ_cb(RETRO_ENVIRONMENT_GET_CAN_DUPE, &vout_can_dupe); + if (!vout_can_dupe) + LogWarn("CAN_DUPE reports false\n"); disk_initial_index = 0; disk_initial_path[0] = '\0'; From f4ab3b64325deef84489bcb6edaea8582d2123fe Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 26 Nov 2023 00:11:30 +0200 Subject: [PATCH 483/597] drop some unused funcs removed in a plugin but not the core --- frontend/plugin.c | 2 -- libpcsxcore/plugins.c | 7 ------- libpcsxcore/plugins.h | 6 ------ 3 files changed, 15 deletions(-) diff --git a/frontend/plugin.c b/frontend/plugin.c index c3c104af3..02354639d 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -219,8 +219,6 @@ static const struct { DIRECT_GPU(GPUkeypressed), DIRECT_GPU(GPUmakeSnapshot), DIRECT_GPU(GPUconfigure), - DIRECT_GPU(GPUtest), - DIRECT_GPU(GPUabout), DIRECT_GPU(GPUgetScreenPic), DIRECT_GPU(GPUshowScreenPic), */ diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index bab152c68..9a3998236 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -31,9 
+31,6 @@ static s64 cdOpenCaseTime = 0; GPUupdateLace GPU_updateLace; GPUinit GPU_init; GPUshutdown GPU_shutdown; -GPUconfigure GPU_configure; -GPUtest GPU_test; -GPUabout GPU_about; GPUopen GPU_open; GPUclose GPU_close; GPUreadStatus GPU_readStatus; @@ -214,7 +211,6 @@ static int LoadGPUplugin(const char *GPUdll) { hGPUDriver = SysLoadLibrary(GPUdll); if (hGPUDriver == NULL) { - GPU_configure = NULL; SysMessage (_("Could not load GPU plugin %s!"), GPUdll); return -1; } drv = hGPUDriver; @@ -238,9 +234,6 @@ static int LoadGPUplugin(const char *GPUdll) { LoadGpuSym0(showScreenPic, "GPUshowScreenPic"); LoadGpuSym0(vBlank, "GPUvBlank"); LoadGpuSym0(getScreenInfo, "GPUgetScreenInfo"); - LoadGpuSym0(configure, "GPUconfigure"); - LoadGpuSym0(test, "GPUtest"); - LoadGpuSym0(about, "GPUabout"); return 0; } diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index 5149d4682..d080baed0 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -60,9 +60,6 @@ typedef uint32_t (CALLBACK* GPUreadData)(void); typedef void (CALLBACK* GPUreadDataMem)(uint32_t *, int); typedef long (CALLBACK* GPUdmaChain)(uint32_t *,uint32_t, uint32_t *); typedef void (CALLBACK* GPUupdateLace)(void); -typedef long (CALLBACK* GPUconfigure)(void); -typedef long (CALLBACK* GPUtest)(void); -typedef void (CALLBACK* GPUabout)(void); typedef void (CALLBACK* GPUmakeSnapshot)(void); typedef void (CALLBACK* GPUkeypressed)(int); typedef void (CALLBACK* GPUdisplayText)(char *); @@ -82,9 +79,6 @@ typedef void (CALLBACK* GPUgetScreenInfo)(int *, int *); extern GPUupdateLace GPU_updateLace; extern GPUinit GPU_init; extern GPUshutdown GPU_shutdown; -extern GPUconfigure GPU_configure; -extern GPUtest GPU_test; -extern GPUabout GPU_about; extern GPUopen GPU_open; extern GPUclose GPU_close; extern GPUreadStatus GPU_readStatus; From 1cff67e5df07352a5132bed32084ae0d067521e8 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 26 Nov 2023 01:02:27 +0200 Subject: [PATCH 484/597] update db 
libretro/pcsx_rearmed#128 --- libpcsxcore/database.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 997b13fbc..a324553ae 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -27,8 +27,12 @@ static const char * const gpu_slow_llist_db[] = "SCES02834", "SCUS94570", "SCUS94616", "SCUS94654", /* Final Fantasy IV */ "SCES03840", "SLPM86028", "SLUS01360", + /* Simple 1500 Series Vol. 57: The Meiro */ + "SLPM86715", /* Spot Goes to Hollywood */ "SLES00330", "SLPS00394", "SLUS00014", + /* Tiny Tank */ + "SCES01338", "SCES02072", "SCES02072", "SCES02072", "SCES02072", "SCUS94427", /* Vampire Hunter D */ "SLES02731", "SLPS02477", "SLPS03198", "SLUS01138", }; @@ -51,8 +55,6 @@ static const char * const dualshock_init_analog_hack_db[] = { /* Formula 1 Championship Edition */ "SLUS00546", - /* Gran Turismo 2 */ - "SCUS94455", "SCUS94488", "SCPS10116", "SCPS10117", "SCES02380", "SCES12380", }; #define HACK_ENTRY(var, list) \ From 61a1bbbb76218c857318abbc75c7f8fac1188a41 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 26 Nov 2023 21:39:08 +0200 Subject: [PATCH 485/597] libretro: drop the Frame Duping option Unclear what it was for, added in commit 0e5a7b7d5a4894754a73d0ea496b3b7b3f6b32d8 libretro/pcsx_rearmed#805 --- frontend/libretro.c | 16 ++-------------- frontend/libretro_core_options.h | 14 -------------- frontend/libretro_core_options_intl.h | 12 ------------ 3 files changed, 2 insertions(+), 40 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 0ed853394..d361be153 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -91,7 +91,6 @@ static int vout_width = 256, vout_height = 240, vout_pitch = 256; static int vout_fb_dirty; static int psx_w, psx_h; static bool vout_can_dupe; -static bool duping_enable; static bool found_bios; static bool display_internal_fps = false; static unsigned frame_count = 0; @@ -246,7 +245,7 @@ static void set_vout_fb() 
vout_pitch = vout_width; if (environ_cb(RETRO_ENVIRONMENT_GET_CURRENT_SOFTWARE_FRAMEBUFFER, &fb) && fb.format == RETRO_PIXEL_FORMAT_RGB565 - && vout_can_dupe && duping_enable) + && vout_can_dupe) { vout_buf_ptr = fb.data; if (fb.pitch / 2 != vout_pitch && fb.pitch != vout_width * 2) @@ -2109,17 +2108,6 @@ static void update_variables(bool in_flight) } #endif - var.value = NULL; - var.key = "pcsx_rearmed_duping_enable"; - - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) - { - if (strcmp(var.value, "disabled") == 0) - duping_enable = false; - else if (strcmp(var.value, "enabled") == 0) - duping_enable = true; - } - var.value = NULL; var.key = "pcsx_rearmed_display_internal_fps"; @@ -3093,7 +3081,7 @@ void retro_run(void) frameskip_counter = 0; } - video_cb((vout_fb_dirty || !vout_can_dupe || !duping_enable) ? vout_buf_ptr : NULL, + video_cb((vout_fb_dirty || !vout_can_dupe) ? vout_buf_ptr : NULL, vout_width, vout_height, vout_pitch * 2); vout_fb_dirty = 0; } diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 69100c373..25b2da4c3 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -309,20 +309,6 @@ struct retro_core_option_v2_definition option_defs_us[] = { "enabled", #endif }, - { - "pcsx_rearmed_duping_enable", - "Frame Duping (Speedup)", - NULL, - "When enabled and supported by the libretro frontend, provides a small performance increase by directing the frontend to repeat the previous frame if the core has nothing new to display.", - NULL, - "video", - { - { "disabled", NULL }, - { "enabled", NULL }, - { NULL, NULL }, - }, - "enabled", - }, #ifdef THREAD_RENDERING { "pcsx_rearmed_gpu_thread_rendering", diff --git a/frontend/libretro_core_options_intl.h b/frontend/libretro_core_options_intl.h index d66582221..5e74450a4 100644 --- a/frontend/libretro_core_options_intl.h +++ b/frontend/libretro_core_options_intl.h @@ -276,18 +276,6 @@ struct retro_core_option_v2_definition 
option_defs_tr[] = { }, #endif /* GPU_NEON */ - { - "pcsx_rearmed_duping_enable", - "Frame Duping", - NULL, - "Yeni bir veri yoksa, bir hızlandırma, son kareyi yeniden çizer/yeniden kullanır.", - NULL, - NULL, - { - { NULL, NULL }, - }, - NULL - }, { "pcsx_rearmed_display_internal_fps", "Dahili FPS'yi görüntüle", From 90ac6fed274c1d573a971c66f8a1338e8918f066 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 28 Nov 2023 00:23:03 +0200 Subject: [PATCH 486/597] gpu: start doing some basic gpu timing minimum only for now, mostly based on Mednafen libretro/pcsx_rearmed#573 libretro/pcsx_rearmed#783 --- plugins/dfxvideo/gpulib_if.c | 42 +++- plugins/gpu-gles/gpulib_if.c | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu.h | 3 +- plugins/gpu_neon/psx_gpu/psx_gpu_main.c | 6 +- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 252 +++++++++++------------ plugins/gpu_neon/psx_gpu_if.c | 10 +- plugins/gpu_unai/gpulib_if.cpp | 31 ++- plugins/gpu_unai_old/gpulib_if.cpp | 4 +- plugins/gpulib/gpu.c | 34 +-- plugins/gpulib/gpu.h | 2 +- plugins/gpulib/gpu_timing.h | 15 ++ plugins/gpulib/test.c | 4 +- 12 files changed, 244 insertions(+), 161 deletions(-) create mode 100644 plugins/gpulib/gpu_timing.h diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index d08ca67e2..20383ab52 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -303,16 +303,19 @@ void renderer_notify_scanout_change(int x, int y) { } +#include "../gpulib/gpu_timing.h" extern const unsigned char cmd_lengths[256]; -int do_cmd_list(uint32_t *list, int list_len, int *last_cmd) +int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd) { unsigned int cmd = 0, len; uint32_t *list_start = list; uint32_t *list_end = list + list_len; + u32 cpu_cycles = 0; for (; list < list_end; list += 1 + len) { + short *slist = (void *)list; cmd = GETLE32(list) >> 24; len = cmd_lengths[cmd]; if (list + 1 + len > list_end) { @@ -338,6 +341,8 @@ int do_cmd_list(uint32_t *list, int 
list_len, int *last_cmd) while(1) { + cpu_cycles += gput_line(0); + if(list_position >= list_end) { cmd = -1; goto breakloop; @@ -361,6 +366,8 @@ int do_cmd_list(uint32_t *list, int list_len, int *last_cmd) while(1) { + cpu_cycles += gput_line(0); + if(list_position >= list_end) { cmd = -1; goto breakloop; @@ -380,7 +387,6 @@ int do_cmd_list(uint32_t *list, int list_len, int *last_cmd) #ifdef TEST case 0xA0: // sys -> vid { - short *slist = (void *)list; u32 load_width = LE2HOST32(slist[4]); u32 load_height = LE2HOST32(slist[5]); u32 load_size = load_width * load_height; @@ -389,6 +395,35 @@ int do_cmd_list(uint32_t *list, int list_len, int *last_cmd) break; } #endif + + // timing + case 0x02: + cpu_cycles += gput_fill(LE2HOST32(slist[4]) & 0x3ff, + LE2HOST32(slist[5]) & 0x1ff); + break; + case 0x20 ... 0x23: cpu_cycles += gput_poly_base(); break; + case 0x24 ... 0x27: cpu_cycles += gput_poly_base_t(); break; + case 0x28 ... 0x2B: cpu_cycles += gput_quad_base(); break; + case 0x2C ... 0x2F: cpu_cycles += gput_quad_base_t(); break; + case 0x30 ... 0x33: cpu_cycles += gput_poly_base_g(); break; + case 0x34 ... 0x37: cpu_cycles += gput_poly_base_gt(); break; + case 0x38 ... 0x3B: cpu_cycles += gput_quad_base_g(); break; + case 0x3C ... 0x3F: cpu_cycles += gput_quad_base_gt(); break; + case 0x40 ... 0x47: cpu_cycles += gput_line(0); break; + case 0x50 ... 0x57: cpu_cycles += gput_line(0); break; + case 0x60 ... 0x63: + cpu_cycles += gput_sprite(LE2HOST32(slist[4]) & 0x3ff, + LE2HOST32(slist[5]) & 0x1ff); + break; + case 0x64 ... 0x67: + cpu_cycles += gput_sprite(LE2HOST32(slist[6]) & 0x3ff, + LE2HOST32(slist[7]) & 0x1ff); + break; + case 0x68 ... 0x6B: cpu_cycles += gput_sprite(1, 1); break; + case 0x70 ... 0x73: + case 0x74 ... 0x77: cpu_cycles += gput_sprite(8, 8); break; + case 0x78 ... 0x7B: + case 0x7C ... 
0x7F: cpu_cycles += gput_sprite(16, 16); break; } } @@ -396,6 +431,7 @@ int do_cmd_list(uint32_t *list, int list_len, int *last_cmd) gpu.ex_regs[1] &= ~0x1ff; gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff; + *cpu_cycles_out += cpu_cycles; *last_cmd = cmd; return list - list_start; } @@ -440,3 +476,5 @@ void renderer_set_config(const struct rearmed_cbs *cbs) cbs->pl_set_gpu_caps(0); set_vram(gpu.vram); } + +// vim:ts=2:shiftwidth=2:expandtab diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c index a3a0c43b1..d440fdb10 100644 --- a/plugins/gpu-gles/gpulib_if.c +++ b/plugins/gpu-gles/gpulib_if.c @@ -521,7 +521,7 @@ void renderer_notify_scanout_change(int x, int y) extern const unsigned char cmd_lengths[256]; // XXX: mostly dupe code from soft peops -int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) +int do_cmd_list(unsigned int *list, int list_len, int *cycles, int *last_cmd) { unsigned int cmd, len; unsigned int *list_start = list; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index da9e34266..06514b95e 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -254,7 +254,8 @@ void update_texture_8bpp_cache(psx_gpu_struct *psx_gpu); void flush_render_block_buffer(psx_gpu_struct *psx_gpu); void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram); -u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command); +u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, + s32 *cpu_cycles, u32 *last_command); void triangle_benchmark(psx_gpu_struct *psx_gpu); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c index c7ce0ee4d..435c51a2e 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c @@ -135,6 +135,8 @@ int main(int argc, char *argv[]) FILE *state_file; FILE *list_file; u32 no_display = 0; + s32 dummy0 = 0; + u32 dummy1 = 0; if((argc != 3) && (argc != 4)) { 
@@ -213,7 +215,7 @@ int main(int argc, char *argv[]) init_counter(); #endif - gpu_parse(psx_gpu, list, size, NULL); + gpu_parse(psx_gpu, list, size, &dummy0, &dummy1); flush_render_block_buffer(psx_gpu); clear_stats(); @@ -222,7 +224,7 @@ int main(int argc, char *argv[]) u32 cycles = get_counter(); #endif - gpu_parse(psx_gpu, list, size, NULL); + gpu_parse(psx_gpu, list, size, &dummy0, &dummy1); flush_render_block_buffer(psx_gpu); #ifdef NEON_BUILD diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 5f69919e2..b0254affc 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -15,6 +15,7 @@ #include #include "common.h" +#include "../../gpulib/gpu_timing.h" #ifndef command_lengths const u8 command_lengths[256] = @@ -250,30 +251,31 @@ static void do_fill(psx_gpu_struct *psx_gpu, u32 x, u32 y, #define SET_Ex(r, v) #endif -u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) +u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, + s32 *cpu_cycles_out, u32 *last_command) { vertex_struct vertexes[4] __attribute__((aligned(16))) = {}; - u32 current_command = 0, command_length; + u32 current_command = 0, command_length, cpu_cycles = 0; u32 *list_start = list; u32 *list_end = list + (size / 4); for(; list < list_end; list += 1 + command_length) { - s16 *list_s16 = (void *)list; - current_command = *list >> 24; - command_length = command_lengths[current_command]; - if (list + 1 + command_length > list_end) { - current_command = (u32)-1; - break; - } - - switch(current_command) - { - case 0x00: - break; - - case 0x02: + s16 *list_s16 = (void *)list; + current_command = *list >> 24; + command_length = command_lengths[current_command]; + if (list + 1 + command_length > list_end) { + current_command = (u32)-1; + break; + } + + switch(current_command) + { + case 0x00: + break; + + case 0x02: { u32 x = list_s16[2] & 0x3FF; u32 y = list_s16[3] & 0x1FF; 
@@ -282,10 +284,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) u32 color = list[0] & 0xFFFFFF; do_fill(psx_gpu, x, y, width, height, color); - break; + cpu_cycles += gput_fill(width, height); + break; } - - case 0x20 ... 0x23: + + case 0x20 ... 0x23: { set_triangle_color(psx_gpu, list[0] & 0xFFFFFF); @@ -294,10 +297,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) get_vertex_data_xy(2, 6); render_triangle(psx_gpu, vertexes, current_command); - break; + cpu_cycles += gput_poly_base(); + break; } - case 0x24 ... 0x27: + case 0x24 ... 0x27: { set_clut(psx_gpu, list_s16[5]); set_texture(psx_gpu, list_s16[9]); @@ -308,10 +312,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) get_vertex_data_xy_uv(2, 10); render_triangle(psx_gpu, vertexes, current_command); - break; + cpu_cycles += gput_poly_base_t(); + break; } - case 0x28 ... 0x2B: + case 0x28 ... 0x2B: { set_triangle_color(psx_gpu, list[0] & 0xFFFFFF); @@ -322,10 +327,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - break; + cpu_cycles += gput_quad_base(); + break; } - case 0x2C ... 0x2F: + case 0x2C ... 0x2F: { set_clut(psx_gpu, list_s16[5]); set_texture(psx_gpu, list_s16[9]); @@ -338,23 +344,22 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - break; + cpu_cycles += gput_quad_base_t(); + break; } - case 0x30 ... 0x33: + case 0x30 ... 0x33: { get_vertex_data_xy_rgb(0, 0); get_vertex_data_xy_rgb(1, 4); get_vertex_data_xy_rgb(2, 8); render_triangle(psx_gpu, vertexes, current_command); - break; + cpu_cycles += gput_poly_base_g(); + break; } - case 0x34: - case 0x35: - case 0x36: - case 0x37: + case 0x34 ... 
0x37: { set_clut(psx_gpu, list_s16[5]); set_texture(psx_gpu, list_s16[11]); @@ -364,13 +369,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) get_vertex_data_xy_uv_rgb(2, 12); render_triangle(psx_gpu, vertexes, current_command); - break; + cpu_cycles += gput_poly_base_gt(); + break; } - case 0x38: - case 0x39: - case 0x3A: - case 0x3B: + case 0x38 ... 0x3B: { get_vertex_data_xy_rgb(0, 0); get_vertex_data_xy_rgb(1, 4); @@ -379,13 +382,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - break; + cpu_cycles += gput_quad_base_g(); + break; } - case 0x3C: - case 0x3D: - case 0x3E: - case 0x3F: + case 0x3C ... 0x3F: { set_clut(psx_gpu, list_s16[5]); set_texture(psx_gpu, list_s16[11]); @@ -397,10 +398,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - break; + cpu_cycles += gput_quad_base_gt(); + break; } - case 0x40 ... 0x47: + case 0x40 ... 0x47: { vertexes[0].x = list_s16[2] + psx_gpu->offset_x; vertexes[0].y = list_s16[3] + psx_gpu->offset_y; @@ -408,10 +410,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].y = list_s16[5] + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, list[0], 0); - break; + cpu_cycles += gput_line(0); + break; } - case 0x48 ... 0x4F: + case 0x48 ... 
0x4F: { u32 num_vertexes = 1; u32 *list_position = &(list[2]); @@ -429,6 +432,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, list[0], 0); + cpu_cycles += gput_line(0); list_position++; num_vertexes++; @@ -448,7 +452,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) break; } - case 0x50 ... 0x57: + case 0x50 ... 0x57: { vertexes[0].r = list[0] & 0xFF; vertexes[0].g = (list[0] >> 8) & 0xFF; @@ -463,7 +467,8 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].y = list_s16[7] + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, 0, 0); - break; + cpu_cycles += gput_line(0); + break; } case 0x58 ... 0x5F: @@ -493,6 +498,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, 0, 0); + cpu_cycles += gput_line(0); list_position += 2; num_vertexes++; @@ -512,7 +518,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) break; } - case 0x60 ... 0x63: + case 0x60 ... 0x63: { u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -520,10 +526,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) u32 height = list_s16[5] & 0x1FF; render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]); - break; + cpu_cycles += gput_sprite(width, height); + break; } - case 0x64 ... 0x67: + case 0x64 ... 
0x67: { u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -535,37 +542,31 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, width, height, current_command, list[0]); - break; + cpu_cycles += gput_sprite(width, height); + break; } - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: + case 0x68 ... 0x6B: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]); - break; + cpu_cycles += gput_sprite(1, 1); + break; } - case 0x70: - case 0x71: - case 0x72: - case 0x73: + case 0x70 ... 0x73: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]); - break; + cpu_cycles += gput_sprite(8, 8); + break; } - case 0x74: - case 0x75: - case 0x76: - case 0x77: + case 0x74 ... 0x77: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -575,25 +576,21 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 8, 8, current_command, list[0]); - break; + cpu_cycles += gput_sprite(8, 8); + break; } - case 0x78: - case 0x79: - case 0x7A: - case 0x7B: + case 0x78 ... 0x7B: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]); - break; + cpu_cycles += gput_sprite(16, 16); + break; } - case 0x7C: - case 0x7D: - case 0x7E: - case 0x7F: + case 0x7C ... 
0x7F: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -603,7 +600,8 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 16, 16, current_command, list[0]); - break; + cpu_cycles += gput_sprite(16, 16); + break; } #ifdef PCSX @@ -643,14 +641,14 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) render_block_copy(psx_gpu, (u16 *)&(list_s16[6]), load_x, load_y, load_width, load_height, load_width); - break; + break; } case 0xC0 ... 0xDF: // vid -> sys break; #endif - case 0xE1: + case 0xE1: set_texture(psx_gpu, list[0]); if(list[0] & (1 << 9)) @@ -659,10 +657,10 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) psx_gpu->render_state_base &= ~RENDER_STATE_DITHER; psx_gpu->display_area_draw_enable = (list[0] >> 10) & 0x1; - SET_Ex(1, list[0]); - break; + SET_Ex(1, list[0]); + break; - case 0xE2: + case 0xE2: { // TODO: Clean u32 texture_window_settings = list[0]; @@ -751,11 +749,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) psx_gpu->offset_x = offset_x >> 21; psx_gpu->offset_y = offset_y >> 21; - SET_Ex(5, list[0]); - break; - } + SET_Ex(5, list[0]); + break; + } - case 0xE6: + case 0xE6: { u32 mask_settings = list[0]; u16 mask_msb = mask_settings << 15; @@ -771,18 +769,18 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) psx_gpu->mask_msb = mask_msb; } - SET_Ex(6, list[0]); - break; + SET_Ex(6, list[0]); + break; } - default: - break; - } + default: + break; + } } breakloop: - if (last_command != NULL) - *last_command = current_command; + *cpu_cycles_out += cpu_cycles; + *last_command = current_command; return list - list_start; } @@ -1194,10 +1192,10 @@ static void do_sprite_enhanced(psx_gpu_struct *psx_gpu, int x, int y, #endif u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, 
u32 *list, u32 size, - u32 *last_command) + s32 *cpu_cycles_out, u32 *last_command) { vertex_struct vertexes[4] __attribute__((aligned(16))) = {}; - u32 current_command = 0, command_length; + u32 current_command = 0, command_length, cpu_cycles = 0; u32 *list_start = list; u32 *list_end = list + (size / 4); @@ -1236,6 +1234,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, x &= ~0xF; width = ((width + 0xF) & ~0xF); + cpu_cycles += gput_fill(width, height); if (width == 0 || height == 0) break; @@ -1266,6 +1265,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy(2, 6); do_triangle_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_poly_base(); break; } @@ -1280,6 +1280,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv(2, 10); do_triangle_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_poly_base_t(); break; } @@ -1293,6 +1294,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy(3, 8); do_quad_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_quad_base(); break; } @@ -1309,6 +1311,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, uv_hack(vertexes, 4); do_quad_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_quad_base_t(); break; } @@ -1319,13 +1322,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_rgb(2, 8); do_triangle_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_poly_base_g(); break; } - case 0x34: - case 0x35: - case 0x36: - case 0x37: + case 0x34 ... 
0x37: { set_clut(psx_gpu, list_s16[5]); set_texture(psx_gpu, list_s16[11]); @@ -1335,13 +1336,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv_rgb(2, 12); do_triangle_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_poly_base_gt(); break; } - case 0x38: - case 0x39: - case 0x3A: - case 0x3B: + case 0x38 ... 0x3B: { get_vertex_data_xy_rgb(0, 0); get_vertex_data_xy_rgb(1, 4); @@ -1349,13 +1348,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_rgb(3, 12); do_quad_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_quad_base_g(); break; } - case 0x3C: - case 0x3D: - case 0x3E: - case 0x3F: + case 0x3C ... 0x3F: { set_clut(psx_gpu, list_s16[5]); set_texture(psx_gpu, list_s16[11]); @@ -1367,6 +1364,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, uv_hack(vertexes, 4); do_quad_enhanced(psx_gpu, vertexes, current_command); + cpu_cycles += gput_quad_base_gt(); break; } @@ -1380,6 +1378,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_line(psx_gpu, vertexes, current_command, list[0], 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, list[0], 1); + cpu_cycles += gput_line(0); break; } @@ -1404,6 +1403,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_line(psx_gpu, vertexes, current_command, list[0], 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, list[0], 1); + cpu_cycles += gput_line(0); list_position++; num_vertexes++; @@ -1440,6 +1440,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_line(psx_gpu, vertexes, current_command, 0, 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, 0, 1); + cpu_cycles += gput_line(0); break; } @@ -1473,6 +1474,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, 
render_line(psx_gpu, vertexes, current_command, 0, 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, 0, 1); + cpu_cycles += gput_line(0); list_position += 2; num_vertexes++; @@ -1503,6 +1505,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + width)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); + cpu_cycles += gput_sprite(width, height); break; } @@ -1522,13 +1525,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + width)) do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); + cpu_cycles += gput_sprite(width, height); break; } - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: + case 0x68 ... 0x6B: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -1537,13 +1538,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 1)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, 1, 1, list[0]); + cpu_cycles += gput_sprite(1, 1); break; } - case 0x70: - case 0x71: - case 0x72: - case 0x73: + case 0x70 ... 0x73: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -1552,13 +1551,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 8)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]); + cpu_cycles += gput_sprite(8, 8); break; } - case 0x74: - case 0x75: - case 0x76: - case 0x77: + case 0x74 ... 
0x77: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -1572,13 +1569,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 8)) do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]); + cpu_cycles += gput_sprite(8, 8); break; } - case 0x78: - case 0x79: - case 0x7A: - case 0x7B: + case 0x78 ... 0x7B: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -1587,13 +1582,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 16)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]); + cpu_cycles += gput_sprite(16, 16); break; } - case 0x7C: - case 0x7D: - case 0x7E: - case 0x7F: + case 0x7C ... 0x7F: { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); @@ -1606,6 +1599,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 16)) do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]); + cpu_cycles += gput_sprite(16, 16); break; } @@ -1759,8 +1753,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, enhancement_disable(); breakloop: - if (last_command != NULL) - *last_command = current_command; + *cpu_cycles_out += cpu_cycles; + *last_command = current_command; return list - list_start; } diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index a1476f480..570cc5d25 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -39,7 +39,7 @@ sync_enhancement_buffers(int x, int y, int w, int h); static psx_gpu_struct egpu __attribute__((aligned(256))); -int do_cmd_list(uint32_t *list, int count, int *last_cmd) +int do_cmd_list(uint32_t *list, int count, int *cycles, int *last_cmd) { int ret; @@ -49,9 +49,9 @@ 
int do_cmd_list(uint32_t *list, int count, int *last_cmd) #endif if (gpu.state.enhancement_active) - ret = gpu_parse_enhanced(&egpu, list, count * 4, (u32 *)last_cmd); + ret = gpu_parse_enhanced(&egpu, list, count * 4, cycles, (u32 *)last_cmd); else - ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd); + ret = gpu_parse(&egpu, list, count * 4, cycles, (u32 *)last_cmd); #if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD) __asm__ __volatile__("":::"q4","q5","q6","q7"); @@ -153,7 +153,9 @@ sync_enhancement_buffers(int x, int y, int w, int h) void renderer_sync_ecmds(uint32_t *ecmds) { - gpu_parse(&egpu, ecmds + 1, 6 * 4, NULL); + s32 dummy0 = 0; + u32 dummy1 = 0; + gpu_parse(&egpu, ecmds + 1, 6 * 4, &dummy0, &dummy1); } void renderer_update_caches(int x, int y, int w, int h, int state_changed) diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 20794316b..191108b83 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -390,14 +390,16 @@ static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word) } #endif +#include "../gpulib/gpu_timing.h" extern const unsigned char cmd_lengths[256]; -int do_cmd_list(u32 *_list, int list_len, int *last_cmd) +int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) { u32 cmd = 0, len, i; le32_t *list = (le32_t *)_list; le32_t *list_start = list; le32_t *list_end = list + list_len; + u32 cpu_cycles = 0; //TODO: set ilace_mask when resolution changes instead of every time, // eliminate #ifdef below. 
@@ -430,6 +432,8 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) { case 0x02: gpuClearImage(packet); + cpu_cycles += gput_fill(le16_to_s16(packet.U2[4]) & 0x3ff, + le16_to_s16(packet.U2[5]) & 0x1ff); break; case 0x20: @@ -442,6 +446,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.Masking | Blending | gpu_unai.PixelMSB ]; gpuDrawPolyF(packet, driver, false); + cpu_cycles += gput_poly_base(); } break; case 0x24: @@ -466,6 +471,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) PP driver = gpuPolySpanDrivers[driver_idx]; gpuDrawPolyFT(packet, driver, false); + cpu_cycles += gput_poly_base_t(); } break; case 0x28: @@ -478,6 +484,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.Masking | Blending | gpu_unai.PixelMSB ]; gpuDrawPolyF(packet, driver, true); // is_quad = true + cpu_cycles += gput_quad_base(); } break; case 0x2C: @@ -502,6 +509,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) PP driver = gpuPolySpanDrivers[driver_idx]; gpuDrawPolyFT(packet, driver, true); // is_quad = true + cpu_cycles += gput_quad_base_t(); } break; case 0x30: @@ -519,6 +527,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB ]; gpuDrawPolyG(packet, driver, false); + cpu_cycles += gput_poly_base_g(); } break; case 0x34: @@ -534,6 +543,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB ]; gpuDrawPolyGT(packet, driver, false); + cpu_cycles += gput_poly_base_gt(); } break; case 0x38: @@ -548,6 +558,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB ]; gpuDrawPolyG(packet, driver, true); // is_quad = true + cpu_cycles += gput_quad_base_g(); } break; case 0x3C: @@ -563,6 +574,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB ]; 
gpuDrawPolyGT(packet, driver, true); // is_quad = true + cpu_cycles += gput_quad_base_gt(); } break; case 0x40: @@ -573,6 +585,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; PSD driver = gpuPixelSpanDrivers[driver_idx]; gpuDrawLineF(packet, driver); + cpu_cycles += gput_line(0); } break; case 0x48 ... 0x4F: { // Monochrome line strip @@ -589,6 +602,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2]; gpu_unai.PacketBuffer.U4[2] = *list_position++; gpuDrawLineF(packet, driver); + cpu_cycles += gput_line(0); num_vertexes++; if(list_position >= list_end) { @@ -612,6 +626,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) driver_idx |= (1 << 5); PSD driver = gpuPixelSpanDrivers[driver_idx]; gpuDrawLineG(packet, driver); + cpu_cycles += gput_line(0); } break; case 0x58 ... 0x5F: { // Gouraud-shaded line strip @@ -632,6 +647,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.PacketBuffer.U4[2] = *list_position++; gpu_unai.PacketBuffer.U4[3] = *list_position++; gpuDrawLineG(packet, driver); + cpu_cycles += gput_line(0); num_vertexes++; if(list_position >= list_end) { @@ -651,6 +667,8 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) case 0x63: { // Monochrome rectangle (variable size) PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); + cpu_cycles += gput_sprite(le16_to_u16(packet.U2[4]) & 0x3ff, + le16_to_u16(packet.U2[5]) & 0x1ff); } break; case 0x64: @@ -678,6 +696,8 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); + cpu_cycles += gput_sprite(le16_to_u16(packet.U2[6]) & 0x3ff, + le16_to_u16(packet.U2[7]) & 0x1ff); } break; case 0x68: @@ -687,6 +707,7 @@ int 
do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); + cpu_cycles += gput_sprite(1, 1); } break; case 0x70: @@ -696,6 +717,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); + cpu_cycles += gput_sprite(8, 8); } break; case 0x74: @@ -713,6 +735,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); + cpu_cycles += gput_sprite(8, 8); } break; case 0x78: @@ -722,6 +745,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); + cpu_cycles += gput_sprite(16, 16); } break; case 0x7C: @@ -731,6 +755,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) { gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); gpuDrawS16(packet); + cpu_cycles += gput_sprite(16, 16); break; } // fallthrough @@ -747,6 +772,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); + cpu_cycles += gput_sprite(16, 16); } break; #ifdef TEST @@ -782,6 +808,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) gpu.ex_regs[1] &= ~0x1ff; gpu.ex_regs[1] |= gpu_unai.GPU_GP1 & 0x1ff; + *cpu_cycles_out += cpu_cycles; *last_cmd = cmd; return list - list_start; } @@ -789,7 +816,7 @@ int do_cmd_list(u32 *_list, int list_len, int *last_cmd) void renderer_sync_ecmds(u32 *ecmds) { int dummy; - 
do_cmd_list(&ecmds[1], 6, &dummy); + do_cmd_list(&ecmds[1], 6, &dummy, &dummy); } void renderer_update_caches(int x, int y, int w, int h, int state_changed) diff --git a/plugins/gpu_unai_old/gpulib_if.cpp b/plugins/gpu_unai_old/gpulib_if.cpp index cc3280299..ee694d35d 100644 --- a/plugins/gpu_unai_old/gpulib_if.cpp +++ b/plugins/gpu_unai_old/gpulib_if.cpp @@ -169,7 +169,7 @@ void renderer_notify_scanout_change(int x, int y) extern const unsigned char cmd_lengths[256]; -int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) +int do_cmd_list(unsigned int *list, int list_len, int *cycles, int *last_cmd) { unsigned int cmd = 0, len, i; unsigned int *list_start = list; @@ -523,7 +523,7 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) void renderer_sync_ecmds(uint32_t *ecmds) { int dummy; - do_cmd_list(&ecmds[1], 6, &dummy); + do_cmd_list(&ecmds[1], 6, &dummy, &dummy); } void renderer_update_caches(int x, int y, int w, int h, int state_changed) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 2ac36c1b0..7d40938fd 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -12,6 +12,7 @@ #include #include #include "gpu.h" +#include "gpu_timing.h" #include "../../libpcsxcore/gpu.h" // meh #include "../../frontend/plugin_lib.h" @@ -33,13 +34,14 @@ struct psx_gpu gpu; -static noinline int do_cmd_buffer(uint32_t *data, int count); +static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles); static void finish_vram_transfer(int is_read); static noinline void do_cmd_reset(void) { + int dummy = 0; if (unlikely(gpu.cmd_len > 0)) - do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len); + do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy); gpu.cmd_len = 0; if (unlikely(gpu.dma.h > 0)) @@ -172,8 +174,8 @@ static noinline void decide_frameskip(void) gpu.frameskip.active = 0; if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) { - int dummy; - do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy); + int dummy = 0; + 
do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy); gpu.frameskip.pending_fill[0] = 0; } } @@ -472,7 +474,7 @@ static void finish_vram_transfer(int is_read) gpu.gpu_state_change(PGS_VRAM_TRANSFER_END); } -static void do_vram_copy(const uint32_t *params) +static void do_vram_copy(const uint32_t *params, int *cpu_cycles) { const uint32_t sx = LE32TOH(params[0]) & 0x3FF; const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF; @@ -484,6 +486,7 @@ static void do_vram_copy(const uint32_t *params) uint16_t lbuf[128]; uint32_t x, y; + *cpu_cycles += gput_copy(w, h); if (sx == dx && sy == dy && msb == 0) return; @@ -519,7 +522,7 @@ static void do_vram_copy(const uint32_t *params) static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) { - int cmd = 0, pos = 0, len, dummy, v; + int cmd = 0, pos = 0, len, dummy = 0, v; int skip = 1; gpu.frameskip.pending_fill[0] = 0; @@ -533,7 +536,7 @@ static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) case 0x02: if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h) // clearing something large, don't skip - do_cmd_list(list, 3, &dummy); + do_cmd_list(list, 3, &dummy, &dummy); else memcpy(gpu.frameskip.pending_fill, list, 3 * 4); break; @@ -583,7 +586,7 @@ static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) return pos; } -static noinline int do_cmd_buffer(uint32_t *data, int count) +static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles) { int cmd, pos; uint32_t old_e3 = gpu.ex_regs[3]; @@ -617,7 +620,7 @@ static noinline int do_cmd_buffer(uint32_t *data, int count) cmd = -1; // incomplete cmd, can't consume yet break; } - do_vram_copy(data + pos + 1); + do_vram_copy(data + pos + 1, cpu_cycles); vram_dirty = 1; pos += 4; continue; @@ -627,7 +630,7 @@ static noinline int do_cmd_buffer(uint32_t *data, int count) if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 
0xf0) == 0xe0)) pos += do_cmd_list_skip(data + pos, count - pos, &cmd); else { - pos += do_cmd_list(data + pos, count - pos, &cmd); + pos += do_cmd_list(data + pos, count - pos, cpu_cycles, &cmd); vram_dirty = 1; } @@ -650,7 +653,8 @@ static noinline int do_cmd_buffer(uint32_t *data, int count) static noinline void flush_cmd_buffer(void) { - int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len); + int dummy = 0, left; + left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy); if (left > 0) memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4); if (left != gpu.cmd_len) { @@ -662,14 +666,14 @@ static noinline void flush_cmd_buffer(void) void GPUwriteDataMem(uint32_t *mem, int count) { - int left; + int dummy = 0, left; log_io("gpu_dma_write %p %d\n", mem, count); if (unlikely(gpu.cmd_len > 0)) flush_cmd_buffer(); - left = do_cmd_buffer(mem, count); + left = do_cmd_buffer(mem, count, &dummy); if (left) log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count); } @@ -686,7 +690,7 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr { uint32_t addr, *list, ld_addr = 0; int len, left, count; - long cpu_cycles = 0; + int cpu_cycles = 0; preload(rambase + (start_addr & 0x1fffff) / 4); @@ -720,7 +724,7 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr } if (len) { - left = do_cmd_buffer(list + 1, len); + left = do_cmd_buffer(list + 1, len, &cpu_cycles); if (left) { memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4); gpu.cmd_len = left; diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 886bb1f5c..13e73c5aa 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -119,7 +119,7 @@ extern struct psx_gpu gpu; extern const unsigned char cmd_lengths[256]; -int do_cmd_list(uint32_t *list, int count, int *last_cmd); +int do_cmd_list(uint32_t *list, int count, int *cycles, int *last_cmd); struct rearmed_cbs; diff --git a/plugins/gpulib/gpu_timing.h 
b/plugins/gpulib/gpu_timing.h new file mode 100644 index 000000000..0dfe0d68c --- /dev/null +++ b/plugins/gpulib/gpu_timing.h @@ -0,0 +1,15 @@ + +// very conservative and wrong +#define gput_fill(w, h) (23 + (4 + (w) / 16u) * (h)) +#define gput_copy(w, h) ((w) * (h)) +#define gput_poly_base() (23) +#define gput_poly_base_t() (gput_poly_base() + 90) +#define gput_poly_base_g() (gput_poly_base() + 144) +#define gput_poly_base_gt() (gput_poly_base() + 225) +#define gput_quad_base() gput_poly_base() +#define gput_quad_base_t() gput_poly_base_t() +#define gput_quad_base_g() gput_poly_base_g() +#define gput_quad_base_gt() gput_poly_base_gt() +#define gput_line(k) (8 + (k)) +#define gput_sprite(w, h) (8 + ((w) / 2u) * (h)) + diff --git a/plugins/gpulib/test.c b/plugins/gpulib/test.c index 80d0e9efb..3f24cc4fe 100644 --- a/plugins/gpulib/test.c +++ b/plugins/gpulib/test.c @@ -88,13 +88,13 @@ int main(int argc, char *argv[]) pcnt_init(); renderer_init(); - memcpy(gpu.vram, state.vram, sizeof(gpu.vram)); + memcpy(gpu.vram, state.vram, 1024*512*2); if ((state.gpu_register[8] & 0x24) == 0x24) renderer_set_interlace(1, !(state.status >> 31)); start_cycles = pcnt_get(); - do_cmd_list(list, size / 4, &dummy); + do_cmd_list(list, size / 4, &dummy, &dummy); renderer_flush_queues(); printf("%u\n", pcnt_get() - start_cycles); From 427a3f291dc6cc8247425983b0c94f21286612d9 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 28 Nov 2023 00:47:16 +0200 Subject: [PATCH 487/597] gpu_unai: fix clear masking --- plugins/gpu_unai/gpu_raster_image.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/gpu_unai/gpu_raster_image.h b/plugins/gpu_unai/gpu_raster_image.h index 909ca3901..7c9eb4d92 100644 --- a/plugins/gpu_unai/gpu_raster_image.h +++ b/plugins/gpu_unai/gpu_raster_image.h @@ -160,7 +160,7 @@ void gpuClearImage(PtrUnion packet) x0 = le16_to_s16(packet.U2[2]); y0 = le16_to_s16(packet.U2[3]); w0 = le16_to_s16(packet.U2[4]) & 0x3ff; - h0 = le16_to_s16(packet.U2[5]) & 
0x3ff; + h0 = le16_to_s16(packet.U2[5]) & 0x1ff; w0 += x0; if (x0 < 0) x0 = 0; From 548cdef90b0ff6137c609c59df9566925c25bb14 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 28 Nov 2023 00:51:32 +0200 Subject: [PATCH 488/597] more timing hacks --- libpcsxcore/database.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index a324553ae..6cce0737b 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -106,6 +106,8 @@ cycle_multiplier_overrides[] = /* Psychic Detective - some weird race condition in the game's cdrom code */ { 222, { "SLUS00165", "SLUS00166", "SLUS00167" } }, { 222, { "SLES00070", "SLES10070", "SLES20070" } }, + /* Zero Divide - sometimes too fast */ + { 200, { "SLUS00183", "SLES00159", "SLPS00083", "SLPM80008" } }, }; /* Function for automatic patching according to GameID. */ From 1cec47197eaa7f82bd4503015761cbc7416fff41 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 30 Nov 2023 01:58:19 +0200 Subject: [PATCH 489/597] gpu: improve timings of clipped sprites Judge Dredd has tons of them. Too lazy to do this for peops so keep it neglected for now. 
--- plugins/gpu_neon/psx_gpu/psx_gpu.c | 26 +++--- plugins/gpu_neon/psx_gpu/psx_gpu.h | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 100 ++++++++++++++--------- plugins/gpu_unai/gpu_raster_sprite.h | 14 +++- plugins/gpu_unai/gpulib_if.cpp | 43 ++++++---- plugins/gpulib/gpu.c | 9 +- 6 files changed, 118 insertions(+), 76 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 62080f3f8..b671a757b 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -4194,10 +4194,10 @@ render_block_handler_struct render_sprite_block_handlers[] = void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, - s32 width, s32 height, u32 flags, u32 color) + s32 *width, s32 *height, u32 flags, u32 color) { - s32 x_right = x + width - 1; - s32 y_bottom = y + height - 1; + s32 x_right = x + *width - 1; + s32 y_bottom = y + *height - 1; #ifdef PROFILE sprites++; @@ -4206,6 +4206,7 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, if(invalidate_texture_cache_region_viewport(psx_gpu, x, y, x_right, y_bottom) == 0) { + *width = *height = 0; return; } @@ -4214,7 +4215,7 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, u32 clip = psx_gpu->viewport_start_x - x; x += clip; u += clip; - width -= clip; + *width -= clip; } if(y < psx_gpu->viewport_start_y) @@ -4222,21 +4223,24 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, s32 clip = psx_gpu->viewport_start_y - y; y += clip; v += clip; - height -= clip; + *height -= clip; } if(x_right > psx_gpu->viewport_end_x) - width -= x_right - psx_gpu->viewport_end_x; + *width -= x_right - psx_gpu->viewport_end_x; if(y_bottom > psx_gpu->viewport_end_y) - height -= y_bottom - psx_gpu->viewport_end_y; + *height -= y_bottom - psx_gpu->viewport_end_y; - if((width <= 0) || (height <= 0)) + if((*width <= 0) || (*height <= 0)) + { + *width = *height = 0; return; + } #ifdef PROFILE - 
span_pixels += width * height; - spans += height; + span_pixels += *width * *height; + spans += *height; #endif u32 render_state = flags & @@ -4273,7 +4277,7 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, psx_gpu->render_block_handler = render_block_handler; ((setup_sprite_function_type *)render_block_handler->setup_blocks) - (psx_gpu, x, y, u, v, width, height, color); + (psx_gpu, x, y, u, v, *width, *height, color); } #define draw_pixel_line_mask_evaluate_yes() \ diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 06514b95e..c40c8ae4a 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -244,7 +244,7 @@ void render_block_move(psx_gpu_struct *psx_gpu, u32 source_x, u32 source_y, void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags); void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, - s32 width, s32 height, u32 flags, u32 color); + s32 *width, s32 *height, u32 flags, u32 color); void render_line(psx_gpu_struct *gpu, vertex_struct *vertexes, u32 flags, u32 color, int double_resolution); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index b0254affc..af26fa37c 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -522,10 +522,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); - u32 width = list_s16[4] & 0x3FF; - u32 height = list_s16[5] & 0x1FF; + s32 width = list_s16[4] & 0x3FF; + s32 height = list_s16[5] & 0x1FF; - render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); cpu_cycles += gput_sprite(width, height); break; } @@ -535,13 +536,13 @@ u32 gpu_parse(psx_gpu_struct 
*psx_gpu, u32 *list, u32 size, u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); u32 uv = list_s16[4]; - u32 width = list_s16[6] & 0x3FF; - u32 height = list_s16[7] & 0x1FF; + s32 width = list_s16[6] & 0x3FF; + s32 height = list_s16[7] & 0x1FF; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, width, height, - current_command, list[0]); + render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, + &width, &height, current_command, list[0]); cpu_cycles += gput_sprite(width, height); break; } @@ -550,8 +551,10 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + s32 width = 1, height = 1; - render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); cpu_cycles += gput_sprite(1, 1); break; } @@ -560,9 +563,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + s32 width = 8, height = 8; - render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]); - cpu_cycles += gput_sprite(8, 8); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); + cpu_cycles += gput_sprite(width, height); break; } @@ -571,12 +576,13 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); u32 uv = list_s16[4]; + s32 width = 8, height = 8; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 8, 8, - current_command, list[0]); - cpu_cycles += gput_sprite(8, 8); + render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, + &width, 
&height, current_command, list[0]); + cpu_cycles += gput_sprite(width, height); break; } @@ -584,9 +590,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + s32 width = 16, height = 16; - render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]); - cpu_cycles += gput_sprite(16, 16); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); + cpu_cycles += gput_sprite(width, height); break; } @@ -595,16 +603,18 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); u32 uv = list_s16[4]; + s32 width = 16, height = 16; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 16, 16, - current_command, list[0]); - cpu_cycles += gput_sprite(16, 16); + render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, + &width, &height, current_command, list[0]); + cpu_cycles += gput_sprite(width, height); break; } #ifdef PCSX + case 0x1F: // irq? case 0x80 ... 0x9F: // vid -> vid case 0xA0 ... 0xBF: // sys -> vid case 0xC0 ... 
0xDF: // vid -> sys @@ -1498,10 +1508,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); - u32 width = list_s16[4] & 0x3FF; - u32 height = list_s16[5] & 0x1FF; + s32 width = list_s16[4] & 0x3FF; + s32 height = list_s16[5] & 0x1FF; - render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); if (check_enhanced_range(psx_gpu, x, x + width)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); @@ -1515,13 +1526,13 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); u8 u = list_s16[4]; u8 v = list_s16[4] >> 8; - u32 width = list_s16[6] & 0x3FF; - u32 height = list_s16[7] & 0x1FF; + s32 width = list_s16[6] & 0x3FF; + s32 height = list_s16[7] & 0x1FF; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, u, v, width, height, - current_command, list[0]); + render_sprite(psx_gpu, x, y, u, v, + &width, &height, current_command, list[0]); if (check_enhanced_range(psx_gpu, x, x + width)) do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); @@ -1533,11 +1544,13 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + s32 width = 1, height = 1; - render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); if (check_enhanced_range(psx_gpu, x, x + 1)) - do_sprite_enhanced(psx_gpu, x, y, 0, 0, 1, 1, list[0]); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); cpu_cycles += gput_sprite(1, 1); break; } @@ -1546,12 +1559,14 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 
size, { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + s32 width = 8, height = 8; - render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); if (check_enhanced_range(psx_gpu, x, x + 8)) - do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]); - cpu_cycles += gput_sprite(8, 8); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); + cpu_cycles += gput_sprite(width, height); break; } @@ -1561,15 +1576,16 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); u8 u = list_s16[4]; u8 v = list_s16[4] >> 8; + s32 width = 8, height = 8; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, u, v, 8, 8, - current_command, list[0]); + render_sprite(psx_gpu, x, y, u, v, + &width, &height, current_command, list[0]); if (check_enhanced_range(psx_gpu, x, x + 8)) - do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]); - cpu_cycles += gput_sprite(8, 8); + do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); + cpu_cycles += gput_sprite(width, height); break; } @@ -1577,12 +1593,14 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + s32 width = 16, height = 16; - render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]); + render_sprite(psx_gpu, x, y, 0, 0, &width, &height, + current_command, list[0]); if (check_enhanced_range(psx_gpu, x, x + 16)) - do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]); - cpu_cycles += gput_sprite(16, 16); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); + cpu_cycles += gput_sprite(width, height); break; } @@ -1592,14 +1610,16 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, 
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); u8 u = list_s16[4]; u8 v = list_s16[4] >> 8; + s32 width = 16, height = 16; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, u, v, 16, 16, current_command, list[0]); + render_sprite(psx_gpu, x, y, u, v, + &width, &height, current_command, list[0]); if (check_enhanced_range(psx_gpu, x, x + 16)) - do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]); - cpu_cycles += gput_sprite(16, 16); + do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); + cpu_cycles += gput_sprite(width, height); break; } diff --git a/plugins/gpu_unai/gpu_raster_sprite.h b/plugins/gpu_unai/gpu_raster_sprite.h index ea4e82f2b..6909f4f8a 100644 --- a/plugins/gpu_unai/gpu_raster_sprite.h +++ b/plugins/gpu_unai/gpu_raster_sprite.h @@ -24,7 +24,7 @@ /////////////////////////////////////////////////////////////////////////////// // GPU internal sprite drawing functions -void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver) +void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver, s32 *w_out, s32 *h_out) { s32 x0, x1, y0, y1; u32 u0, v0; @@ -58,6 +58,8 @@ void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver) if (x1 > xmax) x1 = xmax; x1 -= x0; if (x1 <= 0) return; + *w_out = x1; + *h_out = y1 - y0; gpu_unai.r5 = packet.U1[0] >> 3; gpu_unai.g5 = packet.U1[1] >> 3; @@ -87,7 +89,7 @@ void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver) #include "gpu_arm.h" /* Notaz 4bit sprites optimization */ -void gpuDrawS16(PtrUnion packet) +void gpuDrawS16(PtrUnion packet, s32 *w_out, s32 *h_out) { s32 x0, y0; s32 u0, v0; @@ -110,7 +112,7 @@ void gpuDrawS16(PtrUnion packet) ((u0 | v0) & 15) || !(gpu_unai.TextureWindow[2] & gpu_unai.TextureWindow[3] & 8)) { // send corner cases to general handler packet.U4[3] = u32_to_le32(0x00100010); - gpuDrawS(packet, gpuSpriteSpanFn<0x20>); + gpuDrawS(packet, gpuSpriteSpanFn<0x20>, w_out, h_out); return; } @@ -123,12 +125,14 @@ void gpuDrawS16(PtrUnion packet) } else 
if (ymax - y0 < 16) h = ymax - y0; + *w_out = 16; + *h_out = h; draw_spr16_full(&gpu_unai.vram[FRAME_OFFSET(x0, y0)], &gpu_unai.TBA[FRAME_OFFSET(u0/4, v0)], gpu_unai.CBA, h); } #endif // __arm__ -void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver) +void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver, s32 *w_out, s32 *h_out) { s32 x0, x1, y0, y1; @@ -153,6 +157,8 @@ void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver) if (x1 > xmax) x1 = xmax; x1 -= x0; if (x1 <= 0) return; + *w_out = x1; + *h_out = y1 - y0; const u16 Data = GPU_RGB16(le32_to_u32(packet.U4[0])); le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)]; diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 191108b83..45c73a737 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -666,9 +666,9 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) case 0x62: case 0x63: { // Monochrome rectangle (variable size) PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; - gpuDrawT(packet, driver); - cpu_cycles += gput_sprite(le16_to_u16(packet.U2[4]) & 0x3ff, - le16_to_u16(packet.U2[5]) & 0x1ff); + s32 w = 0, h = 0; + gpuDrawT(packet, driver, &w, &h); + cpu_cycles += gput_sprite(w, h); } break; case 0x64: @@ -677,6 +677,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) case 0x67: { // Textured rectangle (variable size) gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + s32 w = 0, h = 0; //senquack - Only color 808080h-878787h allows skipping lighting calculation: // This fixes Silent Hill running animation on loading screens: @@ -695,9 +696,8 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != 
HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; - gpuDrawS(packet, driver); - cpu_cycles += gput_sprite(le16_to_u16(packet.U2[6]) & 0x3ff, - le16_to_u16(packet.U2[7]) & 0x1ff); + gpuDrawS(packet, driver, &w, &h); + cpu_cycles += gput_sprite(w, h); } break; case 0x68: @@ -706,7 +706,8 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) case 0x6B: { // Monochrome rectangle (1x1 dot) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; - gpuDrawT(packet, driver); + s32 w = 0, h = 0; + gpuDrawT(packet, driver, &w, &h); cpu_cycles += gput_sprite(1, 1); } break; @@ -716,8 +717,9 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) case 0x73: { // Monochrome rectangle (8x8) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; - gpuDrawT(packet, driver); - cpu_cycles += gput_sprite(8, 8); + s32 w = 0, h = 0; + gpuDrawT(packet, driver, &w, &h); + cpu_cycles += gput_sprite(w, h); } break; case 0x74: @@ -727,6 +729,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00080008); gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + s32 w = 0, h = 0; //senquack - Only color 808080h-878787h allows skipping lighting calculation: //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) @@ -734,8 +737,8 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = 
gpuSpriteSpanDrivers[driver_idx]; - gpuDrawS(packet, driver); - cpu_cycles += gput_sprite(8, 8); + gpuDrawS(packet, driver, &w, &h); + cpu_cycles += gput_sprite(w, h); } break; case 0x78: @@ -744,8 +747,9 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) case 0x7B: { // Monochrome rectangle (16x16) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; - gpuDrawT(packet, driver); - cpu_cycles += gput_sprite(16, 16); + s32 w = 0, h = 0; + gpuDrawT(packet, driver, &w, &h); + cpu_cycles += gput_sprite(w, h); } break; case 0x7C: @@ -753,9 +757,10 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) #ifdef __arm__ if ((gpu_unai.GPU_GP1 & 0x180) == 0 && (gpu_unai.Masking | gpu_unai.PixelMSB) == 0) { - gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); - gpuDrawS16(packet); - cpu_cycles += gput_sprite(16, 16); + s32 w = 0, h = 0; + gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuDrawS16(packet, &w, &h); + cpu_cycles += gput_sprite(w, h); break; } // fallthrough @@ -765,14 +770,15 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00100010); gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + s32 w = 0, h = 0; //senquack - Only color 808080h-878787h allows skipping lighting calculation: //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) // Strip lower 3 bits of each color and determine if lighting should be used: if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; - gpuDrawS(packet, driver); - cpu_cycles += gput_sprite(16, 
16); + gpuDrawS(packet, driver, &w, &h); + cpu_cycles += gput_sprite(w, h); } break; #ifdef TEST @@ -792,6 +798,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) case 0xC0: break; #else + case 0x1F: // irq? case 0x80 ... 0x9F: // vid -> vid case 0xA0 ... 0xBF: // sys -> vid case 0xC0 ... 0xDF: // vid -> sys diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 7d40938fd..bf511becb 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -625,6 +625,11 @@ static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles) pos += 4; continue; } + else if (cmd == 0x1f) { + log_anomaly("irq1?\n"); + pos++; + continue; + } // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0)) @@ -710,8 +715,8 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr if (len > 0) cpu_cycles += 5 + len; - log_io(".chain %08lx #%d+%d\n", - (long)(list - rambase) * 4, len, gpu.cmd_len); + log_io(".chain %08lx #%d+%d %u\n", + (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles); if (unlikely(gpu.cmd_len > 0)) { if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) { log_anomaly("cmd_buffer overflow, likely garbage commands\n"); From 26665bc5cb481a2087beb78793b3bef1be7c1597 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 2 Dec 2023 01:05:10 +0200 Subject: [PATCH 490/597] an alt hack for Judge Dredd --- libpcsxcore/database.c | 9 +++++++-- libpcsxcore/psxcommon.h | 1 + libpcsxcore/psxdma.c | 8 ++++---- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 6cce0737b..b35658ba9 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -51,6 +51,12 @@ static const char * const gpu_centering_hack_db[] = "SLPM86009", }; +static const char * const dualshock_timing1024_hack_db[] = +{ + /* Judge Dredd - could also be poor 
cdrom+mdec+dma timing */ + "SLUS00630", "SLES00755", +}; + static const char * const dualshock_init_analog_hack_db[] = { /* Formula 1 Championship Edition */ @@ -73,6 +79,7 @@ hack_db[] = HACK_ENTRY(gpu_slow_list_walking, gpu_slow_llist_db), HACK_ENTRY(gpu_busy, gpu_busy_hack_db), HACK_ENTRY(gpu_centering, gpu_centering_hack_db), + HACK_ENTRY(gpu_timing1024, dualshock_timing1024_hack_db), HACK_ENTRY(dualshock_init_analog, dualshock_init_analog_hack_db), }; @@ -97,8 +104,6 @@ cycle_multiplier_overrides[] = #endif /* Discworld Noir - audio skips if CPU runs too fast */ { 222, { "SLES01549", "SLES02063", "SLES02064" } }, - /* Judge Dredd - could also be poor MDEC timing */ - { 128, { "SLUS00630", "SLES00755" } }, /* Digimon World */ { 153, { "SLUS01032", "SLES02914" } }, /* Syphon Filter - reportedly hangs under unknown conditions */ diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index 01b2a9aa9..dce4f41e2 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -153,6 +153,7 @@ typedef struct { boolean gpu_busy; boolean gpu_centering; boolean dualshock_init_analog; + boolean gpu_timing1024; } hacks; } PcsxConfig; diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index e3655b5e0..a7c438e3a 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -191,7 +191,7 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU madr_next = 0xffffff; do_walking = Config.GpuListWalking; - if (do_walking < 0) + if (do_walking < 0 || Config.hacks.gpu_timing1024) do_walking = Config.hacks.gpu_slow_list_walking; madr_next_p = do_walking ? 
&madr_next : NULL; @@ -201,10 +201,10 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU HW_DMA2_MADR = SWAPu32(madr_next); - // Tekken 3 = use 1.0 only (not 1.5x) + // a hack for Judge Dredd which is annoyingly sensitive to timing + if (Config.hacks.gpu_timing1024) + size = 1024; - // Einhander = parse linked list in pieces (todo) - // Rebel Assault 2 = parse linked list in pieces (todo) psxRegs.gpuIdleAfter = psxRegs.cycle + size + 16; set_event(PSXINT_GPUDMA, size); return; From b54a1ac7cfa3f045afef2e04d3e255884c3d5ef0 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 1 Dec 2023 23:45:40 +0200 Subject: [PATCH 491/597] adjust dma alignment and some cleanup libretro/pcsx_rearmed#699 --- libpcsxcore/cdrom.c | 1 + libpcsxcore/mdec.c | 48 ++++++++++++++++++++++++++++++-------------- libpcsxcore/psxdma.c | 1 + 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 3991bad9d..28471e2be 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1635,6 +1635,7 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { switch (chcr & 0x71000000) { case 0x11000000: + madr &= ~3; ptr = getDmaRam(madr, &max_words); if (ptr == INVALID_PTR) { CDR_LOG_I("psxDma3() Log: *** DMA 3 *** NULL Pointer!\n"); diff --git a/libpcsxcore/mdec.c b/libpcsxcore/mdec.c index c0f2cfd70..38b03e1bb 100644 --- a/libpcsxcore/mdec.c +++ b/libpcsxcore/mdec.c @@ -227,8 +227,8 @@ struct _pending_dma1 { static struct { u32 reg0; u32 reg1; - u16 * rl; - u16 * rl_end; + const u16 * rl; + const u16 * rl_end; u8 * block_buffer_pos; u8 block_buffer[16*16*3]; struct _pending_dma1 pending_dma1; @@ -258,7 +258,7 @@ static int aanscales[DSIZE2] = { 289301, 401273, 377991, 340183, 289301, 227303, 156569, 79818 }; -static void iqtab_init(int *iqtab, unsigned char *iq_y) { +static void iqtab_init(int *iqtab, const unsigned char *iq_y) { int i; for (i = 0; i < DSIZE2; i++) { @@ -268,7 +268,7 @@ static void iqtab_init(int *iqtab, unsigned char *iq_y) { #define 
MDEC_END_OF_DATA 0xfe00 -static unsigned short *rl2blk(int *blk, unsigned short *mdec_rl) { +static const unsigned short *rl2blk(int *blk, const unsigned short *mdec_rl) { int i, k, q_scale, rl, used_col; int *iqtab; @@ -472,7 +472,8 @@ u32 mdecRead1(void) { } void psxDma0(u32 adr, u32 bcr, u32 chcr) { - int cmd = mdec.reg0; + u32 cmd = mdec.reg0, words_max = 0; + const void *mem; int size; if (chcr != 0x01000201) { @@ -485,9 +486,17 @@ void psxDma0(u32 adr, u32 bcr, u32 chcr) { size = (bcr >> 16) * (bcr & 0xffff); + adr &= ~3; + mem = getDmaRam(adr, &words_max); + if (mem == INVALID_PTR || size > words_max) { + log_unhandled("bad dma0 madr %x\n", adr); + HW_DMA0_CHCR &= SWAP32(~0x01000000); + return; + } + switch (cmd >> 28) { case 0x3: // decode 15/24bpp - mdec.rl = (u16 *) PSXM(adr); + mdec.rl = mem; /* now the mdec is busy till all data are decoded */ mdec.reg1 |= MDEC1_BUSY; /* detect the end of decoding */ @@ -507,7 +516,7 @@ void psxDma0(u32 adr, u32 bcr, u32 chcr) { case 0x4: // quantization table upload { - u8 *p = (u8 *)PSXM(adr); + const u8 *p = mem; // printf("uploading new quantization table\n"); // printmatrixu8(p); // printmatrixu8(p + 64); @@ -541,10 +550,10 @@ void mdec0Interrupt() #define SIZE_OF_16B_BLOCK (16*16*2) void psxDma1(u32 adr, u32 bcr, u32 chcr) { + u32 words, words_max = 0; int blk[DSIZE2 * 6]; u8 * image; int size; - u32 words; if (chcr != 0x01000200) { log_unhandled("mdec1: invalid dma %08x\n", chcr); @@ -561,9 +570,16 @@ void psxDma1(u32 adr, u32 bcr, u32 chcr) { mdec.pending_dma1.bcr = bcr; mdec.pending_dma1.chcr = chcr; /* do not free the dma */ - } else { + return; + } - image = (u8 *)PSXM(adr); + adr &= ~3; + image = getDmaRam(adr, &words_max); + if (image == INVALID_PTR || words > words_max) { + log_unhandled("bad dma1 madr %x\n", adr); + HW_DMA1_CHCR &= SWAP32(~0x01000000); + return; + } if (mdec.reg0 & MDEC0_RGB24) { /* 16 bits decoding @@ -623,12 +639,13 @@ void psxDma1(u32 adr, u32 bcr, u32 chcr) { mdec.block_buffer_pos = 
mdec.block_buffer + size; } } + if (size < 0) + log_unhandled("mdec: bork\n"); - /* define the power of mdec */ - set_event(PSXINT_MDECOUTDMA, words * MDEC_BIAS); - /* some CPU stalling */ - psxRegs.cycle += words; - } + /* define the power of mdec */ + set_event(PSXINT_MDECOUTDMA, words * MDEC_BIAS); + /* some CPU stalling */ + psxRegs.cycle += words; } void mdec1Interrupt() { @@ -657,6 +674,7 @@ void mdec1Interrupt() { */ /* MDEC_END_OF_DATA avoids read outside memory */ + //printf("mdec left %zd, v=%04x\n", mdec.rl_end - mdec.rl, *(mdec.rl)); if (mdec.rl >= mdec.rl_end || SWAP16(*(mdec.rl)) == MDEC_END_OF_DATA) { mdec.reg1 &= ~(MDEC1_STP|MDEC1_BUSY); if (HW_DMA0_CHCR & SWAP32(0x01000000)) diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index a7c438e3a..3ec42ede2 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -242,6 +242,7 @@ void psxDma6(u32 madr, u32 bcr, u32 chcr) { PSXDMA_LOG("*** DMA6 OT *** %x addr = %x size = %x\n", chcr, madr, bcr); if (chcr == 0x11000002) { + madr &= ~3; mem = getDmaRam(madr, &words_max); if (mem == INVALID_PTR) { log_unhandled("bad6 dma madr %x\n", madr); From b44231721e9094ff3a55fada01b5a5b9d6718632 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 3 Dec 2023 01:21:50 +0200 Subject: [PATCH 492/597] patch another gpulib alignment issue for neon bgr888_to_rgb888 --- plugins/gpulib/vout_pl.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index 7f31e151a..80389a344 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -29,7 +29,7 @@ static void check_mode_change(int force) { int w = gpu.screen.hres; int h = gpu.screen.vres; - int w_out, h_out; + int w_out, h_out, bpp = 16; if (gpu.state.screen_centering_type == C_BORDERLESS) h = gpu.screen.h; @@ -45,6 +45,11 @@ static void check_mode_change(int force) w_out *= 2; h_out *= 2; } + if (gpu.status & PSX_GPU_STATUS_RGB24) { + // some asm relies on this alignment + w_out = 
(w_out + 7) & ~7; + bpp = 24; + } // width|rgb24 change? if (force || (gpu.status ^ gpu.state.status_vo_old) & ((7<<16)|(1<<21)) @@ -55,8 +60,7 @@ static void check_mode_change(int force) gpu.state.h_out_old = h_out; if (w_out != 0 && h_out != 0) - cbs->pl_vout_set_mode(w_out, h_out, w, h, - (gpu.status & PSX_GPU_STATUS_RGB24) ? 24 : 16); + cbs->pl_vout_set_mode(w_out, h_out, w, h, bpp); } } From 1dc68512954094c1cd7bcc075e966925e3808108 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 4 Dec 2023 22:17:10 +0200 Subject: [PATCH 493/597] psxbios: implement some cdrom related stuff --- libpcsxcore/cdrom.c | 20 ++--- libpcsxcore/misc.c | 7 +- libpcsxcore/misc.h | 2 +- libpcsxcore/psxbios.c | 192 +++++++++++++++++++++++++++++++++++------- 4 files changed, 176 insertions(+), 45 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 28471e2be..24ff961d4 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1432,7 +1432,7 @@ unsigned char cdrRead0(void) { } void cdrWrite0(unsigned char rt) { - CDR_LOG_IO("cdr w0.idx: %02x\n", rt); + CDR_LOG_IO("cdr w0.x.idx: %02x\n", rt); cdr.Ctrl = (rt & 3) | (cdr.Ctrl & ~3); } @@ -1446,13 +1446,13 @@ unsigned char cdrRead1(void) { if (cdr.ResultP == cdr.ResultC) cdr.ResultReady = 0; - CDR_LOG_IO("cdr r1.rsp: %02x #%u\n", psxHu8(0x1801), cdr.ResultP - 1); + CDR_LOG_IO("cdr r1.x.rsp: %02x #%u\n", psxHu8(0x1801), cdr.ResultP - 1); return psxHu8(0x1801); } void cdrWrite1(unsigned char rt) { - const char *rnames[] = { "cmd", "smd", "smc", "arr" }; (void)rnames; + const char *rnames[] = { "0.cmd", "1.smd", "2.smc", "3.arr" }; (void)rnames; CDR_LOG_IO("cdr w1.%s: %02x\n", rnames[cdr.Ctrl & 3], rt); switch (cdr.Ctrl & 3) { @@ -1472,10 +1472,9 @@ void cdrWrite1(unsigned char rt) { SysPrintf(" Param[%d] = {", cdr.ParamC); for (i = 0; i < cdr.ParamC; i++) SysPrintf(" %x,", cdr.Param[i]); - SysPrintf("}\n"); - } else { - SysPrintf("\n"); + SysPrintf("}"); } + SysPrintf(" @%08x\n", psxRegs.pc); #endif cdr.ResultReady = 0; @@ 
-1504,12 +1503,12 @@ unsigned char cdrRead2(void) { else CDR_LOG_I("read empty fifo (%d)\n", cdr.FifoSize); - CDR_LOG_IO("cdr r2.dat: %02x\n", ret); + CDR_LOG_IO("cdr r2.x.dat: %02x\n", ret); return ret; } void cdrWrite2(unsigned char rt) { - const char *rnames[] = { "prm", "ien", "all", "arl" }; (void)rnames; + const char *rnames[] = { "0.prm", "1.ien", "2.all", "3.arl" }; (void)rnames; CDR_LOG_IO("cdr w2.%s: %02x\n", rnames[cdr.Ctrl & 3], rt); switch (cdr.Ctrl & 3) { @@ -1536,12 +1535,13 @@ unsigned char cdrRead3(void) { else psxHu8(0x1803) = cdr.IrqMask | 0xE0; - CDR_LOG_IO("cdr r3.%s: %02x\n", (cdr.Ctrl & 1) ? "ifl" : "ien", psxHu8(0x1803)); + CDR_LOG_IO("cdr r3.%d.%s: %02x\n", cdr.Ctrl & 3, + (cdr.Ctrl & 1) ? "ifl" : "ien", psxHu8(0x1803)); return psxHu8(0x1803); } void cdrWrite3(unsigned char rt) { - const char *rnames[] = { "req", "ifl", "alr", "ava" }; (void)rnames; + const char *rnames[] = { "0.req", "1.ifl", "2.alr", "3.ava" }; (void)rnames; u8 ll, lr, rl, rr; CDR_LOG_IO("cdr w3.%s: %02x\n", rnames[cdr.Ctrl & 3], rt); diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index f175e2a36..e652cc484 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -301,7 +301,7 @@ int LoadCdrom() { return 0; } -int LoadCdromFile(const char *filename, EXE_HEADER *head) { +int LoadCdromFile(const char *filename, EXE_HEADER *head, u8 *time_bcd_out) { struct iso_directory_record *dir; u8 time[4],*buf; u8 mdir[4096]; @@ -334,6 +334,7 @@ int LoadCdromFile(const char *filename, EXE_HEADER *head) { if (GetCdromFile(mdir, time, exename) == -1) return -1; READTRACK(); + incTime(); memcpy(head, buf + 12, sizeof(EXE_HEADER)); size = SWAP32(head->t_size); @@ -343,8 +344,8 @@ int LoadCdromFile(const char *filename, EXE_HEADER *head) { //psxCpu->Reset(); while (size & ~2047) { - incTime(); READTRACK(); + incTime(); mem = PSXM(addr); if (mem != INVALID_PTR) @@ -353,6 +354,8 @@ int LoadCdromFile(const char *filename, EXE_HEADER *head) { size -= 2048; addr += 2048; } + if 
(time_bcd_out) + memcpy(time_bcd_out, time, 3); return 0; } diff --git a/libpcsxcore/misc.h b/libpcsxcore/misc.h index 539acc7b0..22245d88a 100644 --- a/libpcsxcore/misc.h +++ b/libpcsxcore/misc.h @@ -60,7 +60,7 @@ extern int CdromFrontendId; // for frontend use int BiosBootBypass(); int LoadCdrom(); -int LoadCdromFile(const char *filename, EXE_HEADER *head); +int LoadCdromFile(const char *filename, EXE_HEADER *head, u8 *time_bcd_out); int CheckCdrom(); int Load(const char *ExePath); diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index e654f7c9c..936f07613 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -37,6 +37,8 @@ #include "psxhle.h" #include "psxinterpreter.h" #include "psxevents.h" +#include "cdrom.h" +#include #include #ifndef PSXBIOS_LOG @@ -83,7 +85,7 @@ char *biosA0n[256] = { "dev_card_close", "dev_card_firstfile", "dev_card_nextfile","dev_card_erase", "dev_card_undelete","dev_card_format", "dev_card_rename", "dev_card_6f", // 0x70 - "_bu_init", "_96_init", "CdRemove", "sys_a0_73", + "_bu_init", "CdInit", "CdRemove", "sys_a0_73", "sys_a0_74", "sys_a0_75", "sys_a0_76", "sys_a0_77", "_96_CdSeekL", "sys_a0_79", "sys_a0_7a", "sys_a0_7b", "_96_CdGetStatus", "sys_a0_7d", "_96_CdRead", "sys_a0_7f", @@ -94,7 +96,7 @@ char *biosA0n[256] = { "sys_a0_8c", "sys_a0_8d", "sys_a0_8e", "sys_a0_8f", // 0x90 "sys_a0_90", "sys_a0_91", "sys_a0_92", "sys_a0_93", - "sys_a0_94", "sys_a0_95", "AddCDROMDevice", "AddMemCardDevide", + "sys_a0_94", "CdReset", "AddCDROMDevice", "AddMemCardDevide", "DisableKernelIORedirection", "EnableKernelIORedirection", "sys_a0_9a", "sys_a0_9b", "SetConf", "GetConf", "sys_a0_9e", "SetMem", // 0xa0 @@ -439,9 +441,10 @@ static inline void softCallInException(u32 pc) { ra = sra; } -static u32 OpenEvent(u32 class, u32 spec, u32 mode, u32 func); -static u32 DeliverEvent(u32 class, u32 spec); -static u32 UnDeliverEvent(u32 class, u32 spec); +static u32 OpenEvent(u32 class, u32 spec, u32 mode, u32 func); +static void 
EnableEvent(u32 ev, int do_log); +static u32 DeliverEvent(u32 class, u32 spec); +static u32 UnDeliverEvent(u32 class, u32 spec); static void CloseEvent(u32 ev); /* * @@ -1583,11 +1586,53 @@ static void FlushCache() { use_cycles(500); } +// you likely want to mask irqs before calling these +static u8 cdrom_sync(int do_ack) +{ + u8 r = 0; + if (psxRegs.interrupt & (1u << PSXINT_CDR)) { + if ((s32)(psxRegs.cycle - event_cycles[PSXINT_CDR]) < 0) + psxRegs.cycle = event_cycles[PSXINT_CDR] + 1; + irq_test(&psxRegs.CP0); + } + if (do_ack) { + cdrWrite0(1); + r = cdrRead3() & 0x1f; + cdrWrite3(0x5f); // ack; clear params + } + return r; +} + +static void cdrom_cmd_and_wait(u8 cmd, int arg_cnt, int resp_cnt, ...) +{ + va_list ap; + + cdrom_sync(0); + cdrWrite0(0); + va_start(ap, resp_cnt); + while (arg_cnt-- > 0) + cdrWrite2(va_arg(ap, u32)); + va_end(ap); + cdrWrite1(cmd); + + if (resp_cnt > 0) { + u8 r = cdrom_sync(1); + assert(r == 3); (void)r; + cdrRead1(); + } + if (resp_cnt > 1) { + u8 r = cdrom_sync(1); + assert(r == 2); (void)r; + cdrRead1(); + } +} + /* * long Load(char *name, struct EXEC *header); */ void psxBios_Load() { // 0x42 + u8 time[3] = { 2, 0, 0x16 }; EXE_HEADER eheader; char path[256]; char *pa0, *p; @@ -1609,7 +1654,7 @@ void psxBios_Load() { // 0x42 else snprintf(path, sizeof(path), "%s", (char *)pa0); - if (LoadCdromFile(path, &eheader) == 0) { + if (LoadCdromFile(path, &eheader, time) == 0) { memcpy(pa1, ((char*)&eheader)+16, sizeof(EXEC)); psxCpu->Clear(a1, sizeof(EXEC) / 4); FlushCache(); @@ -1618,6 +1663,17 @@ void psxBios_Load() { // 0x42 PSXBIOS_LOG(" -> %d\n", v0); pc0 = ra; + + // set the cdrom to a state of just after exe read + psxRegs.CP0.n.SR &= ~0x404; + cdrom_sync(1); + cdrWrite0(1); + cdrWrite2(0x1f); // unmask + cdrom_cmd_and_wait(0x0e, 1, 1, 0x80u); // CdlSetmode + cdrom_cmd_and_wait(0x02, 3, 1, time[0], time[1], time[2]); // CdlSetloc + cdrom_cmd_and_wait(0x15, 0, 2); // CdlSeekL + psxHwWrite16(0x1f801070, ~4); + MTC0(&psxRegs, 12, 
psxRegs.CP0.n.SR | 0x404); } /* @@ -1686,6 +1742,14 @@ void psxBios_GPU_dw() { // 0x46 pc0 = ra; } +static void gpu_sync() { + // not implemented... + // might be problematic to do because of Config.GpuListWalking + if (psxRegs.interrupt & (1u << PSXINT_GPUDMA)) + log_unhandled("gpu_sync with active dma\n"); + mips_return_c(0, 21); +} + void psxBios_mem2vram() { // 0x47 int size; gpuSyncPluginSR(); // flush @@ -1712,8 +1776,8 @@ void psxBios_SendGPU() { // 0x48 void psxBios_GPU_cw() { // 0x49 GPU_writeData(a0); gpuSyncPluginSR(); - v0 = HW_GPU_STATUS; - pc0 = ra; + use_cycles(13); + gpu_sync(); } void psxBios_GPU_cwb() { // 0x4a @@ -1785,24 +1849,86 @@ void psxBios__bu_init() { // 70 pc0 = ra; } -void psxBios__96_init() { // 71 -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x71]); -#endif - - pc0 = ra; -} - static void write_chain(u32 *d, u32 next, u32 handler1, u32 handler2); static void psxBios_SysEnqIntRP_(u32 priority, u32 chain_eptr); static void psxBios_SysDeqIntRP_(u32 priority, u32 chain_rm_eptr); +static void psxBios_EnqueueCdIntr_(void) +{ + u32 *ram32 = (u32 *)psxM; + + // traps should already be installed by write_chain() + ram32[0x91d0/4] = 0; + ram32[0x91d4/4] = SWAP32(0xbfc0506c); + ram32[0x91d8/4] = SWAP32(0xbfc04dec); + psxBios_SysEnqIntRP_(0, 0x91d0); + ram32[0x91e0/4] = 0; + ram32[0x91e4/4] = SWAP32(0xbfc050a4); + ram32[0x91e8/4] = SWAP32(0xbfc04fbc); + psxBios_SysEnqIntRP_(0, 0x91e0); + use_cycles(31); +} + +static void setup_cd_irq_and_events(void) +{ + u16 specs[] = { 0x10, 0x20, 0x40, 0x80, 0x8000 }; + size_t i; + + psxBios_EnqueueCdIntr_(); + + for (i = 0; i < sizeof(specs) / sizeof(specs[0]); i++) { + u32 h = OpenEvent(0xf0000003, specs[i], EvMdMARK, 0); + // no error checks + storeRam32(A_CD_EVENTS + i * 4, h); + EnableEvent(h, 0); + } +} + +static void psxBios_CdReset_() { + psxRegs.CP0.n.SR &= ~0x404; // disable interrupts + + cdrom_sync(1); + cdrWrite0(1); + cdrWrite2(0x1f); // unmask + cdrom_cmd_and_wait(0x0a, 0, 2); // 
CdlReset + cdrom_cmd_and_wait(0x0e, 1, 1, 0x80u); // CdlSetmode + + // todo(?): should read something (iso root directory?) + // from { 0, 2, 16 } to somewhere and pause + + mips_return(1); + psxHwWrite16(0x1f801070, ~4); + MTC0(&psxRegs, 12, psxRegs.CP0.n.SR | 0x404); + DeliverEvent(0xf0000003, 0x0020); +} + +static void psxBios_CdInit() { // 54, 71 + PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x71]); + setup_cd_irq_and_events(); + + psxBios_CdReset_(); + + // this function takes pretty much forever + mips_return_c(0, 50000*11); +} + static void psxBios_DequeueCdIntr_() { psxBios_SysDeqIntRP_(0, 0x91d0); psxBios_SysDeqIntRP_(0, 0x91e0); use_cycles(16); } +static void psxBios_CdReset() { // 95 + PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x95]); + psxBios_CdReset_(); +} + +static void psxBios_EnqueueCdIntr() { // a2 + PSXBIOS_LOG("psxBios_%s\n", biosA0n[0xa2]); + psxBios_EnqueueCdIntr_(); + // return value comes from SysEnqIntRP() internal call +} + static void psxBios_DequeueCdIntr() { // a3 PSXBIOS_LOG("psxBios_%s\n", biosA0n[0xa3]); psxBios_DequeueCdIntr_(); @@ -2172,13 +2298,17 @@ static void psxBios_TestEvent() { // 0b mips_return_c(ret, 15); } -static void psxBios_EnableEvent() { // 0c +static void EnableEvent(u32 ev, int do_log) { u32 base = loadRam32(A_TT_EvCB); - u32 status = loadRam32(base + (a0 & 0xffff) * sizeof(EvCB) + 4); - PSXBIOS_LOG("psxBios_%s %x (%x)\n", biosB0n[0x0c], a0, status); + u32 status = loadRam32(base + (ev & 0xffff) * sizeof(EvCB) + 4); + if (do_log) + PSXBIOS_LOG("psxBios_%s %x (%x)\n", biosB0n[0x0c], ev, status); if (status != EvStUNUSED) - storeRam32(base + (a0 & 0xffff) * sizeof(EvCB) + 4, EvStACTIVE); + storeRam32(base + (ev & 0xffff) * sizeof(EvCB) + 4, EvStACTIVE); +} +static void psxBios_EnableEvent() { // 0c + EnableEvent(a0, 1); mips_return_c(1, 15); } @@ -3039,7 +3169,8 @@ static void psxBios_InitRCnt() { // 00 psxHwWrite16(0x1f801100 + i*0x10 + 8, 0); psxHwWrite16(0x1f801100 + i*0x10 + 0, 0); } - psxBios_SysEnqIntRP_(a0, 0x6d88); + for
(i = 0; i < 4; i++) + psxBios_SysEnqIntRP_(a0, 0x6d58 + i * 0x10); mips_return_c(0, 9); } @@ -3279,7 +3410,7 @@ static void setup_tt(u32 tcb_cnt, u32 evcb_cnt, u32 stack) ram32[0x0150/4] = SWAPu32(0x6ee0); // DCB - device control ram32[0x0154/4] = SWAPu32(0x0320); // DCB size - storeRam32(p_excb + 0*4, 0x91e0); // chain0 + storeRam32(p_excb + 0*4, 0x0000); // chain0 storeRam32(p_excb + 2*4, 0x6d88); // chain1 storeRam32(p_excb + 4*4, 0x0000); // chain2 storeRam32(p_excb + 6*4, 0x6d98); // chain3 @@ -3289,12 +3420,8 @@ static void setup_tt(u32 tcb_cnt, u32 evcb_cnt, u32 stack) for (i = 1; i < tcb_cnt; i++) storeRam32(p_tcb + sizeof(TCB) * i, 0x1000); - // default events - storeRam32(A_CD_EVENTS + 0x00, OpenEvent(0xf0000003, 0x0010, EvMdMARK, 0)); - storeRam32(A_CD_EVENTS + 0x04, OpenEvent(0xf0000003, 0x0020, EvMdMARK, 0)); - storeRam32(A_CD_EVENTS + 0x08, OpenEvent(0xf0000003, 0x0040, EvMdMARK, 0)); - storeRam32(A_CD_EVENTS + 0x0c, OpenEvent(0xf0000003, 0x0080, EvMdMARK, 0)); - storeRam32(A_CD_EVENTS + 0x10, OpenEvent(0xf0000003, 0x8000, EvMdMARK, 0)); + psxBios_SysEnqIntRP_(0, 0x6da8); + setup_cd_irq_and_events(); storeRam32(A_CONF_EvCB, evcb_cnt); storeRam32(A_CONF_TCB, tcb_cnt); @@ -3509,7 +3636,7 @@ void psxBiosInit() { biosA0[0x51] = psxBios_LoadExec; //biosA0[0x52] = psxBios_GetSysSp; //biosA0[0x53] = psxBios_sys_a0_53; - //biosA0[0x54] = psxBios__96_init_a54; + biosA0[0x54] = psxBios_CdInit; //biosA0[0x55] = psxBios__bu_init_a55; biosA0[0x56] = psxBios_CdRemove; //biosA0[0x57] = psxBios_sys_a0_57; @@ -3538,7 +3665,7 @@ void psxBiosInit() { //biosA0[0x6e] = psxBios_dev_card_rename; //biosA0[0x6f] = psxBios_dev_card_6f; biosA0[0x70] = psxBios__bu_init; - biosA0[0x71] = psxBios__96_init; + biosA0[0x71] = psxBios_CdInit; biosA0[0x72] = psxBios_CdRemove; //biosA0[0x73] = psxBios_sys_a0_73; //biosA0[0x74] = psxBios_sys_a0_74; @@ -3574,7 +3701,7 @@ void psxBiosInit() { biosA0[0x92] = hleExc0_1_1; biosA0[0x93] = hleExc0_0_1; //biosA0[0x94] = psxBios_sys_a0_94; - 
//biosA0[0x95] = psxBios_sys_a0_95; + biosA0[0x95] = psxBios_CdReset; //biosA0[0x96] = psxBios_AddCDROMDevice; //biosA0[0x97] = psxBios_AddMemCardDevide; //biosA0[0x98] = psxBios_DisableKernelIORedirection; @@ -3587,7 +3714,7 @@ void psxBiosInit() { biosA0[0x9f] = psxBios_SetMem; //biosA0[0xa0] = psxBios__boot; //biosA0[0xa1] = psxBios_SystemError; - //biosA0[0xa2] = psxBios_EnqueueCdIntr; + biosA0[0xa2] = psxBios_EnqueueCdIntr; biosA0[0xa3] = psxBios_DequeueCdIntr; //biosA0[0xa4] = psxBios_sys_a0_a4; //biosA0[0xa5] = psxBios_ReadSector; @@ -3794,6 +3921,7 @@ void psxBiosInit() { strcpy((char *)&ram32[0xeff0/4], "bu"); // default exception handler chains + // see also setup_cd_irq_and_events() write_chain(&ram32[0x91e0/4], 0x91d0, 0xbfc050a4, 0xbfc04fbc); // chain0.e0 write_chain(&ram32[0x91d0/4], 0x6da8, 0xbfc0506c, 0xbfc04dec); // chain0.e1 write_chain(&ram32[0x6da8/4], 0, 0, 0x1a00); // chain0.e2 From 06926f13a7830edc4fa1a4c0c01a6c86112a4eac Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 6 Dec 2023 23:59:11 +0200 Subject: [PATCH 494/597] psxbios: implement some more memcard details --- libpcsxcore/psxbios.c | 243 ++++++++++++++++++++++++++---------------- 1 file changed, 152 insertions(+), 91 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 936f07613..1ecb63ec9 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -45,6 +45,10 @@ //#define PSXBIOS_LOG printf #define PSXBIOS_LOG(...) #endif +#ifndef PSXBIOS_EV_LOG +//#define PSXBIOS_EV_LOG printf +#define PSXBIOS_EV_LOG(...) 
+#endif #define PTR_1 (void *)(size_t)1 @@ -114,10 +118,10 @@ char *biosB0n[256] = { "SysMalloc", "sys_b0_01", "sys_b0_02", "sys_b0_03", "sys_b0_04", "sys_b0_05", "sys_b0_06", "DeliverEvent", "OpenEvent", "CloseEvent", "WaitEvent", "TestEvent", - "EnableEvent", "DisableEvent", "OpenTh", "CloseTh", + "EnableEvent", "DisableEvent", "OpenTh", "CloseTh", // 0x10 - "ChangeTh", "sys_b0_11", "InitPAD", "StartPAD", - "StopPAD", "PAD_init", "PAD_dr", "ReturnFromExecption", + "ChangeTh", "sys_b0_11", "InitPAD", "StartPAD", + "StopPAD", "PAD_init", "PAD_dr", "ReturnFromException", "ResetEntryInt", "HookEntryInt", "sys_b0_1a", "sys_b0_1b", "sys_b0_1c", "sys_b0_1d", "sys_b0_1e", "sys_b0_1f", // 0x20 @@ -274,6 +278,7 @@ static u32 floodchk; #define A_EXCEPTION 0x0c80 #define A_EXC_SP 0x6cf0 #define A_EEXIT_DEF 0x6cf4 +#define A_CARD_ISLOT 0x7264 // 0 or 1, toggled by card vint handler #define A_KMALLOC_PTR 0x7460 #define A_KMALLOC_SIZE 0x7464 #define A_KMALLOC_END 0x7468 @@ -285,7 +290,10 @@ static u32 floodchk; #define A_PAD_IN_LEN 0x74d8 #define A_PAD_OUT_LEN 0x74e0 #define A_PAD_DR_DST 0x74c4 -#define A_CARD_CHAN1 0x7500 +#define A_CARD_ACHAN 0x7500 // currently active port in 0xPortSlot format +#define A_CARD_HANDLER 0x7528 // ptr to irq handler +#define A_CARD_STATUS1 0x7568 +#define A_CARD_STATUS2 0x7569 #define A_PAD_DR_BUF1 0x7570 #define A_PAD_DR_BUF2 0x7598 #define A_EEXIT_PTR 0x75d0 @@ -310,6 +318,10 @@ static u32 floodchk; #define A_C0_TRAPS 0x3010 #define A_B0_5B_TRAP 0x43d0 +#define CARD_HARDLER_WRITE 0x51F4 +#define CARD_HARDLER_READ 0x5688 +#define CARD_HARDLER_INFO 0x5B64 + #define HLEOP(n) SWAPu32((0x3b << 26) | (n)); static u8 loadRam8(u32 addr) @@ -1838,10 +1850,8 @@ void psxBios_LoadExec() { // 51 psxBios_Exec(); } -void psxBios__bu_init() { // 70 -#ifdef PSXBIOS_LOG +static void psxBios__bu_init() { // 70 PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x70]); -#endif DeliverEvent(0xf0000011, 0x0004); DeliverEvent(0xf4000001, 0x0004); @@ -2003,47 +2013,6 @@ static void 
psxBios_get_cd_status() // a6 pc0 = ra; } -static void psxBios__card_info() { // ab - PSXBIOS_LOG("psxBios_%s: %x\n", biosA0n[0xab], a0); - u32 ret, port; - storeRam32(A_CARD_CHAN1, a0); - port = a0 >> 4; - - switch (port) { - case 0x0: - case 0x1: - ret = 0x0004; - if (McdDisable[port & 1]) - ret = 0x0100; - break; - default: - PSXBIOS_LOG("psxBios_%s: UNKNOWN PORT 0x%x\n", biosA0n[0xab], a0); - ret = 0x0302; - break; - } - - if (McdDisable[0] && McdDisable[1]) - ret = 0x0100; - - DeliverEvent(0xf0000011, 0x0004); -// DeliverEvent(0xf4000001, 0x0004); - DeliverEvent(0xf4000001, ret); - v0 = 1; pc0 = ra; -} - -void psxBios__card_load() { // ac -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s: %x\n", biosA0n[0xac], a0); -#endif - - storeRam32(A_CARD_CHAN1, a0); - -// DeliverEvent(0xf0000011, 0x0004); - DeliverEvent(0xf4000001, 0x0004); - - v0 = 1; pc0 = ra; -} - static void psxBios_GetSystemInfo() { // b4 u32 ret = 0; //PSXBIOS_LOG("psxBios_%s %x\n", biosA0n[0xb4], a0); @@ -2141,13 +2110,13 @@ void psxBios_ResetRCnt() { // 06 } static u32 DeliverEvent(u32 class, u32 spec) { - EvCB *ev = (EvCB *)loadRam32ptr(A_TT_EvCB); + EvCB *ev, *ev_first = (EvCB *)loadRam32ptr(A_TT_EvCB); u32 evcb_len = loadRam32(A_TT_EvCB + 4); u32 ret = loadRam32(A_TT_EvCB) + evcb_len; u32 i, lim = evcb_len / 0x1c; //printf("%s %08x %x\n", __func__, class, spec); - for (i = 0; i < lim; i++, ev++) { + for (i = 0, ev = ev_first; i < lim; i++, ev++) { use_cycles(8); if (SWAP32(ev->status) != EvStACTIVE) continue; @@ -2160,12 +2129,17 @@ static u32 DeliverEvent(u32 class, u32 spec) { use_cycles(6); ret = SWAP32(ev->mode); if (ret == EvMdMARK) { + if (ev->status != SWAP32(EvStALREADY)) + PSXBIOS_EV_LOG("DeliverEvent %08x %x (%08zx) set\n", + class, spec, (ev - ev_first) | 0xf1000000u); ev->status = SWAP32(EvStALREADY); continue; } use_cycles(8); if (ret == EvMdCALL) { ret = SWAP32(ev->fhandler); + PSXBIOS_EV_LOG("DeliverEvent %08x %x (%08zx) cb %x\n", + class, spec, (ev - ev_first) | 0xf1000000u, ret); 
if (ret) { v0 = ret; softCall(ret); @@ -2947,9 +2921,14 @@ void psxBios_delete() { // 45 } void psxBios_InitCARD() { // 4a + u8 *ram8 = (u8 *)psxM; u32 *ram32 = (u32 *)psxM; PSXBIOS_LOG("psxBios_%s: %x\n", biosB0n[0x4a], a0); write_chain(ram32 + A_PADCRD_CHN_E/4, 0, 0x49bc, 0x4a4c); + //card_error = 0; + ram8[A_CARD_ISLOT] = 0; + ram8[A_CARD_STATUS1] = 1; + ram8[A_CARD_STATUS2] = 1; // (maybe) todo: early_card_irq, etc ram32[A_PAD_IRQR_ENA/4] = SWAP32(a0); @@ -2985,23 +2964,19 @@ void psxBios__card_write() { // 0x4e void *pa2 = Ra2; int port; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s: %x,%x,%x\n", biosB0n[0x4e], a0, a1, a2); -#endif - /* - Function also accepts sector 400h (a bug). - But notaz said we shouldn't allow sector 400h because it can corrupt the emulator. - */ - if (!(a1 <= 0x3FF)) + PSXBIOS_LOG("psxBios_%s %02x,%x,%x\n", biosB0n[0x4e], a0, a1, a2); + // function also accepts sector 400h (a bug), + // but what actually happens then? + if (a1 > 0x400) { /* Invalid sectors */ v0 = 0; pc0 = ra; return; } - storeRam32(A_CARD_CHAN1, a0); + storeRam32(A_CARD_ACHAN, a0); port = a0 >> 4; - if (pa2 != INVALID_PTR) { + if (pa2 != INVALID_PTR && a1 < 0x400) { if (port == 0) { memcpy(Mcd1Data + a1 * 128, pa2, 128); SaveMcd(Config.Mcd1, Mcd1Data, a1 * 128, 128); @@ -3011,33 +2986,27 @@ void psxBios__card_write() { // 0x4e } } - DeliverEvent(0xf0000011, 0x0004); -// DeliverEvent(0xf4000001, 0x0004); + storeRam8(A_CARD_STATUS1 + port, 4); // busy/write + storeRam32(A_CARD_HANDLER, CARD_HARDLER_READ); v0 = 1; pc0 = ra; } -void psxBios__card_read() { // 0x4f +static void psxBios__card_read() { // 0x4f void *pa2 = Ra2; int port; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x4f]); -#endif - /* - Function also accepts sector 400h (a bug). - But notaz said we shouldn't allow sector 400h because it can corrupt the emulator. 
- */ - if (!(a1 <= 0x3FF)) + PSXBIOS_LOG("psxBios_%s %x,%x,%x\n", biosB0n[0x4f], a0, a1, a2); + if (a1 > 0x400) { /* Invalid sectors */ v0 = 0; pc0 = ra; return; } - storeRam32(A_CARD_CHAN1, a0); + storeRam32(A_CARD_ACHAN, a0); port = a0 >> 4; - if (pa2 != INVALID_PTR) { + if (pa2 != INVALID_PTR && a1 < 0x400) { if (port == 0) { memcpy(pa2, Mcd1Data + a1 * 128, 128); } else { @@ -3045,8 +3014,8 @@ void psxBios__card_read() { // 0x4f } } - DeliverEvent(0xf0000011, 0x0004); -// DeliverEvent(0xf4000001, 0x0004); + storeRam8(A_CARD_STATUS1 + port, 2); // busy/read + storeRam32(A_CARD_HANDLER, CARD_HARDLER_READ); v0 = 1; pc0 = ra; } @@ -3126,12 +3095,11 @@ void psxBios_GetB0Table() { // 57 } static void psxBios__card_chan() { // 0x58 - u32 ret; - PSXBIOS_LOG("psxBios_%s\n", biosB0n[0x58]); - - // todo: should return active slot chan + // todo: should return active slot channel // (active - which was last processed by irq code) - ret = loadRam32(A_CARD_CHAN1); + u32 ret = loadRam32(A_CARD_ACHAN); + PSXBIOS_LOG("psxBios_%s -> %02x\n", biosB0n[0x58], ret); + mips_return_c(ret, 8); } @@ -3145,17 +3113,111 @@ static void psxBios_ChangeClearPad() { // 5b } static void psxBios__card_status() { // 5c - PSXBIOS_LOG("psxBios_%s %x\n", biosB0n[0x5c], a0); + u8 s = loadRam8(A_CARD_STATUS1 + a0); + PSXBIOS_LOG("psxBios_%s %x -> %x\n", biosB0n[0x5c], a0, s); - v0 = 1; // ready - pc0 = ra; + mips_return_c(s, 5); } static void psxBios__card_wait() { // 5d - PSXBIOS_LOG("psxBios_%s %x\n", biosB0n[0x5d], a0); + u8 s = loadRam8(A_CARD_STATUS1 + a0); + PSXBIOS_LOG("psxBios_%s %x -> %x\n", biosB0n[0x5d], a0, s); - v0 = 1; // ready - pc0 = ra; + // todo + if (!(s & 1)) + log_unhandled("%s %x\n", __func__, s); + + mips_return_c(s, 11); +} + +static void psxBios__card_info() { // A ab + PSXBIOS_LOG("psxBios_%s %02x\n", biosA0n[0xab], a0); + u32 ret, port; + storeRam32(A_CARD_ACHAN, a0); + port = a0 >> 4; + + switch (port) { + case 0x0: + case 0x1: + ret = 0x0004; + if (McdDisable[port & 1]) + 
ret = 0x0100; + break; + default: + PSXBIOS_LOG("psxBios_%s: UNKNOWN PORT 0x%x\n", biosA0n[0xab], a0); + ret = 0x0302; + break; + } + + if (McdDisable[0] && McdDisable[1]) + ret = 0x0100; + + if (ret == 4) { + // deliver from card_vint_handler() + storeRam8(A_CARD_STATUS1 + port, 8); // busy/info + storeRam32(A_CARD_HANDLER, CARD_HARDLER_INFO); + } else { + DeliverEvent(0xf4000001, ret); + DeliverEvent(0xf0000011, 0x0004); // ? + } + mips_return(1); +} + +static void psxBios__card_load() { // A ac + PSXBIOS_LOG("psxBios_%s %02x\n", biosA0n[0xac], a0); + + storeRam32(A_CARD_ACHAN, a0); + +// DeliverEvent(0xf0000011, 0x0004); + DeliverEvent(0xf4000001, 0x0004); + + mips_return(1); +} + +static void card_vint_handler(void) { + u8 select, status; + u32 handler; + UnDeliverEvent(0xf0000011, 0x0004); + UnDeliverEvent(0xf0000011, 0x8000); + UnDeliverEvent(0xf0000011, 0x0100); + UnDeliverEvent(0xf0000011, 0x0200); + UnDeliverEvent(0xf0000011, 0x2000); + +#if 0 + select = loadRam8(A_CARD_ISLOT); + select = (select ^ 1) & 1; + storeRam8(A_CARD_ISLOT, select); +#else + select = loadRam8(A_CARD_ACHAN) >> 4; + storeRam8(A_CARD_ISLOT, select); +#endif + status = loadRam8(A_CARD_STATUS1 + select); + if (status & 1) + return; // done + + //psxBios_SysDeqIntRP_(0, 0x7540); + //psxBios_SysDeqIntRP_(0, 0x7540); + //card_state_machine = 0; + //card_error_flag = 0; + handler = loadRam32(A_CARD_HANDLER); + switch (handler) { + case CARD_HARDLER_INFO: + DeliverEvent(0xf4000001, 4); + DeliverEvent(0xf0000011, 4); + storeRam8(A_CARD_STATUS1 + select, 1); + storeRam32(A_CARD_HANDLER, 0); + break; + case CARD_HARDLER_WRITE: + case CARD_HARDLER_READ: + DeliverEvent(0xf0000011, 4); + storeRam8(A_CARD_STATUS1 + select, 1); + storeRam32(A_CARD_HANDLER, 0); + break; + case 0: + break; + default: + log_unhandled("%s: unhandled handler %x\n", __func__, handler); + } } /* System calls C0 */ @@ -3347,7 +3409,7 @@ static const struct { { 0x1920, hleop_exc1_3_1 }, { 0x1794, hleop_exc1_3_2 }, { 0x2458, 
hleop_exc3_0_2 }, - { 0x49bc, hleop_exc_padcard1 }, + { 0x49bc, hleop_exc_padcard1 }, // hleExcPadCard1 { 0x4a4c, hleop_exc_padcard2 }, }; @@ -3637,7 +3699,7 @@ void psxBiosInit() { //biosA0[0x52] = psxBios_GetSysSp; //biosA0[0x53] = psxBios_sys_a0_53; biosA0[0x54] = psxBios_CdInit; - //biosA0[0x55] = psxBios__bu_init_a55; + biosA0[0x55] = psxBios__bu_init; biosA0[0x56] = psxBios_CdRemove; //biosA0[0x57] = psxBios_sys_a0_57; //biosA0[0x58] = psxBios_sys_a0_58; @@ -4201,9 +4263,8 @@ static void hleExcPadCard1(void) } if (loadRam32(A_PAD_ACK_VBL)) psxHwWrite16(0x1f801070, ~1); - if (loadRam32(A_CARD_IRQR_ENA)) { - // todo, maybe - } + if (loadRam32(A_CARD_IRQR_ENA)) + card_vint_handler(); mips_return_c(0, 18); } From 1237304d5560b492b8601a04e5cfa30b56f1537a Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 7 Dec 2023 00:03:37 +0200 Subject: [PATCH 495/597] cdrom: fix a copy-paste mistake thanks to xjsxjs197 for the report --- libpcsxcore/cdrom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 24ff961d4..b980be061 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1588,8 +1588,8 @@ void cdrWrite3(unsigned char rt) { rl == cdr.AttenuatorRightToLeft && rr == cdr.AttenuatorRightToRight) return; - cdr.AttenuatorLeftToLeftT = ll; cdr.AttenuatorLeftToRightT = lr; - cdr.AttenuatorRightToLeftT = rl; cdr.AttenuatorRightToRightT = rr; + cdr.AttenuatorLeftToLeft = ll; cdr.AttenuatorLeftToRight = lr; + cdr.AttenuatorRightToLeft = rl; cdr.AttenuatorRightToRight = rr; CDR_LOG_I("CD-XA Volume: %02x %02x | %02x %02x\n", ll, lr, rl, rr); SPU_setCDvol(ll, lr, rl, rr, psxRegs.cycle); } From 894cbc49fa8263d7ee465159175c994155c87ea6 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 7 Dec 2023 01:45:51 +0200 Subject: [PATCH 496/597] spu: fix a wrong assumption about cd volume reported by xjsxjs197 again --- plugins/dfsound/xa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/plugins/dfsound/xa.c b/plugins/dfsound/xa.c index e3a9fb61a..6b5433fbd 100644 --- a/plugins/dfsound/xa.c +++ b/plugins/dfsound/xa.c @@ -82,7 +82,8 @@ INLINE void MixCD(int *SSumLR, int *RVB, int ns_to, int decode_pos) int ns; uint32_t v = spu.XALastVal; - if ((vll | vlr | vrl | vrr) == 0) + // note: spu volume doesn't affect cd capture + if ((spu.cdv.ll | spu.cdv.lr | spu.cdv.rl | spu.cdv.rr) == 0) { SkipCD(ns_to, decode_pos); return; From a3fcdfcb3e366264a1d9f7cfda4268eb6cd91cc0 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 7 Dec 2023 01:31:48 +0200 Subject: [PATCH 497/597] gpu_neon: try to patch more pal vs enhancement trouble --- plugins/gpu_neon/psx_gpu_if.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index 570cc5d25..ccd60bebc 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -129,6 +129,8 @@ sync_enhancement_buffers(int x, int y, int w, int h) // due to intersection stuff, see the update_enhancement_buf_scanouts() mess int s_w = max(gpu.screen.hres, gpu.screen.w); int s_h = gpu.screen.vres; + if (gpu.screen.y < 0) + s_h -= gpu.screen.y; s_w = min(s_w, 512); for (i = 0; i < ARRAY_SIZE(egpu.enhancement_scanouts); i++) { const struct psx_gpu_scanout *s = &egpu.enhancement_scanouts[i]; @@ -164,10 +166,13 @@ void renderer_update_caches(int x, int y, int w, int h, int state_changed) if (gpu.state.enhancement_active) { if (state_changed) { + int vres = gpu.screen.vres; + if (gpu.screen.y < 0) + vres -= gpu.screen.y; memset(egpu.enhancement_scanouts, 0, sizeof(egpu.enhancement_scanouts)); egpu.enhancement_scanout_eselect = 0; update_enhancement_buf_scanouts(&egpu, - gpu.screen.src_x, gpu.screen.src_y, gpu.screen.hres, gpu.screen.vres); + gpu.screen.src_x, gpu.screen.src_y, gpu.screen.hres, vres); return; } sync_enhancement_buffers(x, y, w, h); @@ -195,10 +200,13 @@ void renderer_notify_res_change(void) void 
renderer_notify_scanout_change(int x, int y) { + int vres = gpu.screen.vres; if (!gpu.state.enhancement_active || !egpu.enhancement_buf_ptr) return; - update_enhancement_buf_scanouts(&egpu, x, y, gpu.screen.hres, gpu.screen.vres); + if (gpu.screen.y < 0) + vres -= gpu.screen.y; + update_enhancement_buf_scanouts(&egpu, x, y, gpu.screen.hres, vres); } #include "../../frontend/plugin_lib.h" From 442afc304ad15b4f61f8808a184d971f834f4b3f Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 9 Dec 2023 00:50:39 +0200 Subject: [PATCH 498/597] gpu: adjust timing seemed to be causing races, likely because we run the CPU too fast notaz/pcsx_rearmed#330 --- plugins/gpulib/gpu_timing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/gpulib/gpu_timing.h b/plugins/gpulib/gpu_timing.h index 0dfe0d68c..363e608d5 100644 --- a/plugins/gpulib/gpu_timing.h +++ b/plugins/gpulib/gpu_timing.h @@ -1,6 +1,6 @@ // very conservative and wrong -#define gput_fill(w, h) (23 + (4 + (w) / 16u) * (h)) +#define gput_fill(w, h) (23 + (4 + (w) / 32u) * (h)) #define gput_copy(w, h) ((w) * (h)) #define gput_poly_base() (23) #define gput_poly_base_t() (gput_poly_base() + 90) From 60693a92fbeeba0afec0b34c7628499881e25cfb Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 10 Dec 2023 23:26:27 +0200 Subject: [PATCH 499/597] indicate values are hex in config notaz/pcsx_rearmed#285 --- frontend/libpicofe | 2 +- frontend/menu.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/frontend/libpicofe b/frontend/libpicofe index 740c6f25f..be3677f18 160000 --- a/frontend/libpicofe +++ b/frontend/libpicofe @@ -1 +1 @@ -Subproject commit 740c6f25f8240deeb732a0a999f2a57cc2f6f6d6 +Subproject commit be3677f1867cef839334e7746ea1c8205ec73c8c diff --git a/frontend/menu.c b/frontend/menu.c index 413f3caf8..53713570d 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -499,6 +499,13 @@ static void make_cfg_fname(char *buf, size_t size, int is_game) static void 
keys_write_all(FILE *f); static char *mystrip(char *str); +static void write_u32_value(FILE *f, u32 v) +{ + if (v > 7) + fprintf(f, "0x"); + fprintf(f, "%x\n", v); +} + static int menu_write_config(int is_game) { char cfgfile[MAXPATHLEN]; @@ -521,13 +528,13 @@ static int menu_write_config(int is_game) fprintf(f, "%s\n", (char *)config_data[i].val); break; case 1: - fprintf(f, "%x\n", *(u8 *)config_data[i].val); + write_u32_value(f, *(u8 *)config_data[i].val); break; case 2: - fprintf(f, "%x\n", *(u16 *)config_data[i].val); + write_u32_value(f, *(u16 *)config_data[i].val); break; case 4: - fprintf(f, "%x\n", *(u32 *)config_data[i].val); + write_u32_value(f, *(u32 *)config_data[i].val); break; default: printf("menu_write_config: unhandled len %d for %s\n", From 2f70bda7011efe14e8e689cab687cf00b9fdfe7c Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 10 Dec 2023 23:30:48 +0200 Subject: [PATCH 500/597] standalone: allow other that 1 line scanlines doesn't seem useful, but since the code is done I'll keep it notaz/pcsx_rearmed/#287 --- frontend/menu.c | 3 ++- frontend/plugin_lib.c | 16 ++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/frontend/menu.c b/frontend/menu.c index 53713570d..7622c4c16 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1279,6 +1279,7 @@ static const char h_cscaler[] = "Displays the scaler layer, you can resize it\ static const char h_soft_filter[] = "Works only if game uses low resolution modes"; static const char h_gamma[] = "Gamma/brightness adjustment (default 100)"; #ifdef __ARM_NEON__ +static const char *men_scanlines[] = { "OFF", "1", "2", "3", NULL }; static const char h_scanline_l[] = "Scanline brightness, 0-100%"; #endif @@ -1343,7 +1344,7 @@ static menu_entry e_menu_gfx_options[] = mee_enum ("Hardware Filter", MA_OPT_HWFILTER, plat_target.hwfilter, men_dummy), mee_enum_h ("Software Filter", MA_OPT_SWFILTER, soft_filter, men_soft_filter, h_soft_filter), #ifdef __ARM_NEON__ - mee_onoff ("Scanlines", 
MA_OPT_SCANLINES, scanlines, 1), + mee_enum ("Scanlines", MA_OPT_SCANLINES, scanlines, men_scanlines), mee_range_h ("Scanline brightness", MA_OPT_SCANLINE_LEVEL, scanline_level, 0, 100, h_scanline_l), #endif mee_range_h ("Gamma adjustment", MA_OPT_GAMMA, g_gamma, 1, 200, h_gamma), diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 50aba227c..159da702c 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -391,17 +391,21 @@ static void pl_vout_flip(const void *vram, int stride, int bgr24, } else if (scanlines != 0 && scanline_level != 100) { - int l = scanline_level * 2048 / 100; + int h2, l = scanline_level * 2048 / 100; int stride_0 = pl_vout_scale_h >= 2 ? 0 : stride; h1 *= pl_vout_scale_h; - for (; h1 >= 2; h1 -= 2) + while (h1 > 0) { - bgr555_to_rgb565(dest, src, w * 2); - dest += dstride * 2, src += stride_0; + for (h2 = scanlines; h2 > 0 && h1 > 0; h2--, h1--) { + bgr555_to_rgb565(dest, src, w * 2); + dest += dstride * 2, src += stride_0; + } - bgr555_to_rgb565_b(dest, src, w * 2, l); - dest += dstride * 2, src += stride; + for (h2 = scanlines; h2 > 0 && h1 > 0; h2--, h1--) { + bgr555_to_rgb565_b(dest, src, w * 2, l); + dest += dstride * 2, src += stride; + } } } #endif From 96867f0e3c8f3a78ac3b9e34ba8bee757ecbc1d0 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 10 Dec 2023 23:53:51 +0200 Subject: [PATCH 501/597] standalone: allow scaler to cut off the letterbox notaz/pcsx_rearmed#286 --- frontend/menu.c | 60 ++++++++++++++++++++++++++++++++----------- frontend/plat_omap.c | 38 ++++++++++++++++++++------- frontend/plugin_lib.c | 12 ++++----- 3 files changed, 80 insertions(+), 30 deletions(-) diff --git a/frontend/menu.c b/frontend/menu.c index 7622c4c16..6705b6397 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1285,19 +1285,41 @@ static const char h_scanline_l[] = "Scanline brightness, 0-100%"; static int menu_loop_cscaler(int id, int keys) { + void *saved_layer = NULL; + size_t saved_layer_size = 0; + int was_layer_clipped = 
0; unsigned int inp; + if (!pl_vout_buf) + return -1; + g_scaler = SCALE_CUSTOM; + saved_layer_size = last_vout_w * last_vout_h * last_vout_bpp / 8; + saved_layer = malloc(saved_layer_size); + if (saved_layer) + memcpy(saved_layer, pl_vout_buf, saved_layer_size); plat_gvideo_open(Config.PsxType); + menu_draw_begin(0, 1); + memset(g_menuscreen_ptr, 4, g_menuscreen_w * g_menuscreen_h * 2); + menu_draw_end(); + for (;;) { - menu_draw_begin(0, 1); - memset(g_menuscreen_ptr, 4, g_menuscreen_w * g_menuscreen_h * 2); - text_out16(2, 2, "%d,%d", g_layer_x, g_layer_y); - text_out16(2, 480 - 18, "%dx%d | d-pad: resize, R+d-pad: move", g_layer_w, g_layer_h); - menu_draw_end(); + if (saved_layer && last_vout_bpp == 16) { + int top_x = max(0, -g_layer_x * last_vout_h / 800) + 1; + int top_y = max(0, -g_layer_y * last_vout_h / 480) + 1; + char text[128]; + memcpy(pl_vout_buf, saved_layer, saved_layer_size); + snprintf(text, sizeof(text), "%d,%d %dx%d", + g_layer_x, g_layer_y, g_layer_w, g_layer_h); + basic_text_out16_nf(pl_vout_buf, last_vout_w, + top_x, top_y, text); + basic_text_out16_nf(pl_vout_buf, last_vout_w, 2, + last_vout_h - 20, "d-pad: resize, R+d-pad: move"); + pl_vout_buf = plat_gvideo_flip(); + } inp = in_menu_wait(PBTN_UP|PBTN_DOWN|PBTN_LEFT|PBTN_RIGHT |PBTN_R|PBTN_MOK|PBTN_MBACK, NULL, 40); @@ -1315,22 +1337,30 @@ static int menu_loop_cscaler(int id, int keys) break; if (inp & (PBTN_UP|PBTN_DOWN|PBTN_LEFT|PBTN_RIGHT)) { - if (g_layer_x < 0) g_layer_x = 0; - if (g_layer_x > 640) g_layer_x = 640; - if (g_layer_y < 0) g_layer_y = 0; - if (g_layer_y > 420) g_layer_y = 420; - if (g_layer_w < 160) g_layer_w = 160; - if (g_layer_h < 60) g_layer_h = 60; - if (g_layer_x + g_layer_w > 800) - g_layer_w = 800 - g_layer_x; - if (g_layer_y + g_layer_h > 480) - g_layer_h = 480 - g_layer_y; + int layer_clipped = 0; + g_layer_x = max(-320, min(g_layer_x, 640)); + g_layer_y = max(-240, min(g_layer_y, 400)); + g_layer_w = max(160, g_layer_w); + g_layer_h = max( 60, g_layer_h); + if 
(g_layer_x < 0 || g_layer_x + g_layer_w > 800) + layer_clipped = 1; + if (g_layer_w > 800+400) + g_layer_w = 800+400; + if (g_layer_y < 0 || g_layer_y + g_layer_h > 480) + layer_clipped = 1; + if (g_layer_h > 480+360) + g_layer_h = 480+360; // resize the layer plat_gvideo_open(Config.PsxType); + if (layer_clipped || was_layer_clipped) + pl_vout_buf = plat_gvideo_set_mode(&last_vout_w, + &last_vout_h, &last_vout_bpp); + was_layer_clipped = layer_clipped; } } plat_gvideo_close(); + free(saved_layer); return 0; } diff --git a/frontend/plat_omap.c b/frontend/plat_omap.c index a4ff846d6..699e19730 100644 --- a/frontend/plat_omap.c +++ b/frontend/plat_omap.c @@ -79,11 +79,21 @@ static int omap_setup_layer_(int fd, int enabled, int x, int y, int w, int h) static int omap_enable_layer(int enabled) { + int x = g_layer_x, y = g_layer_y; + int w = g_layer_w, h = g_layer_h; + + // it's not allowed for the layer to be partially offscreen, + // instead it is faked by plat_gvideo_set_mode() + if (x < 0) { w += x; x = 0; } + if (y < 0) { h += y; y = 0; } + if (x + w > 800) w = 800 - x; + if (y + h > 480) h = 480 - y; + if (enabled) - pl_set_gun_rect(g_layer_x, g_layer_y, g_layer_w, g_layer_h); + pl_set_gun_rect(x, y, w, h); - return omap_setup_layer_(vout_fbdev_get_fd(layer_fb), enabled, - g_layer_x, g_layer_y, g_layer_w, g_layer_h); + return omap_setup_layer_(vout_fbdev_get_fd(layer_fb), + enabled, x, y, w, h); } void plat_omap_gvideo_open(void) @@ -101,15 +111,25 @@ void *plat_gvideo_set_mode(int *w_in, int *h_in, int *bpp) void *buf; if (g_scaler == SCALE_1_1 || g_scaler == SCALE_2_2) { - if (w > g_menuscreen_w) { + if (w > g_menuscreen_w) l = r = (w - g_menuscreen_w) / 2; - w -= l + r; - } - if (h > g_menuscreen_h) { + if (h > g_menuscreen_h) t = b = (h - g_menuscreen_h) / 2; - h -= t + b; - } } + else if (g_scaler == SCALE_CUSTOM) { + int right = g_layer_x + g_layer_w; + int bottom = g_layer_y + g_layer_h; + if (g_layer_x < 0) + l = -g_layer_x * w / g_menuscreen_w; + if 
(g_layer_y < 0) + t = -g_layer_y * h / g_menuscreen_h; + if (right > g_menuscreen_w) + r = (right - g_menuscreen_w) * w / g_menuscreen_w; + if (bottom > g_menuscreen_h) + b = (bottom - g_menuscreen_h) * h / g_menuscreen_h; + } + w -= l + r; + h -= t + b; buf = vout_fbdev_resize(layer_fb, w, h, *bpp, l, r, t, b, 3, 1); diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 159da702c..43216c675 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -227,12 +227,12 @@ static void update_layer_size(int w, int h) break; } - g_layer_x = g_menuscreen_w / 2 - g_layer_w / 2; - g_layer_y = g_menuscreen_h / 2 - g_layer_h / 2; - if (g_layer_x < 0) g_layer_x = 0; - if (g_layer_y < 0) g_layer_y = 0; - if (g_layer_w > g_menuscreen_w) g_layer_w = g_menuscreen_w; - if (g_layer_h > g_menuscreen_h) g_layer_h = g_menuscreen_h; + if (g_scaler != SCALE_CUSTOM) { + g_layer_x = g_menuscreen_w / 2 - g_layer_w / 2; + g_layer_y = g_menuscreen_h / 2 - g_layer_h / 2; + } + if (g_layer_w > g_menuscreen_w * 2) g_layer_w = g_menuscreen_w * 2; + if (g_layer_h > g_menuscreen_h * 2) g_layer_h = g_menuscreen_h * 2; } // XXX: this is platform specific really From 8338889c466f3dc392a7a83598a65dcac8cbcc38 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 10 Dec 2023 23:54:53 +0200 Subject: [PATCH 502/597] misc: avoid assertion failure when state save fails --- libpcsxcore/misc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index e652cc484..0848c267b 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -677,6 +677,10 @@ int SaveState(const char *file) { assert(!psxRegs.branching); assert(!psxRegs.cpuInRecursion); assert(!misc->magic); + + f = SaveFuncs.open(file, "wb"); + if (f == NULL) return -1; + misc->magic = MISC_MAGIC; misc->gteBusyCycle = psxRegs.gteBusyCycle; misc->muldivBusyCycle = psxRegs.muldivBusyCycle; @@ -687,9 +691,6 @@ int SaveState(const char *file) { misc->frame_counter = frame_counter; 
misc->CdromFrontendId = CdromFrontendId; - f = SaveFuncs.open(file, "wb"); - if (f == NULL) return -1; - psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); SaveFuncs.write(f, (void *)PcsxHeader, 32); From 42dde520a83b9c391b41b0eceecb1ce3eaed7e72 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 11 Dec 2023 23:31:40 +0200 Subject: [PATCH 503/597] standalone: revive spu_c64x build and some other minor pandora stuff --- Makefile | 4 +++- frontend/menu.c | 5 +++-- frontend/plugin_lib.c | 2 +- plugins/dfsound/Makefile.c64p | 3 +++ plugins/dfsound/externals.h | 10 +++++++--- plugins/dfsound/gauss_i.h | 2 +- plugins/dfsound/spu.c | 2 ++ plugins/dfsound/spu_c64x.c | 9 ++++----- plugins/dfsound/spu_c64x.h | 7 +++++-- plugins/dfsound/spu_c64x_dspcode.c | 6 +++--- plugins/dfsound/stdafx.h | 2 +- 11 files changed, 33 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 6ded0df00..7a52cdb0f 100644 --- a/Makefile +++ b/Makefile @@ -102,7 +102,7 @@ ifeq "$(ARCH)" "arm" OBJS += plugins/dfsound/arm_utils.o endif ifeq "$(HAVE_C64_TOOLS)" "1" -plugins/dfsound/spu.o: CFLAGS += -DC64X_DSP +plugins/dfsound/%.o: CFLAGS += -DC64X_DSP -DWANT_THREAD_CODE plugins/dfsound/spu.o: plugins/dfsound/spu_c64x.c frontend/menu.o: CFLAGS += -DC64X_DSP endif @@ -220,6 +220,8 @@ frontend/main.o frontend/menu.o: CFLAGS += -include frontend/pandora/ui_feat.h frontend/libpicofe/linux/plat.o: CFLAGS += -DPANDORA USE_PLUGIN_LIB = 1 USE_FRONTEND = 1 +CFLAGS += -gdwarf-3 -ffunction-sections -fdata-sections +LDFLAGS += -Wl,--gc-sections endif ifeq "$(PLATFORM)" "caanoo" OBJS += frontend/libpicofe/gp2x/in_gp2x.o frontend/warm/warm.o diff --git a/frontend/menu.c b/frontend/menu.c index 6705b6397..6f74d4516 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1720,6 +1720,7 @@ static const char h_confirm_save[] = "Ask for confirmation when overwriting s static const char h_restore_def[] = "Switches back to default / recommended\n" "configuration"; static const char h_frameskip[] = "Warning: 
frameskip sometimes causes glitches\n"; +static const char h_sputhr[] = "Warning: has some known bugs\n"; static menu_entry e_menu_options[] = { @@ -1730,9 +1731,9 @@ static menu_entry e_menu_options[] = mee_enum ("Region", 0, region, men_region), mee_range ("CPU clock", MA_OPT_CPU_CLOCKS, cpu_clock, 20, 5000), #ifdef C64X_DSP - mee_onoff ("Use C64x DSP for sound", MA_OPT_SPU_THREAD, spu_config.iUseThread, 1), + mee_onoff_h ("Use C64x DSP for sound", MA_OPT_SPU_THREAD, spu_config.iUseThread, 1, h_sputhr), #else - mee_onoff ("Threaded SPU", MA_OPT_SPU_THREAD, spu_config.iUseThread, 1), + mee_onoff_h ("Threaded SPU", MA_OPT_SPU_THREAD, spu_config.iUseThread, 1, h_sputhr), #endif mee_handler_id("[Display]", MA_OPT_DISP_OPTS, menu_loop_gfx_options), mee_handler ("[BIOS/Plugins]", menu_loop_plugin_options), diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 43216c675..1cbfdb0e0 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -338,7 +338,7 @@ static void pl_vout_flip(const void *vram, int stride, int bgr24, doffs = xoffs + y * dstride; if (dims_changed) - flip_clear_counter = 2; + flip_clear_counter = 3; if (flip_clear_counter > 0) { if (pl_plat_clear) diff --git a/plugins/dfsound/Makefile.c64p b/plugins/dfsound/Makefile.c64p index 5b9778107..5942017eb 100644 --- a/plugins/dfsound/Makefile.c64p +++ b/plugins/dfsound/Makefile.c64p @@ -1,6 +1,9 @@ ifndef C64_TOOLS_DSP_ROOT $(error need C64_TOOLS_DSP_ROOT) endif +ifndef TI_CGTOOLS +$(error need TI_CGTOOLS and stuff, source c64_tools/dsp/setenv.sh) +endif include $(C64_TOOLS_DSP_ROOT)/install.mk diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index e85c191bd..2b593ec7b 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -253,13 +253,17 @@ typedef struct sample_buf sb[MAXCHAN]; int interpolation; - sample_buf sb_thread[MAXCHAN]; + +#if P_HAVE_PTHREAD || defined(WANT_THREAD_CODE) + sample_buf * sb_thread; + sample_buf sb_thread_[MAXCHAN]; +#endif 
} SPUInfo; #define regAreaGet(offset) \ - spu.regArea[((offset) - 0xc00)>>1] + spu.regArea[((offset) - 0xc00) >> 1] #define regAreaGetCh(ch, offset) \ - spu.regArea[((ch<<4)|(offset))>>1] + spu.regArea[(((ch) << 4) | (offset)) >> 1] /////////////////////////////////////////////////////////// // SPU.C globals diff --git a/plugins/dfsound/gauss_i.h b/plugins/dfsound/gauss_i.h index 012cf701d..e754347ed 100644 --- a/plugins/dfsound/gauss_i.h +++ b/plugins/dfsound/gauss_i.h @@ -308,4 +308,4 @@ const int gauss[]={ 0xffffffff, 0x1307, 0x59b3, 0x12c7, }; -#endif \ No newline at end of file +#endif diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 6671e3eda..0e4b154e6 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1490,6 +1490,8 @@ static void init_spu_thread(void) { int ret; + spu.sb_thread = spu.sb_thread_; + if (sysconf(_SC_NPROCESSORS_ONLN) <= 1) return; diff --git a/plugins/dfsound/spu_c64x.c b/plugins/dfsound/spu_c64x.c index e0aa0da22..cb0d0b58b 100644 --- a/plugins/dfsound/spu_c64x.c +++ b/plugins/dfsound/spu_c64x.c @@ -162,7 +162,7 @@ static void thread_work_wait_sync(struct work_item *work, int force) preload(work->SSumLR); preload(work->SSumLR + 64/4); - f.stale_caches = 1; // SB, spuMem + f.stale_caches = 1; // sb, spuMem if (limit == 0) printf("dsp: wait timeout\n"); @@ -188,7 +188,7 @@ static void thread_work_wait_sync(struct work_item *work, int force) static void thread_sync_caches(void) { if (f.stale_caches) { - f.dsp_cache_inv_virt(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24); + f.dsp_cache_inv_virt(spu.sb_thread, sizeof(spu.sb_thread[0]) * MAXCHAN); f.dsp_cache_inv_virt(spu.spuMemC + 0x800, 0x800); if (spu.rvb->StartAddr) { int left = 0x40000 - spu.rvb->StartAddr; @@ -281,8 +281,7 @@ static void init_spu_thread(void) // override default allocations free(spu.spuMemC); spu.spuMemC = mem->spu_ram; - free(spu.SB); - spu.SB = mem->SB; + spu.sb_thread = mem->sb_thread; free(spu.s_chan); spu.s_chan = mem->in.s_chan; 
free(spu.rvb); @@ -326,7 +325,7 @@ static void exit_spu_thread(void) f.dsp_close(); spu.spuMemC = NULL; - spu.SB = NULL; + spu.sb_thread = spu.sb_thread_; spu.s_chan = NULL; spu.rvb = NULL; worker = NULL; diff --git a/plugins/dfsound/spu_c64x.h b/plugins/dfsound/spu_c64x.h index 56ede38da..dd07da1ba 100644 --- a/plugins/dfsound/spu_c64x.h +++ b/plugins/dfsound/spu_c64x.h @@ -1,6 +1,9 @@ #ifndef __P_SPU_C64X_H__ #define __P_SPU_C64X_H__ +#include "externals.h" +#include "spu_config.h" + #define COMPONENT_NAME "pcsxr_spu" enum { @@ -10,10 +13,10 @@ enum { struct region_mem { unsigned char spu_ram[512 * 1024]; - int SB[SB_SIZE * 24]; + sample_buf sb_thread[MAXCHAN]; // careful not to lose ARM writes by DSP overwriting // with old data when it's writing out neighbor cachelines - int _pad1[128/4 - ((SB_SIZE * 24) & (128/4 - 1))]; + int _pad1[128/4 - ((sizeof(sample_buf) * MAXCHAN / 4) & (128/4 - 1))]; struct spu_in { // these are not to be modified by DSP SPUCHAN s_chan[24 + 1]; diff --git a/plugins/dfsound/spu_c64x_dspcode.c b/plugins/dfsound/spu_c64x_dspcode.c index 570da5eda..709519c1d 100644 --- a/plugins/dfsound/spu_c64x_dspcode.c +++ b/plugins/dfsound/spu_c64x_dspcode.c @@ -114,7 +114,7 @@ static void do_processing(void) // nothing to do? 
Write out non-critical caches if (dirty) { syscalls.cache_wb(spu.spuMemC + 0x800, 0x800, 1); - syscalls.cache_wb(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24, 1); + syscalls.cache_wb(spu.sb_thread, sizeof(spu.sb_thread[0]) * MAXCHAN, 1); if (had_rvb) { left = 0x40000 - spu.rvb->StartAddr; syscalls.cache_wb(spu.spuMem + spu.rvb->StartAddr, left * 2, 1); @@ -143,7 +143,7 @@ static unsigned int exec(dsp_component_cmd_t cmd, InitADSR(); spu.spuMemC = mem->spu_ram; - spu.SB = mem->SB; + spu.sb_thread = mem->sb_thread; spu.s_chan = mem->in.s_chan; spu.rvb = &mem->in.rvb; worker = &mem->worker; @@ -169,7 +169,7 @@ static unsigned int exec(dsp_component_cmd_t cmd, do_processing(); - syscalls.cache_inv(&mem->SB, sizeof(mem->SB), 0); + syscalls.cache_inv(&mem->sb_thread, sizeof(mem->sb_thread), 0); syscalls.cache_inv(&mem->in, sizeof(mem->in), 0); break; diff --git a/plugins/dfsound/stdafx.h b/plugins/dfsound/stdafx.h index ff082bc85..71921a928 100644 --- a/plugins/dfsound/stdafx.h +++ b/plugins/dfsound/stdafx.h @@ -32,7 +32,7 @@ #define INLINE static inline #endif -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define HTOLE16(x) __builtin_bswap16(x) #define LE16TOH(x) __builtin_bswap16(x) #else From 8412166f53abb220b85e0aff47924c04724abfa4 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 13 Dec 2023 22:14:54 +0200 Subject: [PATCH 504/597] gpu: rework dma vs busy timing maybe should implement actual fifo instead someday libretro/pcsx_rearmed#809 --- frontend/plugin.c | 2 +- libpcsxcore/plugins.h | 2 +- libpcsxcore/psxdma.c | 33 +++++--- plugins/dfxvideo/gpulib_if.c | 54 ++++++------- plugins/gpu-gles/gpulib_if.c | 3 +- plugins/gpu_neon/psx_gpu/psx_gpu.h | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 96 ++++++++++++------------ plugins/gpu_neon/psx_gpu_if.c | 11 ++- plugins/gpu_unai/gpulib_if.cpp | 56 +++++++------- plugins/gpu_unai_old/gpulib_if.cpp | 5 +- plugins/gpulib/gpu.c | 44 ++++++----- 
plugins/gpulib/gpu.h | 6 +- plugins/gpulib/gpu_timing.h | 6 +- 13 files changed, 178 insertions(+), 142 deletions(-) diff --git a/frontend/plugin.c b/frontend/plugin.c index 02354639d..c400165f3 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -122,7 +122,7 @@ extern void GPUwriteDataMem(uint32_t *, int); extern uint32_t GPUreadStatus(void); extern uint32_t GPUreadData(void); extern void GPUreadDataMem(uint32_t *, int); -extern long GPUdmaChain(uint32_t *, uint32_t, uint32_t *); +extern long GPUdmaChain(uint32_t *, uint32_t, uint32_t *, int32_t *); extern void GPUupdateLace(void); extern long GPUfreeze(uint32_t, void *); extern void GPUvBlank(int, int); diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index d080baed0..df8ed87db 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -58,7 +58,7 @@ typedef void (CALLBACK* GPUwriteDataMem)(uint32_t *, int); typedef uint32_t (CALLBACK* GPUreadStatus)(void); typedef uint32_t (CALLBACK* GPUreadData)(void); typedef void (CALLBACK* GPUreadDataMem)(uint32_t *, int); -typedef long (CALLBACK* GPUdmaChain)(uint32_t *,uint32_t, uint32_t *); +typedef long (CALLBACK* GPUdmaChain)(uint32_t *, uint32_t, uint32_t *, int32_t *); typedef void (CALLBACK* GPUupdateLace)(void); typedef void (CALLBACK* GPUmakeSnapshot)(void); typedef void (CALLBACK* GPUkeypressed)(int); diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index 3ec42ede2..25ee2f0dc 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -90,6 +90,7 @@ void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU DMA_INTERRUPT(4); } +#if 0 // Taken from PEOPS SOFTGPU static inline boolean CheckForEndlessLoop(u32 laddr, u32 *lUsedAddr) { if (laddr == lUsedAddr[1]) return TRUE; @@ -130,11 +131,12 @@ static u32 gpuDmaChainSize(u32 addr) { return size; } +#endif void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU - u32 *ptr, madr_next, *madr_next_p, size; + u32 *ptr, madr_next, *madr_next_p; u32 words, words_left, words_max, words_copy; - int 
do_walking; + int cycles_sum, cycles_last_cmd = 0, do_walking; madr &= ~3; switch (chcr) { @@ -195,18 +197,19 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU do_walking = Config.hacks.gpu_slow_list_walking; madr_next_p = do_walking ? &madr_next : NULL; - size = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, madr_next_p); - if ((int)size <= 0) - size = gpuDmaChainSize(madr); + cycles_sum = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, + madr_next_p, &cycles_last_cmd); HW_DMA2_MADR = SWAPu32(madr_next); // a hack for Judge Dredd which is annoyingly sensitive to timing if (Config.hacks.gpu_timing1024) - size = 1024; + cycles_sum = 1024; - psxRegs.gpuIdleAfter = psxRegs.cycle + size + 16; - set_event(PSXINT_GPUDMA, size); + psxRegs.gpuIdleAfter = psxRegs.cycle + cycles_sum + cycles_last_cmd; + set_event(PSXINT_GPUDMA, cycles_sum); + //printf("%u dma2cf: %d,%d %08x\n", psxRegs.cycle, cycles_sum, + // cycles_last_cmd, HW_DMA2_MADR); return; default: @@ -221,11 +224,17 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU void gpuInterrupt() { if (HW_DMA2_CHCR == SWAP32(0x01000401) && !(HW_DMA2_MADR & SWAP32(0x800000))) { - u32 size, madr_next = 0xffffff, madr = SWAPu32(HW_DMA2_MADR); - size = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, &madr_next); + u32 madr_next = 0xffffff, madr = SWAPu32(HW_DMA2_MADR); + int cycles_sum, cycles_last_cmd = 0; + cycles_sum = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, + &madr_next, &cycles_last_cmd); HW_DMA2_MADR = SWAPu32(madr_next); - psxRegs.gpuIdleAfter = psxRegs.cycle + size + 64; - set_event(PSXINT_GPUDMA, size); + if ((s32)(psxRegs.gpuIdleAfter - psxRegs.cycle) > 0) + cycles_sum += psxRegs.gpuIdleAfter - psxRegs.cycle; + psxRegs.gpuIdleAfter = psxRegs.cycle + cycles_sum + cycles_last_cmd; + set_event(PSXINT_GPUDMA, cycles_sum); + //printf("%u dma2cn: %d,%d %08x\n", psxRegs.cycle, cycles_sum, + // cycles_last_cmd, HW_DMA2_MADR); return; } if (HW_DMA2_CHCR & SWAP32(0x01000000)) diff --git a/plugins/dfxvideo/gpulib_if.c 
b/plugins/dfxvideo/gpulib_if.c index 20383ab52..af35f3cba 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -306,12 +306,13 @@ void renderer_notify_scanout_change(int x, int y) #include "../gpulib/gpu_timing.h" extern const unsigned char cmd_lengths[256]; -int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd) +int do_cmd_list(uint32_t *list, int list_len, + int *cycles_sum_out, int *cycles_last, int *last_cmd) { + int cpu_cycles_sum = 0, cpu_cycles = *cycles_last; unsigned int cmd = 0, len; uint32_t *list_start = list; uint32_t *list_end = list + list_len; - u32 cpu_cycles = 0; for (; list < list_end; list += 1 + len) { @@ -341,7 +342,7 @@ int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd while(1) { - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); if(list_position >= list_end) { cmd = -1; @@ -366,7 +367,7 @@ int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd while(1) { - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); if(list_position >= list_end) { cmd = -1; @@ -387,8 +388,8 @@ int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd #ifdef TEST case 0xA0: // sys -> vid { - u32 load_width = LE2HOST32(slist[4]); - u32 load_height = LE2HOST32(slist[5]); + u32 load_width = LE2HOST16(slist[4]); + u32 load_height = LE2HOST16(slist[5]); u32 load_size = load_width * load_height; len += load_size / 2; @@ -398,32 +399,32 @@ int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd // timing case 0x02: - cpu_cycles += gput_fill(LE2HOST32(slist[4]) & 0x3ff, - LE2HOST32(slist[5]) & 0x1ff); + gput_sum(cpu_cycles_sum, cpu_cycles, + gput_fill(LE2HOST16(slist[4]) & 0x3ff, LE2HOST16(slist[5]) & 0x1ff)); break; - case 0x20 ... 0x23: cpu_cycles += gput_poly_base(); break; - case 0x24 ... 0x27: cpu_cycles += gput_poly_base_t(); break; - case 0x28 ... 
0x2B: cpu_cycles += gput_quad_base(); break; - case 0x2C ... 0x2F: cpu_cycles += gput_quad_base_t(); break; - case 0x30 ... 0x33: cpu_cycles += gput_poly_base_g(); break; - case 0x34 ... 0x37: cpu_cycles += gput_poly_base_gt(); break; - case 0x38 ... 0x3B: cpu_cycles += gput_quad_base_g(); break; - case 0x3C ... 0x3F: cpu_cycles += gput_quad_base_gt(); break; - case 0x40 ... 0x47: cpu_cycles += gput_line(0); break; - case 0x50 ... 0x57: cpu_cycles += gput_line(0); break; + case 0x20 ... 0x23: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base()); break; + case 0x24 ... 0x27: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t()); break; + case 0x28 ... 0x2B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base()); break; + case 0x2C ... 0x2F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); break; + case 0x30 ... 0x33: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g()); break; + case 0x34 ... 0x37: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt()); break; + case 0x38 ... 0x3B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g()); break; + case 0x3C ... 0x3F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); break; + case 0x40 ... 0x47: gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break; + case 0x50 ... 0x57: gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break; case 0x60 ... 0x63: - cpu_cycles += gput_sprite(LE2HOST32(slist[4]) & 0x3ff, - LE2HOST32(slist[5]) & 0x1ff); + gput_sum(cpu_cycles_sum, cpu_cycles, + gput_sprite(LE2HOST16(slist[4]) & 0x3ff, LE2HOST16(slist[5]) & 0x1ff)); break; case 0x64 ... 0x67: - cpu_cycles += gput_sprite(LE2HOST32(slist[6]) & 0x3ff, - LE2HOST32(slist[7]) & 0x1ff); + gput_sum(cpu_cycles_sum, cpu_cycles, + gput_sprite(LE2HOST16(slist[6]) & 0x3ff, LE2HOST16(slist[7]) & 0x1ff)); break; - case 0x68 ... 0x6B: cpu_cycles += gput_sprite(1, 1); break; + case 0x68 ... 0x6B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); break; case 0x70 ... 0x73: - case 0x74 ... 
0x77: cpu_cycles += gput_sprite(8, 8); break; + case 0x74 ... 0x77: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(8, 8)); break; case 0x78 ... 0x7B: - case 0x7C ... 0x7F: cpu_cycles += gput_sprite(16, 16); break; + case 0x7C ... 0x7F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(16, 16)); break; } } @@ -431,7 +432,8 @@ int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd gpu.ex_regs[1] &= ~0x1ff; gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff; - *cpu_cycles_out += cpu_cycles; + *cycles_sum_out += cpu_cycles_sum; + *cycles_last = cpu_cycles; *last_cmd = cmd; return list - list_start; } diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c index d440fdb10..f00c4592d 100644 --- a/plugins/gpu-gles/gpulib_if.c +++ b/plugins/gpu-gles/gpulib_if.c @@ -521,7 +521,8 @@ void renderer_notify_scanout_change(int x, int y) extern const unsigned char cmd_lengths[256]; // XXX: mostly dupe code from soft peops -int do_cmd_list(unsigned int *list, int list_len, int *cycles, int *last_cmd) +int do_cmd_list(uint32_t *list, int list_len, + int *cycles_sum_out, int *cycles_last, int *last_cmd) { unsigned int cmd, len; unsigned int *list_start = list; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index c40c8ae4a..687715a55 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -255,7 +255,7 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu); void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram); u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, - s32 *cpu_cycles, u32 *last_command); + s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command); void triangle_benchmark(psx_gpu_struct *psx_gpu); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index af26fa37c..d401522ac 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -252,10 +252,11 @@ 
static void do_fill(psx_gpu_struct *psx_gpu, u32 x, u32 y, #endif u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, - s32 *cpu_cycles_out, u32 *last_command) + s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command) { vertex_struct vertexes[4] __attribute__((aligned(16))) = {}; - u32 current_command = 0, command_length, cpu_cycles = 0; + u32 current_command = 0, command_length; + u32 cpu_cycles_sum = 0, cpu_cycles = *cpu_cycles_last; u32 *list_start = list; u32 *list_end = list + (size / 4); @@ -284,7 +285,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 color = list[0] & 0xFFFFFF; do_fill(psx_gpu, x, y, width, height, color); - cpu_cycles += gput_fill(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_fill(width, height)); break; } @@ -297,7 +298,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy(2, 6); render_triangle(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base()); break; } @@ -312,7 +313,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv(2, 10); render_triangle(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base_t(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t()); break; } @@ -327,7 +328,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - cpu_cycles += gput_quad_base(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base()); break; } @@ -344,7 +345,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - cpu_cycles += gput_quad_base_t(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); break; } @@ -355,7 +356,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, 
get_vertex_data_xy_rgb(2, 8); render_triangle(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base_g(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g()); break; } @@ -369,7 +370,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv_rgb(2, 12); render_triangle(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base_gt(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt()); break; } @@ -382,7 +383,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - cpu_cycles += gput_quad_base_g(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g()); break; } @@ -398,7 +399,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_triangle(psx_gpu, vertexes, current_command); render_triangle(psx_gpu, &(vertexes[1]), current_command); - cpu_cycles += gput_quad_base_gt(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); break; } @@ -410,7 +411,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, vertexes[1].y = list_s16[5] + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, list[0], 0); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break; } @@ -432,7 +433,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, list[0], 0); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); list_position++; num_vertexes++; @@ -467,7 +468,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, vertexes[1].y = list_s16[7] + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, 0, 0); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break; } @@ -498,7 +499,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 
size, vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, 0, 0); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); list_position += 2; num_vertexes++; @@ -527,7 +528,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, 0, 0, &width, &height, current_command, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -543,7 +544,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, &width, &height, current_command, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -555,7 +556,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, 0, 0, &width, &height, current_command, list[0]); - cpu_cycles += gput_sprite(1, 1); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); break; } @@ -567,7 +568,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, 0, 0, &width, &height, current_command, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -582,7 +583,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, &width, &height, current_command, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -594,7 +595,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, 0, 0, &width, &height, current_command, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -609,7 +610,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 
*list, u32 size, render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, &width, &height, current_command, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -789,7 +790,8 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, } breakloop: - *cpu_cycles_out += cpu_cycles; + *cpu_cycles_sum_out += cpu_cycles_sum; + *cpu_cycles_last = cpu_cycles; *last_command = current_command; return list - list_start; } @@ -1202,10 +1204,11 @@ static void do_sprite_enhanced(psx_gpu_struct *psx_gpu, int x, int y, #endif u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, - s32 *cpu_cycles_out, u32 *last_command) + s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command) { vertex_struct vertexes[4] __attribute__((aligned(16))) = {}; - u32 current_command = 0, command_length, cpu_cycles = 0; + u32 current_command = 0, command_length; + u32 cpu_cycles_sum = 0, cpu_cycles = *cpu_cycles_last; u32 *list_start = list; u32 *list_end = list + (size / 4); @@ -1244,7 +1247,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, x &= ~0xF; width = ((width + 0xF) & ~0xF); - cpu_cycles += gput_fill(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_fill(width, height)); if (width == 0 || height == 0) break; @@ -1275,7 +1278,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy(2, 6); do_triangle_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base()); break; } @@ -1290,7 +1293,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv(2, 10); do_triangle_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base_t(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t()); break; } @@ -1304,7 +1307,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, 
get_vertex_data_xy(3, 8); do_quad_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_quad_base(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base()); break; } @@ -1321,7 +1324,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, uv_hack(vertexes, 4); do_quad_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_quad_base_t(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); break; } @@ -1332,7 +1335,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_rgb(2, 8); do_triangle_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base_g(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g()); break; } @@ -1346,7 +1349,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv_rgb(2, 12); do_triangle_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_poly_base_gt(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt()); break; } @@ -1358,7 +1361,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_rgb(3, 12); do_quad_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_quad_base_g(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g()); break; } @@ -1374,7 +1377,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, uv_hack(vertexes, 4); do_quad_enhanced(psx_gpu, vertexes, current_command); - cpu_cycles += gput_quad_base_gt(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); break; } @@ -1388,7 +1391,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_line(psx_gpu, vertexes, current_command, list[0], 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, list[0], 1); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break; } @@ -1413,7 +1416,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 
*list, u32 size, render_line(psx_gpu, vertexes, current_command, list[0], 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, list[0], 1); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); list_position++; num_vertexes++; @@ -1450,7 +1453,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_line(psx_gpu, vertexes, current_command, 0, 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, 0, 1); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break; } @@ -1484,7 +1487,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_line(psx_gpu, vertexes, current_command, 0, 0); if (enhancement_enable(psx_gpu)) render_line(psx_gpu, vertexes, current_command, 0, 1); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); list_position += 2; num_vertexes++; @@ -1516,7 +1519,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + width)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -1536,7 +1539,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + width)) do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -1551,7 +1554,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 1)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); - cpu_cycles += gput_sprite(1, 1); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); break; } @@ -1566,7 +1569,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 
*list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 8)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -1585,7 +1588,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 8)) do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -1600,7 +1603,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 16)) do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -1619,7 +1622,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, if (check_enhanced_range(psx_gpu, x, x + 16)) do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); - cpu_cycles += gput_sprite(width, height); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); break; } @@ -1773,7 +1776,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, enhancement_disable(); breakloop: - *cpu_cycles_out += cpu_cycles; + *cpu_cycles_sum_out += cpu_cycles_sum; + *cpu_cycles_last = cpu_cycles; *last_command = current_command; return list - list_start; } diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index ccd60bebc..81eadfe92 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -39,7 +39,8 @@ sync_enhancement_buffers(int x, int y, int w, int h); static psx_gpu_struct egpu __attribute__((aligned(256))); -int do_cmd_list(uint32_t *list, int count, int *cycles, int *last_cmd) +int do_cmd_list(uint32_t *list, int count, + int *cycles_sum, int *cycles_last, int *last_cmd) { int ret; 
@@ -49,9 +50,11 @@ int do_cmd_list(uint32_t *list, int count, int *cycles, int *last_cmd) #endif if (gpu.state.enhancement_active) - ret = gpu_parse_enhanced(&egpu, list, count * 4, cycles, (u32 *)last_cmd); + ret = gpu_parse_enhanced(&egpu, list, count * 4, + cycles_sum, cycles_last, (u32 *)last_cmd); else - ret = gpu_parse(&egpu, list, count * 4, cycles, (u32 *)last_cmd); + ret = gpu_parse(&egpu, list, count * 4, + cycles_sum, cycles_last, (u32 *)last_cmd); #if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD) __asm__ __volatile__("":::"q4","q5","q6","q7"); @@ -157,7 +160,7 @@ void renderer_sync_ecmds(uint32_t *ecmds) { s32 dummy0 = 0; u32 dummy1 = 0; - gpu_parse(&egpu, ecmds + 1, 6 * 4, &dummy0, &dummy1); + gpu_parse(&egpu, ecmds + 1, 6 * 4, &dummy0, &dummy0, &dummy1); } void renderer_update_caches(int x, int y, int w, int h, int state_changed) diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 45c73a737..6816e2bd9 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -393,13 +393,14 @@ static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word) #include "../gpulib/gpu_timing.h" extern const unsigned char cmd_lengths[256]; -int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) +int do_cmd_list(u32 *list_, int list_len, + int *cycles_sum_out, int *cycles_last, int *last_cmd) { + int cpu_cycles_sum = 0, cpu_cycles = *cycles_last; u32 cmd = 0, len, i; - le32_t *list = (le32_t *)_list; + le32_t *list = (le32_t *)list_; le32_t *list_start = list; le32_t *list_end = list + list_len; - u32 cpu_cycles = 0; //TODO: set ilace_mask when resolution changes instead of every time, // eliminate #ifdef below. 
@@ -432,8 +433,8 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) { case 0x02: gpuClearImage(packet); - cpu_cycles += gput_fill(le16_to_s16(packet.U2[4]) & 0x3ff, - le16_to_s16(packet.U2[5]) & 0x1ff); + gput_sum(cpu_cycles_sum, cpu_cycles, + gput_fill(le16_to_s16(packet.U2[4]) & 0x3ff, le16_to_s16(packet.U2[5]) & 0x1ff)); break; case 0x20: @@ -446,7 +447,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.Masking | Blending | gpu_unai.PixelMSB ]; gpuDrawPolyF(packet, driver, false); - cpu_cycles += gput_poly_base(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base()); } break; case 0x24: @@ -471,7 +472,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) PP driver = gpuPolySpanDrivers[driver_idx]; gpuDrawPolyFT(packet, driver, false); - cpu_cycles += gput_poly_base_t(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t()); } break; case 0x28: @@ -484,7 +485,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.Masking | Blending | gpu_unai.PixelMSB ]; gpuDrawPolyF(packet, driver, true); // is_quad = true - cpu_cycles += gput_quad_base(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base()); } break; case 0x2C: @@ -509,7 +510,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) PP driver = gpuPolySpanDrivers[driver_idx]; gpuDrawPolyFT(packet, driver, true); // is_quad = true - cpu_cycles += gput_quad_base_t(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); } break; case 0x30: @@ -527,7 +528,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB ]; gpuDrawPolyG(packet, driver, false); - cpu_cycles += gput_poly_base_g(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g()); } break; case 0x34: @@ -543,7 +544,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int 
*last_cmd) gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB ]; gpuDrawPolyGT(packet, driver, false); - cpu_cycles += gput_poly_base_gt(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt()); } break; case 0x38: @@ -558,7 +559,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB ]; gpuDrawPolyG(packet, driver, true); // is_quad = true - cpu_cycles += gput_quad_base_g(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g()); } break; case 0x3C: @@ -574,7 +575,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB ]; gpuDrawPolyGT(packet, driver, true); // is_quad = true - cpu_cycles += gput_quad_base_gt(); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); } break; case 0x40: @@ -585,7 +586,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; PSD driver = gpuPixelSpanDrivers[driver_idx]; gpuDrawLineF(packet, driver); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); } break; case 0x48 ... 0x4F: { // Monochrome line strip @@ -602,7 +603,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2]; gpu_unai.PacketBuffer.U4[2] = *list_position++; gpuDrawLineF(packet, driver); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); num_vertexes++; if(list_position >= list_end) { @@ -626,7 +627,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) driver_idx |= (1 << 5); PSD driver = gpuPixelSpanDrivers[driver_idx]; gpuDrawLineG(packet, driver); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); } break; case 0x58 ... 
0x5F: { // Gouraud-shaded line strip @@ -647,7 +648,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu_unai.PacketBuffer.U4[2] = *list_position++; gpu_unai.PacketBuffer.U4[3] = *list_position++; gpuDrawLineG(packet, driver); - cpu_cycles += gput_line(0); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); num_vertexes++; if(list_position >= list_end) { @@ -668,7 +669,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; s32 w = 0, h = 0; gpuDrawT(packet, driver, &w, &h); - cpu_cycles += gput_sprite(w, h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); } break; case 0x64: @@ -697,7 +698,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver, &w, &h); - cpu_cycles += gput_sprite(w, h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); } break; case 0x68: @@ -708,7 +709,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; s32 w = 0, h = 0; gpuDrawT(packet, driver, &w, &h); - cpu_cycles += gput_sprite(1, 1); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); } break; case 0x70: @@ -719,7 +720,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; s32 w = 0, h = 0; gpuDrawT(packet, driver, &w, &h); - cpu_cycles += gput_sprite(w, h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); } break; case 0x74: @@ -738,7 +739,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; 
gpuDrawS(packet, driver, &w, &h); - cpu_cycles += gput_sprite(w, h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); } break; case 0x78: @@ -749,7 +750,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; s32 w = 0, h = 0; gpuDrawT(packet, driver, &w, &h); - cpu_cycles += gput_sprite(w, h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); } break; case 0x7C: @@ -760,7 +761,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) s32 w = 0, h = 0; gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); gpuDrawS16(packet, &w, &h); - cpu_cycles += gput_sprite(w, h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); break; } // fallthrough @@ -778,7 +779,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver, &w, &h); - cpu_cycles += gput_sprite(w, h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); } break; #ifdef TEST @@ -815,7 +816,8 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) gpu.ex_regs[1] &= ~0x1ff; gpu.ex_regs[1] |= gpu_unai.GPU_GP1 & 0x1ff; - *cpu_cycles_out += cpu_cycles; + *cycles_sum_out += cpu_cycles_sum; + *cycles_last = cpu_cycles; *last_cmd = cmd; return list - list_start; } @@ -823,7 +825,7 @@ int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd) void renderer_sync_ecmds(u32 *ecmds) { int dummy; - do_cmd_list(&ecmds[1], 6, &dummy, &dummy); + do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy); } void renderer_update_caches(int x, int y, int w, int h, int state_changed) diff --git a/plugins/gpu_unai_old/gpulib_if.cpp b/plugins/gpu_unai_old/gpulib_if.cpp index ee694d35d..e0d2005f9 100644 --- a/plugins/gpu_unai_old/gpulib_if.cpp +++ b/plugins/gpu_unai_old/gpulib_if.cpp @@ 
-169,7 +169,8 @@ void renderer_notify_scanout_change(int x, int y) extern const unsigned char cmd_lengths[256]; -int do_cmd_list(unsigned int *list, int list_len, int *cycles, int *last_cmd) +int do_cmd_list(uint32_t *list, int list_len, + int *cycles_sum_out, int *cycles_last, int *last_cmd) { unsigned int cmd = 0, len, i; unsigned int *list_start = list; @@ -523,7 +524,7 @@ int do_cmd_list(unsigned int *list, int list_len, int *cycles, int *last_cmd) void renderer_sync_ecmds(uint32_t *ecmds) { int dummy; - do_cmd_list(&ecmds[1], 6, &dummy, &dummy); + do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy); } void renderer_update_caches(int x, int y, int w, int h, int state_changed) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index bf511becb..a85136090 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -34,14 +34,15 @@ struct psx_gpu gpu; -static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles); +static noinline int do_cmd_buffer(uint32_t *data, int count, + int *cycles_sum, int *cycles_last); static void finish_vram_transfer(int is_read); static noinline void do_cmd_reset(void) { int dummy = 0; if (unlikely(gpu.cmd_len > 0)) - do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy); + do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy); gpu.cmd_len = 0; if (unlikely(gpu.dma.h > 0)) @@ -175,7 +176,7 @@ static noinline void decide_frameskip(void) if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) { int dummy = 0; - do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy); + do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy); gpu.frameskip.pending_fill[0] = 0; } } @@ -536,7 +537,7 @@ static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) case 0x02: if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h) // clearing something large, don't skip - do_cmd_list(list, 3, &dummy, &dummy); + do_cmd_list(list, 3, &dummy, &dummy, 
&dummy); else memcpy(gpu.frameskip.pending_fill, list, 3 * 4); break; @@ -586,7 +587,8 @@ static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) return pos; } -static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles) +static noinline int do_cmd_buffer(uint32_t *data, int count, + int *cycles_sum, int *cycles_last) { int cmd, pos; uint32_t old_e3 = gpu.ex_regs[3]; @@ -620,7 +622,9 @@ static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles) cmd = -1; // incomplete cmd, can't consume yet break; } - do_vram_copy(data + pos + 1, cpu_cycles); + *cycles_sum += *cycles_last; + *cycles_last = 0; + do_vram_copy(data + pos + 1, cycles_last); vram_dirty = 1; pos += 4; continue; @@ -635,7 +639,7 @@ static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles) if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0)) pos += do_cmd_list_skip(data + pos, count - pos, &cmd); else { - pos += do_cmd_list(data + pos, count - pos, cpu_cycles, &cmd); + pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd); vram_dirty = 1; } @@ -659,7 +663,7 @@ static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles) static noinline void flush_cmd_buffer(void) { int dummy = 0, left; - left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy); + left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy); if (left > 0) memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4); if (left != gpu.cmd_len) { @@ -678,7 +682,7 @@ void GPUwriteDataMem(uint32_t *mem, int count) if (unlikely(gpu.cmd_len > 0)) flush_cmd_buffer(); - left = do_cmd_buffer(mem, count, &dummy); + left = do_cmd_buffer(mem, count, &dummy, &dummy); if (left) log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count); } @@ -691,11 +695,13 @@ void GPUwriteData(uint32_t data) flush_cmd_buffer(); } -long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, 
uint32_t *progress_addr) +long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, + uint32_t *progress_addr, int32_t *cycles_last_cmd) { uint32_t addr, *list, ld_addr = 0; int len, left, count; - int cpu_cycles = 0; + int cpu_cycles_sum = 0; + int cpu_cycles_last = 0; preload(rambase + (start_addr & 0x1fffff) / 4); @@ -711,12 +717,12 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr addr = LE32TOH(list[0]) & 0xffffff; preload(rambase + (addr & 0x1fffff) / 4); - cpu_cycles += 10; + cpu_cycles_sum += 10; if (len > 0) - cpu_cycles += 5 + len; + cpu_cycles_sum += 5 + len; - log_io(".chain %08lx #%d+%d %u\n", - (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles); + log_io(".chain %08lx #%d+%d %u+%u\n", + (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last); if (unlikely(gpu.cmd_len > 0)) { if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) { log_anomaly("cmd_buffer overflow, likely garbage commands\n"); @@ -729,7 +735,7 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr } if (len) { - left = do_cmd_buffer(list + 1, len, &cpu_cycles); + left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last); if (left) { memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4); gpu.cmd_len = left; @@ -766,12 +772,14 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr } } + //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last); gpu.state.last_list.frame = *gpu.state.frame_count; gpu.state.last_list.hcnt = *gpu.state.hcnt; - gpu.state.last_list.cycles = cpu_cycles; + gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last; gpu.state.last_list.addr = start_addr; - return cpu_cycles; + *cycles_last_cmd = cpu_cycles_last; + return cpu_cycles_sum; } void GPUreadDataMem(uint32_t *mem, int count) diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 13e73c5aa..96a818370 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ 
-119,7 +119,8 @@ extern struct psx_gpu gpu; extern const unsigned char cmd_lengths[256]; -int do_cmd_list(uint32_t *list, int count, int *cycles, int *last_cmd); +int do_cmd_list(uint32_t *list, int count, + int *cycles_sum, int *cycles_last, int *last_cmd); struct rearmed_cbs; @@ -145,7 +146,8 @@ struct GPUFreeze; long GPUinit(void); long GPUshutdown(void); void GPUwriteDataMem(uint32_t *mem, int count); -long GPUdmaChain(uint32_t *rambase, uint32_t addr, uint32_t *progress_addr); +long GPUdmaChain(uint32_t *rambase, uint32_t addr, + uint32_t *progress_addr, int32_t *cycles_last_cmd); void GPUwriteData(uint32_t data); void GPUreadDataMem(uint32_t *mem, int count); uint32_t GPUreadData(void); diff --git a/plugins/gpulib/gpu_timing.h b/plugins/gpulib/gpu_timing.h index 363e608d5..9991fd80d 100644 --- a/plugins/gpulib/gpu_timing.h +++ b/plugins/gpulib/gpu_timing.h @@ -1,6 +1,6 @@ // very conservative and wrong -#define gput_fill(w, h) (23 + (4 + (w) / 32u) * (h)) +#define gput_fill(w, h) (23 + (4 + (w) / 16u) * (h)) #define gput_copy(w, h) ((w) * (h)) #define gput_poly_base() (23) #define gput_poly_base_t() (gput_poly_base() + 90) @@ -13,3 +13,7 @@ #define gput_line(k) (8 + (k)) #define gput_sprite(w, h) (8 + ((w) / 2u) * (h)) +// sort of a workaround for lack of proper fifo emulation +#define gput_sum(sum, cnt, new_cycles) do { \ + sum += cnt; cnt = new_cycles; \ +} while (0) From 5162c3ff5086640220dcfa12e8598e4d4e89c5c6 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 13 Dec 2023 00:24:23 +0200 Subject: [PATCH 505/597] cheats: handle 1F and D4 libretro/pcsx_rearmed#808 --- libpcsxcore/cheat.c | 27 ++++++++++++++++++++++++--- libpcsxcore/cheat.h | 36 ++++++++++++++++++++---------------- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/libpcsxcore/cheat.c b/libpcsxcore/cheat.c index e0cf411e0..2727fd23d 100644 --- a/libpcsxcore/cheat.c +++ b/libpcsxcore/cheat.c @@ -20,6 +20,7 @@ #include "r3000a.h" #include "psxmem.h" #include "misc.h" +#include 
"../frontend/plugin_lib.h" // in_keystate for D4 #include "cheat.h" @@ -228,6 +229,10 @@ void ApplyCheats() { psxMu16ref(addr) = SWAPu16(val); break; + case CHEAT_SCRATCHPAD16: // 1F + psxHs16ref(addr) = SWAPu16(val); + break; + case CHEAT_INC16: psxMu16ref(addr) = SWAPu16(psxMu16(addr) + val); break; @@ -319,6 +324,20 @@ void ApplyCheats() { if (PSXMu16(addr) <= val) j++; // skip the next code break; + + case CHEAT_BUTTONS1_16: { // D4 + u16 keys = in_keystate[0]; + keys = (keys << 8) | (keys >> 8); + if (keys != val) + j++; // skip the next code + break; + } + + default: + SysPrintf("unhandled cheat %d,%d code %08X\n", + i, j, CheatCodes[j].Addr); + Cheats[i].WasEnabled = Cheats[i].Enabled = 0; + break; } } } @@ -350,7 +369,7 @@ int AddCheat(const char *descr, char *code) { p2 = code; while (c) { - unsigned int t1, t2; + unsigned int t1, t2, r; while (*p2 != '\n' && *p2 != '\0') p2++; @@ -363,9 +382,11 @@ int AddCheat(const char *descr, char *code) { t1 = 0; t2 = 0; - sscanf(p1, "%x %x", &t1, &t2); + r = sscanf(p1, "%x %x", &t1, &t2); - if (t1 > 0x10000000) { + if (r != 2) + SysPrintf("cheat %d: couldn't parse '%s'\n", NumCodes, p1); + else if (t1 >= 0x10000000) { if (NumCodes >= NumCodesAllocated) { NumCodesAllocated += ALLOC_INCREMENT; diff --git a/libpcsxcore/cheat.h b/libpcsxcore/cheat.h index b3d8bc4ac..d8c2c66ff 100644 --- a/libpcsxcore/cheat.h +++ b/libpcsxcore/cheat.h @@ -98,23 +98,27 @@ extern int NumCodesAllocated; #define PrevMu32(mem) (SWAP32(*(u32 *)PREVM(mem))) // cheat types -#define CHEAT_CONST8 0x30 /* 8-bit Constant Write */ -#define CHEAT_CONST16 0x80 /* 16-bit Constant Write */ -#define CHEAT_INC16 0x10 /* 16-bit Increment */ -#define CHEAT_DEC16 0x11 /* 16-bit Decrement */ -#define CHEAT_INC8 0x20 /* 8-bit Increment */ -#define CHEAT_DEC8 0x21 /* 8-bit Decrement */ -#define CHEAT_SLIDE 0x50 /* Slide Codes */ -#define CHEAT_MEMCPY 0xC2 /* Memory Copy */ - -#define CHEAT_EQU8 0xE0 /* 8-bit Equal To */ -#define CHEAT_NOTEQU8 0xE1 /* 8-bit Not 
Equal To */ -#define CHEAT_LESSTHAN8 0xE2 /* 8-bit Less Than */ -#define CHEAT_GREATERTHAN8 0xE3 /* 8-bit Greater Than */ -#define CHEAT_EQU16 0xD0 /* 16-bit Equal To */ -#define CHEAT_NOTEQU16 0xD1 /* 16-bit Not Equal To */ -#define CHEAT_LESSTHAN16 0xD2 /* 16-bit Less Than */ +#define CHEAT_CONST8 0x30 /* 8-bit Constant Write */ +#define CHEAT_CONST16 0x80 /* 16-bit Constant Write */ + +#define CHEAT_INC16 0x10 /* 16-bit Increment */ +#define CHEAT_DEC16 0x11 /* 16-bit Decrement */ +#define CHEAT_SCRATCHPAD16 0x1F /* 16-bit Scratchpad Write */ +#define CHEAT_INC8 0x20 /* 8-bit Increment */ +#define CHEAT_DEC8 0x21 /* 8-bit Decrement */ +#define CHEAT_SLIDE 0x50 /* Slide Codes */ +#define CHEAT_MEMCPY 0xC2 /* Memory Copy */ + +// conditionals +#define CHEAT_EQU16 0xD0 /* 16-bit Equal To */ +#define CHEAT_NOTEQU16 0xD1 /* 16-bit Not Equal To */ +#define CHEAT_LESSTHAN16 0xD2 /* 16-bit Less Than */ #define CHEAT_GREATERTHAN16 0xD3 /* 16-bit Greater Than */ +#define CHEAT_BUTTONS1_16 0xD4 /* button presses equate to YYYY */ +#define CHEAT_EQU8 0xE0 /* 8-bit Equal To */ +#define CHEAT_NOTEQU8 0xE1 /* 8-bit Not Equal To */ +#define CHEAT_LESSTHAN8 0xE2 /* 8-bit Less Than */ +#define CHEAT_GREATERTHAN8 0xE3 /* 8-bit Greater Than */ #ifdef __cplusplus } From 7f2a91d0a12d2c096cafc3fa4e1f1da8f63e5af9 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 14 Dec 2023 00:12:53 +0200 Subject: [PATCH 506/597] more timing hacks notaz/pcsx_rearmed#329 --- libpcsxcore/database.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index b35658ba9..66b54f441 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -27,6 +27,8 @@ static const char * const gpu_slow_llist_db[] = "SCES02834", "SCUS94570", "SCUS94616", "SCUS94654", /* Final Fantasy IV */ "SCES03840", "SLPM86028", "SLUS01360", + /* Point Blank - calibration cursor */ + "SCED00287", "SCES00886", "SLUS00481", /* Simple 1500 Series Vol. 
57: The Meiro */ "SLPM86715", /* Spot Goes to Hollywood */ @@ -111,6 +113,8 @@ cycle_multiplier_overrides[] = /* Psychic Detective - some weird race condition in the game's cdrom code */ { 222, { "SLUS00165", "SLUS00166", "SLUS00167" } }, { 222, { "SLES00070", "SLES10070", "SLES20070" } }, + /* Vib-Ribbon - cd timing issues (PAL+ari64drc only?) */ + { 200, { "SCES02873" } }, /* Zero Divide - sometimes too fast */ { 200, { "SLUS00183", "SLES00159", "SLPS00083", "SLPM80008" } }, }; From 899d08bdfa1d222a9c0e73ee2604b96ff4287687 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 14 Dec 2023 21:00:04 +0200 Subject: [PATCH 507/597] drc: handle upto 64k page size libretro/pcsx_rearmed#810 --- libpcsxcore/new_dynarec/new_dynarec.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 74f32ee35..b42952394 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -23,6 +23,7 @@ #include #include #include +#include #ifdef __MACH__ #include #endif @@ -111,18 +112,16 @@ struct ndrc_mem struct ndrc_tramp tramp; }; -#ifdef BASE_ADDR_DYNAMIC static struct ndrc_mem *ndrc; -#else -static struct ndrc_mem ndrc_ __attribute__((aligned(4096))); -static struct ndrc_mem *ndrc = &ndrc_; +#ifndef BASE_ADDR_DYNAMIC +// reserve .bss space with upto 64k page size in mind +static char ndrc_bss[((sizeof(*ndrc) + 65535) & ~65535) + 65536]; #endif #ifdef TC_WRITE_OFFSET # ifdef __GLIBC__ # include # include # include -# include # endif static long ndrc_write_ofs; #define NDRC_WRITE_OFFSET(x) (void *)((char *)(x) + ndrc_write_ofs) @@ -6262,9 +6261,20 @@ void new_dynarec_clear_full(void) new_dynarec_hacks_old = new_dynarec_hacks; } +static int pgsize(void) +{ +#ifdef _SC_PAGESIZE + return sysconf(_SC_PAGESIZE); +#else + return 4096; +#endif +} + void new_dynarec_init(void) { - SysPrintf("Init new dynarec, ndrc size %x\n", (int)sizeof(*ndrc)); 
+ int align = pgsize() - 1; + SysPrintf("Init new dynarec, ndrc size %x, pgsize %d\n", + (int)sizeof(*ndrc), align + 1); #ifdef _3DS check_rosalina(); @@ -6320,11 +6330,12 @@ void new_dynarec_init(void) #endif #else #ifndef NO_WRITE_EXEC + ndrc = (struct ndrc_mem *)((size_t)(ndrc_bss + align) & ~align); // not all systems allow execute in data segment by default // size must be 4K aligned for 3DS? if (mprotect(ndrc, sizeof(*ndrc), PROT_READ | PROT_WRITE | PROT_EXEC) != 0) - SysPrintf("mprotect() failed: %s\n", strerror(errno)); + SysPrintf("mprotect(%p) failed: %s\n", ndrc, strerror(errno)); #endif #endif out = ndrc->translation_cache; From 0c2126b9446fea1eb2a4e4c84fcb5ac1f364e81c Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 14 Dec 2023 21:05:21 +0200 Subject: [PATCH 508/597] remove one of the hacks seems to no longer be needed added in 979b861b31ef1f5033db5bd4433b842944300a3e --- libpcsxcore/database.c | 7 ------- libpcsxcore/new_dynarec/pcsxmem.c | 4 ---- libpcsxcore/psxcommon.h | 1 - libpcsxcore/psxhw.c | 18 +----------------- libpcsxcore/psxhw.h | 1 - 5 files changed, 1 insertion(+), 30 deletions(-) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 66b54f441..86a24a490 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -39,12 +39,6 @@ static const char * const gpu_slow_llist_db[] = "SLES02731", "SLPS02477", "SLPS03198", "SLUS01138", }; -static const char * const gpu_busy_hack_db[] = -{ - /* ToHeart (Japan) */ - "SLPS01919", "SLPS01920", -}; - static const char * const gpu_centering_hack_db[] = { /* Gradius Gaiden */ @@ -79,7 +73,6 @@ hack_db[] = { HACK_ENTRY(cdr_read_timing, cdr_read_hack_db), HACK_ENTRY(gpu_slow_list_walking, gpu_slow_llist_db), - HACK_ENTRY(gpu_busy, gpu_busy_hack_db), HACK_ENTRY(gpu_centering, gpu_centering_hack_db), HACK_ENTRY(gpu_timing1024, dualshock_timing1024_hack_db), HACK_ENTRY(dualshock_init_analog, dualshock_init_analog_hack_db), diff --git a/libpcsxcore/new_dynarec/pcsxmem.c 
b/libpcsxcore/new_dynarec/pcsxmem.c index e61e8a352..905f3a2ca 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -510,10 +510,6 @@ void new_dyna_pcsx_mem_reset(void) // plugins might change so update the pointers map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); map_item(&mem_iowtab[IOMEM32(0x1810)], GPU_writeData, 1); - if (Config.hacks.gpu_busy) - map_item(&mem_iortab[IOMEM32(0x1814)], psxHwReadGpuSRbusyHack, 1); - else - map_item(&mem_iortab[IOMEM32(0x1814)], psxHwReadGpuSR, 1); } void new_dyna_pcsx_mem_shutdown(void) diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index dce4f41e2..a25e6252c 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -150,7 +150,6 @@ typedef struct { struct { boolean cdr_read_timing; boolean gpu_slow_list_walking; - boolean gpu_busy; boolean gpu_centering; boolean dualshock_init_analog; boolean gpu_timing1024; diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index 8179d9597..0a468753a 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -27,8 +27,6 @@ #include "cdrom.h" #include "gpu.h" -static u32 (*psxHwReadGpuSRptr)(void) = psxHwReadGpuSR; - void psxHwReset() { memset(psxH, 0, 0x10000); @@ -36,8 +34,6 @@ void psxHwReset() { cdrReset(); psxRcntInit(); HW_GPU_STATUS = SWAP32(0x10802000); - psxHwReadGpuSRptr = Config.hacks.gpu_busy - ? 
psxHwReadGpuSRbusyHack : psxHwReadGpuSR; } void psxHwWriteIstat(u32 value) @@ -121,18 +117,6 @@ u32 psxHwReadGpuSR(void) return v; } -// a hack due to poor timing of gpu idle bit -// to get rid of this, GPU draw times, DMAs, cpu timing has to fall within -// certain timing window or else games like "ToHeart" softlock -u32 psxHwReadGpuSRbusyHack(void) -{ - u32 v = psxHwReadGpuSR(); - static u32 hack; - if (!(hack++ & 3)) - v &= ~PSXGPU_nBUSY; - return v; -} - u8 psxHwRead8(u32 add) { u8 hard; @@ -254,7 +238,7 @@ u32 psxHwRead32(u32 add) { case 0x1124: hard = psxRcntRmode(2); break; case 0x1128: hard = psxRcntRtarget(2); break; case 0x1810: hard = GPU_readData(); break; - case 0x1814: hard = psxHwReadGpuSRptr(); break; + case 0x1814: hard = psxHwReadGpuSR(); break; case 0x1820: hard = mdecRead0(); break; case 0x1824: hard = mdecRead1(); break; diff --git a/libpcsxcore/psxhw.h b/libpcsxcore/psxhw.h index 574ee3337..3017c9011 100644 --- a/libpcsxcore/psxhw.h +++ b/libpcsxcore/psxhw.h @@ -93,7 +93,6 @@ void psxHwWriteChcr6(u32 value); void psxHwWriteDmaIcr32(u32 value); void psxHwWriteGpuSR(u32 value); u32 psxHwReadGpuSR(void); -u32 psxHwReadGpuSRbusyHack(void); #ifdef __cplusplus } From fb640dd9d9affafea31495762980117323636258 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 15 Dec 2023 00:24:38 +0200 Subject: [PATCH 509/597] standalone: update gun support --- frontend/main.c | 4 ++++ frontend/main.h | 1 + frontend/menu.c | 12 +++++++---- frontend/pl_gun_ts.c | 4 ++-- frontend/plugin_lib.c | 46 ++++++++++++++++++++++++++++++++++++------- frontend/plugin_lib.h | 1 - 6 files changed, 54 insertions(+), 14 deletions(-) diff --git a/frontend/main.c b/frontend/main.c index 019835dd9..53f31b0a1 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -288,6 +288,10 @@ void do_emu_action(void) SysMessage("GPU_open returned %d", ret); } return; + case SACTION_ANALOG_TOGGLE: + ret = padToggleAnalog(0); + snprintf(hud_msg, sizeof(hud_msg), "ANALOG %s", ret ? 
"ON" : "OFF"); + break; #endif default: return; diff --git a/frontend/main.h b/frontend/main.h index 22053bbc8..98b0f370e 100644 --- a/frontend/main.h +++ b/frontend/main.h @@ -79,6 +79,7 @@ enum sched_action { SACTION_GUN_A, SACTION_GUN_B, SACTION_GUN_TRIGGER2, + SACTION_ANALOG_TOGGLE, }; #define SACTION_GUN_MASK (0x0f << SACTION_GUN_TRIGGER) diff --git a/frontend/menu.c b/frontend/menu.c index 6f74d4516..e6c15402d 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -316,14 +316,16 @@ static void menu_sync_config(void) switch (in_type_sel1) { case 1: in_type[0] = PSE_PAD_TYPE_ANALOGPAD; break; - case 2: in_type[0] = PSE_PAD_TYPE_NEGCON; break; - case 3: in_type[0] = PSE_PAD_TYPE_NONE; break; + case 2: in_type[0] = PSE_PAD_TYPE_GUNCON; break; + case 3: in_type[0] = PSE_PAD_TYPE_GUN; break; + case 4: in_type[0] = PSE_PAD_TYPE_NONE; break; default: in_type[0] = PSE_PAD_TYPE_STANDARD; } switch (in_type_sel2) { case 1: in_type[1] = PSE_PAD_TYPE_ANALOGPAD; break; - case 2: in_type[1] = PSE_PAD_TYPE_NEGCON; break; - case 3: in_type[1] = PSE_PAD_TYPE_NONE; break; + case 2: in_type[1] = PSE_PAD_TYPE_GUNCON; break; + case 3: in_type[1] = PSE_PAD_TYPE_GUN; break; + case 4: in_type[1] = PSE_PAD_TYPE_NONE; break; default: in_type[1] = PSE_PAD_TYPE_STANDARD; } if (in_evdev_allow_abs_only != allow_abs_only_old) { @@ -901,6 +903,7 @@ me_bind_action emuctrl_actions[] = { "Volume Up ", 1 << SACTION_VOLUME_UP }, { "Volume Down ", 1 << SACTION_VOLUME_DOWN }, #endif + { "Analog toggle ", 1 << SACTION_ANALOG_TOGGLE }, { NULL, 0 } }; @@ -1217,6 +1220,7 @@ static const char *men_in_type_sel[] = { "Standard (SCPH-1080)", "Analog (SCPH-1150)", "GunCon", + "Konami Gun", "None", NULL }; diff --git a/frontend/pl_gun_ts.c b/frontend/pl_gun_ts.c index 6c05b7c3f..6e7fa9401 100644 --- a/frontend/pl_gun_ts.c +++ b/frontend/pl_gun_ts.c @@ -54,7 +54,7 @@ int pl_gun_ts_update_raw(struct tsdev *ts, int *x, int *y, int *p) gun_y = (sy - ts_offs_y) * ts_multiplier_y >> 10; limit(gun_x, 0, 1023); 
limit(gun_y, 0, 1023); - if (sp && !(g_opts & OPT_TSGUN_NOTRIGGER)) + if (sp) gun_in |= 1; else gun_in &= ~1; @@ -78,7 +78,7 @@ void pl_gun_ts_update(struct tsdev *ts, int *x, int *y, int *in) *x = gun_x; *y = gun_y; - *in = gun_in | in_state_gun; + *in = gun_in; } void pl_set_gun_rect(int x, int y, int w, int h) diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 1cbfdb0e0..0deab1574 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -34,6 +34,7 @@ #include "../libpcsxcore/new_dynarec/new_dynarec.h" #include "../libpcsxcore/psxmem_map.h" #include "../libpcsxcore/gpu.h" +#include "../libpcsxcore/r3000a.h" #define HUD_HEIGHT 10 @@ -46,7 +47,6 @@ int in_adev[2] = { -1, -1 }, in_adev_axis[2][2] = {{ 0, 1 }, { 0, 1 }}; int in_adev_is_nublike[2]; unsigned short in_keystate[8]; int in_mouse[8][2]; -int in_state_gun; int in_enable_vibration; void *tsdev; void *pl_vout_buf; @@ -613,12 +613,14 @@ static void update_input(void) { int actions[IN_BINDTYPE_COUNT] = { 0, }; unsigned int emu_act; + int in_state_gun; + int i; in_update(actions); if (in_type[0] == PSE_PAD_TYPE_ANALOGJOY || in_type[0] == PSE_PAD_TYPE_ANALOGPAD) update_analogs(); emu_act = actions[IN_BINDTYPE_EMU]; - in_state_gun = (emu_act & SACTION_GUN_MASK) >> SACTION_GUN_TRIGGER; + in_state_gun = emu_act & SACTION_GUN_MASK; emu_act &= ~SACTION_GUN_MASK; if (emu_act) { @@ -629,12 +631,35 @@ static void update_input(void) } emu_set_action(emu_act); - in_keystate[0] = actions[IN_BINDTYPE_PLAYER12]; + in_keystate[0] = actions[IN_BINDTYPE_PLAYER12] & 0xffff; + in_keystate[1] = (actions[IN_BINDTYPE_PLAYER12] >> 16) & 0xffff; - // fixme - //if (in_type[0] == PSE_PAD_TYPE_GUNCON && tsdev) - // pl_gun_ts_update(tsdev, xn, yn, in); - // in_analog_left[0][0] = xn + if (tsdev) for (i = 0; i < 2; i++) { + int in = 0, x = 0, y = 0, trigger;; + if (in_type[i] != PSE_PAD_TYPE_GUN + && in_type[i] != PSE_PAD_TYPE_GUNCON) + continue; + trigger = in_type[i] == PSE_PAD_TYPE_GUN + ? 
(1 << DKEY_SQUARE) : (1 << DKEY_CIRCLE); + + pl_gun_ts_update(tsdev, &x, &y, &in); + in_analog_left[i][0] = 65536; + in_analog_left[i][1] = 65536; + if (in && !(in_state_gun & (1 << SACTION_GUN_TRIGGER2))) { + in_analog_left[i][0] = x; + in_analog_left[i][1] = y; + if (!(g_opts & OPT_TSGUN_NOTRIGGER)) + in_state_gun |= (1 << SACTION_GUN_TRIGGER); + } + in_keystate[i] = 0; + if (in_state_gun & ((1 << SACTION_GUN_TRIGGER) + | (1 << SACTION_GUN_TRIGGER2))) + in_keystate[i] |= trigger; + if (in_state_gun & (1 << SACTION_GUN_A)) + in_keystate[i] |= (1 << DKEY_START); + if (in_state_gun & (1 << SACTION_GUN_B)) + in_keystate[i] |= (1 << DKEY_CROSS); + } } #else /* MAEMO */ extern void update_input(void); @@ -642,6 +667,13 @@ extern void update_input(void); void pl_gun_byte2(int port, unsigned char byte) { + if (!tsdev || in_type[port] != PSE_PAD_TYPE_GUN || !(byte & 0x10)) + return; + if (in_analog_left[port][0] == 65536) + return; + + psxScheduleIrq10(4, in_analog_left[port][0] * 1629 / 1024, + in_analog_left[port][1] * psx_h / 1024); } #define MAX_LAG_FRAMES 3 diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index 6cfe59649..4e5ad8b08 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -23,7 +23,6 @@ enum { DKEY_CROSS, DKEY_SQUARE, }; -extern int in_state_gun; extern int in_type[8]; extern int multitap1; extern int multitap2; From 24a7af90024e950ee1d2d39518743904f4b164f5 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 28 Dec 2023 00:43:10 +0200 Subject: [PATCH 510/597] gpulib: maybe better loop detection Note that this thing isn't needed at all with gpu_slow_llists enabled. 
libretro/pcsx_rearmed#812 --- plugins/gpulib/gpu.c | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index a85136090..f2bf6ce9e 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -698,8 +698,8 @@ void GPUwriteData(uint32_t data) long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr, int32_t *cycles_last_cmd) { - uint32_t addr, *list, ld_addr = 0; - int len, left, count; + uint32_t addr, *list, ld_addr; + int len, left, count, ld_count = 32; int cpu_cycles_sum = 0; int cpu_cycles_last = 0; @@ -709,7 +709,7 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, flush_cmd_buffer(); log_io("gpu_dma_chain\n"); - addr = start_addr & 0xffffff; + addr = ld_addr = start_addr & 0xffffff; for (count = 0; (addr & 0x800000) == 0; count++) { list = rambase + (addr & 0x1fffff) / 4; @@ -747,28 +747,13 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, *progress_addr = addr; break; } - #define LD_THRESHOLD (8*1024) - if (count >= LD_THRESHOLD) { - if (count == LD_THRESHOLD) { - ld_addr = addr; - continue; - } - - // loop detection marker - // (bit23 set causes DMA error on real machine, so - // unlikely to be ever set by the game) - list[0] |= HTOLE32(0x800000); + if (addr == ld_addr) { + log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count); + break; } - } - - if (ld_addr != 0) { - // remove loop detection markers - count -= LD_THRESHOLD + 2; - addr = ld_addr & 0x1fffff; - while (count-- > 0) { - list = rambase + addr / 4; - addr = LE32TOH(list[0]) & 0x1fffff; - list[0] &= HTOLE32(~0x800000); + if (count == ld_count) { + ld_addr = addr; + ld_count *= 2; } } From e99fe7a30d0ec65a11032451147eebc99eb436c5 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 28 Dec 2023 01:33:02 +0200 Subject: [PATCH 511/597] gpu_neon: unbreak sprites in enhanced res broken in c296224f47ceebab4d6fbd071959bff294e80293 libretro/pcsx_rearmed#813 --- 
plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 34 ++++++++++++++---------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index d401522ac..44fce93af 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -1516,10 +1516,13 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, 0, 0, &width, &height, current_command, list[0]); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); - if (check_enhanced_range(psx_gpu, x, x + width)) + if (check_enhanced_range(psx_gpu, x, x + width)) { + width = list_s16[4] & 0x3FF; + height = list_s16[5] & 0x1FF; do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); - gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); + } break; } @@ -1536,10 +1539,13 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, u, v, &width, &height, current_command, list[0]); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); - if (check_enhanced_range(psx_gpu, x, x + width)) + if (check_enhanced_range(psx_gpu, x, x + width)) { + width = list_s16[6] & 0x3FF; + height = list_s16[7] & 0x1FF; do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); - gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); + } break; } @@ -1551,10 +1557,10 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, 0, 0, &width, &height, current_command, list[0]); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); if (check_enhanced_range(psx_gpu, x, x + 1)) - do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); - gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 1, 1, list[0]); break; } @@ -1566,10 +1572,10 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 
*list, u32 size, render_sprite(psx_gpu, x, y, 0, 0, &width, &height, current_command, list[0]); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); if (check_enhanced_range(psx_gpu, x, x + 8)) - do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); - gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]); break; } @@ -1585,10 +1591,10 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, u, v, &width, &height, current_command, list[0]); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); if (check_enhanced_range(psx_gpu, x, x + 8)) - do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); - gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); + do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]); break; } @@ -1600,10 +1606,10 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, 0, 0, &width, &height, current_command, list[0]); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); if (check_enhanced_range(psx_gpu, x, x + 16)) - do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); - gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]); break; } @@ -1619,10 +1625,10 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, u, v, &width, &height, current_command, list[0]); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); if (check_enhanced_range(psx_gpu, x, x + 16)) - do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); - gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height)); + do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]); break; } From a706d36146491e9010540e5c2e9e5a77fc1221e1 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 3 Jan 2024 01:11:04 +0200 
Subject: [PATCH 512/597] cdrom: attempt to improve reset libretro/pcsx_rearmed#814 --- libpcsxcore/cdrom.c | 52 ++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index b980be061..872cc0c80 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -701,6 +701,30 @@ void cdrPlayReadInterrupt(void) CDRPLAYREAD_INT(cdReadTime, 0); } +static void softReset(void) +{ + CDR_getStatus(&stat); + if (stat.Status & STATUS_SHELLOPEN) { + cdr.DriveState = DRIVESTATE_LID_OPEN; + cdr.StatP = STATUS_SHELLOPEN; + } + else if (CdromId[0] == '\0') { + cdr.DriveState = DRIVESTATE_STOPPED; + cdr.StatP = 0; + } + else { + cdr.DriveState = DRIVESTATE_STANDBY; + cdr.StatP = STATUS_ROTATING; + } + + cdr.FifoOffset = DATA_SIZE; // fifo empty + cdr.LocL[0] = LOCL_INVALID; + cdr.Mode = MODE_SIZE_2340; + cdr.Muted = FALSE; + SPU_setCDvol(cdr.AttenuatorLeftToLeft, cdr.AttenuatorLeftToRight, + cdr.AttenuatorRightToLeft, cdr.AttenuatorRightToRight, psxRegs.cycle); +} + #define CMD_PART2 0x100 #define CMD_WHILE_NOT_READY 0x200 @@ -983,15 +1007,11 @@ void cdrInterrupt(void) { case CdlReset: case CdlReset + CMD_WHILE_NOT_READY: + // note: nocash and Duckstation calls this 'Init', but + // the official SDK calls it 'Reset', and so do we StopCdda(); StopReading(); - SetPlaySeekRead(cdr.StatP, 0); - cdr.LocL[0] = LOCL_INVALID; - cdr.Mode = MODE_SIZE_2340; /* This fixes This is Football 2, Pooh's Party lockups */ - cdr.DriveState = DRIVESTATE_PAUSED; - cdr.Muted = FALSE; - SPU_setCDvol(cdr.AttenuatorLeftToLeft, cdr.AttenuatorLeftToRight, - cdr.AttenuatorRightToLeft, cdr.AttenuatorRightToRight, psxRegs.cycle); + softReset(); second_resp_time = not_ready ? 
70000 : 4100000; start_rotating = 1; break; @@ -1715,30 +1735,14 @@ void cdrReset() { cdr.FilterChannel = 0; cdr.IrqMask = 0x1f; cdr.IrqStat = NoIntr; - cdr.FifoOffset = DATA_SIZE; // fifo empty - CDR_getStatus(&stat); - if (stat.Status & STATUS_SHELLOPEN) { - cdr.DriveState = DRIVESTATE_LID_OPEN; - cdr.StatP = STATUS_SHELLOPEN; - } - else if (CdromId[0] == '\0') { - cdr.DriveState = DRIVESTATE_STOPPED; - cdr.StatP = 0; - } - else { - cdr.DriveState = DRIVESTATE_STANDBY; - cdr.StatP = STATUS_ROTATING; - } - // BIOS player - default values cdr.AttenuatorLeftToLeft = 0x80; cdr.AttenuatorLeftToRight = 0x00; cdr.AttenuatorRightToLeft = 0x00; cdr.AttenuatorRightToRight = 0x80; - SPU_setCDvol(cdr.AttenuatorLeftToLeft, cdr.AttenuatorLeftToRight, - cdr.AttenuatorRightToLeft, cdr.AttenuatorRightToRight, psxRegs.cycle); + softReset(); getCdInfo(); } From 6ab6ab973403d7232ce056c0f233c89a6456b403 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 3 Jan 2024 02:36:58 +0200 Subject: [PATCH 513/597] gpu_neon: make enh. res. 
texturing hack optional until something better is figured out, if ever libretro/pcsx_rearmed#815 --- frontend/libretro.c | 17 ++++++++++++++--- frontend/libretro_core_options.h | 14 ++++++++++++++ frontend/menu.c | 2 ++ frontend/plugin_lib.h | 1 + plugins/gpu_neon/psx_gpu/psx_gpu.h | 5 ++++- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 11 ++++++----- plugins/gpu_neon/psx_gpu_if.c | 3 ++- 7 files changed, 43 insertions(+), 10 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index d361be153..7b29a23af 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2101,10 +2101,21 @@ static void update_variables(bool in_flight) if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - if (strcmp(var.value, "disabled") == 0) - pl_rearmed_cbs.gpu_neon.enhancement_no_main = 0; - else if (strcmp(var.value, "enabled") == 0) + if (strcmp(var.value, "enabled") == 0) pl_rearmed_cbs.gpu_neon.enhancement_no_main = 1; + else + pl_rearmed_cbs.gpu_neon.enhancement_no_main = 0; + } + + var.value = NULL; + var.key = "pcsx_rearmed_neon_enhancement_tex_adj"; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + pl_rearmed_cbs.gpu_neon.enhancement_tex_adj = 1; + else + pl_rearmed_cbs.gpu_neon.enhancement_tex_adj = 0; } #endif diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 25b2da4c3..02f65c732 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -510,6 +510,20 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled", }, + { + "pcsx_rearmed_neon_enhancement_tex_adj", + "(GPU) Enhanced Resolution Texture Adjustment", + "Enhanced Resolution Texture Adjustment", + "(Hack) Attempts to solve some texturing issues is some games, but causes new ones in others.", + NULL, + "gpu_neon", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, #endif /* GPU_NEON */ #ifdef 
GPU_PEOPS { diff --git a/frontend/menu.c b/frontend/menu.c index e6c15402d..51cb37710 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -450,6 +450,7 @@ static const struct { CE_INTVAL_P(gpu_neon.allow_interlace), CE_INTVAL_P(gpu_neon.enhancement_enable), CE_INTVAL_P(gpu_neon.enhancement_no_main), + CE_INTVAL_P(gpu_neon.enhancement_tex_adj), CE_INTVAL_P(gpu_peopsgl.bDrawDither), CE_INTVAL_P(gpu_peopsgl.iFilterType), CE_INTVAL_P(gpu_peopsgl.iFrameTexType), @@ -1414,6 +1415,7 @@ static menu_entry e_menu_plugin_gpu_neon[] = mee_enum ("Enable interlace mode", 0, pl_rearmed_cbs.gpu_neon.allow_interlace, men_gpu_interlace), mee_onoff_h ("Enhanced resolution", 0, pl_rearmed_cbs.gpu_neon.enhancement_enable, 1, h_gpu_neon_enhanced), mee_onoff_h ("Enhanced res. speed hack", 0, pl_rearmed_cbs.gpu_neon.enhancement_no_main, 1, h_gpu_neon_enhanced_hack), + mee_onoff ("Enh. res. texture adjust", 0, pl_rearmed_cbs.gpu_neon.enhancement_tex_adj, 1), mee_end, }; diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index 4e5ad8b08..7879e70ba 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -81,6 +81,7 @@ struct rearmed_cbs { int allow_interlace; // 0 off, 1 on, 2 guess int enhancement_enable; int enhancement_no_main; + int enhancement_tex_adj; } gpu_neon; struct { int iUseDither; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 687715a55..2539521bf 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -200,9 +200,12 @@ typedef struct u16 enhancement_scanout_eselect; // eviction selector u16 enhancement_current_buf; + u32 hack_disable_main:1; + u32 hack_texture_adj:1; + // Align up to 64 byte boundary to keep the upcoming buffers cache line // aligned, also make reachable with single immediate addition - u8 reserved_a[188 + 9*4 - 9*sizeof(void *)]; + u8 reserved_a[184 + 9*4 - 9*sizeof(void *)]; // 8KB block_struct blocks[MAX_BLOCKS_PER_ROW]; diff --git 
a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 44fce93af..d81b70785 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -1018,8 +1018,6 @@ void scale2x_tiles8(void *dst, const void *src, int w8, int h) } #endif -static int disable_main_render; - // simple check for a case where no clipping is used // - now handled by adjusting the viewport static int check_enhanced_range(psx_gpu_struct *psx_gpu, int x, int y) @@ -1065,6 +1063,7 @@ static void patch_v(vertex_struct *vertex_ptrs, int count, int old, int new) vertex_ptrs[i].v = new; } +// this sometimes does more harm than good, like in PE2 static void uv_hack(vertex_struct *vertex_ptrs, int vertex_count) { int i, u[4], v[4]; @@ -1103,7 +1102,7 @@ static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, if (!prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) return; - if (!disable_main_render) + if (!psx_gpu->hack_disable_main) render_triangle_p(psx_gpu, vertex_ptrs, current_command); if (!check_enhanced_range(psx_gpu, vertex_ptrs[0]->x, vertex_ptrs[2]->x)) @@ -1322,7 +1321,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv(2, 10); get_vertex_data_xy_uv(3, 14); - uv_hack(vertexes, 4); + if (psx_gpu->hack_texture_adj) + uv_hack(vertexes, 4); do_quad_enhanced(psx_gpu, vertexes, current_command); gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); break; @@ -1375,7 +1375,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv_rgb(2, 12); get_vertex_data_xy_uv_rgb(3, 18); - uv_hack(vertexes, 4); + if (psx_gpu->hack_texture_adj) + uv_hack(vertexes, 4); do_quad_enhanced(psx_gpu, vertexes, current_command); gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); break; diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index 81eadfe92..84fa9322a 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ 
b/plugins/gpu_neon/psx_gpu_if.c @@ -223,7 +223,8 @@ void renderer_set_config(const struct rearmed_cbs *cbs) if (cbs->pl_set_gpu_caps) cbs->pl_set_gpu_caps(GPU_CAP_SUPPORTS_2X); - disable_main_render = cbs->gpu_neon.enhancement_no_main; + egpu.hack_disable_main = cbs->gpu_neon.enhancement_no_main; + egpu.hack_texture_adj = cbs->gpu_neon.enhancement_tex_adj; if (gpu.state.enhancement_enable) { if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL) map_enhancement_buffer(); From 47bdde5dd6c427af9b0ffd049f1a5c5eba8ce398 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 4 Jan 2024 01:15:18 +0200 Subject: [PATCH 514/597] libretro: adjust some option text --- frontend/libretro_core_options.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 02f65c732..3c5614e19 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -500,7 +500,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { "pcsx_rearmed_neon_enhancement_no_main", "(GPU) Enhanced Resolution Speed Hack", "Enhanced Resolution Speed Hack", - "Improves performance when 'Enhanced Resolution' is enabled, but reduces compatibility and may cause rendering errors.", + "('Enhanced Resolution' Hack) Improves performance but reduces compatibility and may cause rendering errors.", NULL, "gpu_neon", { @@ -514,7 +514,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { "pcsx_rearmed_neon_enhancement_tex_adj", "(GPU) Enhanced Resolution Texture Adjustment", "Enhanced Resolution Texture Adjustment", - "(Hack) Attempts to solve some texturing issues is some games, but causes new ones in others.", + "('Enhanced Resolution' Hack) Attempts to solve some texturing issues in some games, but causes new ones in others.", NULL, "gpu_neon", { From 1ea7bcb2211e01fd34c629416c010378e40f553e Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 7 Jan 2024 01:31:17 +0200 Subject: [PATCH 515/597] 
standalone: fix w/h confusion --- frontend/menu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/menu.c b/frontend/menu.c index 51cb37710..6c669dc3c 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1313,7 +1313,7 @@ static int menu_loop_cscaler(int id, int keys) for (;;) { if (saved_layer && last_vout_bpp == 16) { - int top_x = max(0, -g_layer_x * last_vout_h / 800) + 1; + int top_x = max(0, -g_layer_x * last_vout_w / 800) + 1; int top_y = max(0, -g_layer_y * last_vout_h / 480) + 1; char text[128]; memcpy(pl_vout_buf, saved_layer, saved_layer_size); From 3a52f747f66646ccf5382969e1ffc62c795c1075 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 7 Jan 2024 21:25:10 +0200 Subject: [PATCH 516/597] standalone: make unaccelerated fullscreen work just centering; the performance is terrible though notaz/pcsx_rearmed#331 --- frontend/libpicofe | 2 +- frontend/plat_sdl.c | 128 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 107 insertions(+), 23 deletions(-) diff --git a/frontend/libpicofe b/frontend/libpicofe index be3677f18..a8ded55fc 160000 --- a/frontend/libpicofe +++ b/frontend/libpicofe @@ -1 +1 @@ -Subproject commit be3677f1867cef839334e7746ea1c8205ec73c8c +Subproject commit a8ded55fc9df952b5582a6da72e1de887e65a34b diff --git a/frontend/plat_sdl.c b/frontend/plat_sdl.c index c5570253e..64cac7cea 100644 --- a/frontend/plat_sdl.c +++ b/frontend/plat_sdl.c @@ -88,13 +88,16 @@ static const struct in_pdata in_sdl_platform_data = { .jmap_size = sizeof(in_sdl_joy_map) / sizeof(in_sdl_joy_map[0]), }; -static int psx_w, psx_h; +static int psx_w = 256, psx_h = 240; static void *shadow_fb, *menubg_img; static int in_menu; +static void centered_clear(void); +static void *setup_blit_callbacks(int w); + static int change_video_mode(int force) { - int w, h; + int w, h, ret; if (in_menu) { w = g_menuscreen_w; @@ -105,7 +108,10 @@ static int change_video_mode(int force) h = psx_h; } - return plat_sdl_change_video_mode(w, h, force); + ret = 
plat_sdl_change_video_mode(w, h, force); + if (ret == 0 && plat_sdl_overlay == NULL && !plat_sdl_gl_active) + centered_clear(); + return ret; } static void resize_cb(int w, int h) @@ -116,6 +122,7 @@ static void resize_cb(int w, int h) pl_rearmed_cbs.gles_display = gl_es_display; pl_rearmed_cbs.gles_surface = gl_es_surface; plugin_call_rearmed_cbs(); + setup_blit_callbacks(psx_w); } static void quit_cb(void) @@ -227,26 +234,109 @@ static void overlay_hud_print(int x, int y, const char *str, int bpp) SDL_UnlockYUVOverlay(plat_sdl_overlay); } -void *plat_gvideo_set_mode(int *w, int *h, int *bpp) +static void centered_clear(void) { - psx_w = *w; - psx_h = *h; - change_video_mode(0); + int dstride = plat_sdl_screen->pitch / 2; + int w = plat_sdl_screen->w; + int h = plat_sdl_screen->h; + unsigned short *dst; + + SDL_LockSurface(plat_sdl_screen); + dst = plat_sdl_screen->pixels; + + for (; h > 0; dst += dstride, h--) + memset(dst, 0, w * 2); + + SDL_UnlockSurface(plat_sdl_screen); +} + +static void centered_blit(int doffs, const void *src_, int w, int h, + int sstride, int bgr24) +{ + const unsigned short *src = src_; + unsigned short *dst; + int dstride; + + SDL_LockSurface(plat_sdl_screen); + dst = plat_sdl_screen->pixels; + dstride = plat_sdl_screen->pitch / 2; + + dst += doffs + (plat_sdl_screen->w - w) / 2; + dst += dstride * (plat_sdl_screen->h - h) / 2; + if (bgr24) { + for (; h > 0; dst += dstride, src += sstride, h--) + bgr888_to_rgb565(dst, src, w * 3); + } + else { + for (; h > 0; dst += dstride, src += sstride, h--) + bgr555_to_rgb565(dst, src, w * 2); + } + + SDL_UnlockSurface(plat_sdl_screen); +} + +static void centered_blit_menu(void) +{ + const unsigned short *src = g_menuscreen_ptr; + int w = g_menuscreen_w; + int h = g_menuscreen_h; + unsigned short *dst; + int dstride; + + SDL_LockSurface(plat_sdl_screen); + dst = plat_sdl_screen->pixels; + dstride = plat_sdl_screen->pitch / 2; + + dst += (plat_sdl_screen->w - w) / 2; + dst += dstride * 
(plat_sdl_screen->h - h) / 2; + for (; h > 0; dst += dstride, src += g_menuscreen_pp, h--) + memcpy(dst, src, w * 2); + + SDL_UnlockSurface(plat_sdl_screen); +} + +static void centered_hud_print(int x, int y, const char *str, int bpp) +{ + x += (plat_sdl_screen->w - psx_w) / 2; + y += (plat_sdl_screen->h - psx_h) / 2; + SDL_LockSurface(plat_sdl_screen); + basic_text_out16_nf(plat_sdl_screen->pixels, plat_sdl_screen->pitch / 2, x, y, str); + SDL_UnlockSurface(plat_sdl_screen); +} + +static void *setup_blit_callbacks(int w) +{ + pl_plat_clear = NULL; + pl_plat_blit = NULL; + pl_plat_hud_print = NULL; if (plat_sdl_overlay != NULL) { pl_plat_clear = plat_sdl_overlay_clear; pl_plat_blit = overlay_blit; pl_plat_hud_print = overlay_hud_print; - return NULL; + } + else if (plat_sdl_gl_active) { + return shadow_fb; } else { - pl_plat_clear = NULL; - pl_plat_blit = NULL; - pl_plat_hud_print = NULL; - if (plat_sdl_gl_active) - return shadow_fb; - else + if (w == plat_sdl_screen->w) return plat_sdl_screen->pixels; + else { + pl_plat_clear = centered_clear; + pl_plat_blit = centered_blit; + pl_plat_hud_print = centered_hud_print; + } } + return NULL; +} + +void *plat_gvideo_set_mode(int *w, int *h, int *bpp) +{ + psx_w = *w; + psx_h = *h; + change_video_mode(0); + if (plat_sdl_gl_active) + memset(shadow_fb, 0, psx_w * psx_h * 2); + return setup_blit_callbacks(*w); } void *plat_gvideo_flip(void) @@ -295,13 +385,7 @@ void plat_video_menu_enter(int is_rom_loaded) void plat_video_menu_begin(void) { - if (plat_sdl_overlay != NULL || plat_sdl_gl_active) { - g_menuscreen_ptr = shadow_fb; - } - else { - SDL_LockSurface(plat_sdl_screen); - g_menuscreen_ptr = plat_sdl_screen->pixels; - } + g_menuscreen_ptr = shadow_fb; } void plat_video_menu_end(void) @@ -320,7 +404,7 @@ void plat_video_menu_end(void) gl_flip(g_menuscreen_ptr, g_menuscreen_w, g_menuscreen_h); } else { - SDL_UnlockSurface(plat_sdl_screen); + centered_blit_menu(); SDL_Flip(plat_sdl_screen); } g_menuscreen_ptr = NULL; From 
8fb79cd1347e938cfecd799ad8ce7934164d6d63 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 21 Jan 2024 23:14:27 +0200 Subject: [PATCH 517/597] spu: add missing sample copying for thread mode was probably a bad idea keeping them separate --- plugins/dfsound/externals.h | 8 ++++---- plugins/dfsound/spu.c | 38 +++++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index 2b593ec7b..ac0960f6e 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -273,15 +273,15 @@ typedef struct extern SPUInfo spu; -void do_samples(unsigned int cycles_to, int do_sync); +void do_samples(unsigned int cycles_to, int force_no_thread); void schedule_next_irq(void); void check_irq_io(unsigned int addr); void do_irq_io(int cycles_after); -#define do_samples_if_needed(c, sync, samples) \ +#define do_samples_if_needed(c, no_thread, samples) \ do { \ - if (sync || (int)((c) - spu.cycles_played) >= (samples) * 768) \ - do_samples(c, sync); \ + if ((no_thread) || (int)((c) - spu.cycles_played) >= (samples) * 768) \ + do_samples(c, no_thread); \ } while (0) #endif diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 0e4b154e6..ad86cabc8 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -859,11 +859,14 @@ static void do_samples_finish(int *SSumLR, int ns_to, static struct spu_worker { union { struct { - unsigned int exit_thread; + unsigned char exit_thread; + unsigned char prev_work_in_thread; + unsigned char pad[2]; unsigned int i_ready; unsigned int i_reaped; unsigned int last_boot_cnt; // dsp unsigned int ram_dirty; + unsigned int channels_last; }; // aligning for C64X_DSP unsigned int _pad0[128/4]; @@ -954,6 +957,16 @@ static void queue_channel_work(int ns_to, unsigned int silentch) work->decode_pos = spu.decode_pos; work->channels_silent = silentch; + if (!worker->prev_work_in_thread) { + // copy adpcm and interpolation state to sb_thread + 
worker->prev_work_in_thread = 1; + mask = spu.dwChannelsAudible & ~spu.dwNewChannel & 0xffffff; + for (ch = 0; mask != 0; ch++, mask >>= 1) { + if (mask & 1) + memcpy(spu.sb_thread[ch].SB, spu.sb[ch].SB, sizeof(spu.sb_thread[ch].SB)); + } + } + mask = work->channels_new = spu.dwNewChannel & 0xffffff; for (ch = 0; mask != 0; ch++, mask >>= 1) { if (mask & 1) @@ -961,6 +974,7 @@ static void queue_channel_work(int ns_to, unsigned int silentch) } mask = work->channels_on = spu.dwChannelsAudible & 0xffffff; + worker->channels_last = mask; spu.decode_dirty_ch |= mask & 0x0a; for (ch = 0; mask != 0; ch++, mask >>= 1) @@ -1095,8 +1109,9 @@ static void do_channel_work(struct work_item *work) REVERBDo(work->SSumLR, RVB, ns_to, work->rvb_addr); } -static void sync_worker_thread(int force) +static void sync_worker_thread(int force_no_thread) { + int force = force_no_thread; struct work_item *work; int done, used_space; @@ -1121,14 +1136,21 @@ static void sync_worker_thread(int force) done = thread_get_i_done() - worker->i_reaped; used_space = worker->i_ready - worker->i_reaped; } - if (force) + if (force_no_thread && worker->prev_work_in_thread) { + unsigned int ch, mask = worker->channels_last; + worker->prev_work_in_thread = 0; thread_sync_caches(); + for (ch = 0; mask != 0; ch++, mask >>= 1) { + if (mask & 1) + memcpy(spu.sb[ch].SB, spu.sb_thread[ch].SB, sizeof(spu.sb_thread[ch].SB)); + } + } } #else static void queue_channel_work(int ns_to, int silentch) {} -static void sync_worker_thread(int force) {} +static void sync_worker_thread(int force_no_thread) {} static const void * const worker = NULL; @@ -1139,7 +1161,7 @@ static const void * const worker = NULL; // here is the main job handler... 
//////////////////////////////////////////////////////////////////////// -void do_samples(unsigned int cycles_to, int do_direct) +void do_samples(unsigned int cycles_to, int force_no_thread) { unsigned int silentch; int cycle_diff; @@ -1155,9 +1177,9 @@ void do_samples(unsigned int cycles_to, int do_direct) silentch = ~(spu.dwChannelsAudible | spu.dwNewChannel) & 0xffffff; - do_direct |= (silentch == 0xffffff); + force_no_thread |= (silentch == 0xffffff); if (worker != NULL) - sync_worker_thread(do_direct); + sync_worker_thread(force_no_thread); if (cycle_diff < 2 * 768) return; @@ -1205,7 +1227,7 @@ void do_samples(unsigned int cycles_to, int do_direct) if (unlikely(spu.rvb->dirty)) REVERBPrep(); - if (do_direct || worker == NULL || !spu_config.iUseThread) { + if (force_no_thread || worker == NULL || !spu_config.iUseThread) { do_channels(ns_to); do_samples_finish(spu.SSumLR, ns_to, silentch, spu.decode_pos); } From fdcc150109f35e3d892a0c92f8e565d9385a9d36 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 22 Jan 2024 00:27:01 +0200 Subject: [PATCH 518/597] psxbios: some missed malloc merge behavior --- libpcsxcore/psxbios.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 1ecb63ec9..961efdb41 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -1320,7 +1320,7 @@ static int malloc_heap_grow(u32 size) { heap_addr = loadRam32(A_HEAP_BASE); heap_end = loadRam32(A_HEAP_END); - heap_addr_new = heap_addr + 4 + size; + heap_addr_new = heap_addr + size + 4; if (heap_addr_new >= heap_end) return -1; storeRam32(A_HEAP_BASE, heap_addr_new); @@ -1372,6 +1372,12 @@ static void psxBios_malloc() { // 0x33 break; } // chunk too small + if (next_chunk_hdr == ~1) { + // rm useless last free block + storeRam32(A_HEAP_BASE, chunk + 4); + storeRam32(chunk, ~1); + continue; + } if (next_chunk_hdr & 1) { // merge u32 msize = (chunk_hdr & ~3) + 4 + (next_chunk_hdr & ~3); @@ -1391,10 
+1397,15 @@ static void psxBios_malloc() { // 0x33 } } - if (i == limit) + if (i == limit) { + PSXBIOS_LOG("malloc: limit OOM\n"); ret = 0; - else if (tries == 0 && malloc_heap_grow(size)) + } + else if (tries == 0 && malloc_heap_grow(size)) { + PSXBIOS_LOG("malloc: grow OOM s=%d end=%08x/%08x\n", + size, loadRam32(A_HEAP_BASE), loadRam32(A_HEAP_END)); ret = 0; + } else { u32 chunk = loadRam32(A_HEAP_CURCHNK); storeRam32(chunk, loadRam32(chunk) & ~3); @@ -1428,9 +1439,8 @@ static void psxBios_calloc() { // 0x37 void psxBios_realloc() { // 0x38 u32 block = a0; u32 size = a1; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x38]); -#endif + + PSXBIOS_LOG("psxBios_%s %08x %d\n", biosA0n[0x38], a0, a1); a0 = block; /* If "old_buf" is zero, executes malloc(new_size), and returns r2=new_buf (or 0=failed). */ From 46d596bb5ba732fc0609d2e98c5cd934d0edd731 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 21 Jan 2024 01:06:36 +0200 Subject: [PATCH 519/597] release r24 --- .github/ISSUE_TEMPLATE/report.yml | 2 +- frontend/libretro.c | 2 +- readme.txt | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/report.yml b/.github/ISSUE_TEMPLATE/report.yml index f68f236b4..aa2c3c371 100644 --- a/.github/ISSUE_TEMPLATE/report.yml +++ b/.github/ISSUE_TEMPLATE/report.yml @@ -6,7 +6,7 @@ body: attributes: label: PCSX-ReARMed Version description: Version number of the emulator as shown in the menus and printed in logs. 
- placeholder: r23l a4e249a1 + placeholder: r24l 3a52f747 validations: required: true - type: input diff --git a/frontend/libretro.c b/frontend/libretro.c index 7b29a23af..0a98e83e7 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -989,7 +989,7 @@ void retro_get_system_info(struct retro_system_info *info) #endif memset(info, 0, sizeof(*info)); info->library_name = "PCSX-ReARMed"; - info->library_version = "r23l" GIT_VERSION; + info->library_version = "r24l" GIT_VERSION; info->valid_extensions = "bin|cue|img|mdf|pbp|toc|cbn|m3u|chd|iso|exe"; info->need_fullpath = true; } diff --git a/readme.txt b/readme.txt index 1125bf94f..1f1f2d52e 100644 --- a/readme.txt +++ b/readme.txt @@ -113,6 +113,14 @@ the main menu where it is possible to enable/disable individual cheats. Changelog --------- +r24 (2024-01-22) +* HLE compatibility has been greatly improved +* various compatibility and accuracy improvements ++ support for more lightgun types +- savestates from older versions might not work, especially if HLE was used. + Memory card files remain fully compatible. 
+* many small changes I forgot about + r23 (2022-03-14) * many fixes from various contributors on github and from the libretro fork * dynarec related slowdowns have been greatly reduced From 98ca06284f04c10acf81d2c6a537a42ee8f39c97 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 15 Feb 2024 01:18:57 +0200 Subject: [PATCH 520/597] psxbios: implement yet more memcard details libretro/pcsx_rearmed#821 --- libpcsxcore/psxbios.c | 208 ++++++++++++++++++++++-------------------- 1 file changed, 109 insertions(+), 99 deletions(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 961efdb41..1f2e37a23 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -265,6 +265,7 @@ static char ffile[64]; static int nfile; static char cdir[8*8+8]; static u32 floodchk; +static int card_io_delay; // fixed RAM offsets, SCPH1001 compatible #define A_TT_ExCB 0x0100 @@ -319,7 +320,9 @@ static u32 floodchk; #define A_B0_5B_TRAP 0x43d0 #define CARD_HARDLER_WRITE 0x51F4 +#define CARD_HARDLER_WRITEM 0x51F5 // fake, for psxBios_write() #define CARD_HARDLER_READ 0x5688 +#define CARD_HARDLER_READM 0x5689 // fake, for psxBios_read() #define CARD_HARDLER_INFO 0x5B64 #define HLEOP(n) SWAPu32((0x3b << 26) | (n)); @@ -459,92 +462,99 @@ static u32 DeliverEvent(u32 class, u32 spec); static u32 UnDeliverEvent(u32 class, u32 spec); static void CloseEvent(u32 ev); +static int card_buf_io(int is_write, int port, void *buf, u32 size) +{ + char *mcdptr = port ? Mcd2Data : Mcd1Data; + FileDesc *desc = &FDesc[2 + port]; + u32 offset = 8192 * desc->mcfile + desc->offset; + + PSXBIOS_LOG("card_%s_buf %d,%d: ofs=%x(%x) sz=%x (%s)\n", + is_write ? 
"write" : "read", port, desc->mcfile, + desc->offset, offset, size, mcdptr + 128 * desc->mcfile + 0xa); + if (!(loadRam8(A_CARD_STATUS1 + port) & 1)) { + PSXBIOS_LOG(" ...busy %x\n", loadRam8(A_CARD_STATUS1 + port)); + return -1; + } + UnDeliverEvent(0xf4000001, 0x0004); + UnDeliverEvent(0xf4000001, 0x8000); + UnDeliverEvent(0xf4000001, 0x2000); + UnDeliverEvent(0xf4000001, 0x0100); + + if (offset >= 128*1024u) { + log_unhandled("card offs %x(%x)\n", desc->offset, offset); + DeliverEvent(0xf4000001, 0x8000); // ? + return -1; + } + if (offset + size >= 128*1024u) { + log_unhandled("card offs+size %x+%x\n", offset, size); + size = 128*1024 - offset; + } + if (is_write) { + memcpy(mcdptr + offset, buf, size); + if (port == 0) + SaveMcd(Config.Mcd1, Mcd1Data, offset, size); + else + SaveMcd(Config.Mcd2, Mcd2Data, offset, size); + } + else { + size_t ram_offset = (s8 *)buf - psxM; + memcpy(buf, mcdptr + offset, size); + if (ram_offset < 0x200000) + psxCpu->Clear(ram_offset, (size + 3) / 4); + } + desc->offset += size; + if (desc->mode & 0x8000) { // async + storeRam8(A_CARD_STATUS1 + port, is_write ? 4 : 2); // busy + storeRam32(A_CARD_HANDLER, + is_write ? 
CARD_HARDLER_WRITEM : CARD_HARDLER_READM); + card_io_delay = 2 + size / 1024; // hack + return 0; + } + return size; +} + /* * // * // * // System calls A0 */ - -#define buread(Ra1, mcd, length) { \ - PSXBIOS_LOG("read %d: %x,%x (%s)\n", FDesc[1 + mcd].mcfile, FDesc[1 + mcd].offset, a2, Mcd##mcd##Data + 128 * FDesc[1 + mcd].mcfile + 0xa); \ - ptr = Mcd##mcd##Data + 8192 * FDesc[1 + mcd].mcfile + FDesc[1 + mcd].offset; \ - memcpy(Ra1, ptr, length); \ - psxCpu->Clear(a1, (length + 3) / 4); \ - if (FDesc[1 + mcd].mode & 0x8000) { \ - DeliverEvent(0xf0000011, 0x0004); \ - DeliverEvent(0xf4000001, 0x0004); \ - v0 = 0; } \ - else v0 = length; \ - FDesc[1 + mcd].offset += v0; \ -} - -#define buwrite(Ra1, mcd, length) { \ - u32 offset = + 8192 * FDesc[1 + mcd].mcfile + FDesc[1 + mcd].offset; \ - PSXBIOS_LOG("write %d: %x,%x\n", FDesc[1 + mcd].mcfile, FDesc[1 + mcd].offset, a2); \ - ptr = Mcd##mcd##Data + offset; \ - memcpy(ptr, Ra1, length); \ - FDesc[1 + mcd].offset += length; \ - SaveMcd(Config.Mcd##mcd, Mcd##mcd##Data, offset, length); \ - if (FDesc[1 + mcd].mode & 0x8000) { \ - DeliverEvent(0xf0000011, 0x0004); \ - DeliverEvent(0xf4000001, 0x0004); \ - v0 = 0; } \ - else v0 = length; \ -} - /* Internally redirects to "FileRead(fd,tempbuf,1)".*/ /* For some strange reason, the returned character is sign-expanded; */ /* So if a return value of FFFFFFFFh could mean either character FFh, or error. 
*/ -/* TODO FIX ME : Properly implement this behaviour */ -void psxBios_getc(void) // 0x03, 0x35 +static void psxBios_getc(void) // 0x03, 0x35 { - char *ptr; - void *pa1 = Ra1; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x03]); -#endif - v0 = -1; + s8 buf[1] = { -1 }; + int ret = -1; - if (pa1 != INVALID_PTR) { - switch (a0) { - case 2: buread(pa1, 1, 1); break; - case 3: buread(pa1, 2, 1); break; - } + PSXBIOS_LOG("psxBios_%s %d\n", biosA0n[0x03], a0); + + if (a0 == 1) + ret = -1; + else if (a0 == 2 || a0 == 3) { + card_buf_io(0, a0 - 2, buf, 1); + ret = buf[0]; } - pc0 = ra; + mips_return_c(ret, 100); } /* Copy of psxBios_write, except size is 1. */ -void psxBios_putc(void) // 0x09, 0x3B +static void psxBios_putc(void) // 0x09, 0x3B { - char *ptr; - void *pa1 = Ra1; -#ifdef PSXBIOS_LOG - PSXBIOS_LOG("psxBios_%s\n", biosA0n[0x09]); -#endif - v0 = -1; - if (pa1 == INVALID_PTR) { - pc0 = ra; - return; - } + u8 buf[1] = { (u8)a0 }; + int ret = -1; - if (a0 == 1) { // stdout - char *ptr = (char *)pa1; + if (a1 != 1) // not stdout + PSXBIOS_LOG("psxBios_%s '%c' %d\n", biosA0n[0x09], (char)a0, a1); - v0 = a2; - while (a2 > 0) { - printf("%c", *ptr++); a2--; - } - pc0 = ra; return; + if (a1 == 1) { // stdout + if (Config.PsxOut) printf("%c", (char)a0); } - - switch (a0) { - case 2: buwrite(pa1, 1, 1); break; - case 3: buwrite(pa1, 2, 1); break; + else if (a1 == 2 || a1 == 3) { + ret = card_buf_io(1, a1 - 2, buf, 1); } - pc0 = ra; + mips_return_c(ret, 100); } static u32 do_todigit(u32 c) @@ -2535,8 +2545,8 @@ static void buopen(int mcd, char *ptr, char *cfg) if ((*fptr & 0xF0) != 0x50) continue; if (strcmp(FDesc[1 + mcd].name, fptr+0xa)) continue; FDesc[1 + mcd].mcfile = i; - PSXBIOS_LOG("open %s\n", fptr+0xa); v0 = 1 + mcd; + PSXBIOS_LOG("open %s -> %d\n", fptr+0xa, v0); break; } if (a1 & 0x200 && v0 == -1) { /* FCREAT */ @@ -2591,7 +2601,7 @@ static void buopen(int mcd, char *ptr, char *cfg) void psxBios_open() { // 0x32 void *pa0 = Ra0; - 
PSXBIOS_LOG("psxBios_%s %s %x\n", biosB0n[0x32], Ra0, a1); + PSXBIOS_LOG("psxBios_%s %s(%x) %x\n", biosB0n[0x32], Ra0, a0, a1); v0 = -1; @@ -2639,44 +2649,34 @@ void psxBios_lseek() { // 0x33 * int read(int fd , void *buf , int nbytes); */ -void psxBios_read() { // 0x34 - char *ptr; +static void psxBios_read() { // 0x34 void *pa1 = Ra1; + int ret = -1; -#ifdef PSXBIOS_LOG PSXBIOS_LOG("psxBios_%s: %x, %x, %x\n", biosB0n[0x34], a0, a1, a2); -#endif - v0 = -1; - - if (pa1 != INVALID_PTR) { - switch (a0) { - case 2: buread(pa1, 1, a2); break; - case 3: buread(pa1, 2, a2); break; - } - } + if (pa1 == INVALID_PTR) + ; + else if (a0 == 2 || a0 == 3) + ret = card_buf_io(0, a0 - 2, pa1, a2); - pc0 = ra; + mips_return_c(ret, 100); } /* * int write(int fd , void *buf , int nbytes); */ -void psxBios_write() { // 0x35/0x03 - char *ptr; +static void psxBios_write() { // 0x35/0x03 void *pa1 = Ra1; + int ret = -1; - if (a0 != 1) // stdout + if (a0 != 1) // not stdout PSXBIOS_LOG("psxBios_%s: %x,%x,%x\n", biosB0n[0x35], a0, a1, a2); - v0 = -1; - if (pa1 == INVALID_PTR) { - pc0 = ra; - return; - } - - if (a0 == 1) { // stdout + if (pa1 == INVALID_PTR) + ; + else if (a0 == 1) { // stdout char *ptr = pa1; v0 = a2; @@ -2685,13 +2685,10 @@ void psxBios_write() { // 0x35/0x03 } pc0 = ra; return; } + else if (a0 == 2 || a0 == 3) + ret = card_buf_io(1, a0 - 2, pa1, a2); - switch (a0) { - case 2: buwrite(pa1, 1, a2); break; - case 3: buwrite(pa1, 2, a2); break; - } - - pc0 = ra; + mips_return_c(ret, 100); } static void psxBios_write_psxout() { @@ -2997,7 +2994,7 @@ void psxBios__card_write() { // 0x4e } storeRam8(A_CARD_STATUS1 + port, 4); // busy/write - storeRam32(A_CARD_HANDLER, CARD_HARDLER_READ); + storeRam32(A_CARD_HANDLER, CARD_HARDLER_WRITE); v0 = 1; pc0 = ra; } @@ -3187,6 +3184,11 @@ static void psxBios__card_load() { // A ac static void card_vint_handler(void) { u8 select, status; u32 handler; + + if (card_io_delay) { + card_io_delay--; + return; + } UnDeliverEvent(0xf0000011, 
0x0004); UnDeliverEvent(0xf0000011, 0x8000); UnDeliverEvent(0xf0000011, 0x0100); @@ -3217,6 +3219,10 @@ static void card_vint_handler(void) { storeRam8(A_CARD_STATUS1 + select, 1); storeRam32(A_CARD_HANDLER, 0); break; + case CARD_HARDLER_WRITEM: + case CARD_HARDLER_READM: + DeliverEvent(0xf4000001, 4); + // fallthrough case CARD_HARDLER_WRITE: case CARD_HARDLER_READ: DeliverEvent(0xf0000011, 4); @@ -3227,6 +3233,9 @@ static void card_vint_handler(void) { break; default: log_unhandled("%s: unhandled handler %x\n", __func__, handler); + DeliverEvent(0xf0000011, 0x8000); + storeRam8(A_CARD_STATUS1 + select, 1); + storeRam32(A_CARD_HANDLER, 0); } } @@ -3864,8 +3873,8 @@ void psxBiosInit() { //biosB0[0x37] = psxBios_ioctl; //biosB0[0x38] = psxBios_exit; //biosB0[0x39] = psxBios_sys_b0_39; - //biosB0[0x3a] = psxBios_getc; - //biosB0[0x3b] = psxBios_putc; + biosB0[0x3a] = psxBios_getc; + biosB0[0x3b] = psxBios_putc; biosB0[0x3c] = psxBios_getchar; biosB0[0x3d] = psxBios_putchar; //biosB0[0x3e] = psxBios_gets; @@ -4580,4 +4589,5 @@ void psxBiosFreeze(int Mode) { bfreezes(ffile); bfreezel(&nfile); bfreezes(cdir); + bfreezel(&card_io_delay); } From 8496ee3ebc130025078a50d45880edcc0c95ae55 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 11 Mar 2024 01:28:41 +0200 Subject: [PATCH 521/597] psxhw: adjust sio stat libretro/pcsx_rearmed#822 --- libpcsxcore/new_dynarec/pcsxmem.c | 7 +------ libpcsxcore/psxhw.c | 8 +++++++- libpcsxcore/psxhw.h | 1 + 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index 905f3a2ca..151fb4bba 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -97,11 +97,6 @@ static void io_write_sio32(u32 value) sioWrite8((unsigned char)(value >> 24)); } -static u32 io_read_sio2_status() -{ - return 0x80; -} - #if !defined(DRC_DBG) && defined(__arm__) static void map_rcnt_rcount0(u32 mode) @@ -386,7 +381,7 @@ void new_dyna_pcsx_mem_init(void) 
map_item(&mem_iortab[IOMEM16(0x1048)], sioReadMode16, 1); map_item(&mem_iortab[IOMEM16(0x104a)], sioReadCtrl16, 1); map_item(&mem_iortab[IOMEM16(0x104e)], sioReadBaud16, 1); - map_item(&mem_iortab[IOMEM16(0x1054)], io_read_sio2_status, 1); + map_item(&mem_iortab[IOMEM16(0x1054)], sio1ReadStat16, 1); map_item(&mem_iortab[IOMEM16(0x1100)], psxRcntRcount0, 1); map_item(&mem_iortab[IOMEM16(0x1104)], io_rcnt_read_mode0, 1); map_item(&mem_iortab[IOMEM16(0x1108)], io_rcnt_read_target0, 1); diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index 0a468753a..678811243 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -117,6 +117,12 @@ u32 psxHwReadGpuSR(void) return v; } +u32 sio1ReadStat16(void) +{ + // Armored Core, F1 Link cable misdetection + return 0xa0; +} + u8 psxHwRead8(u32 add) { u8 hard; @@ -180,7 +186,7 @@ u16 psxHwRead16(u32 add) { case 0x1048: hard = sioReadMode16(); break; case 0x104a: hard = sioReadCtrl16(); break; case 0x104e: hard = sioReadBaud16(); break; - case 0x1054: hard = 0x80; break; // Armored Core Link cable misdetection + case 0x1054: hard = sio1ReadStat16(); break; case 0x1100: hard = psxRcntRcount0(); break; case 0x1104: hard = psxRcntRmode(0); break; case 0x1108: hard = psxRcntRtarget(0); break; diff --git a/libpcsxcore/psxhw.h b/libpcsxcore/psxhw.h index 3017c9011..b2dcee62b 100644 --- a/libpcsxcore/psxhw.h +++ b/libpcsxcore/psxhw.h @@ -80,6 +80,7 @@ u32 psxHwRead32(u32 add); void psxHwWrite8(u32 add, u32 value); void psxHwWrite16(u32 add, u32 value); void psxHwWrite32(u32 add, u32 value); +u32 sio1ReadStat16(void); int psxHwFreeze(void *f, int Mode); void psxHwWriteIstat(u32 value); From 835c219c2cef86c7f45c444ab71a6c9c999e289f Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 18 Mar 2024 02:37:51 +0200 Subject: [PATCH 522/597] libretro: preliminary physical cdrom support --- Makefile | 21 ++++++++ frontend/libretro.c | 116 +++++++++++++++++++++++++++++++++++++++++++ libpcsxcore/cdriso.c | 2 +- libpcsxcore/cdriso.h | 1 + 4 files 
changed, 139 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7a52cdb0f..7079ecddf 100644 --- a/Makefile +++ b/Makefile @@ -242,6 +242,27 @@ CFLAGS += `pkg-config --cflags glib-2.0 libosso dbus-1 hildon-fm-2` LDFLAGS += `pkg-config --libs glib-2.0 libosso dbus-1 hildon-fm-2` endif ifeq "$(PLATFORM)" "libretro" +ifneq "$(HAVE_PHYSICAL_CDROM)$(USE_LIBRETRO_VFS)" "00" +OBJS += deps/libretro-common/compat/compat_strl.o +OBJS += deps/libretro-common/file/file_path.o +OBJS += deps/libretro-common/string/stdstring.o +OBJS += deps/libretro-common/vfs/vfs_implementation.o +endif +ifeq "$(HAVE_PHYSICAL_CDROM)" "1" +OBJS += deps/libretro-common/cdrom/cdrom.o +OBJS += deps/libretro-common/memmap/memalign.o +OBJS += deps/libretro-common/vfs/vfs_implementation_cdrom.o +CFLAGS += -DHAVE_CDROM +endif +ifeq "$(USE_LIBRETRO_VFS)" "1" +OBJS += deps/libretro-common/compat/compat_posix_string.o +OBJS += deps/libretro-common/compat/fopen_utf8.o +OBJS += deps/libretro-common/encodings/encoding_utf.o +OBJS += deps/libretro-common/streams/file_stream.o +OBJS += deps/libretro-common/streams/file_stream_transforms.o +OBJS += deps/libretro-common/time/rtime.o +CFLAGS += -DUSE_LIBRETRO_VFS +endif OBJS += frontend/libretro.o CFLAGS += -DFRONTEND_SUPPORTS_RGB565 diff --git a/frontend/libretro.c b/frontend/libretro.c index 0a98e83e7..a3c3610b3 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -1497,6 +1497,103 @@ static void extract_directory(char *buf, const char *path, size_t size) } } +// raw cdrom support +#ifdef HAVE_CDROM +#include "vfs/vfs_implementation.h" +#include "vfs/vfs_implementation_cdrom.h" +#include "cdrom/cdrom.h" +static libretro_vfs_implementation_file *rcdrom_h; + +static long CALLBACK rcdrom_open(void) +{ + //printf("%s %s\n", __func__, GetIsoFile()); + rcdrom_h = retro_vfs_file_open_impl(GetIsoFile(), RETRO_VFS_FILE_ACCESS_READ, + RETRO_VFS_FILE_ACCESS_HINT_NONE); + return rcdrom_h ? 
0 : -1; +} + +static long CALLBACK rcdrom_close(void) +{ + //printf("%s\n", __func__); + if (rcdrom_h) { + retro_vfs_file_close_impl(rcdrom_h); + rcdrom_h = NULL; + } + return 0; +} + +static long CALLBACK rcdrom_getTN(unsigned char *tn) +{ + const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc(); + tn[0] = 1; + tn[1] = toc->num_tracks; + //printf("%s -> %d %d\n", __func__, tn[0], tn[1]); + return 0; +} + +static long CALLBACK rcdrom_getTD(unsigned char track, unsigned char *rt) +{ + const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc(); + rt[0] = 0, rt[1] = 2, rt[2] = 0; + if (track == 0) { + const cdrom_track_t *last = &toc->track[toc->num_tracks - 1]; + unsigned lba = cdrom_msf_to_lba(last->min, last->sec, last->frame); + lba += last->track_size; + cdrom_lba_to_msf(lba, &rt[2], &rt[1], &rt[0]); + } + else if (track <= toc->num_tracks) { + int i = track - 1; + rt[2] = toc->track[i].min; + rt[1] = toc->track[i].sec; + rt[0] = toc->track[i].frame; + } + //printf("%s %d -> %d:%02d:%02d\n", __func__, track, rt[2], rt[1], rt[0]); + return 0; +} + +static boolean CALLBACK rcdrom_readTrack(unsigned char *time) +{ + void *buf = ISOgetBuffer(); + int ret = -1; + if (rcdrom_h) + ret = cdrom_read(rcdrom_h, NULL, + btoi(time[0]), btoi(time[1]), btoi(time[2]), buf, 2340, 12); + //printf("%s %x:%02x:%02x -> %d\n", __func__, time[0], time[1], time[2], ret); + return !ret; +} + +static unsigned char * CALLBACK rcdrom_getBuffer(void) +{ + //printf("%s\n", __func__); + return ISOgetBuffer(); +} + +static unsigned char * CALLBACK rcdrom_getBufferSub(int sector) +{ + //printf("%s %d %d\n", __func__, sector, rcdrom_h->cdrom.last_frame_lba); + return NULL; +} + +static long CALLBACK rcdrom_getStatus(struct CdrStat *stat) +{ + const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc(); + //printf("%s %p\n", __func__, stat); + CDR__getStatus(stat); + stat->Type = toc->track[0].audio ? 
2 : 1; + return 0; +} + +static long CALLBACK rcdrom_readCDDA(unsigned char m, unsigned char s, unsigned char f, + unsigned char *buffer) +{ + int ret = -1; + if (rcdrom_h) + ret = cdrom_read(rcdrom_h, NULL, m, s, f, buffer, 2352, 0); + //printf("%s %d:%02d:%02d -> %d\n", __func__, m, s, f, ret); + return ret; +} +#endif // HAVE_CDROM + #if defined(__QNX__) || defined(_WIN32) /* Blackberry QNX doesn't have strcasestr */ @@ -1742,6 +1839,25 @@ bool retro_load_game(const struct retro_game_info *info) LogErr("failed to load plugins\n"); return false; } + if (!strncmp(info->path, "cdrom:", 6)) + { +#ifdef HAVE_CDROM + CDR_open = rcdrom_open; + CDR_close = rcdrom_close; + CDR_getTN = rcdrom_getTN; + CDR_getTD = rcdrom_getTD; + CDR_readTrack = rcdrom_readTrack; + CDR_getBuffer = rcdrom_getBuffer; + CDR_getBufferSub = rcdrom_getBufferSub; + CDR_getStatus = rcdrom_getStatus; + CDR_readCDDA = rcdrom_readCDDA; +#else + ReleasePlugins(); + LogErr("%s\n", "Physical CD-ROM support is not compiled in."); + show_notification("Physical CD-ROM support is not compiled in.", 6000, 3); + return false; +#endif + } plugins_opened = 1; NetOpened = 0; diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index c352b168b..7924bb358 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -1326,7 +1326,7 @@ static unsigned char * CALLBACK ISOgetBuffer_chd(void) { } #endif -static unsigned char * CALLBACK ISOgetBuffer(void) { +unsigned char * CALLBACK ISOgetBuffer(void) { return cdbuffer + 12; } diff --git a/libpcsxcore/cdriso.h b/libpcsxcore/cdriso.h index 16ad52ff1..079e0b8c3 100644 --- a/libpcsxcore/cdriso.h +++ b/libpcsxcore/cdriso.h @@ -27,6 +27,7 @@ extern "C" { void cdrIsoInit(void); int cdrIsoActive(void); +unsigned char * CALLBACK ISOgetBuffer(void); extern unsigned int cdrIsoMultidiskCount; extern unsigned int cdrIsoMultidiskSelect; From e02164097b6a90b94305d5761d7eccd63e336490 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 21 Mar 2024 01:14:56 +0200 Subject: [PATCH 
523/597] physical cdrom readahead much more messy than I hoped notaz/pcsx_rearmed#335 --- Makefile | 4 +- frontend/libretro-cdrom.c | 70 ++++++++ frontend/libretro-cdrom.h | 5 + frontend/libretro.c | 286 +++++++++++++++++++++++++++---- frontend/libretro_core_options.h | 19 ++ frontend/plugin.c | 2 + frontend/plugin.h | 2 + libpcsxcore/cdriso.c | 2 + libpcsxcore/cdrom.c | 16 +- libpcsxcore/plugins.c | 3 + libpcsxcore/plugins.h | 2 + 11 files changed, 377 insertions(+), 34 deletions(-) create mode 100644 frontend/libretro-cdrom.c create mode 100644 frontend/libretro-cdrom.h diff --git a/Makefile b/Makefile index 7079ecddf..8b2b87d9d 100644 --- a/Makefile +++ b/Makefile @@ -249,8 +249,10 @@ OBJS += deps/libretro-common/string/stdstring.o OBJS += deps/libretro-common/vfs/vfs_implementation.o endif ifeq "$(HAVE_PHYSICAL_CDROM)" "1" -OBJS += deps/libretro-common/cdrom/cdrom.o +OBJS += frontend/libretro-cdrom.o +OBJS += deps/libretro-common/lists/string_list.o OBJS += deps/libretro-common/memmap/memalign.o +OBJS += deps/libretro-common/rthreads/rthreads.o OBJS += deps/libretro-common/vfs/vfs_implementation_cdrom.o CFLAGS += -DHAVE_CDROM endif diff --git a/frontend/libretro-cdrom.c b/frontend/libretro-cdrom.c new file mode 100644 index 000000000..654de1c5e --- /dev/null +++ b/frontend/libretro-cdrom.c @@ -0,0 +1,70 @@ +#include "libretro-cdrom.h" +#include "../deps/libretro-common/cdrom/cdrom.c" +#if defined(__linux__) && !defined(ANDROID) +//#include +#endif + +static int cdrom_send_command_dummy(const libretro_vfs_implementation_file *stream, + CDROM_CMD_Direction dir, void *buf, size_t len, unsigned char *cmd, size_t cmd_len, + unsigned char *sense, size_t sense_len) +{ + return 1; +} + +static int cdrom_send_command_once(const libretro_vfs_implementation_file *stream, + CDROM_CMD_Direction dir, void *buf, size_t len, unsigned char *cmd, size_t cmd_len) +{ + unsigned char sense[CDROM_MAX_SENSE_BYTES] = {0}; + int ret = +#if defined(__linux__) && !defined(ANDROID) + 
cdrom_send_command_linux +#elif defined(_WIN32) && !defined(_XBOX) + cdrom_send_command_win32 +#else + cdrom_send_command_dummy +#endif + (stream, dir, buf, len, cmd, cmd_len, sense, sizeof(sense)); +#ifdef CDROM_DEBUG + if (ret && sense[2]) + cdrom_print_sense_data(sense, sizeof(sense)); +#endif + (void)cdrom_send_command_dummy; + return ret; +} + +// "extensions" to libretro-common +int cdrom_set_read_speed_x(libretro_vfs_implementation_file *stream, unsigned speed) +{ + // SET CD-ROM SPEED, DA is newer? + unsigned char cmd1[] = {0xDA, 0, speed - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + unsigned char cmd2[] = {0xBB, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + int ret; + ret = cdrom_send_command_once(stream, DIRECTION_NONE, NULL, 0, cmd1, sizeof(cmd1)); + if (ret) { + printf("DA failed\n"); +#if defined(__linux__) && !defined(ANDROID) + // doesn't work, too late? + //ret = ioctl(fileno(stream->fp), CDROM_SELECT_SPEED, &speed); +#endif + } + if (ret) { + speed = speed * 2352 * 75 / 1024; + cmd2[2] = speed >> 8; + cmd2[3] = speed; + ret = cdrom_send_command_once(stream, DIRECTION_NONE, NULL, 0, cmd2, sizeof(cmd2)); + } + return ret; +} + +int cdrom_read_sector(libretro_vfs_implementation_file *stream, + unsigned int lba, void *b) +{ + unsigned char cmd[] = {0xBE, 0, 0, 0, 0, 0, 0, 0, 1, 0xF8, 0, 0}; + cmd[2] = lba >> 24; + cmd[3] = lba >> 16; + cmd[4] = lba >> 8; + cmd[5] = lba; + return cdrom_send_command_once(stream, DIRECTION_IN, b, 2352, cmd, sizeof(cmd)); +} + +// vim:sw=3:ts=3:expandtab diff --git a/frontend/libretro-cdrom.h b/frontend/libretro-cdrom.h new file mode 100644 index 000000000..a09f7738d --- /dev/null +++ b/frontend/libretro-cdrom.h @@ -0,0 +1,5 @@ +#include "cdrom/cdrom.h" + +int cdrom_set_read_speed_x(libretro_vfs_implementation_file *stream, unsigned speed); +int cdrom_read_sector(libretro_vfs_implementation_file *stream, + unsigned int lba, void *b); diff --git a/frontend/libretro.c b/frontend/libretro.c index a3c3610b3..5cd0aee8e 100644 --- 
a/frontend/libretro.c +++ b/frontend/libretro.c @@ -15,6 +15,7 @@ #include #endif +#include "retro_miscellaneous.h" #ifdef SWITCH #include #endif @@ -1237,12 +1238,25 @@ static void disk_init(void) } } +#ifdef HAVE_CDROM +static long CALLBACK rcdrom_open(void); +static long CALLBACK rcdrom_close(void); +#endif + static bool disk_set_eject_state(bool ejected) { // weird PCSX API.. SetCdOpenCaseTime(ejected ? -1 : (time(NULL) + 2)); LidInterrupt(); +#ifdef HAVE_CDROM + if (CDR_open == rcdrom_open) { + // likely the real cd was also changed - rescan + rcdrom_close(); + if (!ejected) + rcdrom_open(); + } +#endif disk_ejected = ejected; return true; } @@ -1295,8 +1309,7 @@ static bool disk_set_image_index(unsigned int index) if (!disk_ejected) { - SetCdOpenCaseTime(time(NULL) + 2); - LidInterrupt(); + disk_set_eject_state(disk_ejected); } disk_current_index = index; @@ -1501,23 +1514,176 @@ static void extract_directory(char *buf, const char *path, size_t size) #ifdef HAVE_CDROM #include "vfs/vfs_implementation.h" #include "vfs/vfs_implementation_cdrom.h" -#include "cdrom/cdrom.h" -static libretro_vfs_implementation_file *rcdrom_h; +#include "libretro-cdrom.h" +#include "rthreads/rthreads.h" +#include "retro_timers.h" +struct cached_buf { + unsigned char buf[2352]; + unsigned int lba; +}; +static struct { + libretro_vfs_implementation_file *h; + sthread_t *thread; + slock_t *read_lock; + slock_t *buf_lock; + scond_t *cond; + struct cached_buf *buf; + unsigned int buf_cnt, thread_exit, do_prefetch; + unsigned int total_lba, prefetch_lba; +} rcdrom; + +static void lbacache_do(unsigned int lba) +{ + unsigned char m, s, f, buf[2352]; + unsigned int i = lba % rcdrom.buf_cnt; + int ret; + + cdrom_lba_to_msf(lba + 150, &m, &s, &f); + slock_lock(rcdrom.read_lock); + ret = cdrom_read_sector(rcdrom.h, lba, buf); + slock_lock(rcdrom.buf_lock); + slock_unlock(rcdrom.read_lock); + //printf("%d:%02d:%02d m%d f%d\n", m, s, f, buf[12+3], ((buf[12+4+2] >> 5) & 1) + 1); + if (ret) { + 
rcdrom.do_prefetch = 0; + slock_unlock(rcdrom.buf_lock); + LogErr("cdrom_read_sector failed for lba %d\n", ret, lba); + return; + } + + if (lba != rcdrom.buf[i].lba) { + memcpy(rcdrom.buf[i].buf, buf, sizeof(rcdrom.buf[i].buf)); + rcdrom.buf[i].lba = lba; + } + slock_unlock(rcdrom.buf_lock); + retro_sleep(0); // why does the main thread stall without this? +} + +static int lbacache_get(unsigned int lba, void *buf) +{ + unsigned int i; + int ret = 0; + + i = lba % rcdrom.buf_cnt; + slock_lock(rcdrom.buf_lock); + if (lba == rcdrom.buf[i].lba) { + memcpy(buf, rcdrom.buf[i].buf, 2352); + ret = 1; + } + slock_unlock(rcdrom.buf_lock); + return ret; +} + +static void rcdrom_prefetch_thread(void *unused) +{ + unsigned int buf_cnt, lba, lba_to; + + slock_lock(rcdrom.buf_lock); + while (!rcdrom.thread_exit) + { +#ifdef __GNUC__ + __asm__ __volatile__("":::"memory"); // barrier +#endif + if (!rcdrom.do_prefetch) + scond_wait(rcdrom.cond, rcdrom.buf_lock); + if (!rcdrom.do_prefetch || !rcdrom.h || rcdrom.thread_exit) + continue; + + buf_cnt = rcdrom.buf_cnt; + lba = rcdrom.prefetch_lba; + lba_to = lba + buf_cnt; + if (lba_to > rcdrom.total_lba) + lba_to = rcdrom.total_lba; + for (; lba < lba_to; lba++) { + if (lba != rcdrom.buf[lba % buf_cnt].lba) + break; + } + if (lba == lba_to) { + // caching complete + rcdrom.do_prefetch = 0; + continue; + } + + slock_unlock(rcdrom.buf_lock); + lbacache_do(lba); + slock_lock(rcdrom.buf_lock); + } + slock_unlock(rcdrom.buf_lock); +} + +static void rcdrom_stop_thread(void) +{ + rcdrom.thread_exit = 1; + if (rcdrom.buf_lock) { + slock_lock(rcdrom.buf_lock); + rcdrom.do_prefetch = 0; + if (rcdrom.cond) + scond_signal(rcdrom.cond); + slock_unlock(rcdrom.buf_lock); + } + if (rcdrom.thread) { + sthread_join(rcdrom.thread); + rcdrom.thread = NULL; + } + if (rcdrom.cond) { scond_free(rcdrom.cond); rcdrom.cond = NULL; } + if (rcdrom.buf_lock) { slock_free(rcdrom.buf_lock); rcdrom.buf_lock = NULL; } + if (rcdrom.read_lock) { 
slock_free(rcdrom.read_lock); rcdrom.read_lock = NULL; } + free(rcdrom.buf); + rcdrom.buf = NULL; +} + +// the thread is optional, if anything fails we can do direct reads +static void rcdrom_start_thread(void) +{ + rcdrom_stop_thread(); + rcdrom.thread_exit = rcdrom.prefetch_lba = rcdrom.do_prefetch = 0; + if (rcdrom.buf_cnt == 0) + return; + rcdrom.buf = calloc(rcdrom.buf_cnt, sizeof(rcdrom.buf[0])); + rcdrom.buf_lock = slock_new(); + rcdrom.read_lock = slock_new(); + rcdrom.cond = scond_new(); + if (rcdrom.buf && rcdrom.buf_lock && rcdrom.read_lock && rcdrom.cond) { + rcdrom.thread = sthread_create(rcdrom_prefetch_thread, NULL); + rcdrom.buf[0].lba = ~0; + } + if (!rcdrom.thread) { + LogErr("cdrom precache thread init failed.\n"); + rcdrom_stop_thread(); + } +} static long CALLBACK rcdrom_open(void) { - //printf("%s %s\n", __func__, GetIsoFile()); - rcdrom_h = retro_vfs_file_open_impl(GetIsoFile(), RETRO_VFS_FILE_ACCESS_READ, + const char *name = GetIsoFile(); + //printf("%s %s\n", __func__, name); + rcdrom.h = retro_vfs_file_open_impl(name, RETRO_VFS_FILE_ACCESS_READ, RETRO_VFS_FILE_ACCESS_HINT_NONE); - return rcdrom_h ? 
0 : -1; + if (rcdrom.h) { + int ret = cdrom_set_read_speed_x(rcdrom.h, 4); + if (ret) LogErr("CD speed set failed\n"); + const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc(); + const cdrom_track_t *last = &toc->track[toc->num_tracks - 1]; + unsigned int lba = cdrom_msf_to_lba(last->min, last->sec, last->frame) - 150; + rcdrom.total_lba = lba + last->track_size; + //cdrom_get_current_config_random_readable(rcdrom.h); + //cdrom_get_current_config_multiread(rcdrom.h); + //cdrom_get_current_config_cdread(rcdrom.h); + //cdrom_get_current_config_profiles(rcdrom.h); + rcdrom_start_thread(); + return 0; + } + LogErr("retro_vfs_file_open failed for '%s'\n", name); + return -1; } static long CALLBACK rcdrom_close(void) { //printf("%s\n", __func__); - if (rcdrom_h) { - retro_vfs_file_close_impl(rcdrom_h); - rcdrom_h = NULL; + if (rcdrom.h) { + rcdrom_stop_thread(); + retro_vfs_file_close_impl(rcdrom.h); + rcdrom.h = NULL; } return 0; } @@ -1536,10 +1702,7 @@ static long CALLBACK rcdrom_getTD(unsigned char track, unsigned char *rt) const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc(); rt[0] = 0, rt[1] = 2, rt[2] = 0; if (track == 0) { - const cdrom_track_t *last = &toc->track[toc->num_tracks - 1]; - unsigned lba = cdrom_msf_to_lba(last->min, last->sec, last->frame); - lba += last->track_size; - cdrom_lba_to_msf(lba, &rt[2], &rt[1], &rt[0]); + cdrom_lba_to_msf(rcdrom.total_lba + 150, &rt[2], &rt[1], &rt[0]); } else if (track <= toc->num_tracks) { int i = track - 1; @@ -1551,15 +1714,63 @@ static long CALLBACK rcdrom_getTD(unsigned char track, unsigned char *rt) return 0; } +static long CALLBACK rcdrom_prefetch(unsigned char m, unsigned char s, unsigned char f) +{ + unsigned int lba = cdrom_msf_to_lba(m, s, f) - 150; + if (rcdrom.cond && rcdrom.h) { + rcdrom.prefetch_lba = lba; + rcdrom.do_prefetch = 1; + scond_signal(rcdrom.cond); + } + if (rcdrom.buf) { + unsigned int c = rcdrom.buf_cnt; + if (c) + return rcdrom.buf[lba % c].lba == lba; + } + return 1; +} + +static int 
rcdrom_read_msf(unsigned char m, unsigned char s, unsigned char f, + void *buf, const char *func) +{ + unsigned int lba = cdrom_msf_to_lba(m, s, f) - 150; + int hit = 0, ret = -1; + if (rcdrom.buf_lock) + hit = lbacache_get(lba, buf); + if (!hit && rcdrom.read_lock) { + // maybe still prefetching + slock_lock(rcdrom.read_lock); + slock_unlock(rcdrom.read_lock); + hit = lbacache_get(lba, buf); + if (hit) + hit = 2; + } + if (!hit) { + slock_t *lock = rcdrom.read_lock; + rcdrom.do_prefetch = 0; + if (lock) + slock_lock(lock); + if (rcdrom.h) + ret = cdrom_read_sector(rcdrom.h, lba, buf); + if (lock) + slock_unlock(lock); + } + else + ret = 0; + //printf("%s %d:%02d:%02d -> %d hit %d\n", func, m, s, f, ret, hit); + return ret; +} + static boolean CALLBACK rcdrom_readTrack(unsigned char *time) { - void *buf = ISOgetBuffer(); - int ret = -1; - if (rcdrom_h) - ret = cdrom_read(rcdrom_h, NULL, - btoi(time[0]), btoi(time[1]), btoi(time[2]), buf, 2340, 12); - //printf("%s %x:%02x:%02x -> %d\n", __func__, time[0], time[1], time[2], ret); - return !ret; + unsigned char m = btoi(time[0]), s = btoi(time[1]), f = btoi(time[2]); + return !rcdrom_read_msf(m, s, f, ISOgetBuffer() - 12, __func__); +} + +static long CALLBACK rcdrom_readCDDA(unsigned char m, unsigned char s, unsigned char f, + unsigned char *buffer) +{ + return rcdrom_read_msf(m, s, f, buffer, __func__); } static unsigned char * CALLBACK rcdrom_getBuffer(void) @@ -1582,16 +1793,6 @@ static long CALLBACK rcdrom_getStatus(struct CdrStat *stat) stat->Type = toc->track[0].audio ? 
2 : 1; return 0; } - -static long CALLBACK rcdrom_readCDDA(unsigned char m, unsigned char s, unsigned char f, - unsigned char *buffer) -{ - int ret = -1; - if (rcdrom_h) - ret = cdrom_read(rcdrom_h, NULL, m, s, f, buffer, 2352, 0); - //printf("%s %d:%02d:%02d -> %d\n", __func__, m, s, f, ret); - return ret; -} #endif // HAVE_CDROM #if defined(__QNX__) || defined(_WIN32) @@ -1851,7 +2052,8 @@ bool retro_load_game(const struct retro_game_info *info) CDR_getBufferSub = rcdrom_getBufferSub; CDR_getStatus = rcdrom_getStatus; CDR_readCDDA = rcdrom_readCDDA; -#else + CDR_prefetch = rcdrom_prefetch; +#elif !defined(USE_LIBRETRO_VFS) ReleasePlugins(); LogErr("%s\n", "Physical CD-ROM support is not compiled in."); show_notification("Physical CD-ROM support is not compiled in.", 6000, 3); @@ -2246,6 +2448,26 @@ static void update_variables(bool in_flight) display_internal_fps = true; } +#ifdef HAVE_CDROM + var.value = NULL; + var.key = "pcsx_rearmed_phys_cd_readahead"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + long newval = strtol(var.value, NULL, 10); + bool changed = rcdrom.buf_cnt != newval; + if (rcdrom.h && changed) + rcdrom_stop_thread(); + rcdrom.buf_cnt = newval; + if (rcdrom.h && changed) { + rcdrom_start_thread(); + if (rcdrom.cond && rcdrom.prefetch_lba) { + rcdrom.do_prefetch = 1; + scond_signal(rcdrom.cond); + } + } + } +#endif + // // CPU emulation related config #ifndef DRC_DISABLE diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 3c5614e19..be9e8abfd 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -182,6 +182,25 @@ struct retro_core_option_v2_definition option_defs_us[] = { "sync", }, #endif +#ifdef HAVE_CDROM +#define V(x) { #x, NULL } + { + "pcsx_rearmed_phys_cd_readahead", + "Physical CD read-ahead", + NULL, + "(Hardware CD-ROM only) Reads the specified amount of sectors ahead of time to try to avoid later stalls. 
333000 will try to read the complete disk (requires an additional 750MB of RAM).", + NULL, + "system", + { + V(0), V(1), V(2), V(3), V(4), V(5), V(6), V(7), + V(8), V(9), V(10), V(11), V(12), V(13), V(14), V(15), + V(16), V(32), V(64), V(128), V(256), V(512), V(1024), V(333000), + { NULL, NULL}, + }, + "12", + }, +#undef V +#endif #ifndef DRC_DISABLE { "pcsx_rearmed_drc", diff --git a/frontend/plugin.c b/frontend/plugin.c index c400165f3..b3ad3bd98 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -41,6 +41,7 @@ static long CALLBACK CDRgetStatus(struct CdrStat *_) { return 0; } static char * CALLBACK CDRgetDriveLetter(void) { return NULL; } static long CALLBACK CDRreadCDDA(unsigned char _, unsigned char __, unsigned char ___, unsigned char *____) { return 0; } static long CALLBACK CDRgetTE(unsigned char _, unsigned char *__, unsigned char *___, unsigned char *____) { return 0; } +static long CALLBACK CDRprefetch(unsigned char m, unsigned char s, unsigned char f) { return 1; } /* GPU */ static void CALLBACK GPUdisplayText(char *_) { return; } @@ -163,6 +164,7 @@ static const struct { DIRECT_CDR(CDRsetfilename), DIRECT_CDR(CDRreadCDDA), DIRECT_CDR(CDRgetTE), + DIRECT_CDR(CDRprefetch), /* SPU */ DIRECT_SPU(SPUinit), DIRECT_SPU(SPUshutdown), diff --git a/frontend/plugin.h b/frontend/plugin.h index 5e12f9024..a96d6098d 100644 --- a/frontend/plugin.h +++ b/frontend/plugin.h @@ -1,7 +1,9 @@ #ifndef __PLUGIN_H__ #define __PLUGIN_H__ +#ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#endif #define PLUGIN_DL_BASE 0xfbad0000 diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index 7924bb358..cd2d20262 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -100,6 +100,7 @@ long CALLBACK CDR__configure(void); long CALLBACK CDR__test(void); void CALLBACK CDR__about(void); long CALLBACK CDR__setfilename(char *filename); +long CALLBACK CDR__prefetch(u8 m, u8 s, u8 f); static void DecodeRawSubData(void); @@ -1738,6 +1739,7 @@ void 
cdrIsoInit(void) { CDR_test = CDR__test; CDR_about = CDR__about; CDR_setfilename = CDR__setfilename; + CDR_prefetch = CDR__prefetch; numtracks = 0; } diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 872cc0c80..5da24c825 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -68,7 +68,7 @@ static struct { } subq; unsigned char TrackChanged; unsigned char ReportDelay; - unsigned char unused3; + unsigned char PhysCdPropagations; unsigned short sectorsRead; unsigned int freeze_ver; @@ -660,6 +660,14 @@ static void msfiSub(u8 *msfi, u32 count) void cdrPlayReadInterrupt(void) { + int hit = CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); + if (!hit && cdr.PhysCdPropagations++ < 222) { + // this propagates real cdrom delays to the emulated game + CDRPLAYREAD_INT(cdReadTime / 2, 0); + return; + } + cdr.PhysCdPropagations = 0; + cdr.LastReadSeekCycles = psxRegs.cycle; if (cdr.Reading) { @@ -694,6 +702,7 @@ void cdrPlayReadInterrupt(void) } msfiAdd(cdr.SetSectorPlay, 1); + CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); // update for CdlGetlocP/autopause generate_subq(cdr.SetSectorPlay); @@ -1109,6 +1118,8 @@ void cdrInterrupt(void) { seekTime = cdrSeekTime(cdr.SetSector); memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); cdr.DriveState = DRIVESTATE_SEEK; + CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], + cdr.SetSectorPlay[2]); /* Crusaders of Might and Magic = 0.5x-4x - fix cutscene speech start @@ -1246,6 +1257,8 @@ void cdrInterrupt(void) { cdr.SubqForwardSectors = 1; cdr.sectorsRead = 0; cdr.DriveState = DRIVESTATE_SEEK; + CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], + cdr.SetSectorPlay[2]); cycles = (cdr.Mode & MODE_SPEED) ? 
cdReadTime : cdReadTime * 2; cycles += seekTime; @@ -1423,6 +1436,7 @@ static void cdrReadInterrupt(void) cdrReadInterruptSetResult(cdr.StatP); msfiAdd(cdr.SetSectorPlay, 1); + CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); CDRPLAYREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime, 0); } diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index 9a3998236..369ea9743 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -68,6 +68,7 @@ CDRabout CDR_about; CDRsetfilename CDR_setfilename; CDRreadCDDA CDR_readCDDA; CDRgetTE CDR_getTE; +CDRprefetch CDR_prefetch; SPUinit SPU_init; SPUshutdown SPU_shutdown; @@ -257,6 +258,7 @@ long CALLBACK CDR__configure(void) { return 0; } long CALLBACK CDR__test(void) { return 0; } void CALLBACK CDR__about(void) {} long CALLBACK CDR__setfilename(char*filename) { return 0; } +long CALLBACK CDR__prefetch(u8 m, u8 s, u8 f) { return 1; } #define LoadCdrSym1(dest, name) \ LoadSym(CDR_##dest, CDR##dest, name, TRUE); @@ -301,6 +303,7 @@ static int LoadCDRplugin(const char *CDRdll) { LoadCdrSym0(setfilename, "CDRsetfilename"); LoadCdrSymN(readCDDA, "CDRreadCDDA"); LoadCdrSymN(getTE, "CDRgetTE"); + LoadCdrSym0(prefetch, "CDRprefetch"); return 0; } diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index df8ed87db..4054bf67a 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -133,6 +133,7 @@ struct SubQ { }; typedef long (CALLBACK* CDRreadCDDA)(unsigned char, unsigned char, unsigned char, unsigned char *); typedef long (CALLBACK* CDRgetTE)(unsigned char, unsigned char *, unsigned char *, unsigned char *); +typedef long (CALLBACK* CDRprefetch)(unsigned char, unsigned char, unsigned char); // CD-ROM function pointers extern CDRinit CDR_init; @@ -154,6 +155,7 @@ extern CDRabout CDR_about; extern CDRsetfilename CDR_setfilename; extern CDRreadCDDA CDR_readCDDA; extern CDRgetTE CDR_getTE; +extern CDRprefetch CDR_prefetch; long CALLBACK CDR__getStatus(struct CdrStat 
*stat); From c7f3cc746f2113b3c4a309c9b073f1bf3a64afce Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 22 Mar 2024 00:21:20 +0200 Subject: [PATCH 524/597] libretro: try to handle physical cdrom ejection notaz/pcsx_rearmed#335 --- frontend/libretro-cdrom.c | 1 - frontend/libretro.c | 42 ++++++++++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/frontend/libretro-cdrom.c b/frontend/libretro-cdrom.c index 654de1c5e..1c71f8a24 100644 --- a/frontend/libretro-cdrom.c +++ b/frontend/libretro-cdrom.c @@ -41,7 +41,6 @@ int cdrom_set_read_speed_x(libretro_vfs_implementation_file *stream, unsigned sp int ret; ret = cdrom_send_command_once(stream, DIRECTION_NONE, NULL, 0, cmd1, sizeof(cmd1)); if (ret) { - printf("DA failed\n"); #if defined(__linux__) && !defined(ANDROID) // doesn't work, too late? //ret = ioctl(fileno(stream->fp), CDROM_SELECT_SPEED, &speed); diff --git a/frontend/libretro.c b/frontend/libretro.c index 5cd0aee8e..ffd64c7e9 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -1241,20 +1241,26 @@ static void disk_init(void) #ifdef HAVE_CDROM static long CALLBACK rcdrom_open(void); static long CALLBACK rcdrom_close(void); +static void rcdrom_stop_thread(void); #endif static bool disk_set_eject_state(bool ejected) { - // weird PCSX API.. + if (ejected != disk_ejected) + SysPrintf("new eject_state: %d\n", ejected); + + // weird PCSX API... SetCdOpenCaseTime(ejected ? 
-1 : (time(NULL) + 2)); LidInterrupt(); #ifdef HAVE_CDROM - if (CDR_open == rcdrom_open) { - // likely the real cd was also changed - rescan - rcdrom_close(); - if (!ejected) + if (CDR_open == rcdrom_open && ejected != disk_ejected) { + rcdrom_stop_thread(); + if (!ejected) { + // likely the real cd was also changed - rescan + rcdrom_close(); rcdrom_open(); + } } #endif disk_ejected = ejected; @@ -1530,6 +1536,7 @@ static struct { struct cached_buf *buf; unsigned int buf_cnt, thread_exit, do_prefetch; unsigned int total_lba, prefetch_lba; + int check_eject_delay; } rcdrom; static void lbacache_do(unsigned int lba) @@ -1547,9 +1554,10 @@ static void lbacache_do(unsigned int lba) if (ret) { rcdrom.do_prefetch = 0; slock_unlock(rcdrom.buf_lock); - LogErr("cdrom_read_sector failed for lba %d\n", ret, lba); + LogErr("prefetch: cdrom_read_sector failed for lba %d\n", lba); return; } + rcdrom.check_eject_delay = 100; if (lba != rcdrom.buf[i].lba) { memcpy(rcdrom.buf[i].buf, buf, sizeof(rcdrom.buf[i].buf)); @@ -1750,13 +1758,17 @@ static int rcdrom_read_msf(unsigned char m, unsigned char s, unsigned char f, rcdrom.do_prefetch = 0; if (lock) slock_lock(lock); - if (rcdrom.h) + if (rcdrom.h) { ret = cdrom_read_sector(rcdrom.h, lba, buf); + if (ret) + LogErr("cdrom_read_sector failed for lba %d\n", lba); + } if (lock) slock_unlock(lock); } else ret = 0; + rcdrom.check_eject_delay = ret ? 0 : 100; //printf("%s %d:%02d:%02d -> %d hit %d\n", func, m, s, f, ret, hit); return ret; } @@ -1793,6 +1805,17 @@ static long CALLBACK rcdrom_getStatus(struct CdrStat *stat) stat->Type = toc->track[0].audio ? 
2 : 1; return 0; } + +static void rcdrom_check_eject(void) +{ + bool media_inserted; + if (!rcdrom.h || rcdrom.do_prefetch || rcdrom.check_eject_delay-- > 0) + return; + rcdrom.check_eject_delay = 100; + media_inserted = cdrom_is_media_inserted(rcdrom.h); // 1-2ms + if (!media_inserted != disk_ejected) + disk_set_eject_state(!media_inserted); +} #endif // HAVE_CDROM #if defined(__QNX__) || defined(_WIN32) @@ -3433,6 +3456,11 @@ void retro_run(void) video_cb((vout_fb_dirty || !vout_can_dupe) ? vout_buf_ptr : NULL, vout_width, vout_height, vout_pitch * 2); vout_fb_dirty = 0; + +#ifdef HAVE_CDROM + if (CDR_open == rcdrom_open) + rcdrom_check_eject(); +#endif } static bool try_use_bios(const char *path, bool preferred_only) From 1f6c92b8e13a8cff95c45f56b885adeff9d32c30 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 22 Mar 2024 00:52:37 +0200 Subject: [PATCH 525/597] misc: allow slow-booting to cdda or whatever notaz/pcsx_rearmed#335 --- libpcsxcore/misc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 0848c267b..889639d66 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -362,6 +362,7 @@ int LoadCdromFile(const char *filename, EXE_HEADER *head, u8 *time_bcd_out) { int CheckCdrom() { struct iso_directory_record *dir; + struct CdrStat stat = { 0, 0, }; unsigned char time[4]; char *buf; unsigned char mdir[4096]; @@ -369,17 +370,22 @@ int CheckCdrom() { int i, len, c; FreePPFCache(); + memset(CdromLabel, 0, sizeof(CdromLabel)); + memset(CdromId, 0, sizeof(CdromId)); + memset(exename, 0, sizeof(exename)); time[0] = itob(0); time[1] = itob(2); time[2] = itob(0x10); + if (!Config.HLE && Config.SlowBoot) { + // boot to BIOS in case of CDDA ir lid open + CDR_getStatus(&stat); + if ((stat.Status & 0x10) || stat.Type == 2 || !CDR_readTrack(time)) + return 0; + } READTRACK(); - memset(CdromLabel, 0, sizeof(CdromLabel)); - memset(CdromId, 0, sizeof(CdromId)); - memset(exename, 0, 
sizeof(exename)); - strncpy(CdromLabel, buf + 52, 32); // skip head and sub, and go to the root directory record From 8295d332b43963ebba54f3ad74388aaaeb11a789 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 23 Mar 2024 00:57:41 +0200 Subject: [PATCH 526/597] gpulib: fix frameskip on weird lists notaz/pcsx_rearmed#337 --- plugins/gpulib/gpu.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index f2bf6ce9e..b444dcf52 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -532,6 +532,10 @@ static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) uint32_t *list = data + pos; cmd = LE32TOH(list[0]) >> 24; len = 1 + cmd_lengths[cmd]; + if (pos + len > count) { + cmd = -1; + break; // incomplete cmd + } switch (cmd) { case 0x02: @@ -571,11 +575,6 @@ static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) gpu.ex_regs[cmd & 7] = LE32TOH(list[0]); break; } - - if (pos + len > count) { - cmd = -1; - break; // incomplete cmd - } if (0x80 <= cmd && cmd <= 0xdf) break; // image i/o From 0e067f5504ef81074ecc435b3493e8190c84c0ea Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 24 Mar 2024 02:46:33 +0200 Subject: [PATCH 527/597] psxdma: implement a few more details notaz/pcsx_rearmed#336 --- libpcsxcore/psxdma.c | 10 +++++++--- libpcsxcore/psxdma.h | 1 + libpcsxcore/psxhw.c | 31 ++++++++++++++++++++----------- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index 25ee2f0dc..55d2a0a77 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -208,8 +208,8 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU psxRegs.gpuIdleAfter = psxRegs.cycle + cycles_sum + cycles_last_cmd; set_event(PSXINT_GPUDMA, cycles_sum); - //printf("%u dma2cf: %d,%d %08x\n", psxRegs.cycle, cycles_sum, - // cycles_last_cmd, HW_DMA2_MADR); + //printf("%u dma2cf: %6d,%4d %08x %08x %08x %08x\n", psxRegs.cycle, + // 
cycles_sum, cycles_last_cmd, madr, bcr, chcr, HW_DMA2_MADR); return; default: @@ -233,7 +233,7 @@ void gpuInterrupt() { cycles_sum += psxRegs.gpuIdleAfter - psxRegs.cycle; psxRegs.gpuIdleAfter = psxRegs.cycle + cycles_sum + cycles_last_cmd; set_event(PSXINT_GPUDMA, cycles_sum); - //printf("%u dma2cn: %d,%d %08x\n", psxRegs.cycle, cycles_sum, + //printf("%u dma2cn: %6d,%4d %08x\n", psxRegs.cycle, cycles_sum, // cycles_last_cmd, HW_DMA2_MADR); return; } @@ -244,6 +244,10 @@ void gpuInterrupt() { } } +void psxAbortDma2() { + psxRegs.gpuIdleAfter = psxRegs.cycle + 32; +} + void psxDma6(u32 madr, u32 bcr, u32 chcr) { u32 words, words_max; u32 *mem; diff --git a/libpcsxcore/psxdma.h b/libpcsxcore/psxdma.h index 5c0ab4e6b..ce10d9d32 100644 --- a/libpcsxcore/psxdma.h +++ b/libpcsxcore/psxdma.h @@ -34,6 +34,7 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr); void psxDma3(u32 madr, u32 bcr, u32 chcr); void psxDma4(u32 madr, u32 bcr, u32 chcr); void psxDma6(u32 madr, u32 bcr, u32 chcr); +void psxAbortDma2(); void gpuInterrupt(); void spuInterrupt(); void gpuotcInterrupt(); diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c index 678811243..b96db97ad 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c @@ -60,22 +60,31 @@ void psxHwWriteImask(u32 value) psxRegs.CP0.n.Cause |= 0x400; } -#define make_dma_func(n) \ +#define make_dma_func(n, abort_func) \ void psxHwWriteChcr##n(u32 value) \ { \ - if (value & SWAPu32(HW_DMA##n##_CHCR) & 0x01000000) \ - log_unhandled("dma" #n " %08x -> %08x\n", HW_DMA##n##_CHCR, value); \ + u32 old = SWAPu32(HW_DMA##n##_CHCR); \ + if (n == 6) { value &= 0x51000002; value |= 2; } \ + else { value &= 0x71770703; } \ + if (value == old) \ + return; \ + if (old & 0x01000000) \ + log_unhandled("%u dma" #n " %08x -> %08x\n", psxRegs.cycle, old, value); \ HW_DMA##n##_CHCR = SWAPu32(value); \ - if (value & 0x01000000 && SWAPu32(HW_DMA_PCR) & (8u << (n * 4))) \ - psxDma##n(SWAPu32(HW_DMA##n##_MADR), SWAPu32(HW_DMA##n##_BCR), value); \ + if ((value ^ 
old) & 0x01000000) { \ + if (!(value & 0x01000000)) \ + abort_func; \ + else if (SWAPu32(HW_DMA_PCR) & (8u << (n * 4))) \ + psxDma##n(SWAPu32(HW_DMA##n##_MADR), SWAPu32(HW_DMA##n##_BCR), value); \ + } \ } -make_dma_func(0) -make_dma_func(1) -make_dma_func(2) -make_dma_func(3) -make_dma_func(4) -make_dma_func(6) +make_dma_func(0,) +make_dma_func(1,) +make_dma_func(2, psxAbortDma2()) +make_dma_func(3,) +make_dma_func(4,) +make_dma_func(6,) void psxHwWriteDmaIcr32(u32 value) { From ee98433b9ec9eb891cb285ff4818dc169d55ce87 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 24 Mar 2024 02:47:33 +0200 Subject: [PATCH 528/597] spu: don't assert on noise in thread mode --- plugins/dfsound/spu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index ad86cabc8..9d43e4389 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1000,8 +1000,10 @@ static void queue_channel_work(int ns_to, unsigned int silentch) if (unlikely(s_chan->bFMod == 2)) { // sucks, have to do double work - assert(!s_chan->bNoise); - d = do_samples_gauss(tmpFMod, decode_block, NULL, ch, ns_to, + if (s_chan->bNoise) + d = do_samples_noise(tmpFMod, ch, ns_to); + else + d = do_samples_gauss(tmpFMod, decode_block, NULL, ch, ns_to, &spu.sb[ch], s_chan->sinc, &s_chan->spos, &s_chan->iSBPos); if (!s_chan->bStarting) { d = MixADSR(tmpFMod, &s_chan->ADSRX, d); From 3047daea4d4f6f00fdaa0467c15efd128b822367 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 25 Mar 2024 01:50:25 +0200 Subject: [PATCH 529/597] spu: more status bits notaz/pcsx_rearmed#338 --- plugins/dfsound/registers.c | 2 +- plugins/dfsound/registers.h | 5 ++++- plugins/dfsound/spu.c | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index 1e79c0e6b..6d72d3ca1 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -144,7 +144,7 @@ void CALLBACK SPUwriteRegister(unsigned long reg, 
unsigned short val, break; //-------------------------------------------------// case H_SPUctrl: - spu.spuStat = (spu.spuStat & ~0x3f) | (val & 0x3f); + spu.spuStat = (spu.spuStat & ~0xbf) | (val & 0x3f) | ((val << 2) & 0x80); spu.spuStat &= ~STAT_IRQ | val; if (!(spu.spuCtrl & CTRL_IRQ)) { if (val & CTRL_IRQ) diff --git a/plugins/dfsound/registers.h b/plugins/dfsound/registers.h index a296431ff..4acfe6f24 100644 --- a/plugins/dfsound/registers.h +++ b/plugins/dfsound/registers.h @@ -153,7 +153,10 @@ #define CTRL_MUTE 0x4000 #define CTRL_ON 0x8000 -#define STAT_IRQ 0x40 +#define STAT_IRQ 0x0040 +#define STAT_DMA_W 0x0100 +#define STAT_DMA_R 0x0200 +#define STAT_DMA_BUSY 0x0400 /////////////////////////////////////////////////////////// diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 9d43e4389..022a1e3d3 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1245,6 +1245,7 @@ void do_samples(unsigned int cycles_to, int force_no_thread) spu.cycles_played += ns_to * 768; spu.decode_pos = (spu.decode_pos + ns_to) & 0x1ff; + spu.spuStat = (spu.spuStat & ~0x800) | ((spu.decode_pos << 3) & 0x800); #if 0 static int ccount; static time_t ctime; ccount++; if (time(NULL) != ctime) From 67c020eeafd6d3db7f9d2c314a77990177a1c4ff Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 25 Mar 2024 23:26:10 +0200 Subject: [PATCH 530/597] spu: don't leave garbage in capture buffers --- plugins/dfsound/externals.h | 1 + plugins/dfsound/freeze.c | 1 + plugins/dfsound/spu.c | 2 ++ plugins/dfsound/xa.c | 10 +++++++++- 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index ac0960f6e..8e479c531 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -214,6 +214,7 @@ typedef struct int iLeftXAVol; int iRightXAVol; + int cdClearSamples; // extra samples to clear the capture buffers struct { // channel volume in the cd controller unsigned char ll, lr, rl, rr; // see 
cdr.Attenuator* in cdrom.c } cdv; // applied on spu side for easier emulation diff --git a/plugins/dfsound/freeze.c b/plugins/dfsound/freeze.c index bddf0adad..36b899592 100644 --- a/plugins/dfsound/freeze.c +++ b/plugins/dfsound/freeze.c @@ -338,6 +338,7 @@ long CALLBACK SPUfreeze(unsigned int ulFreezeMode, SPUFreeze_t * pF, spu.XAPlay = spu.XAFeed = spu.XAStart; spu.CDDAPlay = spu.CDDAFeed = spu.CDDAStart; + spu.cdClearSamples = 512; if (pFO && pFO->xa_left && pF->xaS.nsamples) { // start xa again FeedXA(&pF->xaS); spu.XAPlay = spu.XAFeed - pFO->xa_left; diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 022a1e3d3..ed5b4b5c7 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1389,6 +1389,7 @@ void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycle, int is_s FeedXA(xap); // call main XA feeder spu.xapGlobal = xap; // store info for save states + spu.cdClearSamples = 512; } // CDDA AUDIO @@ -1401,6 +1402,7 @@ int CALLBACK SPUplayCDDAchannel(short *pcm, int nbytes, unsigned int cycle, int do_samples(cycle, 1); // catch up to prevent source underflows later FeedCDDA((unsigned char *)pcm, nbytes); + spu.cdClearSamples = 512; return 0; } diff --git a/plugins/dfsound/xa.c b/plugins/dfsound/xa.c index 6b5433fbd..380d1388c 100644 --- a/plugins/dfsound/xa.c +++ b/plugins/dfsound/xa.c @@ -152,8 +152,16 @@ INLINE void MixCD(int *SSumLR, int *RVB, int ns_to, int decode_pos) } spu.XALastVal = v; } - else + else if (spu.cdClearSamples > 0) + { + for(ns = 0; ns < ns_to; ns++) + { + spu.spuMem[cursor] = spu.spuMem[cursor + 0x400/2] = 0; + cursor = (cursor + 1) & 0x1ff; + } + spu.cdClearSamples -= ns_to; spu.XALastVal = 0; + } } //////////////////////////////////////////////////////////////////////// From 58126d968b891713eb9dfd7a0d92f87e81ff57ff Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 28 Mar 2024 01:47:05 +0200 Subject: [PATCH 531/597] update libchdr also move to a consistent location with the libretro branch 
notaz/pcsx_rearmed#339 --- .gitmodules | 2 +- Makefile | 47 +++++++++++++++++++++++++++++++++++++---------- deps/libchdr | 1 + libchdr | 1 - 4 files changed, 39 insertions(+), 12 deletions(-) create mode 160000 deps/libchdr delete mode 160000 libchdr diff --git a/.gitmodules b/.gitmodules index 691f83092..44495e687 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,5 +5,5 @@ path = frontend/warm url = https://github.com/notaz/warm.git [submodule "libchdr"] - path = libchdr + path = deps/libchdr url = https://github.com/rtissera/libchdr.git diff --git a/Makefile b/Makefile index 8b2b87d9d..b3ccda79f 100644 --- a/Makefile +++ b/Makefile @@ -169,17 +169,44 @@ endif # cdrcimg OBJS += plugins/cdrcimg/cdrcimg.o #ifeq "$(CHD_SUPPORT)" "1" -OBJS += libchdr/src/libchdr_bitstream.o -OBJS += libchdr/src/libchdr_cdrom.o -OBJS += libchdr/src/libchdr_chd.o -OBJS += libchdr/src/libchdr_flac.o -OBJS += libchdr/src/libchdr_huffman.o -OBJS += libchdr/deps/lzma-19.00/src/Alloc.o libchdr/deps/lzma-19.00/src/Bra86.o libchdr/deps/lzma-19.00/src/BraIA64.o libchdr/deps/lzma-19.00/src/CpuArch.o libchdr/deps/lzma-19.00/src/Delta.o -OBJS += libchdr/deps/lzma-19.00/src/LzFind.o libchdr/deps/lzma-19.00/src/Lzma86Dec.o libchdr/deps/lzma-19.00/src/LzmaDec.o libchdr/deps/lzma-19.00/src/LzmaEnc.o libchdr/deps/lzma-19.00/src/Sort.o -CFLAGS += -DHAVE_CHD -Ilibchdr/include +LCHDR = deps/libchdr +LCHDR_LZMA = $(LCHDR)/deps/lzma-22.01 +LCHDR_ZSTD = $(LCHDR)/deps/zstd-1.5.5/lib +OBJS += $(LCHDR)/src/libchdr_bitstream.o +OBJS += $(LCHDR)/src/libchdr_cdrom.o +OBJS += $(LCHDR)/src/libchdr_chd.o +OBJS += $(LCHDR)/src/libchdr_flac.o +OBJS += $(LCHDR)/src/libchdr_huffman.o +$(LCHDR)/src/%.o: CFLAGS += -Wno-unused -std=gnu11 +OBJS += $(LCHDR_LZMA)/src/Alloc.o +OBJS += $(LCHDR_LZMA)/src/Bra86.o +OBJS += $(LCHDR_LZMA)/src/BraIA64.o +OBJS += $(LCHDR_LZMA)/src/CpuArch.o +OBJS += $(LCHDR_LZMA)/src/Delta.o +OBJS += $(LCHDR_LZMA)/src/LzFind.o +OBJS += $(LCHDR_LZMA)/src/Lzma86Dec.o +OBJS += $(LCHDR_LZMA)/src/LzmaDec.o 
+OBJS += $(LCHDR_LZMA)/src/LzmaEnc.o +OBJS += $(LCHDR_LZMA)/src/Sort.o +$(LCHDR_LZMA)/src/%.o: CFLAGS += -Wno-unused -D_7ZIP_ST -I$(LCHDR_LZMA)/include +$(LCHDR)/src/%.o: CFLAGS += -I$(LCHDR_LZMA)/include +OBJS += $(LCHDR_ZSTD)/common/debug.o +OBJS += $(LCHDR_ZSTD)/common/entropy_common.o +OBJS += $(LCHDR_ZSTD)/common/error_private.o +OBJS += $(LCHDR_ZSTD)/common/fse_decompress.o +OBJS += $(LCHDR_ZSTD)/common/pool.o +OBJS += $(LCHDR_ZSTD)/common/threading.o +OBJS += $(LCHDR_ZSTD)/common/xxhash.o +OBJS += $(LCHDR_ZSTD)/common/zstd_common.o +OBJS += $(LCHDR_ZSTD)/decompress/huf_decompress.o +OBJS += $(LCHDR_ZSTD)/decompress/zstd_ddict.o +OBJS += $(LCHDR_ZSTD)/decompress/zstd_decompress_block.o +OBJS += $(LCHDR_ZSTD)/decompress/zstd_decompress.o +$(LCHDR_ZSTD)/common/%.o \ +$(LCHDR_ZSTD)/decompress/%.o: CFLAGS += -DZSTD_DISABLE_ASM -I$(LCHDR_ZSTD) +$(LCHDR)/src/%.o: CFLAGS += -I$(LCHDR_ZSTD) libpcsxcore/cdriso.o: CFLAGS += -Wno-unused-function -libchdr/src/%.o: CFLAGS += -Wno-unused -Ilibchdr/deps/lzma-19.00/include -std=gnu11 -libchdr/deps/lzma-19.00/src/%.o: CFLAGS += -Wno-unused -D_7ZIP_ST -Ilibchdr/deps/lzma-19.00/include +CFLAGS += -DHAVE_CHD -I$(LCHDR)/include #endif # frontend/gui diff --git a/deps/libchdr b/deps/libchdr new file mode 160000 index 000000000..5c598c2df --- /dev/null +++ b/deps/libchdr @@ -0,0 +1 @@ +Subproject commit 5c598c2df3a7717552a76410d79f5af01ff51b1d diff --git a/libchdr b/libchdr deleted file mode 160000 index 54bfb871c..000000000 --- a/libchdr +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 54bfb871ccae31903b95a8feb7f2bf7121f304be From a51adab49babe92d242df9a5ce891a37d4120f30 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 29 Mar 2024 01:28:11 +0200 Subject: [PATCH 532/597] libretro: update build for newer libchdr --- Makefile | 4 +++- frontend/libretro.c | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index b3ccda79f..32cd6cf9c 100644 --- a/Makefile +++ b/Makefile @@ -168,7 
+168,9 @@ endif # cdrcimg OBJS += plugins/cdrcimg/cdrcimg.o -#ifeq "$(CHD_SUPPORT)" "1" + +# libchdr +#ifeq "$(HAVE_CHD)" "1" LCHDR = deps/libchdr LCHDR_LZMA = $(LCHDR)/deps/lzma-22.01 LCHDR_ZSTD = $(LCHDR)/deps/zstd-1.5.5/lib diff --git a/frontend/libretro.c b/frontend/libretro.c index ffd64c7e9..1808c5e71 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -294,16 +294,17 @@ static void convert(void *buf, size_t bytes) static void addCrosshair(int port, int crosshair_color, unsigned short *buffer, int bufferStride, int pos_x, int pos_y, int thickness, int size_x, int size_y) { for (port = 0; port < 2; port++) { // Draw the horizontal line of the crosshair - for (int i = pos_y - thickness / 2; i <= pos_y + thickness / 2; i++) { - for (int j = pos_x - size_x / 2; j <= pos_x + size_x / 2; j++) { + int i, j; + for (i = pos_y - thickness / 2; i <= pos_y + thickness / 2; i++) { + for (j = pos_x - size_x / 2; j <= pos_x + size_x / 2; j++) { if ((i + vout_height) >= 0 && (i + vout_height) < bufferStride && j >= 0 && j < bufferStride && in_enable_crosshair[port] > 0) buffer[i * bufferStride + j] = crosshair_color; - } } + } // Draw the vertical line of the crosshair - for (int i = pos_x - thickness / 2; i <= pos_x + thickness / 2; i++) { - for (int j = pos_y - size_y / 2; j <= pos_y + size_y / 2; j++) { + for (i = pos_x - thickness / 2; i <= pos_x + thickness / 2; i++) { + for (j = pos_y - size_y / 2; j <= pos_y + size_y / 2; j++) { if (i >= 0 && i < bufferStride && (j + vout_height) >= 0 && (j + vout_height) < bufferStride && in_enable_crosshair[port] > 0) buffer[j * bufferStride + i] = crosshair_color; } @@ -366,8 +367,8 @@ static void vout_flip(const void *vram, int stride, int bgr24, for (port = 0; port < 2; port++) { if (in_enable_crosshair[port] > 0 && (in_type[port] == PSE_PAD_TYPE_GUNCON || in_type[port] == PSE_PAD_TYPE_GUN)) { - struct CrosshairInfo crosshairInfo; - CrosshairDimensions(port, &crosshairInfo); + struct CrosshairInfo crosshairInfo; + 
CrosshairDimensions(port, &crosshairInfo); addCrosshair(port, in_enable_crosshair[port], dest, dstride, crosshairInfo.pos_x, crosshairInfo.pos_y, crosshairInfo.thickness, crosshairInfo.size_x, crosshairInfo.size_y); } } @@ -727,8 +728,8 @@ static bool update_option_visibility(void) "pcsx_rearmed_negcon_deadzone", "pcsx_rearmed_negcon_response", "pcsx_rearmed_input_sensitivity", - "pcsx_rearmed_crosshair1", - "pcsx_rearmed_crosshair2", + "pcsx_rearmed_crosshair1", + "pcsx_rearmed_crosshair2", "pcsx_rearmed_konamigunadjustx", "pcsx_rearmed_konamigunadjusty", "pcsx_rearmed_gunconadjustx", From 642638a3d38b945942d950b2d98943864e5e29b4 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sat, 6 Apr 2024 17:22:58 +0200 Subject: [PATCH 533/597] db: Override cycle multiplier for Colin McRae PAL Decrease it from 175 to 174, which is enough for the language selection menu to start working. Signed-off-by: Paul Cercueil --- libpcsxcore/database.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 86a24a490..c70ed8691 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -91,6 +91,8 @@ cycle_multiplier_overrides[] = /* Super Robot Taisen Alpha - on the edge with 175, * changing memcard settings is enough to break/unbreak it */ { 190, { "SLPS02528", "SLPS02636" } }, + /* Colin McRae Rally - language selection menu does not work with 175 */ + { 174, { "SLES00477" } }, /* Brave Fencer Musashi - cd sectors arrive too fast */ { 170, { "SLUS00726", "SLPS01490" } }, #if defined(DRC_DISABLE) || defined(LIGHTREC) /* new_dynarec has a hack for this game */ From df4bcb0e330029b90ffbd4e963d99fc6bc142d0e Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 15 Apr 2024 01:52:31 +0300 Subject: [PATCH 534/597] cdrom: accept more channel bits Although it contradicts nocash docs, Spyro 2 (PAL) uses 0x21. Should only 0xff be rejected? 
--- libpcsxcore/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 5da24c825..83a6de04d 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -1399,7 +1399,7 @@ static void cdrReadInterrupt(void) subhdr->file, subhdr->chan, cdr.CurFile, cdr.CurChannel, cdr.FilterFile, cdr.FilterChannel); if ((cdr.Mode & MODE_SF) && (subhdr->file != cdr.FilterFile || subhdr->chan != cdr.FilterChannel)) break; - if (subhdr->chan & 0xe0) { // ? + if (subhdr->chan & 0x80) { // ? if (subhdr->chan != 0xff) log_unhandled("adpcm %d:%d\n", subhdr->file, subhdr->chan); break; From 4c8f1c25f56f6bdae42431f758658a3fc5872940 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 15 Apr 2024 01:56:42 +0300 Subject: [PATCH 535/597] introduce fractional framerates notaz/pcsx_rearmed#91 --- frontend/libretro.c | 26 +++++++++++++++++++++--- frontend/libretro_core_options.h | 17 +++++++++++++++- frontend/main.c | 1 + frontend/menu.c | 12 ++++++++--- frontend/plugin_lib.c | 11 ++++++---- libpcsxcore/cdrom.c | 10 +++------ libpcsxcore/database.c | 17 ++++++++++++++++ libpcsxcore/psxcommon.h | 4 +++- libpcsxcore/psxcounters.c | 35 ++++++++++++++++++++++++++++++-- libpcsxcore/psxcounters.h | 2 ++ plugins/dfsound/externals.h | 2 +- 11 files changed, 115 insertions(+), 22 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 1808c5e71..6a719ae40 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -131,7 +131,8 @@ static unsigned previous_width = 0; static unsigned previous_height = 0; static int plugins_opened; -static int is_pal_mode; + +#define is_pal_mode Config.PsxType /* memory card data */ extern char Mcd1Data[MCD_SIZE]; @@ -586,7 +587,6 @@ void pl_frame_limit(void) void pl_timing_prepare(int is_pal) { - is_pal_mode = is_pal; } void plat_trigger_vibrate(int pad, int low, int high) @@ -1002,7 +1002,7 @@ void retro_get_system_av_info(struct retro_system_av_info *info) unsigned geom_width = vout_width; 
memset(info, 0, sizeof(*info)); - info->timing.fps = is_pal_mode ? 50.0 : 60.0; + info->timing.fps = psxGetFps(); info->timing.sample_rate = 44100.0; info->geometry.base_width = geom_width; info->geometry.base_height = geom_height; @@ -2265,6 +2265,7 @@ static void update_variables(bool in_flight) int gpu_peops_fix = GPU_PEOPS_OLD_FRAME_SKIP; #endif frameskip_type_t prev_frameskip_type; + double old_fps = psxGetFps(); var.value = NULL; var.key = "pcsx_rearmed_frameskip_type"; @@ -2706,6 +2707,18 @@ static void update_variables(bool in_flight) Config.GpuListWalking = -1; } + var.value = NULL; + var.key = "pcsx_rearmed_fractional_framerate"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "disabled") == 0) + Config.FractionalFramerate = 0; + else if (strcmp(var.value, "enabled") == 0) + Config.FractionalFramerate = 1; + else // auto + Config.FractionalFramerate = -1; + } + var.value = NULL; var.key = "pcsx_rearmed_screen_centering"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) @@ -3005,6 +3018,13 @@ static void update_variables(bool in_flight) } update_option_visibility(); + + if (old_fps != psxGetFps()) + { + struct retro_system_av_info info; + retro_get_system_av_info(&info); + environ_cb(RETRO_ENVIRONMENT_SET_SYSTEM_AV_INFO, &info); + } } // Taken from beetle-psx-libretro diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index be9e8abfd..451ee4783 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -425,6 +425,21 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled", }, + { + "pcsx_rearmed_fractional_framerate", + "Use fractional frame rate", + NULL, + "Instead of the exact 50 or 60 (maximum) fps for PAL/NTSC the real console runs closer to something like 49.75 and 59.81fps (varies slightly between hw versions). 
PCSX-ReARMed uses the former \"round\" framerates to better match modern displays, however that may cause audio/video desync in games like DDR and Spyro 2 (intro). With this option you can try to use fractional framerates.", + NULL, + "video", + { + { "auto", "Auto" }, + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "auto", + }, { "pcsx_rearmed_gpu_slow_llists", "(GPU) Slow linked list processing", @@ -433,7 +448,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { NULL, "video", { - { "auto", NULL }, + { "auto", "Auto" }, { "disabled", NULL }, { "enabled", NULL }, { NULL, NULL }, diff --git a/frontend/main.c b/frontend/main.c index 53f31b0a1..ce7eca6ca 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -136,6 +136,7 @@ void emu_set_default_config(void) Config.PsxAuto = 1; Config.cycle_multiplier = CYCLE_MULT_DEFAULT; Config.GpuListWalking = -1; + Config.FractionalFramerate = -1; pl_rearmed_cbs.gpu_neon.allow_interlace = 2; // auto pl_rearmed_cbs.gpu_neon.enhancement_enable = diff --git a/frontend/menu.c b/frontend/menu.c index 6c669dc3c..58efd218d 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -408,6 +408,7 @@ static const struct { CE_CONFIG_VAL(DisableStalls), CE_CONFIG_VAL(Cpu), CE_CONFIG_VAL(GpuListWalking), + CE_CONFIG_VAL(FractionalFramerate), CE_CONFIG_VAL(PreciseExceptions), CE_INTVAL(region), CE_INTVAL_V(g_scaler, 3), @@ -1638,7 +1639,7 @@ static int menu_loop_speed_hacks(int id, int keys) return 0; } -static const char *men_gpul[] = { "Auto", "Off", "On", NULL }; +static const char *men_autooo[] = { "Auto", "Off", "On", NULL }; static const char h_cfg_cpul[] = "Shows CPU usage in %"; static const char h_cfg_spu[] = "Shows active SPU channels\n" @@ -1657,10 +1658,12 @@ static const char h_cfg_exc[] = "Emulate some PSX's debug hw like breakpoints "and exceptions (slow, interpreter only, keep off)"; static const char h_cfg_gpul[] = "Try enabling this if the game misses some graphics\n" "causes a performance hit"; 
+static const char h_cfg_ffps[] = "Instead of 50/60fps for PAL/NTSC use ~49.75/59.81\n" + "Closer to real hw but doesn't match modern displays."; static const char h_cfg_psxclk[] = "Over/under-clock the PSX, default is " DEFAULT_PSX_CLOCK_S "\n" "(adjust this if the game is too slow/too fast/hangs)"; -enum { AMO_XA, AMO_CDDA, AMO_IC, AMO_BP, AMO_CPU, AMO_GPUL }; +enum { AMO_XA, AMO_CDDA, AMO_IC, AMO_BP, AMO_CPU, AMO_GPUL, AMO_FFPS }; static menu_entry e_menu_adv_options[] = { @@ -1671,7 +1674,8 @@ static menu_entry e_menu_adv_options[] = mee_onoff_h ("Disable CD Audio", 0, menu_iopts[AMO_CDDA], 1, h_cfg_cdda), mee_onoff_h ("ICache emulation", 0, menu_iopts[AMO_IC], 1, h_cfg_icache), mee_onoff_h ("BP exception emulation", 0, menu_iopts[AMO_BP], 1, h_cfg_exc), - mee_enum_h ("GPU l-list slow walking",0, menu_iopts[AMO_GPUL], men_gpul, h_cfg_gpul), + mee_enum_h ("GPU l-list slow walking",0, menu_iopts[AMO_GPUL], men_autooo, h_cfg_gpul), + mee_enum_h ("Fractional framerate", 0, menu_iopts[AMO_FFPS], men_autooo, h_cfg_ffps), #if !defined(DRC_DISABLE) || defined(LIGHTREC) mee_onoff_h ("Disable dynarec (slow!)",0, menu_iopts[AMO_CPU], 1, h_cfg_nodrc), #endif @@ -1697,12 +1701,14 @@ static int menu_loop_adv_options(int id, int keys) for (i = 0; i < ARRAY_SIZE(opts); i++) *opts[i].mopt = *opts[i].opt; menu_iopts[AMO_GPUL] = Config.GpuListWalking + 1; + menu_iopts[AMO_FFPS] = Config.FractionalFramerate + 1; me_loop(e_menu_adv_options, &sel); for (i = 0; i < ARRAY_SIZE(opts); i++) *opts[i].opt = *opts[i].mopt; Config.GpuListWalking = menu_iopts[AMO_GPUL] - 1; + Config.FractionalFramerate = menu_iopts[AMO_FFPS] - 1; return 0; } diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 0deab1574..9b6faf44e 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -35,6 +35,7 @@ #include "../libpcsxcore/psxmem_map.h" #include "../libpcsxcore/gpu.h" #include "../libpcsxcore/r3000a.h" +#include "../libpcsxcore/psxcounters.h" #define HUD_HEIGHT 10 @@ -778,18 +779,20 
@@ void pl_frame_limit(void) void pl_timing_prepare(int is_pal_) { + double fps; pl_rearmed_cbs.fskip_advice = 0; pl_rearmed_cbs.flips_per_sec = 0; pl_rearmed_cbs.cpu_usage = 0; is_pal = is_pal_; - frame_interval = is_pal ? 20000 : 16667; - frame_interval1024 = is_pal ? 20000*1024 : 17066667; + fps = psxGetFps(); + frame_interval = (int)(1000000.0 / fps); + frame_interval1024 = (int)(1000000.0 * 1024.0 / fps); // used by P.E.Op.S. frameskip code - pl_rearmed_cbs.gpu_peops.fFrameRateHz = is_pal ? 50.0f : 59.94f; + pl_rearmed_cbs.gpu_peops.fFrameRateHz = (float)fps; pl_rearmed_cbs.gpu_peops.dwFrameRateTicks = - (100000*100 / (unsigned long)(pl_rearmed_cbs.gpu_peops.fFrameRateHz*100)); + (100000*100 / (int)(pl_rearmed_cbs.gpu_peops.fFrameRateHz*100)); } static void pl_get_layer_pos(int *x, int *y, int *w, int *h) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 83a6de04d..1615e1867 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -583,19 +583,15 @@ static int cdrSeekTime(unsigned char *target) int cyclesSinceRS = psxRegs.cycle - cdr.LastReadSeekCycles; seekTime = MAX_VALUE(seekTime, 20000); - // need this stupidly long penalty or else Spyro2 intro desyncs - // note: if misapplied this breaks MGS cutscenes among other things - if (cdr.DriveState == DRIVESTATE_PAUSED && cyclesSinceRS > cdReadTime * 50) - seekTime += cdReadTime * 25; // Transformers Beast Wars Transmetals does Setloc(x),SeekL,Setloc(x),ReadN // and then wants some slack time - else if (cdr.DriveState == DRIVESTATE_PAUSED || cyclesSinceRS < cdReadTime *3/2) + if (cdr.DriveState == DRIVESTATE_PAUSED || cyclesSinceRS < cdReadTime *3/2) seekTime += cdReadTime; seekTime = MIN_VALUE(seekTime, PSXCLK * 2 / 3); - CDR_LOG("seek: %.2f %.2f (%.2f) st %d\n", (float)seekTime / PSXCLK, + CDR_LOG("seek: %.2f %.2f (%.2f) st %d di %d\n", (float)seekTime / PSXCLK, (float)seekTime / cdReadTime, (float)cyclesSinceRS / cdReadTime, - cdr.DriveState); + cdr.DriveState, diff); return seekTime; } diff 
--git a/libpcsxcore/database.c b/libpcsxcore/database.c index c70ed8691..ef990ac48 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -59,6 +59,22 @@ static const char * const dualshock_init_analog_hack_db[] = "SLUS00546", }; +static const char * const fractional_Framerate_hack_db[] = +{ + /* Dance Dance Revolution */ + "SLPM86503", // 3rd Mix + "SLPM86752", // 4th Mix + "SLPM86266", // 4thMix: The Beat Goes On + "SLPM86831", // Extra Mix + "SLUS01446", // Konamix + /* Dancing Stage Fever */ + "SLES04097", + /* Dancing Stage Fusion */ + "SLES04163", + /* Spyro 2 */ + "SCUS94425", "SCES02104", +}; + #define HACK_ENTRY(var, list) \ { #var, &Config.hacks.var, list, ARRAY_SIZE(list) } @@ -76,6 +92,7 @@ hack_db[] = HACK_ENTRY(gpu_centering, gpu_centering_hack_db), HACK_ENTRY(gpu_timing1024, dualshock_timing1024_hack_db), HACK_ENTRY(dualshock_init_analog, dualshock_init_analog_hack_db), + HACK_ENTRY(fractional_Framerate, fractional_Framerate_hack_db), }; static const struct diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index a25e6252c..53bda9735 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -142,9 +142,10 @@ typedef struct { boolean icache_emulation; boolean DisableStalls; boolean PreciseExceptions; - int GpuListWalking; int cycle_multiplier; // 100 for 1.0 int cycle_multiplier_override; + s8 GpuListWalking; + s8 FractionalFramerate; // ~49.75 and ~59.81 instead of 50 and 60 u8 Cpu; // CPU_DYNAREC or CPU_INTERPRETER u8 PsxType; // PSX_TYPE_NTSC or PSX_TYPE_PAL struct { @@ -153,6 +154,7 @@ typedef struct { boolean gpu_centering; boolean dualshock_init_analog; boolean gpu_timing1024; + boolean fractional_Framerate; } hacks; } PcsxConfig; diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index c62712034..887fe8a4c 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -79,9 +79,40 @@ u32 psxNextCounter = 0, psxNextsCounter = 0; 
/******************************************************************************/ +#define FPS_FRACTIONAL_PAL (53203425/314./3406) // ~49.75 +#define FPS_FRACTIONAL_NTSC (53693175/263./3413) // ~59.81 + +static inline +u32 frameCycles(void) +{ + int ff = Config.FractionalFramerate >= 0 + ? Config.FractionalFramerate : Config.hacks.fractional_Framerate; + if (ff) + { + if (Config.PsxType) + return (u32)(PSXCLK / FPS_FRACTIONAL_PAL); + else + return (u32)(PSXCLK / FPS_FRACTIONAL_NTSC); + } + return Config.PsxType ? (PSXCLK / 50) : (PSXCLK / 60); +} + +// used to inform the frontend about the exact framerate +double psxGetFps() +{ + int ff = Config.FractionalFramerate >= 0 + ? Config.FractionalFramerate : Config.hacks.fractional_Framerate; + if (ff) + return Config.PsxType ? FPS_FRACTIONAL_PAL : FPS_FRACTIONAL_NTSC; + else + return Config.PsxType ? 50.0 : 60.0; +} + +// to inform the frontend about the exact framerate static inline u32 lineCycles(void) { + // should be more like above, but our timing is already poor anyway if (Config.PsxType) return PSXCLK / 50 / HSyncTotal[1]; else @@ -308,7 +339,7 @@ static void scheduleRcntBase(void) if (hSyncCount + hsync_steps == HSyncTotal[Config.PsxType]) { - rcnts[3].cycle = Config.PsxType ? PSXCLK / 50 : PSXCLK / 60; + rcnts[3].cycle = frameCycles(); } else { @@ -380,7 +411,7 @@ void psxRcntUpdate() if( hSyncCount >= HSyncTotal[Config.PsxType] ) { u32 status, field = 0; - rcnts[3].cycleStart += Config.PsxType ? 
PSXCLK / 50 : PSXCLK / 60; + rcnts[3].cycleStart += frameCycles(); hSyncCount = 0; frame_counter++; diff --git a/libpcsxcore/psxcounters.h b/libpcsxcore/psxcounters.h index 03cd46843..77025a617 100644 --- a/libpcsxcore/psxcounters.h +++ b/libpcsxcore/psxcounters.h @@ -56,6 +56,8 @@ u32 psxRcntRtarget(u32 index); s32 psxRcntFreeze(void *f, s32 Mode); +double psxGetFps(); + #ifdef __cplusplus } #endif diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index 8e479c531..6dbbac67a 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -58,7 +58,7 @@ #define MAXCHAN 24 // note: must be even due to the way reverb works now -#define NSSIZE ((44100 / 50 + 16) & ~1) +#define NSSIZE ((44100 / 50 + 32) & ~1) /////////////////////////////////////////////////////////// // struct defines From 1631b0537f583dee1875f4538d89e4867efec1ae Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 16 Apr 2024 00:38:14 +0300 Subject: [PATCH 536/597] cdrom: change pause timing again for DDR series --- libpcsxcore/cdrom.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 1615e1867..d4d14742c 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -971,27 +971,19 @@ void cdrInterrupt(void) { cdr.sectorsRead = 0; /* - Gundam Battle Assault 2: much slower (*) - - Fixes boot, gameplay - - Hokuto no Ken 2: slower - - Fixes intro + subtitles - - InuYasha - Feudal Fairy Tale: slower - - Fixes battles + Gundam Battle Assault 2 + Hokuto no Ken 2 + InuYasha - Feudal Fairy Tale + Dance Dance Revolution Konamix + ... */ - /* Gameblabla - Tightening the timings (as taken from Duckstation). - * The timings from Duckstation are based upon hardware tests. - * Mednafen's timing don't work for Gundam Battle Assault 2 in PAL/50hz mode, - * seems to be timing sensitive as it can depend on the CPU's clock speed. 
- * */ if (!(cdr.StatP & (STATUS_PLAY | STATUS_READ))) { second_resp_time = 7000; } else { - second_resp_time = (((cdr.Mode & MODE_SPEED) ? 1 : 2) * 1097107); + second_resp_time = 2 * 1097107; } SetPlaySeekRead(cdr.StatP, 0); DriveStateOld = cdr.DriveState; From 0aa7361c7c226c94d0f9ae4779bcc635454ef276 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 18 Apr 2024 21:12:41 +0300 Subject: [PATCH 537/597] new experimental TurboCD option --- frontend/libretro.c | 12 +++++++++++ frontend/libretro_core_options.h | 20 +++++++++++++++---- frontend/main.c | 2 ++ frontend/menu.c | 6 +++++- libpcsxcore/cdrom.c | 34 +++++++++++++++++++++++++++----- libpcsxcore/misc.c | 2 +- libpcsxcore/psxcommon.h | 1 + 7 files changed, 66 insertions(+), 11 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 6a719ae40..7ad4caa8a 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2191,6 +2191,8 @@ bool retro_load_game(const struct retro_game_info *info) if (check_unsatisfied_libcrypt()) show_notification("LibCrypt protected game with missing SBI detected", 3000, 3); + if (Config.TurboCD) + show_notification("TurboCD is ON", 700, 2); return true; } @@ -2473,6 +2475,16 @@ static void update_variables(bool in_flight) display_internal_fps = true; } + var.value = NULL; + var.key = "pcsx_rearmed_cd_turbo"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + Config.TurboCD = true; + else + Config.TurboCD = false; + } + #ifdef HAVE_CDROM var.value = NULL; var.key = "pcsx_rearmed_phys_cd_readahead"; diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 451ee4783..e79e90275 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -97,13 +97,11 @@ struct retro_core_option_v2_category option_cats_us[] = { "Compatibility Fixes", "Configure settings/workarounds required for correct operation of specific games." 
}, -#if !defined(DRC_DISABLE) && !defined(LIGHTREC) { "speed_hack", "Speed Hacks (Advanced)", "Configure hacks that may improve performance at the expense of decreased accuracy/stability." }, -#endif { NULL, NULL, NULL }, }; @@ -1572,6 +1570,20 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled", }, + { + "pcsx_rearmed_cd_turbo", + "Turbo CD", + NULL, + "This makes the emulated CD-ROM extremely fast and can reduce loading times in some cases. Warning: many games were not programmed to handle such a speed. The game (or even the emulator) MAY CRASH at ANY TIME if this is enabled.", + NULL, + "speed_hack", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, #if !defined(DRC_DISABLE) && !defined(LIGHTREC) { "pcsx_rearmed_nocompathacks", @@ -1634,13 +1646,13 @@ struct retro_core_option_v2_definition option_defs_us[] = { "pcsx_rearmed_nostalls", "Disable CPU/GTE Stalls", NULL, - "Will cause some games to run too quickly." + "Will cause some games to run too quickly. Should be disabled in almost all cases." #if defined(LIGHTREC) " Interpreter only." 
#endif , NULL, - "compat_hack", + "speed_hack", { { "disabled", NULL }, { "enabled", NULL }, diff --git a/frontend/main.c b/frontend/main.c index ce7eca6ca..6df1731d1 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -417,6 +417,8 @@ void emu_on_new_cd(int show_hud_msg) SysPrintf("note: running with HLE BIOS, expect compatibility problems\n"); SysPrintf("----------------------------------------------------------\n"); } + if (Config.TurboCD) + SysPrintf("note: TurboCD is enabled, this breaks games\n"); if (show_hud_msg) { if (check_unsatisfied_libcrypt()) diff --git a/frontend/menu.c b/frontend/menu.c index 58efd218d..39f0a6972 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -410,6 +410,7 @@ static const struct { CE_CONFIG_VAL(GpuListWalking), CE_CONFIG_VAL(FractionalFramerate), CE_CONFIG_VAL(PreciseExceptions), + CE_CONFIG_VAL(TurboCD), CE_INTVAL(region), CE_INTVAL_V(g_scaler, 3), CE_INTVAL(g_gamma), @@ -1660,10 +1661,11 @@ static const char h_cfg_gpul[] = "Try enabling this if the game misses some gr "causes a performance hit"; static const char h_cfg_ffps[] = "Instead of 50/60fps for PAL/NTSC use ~49.75/59.81\n" "Closer to real hw but doesn't match modern displays."; +static const char h_cfg_tcd[] = "Greatly reduce CD load times. 
Breaks some games."; static const char h_cfg_psxclk[] = "Over/under-clock the PSX, default is " DEFAULT_PSX_CLOCK_S "\n" "(adjust this if the game is too slow/too fast/hangs)"; -enum { AMO_XA, AMO_CDDA, AMO_IC, AMO_BP, AMO_CPU, AMO_GPUL, AMO_FFPS }; +enum { AMO_XA, AMO_CDDA, AMO_IC, AMO_BP, AMO_CPU, AMO_GPUL, AMO_FFPS, AMO_TCD }; static menu_entry e_menu_adv_options[] = { @@ -1676,6 +1678,7 @@ static menu_entry e_menu_adv_options[] = mee_onoff_h ("BP exception emulation", 0, menu_iopts[AMO_BP], 1, h_cfg_exc), mee_enum_h ("GPU l-list slow walking",0, menu_iopts[AMO_GPUL], men_autooo, h_cfg_gpul), mee_enum_h ("Fractional framerate", 0, menu_iopts[AMO_FFPS], men_autooo, h_cfg_ffps), + mee_onoff_h ("Turbo CD-ROM ", 0, menu_iopts[AMO_TCD], 1, h_cfg_tcd), #if !defined(DRC_DISABLE) || defined(LIGHTREC) mee_onoff_h ("Disable dynarec (slow!)",0, menu_iopts[AMO_CPU], 1, h_cfg_nodrc), #endif @@ -1696,6 +1699,7 @@ static int menu_loop_adv_options(int id, int keys) { &Config.icache_emulation, &menu_iopts[AMO_IC] }, { &Config.PreciseExceptions, &menu_iopts[AMO_BP] }, { &Config.Cpu, &menu_iopts[AMO_CPU] }, + { &Config.TurboCD, &menu_iopts[AMO_TCD] }, }; int i; for (i = 0; i < ARRAY_SIZE(opts); i++) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index d4d14742c..200dcf98a 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -111,7 +111,7 @@ static struct { u8 AdpcmActive; u32 LastReadSeekCycles; - u8 unused7; + u8 RetryDetected; u8 DriveState; // enum drive_state u8 FastForward; @@ -576,6 +576,14 @@ static void cdrPlayInterrupt_Autopause() cdr.ReportDelay--; } +static boolean canDoTurbo(void) +{ + u32 c = psxRegs.cycle; + return Config.TurboCD && !cdr.RetryDetected && !cdr.AdpcmActive + //&& c - psxRegs.intCycle[PSXINT_SPUDMA].sCycle > (u32)cdReadTime * 2 + && c - psxRegs.intCycle[PSXINT_MDECOUTDMA].sCycle > (u32)cdReadTime * 16; +} + static int cdrSeekTime(unsigned char *target) { int diff = msf2sec(cdr.SetSectorPlay) - msf2sec(target); @@ -654,6 +662,11 @@ 
static void msfiSub(u8 *msfi, u32 count) } } +static int msfiEq(const u8 *a, const u8 *b) +{ + return a[0] == b[0] && a[1] == b[1] && a[2] == b[2]; +} + void cdrPlayReadInterrupt(void) { int hit = CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); @@ -822,6 +835,9 @@ void cdrInterrupt(void) { { for (i = 0; i < 3; i++) set_loc[i] = btoi(cdr.Param[i]); + cdr.RetryDetected = msfiEq(cdr.SetSector, set_loc) + && !cdr.SetlocPending; + //cdr.RetryDetected |= msfiEq(cdr.Param, cdr.Transfer); memcpy(cdr.SetSector, set_loc, 3); cdr.SetSector[3] = 0; cdr.SetlocPending = 1; @@ -1103,7 +1119,8 @@ void cdrInterrupt(void) { StopReading(); SetPlaySeekRead(cdr.StatP, STATUS_SEEK | STATUS_ROTATING); - seekTime = cdrSeekTime(cdr.SetSector); + if (!canDoTurbo()) + seekTime = cdrSeekTime(cdr.SetSector); memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); cdr.DriveState = DRIVESTATE_SEEK; CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], @@ -1252,6 +1269,8 @@ void cdrInterrupt(void) { cycles += seekTime; if (Config.hacks.cdr_read_timing) cycles = cdrAlignTimingHack(cycles); + else if (canDoTurbo()) + cycles = cdReadTime / 2; CDRPLAYREAD_INT(cycles, 1); SetPlaySeekRead(cdr.StatP, STATUS_SEEK); @@ -1443,6 +1462,8 @@ unsigned char cdrRead0(void) { cdr.Ctrl |= cdr.AdpcmActive << 2; cdr.Ctrl |= cdr.ResultReady << 5; + //cdr.Ctrl &= ~0x40; + //if (cdr.FifoOffset != DATA_SIZE) cdr.Ctrl |= 0x40; // data fifo not empty // What means the 0x10 and the 0x08 bits? 
I only saw it used by the bios @@ -1642,7 +1663,7 @@ void cdrWrite3(unsigned char rt) { } void psxDma3(u32 madr, u32 bcr, u32 chcr) { - u32 cdsize, max_words; + u32 cdsize, max_words, cycles; int size; u8 *ptr; @@ -1688,7 +1709,8 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { } psxCpu->Clear(madr, cdsize / 4); - set_event(PSXINT_CDRDMA, (cdsize / 4) * 24); + cycles = (cdsize / 4) * 24; + set_event(PSXINT_CDRDMA, cycles); HW_DMA3_CHCR &= SWAPu32(~0x10000000); if (chcr & 0x100) { @@ -1697,8 +1719,10 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { } else { // halted - psxRegs.cycle += (cdsize/4) * 24 - 20; + psxRegs.cycle += cycles - 20; } + if (canDoTurbo() && cdr.Reading && cdr.FifoOffset >= 2048) + CDRPLAYREAD_INT(cycles + 4096, 1); return; default: diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 889639d66..47a32cce2 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -379,7 +379,7 @@ int CheckCdrom() { time[2] = itob(0x10); if (!Config.HLE && Config.SlowBoot) { - // boot to BIOS in case of CDDA ir lid open + // boot to BIOS in case of CDDA or lid is open CDR_getStatus(&stat); if ((stat.Status & 0x10) || stat.Type == 2 || !CDR_readTrack(time)) return 0; diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index 53bda9735..0a0bd8633 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -142,6 +142,7 @@ typedef struct { boolean icache_emulation; boolean DisableStalls; boolean PreciseExceptions; + boolean TurboCD; int cycle_multiplier; // 100 for 1.0 int cycle_multiplier_override; s8 GpuListWalking; From 8c0658995f3fcb6f79d1624b675de5320b5d8e1a Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 21 May 2024 02:02:19 +0300 Subject: [PATCH 538/597] update libchdr --- Makefile | 15 +++++---------- deps/libchdr | 2 +- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 32cd6cf9c..5b890600d 100644 --- a/Makefile +++ b/Makefile @@ -172,8 +172,8 @@ OBJS += plugins/cdrcimg/cdrcimg.o # libchdr #ifeq 
"$(HAVE_CHD)" "1" LCHDR = deps/libchdr -LCHDR_LZMA = $(LCHDR)/deps/lzma-22.01 -LCHDR_ZSTD = $(LCHDR)/deps/zstd-1.5.5/lib +LCHDR_LZMA = $(LCHDR)/deps/lzma-24.05 +LCHDR_ZSTD = $(LCHDR)/deps/zstd-1.5.6/lib OBJS += $(LCHDR)/src/libchdr_bitstream.o OBJS += $(LCHDR)/src/libchdr_cdrom.o OBJS += $(LCHDR)/src/libchdr_chd.o @@ -181,31 +181,26 @@ OBJS += $(LCHDR)/src/libchdr_flac.o OBJS += $(LCHDR)/src/libchdr_huffman.o $(LCHDR)/src/%.o: CFLAGS += -Wno-unused -std=gnu11 OBJS += $(LCHDR_LZMA)/src/Alloc.o -OBJS += $(LCHDR_LZMA)/src/Bra86.o -OBJS += $(LCHDR_LZMA)/src/BraIA64.o OBJS += $(LCHDR_LZMA)/src/CpuArch.o OBJS += $(LCHDR_LZMA)/src/Delta.o OBJS += $(LCHDR_LZMA)/src/LzFind.o -OBJS += $(LCHDR_LZMA)/src/Lzma86Dec.o OBJS += $(LCHDR_LZMA)/src/LzmaDec.o OBJS += $(LCHDR_LZMA)/src/LzmaEnc.o OBJS += $(LCHDR_LZMA)/src/Sort.o -$(LCHDR_LZMA)/src/%.o: CFLAGS += -Wno-unused -D_7ZIP_ST -I$(LCHDR_LZMA)/include +$(LCHDR_LZMA)/src/%.o: CFLAGS += -Wno-unused -DZ7_ST -I$(LCHDR_LZMA)/include $(LCHDR)/src/%.o: CFLAGS += -I$(LCHDR_LZMA)/include -OBJS += $(LCHDR_ZSTD)/common/debug.o OBJS += $(LCHDR_ZSTD)/common/entropy_common.o OBJS += $(LCHDR_ZSTD)/common/error_private.o OBJS += $(LCHDR_ZSTD)/common/fse_decompress.o -OBJS += $(LCHDR_ZSTD)/common/pool.o -OBJS += $(LCHDR_ZSTD)/common/threading.o OBJS += $(LCHDR_ZSTD)/common/xxhash.o OBJS += $(LCHDR_ZSTD)/common/zstd_common.o OBJS += $(LCHDR_ZSTD)/decompress/huf_decompress.o +OBJS += $(LCHDR_ZSTD)/decompress/huf_decompress_amd64.o OBJS += $(LCHDR_ZSTD)/decompress/zstd_ddict.o OBJS += $(LCHDR_ZSTD)/decompress/zstd_decompress_block.o OBJS += $(LCHDR_ZSTD)/decompress/zstd_decompress.o $(LCHDR_ZSTD)/common/%.o \ -$(LCHDR_ZSTD)/decompress/%.o: CFLAGS += -DZSTD_DISABLE_ASM -I$(LCHDR_ZSTD) +$(LCHDR_ZSTD)/decompress/%.o: CFLAGS += -I$(LCHDR_ZSTD) $(LCHDR)/src/%.o: CFLAGS += -I$(LCHDR_ZSTD) libpcsxcore/cdriso.o: CFLAGS += -Wno-unused-function CFLAGS += -DHAVE_CHD -I$(LCHDR)/include diff --git a/deps/libchdr b/deps/libchdr index 5c598c2df..86b272076 160000 
--- a/deps/libchdr +++ b/deps/libchdr @@ -1 +1 @@ -Subproject commit 5c598c2df3a7717552a76410d79f5af01ff51b1d +Subproject commit 86b272076d542287d3f03952e7d4efe283e815bf From 38c77d7816167f736aa3c2a90c9a5de8b6c618c9 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 29 May 2024 15:46:57 +0200 Subject: [PATCH 539/597] cdrom: Rename "stat" to "cdr_stat" "stat" is a C function from <sys/stat.h>. While libpcsxcore/cdrom.c does not include this header, some of the headers it does include may actually include it themselves (depending on the C library). Work around possible name conflicts by just renaming the offending variable. Signed-off-by: Paul Cercueil --- libpcsxcore/cdrom.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 200dcf98a..4ef0a237a 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -232,7 +232,7 @@ enum drive_state { DRIVESTATE_SEEK, }; -static struct CdrStat stat; +static struct CdrStat cdr_stat; static unsigned int msf2sec(const u8 *msf) { return ((msf[0] * 60 + msf[1]) * 75) + msf[2]; @@ -327,10 +327,10 @@ void cdrLidSeekInterrupt(void) //StopReading(); SetPlaySeekRead(cdr.StatP, 0); - if (CDR_getStatus(&stat) == -1) + if (CDR_getStatus(&cdr_stat) == -1) return; - if (stat.Status & STATUS_SHELLOPEN) + if (cdr_stat.Status & STATUS_SHELLOPEN) { memset(cdr.Prev, 0xff, sizeof(cdr.Prev)); cdr.DriveState = DRIVESTATE_LID_OPEN; @@ -339,8 +339,8 @@ void cdrLidSeekInterrupt(void) break; case DRIVESTATE_LID_OPEN: - if (CDR_getStatus(&stat) == -1) - stat.Status &= ~STATUS_SHELLOPEN; + if (CDR_getStatus(&cdr_stat) == -1) + cdr_stat.Status &= ~STATUS_SHELLOPEN; // 02, 12, 10 if (!(cdr.StatP & STATUS_SHELLOPEN)) { @@ -367,7 +367,7 @@ void cdrLidSeekInterrupt(void) else if (cdr.StatP & STATUS_ROTATING) { cdr.StatP &= ~STATUS_ROTATING; } - else if (!(stat.Status & STATUS_SHELLOPEN)) { + else if (!(cdr_stat.Status & STATUS_SHELLOPEN)) { // closed now CheckCdrom(); @@ -721,8 
+721,8 @@ void cdrPlayReadInterrupt(void) static void softReset(void) { - CDR_getStatus(&stat); - if (stat.Status & STATUS_SHELLOPEN) { + CDR_getStatus(&cdr_stat); + if (cdr_stat.Status & STATUS_SHELLOPEN) { cdr.DriveState = DRIVESTATE_LID_OPEN; cdr.StatP = STATUS_SHELLOPEN; } @@ -1188,11 +1188,11 @@ void cdrInterrupt(void) { cdr.Result[3] = 0; // 0x10 - audio | 0x40 - disk missing | 0x80 - unlicensed - if (CDR_getStatus(&stat) == -1 || stat.Type == 0 || stat.Type == 0xff) { + if (CDR_getStatus(&cdr_stat) == -1 || cdr_stat.Type == 0 || cdr_stat.Type == 0xff) { cdr.Result[1] = 0xc0; } else { - if (stat.Type == 2) + if (cdr_stat.Type == 2) cdr.Result[1] |= 0x10; if (CdromId[0] == '\0') cdr.Result[1] |= 0x80; From 49f5a273fd9ee501d5cab2fb554b0a50c243e487 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 30 May 2024 11:52:21 +0200 Subject: [PATCH 540/597] Fix invalid variable types On SH4, uint32_t is "unsigned long" and int32_t is "long"; which means that "int32_t" and "int" pointers cannot be used interchangeably without an explicit cast. 
Signed-off-by: Paul Cercueil --- libpcsxcore/psxdma.c | 8 ++++++-- plugins/gpulib/gpu.c | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index 55d2a0a77..68b9694f3 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -136,7 +136,9 @@ static u32 gpuDmaChainSize(u32 addr) { void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU u32 *ptr, madr_next, *madr_next_p; u32 words, words_left, words_max, words_copy; - int cycles_sum, cycles_last_cmd = 0, do_walking; + s32 cycles_last_cmd = 0; + int do_walking; + long cycles_sum; madr &= ~3; switch (chcr) { @@ -225,7 +227,9 @@ void gpuInterrupt() { if (HW_DMA2_CHCR == SWAP32(0x01000401) && !(HW_DMA2_MADR & SWAP32(0x800000))) { u32 madr_next = 0xffffff, madr = SWAPu32(HW_DMA2_MADR); - int cycles_sum, cycles_last_cmd = 0; + s32 cycles_last_cmd = 0; + long cycles_sum; + cycles_sum = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, &madr_next, &cycles_last_cmd); HW_DMA2_MADR = SWAPu32(madr_next); diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index b444dcf52..940278741 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -919,8 +919,8 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs) gpu.frameskip.advice = &cbs->fskip_advice; gpu.frameskip.active = 0; gpu.frameskip.frame_ready = 1; - gpu.state.hcnt = cbs->gpu_hcnt; - gpu.state.frame_count = cbs->gpu_frame_count; + gpu.state.hcnt = (uint32_t *)cbs->gpu_hcnt; + gpu.state.frame_count = (uint32_t *)cbs->gpu_frame_count; gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace; gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable; gpu.state.screen_centering_type_default = cbs->screen_centering_type_default; From eedbe278db2e34a61c7beef4692e211facb595a5 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 30 May 2024 11:54:55 +0200 Subject: [PATCH 541/597] spunull: Add missing callback and fix prototypes - Add SPUregisterScheduleCb() which was missing - Fix prototypes of some 
functions, which did not have the right number of parameters. This caused warnings at the linking phase when building PCSX with LTO. Signed-off-by: Paul Cercueil --- plugins/spunull/spunull.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/plugins/spunull/spunull.c b/plugins/spunull/spunull.c index 7f16ed5ef..38cd44b53 100644 --- a/plugins/spunull/spunull.c +++ b/plugins/spunull/spunull.c @@ -182,7 +182,7 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, unsigned i //////////////////////////////////////////////////////////////////////// -unsigned short CALLBACK SPUreadRegister(unsigned long reg) +unsigned short CALLBACK SPUreadRegister(unsigned long reg, unsigned int cycles) { unsigned long r=reg&0xfff; @@ -280,7 +280,7 @@ void CALLBACK SPUreadDMAMem(unsigned short * pusPSXMem,int iSize,unsigned int cy // XA AUDIO //////////////////////////////////////////////////////////////////////// -void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap) +void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycles, int is_start) { } @@ -352,7 +352,7 @@ void SPUasync(unsigned int cycle, unsigned int flags) { } -int SPUplayCDDAchannel(short *pcm, int nbytes) +int SPUplayCDDAchannel(short *pcm, int nbytes, unsigned int cycle, int is_start) { return -1; } @@ -542,3 +542,7 @@ void CALLBACK SPUstopChannels2(unsigned short channels) { //SoundOff(16,24,channels); } + +void CALLBACK SPUregisterScheduleCb(void (CALLBACK *callback)(unsigned int)) +{ +} From f51cdd352cb53389c9dadd5004c9ebb4f33cea24 Mon Sep 17 00:00:00 2001 From: saulfabreg Wii VC Project Date: Sun, 16 Jun 2024 15:28:41 -0500 Subject: [PATCH 542/597] more timing hacks: fix jump command on Power Rangers - Lightspeed Rescue https://github.com/libretro/pcsx_rearmed/issues/837 --- libpcsxcore/database.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index ef990ac48..c234e54ad 100644 --- a/libpcsxcore/database.c +++ 
b/libpcsxcore/database.c @@ -120,6 +120,8 @@ cycle_multiplier_overrides[] = { 222, { "SLES01549", "SLES02063", "SLES02064" } }, /* Digimon World */ { 153, { "SLUS01032", "SLES02914" } }, + /* Power Rangers: Lightspeed Rescue - jump does not work with 175 */ + { 222, { "SLUS01114", "SLES03286" } }, /* Syphon Filter - reportedly hangs under unknown conditions */ { 169, { "SCUS94240" } }, /* Psychic Detective - some weird race condition in the game's cdrom code */ From 77d753f4f0e9b9e66347d913be3e6b21cc3a004a Mon Sep 17 00:00:00 2001 From: saulfabreg Wii VC Project Date: Thu, 27 Jun 2024 16:09:13 -0500 Subject: [PATCH 543/597] Update timing (override) hack for Power Rangers: Lightspeed Rescue (SuperrSonic) (#341) * Update timing (override) hack for Power Rangers: Lightspeed Rescue (SuperrSonic) The game's jumping command does not work if the game FPS is over 30. At 280, on level 3 has audio stutters. Needs more testing. SuperrSonic found 310 ideal to maintain the emulation at full speed, at the cost of the game dropping more frames, this allowed jumping to always work and keep the sound stutter free, as it was tested the entire game this way. Credit to SuperrSonic for this fix. 
* update timing hack description for PRLR (SuperrSonic) --- libpcsxcore/database.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index c234e54ad..159b7e4f3 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -120,8 +120,8 @@ cycle_multiplier_overrides[] = { 222, { "SLES01549", "SLES02063", "SLES02064" } }, /* Digimon World */ { 153, { "SLUS01032", "SLES02914" } }, - /* Power Rangers: Lightspeed Rescue - jump does not work with 175 */ - { 222, { "SLUS01114", "SLES03286" } }, + /* Power Rangers: Lightspeed Rescue - jumping fails if FPS is over 30 */ + { 310, { "SLUS01114", "SLES03286" } }, /* Syphon Filter - reportedly hangs under unknown conditions */ { 169, { "SCUS94240" } }, /* Psychic Detective - some weird race condition in the game's cdrom code */ From e1fbc1a86d39bdf445b360c7cb41f4946de73203 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 29 Jun 2024 01:02:05 +0300 Subject: [PATCH 544/597] standalone: some ppf loading mostly for testing, like game.chd.ppf would load game.chd and apply game.chd.ppf patch --- frontend/menu.c | 19 +++++++++++++++++-- libpcsxcore/misc.c | 2 +- libpcsxcore/ppf.c | 43 +++++++++++++++++++++++-------------------- libpcsxcore/ppf.h | 2 +- 4 files changed, 42 insertions(+), 24 deletions(-) diff --git a/frontend/menu.c b/frontend/menu.c index 39f0a6972..eb3237e3a 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -40,6 +40,7 @@ #include "../libpcsxcore/cdrom.h" #include "../libpcsxcore/cdriso.h" #include "../libpcsxcore/cheat.h" +#include "../libpcsxcore/ppf.h" #include "../libpcsxcore/new_dynarec/new_dynarec.h" #include "../plugins/dfsound/spu_config.h" #include "psemu_plugin_defs.h" @@ -740,7 +741,7 @@ static const char *filter_exts[] = { #ifdef HAVE_CHD "chd", #endif - "bz", "znx", "pbp", "cbn", NULL + "bz", "znx", "pbp", "cbn", "ppf", NULL }; // rrrr rggg gggb bbbb @@ -2157,6 +2158,18 @@ static int run_exe(void) static int run_cd_image(const 
char *fname) { int autoload_state = g_autostateld_opt; + size_t fname_len = strlen(fname); + const char *ppfname = NULL; + char fname2[256]; + + // simple ppf handling, like game.chd.ppf + if (4 < fname_len && fname_len < sizeof(fname2) + && strcasecmp(fname + fname_len - 4, ".ppf") == 0) { + memcpy(fname2, fname, fname_len - 4); + fname2[fname_len - 4] = 0; + ppfname = fname; + fname = fname2; + } ready_to_go = 0; reload_plugins(fname); @@ -2170,6 +2183,8 @@ static int run_cd_image(const char *fname) menu_update_msg("unsupported/invalid CD image"); return -1; } + if (ppfname) + BuildPPFCache(ppfname); SysReset(); @@ -2185,7 +2200,7 @@ static int run_cd_image(const char *fname) if (autoload_state) { unsigned int newest = 0; - int time, slot, newest_slot = -1; + int time = 0, slot, newest_slot = -1; for (slot = 0; slot < 10; slot++) { if (emu_check_save_file(slot, &time)) { diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 47a32cce2..aafe52217 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -466,7 +466,7 @@ int CheckCdrom() { Apply_Hacks_Cdrom(); - BuildPPFCache(); + BuildPPFCache(NULL); return 0; } diff --git a/libpcsxcore/ppf.c b/libpcsxcore/ppf.c index f37687cc0..6a88e0536 100644 --- a/libpcsxcore/ppf.c +++ b/libpcsxcore/ppf.c @@ -181,7 +181,7 @@ static void AddToPPF(s32 ladr, s32 pos, s32 anz, unsigned char *ppfmem) { } } -void BuildPPFCache() { +void BuildPPFCache(const char *fname) { FILE *ppffile; char buffer[12]; char method, undo = 0, blockcheck = 0; @@ -196,23 +196,25 @@ void BuildPPFCache() { if (CdromId[0] == '\0') return; - // Generate filename in the format of SLUS_123.45 - buffer[0] = toupper(CdromId[0]); - buffer[1] = toupper(CdromId[1]); - buffer[2] = toupper(CdromId[2]); - buffer[3] = toupper(CdromId[3]); - buffer[4] = '_'; - buffer[5] = CdromId[4]; - buffer[6] = CdromId[5]; - buffer[7] = CdromId[6]; - buffer[8] = '.'; - buffer[9] = CdromId[7]; - buffer[10] = CdromId[8]; - buffer[11] = '\0'; - - sprintf(szPPF, "%s%s", 
Config.PatchesDir, buffer); - - ppffile = fopen(szPPF, "rb"); + if (!fname) { + // Generate filename in the format of SLUS_123.45 + buffer[0] = toupper(CdromId[0]); + buffer[1] = toupper(CdromId[1]); + buffer[2] = toupper(CdromId[2]); + buffer[3] = toupper(CdromId[3]); + buffer[4] = '_'; + buffer[5] = CdromId[4]; + buffer[6] = CdromId[5]; + buffer[7] = CdromId[6]; + buffer[8] = '.'; + buffer[9] = CdromId[7]; + buffer[10] = CdromId[8]; + buffer[11] = '\0'; + + sprintf(szPPF, "%s%s", Config.PatchesDir, buffer); + fname = szPPF; + } + ppffile = fopen(fname, "rb"); if (ppffile == NULL) return; memset(buffer, 0, 5); @@ -220,7 +222,7 @@ void BuildPPFCache() { goto fail_io; if (strcmp(buffer, "PPF") != 0) { - SysPrintf(_("Invalid PPF patch: %s.\n"), szPPF); + SysPrintf(_("Invalid PPF patch: %s.\n"), fname); fclose(ppffile); return; } @@ -346,7 +348,8 @@ void BuildPPFCache() { FillPPFCache(); // build address array - SysPrintf(_("Loaded PPF %d.0 patch: %s.\n"), method + 1, szPPF); + SysPrintf(_("Loaded PPF %d.0 patch: %s.\n"), method + 1, fname); + return; fail_io: #ifndef NDEBUG diff --git a/libpcsxcore/ppf.h b/libpcsxcore/ppf.h index a1b147510..e646e554f 100644 --- a/libpcsxcore/ppf.h +++ b/libpcsxcore/ppf.h @@ -23,7 +23,7 @@ extern "C" { #endif -void BuildPPFCache(); +void BuildPPFCache(const char *fname); void FreePPFCache(); void CheckPPFCache(unsigned char *pB, unsigned char m, unsigned char s, unsigned char f); From f9ffa42ca29c5d050e80b986c00b2d783b9731a2 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 29 Jun 2024 01:08:41 +0300 Subject: [PATCH 545/597] try some overscan display option libretro/pcsx_rearmed#313 --- frontend/libretro.c | 12 ++++++++++++ frontend/libretro_core_options.h | 15 +++++++++++++++ frontend/menu.c | 4 ++++ frontend/plugin_lib.h | 1 + plugins/gpulib/gpu.c | 13 ++++++++++--- plugins/gpulib/gpu.h | 1 + 6 files changed, 43 insertions(+), 3 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 7ad4caa8a..4eb5caba3 100644 --- 
a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2759,6 +2759,18 @@ static void update_variables(bool in_flight) pl_rearmed_cbs.screen_centering_y = atoi(var.value); } + var.value = NULL; + var.key = "pcsx_rearmed_show_overscan"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "auto") == 0) + pl_rearmed_cbs.show_overscan = 1; + else if (strcmp(var.value, "hack") == 0) + pl_rearmed_cbs.show_overscan = 2; + else + pl_rearmed_cbs.show_overscan = 0; + } + #ifdef THREAD_RENDERING var.key = "pcsx_rearmed_gpu_thread_rendering"; var.value = NULL; diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index e79e90275..e72ddb5c2 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -453,6 +453,21 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "auto", }, + { + "pcsx_rearmed_show_overscan", + "(GPU) Show horizontal overscan", + NULL, + "The PSX can display graphics way into the horizontal borders, even if most screens would crop it. This option tries to display all such graphics. Note that this may result in unusual resolutions that your device might not handle well. 
The 'Hack' option is intended for the widescreen hacks.", + NULL, + "video", + { + { "disabled", NULL }, + { "auto", "Auto" }, + { "hack", "Hack" }, + { NULL, NULL }, + }, + "disabled", + }, { "pcsx_rearmed_screen_centering", "(GPU) Screen centering", diff --git a/frontend/menu.c b/frontend/menu.c index eb3237e3a..9b9af7c87 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -93,6 +93,7 @@ typedef enum MA_OPT_SCANLINES, MA_OPT_SCANLINE_LEVEL, MA_OPT_CENTERING, + MA_OPT_OVERSCAN, } menu_id; static int last_vout_w, last_vout_h, last_vout_bpp; @@ -467,6 +468,7 @@ static const struct { CE_INTVAL_P(screen_centering_type), CE_INTVAL_P(screen_centering_x), CE_INTVAL_P(screen_centering_y), + CE_INTVAL_P(show_overscan), CE_INTVAL(spu_config.iUseReverb), CE_INTVAL(spu_config.iXAPitch), CE_INTVAL(spu_config.iUseInterpolation), @@ -1280,6 +1282,7 @@ static const char *men_soft_filter[] = { "None", NULL }; static const char *men_dummy[] = { NULL }; static const char *men_centering[] = { "Auto", "Ingame", "Borderless", "Force", NULL }; +static const char *men_overscan[] = { "OFF", "Auto", "Hack", NULL }; static const char h_scaler[] = "int. 2x - scales w. or h. 2x if it fits on screen\n" "int. 
4:3 - uses integer if possible, else fractional"; static const char h_cscaler[] = "Displays the scaler layer, you can resize it\n" @@ -1376,6 +1379,7 @@ static int menu_loop_cscaler(int id, int keys) static menu_entry e_menu_gfx_options[] = { mee_enum ("Screen centering", MA_OPT_CENTERING, pl_rearmed_cbs.screen_centering_type, men_centering), + mee_enum ("Show overscan", MA_OPT_OVERSCAN, pl_rearmed_cbs.show_overscan, men_overscan), mee_enum_h ("Scaler", MA_OPT_VARSCALER, g_scaler, men_scaler, h_scaler), mee_enum ("Video output mode", MA_OPT_VOUT_MODE, plat_target.vout_method, men_dummy), mee_onoff ("Software Scaling", MA_OPT_SCALER2, soft_scaling, 1), diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index 7879e70ba..71fcd3fc0 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -115,6 +115,7 @@ struct rearmed_cbs { int screen_centering_type_default; int screen_centering_x; int screen_centering_y; + int show_overscan; }; extern struct rearmed_cbs pl_rearmed_cbs; diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 940278741..f48a53572 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -89,6 +89,11 @@ static noinline void update_width(void) x = (x + 1) & ~1; // blitter limitation sw /= hdiv; sw = (sw + 2) & ~3; // according to nocash + + if (gpu.state.show_overscan == 2) // widescreen hack + sw = (sw + 63) & ~63; + if (gpu.state.show_overscan && sw >= hres) + x = 0, hres = sw; switch (type) { case C_INGAME: break; @@ -113,8 +118,8 @@ static noinline void update_width(void) gpu.screen.w = sw; gpu.screen.hres = hres; gpu.state.dims_changed = 1; - //printf("xx %d %d -> %2d, %d / %d\n", - // gpu.screen.x1, gpu.screen.x2, x, sw, hres); + //printf("xx %d %d (%d) -> %2d, %d / %d\n", gpu.screen.x1, + // gpu.screen.x2, gpu.screen.x2 - gpu.screen.x1, x, sw, hres); } static noinline void update_height(void) @@ -926,10 +931,12 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs) gpu.state.screen_centering_type_default = 
cbs->screen_centering_type_default; if (gpu.state.screen_centering_type != cbs->screen_centering_type || gpu.state.screen_centering_x != cbs->screen_centering_x - || gpu.state.screen_centering_y != cbs->screen_centering_y) { + || gpu.state.screen_centering_y != cbs->screen_centering_y + || gpu.state.show_overscan != cbs->show_overscan) { gpu.state.screen_centering_type = cbs->screen_centering_type; gpu.state.screen_centering_x = cbs->screen_centering_x; gpu.state.screen_centering_y = cbs->screen_centering_y; + gpu.state.show_overscan = cbs->show_overscan; update_width(); update_height(); } diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 96a818370..fb5c4ff1f 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -81,6 +81,7 @@ struct psx_gpu { uint32_t downscale_enable:1; uint32_t downscale_active:1; uint32_t dims_changed:1; + uint32_t show_overscan:2; uint32_t *frame_count; uint32_t *hcnt; /* hsync count */ struct { From 1fea3be9aa7061f82f0c7d1ba3ae693027444263 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 30 Jun 2024 01:48:22 +0300 Subject: [PATCH 546/597] gpu_neon: fix sign extension unclear why it's added differently from sprites, might need to revisit libretro/pcsx_rearmed#833 --- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index d81b70785..53f33e4c5 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -200,21 +200,14 @@ static void do_fill(psx_gpu_struct *psx_gpu, u32 x, u32 y, } } -#define sign_extend_12bit(value) \ - (((s32)((value) << 20)) >> 20) \ - #define sign_extend_11bit(value) \ (((s32)((value) << 21)) >> 21) \ -#define sign_extend_10bit(value) \ - (((s32)((value) << 22)) >> 22) \ - - #define get_vertex_data_xy(vertex_number, offset16) \ vertexes[vertex_number].x = \ - sign_extend_12bit(list_s16[offset16]) + 
psx_gpu->offset_x; \ + sign_extend_11bit(list_s16[offset16]) + psx_gpu->offset_x; \ vertexes[vertex_number].y = \ - sign_extend_12bit(list_s16[(offset16) + 1]) + psx_gpu->offset_y; \ + sign_extend_11bit(list_s16[(offset16) + 1]) + psx_gpu->offset_y; \ #define get_vertex_data_uv(vertex_number, offset16) \ vertexes[vertex_number].u = list_s16[offset16] & 0xFF; \ @@ -1746,10 +1739,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, case 0xE5: { - s32 offset_x = list[0] << 21; - s32 offset_y = list[0] << 10; - psx_gpu->offset_x = offset_x >> 21; - psx_gpu->offset_y = offset_y >> 21; + psx_gpu->offset_x = sign_extend_11bit(list[0]); + psx_gpu->offset_y = sign_extend_11bit(list[0] >> 11); SET_Ex(5, list[0]); break; From 52fb89090faee4fba571004583943e0c32c57216 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 30 Jun 2024 02:23:25 +0300 Subject: [PATCH 547/597] gpu_neon: revive the old tests --- plugins/gpu_neon/psx_gpu/psx_gpu_main.c | 15 ++++++++------- plugins/gpu_neon/psx_gpu/tests/Makefile | 5 ++++- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c index 435c51a2e..5f1f38348 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c @@ -136,7 +136,8 @@ int main(int argc, char *argv[]) FILE *list_file; u32 no_display = 0; s32 dummy0 = 0; - u32 dummy1 = 0; + s32 dummy1 = 0; + u32 dummy2 = 0; if((argc != 3) && (argc != 4)) { @@ -180,7 +181,7 @@ int main(int argc, char *argv[]) u32 fbdev_handle = open("/dev/fb1", O_RDWR); vram_ptr = (mmap((void *)0x50000000, 1024 * 1024 * 2, PROT_READ | PROT_WRITE, MAP_SHARED | 0xA0000000, fbdev_handle, 0)); -#elif 1 +#elif 0 #ifndef MAP_HUGETLB #define MAP_HUGETLB 0x40000 /* arch specific */ #endif @@ -211,23 +212,23 @@ int main(int argc, char *argv[]) clear_stats(); -#ifdef NEON_BUILD +#ifdef CYCLE_COUNTER init_counter(); #endif - gpu_parse(psx_gpu, list, size, &dummy0, &dummy1); + 
gpu_parse(psx_gpu, list, size, &dummy0, &dummy1, &dummy2); flush_render_block_buffer(psx_gpu); clear_stats(); -#ifdef NEON_BUILD +#ifdef CYCLE_COUNTER u32 cycles = get_counter(); #endif - gpu_parse(psx_gpu, list, size, &dummy0, &dummy1); + gpu_parse(psx_gpu, list, size, &dummy0, &dummy1, &dummy2); flush_render_block_buffer(psx_gpu); -#ifdef NEON_BUILD +#ifdef CYCLE_COUNTER u32 cycles_elapsed = get_counter() - cycles; printf("%-64s: %d\n", argv[1], cycles_elapsed); diff --git a/plugins/gpu_neon/psx_gpu/tests/Makefile b/plugins/gpu_neon/psx_gpu/tests/Makefile index bb91a5a88..21d615589 100644 --- a/plugins/gpu_neon/psx_gpu/tests/Makefile +++ b/plugins/gpu_neon/psx_gpu/tests/Makefile @@ -4,7 +4,7 @@ CFLAGS += -Wall -ggdb CFLAGS += -fno-strict-aliasing CFLAGS += `sdl-config --cflags` -LDFLAGS += `sdl-config --libs` +LDLIBS += `sdl-config --libs` VPATH += .. @@ -12,6 +12,9 @@ ifdef NEON CFLAGS += -mcpu=cortex-a8 -mfpu=neon -DNEON_BUILD ASFLAGS = $(CFLAGS) OBJ += psx_gpu_arm_neon.o +else +CFLAGS += -DNEON_BUILD -DSIMD_BUILD +OBJ += psx_gpu_simd.o endif ifndef DEBUG CFLAGS += -O2 -DNDEBUG From 22af444c0561326b82d64506d326035ade1417fc Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 25 Jul 2024 02:34:23 +0300 Subject: [PATCH 548/597] cdrom: more hacks for more timing issues libretro/pcsx_rearmed#840 --- libpcsxcore/cdrom.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 4ef0a237a..335c2dc58 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -835,9 +835,11 @@ void cdrInterrupt(void) { { for (i = 0; i < 3; i++) set_loc[i] = btoi(cdr.Param[i]); - cdr.RetryDetected = msfiEq(cdr.SetSector, set_loc) - && !cdr.SetlocPending; - //cdr.RetryDetected |= msfiEq(cdr.Param, cdr.Transfer); + if ((msfiEq(cdr.SetSector, set_loc)) //|| msfiEq(cdr.Param, cdr.Transfer)) + && !cdr.SetlocPending) + cdr.RetryDetected++; + else + cdr.RetryDetected = 0; memcpy(cdr.SetSector, set_loc, 3); cdr.SetSector[3] = 0; 
cdr.SetlocPending = 1; @@ -991,6 +993,7 @@ void cdrInterrupt(void) { Hokuto no Ken 2 InuYasha - Feudal Fairy Tale Dance Dance Revolution Konamix + Digimon Rumble Arena ... */ if (!(cdr.StatP & (STATUS_PLAY | STATUS_READ))) @@ -999,7 +1002,9 @@ void cdrInterrupt(void) { } else { - second_resp_time = 2 * 1097107; + second_resp_time = 2100011; + // a hack to try to avoid weird cmd vs irq1 races causing games to retry + second_resp_time += (cdr.RetryDetected & 15) * 100001; } SetPlaySeekRead(cdr.StatP, 0); DriveStateOld = cdr.DriveState; From 5f71e0dec57d1902b100511b7daec2a00f76ab76 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 16 Aug 2024 03:05:46 +0300 Subject: [PATCH 549/597] avoid double prints with bios+Config.PsxOut --- libpcsxcore/psxbios.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 1f2e37a23..64a04b853 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -3615,7 +3615,8 @@ void psxBiosInit() { biosA0[0x03] = biosB0[0x35] = psxBios_write_psxout; biosA0[0x3c] = biosB0[0x3d] = psxBios_putchar_psxout; biosA0[0x3e] = biosB0[0x3f] = psxBios_puts_psxout; - biosA0[0x3f] = psxBios_printf_psxout; + // calls putchar() internally so no need to override + //biosA0[0x3f] = psxBios_printf_psxout; if (!Config.HLE) { char verstr[0x24+1]; From 31a12b073905a7e4cd8f83d3d45e50a130af5cec Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 17 Aug 2024 01:44:00 +0300 Subject: [PATCH 550/597] gpulib: adjust masking no idea if anything uses this, but tested on hw --- plugins/gpulib/gpu.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index f48a53572..54bf63e69 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -370,21 +370,31 @@ const unsigned char cmd_lengths[256] = #define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)] -static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t 
msb) +// this isn't very useful so should be rare +static void cpy_mask(uint16_t *dst, const uint16_t *src, int l, uint32_t r6) { int i; - for (i = 0; i < l; i++) - dst[i] = src[i] | msb; + if (r6 == 1) { + for (i = 0; i < l; i++) + dst[i] = src[i] | 0x8000; + } + else { + uint16_t msb = r6 << 15; + for (i = 0; i < l; i++) { + uint16_t mask = (int16_t)dst[i] >> 15; + dst[i] = (dst[i] & mask) | ((src[i] | msb) & ~mask); + } + } } static inline void do_vram_line(int x, int y, uint16_t *mem, int l, - int is_read, uint16_t msb) + int is_read, uint32_t r6) { uint16_t *vram = VRAM_MEM_XY(x, y); if (unlikely(is_read)) memcpy(mem, vram, l * 2); - else if (unlikely(msb)) - cpy_msb(vram, mem, l, msb); + else if (unlikely(r6)) + cpy_mask(vram, mem, l, r6); else memcpy(vram, mem, l * 2); } @@ -392,7 +402,7 @@ static inline void do_vram_line(int x, int y, uint16_t *mem, int l, static int do_vram_io(uint32_t *data, int count, int is_read) { int count_initial = count; - uint16_t msb = gpu.ex_regs[6] << 15; + uint32_t r6 = gpu.ex_regs[6] & 3; uint16_t *sdata = (uint16_t *)data; int x = gpu.dma.x, y = gpu.dma.y; int w = gpu.dma.w, h = gpu.dma.h; @@ -405,7 +415,7 @@ static int do_vram_io(uint32_t *data, int count, int is_read) if (count < l) l = count; - do_vram_line(x + o, y, sdata, l, is_read, msb); + do_vram_line(x + o, y, sdata, l, is_read, r6); if (o + l < w) o += l; @@ -420,13 +430,13 @@ static int do_vram_io(uint32_t *data, int count, int is_read) for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) { y &= 511; - do_vram_line(x, y, sdata, w, is_read, msb); + do_vram_line(x, y, sdata, w, is_read, r6); } if (h > 0) { if (count > 0) { y &= 511; - do_vram_line(x, y, sdata, count, is_read, msb); + do_vram_line(x, y, sdata, count, is_read, r6); o = count; count = 0; } From 03eb3b69632444da8993e70eea740d3ba6372162 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 17 Aug 2024 01:38:45 +0300 Subject: [PATCH 551/597] gpu_neon: fix wrong mask bit for fills Fixes 
notaz/pcsx_rearmed#344 --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 6 +-- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 46 +-------------------- 2 files changed, 3 insertions(+), 49 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index b671a757b..a59e9cdc8 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -4810,8 +4810,7 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, u32 r = color & 0xFF; u32 g = (color >> 8) & 0xFF; u32 b = (color >> 16) & 0xFF; - u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) | - psx_gpu->mask_msb; + u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10); u32 color_32bpp = color_16bpp | (color_16bpp << 16); u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024)); @@ -4863,8 +4862,7 @@ void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, u32 r = color & 0xFF; u32 g = (color >> 8) & 0xFF; u32 b = (color >> 16) & 0xFF; - u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) | - psx_gpu->mask_msb; + u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10); u32 color_32bpp = color_16bpp | (color_16bpp << 16); u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024)); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index ffbea043c..1ba562b53 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -4386,51 +4386,6 @@ function(warmup) #undef vram_ptr #undef color -#undef width -#undef height -#undef pitch - -#define vram_ptr r0 -#define color r1 -#define width r2 -#define height r3 - -#define pitch r1 - -#define num_width r12 - -#undef colors_a -#undef colors_b - -#define colors_a q0 -#define colors_b q1 - -.align 3 - -function(render_block_fill_body) - vdup.u16 colors_a, color - mov pitch, #2048 - - vmov colors_b, colors_a - sub pitch, pitch, width, 
lsl #1 - - mov num_width, width - - 0: - vst1.u32 { colors_a, colors_b }, [vram_ptr, :256]! - - subs num_width, num_width, #16 - bne 0b - - add vram_ptr, vram_ptr, pitch - mov num_width, width - - subs height, height, #1 - bne 0b - - bx lr - - #undef x #undef y #undef width @@ -4523,6 +4478,7 @@ function(render_block_fill_body) #define texels_wide_high d15 #define texels_wide q7 +.align 3 setup_sprite_flush_blocks: vpush { q1 - q5 } From 8489010bfce4a5a051b215523d6e41189d742e4c Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 22 Aug 2024 00:38:45 +0300 Subject: [PATCH 552/597] cdriso: log main cd img size --- libpcsxcore/cdriso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index cd2d20262..243c03621 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -1457,7 +1457,7 @@ static long CALLBACK ISOopen(void) { } } - SysPrintf("%s.\n", image_str); + SysPrintf("%s (%lld bytes).\n", image_str, (long long)size_main); PrintTracks(); From ee060c582cd44ba9cda6626466414c318a09b697 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 20 Aug 2024 23:49:45 +0300 Subject: [PATCH 553/597] gpu_neon: rework enh. res. 
texturing hack libretro/pcsx_rearmed#841 --- plugins/gpu_neon/psx_gpu/common.h | 2 - plugins/gpu_neon/psx_gpu/psx_gpu.c | 63 +++++++++++- plugins/gpu_neon/psx_gpu/psx_gpu.h | 13 ++- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 59 +++++++++++- plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h | 1 + .../gpu_neon/psx_gpu/psx_gpu_offsets_update.c | 1 + plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 95 +++++-------------- plugins/gpu_neon/psx_gpu/psx_gpu_simd.c | 36 +++++++ 8 files changed, 191 insertions(+), 79 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/common.h b/plugins/gpu_neon/psx_gpu/common.h index 820dfbefd..5881e2a0e 100644 --- a/plugins/gpu_neon/psx_gpu/common.h +++ b/plugins/gpu_neon/psx_gpu/common.h @@ -9,7 +9,5 @@ #include "vector_types.h" #include "psx_gpu.h" -#define unlikely(x) __builtin_expect((x), 0) - #endif diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index a59e9cdc8..19f1c199b 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -560,8 +560,9 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu) y##set##_b.e[1] = vertex->b \ -void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, - vertex_struct *b, vertex_struct *c) +void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, + const vertex_struct * __restrict__ a, const vertex_struct * __restrict__ b, + const vertex_struct * __restrict__ c) { u32 triangle_area = psx_gpu->triangle_area; u32 winding_mask_scalar; @@ -1163,6 +1164,8 @@ static void setup_spans_debug_check(psx_gpu_struct *psx_gpu, setup_spans_set_x4(alternate, down, alternate_active); \ height -= 4; \ } while(height > 0); \ + if (psx_gpu->hacks_active & (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V)) \ + span_uvrg_offset[height - 1].low = span_uvrg_offset[height - 2].low; \ } \ @@ -1216,6 +1219,8 @@ static void setup_spans_debug_check(psx_gpu_struct *psx_gpu, setup_spans_set_x4(alternate, up, alternate_active); \ height -= 4; \ } \ + if 
(psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_V) \ + psx_gpu->span_uvrg_offset[0].low = psx_gpu->span_uvrg_offset[1].low; \ } \ #define index_left 0 @@ -1452,6 +1457,11 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_spans_set_x4(none, down, no); height_minor_b -= 4; } + if (psx_gpu->hacks_active & (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V)) + { + span_uvrg_offset[height_minor_b - 1].low = + span_uvrg_offset[height_minor_b - 2].low; + } } left_split_triangles++; @@ -1459,6 +1469,41 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, #endif +// this is some hacky mess, can this be improved somehow? +// ideally change things to not have to do this hack at all +void __attribute__((noinline)) +setup_blocks_uv_adj_hack(psx_gpu_struct *psx_gpu, block_struct *block, + edge_data_struct *span_edge_data, vec_4x32u *span_uvrg_offset) +{ + size_t span_i = span_uvrg_offset - psx_gpu->span_uvrg_offset; + if (span_i != 0 && span_i != psx_gpu->num_spans - 1 + && !(psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_U)) + return; + u32 num_blocks = span_edge_data->num_blocks - 1; + s32 offset = __builtin_ctz(span_edge_data->right_mask | 0x100) - 1; + s32 toffset = 8 * num_blocks + offset - 1; + if (toffset < 0 && !(psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_U)) + return; + + toffset += span_edge_data->left_x; + s32 u_dx = psx_gpu->uvrg_dx.low.e[0]; + s32 v_dx = psx_gpu->uvrg_dx.low.e[1]; + u32 u = span_uvrg_offset->low.e[0]; + u32 v = span_uvrg_offset->low.e[1]; + u += u_dx * toffset; + v += v_dx * toffset; + u = (u >> 16) & psx_gpu->texture_mask_width; + v = (v >> 16) & psx_gpu->texture_mask_height; + if (!(psx_gpu->render_state_base & (TEXTURE_MODE_16BPP << 8))) { + // 4bpp 8bpp are swizzled + u32 u_ = u; + u = (u & 0x0f) | ((v & 0x0f) << 4); + v = (v & 0xf0) | (u_ >> 4); + } + assert(offset >= 0); + //assert(block->uv.e[offset] == ((v << 8) | u)); + block->uv.e[offset] = (v << 8) | u; +} #define dither_table_entry_normal(value) \ (value) \ 
@@ -1868,6 +1913,14 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, #define setup_blocks_store_draw_mask_untextured_direct(_block, bits) \ +#define setup_blocks_uv_adj_hack_untextured(_block, edge_data, uvrg_offset) \ + +#define setup_blocks_uv_adj_hack_textured(_block, edge_data, uvrg_offset) \ +{ \ + u32 m_ = AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V; \ + if (unlikely(psx_gpu->hacks_active & m_)) \ + setup_blocks_uv_adj_hack(psx_gpu, _block, edge_data, uvrg_offset); \ +} \ #define setup_blocks_add_blocks_indirect() \ num_blocks += span_num_blocks; \ @@ -1938,6 +1991,8 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \ setup_blocks_store_##shading##_##texturing(sw, dithering, target, edge); \ setup_blocks_store_draw_mask_##texturing##_##target(block, \ span_edge_data->right_mask); \ + setup_blocks_uv_adj_hack_##texturing(block, span_edge_data, \ + span_uvrg_offset); \ \ block++; \ } \ @@ -5016,8 +5071,10 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN; psx_gpu->saved_hres = 256; + psx_gpu->hacks_active = 0; - // check some offset + // check some offsets, asm relies on these + psx_gpu->reserved_a[(offsetof(psx_gpu_struct, test_mask) == 0) - 1] = 0; psx_gpu->reserved_a[(offsetof(psx_gpu_struct, blocks) == psx_gpu_blocks_offset) - 1] = 0; } diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 2539521bf..f65351cf2 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -21,10 +21,17 @@ #define SPAN_DATA_BLOCKS_SIZE 32 +#define AHACK_TEXTURE_ADJ_U (1 << 0) +#define AHACK_TEXTURE_ADJ_V (1 << 1) + #ifndef __ASSEMBLER__ #include "vector_types.h" +#ifndef unlikely +#define unlikely(x) __builtin_expect((x), 0) +#endif + typedef enum { PRIMITIVE_TYPE_TRIANGLE = 0, @@ -189,6 +196,7 @@ typedef struct // enhancement stuff u16 *enhancement_buf_ptr; // main alloc u16 
*enhancement_current_buf_ptr; // offset into above, 4 bufs + u32 hacks_active; // AHACK_TEXTURE_ADJ_U ... u32 saved_hres; s16 saved_viewport_start_x; s16 saved_viewport_start_y; @@ -205,7 +213,7 @@ typedef struct // Align up to 64 byte boundary to keep the upcoming buffers cache line // aligned, also make reachable with single immediate addition - u8 reserved_a[184 + 9*4 - 9*sizeof(void *)]; + u8 reserved_a[184 + 8*4 - 9*sizeof(void *)]; // 8KB block_struct blocks[MAX_BLOCKS_PER_ROW]; @@ -256,6 +264,9 @@ u32 texture_region_mask(s32 x1, s32 y1, s32 x2, s32 y2); void update_texture_8bpp_cache(psx_gpu_struct *psx_gpu); void flush_render_block_buffer(psx_gpu_struct *psx_gpu); +void setup_blocks_uv_adj_hack(psx_gpu_struct *psx_gpu, block_struct *block, + edge_data_struct *span_edge_data, vec_4x32u *span_uvrg_offset); + void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram); u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 1ba562b53..827388551 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -223,6 +223,7 @@ #ifdef __MACH__ #define flush_render_block_buffer _flush_render_block_buffer #define update_texture_8bpp_cache _update_texture_8bpp_cache +#define setup_blocks_uv_adj_hack _setup_blocks_uv_adj_hack #endif @ r0: psx_gpu @@ -543,6 +544,7 @@ function(compute_all_gradients) #define uvrg q14 #define uvrg_dy q15 +#define uv d28 #define alternate_x_16 d4 @@ -925,6 +927,14 @@ function(compute_all_gradients) subs height, height, #4; \ bhi 2b; \ \ + nop; \ + ldr temp, [psx_gpu, #psx_gpu_hacks_active_offset]; \ + tst temp, #(AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V); \ + beq 1f; \ + add temp, span_uvrg_offset, height, lsl #4; \ + vldr uv, [temp, #(-16*2)]; \ + vstr uv, [temp, #(-16)]; \ + \ 1: \ @@ -986,6 +996,14 @@ 
function(compute_all_gradients) subs height, height, #4; \ bhi 2b; \ \ + nop; \ + ldr temp, [psx_gpu, #psx_gpu_hacks_active_offset]; \ + tst temp, #AHACK_TEXTURE_ADJ_V; \ + beq 1f; \ + add temp, psx_gpu, #psx_gpu_span_uvrg_offset_offset; \ + vldr uv, [temp, #16]; \ + vstr uv, [temp, #0]; \ + \ 1: \ @@ -1216,6 +1234,14 @@ function(setup_spans_up_down) subs height_minor_b, height_minor_b, #4 bhi 2b + nop + ldr temp, [psx_gpu, #psx_gpu_hacks_active_offset] + tst temp, #(AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V) + beq 1f + add temp, span_uvrg_offset, height, lsl #4 + vldr uv, [temp, #(-16*2)] + vstr uv, [temp, #(-16)] + 1: setup_spans_epilogue() @@ -1256,6 +1282,7 @@ function(setup_spans_up_down) #define uvrg_dx_ptr r2 #define texture_mask_ptr r3 +#define hacks_active r6 #define dither_shift r8 #define dither_row r10 @@ -1273,6 +1300,7 @@ function(setup_spans_up_down) #define color_b r5 #undef uvrg +#undef uv #define u_block q0 #define v_block q1 @@ -1350,6 +1378,26 @@ function(setup_spans_up_down) #define setup_blocks_texture_unswizzled() \ +#define setup_blocks_uv_adj_hack_textured(hacks_active) \ + tst hacks_active, #(AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V); \ + beq 91f; \ + /* see flush_render_block_buffer below for a reg saving note */ \ + vpush { texture_mask }; \ + vpush { uvrg_dx4 }; \ + \ + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ + mov r12, span_uvrg_offset; \ + sub r1, block_ptr_a, #64; \ + mov r2, span_edge_data; \ + mov r3, r12; \ + bl setup_blocks_uv_adj_hack; /* psx_gpu=r0 */ \ + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ + \ + vpop { uvrg_dx4 }; \ + vpop { texture_mask }; \ + vadd.u32 uvrg_dx8, uvrg_dx4, uvrg_dx4; \ +91: \ + #define setup_blocks_shaded_textured_builder(swizzling) \ .align 3; \ @@ -1575,6 +1623,7 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ vld1.u32 { test_mask }, [psx_gpu, :128]; \ vdup.u8 draw_mask, right_mask; \ \ + ldr hacks_active, [psx_gpu, #psx_gpu_hacks_active_offset]; 
\ vmov.u32 fb_mask_ptrs[0], right_mask; \ vtst.u16 draw_mask, draw_mask, test_mask; \ vzip.u8 u_whole_8, v_whole_8; \ @@ -1585,6 +1634,8 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ vst1.u32 { dither_offsets }, [block_ptr_b, :128], c_32; \ vst1.u32 { b_whole_8, fb_mask_ptrs }, [block_ptr_a, :128], c_32; \ \ + setup_blocks_uv_adj_hack_textured(hacks_active); \ + \ 1: \ add span_uvrg_offset, span_uvrg_offset, #16; \ add span_b_offset, span_b_offset, #4; \ @@ -1599,7 +1650,8 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ ldmia sp!, { r4 - r11, pc }; \ \ 2: \ - /* TODO: Load from psx_gpu instead of saving/restoring these */\ + /* this callee-save reg saving may look unnecessary but it actually is needed */ \ + /* because the callee violates the ABI */ \ vpush { texture_mask }; \ vpush { uvrg_dx4 }; \ \ @@ -1776,6 +1828,7 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ vld1.u32 { test_mask }, [psx_gpu, :128]; \ vdup.u8 draw_mask, right_mask; \ \ + ldr hacks_active, [psx_gpu, #psx_gpu_hacks_active_offset]; \ vmov.u32 fb_mask_ptrs[0], right_mask; \ vtst.u16 draw_mask, draw_mask, test_mask; \ vzip.u8 u_whole_8, v_whole_8; \ @@ -1786,6 +1839,8 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ vst1.u32 { dither_offsets }, [block_ptr_b, :128], c_32; \ vst1.u32 { b_whole_8, fb_mask_ptrs }, [block_ptr_a, :128], c_32; \ \ + setup_blocks_uv_adj_hack_textured(hacks_active); \ + \ 1: \ add span_uvrg_offset, span_uvrg_offset, #16; \ add span_edge_data, span_edge_data, #8; \ @@ -1798,7 +1853,6 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ ldmia sp!, { r4 - r11, pc }; \ \ 2: \ - /* TODO: Load from psx_gpu instead of saving/restoring these */\ vpush { texture_mask }; \ vpush { uvrg_dx4 }; \ \ @@ -2334,7 +2388,6 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect) \ ldmia sp!, { r4 - r11, pc }; \ \ 2: \ - /* TODO: Load 
from psx_gpu instead of saving/restoring these */\ vpush { rg_dx4 }; \ \ stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h index 2f8a64635..7c21d31c8 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h @@ -36,6 +36,7 @@ #define psx_gpu_texture_mask_width_offset 0xfa #define psx_gpu_texture_mask_height_offset 0xfb #define psx_gpu_reciprocal_table_ptr_offset 0x108 +#define psx_gpu_hacks_active_offset 0x114 #define psx_gpu_blocks_offset 0x200 #define psx_gpu_span_uvrg_offset_offset 0x2200 #define psx_gpu_span_edge_data_offset 0x4200 diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c index 9b3784827..740df981e 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c @@ -76,6 +76,7 @@ int main() //WRITE_OFFSET(f, clut_settings); //WRITE_OFFSET(f, texture_settings); WRITE_OFFSET(f, reciprocal_table_ptr); + WRITE_OFFSET(f, hacks_active); WRITE_OFFSET(f, blocks); WRITE_OFFSET(f, span_uvrg_offset); WRITE_OFFSET(f, span_edge_data); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 53f33e4c5..f398695d2 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -903,6 +903,7 @@ static void select_enhancement_buf(psx_gpu_struct *psx_gpu) psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y; \ psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x; \ psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y; \ + psx_gpu->hacks_active = 0; \ psx_gpu->uvrgb_phase = 0x8000; \ } @@ -917,7 +918,7 @@ static int enhancement_enable(psx_gpu_struct *psx_gpu) psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2 + 1; if (psx_gpu->viewport_end_x - psx_gpu->viewport_start_x + 1 > 1024) 
psx_gpu->viewport_end_x = psx_gpu->viewport_start_x + 1023; - psx_gpu->uvrgb_phase = 0x7fff; + //psx_gpu->uvrgb_phase = 0x7fff; return 1; } @@ -1018,73 +1019,29 @@ static int check_enhanced_range(psx_gpu_struct *psx_gpu, int x, int y) return 1; } -static int is_in_array(int val, int array[], int len) +static u32 uv_hack(psx_gpu_struct *psx_gpu, const vertex_struct *vertex_ptrs) { - int i; - for (i = 0; i < len; i++) - if (array[i] == val) - return 1; - return 0; -} - -static int make_members_unique(int array[], int len) -{ - int i, j; - for (i = j = 1; i < len; i++) - if (!is_in_array(array[i], array, j)) - array[j++] = array[i]; - - if (array[0] > array[1]) { - i = array[0]; array[0] = array[1]; array[1] = i; - } - return j; -} - -static void patch_u(vertex_struct *vertex_ptrs, int count, int old, int new) -{ - int i; - for (i = 0; i < count; i++) - if (vertex_ptrs[i].u == old) - vertex_ptrs[i].u = new; -} - -static void patch_v(vertex_struct *vertex_ptrs, int count, int old, int new) -{ - int i; - for (i = 0; i < count; i++) - if (vertex_ptrs[i].v == old) - vertex_ptrs[i].v = new; -} - -// this sometimes does more harm than good, like in PE2 -static void uv_hack(vertex_struct *vertex_ptrs, int vertex_count) -{ - int i, u[4], v[4]; - - for (i = 0; i < vertex_count; i++) { - u[i] = vertex_ptrs[i].u; - v[i] = vertex_ptrs[i].v; - } - if (make_members_unique(u, vertex_count) == 2 && u[1] - u[0] >= 8) { - if ((u[0] & 7) == 7) { - patch_u(vertex_ptrs, vertex_count, u[0], u[0] + 1); - //printf("u hack: %3u-%3u -> %3u-%3u\n", u[0], u[1], u[0]+1, u[1]); - } - else if ((u[1] & 7) == 0 || u[1] - u[0] > 128) { - patch_u(vertex_ptrs, vertex_count, u[1], u[1] - 1); - //printf("u hack: %3u-%3u -> %3u-%3u\n", u[0], u[1], u[0], u[1]-1); - } - } - if (make_members_unique(v, vertex_count) == 2 && ((v[0] - v[1]) & 7) == 0) { - if ((v[0] & 7) == 7) { - patch_v(vertex_ptrs, vertex_count, v[0], v[0] + 1); - //printf("v hack: %3u-%3u -> %3u-%3u\n", v[0], v[1], v[0]+1, v[1]); - } - else 
if ((v[1] & 7) == 0) { - patch_v(vertex_ptrs, vertex_count, v[1], v[1] - 1); - //printf("v hack: %3u-%3u -> %3u-%3u\n", v[0], v[1], v[0], v[1]-1); - } + int i, have_right_edge = 0, have_bottom_edge = 0, bad_u = 0, bad_v = 0; + u32 hacks = 0; + + for (i = 0; i < 3; i++) { + int j = (i + 1) % 3, k = (i + 2) % 3; + int du = abs((int)vertex_ptrs[i].u - (int)vertex_ptrs[j].u); + int dv = abs((int)vertex_ptrs[i].v - (int)vertex_ptrs[j].v); + if (du && (du & 7) != 7) + bad_u = 1; + if (dv && (dv & 7) != 7) + bad_v = 1; + if (vertex_ptrs[i].x == vertex_ptrs[j].x && vertex_ptrs[k].x < vertex_ptrs[j].x) + have_right_edge = 1; + if (vertex_ptrs[i].y == vertex_ptrs[j].y)// && vertex_ptrs[k].y < vertex_ptrs[j].y) + have_bottom_edge = 1; } + if (have_right_edge && bad_u) + hacks |= AHACK_TEXTURE_ADJ_U; + if (have_bottom_edge && bad_v) + hacks |= AHACK_TEXTURE_ADJ_V; + return hacks; } static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, @@ -1104,6 +1061,8 @@ static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, if (!enhancement_enable(psx_gpu)) return; + if ((current_command & RENDER_FLAGS_TEXTURE_MAP) && psx_gpu->hack_texture_adj) + psx_gpu->hacks_active |= uv_hack(psx_gpu, vertexes); shift_vertices3(vertex_ptrs); shift_triangle_area(); render_triangle_p(psx_gpu, vertex_ptrs, current_command); @@ -1314,8 +1273,6 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv(2, 10); get_vertex_data_xy_uv(3, 14); - if (psx_gpu->hack_texture_adj) - uv_hack(vertexes, 4); do_quad_enhanced(psx_gpu, vertexes, current_command); gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); break; @@ -1368,8 +1325,6 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv_rgb(2, 12); get_vertex_data_xy_uv_rgb(3, 18); - if (psx_gpu->hack_texture_adj) - uv_hack(vertexes, 4); do_quad_enhanced(psx_gpu, vertexes, current_command); gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); break; diff --git 
a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c index b5274362a..174e61d2e 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c @@ -192,6 +192,7 @@ typedef union #define gvld1_u8(d, s) d.u8 = vld1_u8(s) #define gvld1_u32(d, s) d.u32 = vld1_u32((const u32 *)(s)) +#define gvld1_u64(d, s) d.u64 = vld1_u64((const u64 *)(s)) #define gvld1q_u8(d, s) d.u8 = vld1q_u8(s) #define gvld1q_u16(d, s) d.u16 = vld1q_u16(s) #define gvld1q_u32(d, s) d.u32 = vld1q_u32((const u32 *)(s)) @@ -206,6 +207,8 @@ typedef union #define gvst1_u8(v, p) \ vst1_u8(p, v.u8) +#define gvst1_u64(v, p) \ + vst1_u64((u64 *)(p), v.u64) #define gvst1q_u16(v, p) \ vst1q_u16(p, v.u16) #define gvst1q_inc_u32(v, p, i) { \ @@ -388,10 +391,14 @@ typedef union #define gvld1_u8(d, s) d.m = _mm_loadu_si64(s) #define gvld1_u32 gvld1_u8 +#define gvld1_u64 gvld1_u8 #define gvld1q_u8(d, s) d.m = _mm_loadu_si128((__m128i *)(s)) #define gvld1q_u16 gvld1q_u8 #define gvld1q_u32 gvld1q_u8 +#define gvst1_u8(v, p) _mm_storeu_si64(p, v.m) +#define gvst1_u64 gvst1_u8 + #define gvst4_4_inc_u32(v0, v1, v2, v3, p, i) { \ __m128i t0 = _mm_unpacklo_epi32(v0.m, v1.m); \ __m128i t1 = _mm_unpacklo_epi32(v2.m, v3.m); \ @@ -1401,6 +1408,12 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, setup_spans_set_x4(alternate, down, alternate_active); \ height -= 4; \ } while(height > 0); \ + if (psx_gpu->hacks_active & (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V)) \ + { \ + vec_2x32u tmp; \ + gvld1_u64(tmp, &span_uvrg_offset[height - 2]); \ + gvst1_u64(tmp, &span_uvrg_offset[height - 1]); \ + } \ } \ @@ -1452,6 +1465,12 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, setup_spans_set_x4(alternate, up, alternate_active); \ height -= 4; \ } \ + if (psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_V) \ + { \ + vec_2x32u tmp; \ + gvld1_u64(tmp, &psx_gpu->span_uvrg_offset[1]); \ + gvst1_u64(tmp, &psx_gpu->span_uvrg_offset[0]); \ + } \ } 
\ #define half_left lo @@ -1714,6 +1733,12 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_spans_set_x4(none, down, no); height_minor_b -= 4; } + if (psx_gpu->hacks_active & (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V)) + { + vec_2x32u tmp; + gvld1_u64(tmp, &span_uvrg_offset[height_minor_b - 2]); + gvst1_u64(tmp, &span_uvrg_offset[height_minor_b - 1]); + } } } @@ -2152,6 +2177,15 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, #define setup_blocks_store_draw_mask_untextured_direct(_block, bits) \ +#define setup_blocks_uv_adj_hack_untextured(_block, edge_data, uvrg_offset) \ + +#define setup_blocks_uv_adj_hack_textured(_block, edge_data, uvrg_offset) \ +{ \ + u32 m_ = AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V; \ + if (unlikely(psx_gpu->hacks_active & m_)) \ + setup_blocks_uv_adj_hack(psx_gpu, _block, edge_data, (void *)uvrg_offset); \ +} \ + #define setup_blocks_add_blocks_indirect() \ num_blocks += span_num_blocks; \ \ @@ -2211,6 +2245,8 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_blocks_store_##shading##_##texturing(sw, dithering, target, edge); \ setup_blocks_store_draw_mask_##texturing##_##target(block, \ span_edge_data->right_mask); \ + setup_blocks_uv_adj_hack_##texturing(block, span_edge_data, \ + span_uvrg_offset); \ \ block++; \ } \ From f0ca3e2f2c919b91664c5f40322023ee3ec8c0ef Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 23 Aug 2024 00:09:55 +0300 Subject: [PATCH 554/597] gpu_neon: enable tex hack by default --- frontend/libretro.c | 2 +- frontend/libretro_core_options.h | 6 +++--- frontend/main.c | 1 + frontend/menu.c | 8 +++++--- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 4eb5caba3..cbaddb6ce 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2453,7 +2453,7 @@ static void update_variables(bool in_flight) } var.value = NULL; - var.key = "pcsx_rearmed_neon_enhancement_tex_adj"; + 
var.key = "pcsx_rearmed_neon_enhancement_tex_adj_v2"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index e72ddb5c2..f5c80eede 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -558,10 +558,10 @@ struct retro_core_option_v2_definition option_defs_us[] = { "disabled", }, { - "pcsx_rearmed_neon_enhancement_tex_adj", + "pcsx_rearmed_neon_enhancement_tex_adj_v2", "(GPU) Enhanced Resolution Texture Adjustment", "Enhanced Resolution Texture Adjustment", - "('Enhanced Resolution' Hack) Attempts to solve some texturing issues in some games, but causes new ones in others.", + "('Enhanced Resolution' Hack) Solves some texturing issues in some games in Enhanced Resolution mode. May cause a small performance hit.", NULL, "gpu_neon", { @@ -569,7 +569,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { { "enabled", NULL }, { NULL, NULL }, }, - "disabled", + "enabled", }, #endif /* GPU_NEON */ #ifdef GPU_PEOPS diff --git a/frontend/main.c b/frontend/main.c index 6df1731d1..82e670e99 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -141,6 +141,7 @@ void emu_set_default_config(void) pl_rearmed_cbs.gpu_neon.allow_interlace = 2; // auto pl_rearmed_cbs.gpu_neon.enhancement_enable = pl_rearmed_cbs.gpu_neon.enhancement_no_main = 0; + pl_rearmed_cbs.gpu_neon.enhancement_tex_adj = 1; pl_rearmed_cbs.gpu_peops.iUseDither = 0; pl_rearmed_cbs.gpu_peops.dwActFixes = 1<<7; pl_rearmed_cbs.gpu_unai.ilace_force = 0; diff --git a/frontend/menu.c b/frontend/menu.c index 9b9af7c87..15034a90f 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -454,7 +454,7 @@ static const struct { CE_INTVAL_P(gpu_neon.allow_interlace), CE_INTVAL_P(gpu_neon.enhancement_enable), CE_INTVAL_P(gpu_neon.enhancement_no_main), - CE_INTVAL_P(gpu_neon.enhancement_tex_adj), + CE_INTVAL_PV(gpu_neon.enhancement_tex_adj, 2), CE_INTVAL_P(gpu_peopsgl.bDrawDither), 
CE_INTVAL_P(gpu_peopsgl.iFilterType), CE_INTVAL_P(gpu_peopsgl.iFrameTexType), @@ -1411,10 +1411,12 @@ static int menu_loop_gfx_options(int id, int keys) static const char h_gpu_neon[] = "Configure built-in NEON GPU plugin"; static const char h_gpu_neon_enhanced[] = - "Renders in double resolution at the cost of lower performance\n" + "Renders in double resolution at perf. cost\n" "(not available for high resolution games)"; static const char h_gpu_neon_enhanced_hack[] = "Speed hack for above option (glitches some games)"; +static const char h_gpu_neon_enhanced_texadj[] = + "Solves some Enh. res. texture issues, some perf hit"; static const char *men_gpu_interlace[] = { "Off", "On", "Auto", NULL }; static menu_entry e_menu_plugin_gpu_neon[] = @@ -1422,7 +1424,7 @@ static menu_entry e_menu_plugin_gpu_neon[] = mee_enum ("Enable interlace mode", 0, pl_rearmed_cbs.gpu_neon.allow_interlace, men_gpu_interlace), mee_onoff_h ("Enhanced resolution", 0, pl_rearmed_cbs.gpu_neon.enhancement_enable, 1, h_gpu_neon_enhanced), mee_onoff_h ("Enhanced res. speed hack", 0, pl_rearmed_cbs.gpu_neon.enhancement_no_main, 1, h_gpu_neon_enhanced_hack), - mee_onoff ("Enh. res. texture adjust", 0, pl_rearmed_cbs.gpu_neon.enhancement_tex_adj, 1), + mee_onoff_h ("Enh. res. 
texture adjust", 0, pl_rearmed_cbs.gpu_neon.enhancement_tex_adj, 1, h_gpu_neon_enhanced_texadj), mee_end, }; From 76c5f5c43847baa8724b62d1851f00d74774dac4 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 4 Sep 2024 00:17:32 +0300 Subject: [PATCH 555/597] libretro: use shorter option names to fit narrow frontends --- frontend/libretro_core_options.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index f5c80eede..f20567b7d 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -151,7 +151,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, { "pcsx_rearmed_memcard2", - "Enable Second Memory Card (Shared)", + "Second Memory Card (Shared)", NULL, "Emulate a second memory card in slot 2. This will be shared by all games.", NULL, @@ -455,7 +455,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, { "pcsx_rearmed_show_overscan", - "(GPU) Show horizontal overscan", + "(GPU) Horizontal overscan", NULL, "The PSX can display graphics way into the horizontal borders, even if most screens would crop it. This option tries to display all such graphics. Note that this may result in unusual resolutions that your device might not handle well. The 'Hack' option is intended for the widescreen hacks.", NULL, @@ -487,7 +487,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { #define V(x) { #x, NULL } { "pcsx_rearmed_screen_centering_x", - "(GPU) Manual screen centering X", + "(GPU) Manual position X", NULL, "X offset of the frame buffer. Only effective when 'Screen centering' is set to 'Manual'.", NULL, @@ -500,7 +500,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, { "pcsx_rearmed_screen_centering_y", - "(GPU) Manual screen centering Y", + "(GPU) Manual position Y", NULL, "Y offset of the frame buffer. 
Only effective when 'Screen centering' is set to 'Manual'.", NULL, @@ -546,7 +546,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { { "pcsx_rearmed_neon_enhancement_no_main", "(GPU) Enhanced Resolution Speed Hack", - "Enhanced Resolution Speed Hack", + "Enh. Res. Speed Hack", "('Enhanced Resolution' Hack) Improves performance but reduces compatibility and may cause rendering errors.", NULL, "gpu_neon", @@ -560,7 +560,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { { "pcsx_rearmed_neon_enhancement_tex_adj_v2", "(GPU) Enhanced Resolution Texture Adjustment", - "Enhanced Resolution Texture Adjustment", + "Enh. Res. Texture Fixup", "('Enhanced Resolution' Hack) Solves some texturing issues in some games in Enhanced Resolution mode. May cause a small performance hit.", NULL, "gpu_neon", From 90545255b4061e044ab6933913486e09681078f2 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 4 Sep 2024 00:19:25 +0300 Subject: [PATCH 556/597] libretro: improve retro_memory_map libretro/pcsx_rearmed#845 --- frontend/libretro.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index cbaddb6ce..9b9149dae 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -1851,17 +1851,21 @@ strcasestr(const char *s, const char *find) static void set_retro_memmap(void) { -#ifndef NDEBUG + uint64_t flags_ram = RETRO_MEMDESC_SYSTEM_RAM; struct retro_memory_map retromap = { 0 }; - struct retro_memory_descriptor mmap = { - 0, psxM, 0, 0, 0, 0, 0x200000 + struct retro_memory_descriptor descs[] = { + { flags_ram, psxM, 0, 0x00000000, 0x5fe00000, 0, 0x200000 }, + { flags_ram, psxH, 0, 0x1f800000, 0x7ffffc00, 0, 0x000400 }, + // not ram but let the frontend patch it if it wants; should be last + { flags_ram, psxR, 0, 0x1fc00000, 0x5ff80000, 0, 0x080000 }, }; - retromap.descriptors = &mmap; - retromap.num_descriptors = 1; + retromap.descriptors = descs; + retromap.num_descriptors = sizeof(descs) 
/ sizeof(descs[0]); + if (Config.HLE) + retromap.num_descriptors--; environ_cb(RETRO_ENVIRONMENT_SET_MEMORY_MAPS, &retromap); -#endif } static void show_notification(const char *msg_str, From a3c46b7ffdd74fd773d10aeb796c27efa58a538e Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 8 Oct 2024 01:21:32 +0300 Subject: [PATCH 557/597] sync from libretro branch To ease maintenance burden. Includes lightrec. Only gpulib_thread_if is left out, unsure what to do with it. --- frontend/3ds/3ds_utils.h | 78 ++ frontend/3ds/pthread.h | 321 +++++ frontend/3ds/semaphore.h | 35 + frontend/3ds/sys/mman.h | 113 ++ frontend/3ds/utils.S | 25 + frontend/3ds/zconf.h | 511 +++++++ frontend/3ds/zlib.h | 1768 +++++++++++++++++++++++++ frontend/libretro.c | 2 +- frontend/libretro_core_options.h | 6 +- frontend/main.c | 4 +- frontend/plugin_lib.h | 4 + frontend/switch/sys/mman.h | 60 + frontend/switch/zconf.h | 511 +++++++ frontend/switch/zlib.h | 1768 +++++++++++++++++++++++++ frontend/vita/retro_inline.h | 39 + frontend/vita/sys/mman.h | 70 + frontend/wiiu/coreinit/memorymap.h | 199 +++ include/lightning/lightning.h | 1580 ++++++++++++++++++++++ include/lightrec/lightrec-config.h | 31 + libpcsxcore/database.c | 46 + libpcsxcore/lightrec/mem.c | 224 ++++ libpcsxcore/lightrec/mem.h | 12 +- libpcsxcore/lightrec/mem_wiiu.c | 113 ++ libpcsxcore/lightrec/plugin.c | 671 ++++++++++ libpcsxcore/lightrec/plugin.h | 20 + libpcsxcore/lightrec/sysconf.c | 13 + libpcsxcore/new_dynarec/new_dynarec.c | 8 +- plugins/dfsound/dma.c | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu.h | 1 + plugins/gpu_neon/psx_gpu_if.c | 14 +- plugins/gpulib/gpu.c | 59 +- plugins/gpulib/vout_pl.c | 12 + 32 files changed, 8298 insertions(+), 22 deletions(-) create mode 100644 frontend/3ds/3ds_utils.h create mode 100644 frontend/3ds/pthread.h create mode 100644 frontend/3ds/semaphore.h create mode 100644 frontend/3ds/sys/mman.h create mode 100644 frontend/3ds/utils.S create mode 100644 frontend/3ds/zconf.h create mode 100644 
frontend/3ds/zlib.h create mode 100644 frontend/switch/sys/mman.h create mode 100644 frontend/switch/zconf.h create mode 100644 frontend/switch/zlib.h create mode 100644 frontend/vita/retro_inline.h create mode 100644 frontend/vita/sys/mman.h create mode 100644 frontend/wiiu/coreinit/memorymap.h create mode 100644 include/lightning/lightning.h create mode 100644 include/lightrec/lightrec-config.h create mode 100644 libpcsxcore/lightrec/mem.c create mode 100644 libpcsxcore/lightrec/mem_wiiu.c create mode 100644 libpcsxcore/lightrec/plugin.c create mode 100644 libpcsxcore/lightrec/plugin.h create mode 100644 libpcsxcore/lightrec/sysconf.c diff --git a/frontend/3ds/3ds_utils.h b/frontend/3ds/3ds_utils.h new file mode 100644 index 000000000..75ab63b91 --- /dev/null +++ b/frontend/3ds/3ds_utils.h @@ -0,0 +1,78 @@ +#ifndef _3DS_UTILS_H +#define _3DS_UTILS_H + +#include +#include +#include <3ds.h> + +#ifdef OS_HEAP_AREA_BEGIN // defined in libctru 2.0+ +#define USE_CTRULIB_2 1 +#endif + +#define MEMOP_PROT 6 +#define MEMOP_MAP 4 +#define MEMOP_UNMAP 5 + +#define DEBUG_HOLD() do{printf("%s@%s:%d.\n",__FUNCTION__, __FILE__, __LINE__);fflush(stdout);wait_for_input();}while(0) + +void wait_for_input(void); + +extern __attribute__((weak)) int __ctr_svchax; + +static bool has_rosalina; + +static inline void check_rosalina() { + int64_t version; + uint32_t major; + + has_rosalina = false; + + if (!svcGetSystemInfo(&version, 0x10000, 0)) { + major = GET_VERSION_MAJOR(version); + + if (major >= 8) + has_rosalina = true; + } +} + +void ctr_clear_cache(void); + +typedef int32_t (*ctr_callback_type)(void); + +static inline void ctr_invalidate_ICache_kernel(void) +{ + __asm__ volatile( + "cpsid aif\n\t" + "mov r0, #0\n\t" + "mcr p15, 0, r0, c7, c5, 0\n\t"); +} + +static inline void ctr_flush_DCache_kernel(void) +{ + __asm__ volatile( + "cpsid aif\n\t" + "mov r0, #0\n\t" + "mcr p15, 0, r0, c7, c10, 0\n\t"); +} + +static inline void ctr_invalidate_ICache(void) +{ + 
svcBackdoor((ctr_callback_type)ctr_invalidate_ICache_kernel); +} + +static inline void ctr_flush_DCache(void) +{ + svcBackdoor((ctr_callback_type)ctr_flush_DCache_kernel); +} + +static inline void ctr_flush_invalidate_cache(void) +{ + if (has_rosalina) { + ctr_clear_cache(); + } else { + ctr_flush_DCache(); + ctr_invalidate_ICache(); + } +} + +#endif // _3DS_UTILS_H diff --git a/frontend/3ds/pthread.h b/frontend/3ds/pthread.h new file mode 100644 index 000000000..76f1681c9 --- /dev/null +++ b/frontend/3ds/pthread.h @@ -0,0 +1,321 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (gx_pthread.h). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _CTR_PTHREAD_WRAP_CTR_ +#define _CTR_PTHREAD_WRAP_CTR_ + +#include "3ds_utils.h" + +#include +#include + +#define STACKSIZE (4 * 1024) +#define FALSE 0 + +#ifndef PTHREAD_SCOPE_PROCESS +/* An earlier version of devkitARM does not define the pthread types. Can remove in r54+. */ + +typedef Thread pthread_t; +typedef LightLock pthread_mutex_t; +typedef void* pthread_mutexattr_t; +typedef int pthread_attr_t; +typedef LightEvent pthread_cond_t; +typedef int pthread_condattr_t; +#endif + +#ifndef USE_CTRULIB_2 +/* Backported CondVar API from libctru 2.0, and under its license: + https://github.com/devkitPro/libctru + Slightly modified for compatibility with older libctru. */ + +typedef s32 CondVar; + +static inline Result syncArbitrateAddress(s32* addr, ArbitrationType type, s32 value) +{ + return svcArbitrateAddress(__sync_get_arbiter(), (u32)addr, type, value, 0); +} + +static inline Result syncArbitrateAddressWithTimeout(s32* addr, ArbitrationType type, s32 value, s64 timeout_ns) +{ + return svcArbitrateAddress(__sync_get_arbiter(), (u32)addr, type, value, timeout_ns); +} + +static inline void __dmb(void) +{ + __asm__ __volatile__("mcr p15, 0, %[val], c7, c10, 5" :: [val] "r" (0) : "memory"); +} + +static inline void CondVar_BeginWait(CondVar* cv, LightLock* lock) +{ + s32 val; + do + val = __ldrex(cv) - 1; + while (__strex(cv, val)); + LightLock_Unlock(lock); +} + +static inline bool CondVar_EndWait(CondVar* cv, s32 num_threads) +{ + bool hasWaiters; + s32 val; + + do { + val = __ldrex(cv); + hasWaiters = val < 0; + if (hasWaiters) + { + if (num_threads < 0) + val = 0; + else if (val <= -num_threads) + val += num_threads; + else + val = 0; + } + } while (__strex(cv, val)); + + return hasWaiters; +} + +static inline void CondVar_Init(CondVar* cv) +{ + *cv = 0; +} + +static inline void CondVar_Wait(CondVar* cv, LightLock* lock) +{ + CondVar_BeginWait(cv, lock); + syncArbitrateAddress(cv, ARBITRATION_WAIT_IF_LESS_THAN, 0); + LightLock_Lock(lock); +} + 
+static inline int CondVar_WaitTimeout(CondVar* cv, LightLock* lock, s64 timeout_ns) +{ + CondVar_BeginWait(cv, lock); + + bool timedOut = false; + Result rc = syncArbitrateAddressWithTimeout(cv, ARBITRATION_WAIT_IF_LESS_THAN_TIMEOUT, 0, timeout_ns); + if (R_DESCRIPTION(rc) == RD_TIMEOUT) + { + timedOut = CondVar_EndWait(cv, 1); + __dmb(); + } + + LightLock_Lock(lock); + return timedOut; +} + +static inline void CondVar_WakeUp(CondVar* cv, s32 num_threads) +{ + __dmb(); + if (CondVar_EndWait(cv, num_threads)) + syncArbitrateAddress(cv, ARBITRATION_SIGNAL, num_threads); + else + __dmb(); +} + +static inline void CondVar_Signal(CondVar* cv) +{ + CondVar_WakeUp(cv, 1); +} + +static inline void CondVar_Broadcast(CondVar* cv) +{ + CondVar_WakeUp(cv, ARBITRATION_SIGNAL_ALL); +} +/* End libctru 2.0 backport */ +#endif + +/* libctru threads return void but pthreads return void pointer */ +static bool mutex_inited = false; +static LightLock safe_double_thread_launch; +static void *(*start_routine_jump)(void*); + +static void ctr_thread_launcher(void* data) +{ + void *(*start_routine_jump_safe)(void*) = start_routine_jump; + LightLock_Unlock(&safe_double_thread_launch); + start_routine_jump_safe(data); +} + +static inline int pthread_create(pthread_t *thread, + const pthread_attr_t *attr, void *(*start_routine)(void*), void *arg) +{ + s32 prio = 0; + Thread new_ctr_thread; + int procnum = -2; // use default cpu + bool isNew3DS; + + APT_CheckNew3DS(&isNew3DS); + + if (isNew3DS) + procnum = 2; + + if (!mutex_inited) + { + LightLock_Init(&safe_double_thread_launch); + mutex_inited = true; + } + + /*Must wait if attempting to launch 2 threads at once to prevent corruption of function pointer*/ + while (LightLock_TryLock(&safe_double_thread_launch) != 0); + + svcGetThreadPriority(&prio, CUR_THREAD_HANDLE); + + start_routine_jump = start_routine; + new_ctr_thread = threadCreate(ctr_thread_launcher, arg, STACKSIZE, prio - 1, procnum, FALSE); + + if (!new_ctr_thread) + { + 
LightLock_Unlock(&safe_double_thread_launch); + return EAGAIN; + } + + *thread = (pthread_t)new_ctr_thread; + return 0; +} + +static inline pthread_t pthread_self(void) +{ + return (pthread_t)threadGetCurrent(); +} + +static inline int pthread_mutex_init(pthread_mutex_t *mutex, + const pthread_mutexattr_t *attr) +{ + LightLock_Init((LightLock *)mutex); + return 0; +} + +static inline int pthread_mutex_destroy(pthread_mutex_t *mutex) +{ + /*Nothing to destroy*/ + return 0; +} + +static inline int pthread_mutex_lock(pthread_mutex_t *mutex) +{ + LightLock_Lock((LightLock *)mutex); + return 0; +} + +static inline int pthread_mutex_unlock(pthread_mutex_t *mutex) +{ + LightLock_Unlock((LightLock *)mutex); + return 0; +} + +static inline void pthread_exit(void *retval) +{ + /*Yes the pointer to int cast is not ideal*/ + /*threadExit((int)retval);*/ + (void)retval; + + threadExit(0); +} + +static inline int pthread_detach(pthread_t thread) +{ + threadDetach((Thread)thread); + return 0; +} + +static inline int pthread_join(pthread_t thread, void **retval) +{ + /*retval is ignored*/ + if(threadJoin((Thread)thread, INT64_MAX)) + return -1; + + threadFree((Thread)thread); + + return 0; +} + +static inline int pthread_mutex_trylock(pthread_mutex_t *mutex) +{ + return LightLock_TryLock((LightLock *)mutex); +} + +static inline int pthread_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex) +{ + CondVar_Wait((CondVar *)cond, (LightLock *)mutex); + return 0; +} + +static inline int pthread_cond_timedwait(pthread_cond_t *cond, + pthread_mutex_t *mutex, const struct timespec *abstime) +{ + struct timespec now = {0}; + /* Missing clock_gettime*/ + struct timeval tm; + int retval = 0; + + gettimeofday(&tm, NULL); + now.tv_sec = tm.tv_sec; + now.tv_nsec = tm.tv_usec * 1000; + s64 timeout = (abstime->tv_sec - now.tv_sec) * 1000000000 + (abstime->tv_nsec - now.tv_nsec); + + if (timeout < 0) + { + retval = ETIMEDOUT; + } + else if (CondVar_WaitTimeout((CondVar *)cond, (LightLock 
*)mutex, timeout)) + { + retval = ETIMEDOUT; + } + + return retval; +} + +static inline int pthread_cond_init(pthread_cond_t *cond, + const pthread_condattr_t *attr) +{ + CondVar_Init((CondVar *)cond); + return 0; +} + +static inline int pthread_cond_signal(pthread_cond_t *cond) +{ + CondVar_Signal((CondVar *)cond); + return 0; +} + +static inline int pthread_cond_broadcast(pthread_cond_t *cond) +{ + CondVar_Broadcast((CondVar *)cond); + return 0; +} + +static inline int pthread_cond_destroy(pthread_cond_t *cond) +{ + /*Nothing to destroy*/ + return 0; +} + +static inline int pthread_equal(pthread_t t1, pthread_t t2) +{ + if (threadGetHandle((Thread)t1) == threadGetHandle((Thread)t2)) + return 1; + return 0; +} + +#endif diff --git a/frontend/3ds/semaphore.h b/frontend/3ds/semaphore.h new file mode 100644 index 000000000..6eddd98fc --- /dev/null +++ b/frontend/3ds/semaphore.h @@ -0,0 +1,35 @@ + +#ifndef _3DS_SEMAPHORE_WRAP__ +#define _3DS_SEMAPHORE_WRAP__ + +#include +#include +#include + +#include "3ds_utils.h" + +typedef uint32_t sem_t; + +static inline int sem_init(sem_t *sem, int pshared, unsigned int value) +{ + return svcCreateSemaphore(sem, value, INT32_MAX); +} + +static inline int sem_post(sem_t *sem) +{ + int32_t count; + return svcReleaseSemaphore(&count, *sem, 1); +} + +static inline int sem_wait(sem_t *sem) +{ + return svcWaitSynchronization(*sem, INT64_MAX); +} + +static inline int sem_destroy(sem_t *sem) +{ + return svcCloseHandle(*sem); +} + +#endif //_3DS_SEMAPHORE_WRAP__ + diff --git a/frontend/3ds/sys/mman.h b/frontend/3ds/sys/mman.h new file mode 100644 index 000000000..fdf5ac6a9 --- /dev/null +++ b/frontend/3ds/sys/mman.h @@ -0,0 +1,113 @@ +#ifndef MMAN_H +#define MMAN_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include + +#include "3ds_utils.h" + +#define PROT_READ 0b001 +#define PROT_WRITE 0b010 +#define PROT_EXEC 0b100 +#define MAP_PRIVATE 2 +#define MAP_FIXED 0x10 +#define MAP_ANONYMOUS 0x20 + 
+#define MAP_FAILED ((void *)-1) + +static void* dynarec_cache = NULL; +static void* dynarec_cache_mapping = NULL; + +static inline void* mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) +{ + (void)fd; + (void)offset; + + void* addr_out; + + if((prot == (PROT_READ | PROT_WRITE | PROT_EXEC)) && + (flags == (MAP_PRIVATE | MAP_ANONYMOUS))) + { + if(__ctr_svchax) + { + /* this hack works only for pcsx_rearmed */ + uint32_t currentHandle; + + if (!dynarec_cache) { + dynarec_cache = memalign(0x1000, len); + if (!dynarec_cache) + return MAP_FAILED; + } + + svcDuplicateHandle(&currentHandle, 0xFFFF8001); + svcControlProcessMemory(currentHandle, addr, dynarec_cache, + len, MEMOP_MAP, prot); + svcCloseHandle(currentHandle); + dynarec_cache_mapping = addr; + memset(addr, 0, len); + return addr; + } + else + { + printf("tried to mmap RWX pages without svcControlProcessMemory access !\n"); + return MAP_FAILED; + } + + } + + addr_out = memalign(0x1000, len); + if (!addr_out) + return MAP_FAILED; + + memset(addr_out, 0, len); + return addr_out; +} + +static inline int mprotect(void *addr, size_t len, int prot) +{ + if(__ctr_svchax) + { + uint32_t currentHandle; + svcDuplicateHandle(&currentHandle, 0xFFFF8001); + svcControlProcessMemory(currentHandle, addr, NULL, + len, MEMOP_PROT, prot); + svcCloseHandle(currentHandle); + return 0; + } + + printf("mprotect called without svcControlProcessMemory access !\n"); + return -1; +} + +static inline int munmap(void *addr, size_t len) +{ + if((addr == dynarec_cache_mapping) && __ctr_svchax) + { + uint32_t currentHandle; + svcDuplicateHandle(&currentHandle, 0xFFFF8001); + svcControlProcessMemory(currentHandle, + dynarec_cache, dynarec_cache_mapping, + len, MEMOP_UNMAP, 0b111); + svcCloseHandle(currentHandle); + dynarec_cache_mapping = NULL; + + } + else + free(addr); + + return 0; +} + +#ifdef __cplusplus +}; +#endif + +#endif // MMAN_H + diff --git a/frontend/3ds/utils.S b/frontend/3ds/utils.S new file mode 100644 index
000000000..c8df651a7 --- /dev/null +++ b/frontend/3ds/utils.S @@ -0,0 +1,25 @@ + .text + .arm + .balign 4 + + .func ctr_clear_cache_kernel +ctr_clear_cache_kernel: + cpsid aif + mov r0, #0 + mcr p15, 0, r0, c7, c10, 0 @ Clean entire data cache + mcr p15, 0, r0, c7, c10, 5 @ Data Memory Barrier + mcr p15, 0, r0, c7, c5, 0 @ Invalidate entire instruction cache / Flush BTB + mcr p15, 0, r0, c7, c10, 4 @ Data Sync Barrier + bx lr + .endfunc + + @@ Clear the entire data cache / invalidate the instruction cache. Uses + @@ Rosalina svcCustomBackdoor to avoid svcBackdoor stack corruption + @@ during interrupts. + .global ctr_clear_cache + .func ctr_clear_cache +ctr_clear_cache: + ldr r0, =ctr_clear_cache_kernel + svc 0x80 @ svcCustomBackdoor + bx lr + .endfunc diff --git a/frontend/3ds/zconf.h b/frontend/3ds/zconf.h new file mode 100644 index 000000000..996fff292 --- /dev/null +++ b/frontend/3ds/zconf.h @@ -0,0 +1,511 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2013 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. + * Even better than compiling with -DZ_PREFIX would be to use configure to set + * this permanently in zconf.h using "./configure --zprefix". 
+ */ +#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ +# define Z_PREFIX_SET + +/* all linked symbols */ +# define _dist_code z__dist_code +# define _length_code z__length_code +# define _tr_align z__tr_align +# define _tr_flush_bits z__tr_flush_bits +# define _tr_flush_block z__tr_flush_block +# define _tr_init z__tr_init +# define _tr_stored_block z__tr_stored_block +# define _tr_tally z__tr_tally +# define adler32 z_adler32 +# define adler32_combine z_adler32_combine +# define adler32_combine64 z_adler32_combine64 +# ifndef Z_SOLO +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# endif +# define crc32 z_crc32 +# define crc32_combine z_crc32_combine +# define crc32_combine64 z_crc32_combine64 +# define deflate z_deflate +# define deflateBound z_deflateBound +# define deflateCopy z_deflateCopy +# define deflateEnd z_deflateEnd +# define deflateInit2_ z_deflateInit2_ +# define deflateInit_ z_deflateInit_ +# define deflateParams z_deflateParams +# define deflatePending z_deflatePending +# define deflatePrime z_deflatePrime +# define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep +# define deflateSetDictionary z_deflateSetDictionary +# define deflateSetHeader z_deflateSetHeader +# define deflateTune z_deflateTune +# define deflate_copyright z_deflate_copyright +# define get_crc_table z_get_crc_table +# ifndef Z_SOLO +# define gz_error z_gz_error +# define gz_intmax z_gz_intmax +# define gz_strwinerror z_gz_strwinerror +# define gzbuffer z_gzbuffer +# define gzclearerr z_gzclearerr +# define gzclose z_gzclose +# define gzclose_r z_gzclose_r +# define gzclose_w z_gzclose_w +# define gzdirect z_gzdirect +# define gzdopen z_gzdopen +# define gzeof z_gzeof +# define gzerror z_gzerror +# define gzflush z_gzflush +# define gzgetc z_gzgetc +# define gzgetc_ z_gzgetc_ +# define gzgets z_gzgets +# define gzoffset z_gzoffset +# define gzoffset64 z_gzoffset64 +# define gzopen z_gzopen +# 
define gzopen64 z_gzopen64 +# ifdef _WIN32 +# define gzopen_w z_gzopen_w +# endif +# define gzprintf z_gzprintf +# define gzvprintf z_gzvprintf +# define gzputc z_gzputc +# define gzputs z_gzputs +# define gzread z_gzread +# define gzrewind z_gzrewind +# define gzseek z_gzseek +# define gzseek64 z_gzseek64 +# define gzsetparams z_gzsetparams +# define gztell z_gztell +# define gztell64 z_gztell64 +# define gzungetc z_gzungetc +# define gzwrite z_gzwrite +# endif +# define inflate z_inflate +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define inflateBackInit_ z_inflateBackInit_ +# define inflateCopy z_inflateCopy +# define inflateEnd z_inflateEnd +# define inflateGetHeader z_inflateGetHeader +# define inflateInit2_ z_inflateInit2_ +# define inflateInit_ z_inflateInit_ +# define inflateMark z_inflateMark +# define inflatePrime z_inflatePrime +# define inflateReset z_inflateReset +# define inflateReset2 z_inflateReset2 +# define inflateSetDictionary z_inflateSetDictionary +# define inflateGetDictionary z_inflateGetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateUndermine z_inflateUndermine +# define inflateResetKeep z_inflateResetKeep +# define inflate_copyright z_inflate_copyright +# define inflate_fast z_inflate_fast +# define inflate_table z_inflate_table +# ifndef Z_SOLO +# define uncompress z_uncompress +# endif +# define zError z_zError +# ifndef Z_SOLO +# define zcalloc z_zcalloc +# define zcfree z_zcfree +# endif +# define zlibCompileFlags z_zlibCompileFlags +# define zlibVersion z_zlibVersion + +/* all zlib typedefs in zlib.h and zconf.h */ +# define Byte z_Byte +# define Bytef z_Bytef +# define alloc_func z_alloc_func +# define charf z_charf +# define free_func z_free_func +# ifndef Z_SOLO +# define gzFile z_gzFile +# endif +# define gz_header z_gz_header +# define gz_headerp z_gz_headerp +# define in_func z_in_func +# define intf z_intf +# define out_func 
z_out_func +# define uInt z_uInt +# define uIntf z_uIntf +# define uLong z_uLong +# define uLongf z_uLongf +# define voidp z_voidp +# define voidpc z_voidpc +# define voidpf z_voidpf + +/* all zlib structs in zlib.h and zconf.h */ +# define gz_header_s z_gz_header_s +# define internal_state z_internal_state + +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif +#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). 
*/ +# define STDC +#endif + +#ifndef STDC +# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ +# define const /* note: need a more gentle solution here */ +# endif +#endif + +#if defined(ZLIB_CONST) && !defined(z_const) +# define z_const const +#else +# define z_const +#endif + +/* Some Mac compilers merge all .h files incorrectly: */ +#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__) +# define NO_DUMMY_DECL +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus a few kilobytes + for small objects. 
+*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +#ifndef Z_ARG /* function prototypes for stdarg */ +# if defined(STDC) || defined(Z_HAVE_STDARG_H) +# define Z_ARG(args) args +# else +# define Z_ARG(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. + */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# include + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. 
*/ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#if defined (__BEOS__) +# ifdef ZLIB_DLL +# ifdef ZLIB_INTERNAL +# define ZEXPORT __declspec(dllexport) +# define ZEXPORTVA __declspec(dllexport) +# else +# define ZEXPORT __declspec(dllimport) +# define ZEXPORTVA __declspec(dllimport) +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#ifdef SMALL_MEDIUM + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC) +# include +# if (UINT_MAX == 0xffffffffUL) +# define Z_U4 unsigned +# elif (ULONG_MAX == 0xffffffffUL) +# define Z_U4 unsigned long +# elif (USHRT_MAX == 0xffffffffUL) +# define Z_U4 unsigned short +# endif +#endif + +#ifdef Z_U4 + typedef Z_U4 z_crc_t; +#else + typedef unsigned long z_crc_t; +#endif + +#if 1 /* was set to #if 1 by ./configure */ +# define Z_HAVE_UNISTD_H +#endif + +#if 1 /* was set to #if 1 by ./configure */ +# define Z_HAVE_STDARG_H +#endif + +#ifdef STDC +# ifndef Z_SOLO +# include /* for off_t */ +# endif +#endif + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +# include /* for va_list */ +# endif +#endif + +#ifdef _WIN32 +# ifndef Z_SOLO +# include /* for wchar_t */ +# endif 
+#endif + +/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and + * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even + * though the former does not conform to the LFS document), but considering + * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as + * equivalently requesting no 64-bit operations + */ +#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1 +# undef _LARGEFILE64_SOURCE +#endif + +#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H) +# define Z_HAVE_UNISTD_H +#endif +#ifndef Z_SOLO +# if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE) +# include /* for SEEK_*, off_t, and _LFS64_LARGEFILE */ +# ifdef VMS +# include /* for off_t */ +# endif +# ifndef z_off_t +# define z_off_t off_t +# endif +# endif +#endif + +#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0 +# define Z_LFS64 +#endif + +#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64) +# define Z_LARGE64 +#endif + +#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64) +# define Z_WANT64 +#endif + +#if !defined(SEEK_SET) && !defined(Z_SOLO) +# define SEEK_SET 0 /* Seek from beginning of file. */ +# define SEEK_CUR 1 /* Seek from current position. 
*/ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif + +#ifndef z_off_t +# define z_off_t long +#endif + +#if !defined(_WIN32) && defined(Z_LARGE64) +# define z_off64_t off64_t +#else +# if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO) +# define z_off64_t __int64 +# else +# define z_off64_t z_off_t +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) + #pragma map(deflateInit_,"DEIN") + #pragma map(deflateInit2_,"DEIN2") + #pragma map(deflateEnd,"DEEND") + #pragma map(deflateBound,"DEBND") + #pragma map(inflateInit_,"ININ") + #pragma map(inflateInit2_,"ININ2") + #pragma map(inflateEnd,"INEND") + #pragma map(inflateSync,"INSY") + #pragma map(inflateSetDictionary,"INSEDI") + #pragma map(compressBound,"CMBND") + #pragma map(inflate_table,"INTABL") + #pragma map(inflate_fast,"INFA") + #pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ diff --git a/frontend/3ds/zlib.h b/frontend/3ds/zlib.h new file mode 100644 index 000000000..3e0c7672a --- /dev/null +++ b/frontend/3ds/zlib.h @@ -0,0 +1,1768 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.8, April 28th, 2013 + + Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. 
Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 + (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). +*/ + +#ifndef ZLIB_H +#define ZLIB_H + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.2.8" +#define ZLIB_VERNUM 0x1280 +#define ZLIB_VER_MAJOR 1 +#define ZLIB_VER_MINOR 2 +#define ZLIB_VER_REVISION 8 +#define ZLIB_VER_SUBREVISION 0 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed data. + This version of the library supports only one compression method (deflation) + but other algorithms will be added later and will have the same stream + interface. + + Compression can be done in a single step if the buffers are large enough, + or can be done by repeated calls of the compression function. In the latter + case, the application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip streams in memory as well. 
+ + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never crash + even in case of corrupted input. +*/ + +typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +typedef void (*free_func) OF((voidpf opaque, voidpf address)); + +struct internal_state; + +typedef struct z_stream_s { + z_const Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + + Bytef *next_out; /* next output byte should be put there */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + + z_const char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text */ + uLong adler; /* adler32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. 
+*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has dropped + to zero. It must update next_out and avail_out when avail_out has dropped + to zero. The application must initialize zalloc, zfree and opaque before + calling the init function. All other fields are set by the compression + library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this if + the symbol MAXSEG_64K is defined (see zconf.h). 
WARNING: On MSDOS, pointers + returned by zalloc for objects of exactly 65536 bytes *must* have their + offset normalized to zero. The default allocation function provided by this + library ensures this (see zutil.c). To reduce memory requirements and avoid + any allocation of 64K objects, at the expense of compression ratio, compile + the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or progress + reports. After compression, total_in holds the total size of the + uncompressed data and may be saved for use in the decompressor (particularly + if the decompressor wants to decompress everything in a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +#define Z_TREES 6 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative values + * are errors, positive values are used for special but normal events. 
+ */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field (though see inflate()) */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +#define zlib_version zlibVersion() +/* for compatibility with versions < 1.0.2 */ + + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion OF((void)); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is not + compatible with the zlib.h header file used by the application. This check + is automatically made by deflateInit and inflateInit. + */ + +/* +ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. If + zalloc and zfree are set to Z_NULL, deflateInit updates them to use default + allocation functions. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at all + (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION + requests a default compromise between speed and compression (currently + equivalent to level 6). 
+ + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if level is not a valid compression level, or + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). msg is set to null + if there is no error message. deflateInit does not perform any compression: + this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary (in interactive applications). Some + output may be provided even if flush is not set. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating avail_in or avail_out accordingly; avail_out should + never be zero before the call. The application can consume the compressed + output when it wants, for example when the output buffer is full (avail_out + == 0), or after each call of deflate(). 
If deflate returns Z_OK and with + zero avail_out, it must be called again after making room in the output + buffer because there might be more output pending. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In + particular avail_in is zero after the call if enough output space has been + provided before the call.) Flushing may degrade compression for some + compression algorithms and so it should be used only when necessary. This + completes the current deflate block and follows it with an empty stored block + that is three bits plus filler bits to the next byte, followed by four bytes + (00 00 ff ff). + + If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the + output buffer, but the output is not aligned to a byte boundary. All of the + input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. + This completes the current deflate block and follows it with an empty fixed + codes block that is 10 bits long. This assures that enough bytes are output + in order for the decompressor to finish the block before the empty fixed code + block. + + If flush is set to Z_BLOCK, a deflate block is completed and emitted, as + for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to + seven bits of the current block are held to be written as the next byte after + the next deflate block is completed. In this case, the decompressor may not + be provided enough bits at this point in order to complete decompression of + the data provided so far to the compressor. It may need to wait for the next + block to be emitted. 
This is for advanced applications that need to control + the emission of deflate blocks. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six to avoid repeated flush markers due to + avail_out == 0 on return. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there was + enough output space; if deflate returns with Z_OK, this function must be + called again with Z_FINISH and more output space (updated avail_out) but no + more input data, until it returns with Z_STREAM_END or an error. After + deflate has returned Z_STREAM_END, the only possible operations on the stream + are deflateReset or deflateEnd. + + Z_FINISH can be used immediately after deflateInit if all the compression + is to be done in a single step. In this case, avail_out must be at least the + value returned by deflateBound (see below). Then deflate is guaranteed to + return Z_STREAM_END. If not enough output space is provided, deflate will + not return Z_STREAM_END, and it must be called again as described above. + + deflate() sets strm->adler to the adler32 checksum of all input read + so far (that is, total_in bytes). + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered + binary. 
This field is only for information purposes and does not affect the + compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was Z_NULL), Z_BUF_ERROR if no progress is possible + (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not + fatal, and deflate() can be called again with more input and more output + space to continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, msg + may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. If next_in is not Z_NULL and avail_in is large enough (the + exact value depends on the compression method), inflateInit determines the + compression method from the zlib header and allocates all data structures + accordingly; otherwise the allocation will be deferred to the first call of + inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to + use default allocation functions. 
+ + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit() does not process any header information -- that is deferred + until inflate() is called. +*/ + + +ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in is updated and processing will + resume at this point for the next call of inflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there is + no more input data or no more space in the output buffer (see below about + the flush parameter). + + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating the next_* and avail_* values accordingly. 
The + application can consume the uncompressed output when it wants, for example + when the output buffer is full (avail_out == 0), or after each call of + inflate(). If inflate returns Z_OK and with zero avail_out, it must be + called again after making room in the output buffer because there might be + more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, + Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() + stop if and when it gets to the next deflate block boundary. When decoding + the zlib or gzip format, this will cause inflate() to return immediately + after the header and before the first block. When doing a raw inflate, + inflate() will go ahead and process the first block, and will return when it + gets to the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + Also to assist in this, on return inflate() will set strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 if + inflate() is currently decoding the last block in the deflate stream, plus + 128 if inflate() returned immediately after decoding an end-of-block code or + decoding the complete header up to just before the first byte of the deflate + stream. The end-of-block will not be indicated until all of the uncompressed + data from that block has been written to strm->next_out. The number of + unused bits may in general be greater than seven, except when bit 7 of + data_type is set, in which case the number of unused bits will be less than + eight. data_type is set as noted here every time inflate() returns for all + flush options, and so can be used to determine the amount of currently + consumed input in bits. 
+ + The Z_TREES option behaves as Z_BLOCK does, but it also returns when the + end of each deflate block header is reached, before any actual data in that + block is decoded. This allows the caller to determine the length of the + deflate block header for later use in random access within a deflate block. + 256 is added to the value of strm->data_type when inflate() returns + immediately after reaching the end of the deflate block header. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step (a + single call of inflate), the parameter flush should be set to Z_FINISH. In + this case all pending input is processed and all pending output is flushed; + avail_out must be large enough to hold all of the uncompressed data for the + operation to complete. (The size of the uncompressed data may have been + saved by the compressor for this purpose.) The use of Z_FINISH is not + required to perform an inflation in one step. However it may be used to + inform inflate that a faster approach can be used for the single inflate() + call. Z_FINISH also informs inflate to not maintain a sliding window if the + stream completes, which reduces inflate's memory footprint. If the stream + does not complete, either because not all of the stream is provided or not + enough output space is provided, then a sliding window will be allocated and + inflate() can be called again to continue the operation as if Z_NO_FLUSH had + been used. + + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the effects of the flush parameter in this implementation are + on the return value of inflate() as noted below, when inflate() returns early + when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of + memory for a sliding window when Z_FINISH is used. 
+ + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the Adler-32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the Adler-32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed adler32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() can decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically, if requested when + initializing with inflateInit2(). Any information contained in the gzip + header is not retained, so applications that need that information should + instead use raw inflate, see inflateInit2() below, or inflateBack() and + perform their own processing of the gzip header and trailer. When processing + gzip-wrapped deflate data, strm->adler is set to the CRC-32 of the output + produced so far. The CRC-32 is checked against the gzip trailer. + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value), Z_STREAM_ERROR if the stream structure was inconsistent (for example + next_in or next_out was Z_NULL), Z_MEM_ERROR if there was not enough memory, + Z_BUF_ERROR if no progress is possible or if there was not enough room in the + output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing.
If Z_DATA_ERROR is returned, the application may + then call inflateSync() to look for a good compression block if a partial + recovery of the data is desired. +*/ + + +ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state + was inconsistent. In the error case, msg may be set but then points to a + static string (which must not be deallocated). +*/ + + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy)); + + This is another version of deflateInit with more compression options. The + fields next_in, zalloc, zfree and opaque must be initialized before by the + caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute an adler32 check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. 
The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), no + header crc, and the operating system will be set to 255 (unknown). If a + gzip stream is being written, strm->adler is a crc32 instead of an adler32. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but is + slow and reduces compression ratio; memLevel=9 uses maximum memory for + optimal speed. The default value is 8. See zconf.h for total memory usage + as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as + fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The + strategy parameter only affects the compression ratio but not the + correctness of the compressed output even if it is not set appropriately. + Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler + decoder for special applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid + method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is + incompatible with the version assumed by the caller (ZLIB_VERSION). msg is + set to null if there is no error message. 
deflateInit2 does not perform any + compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary. + + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be + useful should be put at the end of the dictionary, not at the front. In + addition, the current implementation of deflate will use at most the window + size minus 262 bytes of the provided dictionary. 
+ + Upon return of this function, strm->adler is set to the adler32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The adler32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + adler32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and can + consume lots of memory. + + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); +/* + This function is equivalent to deflateEnd followed by deflateInit, + but does not free and reallocate all the internal compression state. The + stream will keep the same compression level and any other attributes that + may have been set by deflateInit2. 
+ + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, + int level, + int strategy)); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2. This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different strategy. + If the compression level is changed, the input available so far is + compressed with the old level (and may be flushed); the new level will take + effect only at the next call of deflate(). + + Before the call of deflateParams, the stream state must be set as for + a call of deflate(), since the currently available input may have to be + compressed and flushed. In particular, strm->avail_out must be non-zero. + + deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source + stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR if + strm->avail_out was zero. +*/ + +ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain)); +/* + Fine tune deflate's internal compression parameters. This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. 
+ */ + +ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, + uLong sourceLen)); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() or + deflateInit2(), and after deflateSetHeader(), if used. This would be used + to allocate an output buffer for deflation in a single pass, and so would be + called before deflate(). If that first deflate() call is provided the + sourceLen input bytes, an output buffer allocated to the size returned by + deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed + to return Z_STREAM_END. Note that it is possible for the compressed size to + be larger than the value returned by deflateBound() if flush options other + than Z_FINISH or Z_NO_FLUSH are used. +*/ + +ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm, + unsigned *pending, + int *bits)); +/* + deflatePending() returns the number of bytes and bits of output that have + been generated, but not yet provided in the available output. The bytes not + provided would be due to the available output space having been consumed. + The number of bits of output not provided is between 0 and 7, where they + await more bits to join them in order to fill out a full byte. If pending + or bits are Z_NULL, then those values are not set. + + deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. + */ + +ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the bits + leftover from a previous deflate stream when appending to it. As such, this + function can only be used for raw deflate, and must be used before the first + deflate() call after a deflateInit2() or deflateReset().
bits must be less + than or equal to 16, and that many of the least significant bits of value + will be inserted in the output. + + deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough + room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, + gz_headerp head)); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflag is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to 255, with no extra, name, or comment + fields. The gzip header is returned to the default state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, + int windowBits)); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). 
It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be zero to request that inflate use the window size in + the zlib header of the compressed stream. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an adler32 or a crc32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() applies to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a + crc32 instead of an adler32. 
+ + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit2 does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit2() does not process any header information -- that is + deferred until inflate() is called. +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the adler32 value returned by that call of inflate. + The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must insure that the dictionary + that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect adler32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). 
+*/ + +ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm, + Bytef *dictionary, + uInt *dictLength)); +/* + Returns the sliding dictionary being maintained by inflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If inflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. + + inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); +/* + Skips invalid compressed data until a possible full flush point (see above + for the description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync searches for a 00 00 FF FF pattern in the compressed data. + All full flush points have this pattern, but not all occurrences of this + pattern are full flush points. + + inflateSync returns Z_OK if a possible full flush point has been found, + Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point + has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. + In the success case, the application may save the current value of + total_in which indicates where valid compressed data was found. In the + error case, the application may repeatedly call inflateSync, providing more + input each time, until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream.
The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream. + + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate all the internal decompression state. The + stream will keep attributes that may have been set by inflateInit2. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm, + int windowBits)); +/* + This function is the same as inflateReset, but it also permits changing + the wrap and window size requests. The windowBits parameter is interpreted + the same as it is for inflateInit2. + + inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL), or if + the windowBits parameter is invalid. +*/ + +ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + If bits is negative, then the input stream bit buffer is emptied. 
Then + inflatePrime() can be called again to put bits in the buffer. This is used + to clear out bits leftover after feeding inflate a block description prior + to feeding inflate codes. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm)); +/* + This function returns two values, one in the lower 16 bits of the return + value, and the other in the remaining upper bits, obtained by shifting the + return value down 16 bits. If the upper value is -1 and the lower value is + zero, then inflate() is currently decoding information outside of a block. + If the upper value is -1 and the lower value is non-zero, then inflate is in + the middle of a stored block, with the lower value equaling the number of + bytes from the input remaining to copy. If the upper value is not -1, then + it is the number of bits back from the current bit position in the input of + the code (literal or length/distance pair) currently being processed. In + that case the lower value is the number of bytes already emitted for that + code. + + A code is being processed if inflate is waiting for more input to complete + decoding of the code, or if it has completed decoding but is waiting for + more output space to write the literal or match data. + + inflateMark() is used to mark locations in the input data for random + access, which may be at bit positions, and to note those cases where the + output of a code may span boundaries of random access blocks. The current + location in the input stream can be determined from avail_in and data_type + as noted in the description for the Z_BLOCK flush parameter for inflate. + + inflateMark returns the value noted above or -1 << 16 if the provided + source stream state was inconsistent. 
+*/ + +ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm, + gz_headerp head)); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be + used to force inflate() to return immediately after header processing is + complete and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When any + of extra, name, or comment are not Z_NULL and the respective field is not + present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. 
+ + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, + unsigned char FAR *window)); + + Initialize the internal stream state for decompression using inflateBack() + calls. The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not be + allocated, or Z_VERSION_ERROR if the version of the library does not match + the version of the header file. +*/ + +typedef unsigned (*in_func) OF((void FAR *, + z_const unsigned char FAR * FAR *)); +typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); + +ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc)); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. 
This is potentially more efficient than + inflate() for file i/o applications, in that it avoids copying between the + output and the sliding window by simply making the window itself the output + buffer. inflate() can be faster on modern CPUs when used with large + buffers. inflateBack() trusts the application to not change the output + buffer passed by the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free the + allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer. + This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects only + the raw deflate stream to decompress. This is different from the normal + behavior of inflate(), which expects either a zlib or gzip header and + trailer around the deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero--buf is ignored in that + case--and inflateBack() will return a buffer error. inflateBack() will call + out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. 
out() + should return zero on success, or non-zero on failure. If out() returns + non-zero, inflateBack() will return with an error. Neither in() nor out() + are permitted to change the contents of the window provided to + inflateBackInit(), which is also the buffer that out() uses to write from. + The length written by out() will be at most the window size. Any non-zero + amount of input may be provided by in(). + + For convenience, inflateBack() can be provided input on the first call by + setting strm->next_in and strm->avail_in. If that input is exhausted, then + in() will be called. Therefore strm->next_in must be initialized before + calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called + immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in + must also be initialized, and then if strm->avail_in is not zero, input will + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + + The in_desc and out_desc parameters of inflateBack() are passed as the + first parameter of in() and out() respectively when they are called. These + descriptors can be optionally used to pass any information that the caller- + supplied in() and out() functions need to do their job. + + On return, inflateBack() will set strm->next_in and strm->avail_in to + pass back any unused input that was provided by the last in() call. The + return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR + if in() or out() returned an error, Z_DATA_ERROR if there was a format error + in the deflate stream (in which case strm->msg is set to indicate the nature + of the error), or Z_STREAM_ERROR if the stream was not properly initialized. + In the case of Z_BUF_ERROR, an input or output error can be distinguished + using strm->next_in which will be Z_NULL only if in() returned an error. If + strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning + non-zero.
(in() will always be called before out(), so strm->next_in is + assured to be defined if out() returns non-zero.) Note that inflateBack() + cannot return Z_OK. +*/ + +ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm)); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void)); +/* Return flags indicating compile-time options. + + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! 
+ 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + +#ifndef Z_SOLO + + /* utility functions */ + +/* + The following utility functions are implemented on top of the basic + stream-oriented functions. To simplify the interface, some default options + are assumed (compression level and memory usage, standard memory allocation + functions). The source code of these utility functions can be modified if + you need special options. +*/ + +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed buffer. + + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level)); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen)); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. 
It would be used before a + compress() or compress2() call to allocate the destination buffer. +*/ + +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. (The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, destLen + is the actual size of the uncompressed buffer. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In + the case where there is not enough room, uncompress() will fill the output + buffer with the uncompressed data up to that point. +*/ + + /* gzip file access functions */ + +/* + This library supports reading and writing files in gzip (.gz) format with + an interface similar to that of stdio, using the functions that start with + "gz". The gzip format is different from the zlib format. gzip is a gzip + wrapper, documented in RFC 1952, wrapped around a deflate stream. +*/ + +typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ + +/* +ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); + + Opens a gzip (.gz) file for reading or writing. The mode parameter is as + in fopen ("rb" or "wb") but can also include a compression level ("wb9") or + a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only + compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F' + for fixed code compression as in "wb9F". 
(See the description of + deflateInit2 for more information about the strategy parameter.) 'T' will + request transparent writing or appending with no compression and not using + the gzip format. + + "a" can be used instead of "w" to request that the gzip stream that will + be written be appended to the file. "+" will result in an error, since + reading and writing to the same gzip file is not supported. The addition of + "x" when writing will create the file exclusively, which fails if the file + already exists. On systems that support it, the addition of "e" when + reading or writing will set the flag to close the file on an execve() call. + + These functions, as well as gzip, will read and decode a sequence of gzip + streams in a file. The append function of gzopen() can be used to create + such a file. (Also see gzflush() for another way to do this.) When + appending, gzopen does not test whether the file begins with a gzip stream, + nor does it look for the end of the gzip streams to begin appending. gzopen + will simply append a gzip stream to the existing file. + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. When + reading, this will be detected automatically by looking for the magic two- + byte gzip header. + + gzopen returns NULL if the file could not be opened, if there was + insufficient memory to allocate the gzFile state, or if an invalid mode was + specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). + errno can be checked to determine if the reason gzopen failed was that the + file could not be opened. +*/ + +ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); +/* + gzdopen associates a gzFile with the file descriptor fd. File descriptors + are obtained from calls like open, dup, creat, pipe or fileno (if the file + has been previously opened with fopen). The mode parameter is as in gzopen. 
+ + The next call of gzclose on the returned gzFile will also close the file + descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor + fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, + mode);. The duplicated descriptor should be saved to avoid a leak, since + gzdopen does not close fd if it fails. If you are using fileno() to get the + file descriptor from a FILE *, then you will have to use dup() to avoid + double-close()ing the file descriptor. Both gzclose() and fclose() will + close the associated file descriptor, so they need to have different file + descriptors. + + gzdopen returns NULL if there was insufficient memory to allocate the + gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not + provided, or '+' was provided), or if fd is -1. The file descriptor is not + used until the next gz* read, write, seek, or close operation, so gzdopen + will not detect if fd is invalid (unless fd is -1). +*/ + +ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size)); +/* + Set the internal buffer size used by this library's functions. The + default buffer size is 8192 bytes. This function must be called after + gzopen() or gzdopen(), and before any other calls that read or write the + file. The buffer memory allocation is always deferred to the first read or + write. Two buffers are allocated, either both of the specified size when + writing, or one of the specified size and the other twice that size when + reading. A larger buffer size of, for example, 64K or 128K bytes will + noticeably increase the speed of decompression (reading). + + The new buffer size also affects the maximum length for gzprintf(). + + gzbuffer() returns 0 on success, or -1 on failure, such as being called + too late. +*/ + +ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); +/* + Dynamically update the compression level or strategy. 
See the description + of deflateInit2 for the meaning of these parameters. + + gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not + opened for writing. +*/ + +ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); +/* + Reads the given number of uncompressed bytes from the compressed file. If + the input file is not in gzip format, gzread copies the given number of + bytes into the buffer directly from the file. + + After reaching the end of a gzip stream in the input, gzread will continue + to read, looking for another gzip stream. Any number of gzip streams may be + concatenated in the input file, and will all be decompressed by gzread(). + If something other than a gzip stream is encountered after a gzip stream, + that remaining trailing garbage is ignored (and no error is returned). + + gzread can be used to read a gzip file that is being concurrently written. + Upon reaching the end of the input, gzread will return with the available + data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then + gzclearerr can be used to clear the end of file indicator in order to permit + gzread to be tried again. Z_OK indicates that a gzip stream was completed + on the last gzread. Z_BUF_ERROR indicates that the input file ended in the + middle of a gzip stream. Note that gzread does not return -1 in the event + of an incomplete gzip stream. This error is deferred until gzclose(), which + will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip + stream. Alternatively, gzerror can be used before gzclose to detect this + case. + + gzread returns the number of uncompressed bytes actually read, less than + len for end of file, or -1 for error. +*/ + +ZEXTERN int ZEXPORT gzwrite OF((gzFile file, + voidpc buf, unsigned len)); +/* + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of uncompressed bytes written or 0 in case of + error. 
+*/ + +ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...)); +/* + Converts, formats, and writes the arguments to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written, or 0 in case of error. The number of + uncompressed bytes written is limited to 8191, or one less than the buffer + size given to gzbuffer(). The caller should assure that this limit is not + exceeded. If it is exceeded, then gzprintf() will return an error (0) with + nothing written. In this case, there may also be a buffer overflow with + unpredictable consequences, which is possible only if zlib was compiled with + the insecure functions sprintf() or vsprintf() because the secure snprintf() + or vsnprintf() functions were not available. This can be determined using + zlibCompileFlags(). +*/ + +ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); +/* + Writes the given null-terminated string to the compressed file, excluding + the terminating null character. + + gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); +/* + Reads bytes from the compressed file until len-1 characters are read, or a + newline character is read and transferred to buf, or an end-of-file + condition is encountered. If any characters are read or if len == 1, the + string is terminated with a null character. If no characters are read due + to an end-of-file or len < 1, then the buffer is left untouched. + + gzgets returns buf which is a null-terminated string, or it returns NULL + for end-of-file or in case of error. If there was an error, the contents at + buf are indeterminate. +*/ + +ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); +/* + Writes c, converted to an unsigned char, into the compressed file. gzputc + returns the value that was written, or -1 in case of error. 
+*/ + +ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); +/* + Reads one byte from the compressed file. gzgetc returns this byte or -1 + in case of end of file or error. This is implemented as a macro for speed. + As such, it does not do all of the checking the other functions do. I.e. + it does not check to see if file is NULL, nor whether the structure file + points to has been clobbered or not. +*/ + +ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); +/* + Push one character back onto the stream to be read as the first character + on the next read. At least one character of push-back is allowed. + gzungetc() returns the character pushed, or -1 on failure. gzungetc() will + fail if c is -1, and may fail if a character has been pushed but not read + yet. If gzungetc is used immediately after gzopen or gzdopen, at least the + output buffer size of pushed characters is allowed. (See gzbuffer above.) + The pushed character will be discarded if the stream is repositioned with + gzseek() or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); +/* + Flushes all pending output into the compressed file. The parameter flush + is as in the deflate() function. The return value is the zlib error number + (see function gzerror below). gzflush is only permitted when writing. + + If the flush parameter is Z_FINISH, the remaining data is written and the + gzip stream is completed in the output. If gzwrite() is called again, a new + gzip stream will be started in the output. gzread() is able to read such + concatenated gzip streams. + + gzflush should be called only when strictly necessary because it will + degrade compression if called too often. +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, + z_off_t offset, int whence)); + + Sets the starting position for the next gzread or gzwrite on the given + compressed file. The offset represents a number of bytes in the + uncompressed data stream.
The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); +/* + Rewinds the given file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); + + Returns the starting position for the next gzread or gzwrite on the given + compressed file. This position represents a number of bytes in the + uncompressed data stream, and is zero when starting, even if appending or + reading a gzip stream from the middle of a file using gzdopen(). + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file)); + + Returns the current offset in the file being read or written. This offset + includes the count of bytes that precede the gzip stream, for example when + appending or when using gzdopen() for reading. When reading, the offset + does not include as yet unused buffered input. This information can be used + for a progress indicator. On error, gzoffset() returns -1. +*/ + +ZEXTERN int ZEXPORT gzeof OF((gzFile file)); +/* + Returns true (1) if the end-of-file indicator has been set while reading, + false (0) otherwise. Note that the end-of-file indicator is set only if the + read tried to go past the end of the input, but came up short. 
Therefore, + just like feof(), gzeof() may return false even if there is no more data to + read, in the event that the last read request was for the exact number of + bytes remaining in the input file. This will happen if the input file size + is an exact multiple of the buffer size. + + If gzeof() returns true, then the read functions will return no more data, + unless the end-of-file indicator is reset by gzclearerr() and the input file + has grown since the previous end of file was detected. +*/ + +ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); +/* + Returns true (1) if file is being copied directly while reading, or false + (0) if file is a gzip stream being decompressed. + + If the input file is empty, gzdirect() will return true, since the input + does not contain a gzip stream. + + If gzdirect() is used immediately after gzopen() or gzdopen() it will + cause buffers to be allocated to allow reading the file to determine if it + is a gzip file. Therefore if gzbuffer() is used, it should be called before + gzdirect(). + + When writing, gzdirect() returns true (1) if transparent writing was + requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note: + gzdirect() is not needed when writing. Transparent writing must be + explicitly requested, so the application already knows the answer. When + linking statically, using gzdirect() will include all of the zlib code for + gzip file reading and decompression, which may not be desired.) +*/ + +ZEXTERN int ZEXPORT gzclose OF((gzFile file)); +/* + Flushes all pending output if necessary, closes the compressed file and + deallocates the (de)compression state. Note that once file is closed, you + cannot call gzerror with file, since its structures have been deallocated. + gzclose must not be called more than once on the same file, just as free + must not be called more than once on the same allocation. 
+ + gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a + file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the + last read ended in the middle of a gzip stream, or Z_OK on success. +*/ + +ZEXTERN int ZEXPORT gzclose_r OF((gzFile file)); +ZEXTERN int ZEXPORT gzclose_w OF((gzFile file)); +/* + Same as gzclose(), but gzclose_r() is only for use when reading, and + gzclose_w() is only for use when writing or appending. The advantage to + using these instead of gzclose() is that they avoid linking in zlib + compression or decompression code that is not used when only reading or only + writing respectively. If gzclose() is used, then both compression and + decompression code will be included in the application when linking to a static + zlib library. +*/ + +ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); +/* + Returns the error message for the last error which occurred on the given + compressed file. errnum is set to zlib error number. If an error occurred + in the file system and not in the compression library, errnum is set to + Z_ERRNO and the application may consult errno to get the exact error code. + + The application must not modify the returned string. Future calls to + this function may invalidate the previously returned string. If file is + closed, then the string previously returned by gzerror will no longer be + available. + + gzerror() should be used to distinguish errors from end-of-file for those + functions above that do not distinguish those cases in their return values. +*/ + +ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); +/* + Clears the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio. This is useful for continuing to read a gzip + file that is being written concurrently.
+*/ + +#endif /* !Z_SOLO */ + + /* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the compression + library. +*/ + +ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is Z_NULL, this function returns the + required initial value for the checksum. + + An Adler-32 checksum is almost as reliable as a CRC32 but can be computed + much faster. + + Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +/* +ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, + z_off_t len2)); + + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note + that the z_off_t type (like off_t) is a signed integer. If len2 is + negative, the result has no meaning or utility. +*/ + +ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. If buf is Z_NULL, this function returns the required + initial value for the crc. Pre- and post-conditioning (one's complement) is + performed within this function so it shouldn't be done by the application. 
+ + Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); + + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2. +*/ + + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size)); +ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size)); +#define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +#define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +#define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +#define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + 
ZLIB_VERSION, (int)sizeof(z_stream)) + +#ifndef Z_SOLO + +/* gzgetc() macro and its supporting function and exposed data structure. Note + * that the real internal state is much larger than the exposed structure. + * This abbreviated structure exposes just enough for the gzgetc() macro. The + * user should not mess with these exposed elements, since their names or + * behavior could change in the future, perhaps even capriciously. They can + * only be used by the gzgetc() macro. You have been warned. + */ +struct gzFile_s { + unsigned have; + unsigned char *next; + z_off64_t pos; +}; +ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +# define z_gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g)) +#else +# define gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g)) +#endif + +/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or + * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if + * both are true, the application gets the *64 functions, and the regular + * functions are changed to 64 bits) -- in case these are set on systems + * without large file support, _LFS64_LARGEFILE must also be true + */ +#ifdef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); + ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t)); +#endif + +#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) +# ifdef Z_PREFIX_SET +# define z_gzopen z_gzopen64 +# define z_gzseek z_gzseek64 +# define z_gztell z_gztell64 +# define z_gzoffset z_gzoffset64 +# define z_adler32_combine z_adler32_combine64 +# define z_crc32_combine z_crc32_combine64 +# else +# define 
gzopen gzopen64 +# define gzseek gzseek64 +# define gztell gztell64 +# define gzoffset gzoffset64 +# define adler32_combine adler32_combine64 +# define crc32_combine crc32_combine64 +# endif +# ifndef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); +# endif +#else + ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); +#endif + +#else /* Z_SOLO */ + + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); + +#endif /* !Z_SOLO */ + +/* hack for buggy compilers */ +#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL) + struct internal_state {int dummy;}; +#endif + +/* undocumented functions */ +ZEXTERN const char * ZEXPORT zError OF((int)); +ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp)); +ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table OF((void)); +ZEXTERN int ZEXPORT inflateUndermine OF((z_streamp, int)); +ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp)); +ZEXTERN int ZEXPORT deflateResetKeep OF((z_streamp)); +#if defined(_WIN32) && !defined(Z_SOLO) +ZEXTERN gzFile ZEXPORT gzopen_w OF((const wchar_t *path, + const char *mode)); +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +ZEXTERN int ZEXPORTVA gzvprintf Z_ARG((gzFile file, + const char *format, + va_list va)); +# endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* 
ZLIB_H */ diff --git a/frontend/libretro.c b/frontend/libretro.c index 9b9149dae..1ad39b515 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -3047,7 +3047,7 @@ static void update_variables(bool in_flight) update_option_visibility(); - if (old_fps != psxGetFps()) + if (in_flight && old_fps != psxGetFps()) { struct retro_system_av_info info; retro_get_system_av_info(&info); diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index f20567b7d..47a3ed4ef 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -163,7 +163,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled", }, -#ifndef _WIN32 +#if 0 // ndef _WIN32 // currently disabled, see USE_READ_THREAD in libpcsxcore/cdriso.c { "pcsx_rearmed_async_cd", "CD Access Method (Restart)", @@ -217,9 +217,9 @@ struct retro_core_option_v2_definition option_defs_us[] = { #endif { "pcsx_rearmed_psxclock", - "PSX CPU Clock Speed", + "PSX CPU Clock Speed (%)", NULL, - "Overclock or under-clock the PSX CPU. Try adjusting this if the game is too slow, too fast or hangs." + "Overclock or under-clock the PSX CPU. The value has to be lower than 100 because of some slowdowns (cache misses, hw access penalties, etc.) that are not emulated. Try adjusting this if the game is too slow, too fast or hangs." #if defined(HAVE_PRE_ARMV7) && !defined(_3DS) " Default is 50." 
#else diff --git a/frontend/main.c b/frontend/main.c index 82e670e99..4c051e2c7 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -31,7 +31,9 @@ #include "arm_features.h" #include "revision.h" -#if defined(__has_builtin) +#if defined(__EMSCRIPTEN__) +#define DO_CPU_CHECKS 0 +#elif defined(__has_builtin) #define DO_CPU_CHECKS __has_builtin(__builtin_cpu_init) #elif defined(__x86_64__) || defined(__i386__) #define DO_CPU_CHECKS 1 diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index 71fcd3fc0..c7ca247e2 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -73,14 +73,18 @@ struct rearmed_cbs { // gpu options int frameskip; int fskip_advice; + int fskip_force; + int fskip_dirty; unsigned int *gpu_frame_count; unsigned int *gpu_hcnt; unsigned int flip_cnt; // increment manually if not using pl_vout_flip unsigned int only_16bpp; // platform is 16bpp-only + unsigned int thread_rendering; struct { int allow_interlace; // 0 off, 1 on, 2 guess int enhancement_enable; int enhancement_no_main; + int allow_dithering; int enhancement_tex_adj; } gpu_neon; struct { diff --git a/frontend/switch/sys/mman.h b/frontend/switch/sys/mman.h new file mode 100644 index 000000000..2e084a64e --- /dev/null +++ b/frontend/switch/sys/mman.h @@ -0,0 +1,60 @@ +#ifndef MMAN_H +#define MMAN_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdlib.h> +#include <string.h> +#include <switch.h> + +#define PROT_READ 0b001 +#define PROT_WRITE 0b010 +#define PROT_EXEC 0b100 +#define MAP_PRIVATE 2 +#define MAP_FIXED 0x10 +#define MAP_ANONYMOUS 0x20 + +#define MAP_FAILED ((void *)-1) + +#define ALIGNMENT 0x1000 + +static inline void *mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) +{ + (void)fd; + (void)offset; + + // match Linux behavior + len = (len + ALIGNMENT - 1) & ~(ALIGNMENT - 1); + + Result rc = svcMapPhysicalMemory(addr, len); + if (R_FAILED(rc)) + { + //printf("mmap failed\n"); + addr = aligned_alloc(ALIGNMENT, len); + } + if (!addr) + return MAP_FAILED; + memset(addr, 
0, len); + return addr; +} + +static inline int munmap(void *addr, size_t len) +{ + len = (len + ALIGNMENT - 1) & ~(ALIGNMENT - 1); + Result rc = svcUnmapPhysicalMemory(addr, len); + if (R_FAILED(rc)) + { + //printf("munmap failed\n"); + free(addr); + } + return 0; +} + +#ifdef __cplusplus +}; +#endif + +#endif // MMAN_H + diff --git a/frontend/switch/zconf.h b/frontend/switch/zconf.h new file mode 100644 index 000000000..996fff292 --- /dev/null +++ b/frontend/switch/zconf.h @@ -0,0 +1,511 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2013 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. + * Even better than compiling with -DZ_PREFIX would be to use configure to set + * this permanently in zconf.h using "./configure --zprefix". 
+ */ +#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ +# define Z_PREFIX_SET + +/* all linked symbols */ +# define _dist_code z__dist_code +# define _length_code z__length_code +# define _tr_align z__tr_align +# define _tr_flush_bits z__tr_flush_bits +# define _tr_flush_block z__tr_flush_block +# define _tr_init z__tr_init +# define _tr_stored_block z__tr_stored_block +# define _tr_tally z__tr_tally +# define adler32 z_adler32 +# define adler32_combine z_adler32_combine +# define adler32_combine64 z_adler32_combine64 +# ifndef Z_SOLO +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# endif +# define crc32 z_crc32 +# define crc32_combine z_crc32_combine +# define crc32_combine64 z_crc32_combine64 +# define deflate z_deflate +# define deflateBound z_deflateBound +# define deflateCopy z_deflateCopy +# define deflateEnd z_deflateEnd +# define deflateInit2_ z_deflateInit2_ +# define deflateInit_ z_deflateInit_ +# define deflateParams z_deflateParams +# define deflatePending z_deflatePending +# define deflatePrime z_deflatePrime +# define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep +# define deflateSetDictionary z_deflateSetDictionary +# define deflateSetHeader z_deflateSetHeader +# define deflateTune z_deflateTune +# define deflate_copyright z_deflate_copyright +# define get_crc_table z_get_crc_table +# ifndef Z_SOLO +# define gz_error z_gz_error +# define gz_intmax z_gz_intmax +# define gz_strwinerror z_gz_strwinerror +# define gzbuffer z_gzbuffer +# define gzclearerr z_gzclearerr +# define gzclose z_gzclose +# define gzclose_r z_gzclose_r +# define gzclose_w z_gzclose_w +# define gzdirect z_gzdirect +# define gzdopen z_gzdopen +# define gzeof z_gzeof +# define gzerror z_gzerror +# define gzflush z_gzflush +# define gzgetc z_gzgetc +# define gzgetc_ z_gzgetc_ +# define gzgets z_gzgets +# define gzoffset z_gzoffset +# define gzoffset64 z_gzoffset64 +# define gzopen z_gzopen +# 
define gzopen64 z_gzopen64 +# ifdef _WIN32 +# define gzopen_w z_gzopen_w +# endif +# define gzprintf z_gzprintf +# define gzvprintf z_gzvprintf +# define gzputc z_gzputc +# define gzputs z_gzputs +# define gzread z_gzread +# define gzrewind z_gzrewind +# define gzseek z_gzseek +# define gzseek64 z_gzseek64 +# define gzsetparams z_gzsetparams +# define gztell z_gztell +# define gztell64 z_gztell64 +# define gzungetc z_gzungetc +# define gzwrite z_gzwrite +# endif +# define inflate z_inflate +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define inflateBackInit_ z_inflateBackInit_ +# define inflateCopy z_inflateCopy +# define inflateEnd z_inflateEnd +# define inflateGetHeader z_inflateGetHeader +# define inflateInit2_ z_inflateInit2_ +# define inflateInit_ z_inflateInit_ +# define inflateMark z_inflateMark +# define inflatePrime z_inflatePrime +# define inflateReset z_inflateReset +# define inflateReset2 z_inflateReset2 +# define inflateSetDictionary z_inflateSetDictionary +# define inflateGetDictionary z_inflateGetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateUndermine z_inflateUndermine +# define inflateResetKeep z_inflateResetKeep +# define inflate_copyright z_inflate_copyright +# define inflate_fast z_inflate_fast +# define inflate_table z_inflate_table +# ifndef Z_SOLO +# define uncompress z_uncompress +# endif +# define zError z_zError +# ifndef Z_SOLO +# define zcalloc z_zcalloc +# define zcfree z_zcfree +# endif +# define zlibCompileFlags z_zlibCompileFlags +# define zlibVersion z_zlibVersion + +/* all zlib typedefs in zlib.h and zconf.h */ +# define Byte z_Byte +# define Bytef z_Bytef +# define alloc_func z_alloc_func +# define charf z_charf +# define free_func z_free_func +# ifndef Z_SOLO +# define gzFile z_gzFile +# endif +# define gz_header z_gz_header +# define gz_headerp z_gz_headerp +# define in_func z_in_func +# define intf z_intf +# define out_func 
z_out_func +# define uInt z_uInt +# define uIntf z_uIntf +# define uLong z_uLong +# define uLongf z_uLongf +# define voidp z_voidp +# define voidpc z_voidpc +# define voidpf z_voidpf + +/* all zlib structs in zlib.h and zconf.h */ +# define gz_header_s z_gz_header_s +# define internal_state z_internal_state + +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif +#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). 
*/ +# define STDC +#endif + +#ifndef STDC +# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ +# define const /* note: need a more gentle solution here */ +# endif +#endif + +#if defined(ZLIB_CONST) && !defined(z_const) +# define z_const const +#else +# define z_const +#endif + +/* Some Mac compilers merge all .h files incorrectly: */ +#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__) +# define NO_DUMMY_DECL +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus a few kilobytes + for small objects. 
+*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +#ifndef Z_ARG /* function prototypes for stdarg */ +# if defined(STDC) || defined(Z_HAVE_STDARG_H) +# define Z_ARG(args) args +# else +# define Z_ARG(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. + */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# include <windows.h> + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. 
*/ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#if defined (__BEOS__) +# ifdef ZLIB_DLL +# ifdef ZLIB_INTERNAL +# define ZEXPORT __declspec(dllexport) +# define ZEXPORTVA __declspec(dllexport) +# else +# define ZEXPORT __declspec(dllimport) +# define ZEXPORTVA __declspec(dllimport) +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#ifdef SMALL_MEDIUM + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC) +# include <limits.h> +# if (UINT_MAX == 0xffffffffUL) +# define Z_U4 unsigned +# elif (ULONG_MAX == 0xffffffffUL) +# define Z_U4 unsigned long +# elif (USHRT_MAX == 0xffffffffUL) +# define Z_U4 unsigned short +# endif +#endif + +#ifdef Z_U4 + typedef Z_U4 z_crc_t; +#else + typedef unsigned long z_crc_t; +#endif + +#if 1 /* was set to #if 1 by ./configure */ +# define Z_HAVE_UNISTD_H +#endif + +#if 1 /* was set to #if 1 by ./configure */ +# define Z_HAVE_STDARG_H +#endif + +#ifdef STDC +# ifndef Z_SOLO +# include <sys/types.h> /* for off_t */ +# endif +#endif + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +# include <stdarg.h> /* for va_list */ +# endif +#endif + +#ifdef _WIN32 +# ifndef Z_SOLO +# include <stddef.h> /* for wchar_t */ +# endif 
+#endif + +/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and + * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even + * though the former does not conform to the LFS document), but considering + * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as + * equivalently requesting no 64-bit operations + */ +#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1 +# undef _LARGEFILE64_SOURCE +#endif + +#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H) +# define Z_HAVE_UNISTD_H +#endif +#ifndef Z_SOLO +# if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE) +# include <unistd.h> /* for SEEK_*, off_t, and _LFS64_LARGEFILE */ +# ifdef VMS +# include <unixio.h> /* for off_t */ +# endif +# ifndef z_off_t +# define z_off_t off_t +# endif +# endif +#endif + +#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0 +# define Z_LFS64 +#endif + +#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64) +# define Z_LARGE64 +#endif + +#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64) +# define Z_WANT64 +#endif + +#if !defined(SEEK_SET) && !defined(Z_SOLO) +# define SEEK_SET 0 /* Seek from beginning of file. */ +# define SEEK_CUR 1 /* Seek from current position. 
*/ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif + +#ifndef z_off_t +# define z_off_t long +#endif + +#if !defined(_WIN32) && defined(Z_LARGE64) +# define z_off64_t off64_t +#else +# if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO) +# define z_off64_t __int64 +# else +# define z_off64_t z_off_t +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) + #pragma map(deflateInit_,"DEIN") + #pragma map(deflateInit2_,"DEIN2") + #pragma map(deflateEnd,"DEEND") + #pragma map(deflateBound,"DEBND") + #pragma map(inflateInit_,"ININ") + #pragma map(inflateInit2_,"ININ2") + #pragma map(inflateEnd,"INEND") + #pragma map(inflateSync,"INSY") + #pragma map(inflateSetDictionary,"INSEDI") + #pragma map(compressBound,"CMBND") + #pragma map(inflate_table,"INTABL") + #pragma map(inflate_fast,"INFA") + #pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ diff --git a/frontend/switch/zlib.h b/frontend/switch/zlib.h new file mode 100644 index 000000000..3e0c7672a --- /dev/null +++ b/frontend/switch/zlib.h @@ -0,0 +1,1768 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.8, April 28th, 2013 + + Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. 
Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 + (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). +*/ + +#ifndef ZLIB_H +#define ZLIB_H + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.2.8" +#define ZLIB_VERNUM 0x1280 +#define ZLIB_VER_MAJOR 1 +#define ZLIB_VER_MINOR 2 +#define ZLIB_VER_REVISION 8 +#define ZLIB_VER_SUBREVISION 0 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed data. + This version of the library supports only one compression method (deflation) + but other algorithms will be added later and will have the same stream + interface. + + Compression can be done in a single step if the buffers are large enough, + or can be done by repeated calls of the compression function. In the latter + case, the application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip streams in memory as well. 
+ + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never crash + even in case of corrupted input. +*/ + +typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +typedef void (*free_func) OF((voidpf opaque, voidpf address)); + +struct internal_state; + +typedef struct z_stream_s { + z_const Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + + Bytef *next_out; /* next output byte should be put there */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + + z_const char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text */ + uLong adler; /* adler32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. 
+*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has dropped + to zero. It must update next_out and avail_out when avail_out has dropped + to zero. The application must initialize zalloc, zfree and opaque before + calling the init function. All other fields are set by the compression + library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this if + the symbol MAXSEG_64K is defined (see zconf.h). 
WARNING: On MSDOS, pointers + returned by zalloc for objects of exactly 65536 bytes *must* have their + offset normalized to zero. The default allocation function provided by this + library ensures this (see zutil.c). To reduce memory requirements and avoid + any allocation of 64K objects, at the expense of compression ratio, compile + the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or progress + reports. After compression, total_in holds the total size of the + uncompressed data and may be saved for use in the decompressor (particularly + if the decompressor wants to decompress everything in a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +#define Z_TREES 6 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative values + * are errors, positive values are used for special but normal events. 
+ */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field (though see inflate()) */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +#define zlib_version zlibVersion() +/* for compatibility with versions < 1.0.2 */ + + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion OF((void)); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is not + compatible with the zlib.h header file used by the application. This check + is automatically made by deflateInit and inflateInit. + */ + +/* +ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. If + zalloc and zfree are set to Z_NULL, deflateInit updates them to use default + allocation functions. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at all + (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION + requests a default compromise between speed and compression (currently + equivalent to level 6). 
+ + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if level is not a valid compression level, or + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). msg is set to null + if there is no error message. deflateInit does not perform any compression: + this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary (in interactive applications). Some + output may be provided even if flush is not set. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating avail_in or avail_out accordingly; avail_out should + never be zero before the call. The application can consume the compressed + output when it wants, for example when the output buffer is full (avail_out + == 0), or after each call of deflate(). 
If deflate returns Z_OK and with + zero avail_out, it must be called again after making room in the output + buffer because there might be more output pending. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In + particular avail_in is zero after the call if enough output space has been + provided before the call.) Flushing may degrade compression for some + compression algorithms and so it should be used only when necessary. This + completes the current deflate block and follows it with an empty stored block + that is three bits plus filler bits to the next byte, followed by four bytes + (00 00 ff ff). + + If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the + output buffer, but the output is not aligned to a byte boundary. All of the + input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. + This completes the current deflate block and follows it with an empty fixed + codes block that is 10 bits long. This assures that enough bytes are output + in order for the decompressor to finish the block before the empty fixed code + block. + + If flush is set to Z_BLOCK, a deflate block is completed and emitted, as + for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to + seven bits of the current block are held to be written as the next byte after + the next deflate block is completed. In this case, the decompressor may not + be provided enough bits at this point in order to complete decompression of + the data provided so far to the compressor. It may need to wait for the next + block to be emitted. 
This is for advanced applications that need to control + the emission of deflate blocks. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six to avoid repeated flush markers due to + avail_out == 0 on return. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there was + enough output space; if deflate returns with Z_OK, this function must be + called again with Z_FINISH and more output space (updated avail_out) but no + more input data, until it returns with Z_STREAM_END or an error. After + deflate has returned Z_STREAM_END, the only possible operations on the stream + are deflateReset or deflateEnd. + + Z_FINISH can be used immediately after deflateInit if all the compression + is to be done in a single step. In this case, avail_out must be at least the + value returned by deflateBound (see below). Then deflate is guaranteed to + return Z_STREAM_END. If not enough output space is provided, deflate will + not return Z_STREAM_END, and it must be called again as described above. + + deflate() sets strm->adler to the adler32 checksum of all input read + so far (that is, total_in bytes). + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered + binary. 
This field is only for information purposes and does not affect the + compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was Z_NULL), Z_BUF_ERROR if no progress is possible + (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not + fatal, and deflate() can be called again with more input and more output + space to continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, msg + may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. If next_in is not Z_NULL and avail_in is large enough (the + exact value depends on the compression method), inflateInit determines the + compression method from the zlib header and allocates all data structures + accordingly; otherwise the allocation will be deferred to the first call of + inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to + use default allocation functions. 
+ + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit() does not process any header information -- that is deferred + until inflate() is called. +*/ + + +ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in is updated and processing will + resume at this point for the next call of inflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there is + no more input data or no more space in the output buffer (see below about + the flush parameter). + + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating the next_* and avail_* values accordingly. 
The + application can consume the uncompressed output when it wants, for example + when the output buffer is full (avail_out == 0), or after each call of + inflate(). If inflate returns Z_OK and with zero avail_out, it must be + called again after making room in the output buffer because there might be + more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, + Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() + stop if and when it gets to the next deflate block boundary. When decoding + the zlib or gzip format, this will cause inflate() to return immediately + after the header and before the first block. When doing a raw inflate, + inflate() will go ahead and process the first block, and will return when it + gets to the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + Also to assist in this, on return inflate() will set strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 if + inflate() is currently decoding the last block in the deflate stream, plus + 128 if inflate() returned immediately after decoding an end-of-block code or + decoding the complete header up to just before the first byte of the deflate + stream. The end-of-block will not be indicated until all of the uncompressed + data from that block has been written to strm->next_out. The number of + unused bits may in general be greater than seven, except when bit 7 of + data_type is set, in which case the number of unused bits will be less than + eight. data_type is set as noted here every time inflate() returns for all + flush options, and so can be used to determine the amount of currently + consumed input in bits. 
+ + The Z_TREES option behaves as Z_BLOCK does, but it also returns when the + end of each deflate block header is reached, before any actual data in that + block is decoded. This allows the caller to determine the length of the + deflate block header for later use in random access within a deflate block. + 256 is added to the value of strm->data_type when inflate() returns + immediately after reaching the end of the deflate block header. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step (a + single call of inflate), the parameter flush should be set to Z_FINISH. In + this case all pending input is processed and all pending output is flushed; + avail_out must be large enough to hold all of the uncompressed data for the + operation to complete. (The size of the uncompressed data may have been + saved by the compressor for this purpose.) The use of Z_FINISH is not + required to perform an inflation in one step. However it may be used to + inform inflate that a faster approach can be used for the single inflate() + call. Z_FINISH also informs inflate to not maintain a sliding window if the + stream completes, which reduces inflate's memory footprint. If the stream + does not complete, either because not all of the stream is provided or not + enough output space is provided, then a sliding window will be allocated and + inflate() can be called again to continue the operation as if Z_NO_FLUSH had + been used. + + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the effects of the flush parameter in this implementation are + on the return value of inflate() as noted below, when inflate() returns early + when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of + memory for a sliding window when Z_FINISH is used. 
+ + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the Adler-32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the Adler-32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed adler32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() can decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically, if requested when + initializing with inflateInit2(). Any information contained in the gzip + header is not retained, so applications that need that information should + instead use raw inflate, see inflateInit2() below, or inflateBack() and + perform their own processing of the gzip header and trailer. When processing + gzip-wrapped deflate data, strm->adler is set to the CRC-32 of the output + produced so far. The CRC-32 is checked against the gzip trailer. + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value), Z_STREAM_ERROR if the stream structure was inconsistent (for example + next_in or next_out was Z_NULL), Z_MEM_ERROR if there was not enough memory, + Z_BUF_ERROR if no progress is possible or if there was not enough room in the + output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing.
If Z_DATA_ERROR is returned, the application may + then call inflateSync() to look for a good compression block if a partial + recovery of the data is desired. +*/ + + +ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state + was inconsistent. In the error case, msg may be set but then points to a + static string (which must not be deallocated). +*/ + + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy)); + + This is another version of deflateInit with more compression options. The + fields next_in, zalloc, zfree and opaque must be initialized before by the + caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute an adler32 check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. 
The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), no + header crc, and the operating system will be set to 255 (unknown). If a + gzip stream is being written, strm->adler is a crc32 instead of an adler32. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but is + slow and reduces compression ratio; memLevel=9 uses maximum memory for + optimal speed. The default value is 8. See zconf.h for total memory usage + as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as + fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The + strategy parameter only affects the compression ratio but not the + correctness of the compressed output even if it is not set appropriately. + Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler + decoder for special applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid + method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is + incompatible with the version assumed by the caller (ZLIB_VERSION). msg is + set to null if there is no error message. 
deflateInit2 does not perform any + compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary. + + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be + useful should be put at the end of the dictionary, not at the front. In + addition, the current implementation of deflate will use at most the window + size minus 262 bytes of the provided dictionary. 
+ + Upon return of this function, strm->adler is set to the adler32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The adler32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + adler32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and can + consume lots of memory. + + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); +/* + This function is equivalent to deflateEnd followed by deflateInit, + but does not free and reallocate all the internal compression state. The + stream will keep the same compression level and any other attributes that + may have been set by deflateInit2. 
+ + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, + int level, + int strategy)); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2. This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different strategy. + If the compression level is changed, the input available so far is + compressed with the old level (and may be flushed); the new level will take + effect only at the next call of deflate(). + + Before the call of deflateParams, the stream state must be set as for + a call of deflate(), since the currently available input may have to be + compressed and flushed. In particular, strm->avail_out must be non-zero. + + deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source + stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR if + strm->avail_out was zero. +*/ + +ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain)); +/* + Fine tune deflate's internal compression parameters. This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. 
+ */ + +ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, + uLong sourceLen)); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() or + deflateInit2(), and after deflateSetHeader(), if used. This would be used + to allocate an output buffer for deflation in a single pass, and so would be + called before deflate(). If that first deflate() call is provided the + sourceLen input bytes, an output buffer allocated to the size returned by + deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed + to return Z_STREAM_END. Note that it is possible for the compressed size to + be larger than the value returned by deflateBound() if flush options other + than Z_FINISH or Z_NO_FLUSH are used. +*/ + +ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm, + unsigned *pending, + int *bits)); +/* + deflatePending() returns the number of bytes and bits of output that have + been generated, but not yet provided in the available output. The bytes not + provided would be due to the available output space having been consumed. + The number of bits of output not provided is between 0 and 7, where they + await more bits to join them in order to fill out a full byte. If pending + or bits are Z_NULL, then those values are not set. + + deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. + */ + +ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the bits + leftover from a previous deflate stream when appending to it. As such, this + function can only be used for raw deflate, and must be used before the first + deflate() call after a deflateInit2() or deflateReset().
bits must be less + than or equal to 16, and that many of the least significant bits of value + will be inserted in the output. + + deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough + room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, + gz_headerp head)); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflag is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to 255, with no extra, name, or comment + fields. The gzip header is returned to the default state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, + int windowBits)); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). 
It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be zero to request that inflate use the window size in + the zlib header of the compressed stream. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an adler32 or a crc32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that the comments + above on the use in deflateInit2() apply to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a + crc32 instead of an adler32.
+ + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit2 does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit2() does not process any header information -- that is + deferred until inflate() is called. +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the adler32 value returned by that call of inflate. + The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must insure that the dictionary + that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect adler32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). 
+*/ + +ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm, + Bytef *dictionary, + uInt *dictLength)); +/* + Returns the sliding dictionary being maintained by inflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If inflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. + + inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); +/* + Skips invalid compressed data until a possible full flush point (see above + for the description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync searches for a 00 00 FF FF pattern in the compressed data. + All full flush points have this pattern, but not all occurrences of this + pattern are full flush points. + + inflateSync returns Z_OK if a possible full flush point has been found, + Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point + has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. + In the success case, the application may save the current value of + total_in which indicates where valid compressed data was found. In the + error case, the application may repeatedly call inflateSync, providing more + input each time, until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream.
The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream. + + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate all the internal decompression state. The + stream will keep attributes that may have been set by inflateInit2. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm, + int windowBits)); +/* + This function is the same as inflateReset, but it also permits changing + the wrap and window size requests. The windowBits parameter is interpreted + the same as it is for inflateInit2. + + inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL), or if + the windowBits parameter is invalid. +*/ + +ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + If bits is negative, then the input stream bit buffer is emptied. 
Then + inflatePrime() can be called again to put bits in the buffer. This is used + to clear out bits leftover after feeding inflate a block description prior + to feeding inflate codes. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm)); +/* + This function returns two values, one in the lower 16 bits of the return + value, and the other in the remaining upper bits, obtained by shifting the + return value down 16 bits. If the upper value is -1 and the lower value is + zero, then inflate() is currently decoding information outside of a block. + If the upper value is -1 and the lower value is non-zero, then inflate is in + the middle of a stored block, with the lower value equaling the number of + bytes from the input remaining to copy. If the upper value is not -1, then + it is the number of bits back from the current bit position in the input of + the code (literal or length/distance pair) currently being processed. In + that case the lower value is the number of bytes already emitted for that + code. + + A code is being processed if inflate is waiting for more input to complete + decoding of the code, or if it has completed decoding but is waiting for + more output space to write the literal or match data. + + inflateMark() is used to mark locations in the input data for random + access, which may be at bit positions, and to note those cases where the + output of a code may span boundaries of random access blocks. The current + location in the input stream can be determined from avail_in and data_type + as noted in the description for the Z_BLOCK flush parameter for inflate. + + inflateMark returns the value noted above or -1 << 16 if the provided + source stream state was inconsistent. 
+*/ + +ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm, + gz_headerp head)); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be + used to force inflate() to return immediately after header processing is + complete and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When any + of extra, name, or comment are not Z_NULL and the respective field is not + present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. 
+ + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, + unsigned char FAR *window)); + + Initialize the internal stream state for decompression using inflateBack() + calls. The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not be + allocated, or Z_VERSION_ERROR if the version of the library does not match + the version of the header file. +*/ + +typedef unsigned (*in_func) OF((void FAR *, + z_const unsigned char FAR * FAR *)); +typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); + +ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc)); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. 
This is potentially more efficient than + inflate() for file i/o applications, in that it avoids copying between the + output and the sliding window by simply making the window itself the output + buffer. inflate() can be faster on modern CPUs when used with large + buffers. inflateBack() trusts the application to not change the output + buffer passed by the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free the + allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer. + This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects only + the raw deflate stream to decompress. This is different from the normal + behavior of inflate(), which expects either a zlib or gzip header and + trailer around the deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero--buf is ignored in that + case--and inflateBack() will return a buffer error. inflateBack() will call + out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. 
out() + should return zero on success, or non-zero on failure. If out() returns + non-zero, inflateBack() will return with an error. Neither in() nor out() + are permitted to change the contents of the window provided to + inflateBackInit(), which is also the buffer that out() uses to write from. + The length written by out() will be at most the window size. Any non-zero + amount of input may be provided by in(). + + For convenience, inflateBack() can be provided input on the first call by + setting strm->next_in and strm->avail_in. If that input is exhausted, then + in() will be called. Therefore strm->next_in must be initialized before + calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called + immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in + must also be initialized, and then if strm->avail_in is not zero, input will + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + + The in_desc and out_desc parameters of inflateBack() are passed as the + first parameter of in() and out() respectively when they are called. These + descriptors can be optionally used to pass any information that the caller- + supplied in() and out() functions need to do their job. + + On return, inflateBack() will set strm->next_in and strm->avail_in to + pass back any unused input that was provided by the last in() call. The + return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR + if in() or out() returned an error, Z_DATA_ERROR if there was a format error + in the deflate stream (in which case strm->msg is set to indicate the nature + of the error), or Z_STREAM_ERROR if the stream was not properly initialized. + In the case of Z_BUF_ERROR, an input or output error can be distinguished + using strm->next_in which will be Z_NULL only if in() returned an error. If + strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning + non-zero.
(in() will always be called before out(), so strm->next_in is + assured to be defined if out() returns non-zero.) Note that inflateBack() + cannot return Z_OK. +*/ + +ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm)); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void)); +/* Return flags indicating compile-time options. + + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! 
+ 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + +#ifndef Z_SOLO + + /* utility functions */ + +/* + The following utility functions are implemented on top of the basic + stream-oriented functions. To simplify the interface, some default options + are assumed (compression level and memory usage, standard memory allocation + functions). The source code of these utility functions can be modified if + you need special options. +*/ + +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed buffer. + + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level)); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen)); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. 
It would be used before a + compress() or compress2() call to allocate the destination buffer. +*/ + +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. (The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, destLen + is the actual size of the uncompressed buffer. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In + the case where there is not enough room, uncompress() will fill the output + buffer with the uncompressed data up to that point. +*/ + + /* gzip file access functions */ + +/* + This library supports reading and writing files in gzip (.gz) format with + an interface similar to that of stdio, using the functions that start with + "gz". The gzip format is different from the zlib format. gzip is a gzip + wrapper, documented in RFC 1952, wrapped around a deflate stream. +*/ + +typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ + +/* +ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); + + Opens a gzip (.gz) file for reading or writing. The mode parameter is as + in fopen ("rb" or "wb") but can also include a compression level ("wb9") or + a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only + compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F' + for fixed code compression as in "wb9F". 
(See the description of + deflateInit2 for more information about the strategy parameter.) 'T' will + request transparent writing or appending with no compression and not using + the gzip format. + + "a" can be used instead of "w" to request that the gzip stream that will + be written be appended to the file. "+" will result in an error, since + reading and writing to the same gzip file is not supported. The addition of + "x" when writing will create the file exclusively, which fails if the file + already exists. On systems that support it, the addition of "e" when + reading or writing will set the flag to close the file on an execve() call. + + These functions, as well as gzip, will read and decode a sequence of gzip + streams in a file. The append function of gzopen() can be used to create + such a file. (Also see gzflush() for another way to do this.) When + appending, gzopen does not test whether the file begins with a gzip stream, + nor does it look for the end of the gzip streams to begin appending. gzopen + will simply append a gzip stream to the existing file. + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. When + reading, this will be detected automatically by looking for the magic two- + byte gzip header. + + gzopen returns NULL if the file could not be opened, if there was + insufficient memory to allocate the gzFile state, or if an invalid mode was + specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). + errno can be checked to determine if the reason gzopen failed was that the + file could not be opened. +*/ + +ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); +/* + gzdopen associates a gzFile with the file descriptor fd. File descriptors + are obtained from calls like open, dup, creat, pipe or fileno (if the file + has been previously opened with fopen). The mode parameter is as in gzopen. 
+ + The next call of gzclose on the returned gzFile will also close the file + descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor + fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, + mode);. The duplicated descriptor should be saved to avoid a leak, since + gzdopen does not close fd if it fails. If you are using fileno() to get the + file descriptor from a FILE *, then you will have to use dup() to avoid + double-close()ing the file descriptor. Both gzclose() and fclose() will + close the associated file descriptor, so they need to have different file + descriptors. + + gzdopen returns NULL if there was insufficient memory to allocate the + gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not + provided, or '+' was provided), or if fd is -1. The file descriptor is not + used until the next gz* read, write, seek, or close operation, so gzdopen + will not detect if fd is invalid (unless fd is -1). +*/ + +ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size)); +/* + Set the internal buffer size used by this library's functions. The + default buffer size is 8192 bytes. This function must be called after + gzopen() or gzdopen(), and before any other calls that read or write the + file. The buffer memory allocation is always deferred to the first read or + write. Two buffers are allocated, either both of the specified size when + writing, or one of the specified size and the other twice that size when + reading. A larger buffer size of, for example, 64K or 128K bytes will + noticeably increase the speed of decompression (reading). + + The new buffer size also affects the maximum length for gzprintf(). + + gzbuffer() returns 0 on success, or -1 on failure, such as being called + too late. +*/ + +ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); +/* + Dynamically update the compression level or strategy. 
See the description + of deflateInit2 for the meaning of these parameters. + + gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not + opened for writing. +*/ + +ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); +/* + Reads the given number of uncompressed bytes from the compressed file. If + the input file is not in gzip format, gzread copies the given number of + bytes into the buffer directly from the file. + + After reaching the end of a gzip stream in the input, gzread will continue + to read, looking for another gzip stream. Any number of gzip streams may be + concatenated in the input file, and will all be decompressed by gzread(). + If something other than a gzip stream is encountered after a gzip stream, + that remaining trailing garbage is ignored (and no error is returned). + + gzread can be used to read a gzip file that is being concurrently written. + Upon reaching the end of the input, gzread will return with the available + data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then + gzclearerr can be used to clear the end of file indicator in order to permit + gzread to be tried again. Z_OK indicates that a gzip stream was completed + on the last gzread. Z_BUF_ERROR indicates that the input file ended in the + middle of a gzip stream. Note that gzread does not return -1 in the event + of an incomplete gzip stream. This error is deferred until gzclose(), which + will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip + stream. Alternatively, gzerror can be used before gzclose to detect this + case. + + gzread returns the number of uncompressed bytes actually read, less than + len for end of file, or -1 for error. +*/ + +ZEXTERN int ZEXPORT gzwrite OF((gzFile file, + voidpc buf, unsigned len)); +/* + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of uncompressed bytes written or 0 in case of + error. 
+*/ + +ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...)); +/* + Converts, formats, and writes the arguments to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written, or 0 in case of error. The number of + uncompressed bytes written is limited to 8191, or one less than the buffer + size given to gzbuffer(). The caller should assure that this limit is not + exceeded. If it is exceeded, then gzprintf() will return an error (0) with + nothing written. In this case, there may also be a buffer overflow with + unpredictable consequences, which is possible only if zlib was compiled with + the insecure functions sprintf() or vsprintf() because the secure snprintf() + or vsnprintf() functions were not available. This can be determined using + zlibCompileFlags(). +*/ + +ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); +/* + Writes the given null-terminated string to the compressed file, excluding + the terminating null character. + + gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); +/* + Reads bytes from the compressed file until len-1 characters are read, or a + newline character is read and transferred to buf, or an end-of-file + condition is encountered. If any characters are read or if len == 1, the + string is terminated with a null character. If no characters are read due + to an end-of-file or len < 1, then the buffer is left untouched. + + gzgets returns buf which is a null-terminated string, or it returns NULL + for end-of-file or in case of error. If there was an error, the contents at + buf are indeterminate. +*/ + +ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); +/* + Writes c, converted to an unsigned char, into the compressed file. gzputc + returns the value that was written, or -1 in case of error. 
+*/ + +ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); +/* + Reads one byte from the compressed file. gzgetc returns this byte or -1 + in case of end of file or error. This is implemented as a macro for speed. + As such, it does not do all of the checking the other functions do. I.e. + it does not check to see if file is NULL, nor whether the structure file + points to has been clobbered or not. +*/ + +ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); +/* + Push one character back onto the stream to be read as the first character + on the next read. At least one character of push-back is allowed. + gzungetc() returns the character pushed, or -1 on failure. gzungetc() will + fail if c is -1, and may fail if a character has been pushed but not read + yet. If gzungetc is used immediately after gzopen or gzdopen, at least the + output buffer size of pushed characters is allowed. (See gzbuffer above.) + The pushed character will be discarded if the stream is repositioned with + gzseek() or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); +/* + Flushes all pending output into the compressed file. The parameter flush + is as in the deflate() function. The return value is the zlib error number + (see function gzerror below). gzflush is only permitted when writing. + + If the flush parameter is Z_FINISH, the remaining data is written and the + gzip stream is completed in the output. If gzwrite() is called again, a new + gzip stream will be started in the output. gzread() is able to read such + concatenated gzip streams. + + gzflush should be called only when strictly necessary because it will + degrade compression if called too often. +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, + z_off_t offset, int whence)); + + Sets the starting position for the next gzread or gzwrite on the given + compressed file. The offset represents a number of bytes in the + uncompressed data stream.
The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); +/* + Rewinds the given file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); + + Returns the starting position for the next gzread or gzwrite on the given + compressed file. This position represents a number of bytes in the + uncompressed data stream, and is zero when starting, even if appending or + reading a gzip stream from the middle of a file using gzdopen(). + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file)); + + Returns the current offset in the file being read or written. This offset + includes the count of bytes that precede the gzip stream, for example when + appending or when using gzdopen() for reading. When reading, the offset + does not include as yet unused buffered input. This information can be used + for a progress indicator. On error, gzoffset() returns -1. +*/ + +ZEXTERN int ZEXPORT gzeof OF((gzFile file)); +/* + Returns true (1) if the end-of-file indicator has been set while reading, + false (0) otherwise. Note that the end-of-file indicator is set only if the + read tried to go past the end of the input, but came up short. 
Therefore, + just like feof(), gzeof() may return false even if there is no more data to + read, in the event that the last read request was for the exact number of + bytes remaining in the input file. This will happen if the input file size + is an exact multiple of the buffer size. + + If gzeof() returns true, then the read functions will return no more data, + unless the end-of-file indicator is reset by gzclearerr() and the input file + has grown since the previous end of file was detected. +*/ + +ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); +/* + Returns true (1) if file is being copied directly while reading, or false + (0) if file is a gzip stream being decompressed. + + If the input file is empty, gzdirect() will return true, since the input + does not contain a gzip stream. + + If gzdirect() is used immediately after gzopen() or gzdopen() it will + cause buffers to be allocated to allow reading the file to determine if it + is a gzip file. Therefore if gzbuffer() is used, it should be called before + gzdirect(). + + When writing, gzdirect() returns true (1) if transparent writing was + requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note: + gzdirect() is not needed when writing. Transparent writing must be + explicitly requested, so the application already knows the answer. When + linking statically, using gzdirect() will include all of the zlib code for + gzip file reading and decompression, which may not be desired.) +*/ + +ZEXTERN int ZEXPORT gzclose OF((gzFile file)); +/* + Flushes all pending output if necessary, closes the compressed file and + deallocates the (de)compression state. Note that once file is closed, you + cannot call gzerror with file, since its structures have been deallocated. + gzclose must not be called more than once on the same file, just as free + must not be called more than once on the same allocation. 
+ + gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a + file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the + last read ended in the middle of a gzip stream, or Z_OK on success. +*/ + +ZEXTERN int ZEXPORT gzclose_r OF((gzFile file)); +ZEXTERN int ZEXPORT gzclose_w OF((gzFile file)); +/* + Same as gzclose(), but gzclose_r() is only for use when reading, and + gzclose_w() is only for use when writing or appending. The advantage to + using these instead of gzclose() is that they avoid linking in zlib + compression or decompression code that is not used when only reading or only + writing respectively. If gzclose() is used, then both compression and + decompression code will be included in the application when linking to a static + zlib library. +*/ + +ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); +/* + Returns the error message for the last error which occurred on the given + compressed file. errnum is set to zlib error number. If an error occurred + in the file system and not in the compression library, errnum is set to + Z_ERRNO and the application may consult errno to get the exact error code. + + The application must not modify the returned string. Future calls to + this function may invalidate the previously returned string. If file is + closed, then the string previously returned by gzerror will no longer be + available. + + gzerror() should be used to distinguish errors from end-of-file for those + functions above that do not distinguish those cases in their return values. +*/ + +ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); +/* + Clears the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio. This is useful for continuing to read a gzip + file that is being written concurrently.
+*/ + +#endif /* !Z_SOLO */ + + /* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the compression + library. +*/ + +ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is Z_NULL, this function returns the + required initial value for the checksum. + + An Adler-32 checksum is almost as reliable as a CRC32 but can be computed + much faster. + + Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +/* +ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, + z_off_t len2)); + + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note + that the z_off_t type (like off_t) is a signed integer. If len2 is + negative, the result has no meaning or utility. +*/ + +ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. If buf is Z_NULL, this function returns the required + initial value for the crc. Pre- and post-conditioning (one's complement) is + performed within this function so it shouldn't be done by the application. 
+ + Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); + + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2. +*/ + + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size)); +ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size)); +#define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +#define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +#define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +#define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + 
ZLIB_VERSION, (int)sizeof(z_stream)) + +#ifndef Z_SOLO + +/* gzgetc() macro and its supporting function and exposed data structure. Note + * that the real internal state is much larger than the exposed structure. + * This abbreviated structure exposes just enough for the gzgetc() macro. The + * user should not mess with these exposed elements, since their names or + * behavior could change in the future, perhaps even capriciously. They can + * only be used by the gzgetc() macro. You have been warned. + */ +struct gzFile_s { + unsigned have; + unsigned char *next; + z_off64_t pos; +}; +ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +# define z_gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g)) +#else +# define gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g)) +#endif + +/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or + * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if + * both are true, the application gets the *64 functions, and the regular + * functions are changed to 64 bits) -- in case these are set on systems + * without large file support, _LFS64_LARGEFILE must also be true + */ +#ifdef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); + ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t)); +#endif + +#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) +# ifdef Z_PREFIX_SET +# define z_gzopen z_gzopen64 +# define z_gzseek z_gzseek64 +# define z_gztell z_gztell64 +# define z_gzoffset z_gzoffset64 +# define z_adler32_combine z_adler32_combine64 +# define z_crc32_combine z_crc32_combine64 +# else +# define 
gzopen gzopen64 +# define gzseek gzseek64 +# define gztell gztell64 +# define gzoffset gzoffset64 +# define adler32_combine adler32_combine64 +# define crc32_combine crc32_combine64 +# endif +# ifndef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); +# endif +#else + ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); +#endif + +#else /* Z_SOLO */ + + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); + +#endif /* !Z_SOLO */ + +/* hack for buggy compilers */ +#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL) + struct internal_state {int dummy;}; +#endif + +/* undocumented functions */ +ZEXTERN const char * ZEXPORT zError OF((int)); +ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp)); +ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table OF((void)); +ZEXTERN int ZEXPORT inflateUndermine OF((z_streamp, int)); +ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp)); +ZEXTERN int ZEXPORT deflateResetKeep OF((z_streamp)); +#if defined(_WIN32) && !defined(Z_SOLO) +ZEXTERN gzFile ZEXPORT gzopen_w OF((const wchar_t *path, + const char *mode)); +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +ZEXTERN int ZEXPORTVA gzvprintf Z_ARG((gzFile file, + const char *format, + va_list va)); +# endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* 
ZLIB_H */ diff --git a/frontend/vita/retro_inline.h b/frontend/vita/retro_inline.h new file mode 100644 index 000000000..8535d8480 --- /dev/null +++ b/frontend/vita/retro_inline.h @@ -0,0 +1,39 @@ +/* Copyright (C) 2010-2015 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (retro_inline.h). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#ifndef __LIBRETRO_SDK_INLINE_H
+#define __LIBRETRO_SDK_INLINE_H
+
+#ifndef INLINE
+
+#if !defined(__cplusplus) && defined(_WIN32)
+#define INLINE _inline
+#elif defined(__STDC_VERSION__) && __STDC_VERSION__>=199901L
+#define INLINE inline
+#elif defined(__GNUC__)
+#define INLINE __inline__
+#else
+#define INLINE
+#endif
+
+#endif
+#endif
diff --git a/frontend/vita/sys/mman.h b/frontend/vita/sys/mman.h
new file mode 100644
index 000000000..d2634836f
--- /dev/null
+++ b/frontend/vita/sys/mman.h
@@ -0,0 +1,94 @@
+#ifndef MMAN_H
+#define MMAN_H
+
+#include
+#include
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Plain hex instead of 0b... literals, which are a GNU extension before C23.
+#define PROT_READ 0x1
+#define PROT_WRITE 0x2
+#define PROT_EXEC 0x4
+#define MAP_PRIVATE 2
+#define MAP_ANONYMOUS 0x20
+
+#define MAP_FAILED ((void *)-1)
+
+/*
+ * mmap() stand-in for the Vita, backed by sceKernelAllocMemBlockForVM().
+ *
+ * Only len is used: the addr hint, prot, flags, fd and offset are ignored.
+ * Returns the base address of the new block, or MAP_FAILED if that base is
+ * NULL. Allocation or base-lookup errors are logged and end the process
+ * through exit(1).
+ */
+static inline void* mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset)
+{
+    int block, ret;
+
+    (void)prot;
+    (void)flags;
+    (void)fd;
+    (void)offset;
+
+    block = sceKernelAllocMemBlockForVM("code", len);
+    if (block <= 0) {
+        // casts make the arguments match the %X conversions
+        sceClibPrintf("could not alloc mem block @0x%08X 0x%08X \n",
+                      (unsigned int)block, (unsigned int)len);
+        exit(1);
+    }
+
+    // addr doubles as the output slot, so the caller's hint is overwritten
+    ret = sceKernelGetMemBlockBase(block, &addr);
+    if (ret < 0) {
+        // the error code is more telling than addr here, so log that instead
+        sceClibPrintf("could not get address @0x%08X 0x%08X \n",
+                      (unsigned int)block, (unsigned int)ret);
+        exit(1);
+    }
+
+    if (!addr) {
+        // release the block instead of leaking it on the failure path
+        sceKernelFreeMemBlock(block);
+        return MAP_FAILED;
+    }
+
+    return addr;
+}
+
+/* mprotect() stub: ignores its arguments and always returns 0. */
+static inline int mprotect(void *addr, size_t len, int prot)
+{
+    (void)addr;
+    (void)len;
+    (void)prot;
+    return 0;
+}
+
+/*
+ * munmap() stand-in: finds the memory block for addr/len and frees it.
+ * Returns -1 if the lookup fails, otherwise whatever
+ * sceKernelFreeMemBlock() returns.
+ */
+static inline int munmap(void *addr, size_t len)
+{
+    int uid = sceKernelFindMemBlockByAddr(addr, len);
+
+    // a failed lookup (negative result) yields no block to free
+    if (uid < 0) {
+        return -1;
+    }
+
+    return sceKernelFreeMemBlock(uid);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MMAN_H
diff --git a/frontend/wiiu/coreinit/memorymap.h b/frontend/wiiu/coreinit/memorymap.h
new file mode 100644
index 000000000..1e068719d
--- /dev/null
+++ b/frontend/wiiu/coreinit/memorymap.h
@@ -0,0 +1,199 @@
+//SPDX-License-Identifier: GPL-2.0-or-later
+/* From wut:
+ *
https://github.com/devkitPro/wut/blob/0b196e8abcedeb0238105f3ffab7cb0093638b86/include/coreinit/memorymap.h + */ + +#pragma once +#include +#include +typedef bool BOOL; + +/** + * \defgroup coreinit_memorymap Memory Map + * \ingroup coreinit + * + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum OSMemoryMapMode +{ + OS_MAP_MEMORY_INVALID = 0, + OS_MAP_MEMORY_READ_ONLY = 1, + OS_MAP_MEMORY_READ_WRITE = 2, + OS_MAP_MEMORY_FREE = 3, + OS_MAP_MEMORY_ALLOCATED = 4, +} OSMemoryMapMode; + +#define OS_PAGE_SIZE (128 * 1024) + +uint32_t +OSEffectiveToPhysical(uint32_t virtualAddress); + +BOOL +OSIsAddressValid(uint32_t virtualAddress); + +BOOL +__OSValidateAddressSpaceRange(int /* unused */, + uint32_t virtualAddress, + uint32_t size); + +/** + * Allocates virtual address range for later mapping. + * + * \param virtualAddress + * Requested start address for the range. If there is no preference, NULL can be + * used. + * + * \param size + * Size of address range to allocate. + * + * \param align + * Alignment of address range to allocate. + * + * \return + * The starting address of the newly allocated range, or NULL on failure. + * + * \sa + * - OSFreeVirtAddr() + * - OSMapMemory() + */ +uint32_t +OSAllocVirtAddr(uint32_t virtualAddress, + uint32_t size, + uint32_t align); + +/** + * Frees a previously allocated virtual address range back to the system. + * + * \param virtualAddress + * The start of the virtual address range to free. + * + * \param size + * The size of the virtual address range to free. + * + * \return + * \c true on success. + */ +BOOL +OSFreeVirtAddr(uint32_t virtualAddress, + uint32_t size); + +/** + * Determines the status of the given virtual memory address - mapped read-write + * or read-only, free, allocated or invalid. + * + * \param virtualAddress + * The virtual address to query. + * + * \return + * The status of the memory address - see #OSMemoryMapMode. 
+ */ +OSMemoryMapMode +OSQueryVirtAddr(uint32_t virtualAddress); + +/** + * Maps a physical address to a virtual address, with a given size and set of + * permissions. + * + * \param virtualAddress + * The target virtual address for the mapping. + * + * \param physicalAddress + * Physical address of the memory to back the mapping. + * + * \param size + * Size, in bytes, of the desired mapping. Likely has an alignment requirement. + * + * \param mode + * Permissions to map the memory with - see #OSMemoryMapMode. + * + * \return + * \c true on success. + * + * \sa + * - OSAllocVirtAddr() + * - OSUnmapMemory() + */ +BOOL +OSMapMemory(uint32_t virtualAddress, + uint32_t physicalAddress, + uint32_t size, + OSMemoryMapMode mode); + +/** + * Unmaps previously mapped memory. + * + * \param virtualAddress + * Starting address of the area to unmap. + * + * \param size + * Size of the memory area to unmap. + * + * \return + * \c true on success. + */ +BOOL +OSUnmapMemory(uint32_t virtualAddress, + uint32_t size); + +/** + * Gets the range of virtual addresses available for mapping. + * + * \param outVirtualAddress + * Pointer to write the starting address of the memory area to. + * + * \param outSize + * Pointer to write the size of the memory area to. + * + * \sa + * - OSMapMemory() + */ +void +OSGetMapVirtAddrRange(uint32_t *outVirtualAddress, + uint32_t *outSize); + +/** + * Gets the range of available physical memory (not reserved for app code or + * data). + * + * \param outPhysicalAddress + * Pointer to write the starting physical address of the memory area to. + * + * \param outSize + * Pointer to write the size of the memory area to. + * + * \if false + * Is memory returned by this function actually safe to map and use? couldn't + * get a straight answer from decaf-emu's kernel_memory.cpp... + * \endif + */ +void +OSGetAvailPhysAddrRange(uint32_t *outPhysicalAddress, + uint32_t *outSize); + +/** + * Gets the range of physical memory used for the application's data. 
+ * + * \param outPhysicalAddress + * Pointer to write the starting physical address of the memory area to. + * + * \param outSize + * Pointer to write the size of the memory area to. + * + * \if false + * does this include the main heap? + * \endif + */ +void +OSGetDataPhysAddrRange(uint32_t *outPhysicalAddress, + uint32_t *outSize); + +#ifdef __cplusplus +} +#endif + +/** @} */ diff --git a/include/lightning/lightning.h b/include/lightning/lightning.h new file mode 100644 index 000000000..b6b7bbea7 --- /dev/null +++ b/include/lightning/lightning.h @@ -0,0 +1,1580 @@ +/* + * Copyright (C) 2012-2023 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#ifndef _lightning_h +#define _lightning_h + +#include +#include +#include +#include +#include +#include + +#if defined(__hpux) && defined(__hppa__) +# include +#endif +#if defined(__alpha__) && defined(__osf__) +# include +#endif + +#ifndef __WORDSIZE +# if defined(WORDSIZE) /* ppc darwin */ +# define __WORDSIZE WORDSIZE +# elif defined(__SIZEOF_POINTER__) /* ppc aix */ +# define __WORDSIZE (__SIZEOF_POINTER__ << 3) +# elif defined(_ILP32) /* hppa hp-ux */ +# define __WORDSIZE 32 +# elif defined(_LP64) /* ia64 hp-ux (with cc +DD64) */ +# define __WORDSIZE 64 +# elif defined(_MIPS_SZPTR) /* mips irix */ +# if _MIPS_SZPTR == 32 +# define __WORDSIZE 32 +# else +# define __WORDSIZE 64 +# endif +# else /* From FreeBSD 9.1 stdint.h */ +# if defined(UINTPTR_MAX) && defined(UINT64_MAX) && \ + (UINTPTR_MAX == UINT64_MAX) +# define __WORDSIZE 64 +# else +# define __WORDSIZE 32 +# endif +# endif +#endif +#ifndef __LITTLE_ENDIAN +# if defined(LITTLE_ENDIAN) /* ppc darwin */ +# define __LITTLE_ENDIAN LITTLE_ENDIAN +# elif defined(__ORDER_LITTLE_ENDIAN__) /* ppc aix */ +# define __LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__ +# else +# define __LITTLE_ENDIAN 1234 +# endif +#endif +#ifndef __BIG_ENDIAN +# if defined(BIG_ENDIAN) /* ppc darwin */ +# define __BIG_ENDIAN BIG_ENDIAN +# elif defined(__ORDER_BIG_ENDIAN__) /* ppc aix */ +# define __BIG_ENDIAN __ORDER_BIG_ENDIAN__ +# else +# define __BIG_ENDIAN 4321 +# endif +#endif +#ifndef __BYTE_ORDER +# if defined(BYTE_ORDER) /* ppc darwin */ +# define __BYTE_ORDER BYTE_ORDER +# elif defined(__BYTE_ORDER__) /* ppc aix */ +# define __BYTE_ORDER __BYTE_ORDER__ +# elif defined(_BIG_ENDIAN) /* hppa hp-ux */ +# define __BYTE_ORDER __BIG_ENDIAN +# elif defined(__BIG_ENDIAN__) /* ia64 hp-ux */ +# define __BYTE_ORDER __BIG_ENDIAN +# elif defined(__i386__) /* 32 bit x86 solaris */ +# define __BYTE_ORDER __LITTLE_ENDIAN +# elif defined(__x86_64__) /* 64 bit x86 solaris */ +# define 
__BYTE_ORDER __LITTLE_ENDIAN +# elif defined(__MIPSEB) /* mips irix */ +# define __BYTE_ORDER __BIG_ENDIAN +# else +# error cannot figure __BYTE_ORDER +# endif +#endif + +typedef signed char jit_int8_t; +typedef unsigned char jit_uint8_t; +typedef signed short jit_int16_t; +typedef unsigned short jit_uint16_t; +typedef signed int jit_int32_t; +typedef unsigned int jit_uint32_t; +#if __WORDSIZE == 32 +typedef signed long long jit_int64_t; +typedef unsigned long long jit_uint64_t; +typedef jit_int32_t jit_word_t; +typedef jit_uint32_t jit_uword_t; +#elif (_WIN32 && !__CYGWIN__) +typedef signed long long jit_int64_t; +typedef unsigned long long jit_uint64_t; +typedef jit_int64_t jit_word_t; +typedef jit_uint64_t jit_uword_t; +#else +typedef signed long jit_int64_t; +typedef unsigned long jit_uint64_t; +typedef jit_int64_t jit_word_t; +typedef jit_uint64_t jit_uword_t; +#endif +typedef float jit_float32_t; +typedef double jit_float64_t; +typedef void* jit_pointer_t; +typedef jit_int32_t jit_bool_t; +typedef jit_int32_t jit_gpr_t; +typedef jit_int32_t jit_fpr_t; + +#if !defined(__powerpc__) && \ + (defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__)) +#define __powerpc__ 1 +#endif + +#if defined(__i386__) || defined(__x86_64__) +# include +#elif defined(__mips__) +# include +#elif defined(__arm__) +# include +#elif defined(__powerpc__) +# include +#elif defined(__sparc__) +# include +#elif defined(__ia64__) +# include +#elif defined(__hppa__) +# include +#elif defined(__aarch64__) +# include +#elif defined(__s390__) || defined(__s390x__) +# include +#elif defined(__alpha__) +# include +#elif defined(__riscv) +# include +#elif defined(__loongarch__) +# include +#endif + +#define jit_flag_node 0x0001 /* patch node not absolute */ +#define jit_flag_patch 0x0002 /* jump already patched */ +#define jit_flag_data 0x0004 /* data in the constant pool */ +#define jit_flag_use 0x0008 /* do not remove marker label */ +#define jit_flag_synth 0x0010 /* synthesized 
instruction */ +#define jit_flag_head 0x1000 /* label reached by normal flow */ +#define jit_flag_varargs 0x2000 /* call{r,i} to varargs function */ + +#define JIT_R(index) jit_r(index) +#define JIT_V(index) jit_v(index) +#define JIT_F(index) jit_f(index) +#define JIT_R_NUM jit_r_num() +#define JIT_V_NUM jit_v_num() +#define JIT_F_NUM jit_f_num() + +#define JIT_DISABLE_DATA 1 /* force synthesize of constants */ +#define JIT_DISABLE_NOTE 2 /* disable debug info generation */ + +#define jit_class_chk 0x02000000 /* just checking */ +#define jit_class_arg 0x08000000 /* argument register */ +#define jit_class_sav 0x10000000 /* callee save */ +#define jit_class_gpr 0x20000000 /* general purpose */ +#define jit_class_fpr 0x40000000 /* float */ +#define jit_class(reg) ((reg) & 0xffff0000) +#define jit_regno(reg) ((reg) & 0x00007fff) + +typedef struct jit_node jit_node_t; +typedef struct jit_state jit_state_t; + +typedef enum { + jit_code_data, +#define jit_live(u) jit_new_node_w(jit_code_live, u) +#define jit_align(u) jit_new_node_w(jit_code_align, u) + jit_code_live, jit_code_align, + jit_code_save, jit_code_load, +#define jit_skip(u) jit_new_node_w(jit_code_skip, u) + jit_code_skip, +#define jit_name(u) _jit_name(_jit,u) + jit_code_name, +#define jit_note(u, v) _jit_note(_jit, u, v) +#define jit_label() _jit_label(_jit) +#define jit_forward() _jit_forward(_jit) +#define jit_indirect() _jit_indirect(_jit) +#define jit_link(u) _jit_link(_jit,u) + jit_code_note, jit_code_label, + +#define jit_prolog() _jit_prolog(_jit) + jit_code_prolog, + +#define jit_ellipsis() _jit_ellipsis(_jit) + jit_code_ellipsis, +#define jit_va_push(u) _jit_va_push(_jit,u) + jit_code_va_push, +#define jit_allocai(u) _jit_allocai(_jit,u) +#define jit_allocar(u, v) _jit_allocar(_jit,u,v) + jit_code_allocai, jit_code_allocar, + +#define jit_arg_c() _jit_arg(_jit, jit_code_arg_c) +#define jit_arg_s() _jit_arg(_jit, jit_code_arg_s) +#define jit_arg_i() _jit_arg(_jit, jit_code_arg_i) +# if __WORDSIZE == 
32 +# define jit_arg() jit_arg_i() +#else +# define jit_arg_l() _jit_arg(_jit, jit_code_arg_l) +# define jit_arg() jit_arg_l() +#endif + jit_code_arg_c, jit_code_arg_s, + jit_code_arg_i, jit_code_arg_l, +#if __WORDSIZE == 32 +# define jit_code_arg jit_code_arg_i +#else +# define jit_code_arg jit_code_arg_l +#endif + +#define jit_getarg_c(u,v) _jit_getarg_c(_jit,u,v) +#define jit_getarg_uc(u,v) _jit_getarg_uc(_jit,u,v) +#define jit_getarg_s(u,v) _jit_getarg_s(_jit,u,v) +#define jit_getarg_us(u,v) _jit_getarg_us(_jit,u,v) +#define jit_getarg_i(u,v) _jit_getarg_i(_jit,u,v) +#if __WORDSIZE == 32 +# define jit_getarg(u,v) jit_getarg_i(u,v) +#else +# define jit_getarg_ui(u,v) _jit_getarg_ui(_jit,u,v) +# define jit_getarg_l(u,v) _jit_getarg_l(_jit,u,v) +# define jit_getarg(u,v) jit_getarg_l(u,v) +#endif + jit_code_getarg_c, jit_code_getarg_uc, + jit_code_getarg_s, jit_code_getarg_us, + jit_code_getarg_i, jit_code_getarg_ui, + jit_code_getarg_l, +#if __WORDSIZE == 32 +# define jit_code_getarg jit_code_getarg_i +#else +# define jit_code_getarg jit_code_getarg_l +#endif + +#define jit_putargr_c(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_c) +#define jit_putargi_c(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_c) +#define jit_putargr_uc(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_uc) +#define jit_putargi_uc(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_uc) +#define jit_putargr_s(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_s) +#define jit_putargi_s(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_s) +#define jit_putargr_us(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_us) +#define jit_putargi_us(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_us) +#define jit_putargr_i(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_i) +#define jit_putargi_i(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_i) +#if __WORDSIZE == 32 +# define jit_putargr(u,v) jit_putargr_i(u,v) +# define jit_putargi(u,v) jit_putargi_i(u,v) +#else +# define jit_putargr_ui(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_ui) +# define 
jit_putargi_ui(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_ui) +# define jit_putargr_l(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_l) +# define jit_putargi_l(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_l) +# define jit_putargr(u,v) jit_putargr_l(u,v) +# define jit_putargi(u,v) jit_putargi_l(u,v) +#endif + jit_code_putargr_c, jit_code_putargi_c, + jit_code_putargr_uc, jit_code_putargi_uc, + jit_code_putargr_s, jit_code_putargi_s, + jit_code_putargr_us, jit_code_putargi_us, + jit_code_putargr_i, jit_code_putargi_i, + jit_code_putargr_ui, jit_code_putargi_ui, + jit_code_putargr_l, jit_code_putargi_l, +#if __WORDSIZE == 32 +# define jit_code_putargr jit_code_putargr_i +# define jit_code_putargi jit_code_putargi_i +#else +# define jit_code_putargr jit_code_putargr_l +# define jit_code_putargi jit_code_putargi_l +#endif + +#define jit_va_start(u) jit_new_node_w(jit_code_va_start, u) + jit_code_va_start, +#define jit_va_arg(u, v) jit_new_node_ww(jit_code_va_arg, u, v) +#define jit_va_arg_d(u, v) jit_new_node_ww(jit_code_va_arg_d, u, v) + jit_code_va_arg, jit_code_va_arg_d, +#define jit_va_end(u) jit_new_node_w(jit_code_va_end, u) + jit_code_va_end, + +#define jit_addr(u,v,w) jit_new_node_www(jit_code_addr,u,v,w) +#define jit_addi(u,v,w) jit_new_node_www(jit_code_addi,u,v,w) + jit_code_addr, jit_code_addi, +#define jit_addcr(u,v,w) jit_new_node_www(jit_code_addcr,u,v,w) +#define jit_addci(u,v,w) jit_new_node_www(jit_code_addci,u,v,w) + jit_code_addcr, jit_code_addci, +#define jit_addxr(u,v,w) jit_new_node_www(jit_code_addxr,u,v,w) +#define jit_addxi(u,v,w) jit_new_node_www(jit_code_addxi,u,v,w) + jit_code_addxr, jit_code_addxi, +#define jit_subr(u,v,w) jit_new_node_www(jit_code_subr,u,v,w) +#define jit_subi(u,v,w) jit_new_node_www(jit_code_subi,u,v,w) + jit_code_subr, jit_code_subi, +#define jit_subcr(u,v,w) jit_new_node_www(jit_code_subcr,u,v,w) +#define jit_subci(u,v,w) jit_new_node_www(jit_code_subci,u,v,w) + jit_code_subcr, jit_code_subci, +#define jit_subxr(u,v,w) 
jit_new_node_www(jit_code_subxr,u,v,w) +#define jit_subxi(u,v,w) jit_new_node_www(jit_code_subxi,u,v,w) + jit_code_subxr, jit_code_subxi, +#define jit_rsbr(u,v,w) jit_subr(u,w,v) +#define jit_rsbi(u,v,w) jit_new_node_www(jit_code_rsbi,u,v,w) + jit_code_rsbi, +#define jit_mulr(u,v,w) jit_new_node_www(jit_code_mulr,u,v,w) +#define jit_muli(u,v,w) jit_new_node_www(jit_code_muli,u,v,w) + jit_code_mulr, jit_code_muli, +#define jit_qmulr(l,h,v,w) jit_new_node_qww(jit_code_qmulr,l,h,v,w) +#define jit_qmuli(l,h,v,w) jit_new_node_qww(jit_code_qmuli,l,h,v,w) + jit_code_qmulr, jit_code_qmuli, +#define jit_qmulr_u(l,h,v,w) jit_new_node_qww(jit_code_qmulr_u,l,h,v,w) +#define jit_qmuli_u(l,h,v,w) jit_new_node_qww(jit_code_qmuli_u,l,h,v,w) + jit_code_qmulr_u, jit_code_qmuli_u, +#define jit_divr(u,v,w) jit_new_node_www(jit_code_divr,u,v,w) +#define jit_divi(u,v,w) jit_new_node_www(jit_code_divi,u,v,w) + jit_code_divr, jit_code_divi, +#define jit_divr_u(u,v,w) jit_new_node_www(jit_code_divr_u,u,v,w) +#define jit_divi_u(u,v,w) jit_new_node_www(jit_code_divi_u,u,v,w) + jit_code_divr_u, jit_code_divi_u, +#define jit_qdivr(l,h,v,w) jit_new_node_qww(jit_code_qdivr,l,h,v,w) +#define jit_qdivi(l,h,v,w) jit_new_node_qww(jit_code_qdivi,l,h,v,w) + jit_code_qdivr, jit_code_qdivi, +#define jit_qdivr_u(l,h,v,w) jit_new_node_qww(jit_code_qdivr_u,l,h,v,w) +#define jit_qdivi_u(l,h,v,w) jit_new_node_qww(jit_code_qdivi_u,l,h,v,w) + jit_code_qdivr_u, jit_code_qdivi_u, +#define jit_remr(u,v,w) jit_new_node_www(jit_code_remr,u,v,w) +#define jit_remi(u,v,w) jit_new_node_www(jit_code_remi,u,v,w) + jit_code_remr, jit_code_remi, +#define jit_remr_u(u,v,w) jit_new_node_www(jit_code_remr_u,u,v,w) +#define jit_remi_u(u,v,w) jit_new_node_www(jit_code_remi_u,u,v,w) + jit_code_remr_u, jit_code_remi_u, + +#define jit_andr(u,v,w) jit_new_node_www(jit_code_andr,u,v,w) +#define jit_andi(u,v,w) jit_new_node_www(jit_code_andi,u,v,w) + jit_code_andr, jit_code_andi, +#define jit_orr(u,v,w) 
jit_new_node_www(jit_code_orr,u,v,w) +#define jit_ori(u,v,w) jit_new_node_www(jit_code_ori,u,v,w) + jit_code_orr, jit_code_ori, +#define jit_xorr(u,v,w) jit_new_node_www(jit_code_xorr,u,v,w) +#define jit_xori(u,v,w) jit_new_node_www(jit_code_xori,u,v,w) + jit_code_xorr, jit_code_xori, + +#define jit_lshr(u,v,w) jit_new_node_www(jit_code_lshr,u,v,w) +#define jit_lshi(u,v,w) jit_new_node_www(jit_code_lshi,u,v,w) + jit_code_lshr, jit_code_lshi, +#define jit_rshr(u,v,w) jit_new_node_www(jit_code_rshr,u,v,w) +#define jit_rshi(u,v,w) jit_new_node_www(jit_code_rshi,u,v,w) + jit_code_rshr, jit_code_rshi, +#define jit_rshr_u(u,v,w) jit_new_node_www(jit_code_rshr_u,u,v,w) +#define jit_rshi_u(u,v,w) jit_new_node_www(jit_code_rshi_u,u,v,w) + jit_code_rshr_u, jit_code_rshi_u, + +#define jit_negr(u,v) jit_new_node_ww(jit_code_negr,u,v) +#define jit_negi(u,v) jit_new_node_ww(jit_code_negi,u,v) + jit_code_negr, jit_code_negi, +#define jit_comr(u,v) jit_new_node_ww(jit_code_comr,u,v) +#define jit_comi(u,v) jit_new_node_ww(jit_code_comi,u,v) + jit_code_comr, jit_code_comi, + +#define jit_ltr(u,v,w) jit_new_node_www(jit_code_ltr,u,v,w) +#define jit_lti(u,v,w) jit_new_node_www(jit_code_lti,u,v,w) + jit_code_ltr, jit_code_lti, +#define jit_ltr_u(u,v,w) jit_new_node_www(jit_code_ltr_u,u,v,w) +#define jit_lti_u(u,v,w) jit_new_node_www(jit_code_lti_u,u,v,w) + jit_code_ltr_u, jit_code_lti_u, +#define jit_ler(u,v,w) jit_new_node_www(jit_code_ler,u,v,w) +#define jit_lei(u,v,w) jit_new_node_www(jit_code_lei,u,v,w) + jit_code_ler, jit_code_lei, +#define jit_ler_u(u,v,w) jit_new_node_www(jit_code_ler_u,u,v,w) +#define jit_lei_u(u,v,w) jit_new_node_www(jit_code_lei_u,u,v,w) + jit_code_ler_u, jit_code_lei_u, +#define jit_eqr(u,v,w) jit_new_node_www(jit_code_eqr,u,v,w) +#define jit_eqi(u,v,w) jit_new_node_www(jit_code_eqi,u,v,w) + jit_code_eqr, jit_code_eqi, +#define jit_ger(u,v,w) jit_new_node_www(jit_code_ger,u,v,w) +#define jit_gei(u,v,w) jit_new_node_www(jit_code_gei,u,v,w) + jit_code_ger, 
jit_code_gei, +#define jit_ger_u(u,v,w) jit_new_node_www(jit_code_ger_u,u,v,w) +#define jit_gei_u(u,v,w) jit_new_node_www(jit_code_gei_u,u,v,w) + jit_code_ger_u, jit_code_gei_u, +#define jit_gtr(u,v,w) jit_new_node_www(jit_code_gtr,u,v,w) +#define jit_gti(u,v,w) jit_new_node_www(jit_code_gti,u,v,w) + jit_code_gtr, jit_code_gti, +#define jit_gtr_u(u,v,w) jit_new_node_www(jit_code_gtr_u,u,v,w) +#define jit_gti_u(u,v,w) jit_new_node_www(jit_code_gti_u,u,v,w) + jit_code_gtr_u, jit_code_gti_u, +#define jit_ner(u,v,w) jit_new_node_www(jit_code_ner,u,v,w) +#define jit_nei(u,v,w) jit_new_node_www(jit_code_nei,u,v,w) + jit_code_ner, jit_code_nei, + +#define jit_movr(u,v) jit_new_node_ww(jit_code_movr,u,v) +#define jit_movi(u,v) jit_new_node_ww(jit_code_movi,u,v) + jit_code_movr, jit_code_movi, + +#define jit_movnr(u,v,w) jit_new_node_www(jit_code_movnr,u,v,w) +#define jit_movzr(u,v,w) jit_new_node_www(jit_code_movzr,u,v,w) + jit_code_movnr, jit_code_movzr, + + jit_code_casr, jit_code_casi, +#define jit_casr(u, v, w, x) jit_new_node_wwq(jit_code_casr, u, v, w, x) +#define jit_casi(u, v, w, x) jit_new_node_wwq(jit_code_casi, u, v, w, x) + +#define jit_extr_c(u,v) jit_new_node_ww(jit_code_extr_c,u,v) +#define jit_exti_c(u,v) jit_new_node_ww(jit_code_exti_c,u,v) + jit_code_extr_c, jit_code_exti_c, + +#define jit_extr_uc(u,v) jit_new_node_ww(jit_code_extr_uc,u,v) +#define jit_exti_uc(u,v) jit_new_node_ww(jit_code_exti_uc,u,v) + jit_code_extr_uc, jit_code_exti_uc, + +#define jit_extr_s(u,v) jit_new_node_ww(jit_code_extr_s,u,v) +#define jit_exti_s(u,v) jit_new_node_ww(jit_code_exti_s,u,v) + jit_code_extr_s, jit_code_exti_s, + +#define jit_extr_us(u,v) jit_new_node_ww(jit_code_extr_us,u,v) +#define jit_exti_us(u,v) jit_new_node_ww(jit_code_exti_us,u,v) + jit_code_extr_us, jit_code_exti_us, + +#if __WORDSIZE == 64 +# define jit_extr_i(u,v) jit_new_node_ww(jit_code_extr_i,u,v) +# define jit_exti_i(u,v) jit_new_node_ww(jit_code_exti_i,u,v) +# define jit_extr_ui(u,v) 
jit_new_node_ww(jit_code_extr_ui,u,v) +# define jit_exti_ui(u,v) jit_new_node_ww(jit_code_exti_ui,u,v) +#endif + jit_code_extr_i, jit_code_exti_i, + jit_code_extr_ui, jit_code_exti_ui, + +#define jit_bswapr_us(u,v) jit_new_node_ww(jit_code_bswapr_us,u,v) +#define jit_bswapi_us(u,v) jit_new_node_ww(jit_code_bswapi_us,u,v) + jit_code_bswapr_us, jit_code_bswapi_us, + +#define jit_bswapr_ui(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) +#define jit_bswapi_ui(u,v) jit_new_node_ww(jit_code_bswapi_ui,u,v) + jit_code_bswapr_ui, jit_code_bswapi_ui, + +#if __WORDSIZE == 64 +# define jit_bswapr_ul(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) +# define jit_bswapi_ul(u,v) jit_new_node_ww(jit_code_bswapi_ul,u,v) +#endif + jit_code_bswapr_ul, jit_code_bswapi_ul, + +#if __WORDSIZE == 32 +# define jit_bswapr(u,v) jit_bswapr_ui(u,v) +# define jit_bswapi(u,v) jit_bswapi_ui(u,v) +#else +# define jit_bswapr(u,v) jit_bswapr_ul(u,v) +# define jit_bswapi(u,v) jit_bswapi_ul(u,v) +#endif + +#define jit_htonr_us(u,v) jit_new_node_ww(jit_code_htonr_us,u,v) +#define jit_ntohr_us(u,v) jit_htonr_us(u,v) +#define jit_htoni_us(u,v) jit_new_node_ww(jit_code_htoni_us,u,v) +#define jit_ntohi_us(u,v) jit_htoni_us(u, v) + jit_code_htonr_us, jit_code_htoni_us, + +#define jit_htonr_ui(u,v) jit_new_node_ww(jit_code_htonr_ui,u,v) +#define jit_ntohr_ui(u,v) jit_htonr_ui(u,v) +#define jit_htoni_ui(u,v) jit_new_node_ww(jit_code_htoni_ui,u,v) +#define jit_ntohi_ui(u,v) jit_htoni_ui(u, v) + jit_code_htonr_ui, jit_code_htoni_ui, + +#if __WORDSIZE == 64 +# define jit_htonr_ul(u,v) jit_new_node_ww(jit_code_htonr_ul,u,v) +# define jit_ntohr_ul(u,v) jit_htonr_ul(u,v) +# define jit_htoni_ul(u,v) jit_new_node_ww(jit_code_htoni_ul,u,v) +# define jit_ntohi_ul(u,v) jit_htoni_ul(u, v) +#endif + jit_code_htonr_ul, jit_code_htoni_ul, + +#if __WORDSIZE == 32 +# define jit_htonr(u,v) jit_htonr_ui(u,v) +# define jit_htoni(u,v) jit_htoni_ui(u,v) +#else +# define jit_htonr(u,v) jit_htonr_ul(u,v) +# define jit_htoni(u,v) 
jit_htoni_ul(u,v) +#endif +#define jit_ntohr(u,v) jit_htonr(u,v) +#define jit_ntohi(u,v) jit_htoni(u,v) + +#define jit_ldr_c(u,v) jit_new_node_ww(jit_code_ldr_c,u,v) +#define jit_ldi_c(u,v) jit_new_node_wp(jit_code_ldi_c,u,v) + jit_code_ldr_c, jit_code_ldi_c, +#define jit_ldr_uc(u,v) jit_new_node_ww(jit_code_ldr_uc,u,v) +#define jit_ldi_uc(u,v) jit_new_node_wp(jit_code_ldi_uc,u,v) + jit_code_ldr_uc, jit_code_ldi_uc, +#define jit_ldr_s(u,v) jit_new_node_ww(jit_code_ldr_s,u,v) +#define jit_ldi_s(u,v) jit_new_node_wp(jit_code_ldi_s,u,v) + jit_code_ldr_s, jit_code_ldi_s, +#define jit_ldr_us(u,v) jit_new_node_ww(jit_code_ldr_us,u,v) +#define jit_ldi_us(u,v) jit_new_node_wp(jit_code_ldi_us,u,v) + jit_code_ldr_us, jit_code_ldi_us, +#define jit_ldr_i(u,v) jit_new_node_ww(jit_code_ldr_i,u,v) +#define jit_ldi_i(u,v) jit_new_node_wp(jit_code_ldi_i,u,v) + jit_code_ldr_i, jit_code_ldi_i, +#if __WORDSIZE == 32 +# define jit_ldr(u,v) jit_ldr_i(u,v) +# define jit_ldi(u,v) jit_ldi_i(u,v) +#else +# define jit_ldr(u,v) jit_ldr_l(u,v) +# define jit_ldi(u,v) jit_ldi_l(u,v) +# define jit_ldr_ui(u,v) jit_new_node_ww(jit_code_ldr_ui,u,v) +# define jit_ldi_ui(u,v) jit_new_node_wp(jit_code_ldi_ui,u,v) +#define jit_ldr_l(u,v) jit_new_node_ww(jit_code_ldr_l,u,v) +#define jit_ldi_l(u,v) jit_new_node_wp(jit_code_ldi_l,u,v) +#endif + jit_code_ldr_ui, jit_code_ldi_ui, + jit_code_ldr_l, jit_code_ldi_l, + +#define jit_ldxr_c(u,v,w) jit_new_node_www(jit_code_ldxr_c,u,v,w) +#define jit_ldxi_c(u,v,w) jit_new_node_www(jit_code_ldxi_c,u,v,w) + jit_code_ldxr_c, jit_code_ldxi_c, +#define jit_ldxr_uc(u,v,w) jit_new_node_www(jit_code_ldxr_uc,u,v,w) +#define jit_ldxi_uc(u,v,w) jit_new_node_www(jit_code_ldxi_uc,u,v,w) + jit_code_ldxr_uc, jit_code_ldxi_uc, +#define jit_ldxr_s(u,v,w) jit_new_node_www(jit_code_ldxr_s,u,v,w) +#define jit_ldxi_s(u,v,w) jit_new_node_www(jit_code_ldxi_s,u,v,w) + jit_code_ldxr_s, jit_code_ldxi_s, +#define jit_ldxr_us(u,v,w) jit_new_node_www(jit_code_ldxr_us,u,v,w) +#define 
jit_ldxi_us(u,v,w) jit_new_node_www(jit_code_ldxi_us,u,v,w) + jit_code_ldxr_us, jit_code_ldxi_us, +#define jit_ldxr_i(u,v,w) jit_new_node_www(jit_code_ldxr_i,u,v,w) +#define jit_ldxi_i(u,v,w) jit_new_node_www(jit_code_ldxi_i,u,v,w) + jit_code_ldxr_i, jit_code_ldxi_i, +#if __WORDSIZE == 32 +# define jit_ldxr(u,v,w) jit_ldxr_i(u,v,w) +# define jit_ldxi(u,v,w) jit_ldxi_i(u,v,w) +#else +# define jit_ldxr_ui(u,v,w) jit_new_node_www(jit_code_ldxr_ui,u,v,w) +# define jit_ldxi_ui(u,v,w) jit_new_node_www(jit_code_ldxi_ui,u,v,w) +# define jit_ldxr_l(u,v,w) jit_new_node_www(jit_code_ldxr_l,u,v,w) +# define jit_ldxi_l(u,v,w) jit_new_node_www(jit_code_ldxi_l,u,v,w) +# define jit_ldxr(u,v,w) jit_ldxr_l(u,v,w) +# define jit_ldxi(u,v,w) jit_ldxi_l(u,v,w) +#endif + jit_code_ldxr_ui, jit_code_ldxi_ui, + jit_code_ldxr_l, jit_code_ldxi_l, + +#define jit_str_c(u,v) jit_new_node_ww(jit_code_str_c,u,v) +#define jit_sti_c(u,v) jit_new_node_pw(jit_code_sti_c,u,v) + jit_code_str_c, jit_code_sti_c, +#define jit_str_s(u,v) jit_new_node_ww(jit_code_str_s,u,v) +#define jit_sti_s(u,v) jit_new_node_pw(jit_code_sti_s,u,v) + jit_code_str_s, jit_code_sti_s, +#define jit_str_i(u,v) jit_new_node_ww(jit_code_str_i,u,v) +#define jit_sti_i(u,v) jit_new_node_pw(jit_code_sti_i,u,v) + jit_code_str_i, jit_code_sti_i, +#if __WORDSIZE == 32 +# define jit_str(u,v) jit_str_i(u,v) +# define jit_sti(u,v) jit_sti_i(u,v) +#else +# define jit_str(u,v) jit_str_l(u,v) +# define jit_sti(u,v) jit_sti_l(u,v) +# define jit_str_l(u,v) jit_new_node_ww(jit_code_str_l,u,v) +# define jit_sti_l(u,v) jit_new_node_pw(jit_code_sti_l,u,v) +#endif + jit_code_str_l, jit_code_sti_l, + +#define jit_stxr_c(u,v,w) jit_new_node_www(jit_code_stxr_c,u,v,w) +#define jit_stxi_c(u,v,w) jit_new_node_www(jit_code_stxi_c,u,v,w) + jit_code_stxr_c, jit_code_stxi_c, +#define jit_stxr_s(u,v,w) jit_new_node_www(jit_code_stxr_s,u,v,w) +#define jit_stxi_s(u,v,w) jit_new_node_www(jit_code_stxi_s,u,v,w) + jit_code_stxr_s, jit_code_stxi_s, +#define 
jit_stxr_i(u,v,w) jit_new_node_www(jit_code_stxr_i,u,v,w) +#define jit_stxi_i(u,v,w) jit_new_node_www(jit_code_stxi_i,u,v,w) + jit_code_stxr_i, jit_code_stxi_i, +#if __WORDSIZE == 32 +# define jit_stxr(u,v,w) jit_stxr_i(u,v,w) +# define jit_stxi(u,v,w) jit_stxi_i(u,v,w) +#else +# define jit_stxr(u,v,w) jit_stxr_l(u,v,w) +# define jit_stxi(u,v,w) jit_stxi_l(u,v,w) +# define jit_stxr_l(u,v,w) jit_new_node_www(jit_code_stxr_l,u,v,w) +# define jit_stxi_l(u,v,w) jit_new_node_www(jit_code_stxi_l,u,v,w) +#endif + jit_code_stxr_l, jit_code_stxi_l, + +#define jit_bltr(v,w) jit_new_node_pww(jit_code_bltr,NULL,v,w) +#define jit_blti(v,w) jit_new_node_pww(jit_code_blti,NULL,v,w) + jit_code_bltr, jit_code_blti, +#define jit_bltr_u(v,w) jit_new_node_pww(jit_code_bltr_u,NULL,v,w) +#define jit_blti_u(v,w) jit_new_node_pww(jit_code_blti_u,NULL,v,w) + jit_code_bltr_u, jit_code_blti_u, +#define jit_bler(v,w) jit_new_node_pww(jit_code_bler,NULL,v,w) +#define jit_blei(v,w) jit_new_node_pww(jit_code_blei,NULL,v,w) + jit_code_bler, jit_code_blei, +#define jit_bler_u(v,w) jit_new_node_pww(jit_code_bler_u,NULL,v,w) +#define jit_blei_u(v,w) jit_new_node_pww(jit_code_blei_u,NULL,v,w) + jit_code_bler_u, jit_code_blei_u, +#define jit_beqr(v,w) jit_new_node_pww(jit_code_beqr,NULL,v,w) +#define jit_beqi(v,w) jit_new_node_pww(jit_code_beqi,NULL,v,w) + jit_code_beqr, jit_code_beqi, +#define jit_bger(v,w) jit_new_node_pww(jit_code_bger,NULL,v,w) +#define jit_bgei(v,w) jit_new_node_pww(jit_code_bgei,NULL,v,w) + jit_code_bger, jit_code_bgei, +#define jit_bger_u(v,w) jit_new_node_pww(jit_code_bger_u,NULL,v,w) +#define jit_bgei_u(v,w) jit_new_node_pww(jit_code_bgei_u,NULL,v,w) + jit_code_bger_u, jit_code_bgei_u, +#define jit_bgtr(v,w) jit_new_node_pww(jit_code_bgtr,NULL,v,w) +#define jit_bgti(v,w) jit_new_node_pww(jit_code_bgti,NULL,v,w) + jit_code_bgtr, jit_code_bgti, +#define jit_bgtr_u(v,w) jit_new_node_pww(jit_code_bgtr_u,NULL,v,w) +#define jit_bgti_u(v,w) jit_new_node_pww(jit_code_bgti_u,NULL,v,w) 
+ jit_code_bgtr_u, jit_code_bgti_u, +#define jit_bner(v,w) jit_new_node_pww(jit_code_bner,NULL,v,w) +#define jit_bnei(v,w) jit_new_node_pww(jit_code_bnei,NULL,v,w) + jit_code_bner, jit_code_bnei, + +#define jit_bmsr(v,w) jit_new_node_pww(jit_code_bmsr,NULL,v,w) +#define jit_bmsi(v,w) jit_new_node_pww(jit_code_bmsi,NULL,v,w) + jit_code_bmsr, jit_code_bmsi, +#define jit_bmcr(v,w) jit_new_node_pww(jit_code_bmcr,NULL,v,w) +#define jit_bmci(v,w) jit_new_node_pww(jit_code_bmci,NULL,v,w) + jit_code_bmcr, jit_code_bmci, + +#define jit_boaddr(v,w) jit_new_node_pww(jit_code_boaddr,NULL,v,w) +#define jit_boaddi(v,w) jit_new_node_pww(jit_code_boaddi,NULL,v,w) + jit_code_boaddr, jit_code_boaddi, +#define jit_boaddr_u(v,w) jit_new_node_pww(jit_code_boaddr_u,NULL,v,w) +#define jit_boaddi_u(v,w) jit_new_node_pww(jit_code_boaddi_u,NULL,v,w) + jit_code_boaddr_u, jit_code_boaddi_u, +#define jit_bxaddr(v,w) jit_new_node_pww(jit_code_bxaddr,NULL,v,w) +#define jit_bxaddi(v,w) jit_new_node_pww(jit_code_bxaddi,NULL,v,w) + jit_code_bxaddr, jit_code_bxaddi, +#define jit_bxaddr_u(v,w) jit_new_node_pww(jit_code_bxaddr_u,NULL,v,w) +#define jit_bxaddi_u(v,w) jit_new_node_pww(jit_code_bxaddi_u,NULL,v,w) + jit_code_bxaddr_u, jit_code_bxaddi_u, +#define jit_bosubr(v,w) jit_new_node_pww(jit_code_bosubr,NULL,v,w) +#define jit_bosubi(v,w) jit_new_node_pww(jit_code_bosubi,NULL,v,w) + jit_code_bosubr, jit_code_bosubi, +#define jit_bosubr_u(v,w) jit_new_node_pww(jit_code_bosubr_u,NULL,v,w) +#define jit_bosubi_u(v,w) jit_new_node_pww(jit_code_bosubi_u,NULL,v,w) + jit_code_bosubr_u, jit_code_bosubi_u, +#define jit_bxsubr(v,w) jit_new_node_pww(jit_code_bxsubr,NULL,v,w) +#define jit_bxsubi(v,w) jit_new_node_pww(jit_code_bxsubi,NULL,v,w) + jit_code_bxsubr, jit_code_bxsubi, +#define jit_bxsubr_u(v,w) jit_new_node_pww(jit_code_bxsubr_u,NULL,v,w) +#define jit_bxsubi_u(v,w) jit_new_node_pww(jit_code_bxsubi_u,NULL,v,w) + jit_code_bxsubr_u, jit_code_bxsubi_u, + +#define jit_jmpr(u) jit_new_node_w(jit_code_jmpr,u) 
+#define jit_jmpi() jit_new_node_p(jit_code_jmpi,NULL) + jit_code_jmpr, jit_code_jmpi, +#define jit_callr(u) jit_new_node_w(jit_code_callr,u) +#define jit_calli(u) jit_new_node_p(jit_code_calli,u) + jit_code_callr, jit_code_calli, + +#define jit_prepare() _jit_prepare(_jit) + jit_code_prepare, + +#define jit_pushargr_c(u) _jit_pushargr(_jit,u,jit_code_pushargr_c) +#define jit_pushargi_c(u) _jit_pushargi(_jit,u,jit_code_pushargi_c) +#define jit_pushargr_uc(u) _jit_pushargr(_jit,u,jit_code_pushargr_uc) +#define jit_pushargi_uc(u) _jit_pushargi(_jit,u,jit_code_pushargi_uc) +#define jit_pushargr_s(u) _jit_pushargr(_jit,u,jit_code_pushargr_s) +#define jit_pushargi_s(u) _jit_pushargi(_jit,u,jit_code_pushargi_s) +#define jit_pushargr_us(u) _jit_pushargr(_jit,u,jit_code_pushargr_us) +#define jit_pushargi_us(u) _jit_pushargi(_jit,u,jit_code_pushargi_us) +#define jit_pushargr_i(u) _jit_pushargr(_jit,u,jit_code_pushargr_i) +#define jit_pushargi_i(u) _jit_pushargi(_jit,u,jit_code_pushargi_i) +#if __WORDSIZE == 32 +# define jit_pushargr(u) jit_pushargr_i(u) +# define jit_pushargi(u) jit_pushargi_i(u) +#else +# define jit_pushargr_ui(u) _jit_pushargr(_jit,u,jit_code_pushargr_ui) +# define jit_pushargi_ui(u) _jit_pushargi(_jit,u,jit_code_pushargi_ui) +# define jit_pushargr_l(u) _jit_pushargr(_jit,u,jit_code_pushargr_l) +# define jit_pushargi_l(u) _jit_pushargi(_jit,u,jit_code_pushargi_l) +# define jit_pushargr(u) jit_pushargr_l(u) +# define jit_pushargi(u) jit_pushargi_l(u) +#endif + jit_code_pushargr_c, jit_code_pushargi_c, + jit_code_pushargr_uc, jit_code_pushargi_uc, + jit_code_pushargr_s, jit_code_pushargi_s, + jit_code_pushargr_us, jit_code_pushargi_us, + jit_code_pushargr_i, jit_code_pushargi_i, + jit_code_pushargr_ui, jit_code_pushargi_ui, + jit_code_pushargr_l, jit_code_pushargi_l, +#if __WORDSIZE == 32 +# define jit_code_pushargr jit_code_pushargr_i +# define jit_code_pushargi jit_code_pushargi_i +#else +# define jit_code_pushargr jit_code_pushargr_l +# define 
jit_code_pushargi jit_code_pushargi_l +#endif + +#define jit_finishr(u) _jit_finishr(_jit,u) +#define jit_finishi(u) _jit_finishi(_jit,u) + jit_code_finishr, jit_code_finishi, +#define jit_ret() _jit_ret(_jit) + jit_code_ret, + +#define jit_retr_c(u) _jit_retr(_jit,u,jit_code_retr_c) +#define jit_reti_c(u) _jit_reti(_jit,u,jit_code_reti_c) +#define jit_retr_uc(u) _jit_retr(_jit,u,jit_code_retr_uc) +#define jit_reti_uc(u) _jit_reti(_jit,u,jit_code_reti_uc) +#define jit_retr_s(u) _jit_retr(_jit,u,jit_code_retr_s) +#define jit_reti_s(u) _jit_reti(_jit,u,jit_code_reti_s) +#define jit_retr_us(u) _jit_retr(_jit,u,jit_code_retr_us) +#define jit_reti_us(u) _jit_reti(_jit,u,jit_code_reti_us) +#define jit_retr_i(u) _jit_retr(_jit,u,jit_code_retr_i) +#define jit_reti_i(u) _jit_reti(_jit,u,jit_code_reti_i) +#if __WORDSIZE == 32 +# define jit_retr(u) jit_retr_i(u) +# define jit_reti(u) jit_reti_i(u) +#else +# define jit_retr_ui(u) _jit_retr(_jit,u,jit_code_retr_ui) +# define jit_reti_ui(u) _jit_reti(_jit,u,jit_code_reti_ui) +# define jit_retr_l(u) _jit_retr(_jit,u,jit_code_retr_l) +# define jit_reti_l(u) _jit_reti(_jit,u,jit_code_reti_l) +# define jit_retr(u) jit_retr_l(u) +# define jit_reti(u) jit_reti_l(u) +#endif + jit_code_retr_c, jit_code_reti_c, + jit_code_retr_uc, jit_code_reti_uc, + jit_code_retr_s, jit_code_reti_s, + jit_code_retr_us, jit_code_reti_us, + jit_code_retr_i, jit_code_reti_i, + jit_code_retr_ui, jit_code_reti_ui, + jit_code_retr_l, jit_code_reti_l, +#if __WORDSIZE == 32 +# define jit_code_retr jit_code_retr_i +# define jit_code_reti jit_code_reti_i +#else +# define jit_code_retr jit_code_retr_l +# define jit_code_reti jit_code_reti_l +#endif + +#define jit_retval_c(u) _jit_retval_c(_jit,u) +#define jit_retval_uc(u) _jit_retval_uc(_jit,u) +#define jit_retval_s(u) _jit_retval_s(_jit,u) +#define jit_retval_us(u) _jit_retval_us(_jit,u) +#define jit_retval_i(u) _jit_retval_i(_jit,u) +#if __WORDSIZE == 32 +# define jit_retval(u) jit_retval_i(u) +#else +# define 
jit_retval_ui(u) _jit_retval_ui(_jit,u) +# define jit_retval_l(u) _jit_retval_l(_jit,u) +# define jit_retval(u) jit_retval_l(u) +#endif + jit_code_retval_c, jit_code_retval_uc, + jit_code_retval_s, jit_code_retval_us, + jit_code_retval_i, jit_code_retval_ui, + jit_code_retval_l, +#if __WORDSIZE == 32 +# define jit_code_retval jit_code_retval_i +#else +# define jit_code_retval jit_code_retval_l +#endif + +#define jit_epilog() _jit_epilog(_jit) + jit_code_epilog, + +#define jit_arg_f() _jit_arg_f(_jit) + jit_code_arg_f, +#define jit_getarg_f(u,v) _jit_getarg_f(_jit,u,v) + jit_code_getarg_f, +#define jit_putargr_f(u,v) _jit_putargr_f(_jit,u,v) +#define jit_putargi_f(u,v) _jit_putargi_f(_jit,u,v) + jit_code_putargr_f, jit_code_putargi_f, + +#define jit_addr_f(u,v,w) jit_new_node_www(jit_code_addr_f,u,v,w) +#define jit_addi_f(u,v,w) jit_new_node_wwf(jit_code_addi_f,u,v,w) + jit_code_addr_f, jit_code_addi_f, +#define jit_subr_f(u,v,w) jit_new_node_www(jit_code_subr_f,u,v,w) +#define jit_subi_f(u,v,w) jit_new_node_wwf(jit_code_subi_f,u,v,w) + jit_code_subr_f, jit_code_subi_f, +#define jit_rsbr_f(u,v,w) jit_subr_f(u,w,v) +#define jit_rsbi_f(u,v,w) jit_new_node_wwf(jit_code_rsbi_f,u,v,w) + jit_code_rsbi_f, +#define jit_mulr_f(u,v,w) jit_new_node_www(jit_code_mulr_f,u,v,w) +#define jit_muli_f(u,v,w) jit_new_node_wwf(jit_code_muli_f,u,v,w) + jit_code_mulr_f, jit_code_muli_f, +#define jit_divr_f(u,v,w) jit_new_node_www(jit_code_divr_f,u,v,w) +#define jit_divi_f(u,v,w) jit_new_node_wwf(jit_code_divi_f,u,v,w) + jit_code_divr_f, jit_code_divi_f, + +#define jit_negr_f(u,v) jit_new_node_ww(jit_code_negr_f,u,v) +#define jit_negi_f(u,v) _jit_negi_f(_jit,u,v) + jit_code_negr_f, jit_code_negi_f, +#define jit_absr_f(u,v) jit_new_node_ww(jit_code_absr_f,u,v) +#define jit_absi_f(u,v) _jit_absi_f(_jit,u,v) + jit_code_absr_f, jit_code_absi_f, +#define jit_sqrtr_f(u,v) jit_new_node_ww(jit_code_sqrtr_f,u,v) +#define jit_sqrti_f(u,v) _jit_sqrti_f(_jit,u,v) + jit_code_sqrtr_f, jit_code_sqrti_f, 
+ +#define jit_ltr_f(u,v,w) jit_new_node_www(jit_code_ltr_f,u,v,w) +#define jit_lti_f(u,v,w) jit_new_node_wwf(jit_code_lti_f,u,v,w) + jit_code_ltr_f, jit_code_lti_f, +#define jit_ler_f(u,v,w) jit_new_node_www(jit_code_ler_f,u,v,w) +#define jit_lei_f(u,v,w) jit_new_node_wwf(jit_code_lei_f,u,v,w) + jit_code_ler_f, jit_code_lei_f, +#define jit_eqr_f(u,v,w) jit_new_node_www(jit_code_eqr_f,u,v,w) +#define jit_eqi_f(u,v,w) jit_new_node_wwf(jit_code_eqi_f,u,v,w) + jit_code_eqr_f, jit_code_eqi_f, +#define jit_ger_f(u,v,w) jit_new_node_www(jit_code_ger_f,u,v,w) +#define jit_gei_f(u,v,w) jit_new_node_wwf(jit_code_gei_f,u,v,w) + jit_code_ger_f, jit_code_gei_f, +#define jit_gtr_f(u,v,w) jit_new_node_www(jit_code_gtr_f,u,v,w) +#define jit_gti_f(u,v,w) jit_new_node_wwf(jit_code_gti_f,u,v,w) + jit_code_gtr_f, jit_code_gti_f, +#define jit_ner_f(u,v,w) jit_new_node_www(jit_code_ner_f,u,v,w) +#define jit_nei_f(u,v,w) jit_new_node_wwf(jit_code_nei_f,u,v,w) + jit_code_ner_f, jit_code_nei_f, +#define jit_unltr_f(u,v,w) jit_new_node_www(jit_code_unltr_f,u,v,w) +#define jit_unlti_f(u,v,w) jit_new_node_wwf(jit_code_unlti_f,u,v,w) + jit_code_unltr_f, jit_code_unlti_f, +#define jit_unler_f(u,v,w) jit_new_node_www(jit_code_unler_f,u,v,w) +#define jit_unlei_f(u,v,w) jit_new_node_wwf(jit_code_unlei_f,u,v,w) + jit_code_unler_f, jit_code_unlei_f, +#define jit_uneqr_f(u,v,w) jit_new_node_www(jit_code_uneqr_f,u,v,w) +#define jit_uneqi_f(u,v,w) jit_new_node_wwf(jit_code_uneqi_f,u,v,w) + jit_code_uneqr_f, jit_code_uneqi_f, +#define jit_unger_f(u,v,w) jit_new_node_www(jit_code_unger_f,u,v,w) +#define jit_ungei_f(u,v,w) jit_new_node_wwf(jit_code_ungei_f,u,v,w) + jit_code_unger_f, jit_code_ungei_f, +#define jit_ungtr_f(u,v,w) jit_new_node_www(jit_code_ungtr_f,u,v,w) +#define jit_ungti_f(u,v,w) jit_new_node_wwf(jit_code_ungti_f,u,v,w) + jit_code_ungtr_f, jit_code_ungti_f, +#define jit_ltgtr_f(u,v,w) jit_new_node_www(jit_code_ltgtr_f,u,v,w) +#define jit_ltgti_f(u,v,w) 
jit_new_node_wwf(jit_code_ltgti_f,u,v,w) + jit_code_ltgtr_f, jit_code_ltgti_f, +#define jit_ordr_f(u,v,w) jit_new_node_www(jit_code_ordr_f,u,v,w) +#define jit_ordi_f(u,v,w) jit_new_node_wwf(jit_code_ordi_f,u,v,w) + jit_code_ordr_f, jit_code_ordi_f, +#define jit_unordr_f(u,v,w) jit_new_node_www(jit_code_unordr_f,u,v,w) +#define jit_unordi_f(u,v,w) jit_new_node_wwf(jit_code_unordi_f,u,v,w) + jit_code_unordr_f, jit_code_unordi_f, + +#define jit_truncr_f_i(u,v) jit_new_node_ww(jit_code_truncr_f_i,u,v) + jit_code_truncr_f_i, +#if __WORDSIZE == 32 +# define jit_truncr_f(u,v) jit_truncr_f_i(u,v) +#else +# define jit_truncr_f(u,v) jit_truncr_f_l(u,v) +# define jit_truncr_f_l(u,v) jit_new_node_ww(jit_code_truncr_f_l,u,v) +#endif + jit_code_truncr_f_l, +#define jit_extr_f(u,v) jit_new_node_ww(jit_code_extr_f,u,v) +#define jit_extr_d_f(u,v) jit_new_node_ww(jit_code_extr_d_f,u,v) + jit_code_extr_f, jit_code_extr_d_f, +#define jit_movr_f(u,v) jit_new_node_ww(jit_code_movr_f,u,v) +#define jit_movi_f(u,v) jit_new_node_wf(jit_code_movi_f,u,v) + jit_code_movr_f, jit_code_movi_f, + +#define jit_ldr_f(u,v) jit_new_node_ww(jit_code_ldr_f,u,v) +#define jit_ldi_f(u,v) jit_new_node_wp(jit_code_ldi_f,u,v) + jit_code_ldr_f, jit_code_ldi_f, +#define jit_ldxr_f(u,v,w) jit_new_node_www(jit_code_ldxr_f,u,v,w) +#define jit_ldxi_f(u,v,w) jit_new_node_www(jit_code_ldxi_f,u,v,w) + jit_code_ldxr_f, jit_code_ldxi_f, +#define jit_str_f(u,v) jit_new_node_ww(jit_code_str_f,u,v) +#define jit_sti_f(u,v) jit_new_node_pw(jit_code_sti_f,u,v) + jit_code_str_f, jit_code_sti_f, +#define jit_stxr_f(u,v,w) jit_new_node_www(jit_code_stxr_f,u,v,w) +#define jit_stxi_f(u,v,w) jit_new_node_www(jit_code_stxi_f,u,v,w) + jit_code_stxr_f, jit_code_stxi_f, + +#define jit_bltr_f(v,w) jit_new_node_pww(jit_code_bltr_f,NULL,v,w) +#define jit_blti_f(v,w) jit_new_node_pwf(jit_code_blti_f,NULL,v,w) + jit_code_bltr_f, jit_code_blti_f, +#define jit_bler_f(v,w) jit_new_node_pww(jit_code_bler_f,NULL,v,w) +#define jit_blei_f(v,w) 
jit_new_node_pwf(jit_code_blei_f,NULL,v,w) + jit_code_bler_f, jit_code_blei_f, +#define jit_beqr_f(v,w) jit_new_node_pww(jit_code_beqr_f,NULL,v,w) +#define jit_beqi_f(v,w) jit_new_node_pwf(jit_code_beqi_f,NULL,v,w) + jit_code_beqr_f, jit_code_beqi_f, +#define jit_bger_f(v,w) jit_new_node_pww(jit_code_bger_f,NULL,v,w) +#define jit_bgei_f(v,w) jit_new_node_pwf(jit_code_bgei_f,NULL,v,w) + jit_code_bger_f, jit_code_bgei_f, +#define jit_bgtr_f(v,w) jit_new_node_pww(jit_code_bgtr_f,NULL,v,w) +#define jit_bgti_f(v,w) jit_new_node_pwf(jit_code_bgti_f,NULL,v,w) + jit_code_bgtr_f, jit_code_bgti_f, +#define jit_bner_f(v,w) jit_new_node_pww(jit_code_bner_f,NULL,v,w) +#define jit_bnei_f(v,w) jit_new_node_pwf(jit_code_bnei_f,NULL,v,w) + jit_code_bner_f, jit_code_bnei_f, +#define jit_bunltr_f(v,w) jit_new_node_pww(jit_code_bunltr_f,NULL,v,w) +#define jit_bunlti_f(v,w) jit_new_node_pwf(jit_code_bunlti_f,NULL,v,w) + jit_code_bunltr_f, jit_code_bunlti_f, +#define jit_bunler_f(v,w) jit_new_node_pww(jit_code_bunler_f,NULL,v,w) +#define jit_bunlei_f(v,w) jit_new_node_pwf(jit_code_bunlei_f,NULL,v,w) + jit_code_bunler_f, jit_code_bunlei_f, +#define jit_buneqr_f(v,w) jit_new_node_pww(jit_code_buneqr_f,NULL,v,w) +#define jit_buneqi_f(v,w) jit_new_node_pwf(jit_code_buneqi_f,NULL,v,w) + jit_code_buneqr_f, jit_code_buneqi_f, +#define jit_bunger_f(v,w) jit_new_node_pww(jit_code_bunger_f,NULL,v,w) +#define jit_bungei_f(v,w) jit_new_node_pwf(jit_code_bungei_f,NULL,v,w) + jit_code_bunger_f, jit_code_bungei_f, +#define jit_bungtr_f(v,w) jit_new_node_pww(jit_code_bungtr_f,NULL,v,w) +#define jit_bungti_f(v,w) jit_new_node_pwf(jit_code_bungti_f,NULL,v,w) + jit_code_bungtr_f, jit_code_bungti_f, +#define jit_bltgtr_f(v,w) jit_new_node_pww(jit_code_bltgtr_f,NULL,v,w) +#define jit_bltgti_f(v,w) jit_new_node_pwf(jit_code_bltgti_f,NULL,v,w) + jit_code_bltgtr_f, jit_code_bltgti_f, +#define jit_bordr_f(v,w) jit_new_node_pww(jit_code_bordr_f,NULL,v,w) +#define jit_bordi_f(v,w) 
jit_new_node_pwf(jit_code_bordi_f,NULL,v,w) + jit_code_bordr_f, jit_code_bordi_f, +#define jit_bunordr_f(v,w) jit_new_node_pww(jit_code_bunordr_f,NULL,v,w) +#define jit_bunordi_f(v,w) jit_new_node_pwf(jit_code_bunordi_f,NULL,v,w) + jit_code_bunordr_f, jit_code_bunordi_f, + +#define jit_pushargr_f(u) _jit_pushargr_f(_jit,u) +#define jit_pushargi_f(u) _jit_pushargi_f(_jit,u) + jit_code_pushargr_f, jit_code_pushargi_f, +#define jit_retr_f(u) _jit_retr_f(_jit,u) +#define jit_reti_f(u) _jit_reti_f(_jit,u) + jit_code_retr_f, jit_code_reti_f, +#define jit_retval_f(u) _jit_retval_f(_jit,u) + jit_code_retval_f, + +#define jit_arg_d() _jit_arg_d(_jit) + jit_code_arg_d, +#define jit_getarg_d(u,v) _jit_getarg_d(_jit,u,v) + jit_code_getarg_d, +#define jit_putargr_d(u,v) _jit_putargr_d(_jit,u,v) +#define jit_putargi_d(u,v) _jit_putargi_d(_jit,u,v) + jit_code_putargr_d, jit_code_putargi_d, + +#define jit_addr_d(u,v,w) jit_new_node_www(jit_code_addr_d,u,v,w) +#define jit_addi_d(u,v,w) jit_new_node_wwd(jit_code_addi_d,u,v,w) + jit_code_addr_d, jit_code_addi_d, +#define jit_subr_d(u,v,w) jit_new_node_www(jit_code_subr_d,u,v,w) +#define jit_subi_d(u,v,w) jit_new_node_wwd(jit_code_subi_d,u,v,w) + jit_code_subr_d, jit_code_subi_d, +#define jit_rsbr_d(u,v,w) jit_subr_d(u,w,v) +#define jit_rsbi_d(u,v,w) jit_new_node_wwd(jit_code_rsbi_d,u,v,w) + jit_code_rsbi_d, +#define jit_mulr_d(u,v,w) jit_new_node_www(jit_code_mulr_d,u,v,w) +#define jit_muli_d(u,v,w) jit_new_node_wwd(jit_code_muli_d,u,v,w) + jit_code_mulr_d, jit_code_muli_d, +#define jit_divr_d(u,v,w) jit_new_node_www(jit_code_divr_d,u,v,w) +#define jit_divi_d(u,v,w) jit_new_node_wwd(jit_code_divi_d,u,v,w) + jit_code_divr_d, jit_code_divi_d, + +#define jit_negr_d(u,v) jit_new_node_ww(jit_code_negr_d,u,v) +#define jit_negi_d(u,v) _jit_negi_d(_jit,u,v) + jit_code_negr_d, jit_code_negi_d, +#define jit_absr_d(u,v) jit_new_node_ww(jit_code_absr_d,u,v) +#define jit_absi_d(u,v) _jit_absi_d(_jit,u,v) + jit_code_absr_d, jit_code_absi_d, 
+#define jit_sqrtr_d(u,v) jit_new_node_ww(jit_code_sqrtr_d,u,v) +#define jit_sqrti_d(u,v) _jit_sqrti_d(_jit,u,v) + jit_code_sqrtr_d, jit_code_sqrti_d, + +#define jit_ltr_d(u,v,w) jit_new_node_www(jit_code_ltr_d,u,v,w) +#define jit_lti_d(u,v,w) jit_new_node_wwd(jit_code_lti_d,u,v,w) + jit_code_ltr_d, jit_code_lti_d, +#define jit_ler_d(u,v,w) jit_new_node_www(jit_code_ler_d,u,v,w) +#define jit_lei_d(u,v,w) jit_new_node_wwd(jit_code_lei_d,u,v,w) + jit_code_ler_d, jit_code_lei_d, +#define jit_eqr_d(u,v,w) jit_new_node_www(jit_code_eqr_d,u,v,w) +#define jit_eqi_d(u,v,w) jit_new_node_wwd(jit_code_eqi_d,u,v,w) + jit_code_eqr_d, jit_code_eqi_d, +#define jit_ger_d(u,v,w) jit_new_node_www(jit_code_ger_d,u,v,w) +#define jit_gei_d(u,v,w) jit_new_node_wwd(jit_code_gei_d,u,v,w) + jit_code_ger_d, jit_code_gei_d, +#define jit_gtr_d(u,v,w) jit_new_node_www(jit_code_gtr_d,u,v,w) +#define jit_gti_d(u,v,w) jit_new_node_wwd(jit_code_gti_d,u,v,w) + jit_code_gtr_d, jit_code_gti_d, +#define jit_ner_d(u,v,w) jit_new_node_www(jit_code_ner_d,u,v,w) +#define jit_nei_d(u,v,w) jit_new_node_wwd(jit_code_nei_d,u,v,w) + jit_code_ner_d, jit_code_nei_d, +#define jit_unltr_d(u,v,w) jit_new_node_www(jit_code_unltr_d,u,v,w) +#define jit_unlti_d(u,v,w) jit_new_node_wwd(jit_code_unlti_d,u,v,w) + jit_code_unltr_d, jit_code_unlti_d, +#define jit_unler_d(u,v,w) jit_new_node_www(jit_code_unler_d,u,v,w) +#define jit_unlei_d(u,v,w) jit_new_node_wwd(jit_code_unlei_d,u,v,w) + jit_code_unler_d, jit_code_unlei_d, +#define jit_uneqr_d(u,v,w) jit_new_node_www(jit_code_uneqr_d,u,v,w) +#define jit_uneqi_d(u,v,w) jit_new_node_wwd(jit_code_uneqi_d,u,v,w) + jit_code_uneqr_d, jit_code_uneqi_d, +#define jit_unger_d(u,v,w) jit_new_node_www(jit_code_unger_d,u,v,w) +#define jit_ungei_d(u,v,w) jit_new_node_wwd(jit_code_ungei_d,u,v,w) + jit_code_unger_d, jit_code_ungei_d, +#define jit_ungtr_d(u,v,w) jit_new_node_www(jit_code_ungtr_d,u,v,w) +#define jit_ungti_d(u,v,w) jit_new_node_wwd(jit_code_ungti_d,u,v,w) + jit_code_ungtr_d, 
jit_code_ungti_d, +#define jit_ltgtr_d(u,v,w) jit_new_node_www(jit_code_ltgtr_d,u,v,w) +#define jit_ltgti_d(u,v,w) jit_new_node_wwd(jit_code_ltgti_d,u,v,w) + jit_code_ltgtr_d, jit_code_ltgti_d, +#define jit_ordr_d(u,v,w) jit_new_node_www(jit_code_ordr_d,u,v,w) +#define jit_ordi_d(u,v,w) jit_new_node_wwd(jit_code_ordi_d,u,v,w) + jit_code_ordr_d, jit_code_ordi_d, +#define jit_unordr_d(u,v,w) jit_new_node_www(jit_code_unordr_d,u,v,w) +#define jit_unordi_d(u,v,w) jit_new_node_wwd(jit_code_unordi_d,u,v,w) + jit_code_unordr_d, jit_code_unordi_d, + +#define jit_truncr_d_i(u,v) jit_new_node_ww(jit_code_truncr_d_i,u,v) + jit_code_truncr_d_i, +#if __WORDSIZE == 32 +# define jit_truncr_d(u,v) jit_truncr_d_i(u,v) +#else +# define jit_truncr_d(u,v) jit_truncr_d_l(u,v) +# define jit_truncr_d_l(u,v) jit_new_node_ww(jit_code_truncr_d_l,u,v) +#endif + jit_code_truncr_d_l, +#define jit_extr_d(u,v) jit_new_node_ww(jit_code_extr_d,u,v) +#define jit_extr_f_d(u,v) jit_new_node_ww(jit_code_extr_f_d,u,v) + jit_code_extr_d, jit_code_extr_f_d, +#define jit_movr_d(u,v) jit_new_node_ww(jit_code_movr_d,u,v) +#define jit_movi_d(u,v) jit_new_node_wd(jit_code_movi_d,u,v) + jit_code_movr_d, jit_code_movi_d, + +#define jit_ldr_d(u,v) jit_new_node_ww(jit_code_ldr_d,u,v) +#define jit_ldi_d(u,v) jit_new_node_wp(jit_code_ldi_d,u,v) + jit_code_ldr_d, jit_code_ldi_d, +#define jit_ldxr_d(u,v,w) jit_new_node_www(jit_code_ldxr_d,u,v,w) +#define jit_ldxi_d(u,v,w) jit_new_node_www(jit_code_ldxi_d,u,v,w) + jit_code_ldxr_d, jit_code_ldxi_d, +#define jit_str_d(u,v) jit_new_node_ww(jit_code_str_d,u,v) +#define jit_sti_d(u,v) jit_new_node_pw(jit_code_sti_d,u,v) + jit_code_str_d, jit_code_sti_d, +#define jit_stxr_d(u,v,w) jit_new_node_www(jit_code_stxr_d,u,v,w) +#define jit_stxi_d(u,v,w) jit_new_node_www(jit_code_stxi_d,u,v,w) + jit_code_stxr_d, jit_code_stxi_d, + +#define jit_bltr_d(v,w) jit_new_node_pww(jit_code_bltr_d,NULL,v,w) +#define jit_blti_d(v,w) jit_new_node_pwd(jit_code_blti_d,NULL,v,w) + 
jit_code_bltr_d, jit_code_blti_d, +#define jit_bler_d(v,w) jit_new_node_pww(jit_code_bler_d,NULL,v,w) +#define jit_blei_d(v,w) jit_new_node_pwd(jit_code_blei_d,NULL,v,w) + jit_code_bler_d, jit_code_blei_d, +#define jit_beqr_d(v,w) jit_new_node_pww(jit_code_beqr_d,NULL,v,w) +#define jit_beqi_d(v,w) jit_new_node_pwd(jit_code_beqi_d,NULL,v,w) + jit_code_beqr_d, jit_code_beqi_d, +#define jit_bger_d(v,w) jit_new_node_pww(jit_code_bger_d,NULL,v,w) +#define jit_bgei_d(v,w) jit_new_node_pwd(jit_code_bgei_d,NULL,v,w) + jit_code_bger_d, jit_code_bgei_d, +#define jit_bgtr_d(v,w) jit_new_node_pww(jit_code_bgtr_d,NULL,v,w) +#define jit_bgti_d(v,w) jit_new_node_pwd(jit_code_bgti_d,NULL,v,w) + jit_code_bgtr_d, jit_code_bgti_d, +#define jit_bner_d(v,w) jit_new_node_pww(jit_code_bner_d,NULL,v,w) +#define jit_bnei_d(v,w) jit_new_node_pwd(jit_code_bnei_d,NULL,v,w) + jit_code_bner_d, jit_code_bnei_d, +#define jit_bunltr_d(v,w) jit_new_node_pww(jit_code_bunltr_d,NULL,v,w) +#define jit_bunlti_d(v,w) jit_new_node_pwd(jit_code_bunlti_d,NULL,v,w) + jit_code_bunltr_d, jit_code_bunlti_d, +#define jit_bunler_d(v,w) jit_new_node_pww(jit_code_bunler_d,NULL,v,w) +#define jit_bunlei_d(v,w) jit_new_node_pwd(jit_code_bunlei_d,NULL,v,w) + jit_code_bunler_d, jit_code_bunlei_d, +#define jit_buneqr_d(v,w) jit_new_node_pww(jit_code_buneqr_d,NULL,v,w) +#define jit_buneqi_d(v,w) jit_new_node_pwd(jit_code_buneqi_d,NULL,v,w) + jit_code_buneqr_d, jit_code_buneqi_d, +#define jit_bunger_d(v,w) jit_new_node_pww(jit_code_bunger_d,NULL,v,w) +#define jit_bungei_d(v,w) jit_new_node_pwd(jit_code_bungei_d,NULL,v,w) + jit_code_bunger_d, jit_code_bungei_d, +#define jit_bungtr_d(v,w) jit_new_node_pww(jit_code_bungtr_d,NULL,v,w) +#define jit_bungti_d(v,w) jit_new_node_pwd(jit_code_bungti_d,NULL,v,w) + jit_code_bungtr_d, jit_code_bungti_d, +#define jit_bltgtr_d(v,w) jit_new_node_pww(jit_code_bltgtr_d,NULL,v,w) +#define jit_bltgti_d(v,w) jit_new_node_pwd(jit_code_bltgti_d,NULL,v,w) + jit_code_bltgtr_d, jit_code_bltgti_d, 
+#define jit_bordr_d(v,w) jit_new_node_pww(jit_code_bordr_d,NULL,v,w) +#define jit_bordi_d(v,w) jit_new_node_pwd(jit_code_bordi_d,NULL,v,w) + jit_code_bordr_d, jit_code_bordi_d, +#define jit_bunordr_d(v,w) jit_new_node_pww(jit_code_bunordr_d,NULL,v,w) +#define jit_bunordi_d(v,w) jit_new_node_pwd(jit_code_bunordi_d,NULL,v,w) + jit_code_bunordr_d, jit_code_bunordi_d, + +#define jit_pushargr_d(u) _jit_pushargr_d(_jit,u) +#define jit_pushargi_d(u) _jit_pushargi_d(_jit,u) + jit_code_pushargr_d, jit_code_pushargi_d, +#define jit_retr_d(u) _jit_retr_d(_jit,u) +#define jit_reti_d(u) _jit_reti_d(_jit,u) + jit_code_retr_d, jit_code_reti_d, +#define jit_retval_d(u) _jit_retval_d(_jit,u) + jit_code_retval_d, + + /* w* -> f|d */ +#define jit_movr_w_f(u, v) jit_new_node_ww(jit_code_movr_w_f, u, v) +#define jit_movi_w_f(u,v) jit_new_node_ww(jit_code_movi_w_f, u, v) + jit_code_movr_w_f, jit_code_movi_w_f, +#define jit_movr_ww_d(u, v, w) jit_new_node_www(jit_code_movr_ww_d, u, v, w) +#define jit_movi_ww_d(u,v, w) jit_new_node_www(jit_code_movi_ww_d, u, v, w) + jit_code_movr_ww_d, jit_code_movi_ww_d, + + /* w -> d */ +#define jit_movr_w_d(u, v) jit_new_node_ww(jit_code_movr_w_d, u, v) +#define jit_movi_w_d(u,v) jit_new_node_ww(jit_code_movi_w_d, u, v) + jit_code_movr_w_d, jit_code_movi_w_d, + + /* f|d -> w* */ +#define jit_movr_f_w(u, v) jit_new_node_ww(jit_code_movr_f_w, u, v) +#define jit_movi_f_w(u, v) jit_new_node_wf(jit_code_movi_f_w, u, v) + jit_code_movr_f_w, jit_code_movi_f_w, +#define jit_movr_d_ww(u, v, w) jit_new_node_www(jit_code_movr_d_ww, u, v, w) +#define jit_movi_d_ww(u, v, w) jit_new_node_wwd(jit_code_movi_d_ww, u, v, w) + jit_code_movr_d_ww, jit_code_movi_d_ww, + + /* d -> w */ +#define jit_movr_d_w(u, v) jit_new_node_ww(jit_code_movr_d_w, u, v) +#define jit_movi_d_w(u, v) jit_new_node_wd(jit_code_movi_d_w, u, v) + jit_code_movr_d_w, jit_code_movi_d_w, + +#define jit_clor(u,v) jit_new_node_ww(jit_code_clor,u,v) +#define jit_cloi(u,v) 
jit_new_node_ww(jit_code_cloi,u,v) + jit_code_clor, jit_code_cloi, + +#define jit_clzr(u,v) jit_new_node_ww(jit_code_clzr,u,v) +#define jit_clzi(u,v) jit_new_node_ww(jit_code_clzi,u,v) + jit_code_clzr, jit_code_clzi, + +#define jit_ctor(u,v) jit_new_node_ww(jit_code_ctor,u,v) +#define jit_ctoi(u,v) jit_new_node_ww(jit_code_ctoi,u,v) + jit_code_ctor, jit_code_ctoi, +#define jit_ctzr(u,v) jit_new_node_ww(jit_code_ctzr,u,v) +#define jit_ctzi(u,v) jit_new_node_ww(jit_code_ctzi,u,v) + jit_code_ctzr, jit_code_ctzi, + +#define jit_rbitr(u,v) jit_new_node_ww(jit_code_rbitr,u,v) +#define jit_rbiti(u,v) jit_new_node_ww(jit_code_rbiti,u,v) + jit_code_rbitr, jit_code_rbiti, + +#define jit_popcntr(u,v) jit_new_node_ww(jit_code_popcntr,u,v) +#define jit_popcnti(u,v) jit_new_node_ww(jit_code_popcnti,u,v) + jit_code_popcntr, jit_code_popcnti, + +#define jit_lrotr(u,v,w) jit_new_node_www(jit_code_lrotr,u,v,w) +#define jit_lroti(u,v,w) jit_new_node_www(jit_code_lroti,u,v,w) + jit_code_lrotr, jit_code_lroti, +#define jit_rrotr(u,v,w) jit_new_node_www(jit_code_rrotr,u,v,w) +#define jit_rroti(u,v,w) jit_new_node_www(jit_code_rroti,u,v,w) + jit_code_rrotr, jit_code_rroti, + +#define jit_extr(u,v,w,x) jit_new_node_wwq(jit_code_extr, u, v, w, x) +#define jit_exti(u,v,w,x) jit_new_node_wwq(jit_code_exti, u, v, w, x) + jit_code_extr, jit_code_exti, +#define jit_extr_u(u,v,w,x) jit_new_node_wwq(jit_code_extr_u, u, v, w, x) +#define jit_exti_u(u,v,w,x) jit_new_node_wwq(jit_code_exti_u, u, v, w, x) + jit_code_extr_u, jit_code_exti_u, +#define jit_depr(u,v,w,x) jit_new_node_wwq(jit_code_depr, u, v, w, x) +#define jit_depi(u,v,w,x) jit_new_node_wwq(jit_code_depi, u, v, w, x) + jit_code_depr, jit_code_depi, + +#define jit_qlshr(l,h,v,w) jit_new_node_qww(jit_code_qlshr,l,h,v,w) +#define jit_qlshi(l,h,v,w) jit_new_node_qww(jit_code_qlshi,l,h,v,w) + jit_code_qlshr, jit_code_qlshi, +#define jit_qlshr_u(l,h,v,w) jit_new_node_qww(jit_code_qlshr_u,l,h,v,w) +#define jit_qlshi_u(l,h,v,w) 
jit_new_node_qww(jit_code_qlshi_u,l,h,v,w) + jit_code_qlshr_u, jit_code_qlshi_u, +#define jit_qrshr(l,h,v,w) jit_new_node_qww(jit_code_qrshr,l,h,v,w) +#define jit_qrshi(l,h,v,w) jit_new_node_qww(jit_code_qrshi,l,h,v,w) + jit_code_qrshr, jit_code_qrshi, +#define jit_qrshr_u(l,h,v,w) jit_new_node_qww(jit_code_qrshr_u,l,h,v,w) +#define jit_qrshi_u(l,h,v,w) jit_new_node_qww(jit_code_qrshi_u,l,h,v,w) + jit_code_qrshr_u, jit_code_qrshi_u, + +#define jit_unldr(u,v,w) jit_new_node_www(jit_code_unldr, u, v, w) +#define jit_unldi(u,v,w) jit_new_node_www(jit_code_unldi, u, v, w) + jit_code_unldr, jit_code_unldi, +#define jit_unldr_u(u,v,w) jit_new_node_www(jit_code_unldr_u, u, v, w) +#define jit_unldi_u(u,v,w) jit_new_node_www(jit_code_unldi_u, u, v, w) + jit_code_unldr_u, jit_code_unldi_u, +#define jit_unstr(u,v,w) jit_new_node_www(jit_code_unstr, u, v, w) +#define jit_unsti(u,v,w) jit_new_node_www(jit_code_unsti, u, v, w) + jit_code_unstr, jit_code_unsti, + +#define jit_unldr_x(u,v,w) jit_new_node_www(jit_code_unldr_x, u, v, w) +#define jit_unldi_x(u,v,w) jit_new_node_www(jit_code_unldi_x, u, v, w) + jit_code_unldr_x, jit_code_unldi_x, +#define jit_unstr_x(u,v,w) jit_new_node_www(jit_code_unstr_x, u, v, w) +#define jit_unsti_x(u,v,w) jit_new_node_www(jit_code_unsti_x, u, v, w) + jit_code_unstr_x, jit_code_unsti_x, + +#define jit_fmar_f(u,v,w,x) jit_new_node_wqw(jit_code_fmar_f, u, v, w, x) +#define jit_fmai_f(u,v,w,x) _jit_fmai_f(_jit, u, v, w, x) + jit_code_fmar_f, jit_code_fmai_f, +#define jit_fmsr_f(u,v,w,x) jit_new_node_wqw(jit_code_fmsr_f, u, v, w, x) +#define jit_fmsi_f(u,v,w,x) _jit_fmsi_f(_jit, u, v, w, x) + jit_code_fmsr_f, jit_code_fmsi_f, +#define jit_fmar_d(u,v,w,x) jit_new_node_wqw(jit_code_fmar_d, u, v, w, x) +#define jit_fmai_d(u,v,w,x) _jit_fmai_d(_jit, u, v, w, x) + jit_code_fmar_d, jit_code_fmai_d, +#define jit_fmsr_d(u,v,w,x) jit_new_node_wqw(jit_code_fmsr_d, u, v, w, x) +#define jit_fmsi_d(u,v,w,x) _jit_fmsi_d(_jit, u, v, w, x) + jit_code_fmsr_d, 
jit_code_fmsi_d, + +#define jit_fnmar_f(u,v,w,x) jit_new_node_wqw(jit_code_fnmar_f, u, v, w, x) +#define jit_fnmai_f(u,v,w,x) _jit_fnmai_f(_jit, u, v, w, x) + jit_code_fnmar_f, jit_code_fnmai_f, +#define jit_fnmsr_f(u,v,w,x) jit_new_node_wqw(jit_code_fnmsr_f, u, v, w, x) +#define jit_fnmsi_f(u,v,w,x) _jit_fnmsi_f(_jit, u, v, w, x) + jit_code_fnmsr_f, jit_code_fnmsi_f, +#define jit_fnmar_d(u,v,w,x) jit_new_node_wqw(jit_code_fnmar_d, u, v, w, x) +#define jit_fnmai_d(u,v,w,x) _jit_fnmai_d(_jit, u, v, w, x) + jit_code_fnmar_d, jit_code_fnmai_d, +#define jit_fnmsr_d(u,v,w,x) jit_new_node_wqw(jit_code_fnmsr_d, u, v, w, x) +#define jit_fnmsi_d(u,v,w,x) _jit_fnmsi_d(_jit, u, v, w, x) + jit_code_fnmsr_d, jit_code_fnmsi_d, + +#define jit_hmulr(u,v,w) jit_new_node_www(jit_code_hmulr,u,v,w) +#define jit_hmuli(u,v,w) jit_new_node_www(jit_code_hmuli,u,v,w) + jit_code_hmulr, jit_code_hmuli, +#define jit_hmulr_u(u,v,w) jit_new_node_www(jit_code_hmulr_u,u,v,w) +#define jit_hmuli_u(u,v,w) jit_new_node_www(jit_code_hmuli_u,u,v,w) + jit_code_hmulr_u, jit_code_hmuli_u, + +#define jit_ldxbr_c(u,v,w) jit_new_node_www(jit_code_ldxbr_c,u,v,w) +#define jit_ldxbi_c(u,v,w) jit_new_node_www(jit_code_ldxbi_c,u,v,w) + jit_code_ldxbr_c, jit_code_ldxbi_c, +#define jit_ldxar_c(u,v,w) jit_new_node_www(jit_code_ldxar_c,u,v,w) +#define jit_ldxai_c(u,v,w) jit_new_node_www(jit_code_ldxai_c,u,v,w) + jit_code_ldxar_c, jit_code_ldxai_c, +#define jit_ldxbr_uc(u,v,w) jit_new_node_www(jit_code_ldxbr_uc,u,v,w) +#define jit_ldxbi_uc(u,v,w) jit_new_node_www(jit_code_ldxbi_uc,u,v,w) + jit_code_ldxbr_uc, jit_code_ldxbi_uc, +#define jit_ldxar_uc(u,v,w) jit_new_node_www(jit_code_ldxar_uc,u,v,w) +#define jit_ldxai_uc(u,v,w) jit_new_node_www(jit_code_ldxai_uc,u,v,w) + jit_code_ldxar_uc, jit_code_ldxai_uc, +#define jit_ldxbr_s(u,v,w) jit_new_node_www(jit_code_ldxbr_s,u,v,w) +#define jit_ldxbi_s(u,v,w) jit_new_node_www(jit_code_ldxbi_s,u,v,w) + jit_code_ldxbr_s, jit_code_ldxbi_s, +#define jit_ldxar_s(u,v,w) 
jit_new_node_www(jit_code_ldxar_s,u,v,w) +#define jit_ldxai_s(u,v,w) jit_new_node_www(jit_code_ldxai_s,u,v,w) + jit_code_ldxar_s, jit_code_ldxai_s, +#define jit_ldxbr_us(u,v,w) jit_new_node_www(jit_code_ldxbr_us,u,v,w) +#define jit_ldxbi_us(u,v,w) jit_new_node_www(jit_code_ldxbi_us,u,v,w) + jit_code_ldxbr_us, jit_code_ldxbi_us, +#define jit_ldxar_us(u,v,w) jit_new_node_www(jit_code_ldxar_us,u,v,w) +#define jit_ldxai_us(u,v,w) jit_new_node_www(jit_code_ldxai_us,u,v,w) + jit_code_ldxar_us, jit_code_ldxai_us, +#define jit_ldxbr_i(u,v,w) jit_new_node_www(jit_code_ldxbr_i,u,v,w) +#define jit_ldxbi_i(u,v,w) jit_new_node_www(jit_code_ldxbi_i,u,v,w) + jit_code_ldxbr_i, jit_code_ldxbi_i, +#define jit_ldxar_i(u,v,w) jit_new_node_www(jit_code_ldxar_i,u,v,w) +#define jit_ldxai_i(u,v,w) jit_new_node_www(jit_code_ldxai_i,u,v,w) + jit_code_ldxar_i, jit_code_ldxai_i, +#if __WORDSIZE == 32 +# define jit_ldxbr(u,v,w) jit_ldxbr_i(u,v,w) +# define jit_ldxbi(u,v,w) jit_ldxbi_i(u,v,w) +# define jit_ldxar(u,v,w) jit_ldxar_i(u,v,w) +# define jit_ldxai(u,v,w) jit_ldxai_i(u,v,w) +#else +# define jit_ldxbr(u,v,w) jit_ldxbr_l(u,v,w) +# define jit_ldxbi(u,v,w) jit_ldxbi_l(u,v,w) +# define jit_ldxar(u,v,w) jit_ldxar_l(u,v,w) +# define jit_ldxai(u,v,w) jit_ldxai_l(u,v,w) +# define jit_ldxbr_ui(u,v,w) jit_new_node_www(jit_code_ldxbr_ui,u,v,w) +# define jit_ldxbi_ui(u,v,w) jit_new_node_www(jit_code_ldxbi_ui,u,v,w) +# define jit_ldxar_ui(u,v,w) jit_new_node_www(jit_code_ldxar_ui,u,v,w) +# define jit_ldxai_ui(u,v,w) jit_new_node_www(jit_code_ldxai_ui,u,v,w) +# define jit_ldxbr_l(u,v,w) jit_new_node_www(jit_code_ldxbr_l,u,v,w) +# define jit_ldxbi_l(u,v,w) jit_new_node_www(jit_code_ldxbi_l,u,v,w) +# define jit_ldxar_l(u,v,w) jit_new_node_www(jit_code_ldxar_l,u,v,w) +# define jit_ldxai_l(u,v,w) jit_new_node_www(jit_code_ldxai_l,u,v,w) +#endif + jit_code_ldxbr_ui, jit_code_ldxbi_ui, + jit_code_ldxar_ui, jit_code_ldxai_ui, + jit_code_ldxbr_l, jit_code_ldxbi_l, + jit_code_ldxar_l, jit_code_ldxai_l, +# 
define jit_ldxbr_f(u,v,w) jit_new_node_www(jit_code_ldxbr_f,u,v,w) +# define jit_ldxbi_f(u,v,w) jit_new_node_www(jit_code_ldxbi_f,u,v,w) +# define jit_ldxar_f(u,v,w) jit_new_node_www(jit_code_ldxar_f,u,v,w) +# define jit_ldxai_f(u,v,w) jit_new_node_www(jit_code_ldxai_f,u,v,w) + jit_code_ldxbr_f, jit_code_ldxbi_f, + jit_code_ldxar_f, jit_code_ldxai_f, +# define jit_ldxbr_d(u,v,w) jit_new_node_www(jit_code_ldxbr_d,u,v,w) +# define jit_ldxbi_d(u,v,w) jit_new_node_www(jit_code_ldxbi_d,u,v,w) +# define jit_ldxar_d(u,v,w) jit_new_node_www(jit_code_ldxar_d,u,v,w) +# define jit_ldxai_d(u,v,w) jit_new_node_www(jit_code_ldxai_d,u,v,w) + jit_code_ldxbr_d, jit_code_ldxbi_d, + jit_code_ldxar_d, jit_code_ldxai_d, +#define jit_stxbr_c(u,v,w) jit_new_node_www(jit_code_stxbr_c,u,v,w) +#define jit_stxbi_c(u,v,w) jit_new_node_www(jit_code_stxbi_c,u,v,w) +#define jit_stxar_c(u,v,w) jit_new_node_www(jit_code_stxar_c,u,v,w) +#define jit_stxai_c(u,v,w) jit_new_node_www(jit_code_stxai_c,u,v,w) + jit_code_stxbr_c, jit_code_stxbi_c, + jit_code_stxar_c, jit_code_stxai_c, +#define jit_stxbr_s(u,v,w) jit_new_node_www(jit_code_stxbr_s,u,v,w) +#define jit_stxbi_s(u,v,w) jit_new_node_www(jit_code_stxbi_s,u,v,w) +#define jit_stxar_s(u,v,w) jit_new_node_www(jit_code_stxar_s,u,v,w) +#define jit_stxai_s(u,v,w) jit_new_node_www(jit_code_stxai_s,u,v,w) + jit_code_stxbr_s, jit_code_stxbi_s, + jit_code_stxar_s, jit_code_stxai_s, +#define jit_stxbr_i(u,v,w) jit_new_node_www(jit_code_stxbr_i,u,v,w) +#define jit_stxbi_i(u,v,w) jit_new_node_www(jit_code_stxbi_i,u,v,w) +#define jit_stxar_i(u,v,w) jit_new_node_www(jit_code_stxar_i,u,v,w) +#define jit_stxai_i(u,v,w) jit_new_node_www(jit_code_stxai_i,u,v,w) + jit_code_stxbr_i, jit_code_stxbi_i, + jit_code_stxar_i, jit_code_stxai_i, +#if __WORDSIZE == 32 +# define jit_stxbr(u,v,w) jit_stxbr_i(u,v,w) +# define jit_stxbi(u,v,w) jit_stxbi_i(u,v,w) +# define jit_stxar(u,v,w) jit_stxar_i(u,v,w) +# define jit_stxai(u,v,w) jit_stxai_i(u,v,w) +#else +# define 
jit_stxbr(u,v,w) jit_stxbr_l(u,v,w) +# define jit_stxbi(u,v,w) jit_stxbi_l(u,v,w) +# define jit_stxar(u,v,w) jit_stxar_l(u,v,w) +# define jit_stxai(u,v,w) jit_stxai_l(u,v,w) +# define jit_stxbr_l(u,v,w) jit_new_node_www(jit_code_stxbr_l,u,v,w) +# define jit_stxbi_l(u,v,w) jit_new_node_www(jit_code_stxbi_l,u,v,w) +# define jit_stxar_l(u,v,w) jit_new_node_www(jit_code_stxar_l,u,v,w) +# define jit_stxai_l(u,v,w) jit_new_node_www(jit_code_stxai_l,u,v,w) +#endif + jit_code_stxbr_l, jit_code_stxbi_l, + jit_code_stxar_l, jit_code_stxai_l, +# define jit_stxbr_f(u,v,w) jit_new_node_www(jit_code_stxbr_f,u,v,w) +# define jit_stxbi_f(u,v,w) jit_new_node_www(jit_code_stxbi_f,u,v,w) +# define jit_stxar_f(u,v,w) jit_new_node_www(jit_code_stxar_f,u,v,w) +# define jit_stxai_f(u,v,w) jit_new_node_www(jit_code_stxai_f,u,v,w) + jit_code_stxbr_f, jit_code_stxbi_f, + jit_code_stxar_f, jit_code_stxai_f, +# define jit_stxbr_d(u,v,w) jit_new_node_www(jit_code_stxbr_d,u,v,w) +# define jit_stxbi_d(u,v,w) jit_new_node_www(jit_code_stxbi_d,u,v,w) +# define jit_stxar_d(u,v,w) jit_new_node_www(jit_code_stxar_d,u,v,w) +# define jit_stxai_d(u,v,w) jit_new_node_www(jit_code_stxai_d,u,v,w) + jit_code_stxbr_d, jit_code_stxbi_d, + jit_code_stxar_d, jit_code_stxai_d, + + jit_code_last_code +} jit_code_t; + +typedef void* (*jit_alloc_func_ptr) (size_t); +typedef void* (*jit_realloc_func_ptr) (void*, size_t); +typedef void (*jit_free_func_ptr) (void*); + +/* + * Prototypes + */ +extern void init_jit_with_debug(const char*,FILE*); +extern void init_jit(const char*); +extern void finish_jit(void); + +extern jit_state_t *jit_new_state(void); +#define jit_clear_state() _jit_clear_state(_jit) +extern void _jit_clear_state(jit_state_t*); +#define jit_destroy_state() _jit_destroy_state(_jit) +extern void _jit_destroy_state(jit_state_t*); + +#define jit_address(node) _jit_address(_jit, node) +extern jit_pointer_t _jit_address(jit_state_t*, jit_node_t*); +extern jit_node_t *_jit_name(jit_state_t*, const char*); 
+extern jit_node_t *_jit_note(jit_state_t*, const char*, int); +extern jit_node_t *_jit_label(jit_state_t*); +extern jit_node_t *_jit_forward(jit_state_t*); +extern jit_node_t *_jit_indirect(jit_state_t*); +extern void _jit_link(jit_state_t*, jit_node_t*); +#define jit_forward_p(u) _jit_forward_p(_jit,u) +extern jit_bool_t _jit_forward_p(jit_state_t*,jit_node_t*); +#define jit_indirect_p(u) _jit_indirect_p(_jit,u) +extern jit_bool_t _jit_indirect_p(jit_state_t*,jit_node_t*); +#define jit_target_p(u) _jit_target_p(_jit,u) +extern jit_bool_t _jit_target_p(jit_state_t*,jit_node_t*); + +extern void _jit_prolog(jit_state_t*); + +extern jit_int32_t _jit_allocai(jit_state_t*, jit_int32_t); +extern void _jit_allocar(jit_state_t*, jit_int32_t, jit_int32_t); +extern void _jit_ellipsis(jit_state_t*); + +extern jit_node_t *_jit_arg(jit_state_t*, jit_code_t); + +extern void _jit_getarg_c(jit_state_t*, jit_gpr_t, jit_node_t*); +extern void _jit_getarg_uc(jit_state_t*, jit_gpr_t, jit_node_t*); +extern void _jit_getarg_s(jit_state_t*, jit_gpr_t, jit_node_t*); +extern void _jit_getarg_us(jit_state_t*, jit_gpr_t, jit_node_t*); +extern void _jit_getarg_i(jit_state_t*, jit_gpr_t, jit_node_t*); +#if __WORDSIZE == 64 +extern void _jit_getarg_ui(jit_state_t*, jit_gpr_t, jit_node_t*); +extern void _jit_getarg_l(jit_state_t*, jit_gpr_t, jit_node_t*); +#endif + +extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*, jit_code_t); +extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*, jit_code_t); + +extern void _jit_prepare(jit_state_t*); +extern void _jit_ellipsis(jit_state_t*); +extern void _jit_va_push(jit_state_t*, jit_gpr_t); + +extern void _jit_pushargr(jit_state_t*, jit_gpr_t, jit_code_t); +extern void _jit_pushargi(jit_state_t*, jit_word_t, jit_code_t); + +extern void _jit_finishr(jit_state_t*, jit_gpr_t); +extern jit_node_t *_jit_finishi(jit_state_t*, jit_pointer_t); +extern void _jit_ret(jit_state_t*); + +extern void _jit_retr(jit_state_t*, jit_gpr_t, jit_code_t); 
+extern void _jit_reti(jit_state_t*, jit_word_t, jit_code_t); + +extern void _jit_retval_c(jit_state_t*, jit_gpr_t); +extern void _jit_retval_uc(jit_state_t*, jit_gpr_t); +extern void _jit_retval_s(jit_state_t*, jit_gpr_t); +extern void _jit_retval_us(jit_state_t*, jit_gpr_t); +extern void _jit_retval_i(jit_state_t*, jit_gpr_t); +#if __WORDSIZE == 64 +extern void _jit_retval_ui(jit_state_t*, jit_gpr_t); +extern void _jit_retval_l(jit_state_t*, jit_gpr_t); +#endif + +extern void _jit_epilog(jit_state_t*); + +#define jit_patch(u) _jit_patch(_jit,u) +extern void _jit_patch(jit_state_t*, jit_node_t*); +#define jit_patch_at(u,v) _jit_patch_at(_jit,u,v) +extern void _jit_patch_at(jit_state_t*, jit_node_t*, jit_node_t*); +#define jit_patch_abs(u,v) _jit_patch_abs(_jit,u,v) +extern void _jit_patch_abs(jit_state_t*, jit_node_t*, jit_pointer_t); +#define jit_realize() _jit_realize(_jit) +extern void _jit_realize(jit_state_t*); +#define jit_get_code(u) _jit_get_code(_jit,u) +extern jit_pointer_t _jit_get_code(jit_state_t*, jit_word_t*); +#define jit_set_code(u,v) _jit_set_code(_jit,u,v) +extern void _jit_set_code(jit_state_t*, jit_pointer_t, jit_word_t); +#define jit_get_data(u,v) _jit_get_data(_jit,u,v) +extern jit_pointer_t _jit_get_data(jit_state_t*, jit_word_t*, jit_word_t*); +#define jit_set_data(u,v,w) _jit_set_data(_jit,u,v,w) +extern void _jit_set_data(jit_state_t*, jit_pointer_t, jit_word_t, jit_word_t); +#define jit_frame(u) _jit_frame(_jit,u) +extern void _jit_frame(jit_state_t*, jit_int32_t); +#define jit_tramp(u) _jit_tramp(_jit,u) +extern void _jit_tramp(jit_state_t*, jit_int32_t); +#define jit_emit() _jit_emit(_jit) +extern jit_pointer_t _jit_emit(jit_state_t*); +#define jit_unprotect() _jit_unprotect(_jit) +extern void _jit_unprotect(jit_state_t*); +#define jit_protect() _jit_protect(_jit) +extern void _jit_protect(jit_state_t*); + +#define jit_print() _jit_print(_jit) +extern void _jit_print(jit_state_t*); + +extern jit_node_t *_jit_arg_f(jit_state_t*); 
+extern void _jit_getarg_f(jit_state_t*, jit_fpr_t, jit_node_t*); +extern void _jit_putargr_f(jit_state_t*, jit_fpr_t, jit_node_t*); +extern void _jit_putargi_f(jit_state_t*, jit_float32_t, jit_node_t*); +extern void _jit_pushargr_f(jit_state_t*, jit_fpr_t); +extern void _jit_pushargi_f(jit_state_t*, jit_float32_t); +extern void _jit_retr_f(jit_state_t*, jit_fpr_t); +extern void _jit_reti_f(jit_state_t*, jit_float32_t); +extern void _jit_retval_f(jit_state_t*, jit_fpr_t); +extern void _jit_negi_f(jit_state_t*, jit_fpr_t, jit_float32_t); +extern void _jit_absi_f(jit_state_t*, jit_fpr_t, jit_float32_t); +extern void _jit_sqrti_f(jit_state_t*, jit_fpr_t, jit_float32_t); +extern void _jit_fmai_f(jit_state_t*, + jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t); +extern void _jit_fmsi_f(jit_state_t*, + jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t); +extern void _jit_fnmai_f(jit_state_t*, + jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t); +extern void _jit_fnmsi_f(jit_state_t*, + jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t); + +extern jit_node_t *_jit_arg_d(jit_state_t*); +extern void _jit_getarg_d(jit_state_t*, jit_fpr_t, jit_node_t*); +extern void _jit_putargr_d(jit_state_t*, jit_fpr_t, jit_node_t*); +extern void _jit_putargi_d(jit_state_t*, jit_float64_t, jit_node_t*); +extern void _jit_pushargr_d(jit_state_t*, jit_fpr_t); +extern void _jit_pushargi_d(jit_state_t*, jit_float64_t); +extern void _jit_retr_d(jit_state_t*, jit_fpr_t); +extern void _jit_reti_d(jit_state_t*, jit_float64_t); +extern void _jit_retval_d(jit_state_t*, jit_fpr_t); +extern void _jit_negi_d(jit_state_t*, jit_fpr_t, jit_float64_t); +extern void _jit_absi_d(jit_state_t*, jit_fpr_t, jit_float64_t); +extern void _jit_sqrti_d(jit_state_t*, jit_fpr_t, jit_float64_t); +extern void _jit_fmai_d(jit_state_t*, + jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t); +extern void _jit_fmsi_d(jit_state_t*, + jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t); +extern void _jit_fnmai_d(jit_state_t*, + jit_fpr_t, 
jit_fpr_t, jit_fpr_t, jit_float64_t); +extern void _jit_fnmsi_d(jit_state_t*, + jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t); + +#define jit_get_reg(s) _jit_get_reg(_jit,s) +extern jit_int32_t _jit_get_reg(jit_state_t*, jit_int32_t); + +#define jit_unget_reg(r) _jit_unget_reg(_jit,r) +extern void _jit_unget_reg(jit_state_t*, jit_int32_t); + +#define jit_new_node(c) _jit_new_node(_jit,c) +extern jit_node_t *_jit_new_node(jit_state_t*, jit_code_t); +#define jit_new_node_w(c,u) _jit_new_node_w(_jit,c,u) +extern jit_node_t *_jit_new_node_w(jit_state_t*, jit_code_t, + jit_word_t); +#define jit_new_node_f(c,u) _jit_new_node_f(_jit,c,u) +extern jit_node_t *_jit_new_node_f(jit_state_t*, jit_code_t, + jit_float32_t); +#define jit_new_node_d(c,u) _jit_new_node_d(_jit,c,u) +extern jit_node_t *_jit_new_node_d(jit_state_t*, jit_code_t, + jit_float64_t); +#define jit_new_node_p(c,u) _jit_new_node_p(_jit,c,u) +extern jit_node_t *_jit_new_node_p(jit_state_t*, jit_code_t, + jit_pointer_t); +#define jit_new_node_ww(c,u,v) _jit_new_node_ww(_jit,c,u,v) +extern jit_node_t *_jit_new_node_ww(jit_state_t*,jit_code_t, + jit_word_t, jit_word_t); +#define jit_new_node_wp(c,u,v) _jit_new_node_wp(_jit,c,u,v) +extern jit_node_t *_jit_new_node_wp(jit_state_t*,jit_code_t, + jit_word_t, jit_pointer_t); +#define jit_new_node_fp(c,u,v) _jit_new_node_fp(_jit,c,u,v) +extern jit_node_t *_jit_new_node_fp(jit_state_t*,jit_code_t, + jit_float32_t, jit_pointer_t); +#define jit_new_node_dp(c,u,v) _jit_new_node_dp(_jit,c,u,v) +extern jit_node_t *_jit_new_node_dp(jit_state_t*,jit_code_t, + jit_float64_t, jit_pointer_t); +#define jit_new_node_pw(c,u,v) _jit_new_node_pw(_jit,c,u,v) +extern jit_node_t *_jit_new_node_pw(jit_state_t*,jit_code_t, + jit_pointer_t, jit_word_t); +#define jit_new_node_wf(c,u,v) _jit_new_node_wf(_jit,c,u,v) +extern jit_node_t *_jit_new_node_wf(jit_state_t*, jit_code_t, + jit_word_t, jit_float32_t); +#define jit_new_node_wd(c,u,v) _jit_new_node_wd(_jit,c,u,v) +extern jit_node_t 
*_jit_new_node_wd(jit_state_t*, jit_code_t, + jit_word_t, jit_float64_t); +#define jit_new_node_www(c,u,v,w) _jit_new_node_www(_jit,c,u,v,w) +extern jit_node_t *_jit_new_node_www(jit_state_t*, jit_code_t, + jit_word_t, jit_word_t, jit_word_t); +#define jit_new_node_qww(c,l,h,v,w) _jit_new_node_qww(_jit,c,l,h,v,w) +extern jit_node_t *_jit_new_node_qww(jit_state_t*, jit_code_t, + jit_int32_t, jit_int32_t, + jit_word_t, jit_word_t); +#define jit_new_node_wqw(c,u,l,h,w) _jit_new_node_wqw(_jit,c,u,l,h,w) +extern jit_node_t *_jit_new_node_wqw(jit_state_t*, jit_code_t, + jit_word_t, jit_int32_t, + jit_int32_t, jit_word_t); +#define jit_new_node_wwq(c,u,v,l,h) _jit_new_node_wwq(_jit,c,u,v,l,h) +extern jit_node_t *_jit_new_node_wwq(jit_state_t*, jit_code_t, + jit_word_t, jit_word_t, + jit_int32_t, jit_int32_t); +#define jit_new_node_wwf(c,u,v,w) _jit_new_node_wwf(_jit,c,u,v,w) +extern jit_node_t *_jit_new_node_wwf(jit_state_t*, jit_code_t, + jit_word_t, jit_word_t, jit_float32_t); +#define jit_new_node_wqf(c,u,l,h,w) _jit_new_node_wqf(_jit,c,u,l,h,w) +extern jit_node_t *_jit_new_node_wqf(jit_state_t*, jit_code_t, + jit_word_t, jit_int32_t, + jit_int32_t, jit_float32_t); +#define jit_new_node_wwd(c,u,v,w) _jit_new_node_wwd(_jit,c,u,v,w) +extern jit_node_t *_jit_new_node_wwd(jit_state_t*, jit_code_t, + jit_word_t, jit_word_t, jit_float64_t); +#define jit_new_node_wqd(c,u,l,h,w) _jit_new_node_wqd(_jit,c,u,l,h,w) +extern jit_node_t *_jit_new_node_wqd(jit_state_t*, jit_code_t, + jit_word_t, jit_int32_t, + jit_int32_t, jit_float64_t); +#define jit_new_node_pww(c,u,v,w) _jit_new_node_pww(_jit,c,u,v,w) +extern jit_node_t *_jit_new_node_pww(jit_state_t*, jit_code_t, + jit_pointer_t, jit_word_t, jit_word_t); +#define jit_new_node_pwf(c,u,v,w) _jit_new_node_pwf(_jit,c,u,v,w) +extern jit_node_t *_jit_new_node_pwf(jit_state_t*, jit_code_t, + jit_pointer_t, jit_word_t, jit_float32_t); +#define jit_new_node_pwd(c,u,v,w) _jit_new_node_pwd(_jit,c,u,v,w) +extern jit_node_t 
*_jit_new_node_pwd(jit_state_t*, jit_code_t, + jit_pointer_t, jit_word_t, jit_float64_t); + +#define jit_arg_register_p(u) _jit_arg_register_p(_jit,u) +extern jit_bool_t _jit_arg_register_p(jit_state_t*, jit_node_t*); +#define jit_callee_save_p(u) _jit_callee_save_p(_jit,u) +extern jit_bool_t _jit_callee_save_p(jit_state_t*, jit_int32_t); +#define jit_pointer_p(u) _jit_pointer_p(_jit,u) +extern jit_bool_t _jit_pointer_p(jit_state_t*,jit_pointer_t); + +#define jit_get_note(n,u,v,w) _jit_get_note(_jit,n,u,v,w) +extern jit_bool_t _jit_get_note(jit_state_t*,jit_pointer_t,char**,char**,int*); + +#define jit_disassemble() _jit_disassemble(_jit) +extern void _jit_disassemble(jit_state_t*); + +extern void jit_set_memory_functions(jit_alloc_func_ptr, + jit_realloc_func_ptr, + jit_free_func_ptr); +extern void jit_get_memory_functions(jit_alloc_func_ptr*, + jit_realloc_func_ptr*, + jit_free_func_ptr*); +#endif /* _lightning_h */ diff --git a/include/lightrec/lightrec-config.h b/include/lightrec/lightrec-config.h new file mode 100644 index 000000000..3d4b81e6b --- /dev/null +++ b/include/lightrec/lightrec-config.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2019-2021 Paul Cercueil + */ + +#ifndef __LIGHTREC_CONFIG_H__ +#define __LIGHTREC_CONFIG_H__ + +#define ENABLE_THREADED_COMPILER LIGHTREC_ENABLE_THREADED_COMPILER +#define ENABLE_FIRST_PASS 1 +#define ENABLE_DISASSEMBLER 0 +#define ENABLE_CODE_BUFFER 1 + +#define HAS_DEFAULT_ELM 1 + +#define OPT_REMOVE_DIV_BY_ZERO_SEQ 1 +#define OPT_REPLACE_MEMSET 1 +#define OPT_DETECT_IMPOSSIBLE_BRANCHES 1 +#define OPT_HANDLE_LOAD_DELAYS 1 +#define OPT_TRANSFORM_OPS 1 +#define OPT_LOCAL_BRANCHES 1 +#define OPT_SWITCH_DELAY_SLOTS 1 +#define OPT_FLAG_IO 1 +#define OPT_FLAG_MULT_DIV 1 +#define OPT_EARLY_UNLOAD 1 +#define OPT_PRELOAD_PC 1 + +#define OPT_SH4_USE_GBR 0 + +#endif /* __LIGHTREC_CONFIG_H__ */ + diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 159b7e4f3..2e382e3c1 100644 
--- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -2,10 +2,16 @@ #include "sio.h" #include "ppf.h" #include "new_dynarec/new_dynarec.h" +#include "lightrec/plugin.h" /* It's duplicated from emu_if.c */ #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +/* Corresponds to LIGHTREC_OPT_INV_DMA_ONLY of lightrec.h */ +#define LIGHTREC_HACK_INV_DMA_ONLY (1 << 0) + +u32 lightrec_hacks; + static const char * const MemorycardHack_db[] = { /* Lifeforce Tenka, also known as Codename Tenka */ @@ -133,6 +139,35 @@ cycle_multiplier_overrides[] = { 200, { "SLUS00183", "SLES00159", "SLPS00083", "SLPM80008" } }, }; +static const struct +{ + const char * const id; + u32 hacks; +} +lightrec_hacks_db[] = +{ + /* Formula One Arcade */ + { "SCES03886", LIGHTREC_HACK_INV_DMA_ONLY }, + + /* Formula One '99 */ + { "SLUS00870", LIGHTREC_HACK_INV_DMA_ONLY }, + { "SCPS10101", LIGHTREC_HACK_INV_DMA_ONLY }, + { "SCES01979", LIGHTREC_HACK_INV_DMA_ONLY }, + { "SLES01979", LIGHTREC_HACK_INV_DMA_ONLY }, + + /* Formula One 2000 */ + { "SLUS01134", LIGHTREC_HACK_INV_DMA_ONLY }, + { "SCES02777", LIGHTREC_HACK_INV_DMA_ONLY }, + { "SCES02778", LIGHTREC_HACK_INV_DMA_ONLY }, + { "SCES02779", LIGHTREC_HACK_INV_DMA_ONLY }, + + /* Formula One 2001 */ + { "SCES03404", LIGHTREC_HACK_INV_DMA_ONLY }, + { "SCES03423", LIGHTREC_HACK_INV_DMA_ONLY }, + { "SCES03424", LIGHTREC_HACK_INV_DMA_ONLY }, + { "SCES03524", LIGHTREC_HACK_INV_DMA_ONLY }, +}; + /* Function for automatic patching according to GameID. 
*/ void Apply_Hacks_Cdrom(void) { @@ -190,6 +225,17 @@ void Apply_Hacks_Cdrom(void) break; } } + + lightrec_hacks = 0; + + for (i = 0; drc_is_lightrec() && i < ARRAY_SIZE(lightrec_hacks_db); i++) { + if (strcmp(CdromId, lightrec_hacks_db[i].id) == 0) + { + lightrec_hacks = lightrec_hacks_db[i].hacks; + SysPrintf("using lightrec_hacks: 0x%x\n", lightrec_hacks); + break; + } + } } // from duckstation's gamedb.json diff --git a/libpcsxcore/lightrec/mem.c b/libpcsxcore/lightrec/mem.c new file mode 100644 index 000000000..5cd86b4fe --- /dev/null +++ b/libpcsxcore/lightrec/mem.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +/* + * Copyright (C) 2022 Paul Cercueil + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../psxhw.h" +#include "../psxmem.h" +#include "../r3000a.h" + +#include "mem.h" + +#define ARRAY_SIZE(a) (sizeof(a) ? (sizeof(a) / sizeof((a)[0])) : 0) + +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE 0x100000 +#endif + +#ifndef MFD_HUGETLB +#define MFD_HUGETLB 0x0004 +#endif + +static const uintptr_t supported_io_bases[] = { + 0x0, + 0x10000000, + 0x40000000, + 0x80000000, +}; + +static void * mmap_huge(void *addr, size_t length, int prot, int flags, + int fd, off_t offset) +{ + void *map = MAP_FAILED; + + if (length >= 0x200000) { + map = mmap(addr, length, prot, + flags | MAP_HUGETLB | (21 << MAP_HUGE_SHIFT), + fd, offset); + if (map != MAP_FAILED) + printf("Hugetlb mmap to address 0x%" PRIxPTR " succeeded\n", + (uintptr_t) addr); + } + + if (map == MAP_FAILED) { + map = mmap(addr, length, prot, flags, fd, offset); + if (map != MAP_FAILED) { + printf("Regular mmap to address 0x%" PRIxPTR " succeeded\n", + (uintptr_t) addr); +#ifdef MADV_HUGEPAGE + madvise(map, length, MADV_HUGEPAGE); +#endif + } + } + + return map; +} + +static int lightrec_mmap_ram(bool hugetlb) +{ + unsigned int i, j; + int err, 
memfd, flags = 0; + uintptr_t base; + void *map; + + if (hugetlb) + flags |= MFD_HUGETLB; + + memfd = syscall(SYS_memfd_create, "/lightrec_memfd", + flags); + if (memfd < 0) { + err = -errno; + fprintf(stderr, "Failed to create memfd: %d\n", err); + return err; + } + + err = ftruncate(memfd, 0x200000); + if (err < 0) { + err = -errno; + fprintf(stderr, "Could not trim memfd: %d\n", err); + goto err_close_memfd; + } + + for (i = 0; i < ARRAY_SIZE(supported_io_bases); i++) { + base = supported_io_bases[i]; + + for (j = 0; j < 4; j++) { + void *base_ptr = (void *)(base + j * 0x200000); + map = mmap_huge(base_ptr, 0x200000, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED_NOREPLACE, memfd, 0); + if (map == MAP_FAILED) + break; + // some systems ignore MAP_FIXED_NOREPLACE + if (map != base_ptr) { + munmap(map, 0x200000); + break; + } + } + + /* Impossible to map using this base */ + if (j == 0) + continue; + + /* All mirrors mapped - we got a match! */ + if (j == 4) + break; + + /* Only some mirrors mapped - clean the mess and try again */ + for (; j > 0; j--) + munmap((void *)(base + (j - 1) * 0x200000), 0x200000); + } + + if (i == ARRAY_SIZE(supported_io_bases)) { + err = -EINVAL; + goto err_close_memfd; + } + + err = 0; + psxM = (s8 *)base; + +err_close_memfd: + close(memfd); + return err; +} + +int lightrec_init_mmap(void) +{ + unsigned int i; + uintptr_t base; + void *map; + int err = lightrec_mmap_ram(true); + if (err) { + err = lightrec_mmap_ram(false); + if (err) { + fprintf(stderr, "Unable to mmap RAM and mirrors\n"); + return err; + } + } + + base = (uintptr_t) psxM; + + map = mmap((void *)(base + 0x1f000000), 0x10000, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_FIXED_NOREPLACE | MAP_ANONYMOUS, -1, 0); + if (map == MAP_FAILED) { + err = -EINVAL; + fprintf(stderr, "Unable to mmap parallel port\n"); + goto err_unmap; + } + + psxP = (s8 *)map; + + map = mmap_huge((void *)(base + 0x1fc00000), 0x200000, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | 
MAP_FIXED_NOREPLACE | MAP_ANONYMOUS, -1, 0); + if (map == MAP_FAILED) { + err = -EINVAL; + fprintf(stderr, "Unable to mmap BIOS\n"); + goto err_unmap_parallel; + } + + psxR = (s8 *)map; + + map = mmap((void *)(base + 0x1f800000), 0x10000, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_FIXED_NOREPLACE | MAP_ANONYMOUS, -1, 0); /* anonymous mapping: fd must be -1, like the sibling calls */ + if (map == MAP_FAILED) { + err = -EINVAL; + fprintf(stderr, "Unable to mmap scratchpad\n"); + goto err_unmap_bios; + } + + psxH = (s8 *)map; + + map = mmap_huge((void *)(base + 0x800000), CODE_BUFFER_SIZE, + PROT_EXEC | PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_FIXED_NOREPLACE | MAP_ANONYMOUS, + -1, 0); + if (map == MAP_FAILED) { + err = -EINVAL; + fprintf(stderr, "Unable to mmap code buffer\n"); + goto err_unmap_scratch; + } + + code_buffer = map; + + return 0; + +err_unmap_scratch: + munmap(psxH, 0x10000); +err_unmap_bios: + munmap(psxR, 0x200000); +err_unmap_parallel: + munmap(psxP, 0x10000); +err_unmap: + for (i = 0; i < 4; i++) + munmap((void *)((uintptr_t)psxM + i * 0x200000), 0x200000); + return err; +} + +void lightrec_free_mmap(void) +{ + unsigned int i; + + munmap(code_buffer, CODE_BUFFER_SIZE); + munmap(psxH, 0x10000); + munmap(psxR, 0x200000); + munmap(psxP, 0x10000); + for (i = 0; i < 4; i++) + munmap((void *)((uintptr_t)psxM + i * 0x200000), 0x200000); +} diff --git a/libpcsxcore/lightrec/mem.h b/libpcsxcore/lightrec/mem.h index 98dbbdeb6..9984604e4 100644 --- a/libpcsxcore/lightrec/mem.h +++ b/libpcsxcore/lightrec/mem.h @@ -8,7 +8,17 @@ #ifdef LIGHTREC -#define CODE_BUFFER_SIZE (8 * 1024 * 1024) +#ifdef HW_WUP /* WiiU */ +# define WUP_RWX_MEM_BASE 0x00802000 +# define WUP_RWX_MEM_END 0x01000000 +# define CODE_BUFFER_SIZE_DFT (WUP_RWX_MEM_END - WUP_RWX_MEM_BASE) +#else +# define CODE_BUFFER_SIZE_DFT (8 * 1024 * 1024) +#endif + +#ifndef CODE_BUFFER_SIZE +#define CODE_BUFFER_SIZE CODE_BUFFER_SIZE_DFT +#endif extern void *code_buffer; diff --git a/libpcsxcore/lightrec/mem_wiiu.c b/libpcsxcore/lightrec/mem_wiiu.c new file mode
100644 index 000000000..7cba547fc --- /dev/null +++ b/libpcsxcore/lightrec/mem_wiiu.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2022 Ash Logan + */ + +#include +#include +#include + +#include "../memmap.h" +#include "../psxhw.h" +#include "../psxmem.h" +#include "../r3000a.h" + +#include "mem.h" + +void wiiu_clear_cache(void *start, void *end); + +static void* wiiu_mmap(uint32_t requested_va, size_t length, void* backing_mem) { + if (length < OS_PAGE_SIZE) length = OS_PAGE_SIZE; + + uint32_t va = OSAllocVirtAddr(requested_va, length, 0); + if (!va) return MAP_FAILED; + + BOOL mapped = OSMapMemory(va, OSEffectiveToPhysical((uint32_t)backing_mem), + length, OS_MAP_MEMORY_READ_WRITE); + if (!mapped) { + OSFreeVirtAddr(va, length); + return MAP_FAILED; + } + + return (void*)va; +} + +static void wiiu_unmap(void* va, size_t length) { + if (va == MAP_FAILED) return; + OSUnmapMemory((uint32_t)va, length); + OSFreeVirtAddr((uint32_t)va, length); +} + +static void* psx_mem; +static void* psx_parallel; +static void* psx_scratch; +static void* psx_bios; + +int lightrec_init_mmap(void) { + psx_mem = memalign(OS_PAGE_SIZE, 0x200000); + psx_parallel = memalign(OS_PAGE_SIZE, 0x10000); + psx_scratch = memalign(OS_PAGE_SIZE, 0x10000); + psx_bios = memalign(OS_PAGE_SIZE, 0x80000); + if (!psx_mem || !psx_parallel || !psx_scratch || !psx_bios) + goto cleanup_allocations; + + uint32_t avail_va; + uint32_t avail_va_size; + OSGetMapVirtAddrRange(&avail_va, &avail_va_size); + if (!avail_va || avail_va_size < 0x20000000) + goto cleanup_allocations; + + // Map 4x ram mirrors + int i; + for (i = 0; i < 4; i++) { + void* ret = wiiu_mmap(avail_va + 0x200000 * i, 0x200000, psx_mem); + if (ret == MAP_FAILED) break; + } + if (i != 4) { + for (int j = 0; j < i; j++) /* undo only the mirrors that were mapped */ + wiiu_unmap((void *)(avail_va + 0x200000 * j), 0x200000); + goto cleanup_allocations; + } + psxM = (void*)avail_va; + + psxP = wiiu_mmap(avail_va + 0x1f000000, 0x10000, psx_parallel); + psxH =
wiiu_mmap(avail_va + 0x1f800000, 0x10000, psx_scratch); + psxR = wiiu_mmap(avail_va + 0x1fc00000, 0x80000, psx_bios); + + if (psxP == MAP_FAILED || psxH == MAP_FAILED || psxR == MAP_FAILED) { + for (int i = 0; i < 4; i++) + wiiu_unmap(psxM + 0x200000 * i, 0x200000); + wiiu_unmap(psxP, 0x10000); + wiiu_unmap(psxH, 0x10000); + wiiu_unmap(psxR, 0x80000); + goto cleanup_allocations; + } + + code_buffer = (void *)WUP_RWX_MEM_BASE; /* fixed RWX window; explicit int-to-pointer cast */ + + return 0; + +cleanup_allocations: + free(psx_mem); + free(psx_parallel); + free(psx_scratch); + free(psx_bios); + return -1; +} + +void lightrec_free_mmap(void) { + for (int i = 0; i < 4; i++) + wiiu_unmap(psxM + 0x200000 * i, 0x200000); + wiiu_unmap(psxP, 0x10000); + wiiu_unmap(psxH, 0x10000); + wiiu_unmap(psxR, 0x80000); + free(psx_mem); + free(psx_parallel); + free(psx_scratch); + free(psx_bios); +} + +void lightrec_code_inv(void *ptr, uint32_t len) +{ + wiiu_clear_cache(ptr, (void *)((uintptr_t)ptr + len)); +} diff --git a/libpcsxcore/lightrec/plugin.c b/libpcsxcore/lightrec/plugin.c new file mode 100644 index 000000000..7f500fd27 --- /dev/null +++ b/libpcsxcore/lightrec/plugin.c @@ -0,0 +1,671 @@ +#include +#include +#include +#include +#include +#include + +#if P_HAVE_MMAP +#include +#endif + +#include "lightrec.h" +#include "../cdrom.h" +#include "../gpu.h" +#include "../gte.h" +#include "../mdec.h" +#include "../psxdma.h" +#include "../psxhw.h" +#include "../psxmem.h" +#include "../r3000a.h" +#include "../psxinterpreter.h" +#include "../psxhle.h" +#include "../psxevents.h" + +#include "../frontend/main.h" + +#include "mem.h" +#include "plugin.h" + +#if (defined(__arm__) || defined(__aarch64__)) && !defined(ALLOW_LIGHTREC_ON_ARM) +#error "Lightrec should not be used on ARM (please specify DYNAREC=ari64 to make)" +#endif + +#define ARRAY_SIZE(x) (sizeof(x) ?
sizeof(x) / sizeof((x)[0]) : 0) + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define LE32TOH(x) __builtin_bswap32(x) +# define HTOLE32(x) __builtin_bswap32(x) +# define LE16TOH(x) __builtin_bswap16(x) +# define HTOLE16(x) __builtin_bswap16(x) +#else +# define LE32TOH(x) (x) +# define HTOLE32(x) (x) +# define LE16TOH(x) (x) +# define HTOLE16(x) (x) +#endif + +#ifdef __GNUC__ +# define likely(x) __builtin_expect(!!(x),1) +# define unlikely(x) __builtin_expect(!!(x),0) +#else +# define likely(x) (x) +# define unlikely(x) (x) +#endif + +#ifndef LIGHTREC_PROG_NAME +# ifdef __linux__ +# define LIGHTREC_PROG_NAME "/proc/self/exe" +# else +# define LIGHTREC_PROG_NAME "retroarch.exe" +# endif +#endif + +psxRegisters psxRegs; +Rcnt rcnts[4]; + +void* code_buffer; + +static struct lightrec_state *lightrec_state; + +static bool use_lightrec_interpreter; +static bool use_pcsx_interpreter; +static bool block_stepping; + +extern u32 lightrec_hacks; + +extern void lightrec_code_inv(void *ptr, uint32_t len); + +enum my_cp2_opcodes { + OP_CP2_RTPS = 0x01, + OP_CP2_NCLIP = 0x06, + OP_CP2_OP = 0x0c, + OP_CP2_DPCS = 0x10, + OP_CP2_INTPL = 0x11, + OP_CP2_MVMVA = 0x12, + OP_CP2_NCDS = 0x13, + OP_CP2_CDP = 0x14, + OP_CP2_NCDT = 0x16, + OP_CP2_NCCS = 0x1b, + OP_CP2_CC = 0x1c, + OP_CP2_NCS = 0x1e, + OP_CP2_NCT = 0x20, + OP_CP2_SQR = 0x28, + OP_CP2_DCPL = 0x29, + OP_CP2_DPCT = 0x2a, + OP_CP2_AVSZ3 = 0x2d, + OP_CP2_AVSZ4 = 0x2e, + OP_CP2_RTPT = 0x30, + OP_CP2_GPF = 0x3d, + OP_CP2_GPL = 0x3e, + OP_CP2_NCCT = 0x3f, +}; + +static void (*cp2_ops[])(struct psxCP2Regs *) = { + [OP_CP2_RTPS] = gteRTPS, + [OP_CP2_RTPS] = gteRTPS, + [OP_CP2_NCLIP] = gteNCLIP, + [OP_CP2_OP] = gteOP, + [OP_CP2_DPCS] = gteDPCS, + [OP_CP2_INTPL] = gteINTPL, + [OP_CP2_MVMVA] = gteMVMVA, + [OP_CP2_NCDS] = gteNCDS, + [OP_CP2_CDP] = gteCDP, + [OP_CP2_NCDT] = gteNCDT, + [OP_CP2_NCCS] = gteNCCS, + [OP_CP2_CC] = gteCC, + [OP_CP2_NCS] = gteNCS, + [OP_CP2_NCT] = gteNCT, + [OP_CP2_SQR] = gteSQR, + [OP_CP2_DCPL] = gteDCPL, + 
[OP_CP2_DPCT] = gteDPCT, + [OP_CP2_AVSZ3] = gteAVSZ3, + [OP_CP2_AVSZ4] = gteAVSZ4, + [OP_CP2_RTPT] = gteRTPT, + [OP_CP2_GPF] = gteGPF, + [OP_CP2_GPL] = gteGPL, + [OP_CP2_NCCT] = gteNCCT, +}; + +static char cache_buf[64 * 1024]; + +static void cop2_op(struct lightrec_state *state, u32 func) +{ + struct lightrec_registers *regs = lightrec_get_registers(state); + + psxRegs.code = func; + + if (unlikely(!cp2_ops[func & 0x3f])) { + fprintf(stderr, "Invalid CP2 function %u\n", func); + } else { + /* This works because regs->cp2c comes right after regs->cp2d, + * so it can be cast to a pcsxCP2Regs pointer. */ + cp2_ops[func & 0x3f]((psxCP2Regs *) regs->cp2d); + } +} + +static bool has_interrupt(void) +{ + struct lightrec_registers *regs = lightrec_get_registers(lightrec_state); + + return ((psxHu32(0x1070) & psxHu32(0x1074)) && + (regs->cp0[12] & 0x401) == 0x401) || + (regs->cp0[12] & regs->cp0[13] & 0x0300); +} + +static void lightrec_tansition_to_pcsx(struct lightrec_state *state) +{ + psxRegs.cycle += lightrec_current_cycle_count(state) / 1024; + lightrec_reset_cycle_count(state, 0); +} + +static void lightrec_tansition_from_pcsx(struct lightrec_state *state) +{ + s32 cycles_left = next_interupt - psxRegs.cycle; + + if (block_stepping || cycles_left <= 0 || has_interrupt()) + lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT); + else { + lightrec_set_target_cycle_count(state, cycles_left * 1024); + } +} + +static void hw_write_byte(struct lightrec_state *state, + u32 op, void *host, u32 mem, u32 val) +{ + lightrec_tansition_to_pcsx(state); + + psxHwWrite8(mem, val); + + lightrec_tansition_from_pcsx(state); +} + +static void hw_write_half(struct lightrec_state *state, + u32 op, void *host, u32 mem, u32 val) +{ + lightrec_tansition_to_pcsx(state); + + psxHwWrite16(mem, val); + + lightrec_tansition_from_pcsx(state); +} + +static void hw_write_word(struct lightrec_state *state, + u32 op, void *host, u32 mem, u32 val) +{ + lightrec_tansition_to_pcsx(state); + + 
psxHwWrite32(mem, val); + + lightrec_tansition_from_pcsx(state); +} + +static u8 hw_read_byte(struct lightrec_state *state, u32 op, void *host, u32 mem) +{ + u8 val; + + lightrec_tansition_to_pcsx(state); + + val = psxHwRead8(mem); + + lightrec_tansition_from_pcsx(state); + + return val; +} + +static u16 hw_read_half(struct lightrec_state *state, + u32 op, void *host, u32 mem) +{ + u16 val; + + lightrec_tansition_to_pcsx(state); + + val = psxHwRead16(mem); + + lightrec_tansition_from_pcsx(state); + + return val; +} + +static u32 hw_read_word(struct lightrec_state *state, + u32 op, void *host, u32 mem) +{ + u32 val; + + lightrec_tansition_to_pcsx(state); + + val = psxHwRead32(mem); + + lightrec_tansition_from_pcsx(state); + + return val; +} + +static struct lightrec_mem_map_ops hw_regs_ops = { + .sb = hw_write_byte, + .sh = hw_write_half, + .sw = hw_write_word, + .lb = hw_read_byte, + .lh = hw_read_half, + .lw = hw_read_word, +}; + +static u32 cache_ctrl; + +static void cache_ctrl_write_word(struct lightrec_state *state, + u32 op, void *host, u32 mem, u32 val) +{ + cache_ctrl = val; +} + +static u32 cache_ctrl_read_word(struct lightrec_state *state, + u32 op, void *host, u32 mem) +{ + return cache_ctrl; +} + +static struct lightrec_mem_map_ops cache_ctrl_ops = { + .sw = cache_ctrl_write_word, + .lw = cache_ctrl_read_word, +}; + +static struct lightrec_mem_map lightrec_map[] = { + [PSX_MAP_KERNEL_USER_RAM] = { + /* Kernel and user memory */ + .pc = 0x00000000, + .length = 0x200000, + }, + [PSX_MAP_BIOS] = { + /* BIOS */ + .pc = 0x1fc00000, + .length = 0x80000, + }, + [PSX_MAP_SCRATCH_PAD] = { + /* Scratch pad */ + .pc = 0x1f800000, + .length = 0x400, + }, + [PSX_MAP_PARALLEL_PORT] = { + /* Parallel port */ + .pc = 0x1f000000, + .length = 0x10000, + }, + [PSX_MAP_HW_REGISTERS] = { + /* Hardware registers */ + .pc = 0x1f801000, + .length = 0x8000, + .ops = &hw_regs_ops, + }, + [PSX_MAP_CACHE_CONTROL] = { + /* Cache control */ + .pc = 0x5ffe0130, + .length = 4, + .ops = 
&cache_ctrl_ops, + }, + + /* Mirrors of the kernel/user memory */ + [PSX_MAP_MIRROR1] = { + .pc = 0x00200000, + .length = 0x200000, + .mirror_of = &lightrec_map[PSX_MAP_KERNEL_USER_RAM], + }, + [PSX_MAP_MIRROR2] = { + .pc = 0x00400000, + .length = 0x200000, + .mirror_of = &lightrec_map[PSX_MAP_KERNEL_USER_RAM], + }, + [PSX_MAP_MIRROR3] = { + .pc = 0x00600000, + .length = 0x200000, + .mirror_of = &lightrec_map[PSX_MAP_KERNEL_USER_RAM], + }, + + /* Mirror of the parallel port. Only used by the PS2/PS3 BIOS */ + [PSX_MAP_PPORT_MIRROR] = { + .pc = 0x1fa00000, + .length = 0x10000, + .mirror_of = &lightrec_map[PSX_MAP_PARALLEL_PORT], + }, + + /* Code buffer */ + [PSX_MAP_CODE_BUFFER] = { + .length = CODE_BUFFER_SIZE, + }, +}; + +static void lightrec_enable_ram(struct lightrec_state *state, bool enable) +{ + if (enable) + memcpy(psxM, cache_buf, sizeof(cache_buf)); + else + memcpy(cache_buf, psxM, sizeof(cache_buf)); +} + +static bool lightrec_can_hw_direct(u32 kaddr, bool is_write, u8 size) +{ + if (is_write && size != 32) { + // force32 so must go through handlers + if (0x1f801000 <= kaddr && kaddr < 0x1f801024) + return false; + if ((kaddr & 0x1fffff80) == 0x1f801080) // dma + return false; + } + + switch (size) { + case 8: + switch (kaddr) { + case 0x1f801040: + case 0x1f801050: + case 0x1f801800: + case 0x1f801801: + case 0x1f801802: + case 0x1f801803: + return false; + default: + return true; + } + case 16: + switch (kaddr) { + case 0x1f801040: + case 0x1f801044: + case 0x1f801048: + case 0x1f80104a: + case 0x1f80104e: + case 0x1f801050: + case 0x1f801054: + case 0x1f80105a: + case 0x1f80105e: + case 0x1f801100: + case 0x1f801104: + case 0x1f801108: + case 0x1f801110: + case 0x1f801114: + case 0x1f801118: + case 0x1f801120: + case 0x1f801124: + case 0x1f801128: + return false; + case 0x1f801070: + case 0x1f801074: + return !is_write; + default: + return kaddr < 0x1f801c00 || kaddr >= 0x1f801e00; + } + default: + switch (kaddr) { + case 0x1f801040: + case 0x1f801050: 
+ case 0x1f801100: + case 0x1f801104: + case 0x1f801108: + case 0x1f801110: + case 0x1f801114: + case 0x1f801118: + case 0x1f801120: + case 0x1f801124: + case 0x1f801128: + case 0x1f801810: + case 0x1f801814: + case 0x1f801820: + case 0x1f801824: + return false; + case 0x1f801070: + case 0x1f801074: + case 0x1f801088: + case 0x1f801098: + case 0x1f8010a8: + case 0x1f8010b8: + case 0x1f8010c8: + case 0x1f8010e8: + case 0x1f8010f4: + return !is_write; + default: + return !is_write || kaddr < 0x1f801c00 || kaddr >= 0x1f801e00; + } + } +} + +static const struct lightrec_ops lightrec_ops = { + .cop2_op = cop2_op, + .enable_ram = lightrec_enable_ram, + .hw_direct = lightrec_can_hw_direct, + .code_inv = LIGHTREC_CODE_INV ? lightrec_code_inv : NULL, +}; + +static int lightrec_plugin_init(void) +{ + lightrec_map[PSX_MAP_KERNEL_USER_RAM].address = psxM; + lightrec_map[PSX_MAP_BIOS].address = psxR; + lightrec_map[PSX_MAP_SCRATCH_PAD].address = psxH; + lightrec_map[PSX_MAP_HW_REGISTERS].address = psxH + 0x1000; + lightrec_map[PSX_MAP_PARALLEL_PORT].address = psxP; + + if (!LIGHTREC_CUSTOM_MAP) { +#if P_HAVE_MMAP + code_buffer = mmap(0, CODE_BUFFER_SIZE, + PROT_EXEC | PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (code_buffer == MAP_FAILED) + return -ENOMEM; +#else + code_buffer = malloc(CODE_BUFFER_SIZE); + if (!code_buffer) + return -ENOMEM; +#endif + } + + if (LIGHTREC_CUSTOM_MAP) { + lightrec_map[PSX_MAP_MIRROR1].address = psxM + 0x200000; + lightrec_map[PSX_MAP_MIRROR2].address = psxM + 0x400000; + lightrec_map[PSX_MAP_MIRROR3].address = psxM + 0x600000; + } + + lightrec_map[PSX_MAP_CODE_BUFFER].address = code_buffer; + + use_lightrec_interpreter = !!getenv("LIGHTREC_INTERPRETER"); + + lightrec_state = lightrec_init(LIGHTREC_PROG_NAME, + lightrec_map, ARRAY_SIZE(lightrec_map), + &lightrec_ops); + + // fprintf(stderr, "M=0x%lx, P=0x%lx, R=0x%lx, H=0x%lx\n", + // (uintptr_t) psxM, + // (uintptr_t) psxP, + // (uintptr_t) psxR, + // (uintptr_t) psxH); + 
+#ifndef _WIN32 + signal(SIGPIPE, exit); +#endif + return 0; +} + +static void lightrec_plugin_sync_regs_to_pcsx(bool need_cp2); +static void lightrec_plugin_sync_regs_from_pcsx(bool need_cp2); + +static void lightrec_plugin_execute_internal(bool block_only) +{ + struct lightrec_registers *regs; + u32 flags, cycles_pcsx; + + regs = lightrec_get_registers(lightrec_state); + gen_interupt((psxCP0Regs *)regs->cp0); + if (!block_only && stop) + return; + + cycles_pcsx = next_interupt - psxRegs.cycle; + assert((s32)cycles_pcsx > 0); + + // step during early boot so that 0x80030000 fastboot hack works + block_stepping = block_only; + if (block_only) + cycles_pcsx = 0; + + if (use_pcsx_interpreter) { + intExecuteBlock(0); + } else { + u32 cycles_lightrec = cycles_pcsx * 1024; + if (unlikely(use_lightrec_interpreter)) { + psxRegs.pc = lightrec_run_interpreter(lightrec_state, + psxRegs.pc, + cycles_lightrec); + } else { + psxRegs.pc = lightrec_execute(lightrec_state, + psxRegs.pc, cycles_lightrec); + } + + lightrec_tansition_to_pcsx(lightrec_state); + + flags = lightrec_exit_flags(lightrec_state); + + if (flags & LIGHTREC_EXIT_SEGFAULT) { + fprintf(stderr, "Exiting at cycle 0x%08x\n", + psxRegs.cycle); + exit(1); + } + + if (flags & LIGHTREC_EXIT_SYSCALL) + psxException(R3000E_Syscall << 2, 0, (psxCP0Regs *)regs->cp0); + if (flags & LIGHTREC_EXIT_BREAK) + psxException(R3000E_Bp << 2, 0, (psxCP0Regs *)regs->cp0); + else if (flags & LIGHTREC_EXIT_UNKNOWN_OP) { + u32 op = intFakeFetch(psxRegs.pc); + u32 hlec = op & 0x03ffffff; + if ((op >> 26) == 0x3b && hlec < ARRAY_SIZE(psxHLEt) && Config.HLE) { + lightrec_plugin_sync_regs_to_pcsx(0); + psxHLEt[hlec](); + lightrec_plugin_sync_regs_from_pcsx(0); + } + else + psxException(R3000E_RI << 2, 0, (psxCP0Regs *)regs->cp0); + } + } + + if ((regs->cp0[13] & regs->cp0[12] & 0x300) && (regs->cp0[12] & 0x1)) { + /* Handle software interrupts */ + regs->cp0[13] &= ~0x7c; + psxException(regs->cp0[13], 0, (psxCP0Regs *)regs->cp0); + } +} + 
+static void lightrec_plugin_execute(void) +{ + while (!stop) + lightrec_plugin_execute_internal(false); +} + +static void lightrec_plugin_execute_block(enum blockExecCaller caller) +{ + lightrec_plugin_execute_internal(true); +} + +static void lightrec_plugin_clear(u32 addr, u32 size) +{ + if ((addr == 0 && size == UINT32_MAX) + || (lightrec_hacks & LIGHTREC_OPT_INV_DMA_ONLY)) + lightrec_invalidate_all(lightrec_state); + else + /* size * 4: PCSX uses DMA units */ + lightrec_invalidate(lightrec_state, addr, size * 4); +} + +static void lightrec_plugin_notify(enum R3000Anote note, void *data) +{ + switch (note) + { + case R3000ACPU_NOTIFY_CACHE_ISOLATED: + case R3000ACPU_NOTIFY_CACHE_UNISOLATED: + /* not used, lightrec calls lightrec_enable_ram() instead */ + break; + case R3000ACPU_NOTIFY_BEFORE_SAVE: + /* non-null 'data' means this is HLE related sync */ + lightrec_plugin_sync_regs_to_pcsx(data == NULL); + break; + case R3000ACPU_NOTIFY_AFTER_LOAD: + lightrec_plugin_sync_regs_from_pcsx(data == NULL); + if (data == NULL) + lightrec_invalidate_all(lightrec_state); + break; + } +} + +static void lightrec_plugin_apply_config() +{ + static u32 cycles_per_op_old; + u32 cycle_mult = Config.cycle_multiplier_override && Config.cycle_multiplier == CYCLE_MULT_DEFAULT + ? 
Config.cycle_multiplier_override : Config.cycle_multiplier; + u32 cycles_per_op = cycle_mult * 1024 / 100; + assert(cycles_per_op); + + if (cycles_per_op_old && cycles_per_op_old != cycles_per_op) { + SysPrintf("lightrec: reinit block cache for cycles_per_op %.2f\n", + cycles_per_op / 1024.f); + } + cycles_per_op_old = cycles_per_op; + lightrec_set_cycles_per_opcode(lightrec_state, cycles_per_op); +} + +static void lightrec_plugin_shutdown(void) +{ + lightrec_destroy(lightrec_state); + + if (!LIGHTREC_CUSTOM_MAP) { +#if P_HAVE_MMAP + munmap(code_buffer, CODE_BUFFER_SIZE); +#else + free(code_buffer); +#endif + } +} + +static void lightrec_plugin_reset(void) +{ + struct lightrec_registers *regs; + + regs = lightrec_get_registers(lightrec_state); + + /* Invalidate all blocks */ + lightrec_invalidate_all(lightrec_state); + + /* Reset registers */ + memset(regs, 0, sizeof(*regs)); + + regs->cp0[12] = 0x10900000; // COP0 enabled | BEV = 1 | TS = 1 + regs->cp0[15] = 0x00000002; // PRevID = Revision ID, same as R3000A + + lightrec_set_unsafe_opt_flags(lightrec_state, lightrec_hacks); +} + +static void lightrec_plugin_sync_regs_from_pcsx(bool need_cp2) +{ + struct lightrec_registers *regs; + + regs = lightrec_get_registers(lightrec_state); + memcpy(regs->gpr, &psxRegs.GPR, sizeof(regs->gpr)); + memcpy(regs->cp0, &psxRegs.CP0, sizeof(regs->cp0)); + if (need_cp2) + memcpy(regs->cp2d, &psxRegs.CP2, sizeof(regs->cp2d) + sizeof(regs->cp2c)); +} + +static void lightrec_plugin_sync_regs_to_pcsx(bool need_cp2) +{ + struct lightrec_registers *regs; + + regs = lightrec_get_registers(lightrec_state); + memcpy(&psxRegs.GPR, regs->gpr, sizeof(regs->gpr)); + memcpy(&psxRegs.CP0, regs->cp0, sizeof(regs->cp0)); + if (need_cp2) + memcpy(&psxRegs.CP2, regs->cp2d, sizeof(regs->cp2d) + sizeof(regs->cp2c)); +} + +R3000Acpu psxRec = +{ + lightrec_plugin_init, + lightrec_plugin_reset, + lightrec_plugin_execute, + lightrec_plugin_execute_block, + lightrec_plugin_clear, + lightrec_plugin_notify, + 
lightrec_plugin_apply_config, + lightrec_plugin_shutdown, +}; diff --git a/libpcsxcore/lightrec/plugin.h b/libpcsxcore/lightrec/plugin.h new file mode 100644 index 000000000..a228a6f08 --- /dev/null +++ b/libpcsxcore/lightrec/plugin.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2022 Paul Cercueil + */ + +#ifndef __LIGHTREC_PLUGIN_H__ +#define __LIGHTREC_PLUGIN_H__ + +#ifdef LIGHTREC + +#define drc_is_lightrec() 1 + +#else /* if !LIGHTREC */ + +#define drc_is_lightrec() 0 + +#endif + +#endif /* __LIGHTREC_PLUGIN_H__ */ + diff --git a/libpcsxcore/lightrec/sysconf.c b/libpcsxcore/lightrec/sysconf.c new file mode 100644 index 000000000..6d51bea45 --- /dev/null +++ b/libpcsxcore/lightrec/sysconf.c @@ -0,0 +1,13 @@ +#include <errno.h> +#include <unistd.h> + +/* Implement the sysconf() symbol which is needed by GNU Lightning */ +long sysconf(int name) +{ + switch (name) { + case _SC_PAGE_SIZE: + return 4096; + default: + return -EINVAL; + } +} diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index b42952394..d9438d884 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -6263,11 +6263,13 @@ void new_dynarec_clear_full(void) static int pgsize(void) { + long ret = -1; #ifdef _SC_PAGESIZE - return sysconf(_SC_PAGESIZE); -#else - return 4096; + ret = sysconf(_SC_PAGESIZE); #endif + if (ret < 1) + ret = 4096; + return ret; } void new_dynarec_init(void) diff --git a/plugins/dfsound/dma.c b/plugins/dfsound/dma.c index fde1f835b..6b4b63e9c 100644 --- a/plugins/dfsound/dma.c +++ b/plugins/dfsound/dma.c @@ -93,7 +93,7 @@ void CALLBACK SPUwriteDMAMem(unsigned short *pusPSXMem, int iSize, // might also need more delay like in set_dma_end() do_irq_io(irq_after * 4); } - for (i = 0; i < 24; i++) { + for (i = 0; i < MAXCHAN; i++) { size_t ediff, p = spu.s_chan[i].pCurr - spu.spuMemC; if (spu.s_chan[i].ADSRX.State == ADSR_RELEASE && !spu.s_chan[i].ADSRX.EnvelopeVol) continue; 
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index f65351cf2..2d0f7b124 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -226,6 +226,7 @@ typedef struct u8 texture_4bpp_cache[32][256 * 256]; u8 texture_8bpp_even_cache[16][256 * 256]; u8 texture_8bpp_odd_cache[16][256 * 256]; + int use_dithering; } psx_gpu_struct; typedef struct __attribute__((aligned(16))) diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index 84fa9322a..b2e899999 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -10,7 +10,6 @@ #include #include -#include #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #ifndef min @@ -222,6 +221,19 @@ void renderer_set_config(const struct rearmed_cbs *cbs) } if (cbs->pl_set_gpu_caps) cbs->pl_set_gpu_caps(GPU_CAP_SUPPORTS_2X); + + egpu.use_dithering = cbs->gpu_neon.allow_dithering; + if(!egpu.use_dithering) { + egpu.dither_table[0] = dither_table_row(0, 0, 0, 0); + egpu.dither_table[1] = dither_table_row(0, 0, 0, 0); + egpu.dither_table[2] = dither_table_row(0, 0, 0, 0); + egpu.dither_table[3] = dither_table_row(0, 0, 0, 0); + } else { + egpu.dither_table[0] = dither_table_row(-4, 0, -3, 1); + egpu.dither_table[1] = dither_table_row(2, -2, 3, -1); + egpu.dither_table[2] = dither_table_row(-3, 1, -4, 0); + egpu.dither_table[3] = dither_table_row(3, -1, 2, -2); + } egpu.hack_disable_main = cbs->gpu_neon.enhancement_no_main; egpu.hack_texture_adj = cbs->gpu_neon.enhancement_tex_adj; diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 54bf63e69..f6340e111 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -11,6 +11,8 @@ #include #include #include +#include <stdlib.h> /* for calloc */ + #include "gpu.h" #include "gpu_timing.h" #include "../../libpcsxcore/gpu.h" // meh @@ -222,14 +224,45 @@ static noinline void get_gpu_info(uint32_t data) } } -// double, for overdraw guard -#define VRAM_SIZE (1024 * 512 * 2 * 2) 
+#ifndef max +#define max(a, b) (((a) > (b)) ? (a) : (b)) +#endif + +// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping +// renderer/downscaler it uses in high res modes: +#ifdef GCW_ZERO + // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of + // fills. (Will change this value if it ever gets large page support) + #define VRAM_ALIGN 8192 +#else + #define VRAM_ALIGN 16 +#endif + +// double, for overdraw guard + at least 1 page before +#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096)) + +// vram ptr received from mmap/malloc/alloc (will deallocate using this) +static uint16_t *vram_ptr_orig = NULL; +#ifndef GPULIB_USE_MMAP +# ifdef __linux__ +# define GPULIB_USE_MMAP 1 +# else +# define GPULIB_USE_MMAP 0 +# endif +#endif static int map_vram(void) { - gpu.vram = gpu.mmap(VRAM_SIZE); - if (gpu.vram != NULL) { - gpu.vram += 4096 / 2; +#if GPULIB_USE_MMAP + gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE); +#else + gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1); +#endif + if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) { + // 4kb guard in front + gpu.vram += (4096 / 2); + // Align + gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1)); return 0; } else { @@ -252,10 +285,10 @@ long GPUinit(void) gpu.cmd_len = 0; do_reset(); - if (gpu.mmap != NULL) { + /*if (gpu.mmap != NULL) { if (map_vram() != 0) ret = -1; - } + }*/ return ret; } @@ -265,11 +298,15 @@ long GPUshutdown(void) renderer_finish(); ret = vout_finish(); - if (gpu.vram != NULL) { - gpu.vram -= 4096 / 2; - gpu.munmap(gpu.vram, VRAM_SIZE); + + if (vram_ptr_orig != NULL) { +#if GPULIB_USE_MMAP + gpu.munmap(vram_ptr_orig, VRAM_SIZE); +#else + free(vram_ptr_orig); +#endif } - gpu.vram = NULL; + vram_ptr_orig = gpu.vram = NULL; return ret; } diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index 80389a344..5c727bbc6 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -51,6 +51,15 @@ static 
void check_mode_change(int force) bpp = 24; } + gpu.state.downscale_active = + gpu.get_downscale_buffer != NULL && gpu.state.downscale_enable + && (w >= 512 || h >= 256); + + if (gpu.state.downscale_active) { + w_out = w < 512 ? w : 320; + h_out = h < 256 ? h : h / 2; + } + // width|rgb24 change? if (force || (gpu.status ^ gpu.state.status_vo_old) & ((7<<16)|(1<<21)) || w_out != gpu.state.w_out_old || h_out != gpu.state.h_out_old) @@ -97,6 +106,9 @@ void vout_update(void) src_x2 *= 2; } + if (gpu.state.downscale_active) + vram = (void *)gpu.get_downscale_buffer(&src_x, &src_y, &w, &h, &vram_h); + if (src_y + h > vram_h) { if (src_y + h - vram_h > h / 2) { // wrap From 5dd884df9640a3be38507be60832c3b7d323b8dc Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 28 Sep 2024 00:03:42 +0300 Subject: [PATCH 558/597] lightrec: relax mem requirements so that asan works Seems to work anyway with suboptimal mappings, and MAP_FIXED_NOREPLACE is unnecessary as the kernel will honour the address if it's free and return something else if it's not. 
--- Makefile | 5 ++--- libpcsxcore/lightrec/mem.c | 46 ++++++++++++++++++++++++-------------- libpcsxcore/psxmem.c | 15 +++++++------ 3 files changed, 39 insertions(+), 27 deletions(-) diff --git a/Makefile b/Makefile index 5b890600d..9c27f06c8 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,8 @@ CFLAGS += -O2 -DNDEBUG endif ifeq ($(DEBUG_ASAN), 1) CFLAGS += -fsanitize=address +LDFLAGS += -fsanitize=address +#LDFLAGS += -static-libasan endif CFLAGS += -DP_HAVE_MMAP=$(if $(NO_MMAP),0,1) \ -DP_HAVE_PTHREAD=$(if $(NO_PTHREAD),0,1) \ @@ -39,9 +41,6 @@ endif CC_LINK ?= $(CC) CC_AS ?= $(CC) LDFLAGS += $(MAIN_LDFLAGS) -ifeq ($(DEBUG_ASAN), 1) -LDFLAGS += -static-libasan -endif EXTRA_LDFLAGS ?= -Wl,-Map=$@.map LDLIBS += $(MAIN_LDLIBS) ifdef PCNT diff --git a/libpcsxcore/lightrec/mem.c b/libpcsxcore/lightrec/mem.c index 5cd86b4fe..4b582583c 100644 --- a/libpcsxcore/lightrec/mem.c +++ b/libpcsxcore/lightrec/mem.c @@ -82,15 +82,15 @@ static int lightrec_mmap_ram(bool hugetlb) memfd = syscall(SYS_memfd_create, "/lightrec_memfd", flags); if (memfd < 0) { + SysMessage("Failed to create memfd: %d", errno); err = -errno; - fprintf(stderr, "Failed to create memfd: %d\n", err); return err; } err = ftruncate(memfd, 0x200000); if (err < 0) { + SysMessage("Could not trim memfd: %d", errno); err = -errno; - fprintf(stderr, "Could not trim memfd: %d\n", err); goto err_close_memfd; } @@ -139,61 +139,73 @@ static int lightrec_mmap_ram(bool hugetlb) int lightrec_init_mmap(void) { unsigned int i; - uintptr_t base; + s8 *base, *target; void *map; int err = lightrec_mmap_ram(true); if (err) { err = lightrec_mmap_ram(false); if (err) { - fprintf(stderr, "Unable to mmap RAM and mirrors\n"); + SysMessage("Unable to mmap RAM and mirrors"); return err; } } - base = (uintptr_t) psxM; + base = psxM; - map = mmap((void *)(base + 0x1f000000), 0x10000, + target = base + 0x1f000000; + map = mmap(target, 0x10000, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_FIXED_NOREPLACE | MAP_ANONYMOUS, -1, 0); + 
MAP_PRIVATE | /*MAP_FIXED_NOREPLACE |*/ MAP_ANONYMOUS, -1, 0); if (map == MAP_FAILED) { + SysMessage("Unable to mmap parallel port: %d", errno); err = -EINVAL; - fprintf(stderr, "Unable to mmap parallel port\n"); goto err_unmap; } + if (map != target) + SysMessage("lightrec: mapped parallel port at %p, wanted %p", map, target); psxP = (s8 *)map; - map = mmap_huge((void *)(base + 0x1fc00000), 0x200000, + target = base + 0x1fc00000; + map = mmap_huge(target, 0x200000, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_FIXED_NOREPLACE | MAP_ANONYMOUS, -1, 0); + MAP_PRIVATE | /*MAP_FIXED_NOREPLACE |*/ MAP_ANONYMOUS, -1, 0); if (map == MAP_FAILED) { + SysMessage("Unable to mmap BIOS: %d", errno); err = -EINVAL; - fprintf(stderr, "Unable to mmap BIOS\n"); goto err_unmap_parallel; } + if (map != target) + SysMessage("lightrec: mapped bios at %p, wanted %p", map, target); psxR = (s8 *)map; - map = mmap((void *)(base + 0x1f800000), 0x10000, + target = base + 0x1f800000; + map = mmap(target, 0x10000, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_FIXED_NOREPLACE | MAP_ANONYMOUS, 0, 0); + MAP_PRIVATE | /*MAP_FIXED_NOREPLACE |*/ MAP_ANONYMOUS, 0, 0); if (map == MAP_FAILED) { + SysMessage("Unable to mmap scratchpad: %d", errno); err = -EINVAL; - fprintf(stderr, "Unable to mmap scratchpad\n"); goto err_unmap_bios; } + if (map != target) + SysMessage("lightrec: mapped scratchpad at %p, wanted %p", map, target); psxH = (s8 *)map; - map = mmap_huge((void *)(base + 0x800000), CODE_BUFFER_SIZE, + target = base + 0x800000; + map = mmap_huge(target, CODE_BUFFER_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_FIXED_NOREPLACE | MAP_ANONYMOUS, + MAP_PRIVATE | /*MAP_FIXED_NOREPLACE |*/ MAP_ANONYMOUS, -1, 0); if (map == MAP_FAILED) { + SysMessage("Unable to mmap code buffer: %d", errno); err = -EINVAL; - fprintf(stderr, "Unable to mmap code buffer\n"); goto err_unmap_scratch; } + if (map != target) + SysMessage("lightrec: mapped code at %p, wanted %p", map, target); code_buffer = map; 
diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 4e03b24bb..0e28b72c2 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -159,7 +159,7 @@ static int psxMemInitMap(void) if (psxM == MAP_FAILED) psxM = psxMap(0x77000000, 0x00210000, 0, MAP_TAG_RAM); if (psxM == MAP_FAILED) { - SysMessage(_("mapping main RAM failed")); + SysMessage("mapping main RAM failed"); psxM = NULL; return -1; } @@ -167,15 +167,15 @@ static int psxMemInitMap(void) psxH = psxMap(0x1f800000, 0x10000, 0, MAP_TAG_OTHER); if (psxH == MAP_FAILED) { - SysMessage(_("Error allocating memory!")); - psxMemShutdown(); + SysMessage("Error allocating psxH"); + psxH = NULL; return -1; } psxR = psxMap(0x1fc00000, 0x80000, 0, MAP_TAG_OTHER); if (psxR == MAP_FAILED) { - SysMessage(_("Error allocating memory!")); - psxMemShutdown(); + SysMessage("Error allocating psxR"); + psxR = NULL; return -1; } @@ -201,7 +201,8 @@ int psxMemInit(void) else ret = psxMemInitMap(); if (ret) { - SysMessage(_("Error allocating memory!")); + if (LIGHTREC_CUSTOM_MAP) + SysMessage("lightrec_init_mmap failed"); psxMemShutdown(); return -1; } @@ -213,7 +214,7 @@ int psxMemInit(void) psxMemWLUT = (u8 **)malloc(0x10000 * sizeof(void *)); if (psxMemRLUT == NULL || psxMemWLUT == NULL) { - SysMessage(_("Error allocating memory!")); + SysMessage("Error allocating psxMem LUTs"); psxMemShutdown(); return -1; } From 1131a8754fa92c5241ddc3c47913140290734ae4 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 30 Sep 2024 21:08:28 +0300 Subject: [PATCH 559/597] 3ds: some random tuning saves some memory --- frontend/3ds/sys/mman.h | 49 +++++++++++++++++++++++------------------ frontend/libretro.c | 34 +++++++++++++++++++++++++--- plugins/gpulib/gpu.h | 3 ++- 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/frontend/3ds/sys/mman.h b/frontend/3ds/sys/mman.h index fdf5ac6a9..4ba90db21 100644 --- a/frontend/3ds/sys/mman.h +++ b/frontend/3ds/sys/mman.h @@ -6,10 +6,7 @@ extern "C" { #endif #include -#include -#include 
#include -#include "3ds_utils.h" @@ -22,6 +19,9 @@ extern "C" { #define MAP_FAILED ((void *)-1) +void SysPrintf(const char *fmt, ...); + +#if 0 // not used static void* dynarec_cache = NULL; static void* dynarec_cache_mapping = NULL; @@ -47,7 +47,7 @@ static inline void* mmap(void *addr, size_t len, int prot, int flags, int fd, of } svcDuplicateHandle(&currentHandle, 0xFFFF8001); - svcControlProcessMemory(currentHandle, addr, dynarec_cache, + svcControlProcessMemory(currentHandle, (uintptr_t)addr, (uintptr_t)dynarec_cache, len, MEMOP_MAP, prot); svcCloseHandle(currentHandle); dynarec_cache_mapping = addr; @@ -70,22 +70,6 @@ static inline void* mmap(void *addr, size_t len, int prot, int flags, int fd, of return addr_out; } -static inline int mprotect(void *addr, size_t len, int prot) -{ - if(__ctr_svchax) - { - uint32_t currentHandle; - svcDuplicateHandle(&currentHandle, 0xFFFF8001); - svcControlProcessMemory(currentHandle, addr, NULL, - len, MEMOP_PROT, prot); - svcCloseHandle(currentHandle); - return 0; - } - - printf("mprotect called without svcControlProcessMemory access !\n"); - return -1; -} - static inline int munmap(void *addr, size_t len) { if((addr == dynarec_cache_mapping) && __ctr_svchax) @@ -93,7 +77,7 @@ static inline int munmap(void *addr, size_t len) uint32_t currentHandle; svcDuplicateHandle(&currentHandle, 0xFFFF8001); svcControlProcessMemory(currentHandle, - dynarec_cache, dynarec_cache_mapping, + (uintptr_t)dynarec_cache, (uintptr_t)dynarec_cache_mapping, len, MEMOP_UNMAP, 0b111); svcCloseHandle(currentHandle); dynarec_cache_mapping = NULL; @@ -104,6 +88,29 @@ static inline int munmap(void *addr, size_t len) return 0; } +#endif + +static inline int mprotect(void *addr, size_t len, int prot) +{ + if (__ctr_svchax) + { + uint32_t currentHandle = 0; + int r; + svcDuplicateHandle(&currentHandle, 0xFFFF8001); + r = svcControlProcessMemory(currentHandle, (uintptr_t)addr, 0, + len, MEMOP_PROT, prot); + svcCloseHandle(currentHandle); + if (r < 0) { + 
SysPrintf("svcControlProcessMemory failed for %p %u %x: %d\n", + addr, len, prot, r); + return -1; + } + return 0; + } + + SysPrintf("mprotect called without svcControlProcessMemory access!\n"); + return -1; +} #ifdef __cplusplus }; diff --git a/frontend/libretro.c b/frontend/libretro.c index 1ad39b515..90f40a626 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -179,7 +179,11 @@ static int negcon_linearity = 1; static bool axis_bounds_modifier; /* PSX max resolution is 640x512, but with enhancement it's 1024x512 */ +#ifdef GPU_NEON #define VOUT_MAX_WIDTH 1024 +#else +#define VOUT_MAX_WIDTH 640 +#endif #define VOUT_MAX_HEIGHT 512 //Dummy functions @@ -556,6 +560,21 @@ void pl_vita_munmap(void *ptr, size_t size, enum psxMapTag tag) } #endif +static void log_mem_usage(void) +{ +#ifdef _3DS + extern u32 __heap_size, __linear_heap_size, __stacksize__; + extern char __end__; // 3dsx.ld + u32 app_memory = *((volatile u32 *)0x1FF80040); + s64 mem_used = 0; + if (__ctr_svchax) + svcGetSystemInfo(&mem_used, 0, 1); + + SysPrintf("mem: %d/%d heap: %d linear: %d stack: %d exe: %d\n", (int)mem_used, app_memory, + __heap_size, __linear_heap_size, __stacksize__, (int)&__end__ - 0x100000); +#endif +} + static void *pl_mmap(unsigned int size) { return psxMap(0, size, 0, MAP_TAG_VRAM); @@ -1954,8 +1973,7 @@ bool retro_load_game(const struct retro_game_info *info) { size_t i; unsigned int cd_index = 0; - bool is_m3u = (strcasestr(info->path, ".m3u") != NULL); - bool is_exe = (strcasestr(info->path, ".exe") != NULL); + bool is_m3u, is_exe; int ret; struct retro_input_descriptor desc[] = { @@ -2015,6 +2033,8 @@ bool retro_load_game(const struct retro_game_info *info) LogErr("info->path required\n"); return false; } + is_m3u = (strcasestr(info->path, ".m3u") != NULL); + is_exe = (strcasestr(info->path, ".exe") != NULL); update_variables(false); @@ -2192,6 +2212,7 @@ bool retro_load_game(const struct retro_game_info *info) set_retro_memmap(); retro_set_audio_buff_status_cb(); 
+ log_mem_usage(); if (check_unsatisfied_libcrypt()) show_notification("LibCrypt protected game with missing SBI detected", 3000, 3); @@ -3729,6 +3750,8 @@ void retro_init(void) struct retro_rumble_interface rumble; int ret; + log_mem_usage(); + msg_interface_version = 0; environ_cb(RETRO_ENVIRONMENT_GET_MESSAGE_INTERFACE_VERSION, &msg_interface_version); @@ -3766,12 +3789,17 @@ void retro_init(void) vout_buf = linearMemAlign(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2, 0x80); #elif defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) && P_HAVE_POSIX_MEMALIGN if (posix_memalign(&vout_buf, 16, VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2) != 0) - vout_buf = (void *) 0; + vout_buf = NULL; else memset(vout_buf, 0, VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2); #else vout_buf = calloc(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT, 2); #endif + if (vout_buf == NULL) + { + LogErr("OOM for vout_buf.\n"); + exit(1); + } vout_buf_ptr = vout_buf; diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index fb5c4ff1f..ec7e05754 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -39,7 +39,8 @@ extern "C" { #define LE16TOH(x) (x) #endif -#define BIT(x) (1 << (x)) +#undef BIT +#define BIT(x) (1u << (x)) #define PSX_GPU_STATUS_DHEIGHT BIT(19) #define PSX_GPU_STATUS_PAL BIT(20) From 4284a818154d82c5a1f762c6bb83722299ab7177 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 30 Sep 2024 21:14:41 +0300 Subject: [PATCH 560/597] new 3ds_mmap, avoid useless retries --- frontend/libretro.c | 126 +++++++++++++++++++++++---------------- frontend/plat_pollux.c | 5 +- frontend/plugin_lib.c | 8 ++- libpcsxcore/psxmem.c | 55 +++++++++-------- libpcsxcore/psxmem_map.h | 4 +- 5 files changed, 114 insertions(+), 84 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 90f40a626..dd45d168b 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -387,54 +387,78 @@ static void vout_flip(const void *vram, int stride, int bgr24, } #ifdef _3DS -typedef struct -{ - void *buffer; - uint32_t 
target_map; - size_t size; - enum psxMapTag tag; -} psx_map_t; - -psx_map_t custom_psx_maps[] = { - { NULL, 0x13000000, 0x210000, MAP_TAG_RAM }, // 0x80000000 - { NULL, 0x12800000, 0x010000, MAP_TAG_OTHER }, // 0x1f800000 - { NULL, 0x12c00000, 0x080000, MAP_TAG_OTHER }, // 0x1fc00000 - { NULL, 0x11000000, 0x800000, MAP_TAG_LUTS }, // 0x08000000 - { NULL, 0x12000000, 0x201000, MAP_TAG_VRAM }, // 0x00000000 -}; +static u32 mapped_addrs[8]; +static u32 mapped_ram, mapped_ram_src; -void *pl_3ds_mmap(unsigned long addr, size_t size, int is_fixed, - enum psxMapTag tag) +// http://3dbrew.org/wiki/Memory_layout#ARM11_User-land_memory_regions +void *pl_3ds_mmap(unsigned long addr, size_t size, + enum psxMapTag tag, int *can_retry_addr) { - (void)is_fixed; (void)addr; + *can_retry_addr = 0; - if (__ctr_svchax) + if (__ctr_svchax) do { - psx_map_t *custom_map = custom_psx_maps; + // idea from fbalpha2012_neogeo + s32 addr = 0x10000000 - 0x1000; + u32 found_addr = 0; + MemInfo mem_info; + PageInfo page_info; + void *ret = NULL; + size_t i; + int r; + + for (i = 0; i < sizeof(mapped_addrs) / sizeof(mapped_addrs[0]); i++) + if (mapped_addrs[i] == 0) + break; + if (i == sizeof(mapped_addrs) / sizeof(mapped_addrs[0])) + break; + + size = (size + 0xfff) & ~0xfff; - for (; custom_map->size; custom_map++) + while (addr >= 0x08000000) { - if ((custom_map->size == size) && (custom_map->tag == tag)) - { - uint32_t ptr_aligned, tmp; - void *ret; + if ((r = svcQueryMemory(&mem_info, &page_info, addr)) < 0) { + LogErr("svcQueryMemory failed: %d\n", r); + break; + } - custom_map->buffer = malloc(size + 0x1000); - ptr_aligned = (((u32)custom_map->buffer) + 0xFFF) & ~0xFFF; + if (mem_info.state == MEMSTATE_FREE && mem_info.size >= size) { + found_addr = mem_info.base_addr + mem_info.size - size; + break; + } - if (svcControlMemory(&tmp, (void *)custom_map->target_map, (void *)ptr_aligned, size, MEMOP_MAP, 0x3) < 0) - { - LogErr("could not map memory @0x%08X\n", custom_map->target_map); - 
exit(1); - } + addr = mem_info.base_addr - 0x1000; + } + if (found_addr == 0) { + LogErr("no addr space for %u bytes\n", size); + break; + } - ret = (void *)custom_map->target_map; - memset(ret, 0, size); - return ret; + // https://libctru.devkitpro.org/svc_8h.html#a8046e9b23b1b209a4e278cb1c19c7a5a + if ((r = svcControlMemory(&mapped_addrs[i], found_addr, 0, size, MEMOP_ALLOC, MEMPERM_READWRITE)) < 0) { + LogErr("svcControlMemory failed for %08x %u: %d\n", found_addr, size, r); + break; + } + if (mapped_addrs[i] == 0) // needed? + mapped_addrs[i] = found_addr; + ret = (void *)mapped_addrs[i]; + + // "round" address helps the dynarec slightly, map ram at 0x13000000 + if (tag == MAP_TAG_RAM && !mapped_ram) { + u32 target = 0x13000000; + if ((r = svcControlMemory(&mapped_ram, target, mapped_addrs[i], size, MEMOP_MAP, MEMPERM_READWRITE)) < 0) + LogErr("could not map ram %08x -> %08x: %d\n", mapped_addrs[i], target, r); + else { + mapped_ram_src = mapped_addrs[i]; + mapped_ram = target; + ret = (void *)mapped_ram; } } + memset(ret, 0, size); + return ret; } + while (0); return calloc(size, 1); } @@ -443,22 +467,22 @@ void pl_3ds_munmap(void *ptr, size_t size, enum psxMapTag tag) { (void)tag; - if (__ctr_svchax) + if (ptr && __ctr_svchax) { - psx_map_t *custom_map = custom_psx_maps; - - for (; custom_map->size; custom_map++) - { - if ((custom_map->target_map == (uint32_t)ptr)) - { - uint32_t ptr_aligned, tmp; + size_t i; + u32 tmp; - ptr_aligned = (((u32)custom_map->buffer) + 0xFFF) & ~0xFFF; + size = (size + 0xfff) & ~0xfff; - svcControlMemory(&tmp, (void *)custom_map->target_map, (void *)ptr_aligned, size, MEMOP_UNMAP, 0x3); - - free(custom_map->buffer); - custom_map->buffer = NULL; + if (ptr == (void *)mapped_ram) { + svcControlMemory(&tmp, mapped_ram, mapped_ram_src, size, MEMOP_UNMAP, 0); + ptr = (void *)mapped_ram_src; + mapped_ram = mapped_ram_src = 0; + } + for (i = 0; i < sizeof(mapped_addrs) / sizeof(mapped_addrs[0]); i++) { + if (ptr == (void 
*)mapped_addrs[i]) { + svcControlMemory(&tmp, mapped_addrs[i], 0, size, MEMOP_FREE, 0); + mapped_addrs[i] = 0; return; } } @@ -521,11 +545,11 @@ void deinit_vita_mmap() free(addr); } -void *pl_vita_mmap(unsigned long addr, size_t size, int is_fixed, - enum psxMapTag tag) +void *pl_vita_mmap(unsigned long addr, size_t size, + enum psxMapTag tag, int *can_retry_addr) { - (void)is_fixed; (void)addr; + *can_retry_addr = 0; psx_map_t *custom_map = custom_psx_maps; diff --git a/frontend/plat_pollux.c b/frontend/plat_pollux.c index 326a40f11..f349cad1c 100644 --- a/frontend/plat_pollux.c +++ b/frontend/plat_pollux.c @@ -410,13 +410,14 @@ void plat_gvideo_close(void) { } -static void *pl_emu_mmap(unsigned long addr, size_t size, int is_fixed, - enum psxMapTag tag) +static void *pl_emu_mmap(unsigned long addr, size_t size, + enum psxMapTag tag, int *can_retry_addr) { unsigned int pbase; void *retval; int ret; + *can_retry_addr = 1; if (!have_warm) goto basic_map; diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 9b6faf44e..21d6863da 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -873,9 +873,10 @@ void pl_start_watchdog(void) fprintf(stderr, "could not start watchdog: %d\n", ret); } -static void *pl_emu_mmap(unsigned long addr, size_t size, int is_fixed, - enum psxMapTag tag) +static void *pl_emu_mmap(unsigned long addr, size_t size, + enum psxMapTag tag, int *can_retry_addr) { + *can_retry_addr = 1; return plat_mmap(addr, size, 0, is_fixed); } @@ -886,7 +887,8 @@ static void pl_emu_munmap(void *ptr, size_t size, enum psxMapTag tag) static void *pl_mmap(unsigned int size) { - return psxMapHook(0, size, 0, MAP_TAG_VRAM); + int can_retry_addr; + return psxMapHook(0, size, MAP_TAG_VRAM, &can_retry_addr); } static void pl_munmap(void *ptr, unsigned int size) diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 0e28b72c2..a70cc6238 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -42,21 +42,23 @@ #endif static void * 
psxMapDefault(unsigned long addr, size_t size, - int is_fixed, enum psxMapTag tag) + enum psxMapTag tag, int *can_retry_addr) { void *ptr; #if !P_HAVE_MMAP + *can_retry_addr = 0; ptr = calloc(1, size); return ptr ? ptr : MAP_FAILED; #else int flags = MAP_PRIVATE | MAP_ANONYMOUS; + *can_retry_addr = 1; ptr = mmap((void *)(uintptr_t)addr, size, PROT_READ | PROT_WRITE, flags, -1, 0); #ifdef MADV_HUGEPAGE if (size >= 2*1024*1024) { if (ptr != MAP_FAILED && ((uintptr_t)ptr & (2*1024*1024 - 1))) { - // try to manually realign assuming bottom-to-top alloc + // try to manually realign assuming decreasing addr alloc munmap(ptr, size); addr = (uintptr_t)ptr & ~(2*1024*1024 - 1); ptr = mmap((void *)(uintptr_t)addr, size, @@ -79,43 +81,44 @@ static void psxUnmapDefault(void *ptr, size_t size, enum psxMapTag tag) #endif } -void *(*psxMapHook)(unsigned long addr, size_t size, int is_fixed, - enum psxMapTag tag) = psxMapDefault; +void *(*psxMapHook)(unsigned long addr, size_t size, + enum psxMapTag tag, int *can_retry_addr) = psxMapDefault; void (*psxUnmapHook)(void *ptr, size_t size, enum psxMapTag tag) = psxUnmapDefault; void *psxMap(unsigned long addr, size_t size, int is_fixed, enum psxMapTag tag) { - int try_ = 0; - unsigned long mask; + int try_, can_retry_addr = 0; void *ret; -retry: - ret = psxMapHook(addr, size, 0, tag); - if (ret == NULL) - return MAP_FAILED; + for (try_ = 0; try_ < 3; try_++) + { + ret = psxMapHook(addr, size, tag, &can_retry_addr); + if (ret == NULL) + return MAP_FAILED; - if (addr != 0 && ret != (void *)(uintptr_t)addr) { - SysMessage("psxMap: warning: wanted to map @%08x, got %p\n", - addr, ret); + if (addr != 0 && ret != (void *)(uintptr_t)addr) { + SysMessage("psxMap: warning: wanted to map @%08x, got %p\n", + addr, ret); + if (is_fixed) { + psxUnmap(ret, size, tag); + return MAP_FAILED; + } - if (is_fixed) { - psxUnmap(ret, size, tag); - return MAP_FAILED; - } + if (can_retry_addr && ((addr ^ (uintptr_t)ret) & ~0xff000000l)) { + unsigned long 
mask; - if (((addr ^ (unsigned long)(uintptr_t)ret) & ~0xff000000l) && try_ < 2) - { - psxUnmap(ret, size, tag); + psxUnmap(ret, size, tag); - // try to use similarly aligned memory instead - // (recompiler needs this) - mask = try_ ? 0xffff : 0xffffff; - addr = ((uintptr_t)ret + mask) & ~mask; - try_++; - goto retry; + // try to use similarly aligned memory instead + // (recompiler prefers this) + mask = try_ ? 0xffff : 0xffffff; + addr = ((uintptr_t)ret + mask) & ~mask; + continue; + } } + break; } return ret; diff --git a/libpcsxcore/psxmem_map.h b/libpcsxcore/psxmem_map.h index 9c15c035e..159f57512 100644 --- a/libpcsxcore/psxmem_map.h +++ b/libpcsxcore/psxmem_map.h @@ -29,8 +29,8 @@ enum psxMapTag { MAP_TAG_LUTS, }; -extern void *(*psxMapHook)(unsigned long addr, size_t size, int is_fixed, - enum psxMapTag tag); +extern void *(*psxMapHook)(unsigned long addr, size_t size, + enum psxMapTag tag, int *can_retry_addr); extern void (*psxUnmapHook)(void *ptr, size_t size, enum psxMapTag tag); void *psxMap(unsigned long addr, size_t size, int is_fixed, From d5780f8c3c3b23c2b16784ed5c7fbbc19ae2a682 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 30 Sep 2024 23:23:48 +0300 Subject: [PATCH 561/597] drc: less alarmist ram map message To discourage opening bugs like libretro/pcsx_rearmed#717 Also a warning fix --- libpcsxcore/new_dynarec/new_dynarec.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index d9438d884..ed2f4c638 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -6349,8 +6349,8 @@ void new_dynarec_init(void) arch_init(); new_dynarec_test(); ram_offset = (uintptr_t)psxM - 0x80000000; - if (ram_offset!=0) - SysPrintf("warning: RAM is not directly mapped, performance will suffer\n"); + if (ram_offset != 0) + SysPrintf("RAM is not directly mapped\n"); SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n"); 
SysPrintf("%p/%p/%p/%p/%p\n", psxM, psxH, psxR, mem_rtab, out); } @@ -8367,8 +8367,9 @@ static noinline void pass5a_preallocate1(void) // to use, which can avoid a load-use penalty on certain CPUs. static noinline void pass5b_preallocate2(void) { - int i, hr; - for(i=0;i Date: Tue, 1 Oct 2024 19:36:25 +0300 Subject: [PATCH 562/597] psxmem: fix map retry breakage fixes 417b59ebc326fc753f8c881c24fc7aff975df7ea --- libpcsxcore/psxmem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index a70cc6238..a8dfaa834 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -90,10 +90,12 @@ void *psxMap(unsigned long addr, size_t size, int is_fixed, enum psxMapTag tag) { int try_, can_retry_addr = 0; - void *ret; + void *ret = MAP_FAILED; for (try_ = 0; try_ < 3; try_++) { + if (ret != MAP_FAILED) + psxUnmap(ret, size, tag); ret = psxMapHook(addr, size, tag, &can_retry_addr); if (ret == NULL) return MAP_FAILED; @@ -109,8 +111,6 @@ void *psxMap(unsigned long addr, size_t size, int is_fixed, if (can_retry_addr && ((addr ^ (uintptr_t)ret) & ~0xff000000l)) { unsigned long mask; - psxUnmap(ret, size, tag); - // try to use similarly aligned memory instead // (recompiler prefers this) mask = try_ ? 
0xffff : 0xffffff; From 7e9c30606d6c2dd0956ac5ca0f851d3e6ef6f89c Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 1 Oct 2024 19:40:59 +0300 Subject: [PATCH 563/597] try to clean up various mmap func failure return value confusion such a mess --- frontend/libretro.c | 32 ++++++++++++++++++++------------ frontend/plat_pollux.c | 6 +++--- frontend/plugin_lib.c | 2 +- libpcsxcore/psxmem.c | 4 ++-- plugins/gpu_neon/psx_gpu_if.c | 3 ++- plugins/gpu_unai/gpulib_if.cpp | 3 ++- plugins/gpulib/gpu.c | 5 +---- 7 files changed, 31 insertions(+), 24 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index dd45d168b..bcd3c615e 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -8,6 +8,7 @@ #define _GNU_SOURCE 1 // strcasestr #include #include +#include #include #include #ifdef __MACH__ @@ -51,6 +52,10 @@ #include "3ds/3ds_utils.h" #endif +#ifndef MAP_FAILED +#define MAP_FAILED ((void *)(intptr_t)-1) +#endif + #define PORTS_NUMBER 8 #ifndef MIN @@ -391,11 +396,12 @@ static u32 mapped_addrs[8]; static u32 mapped_ram, mapped_ram_src; // http://3dbrew.org/wiki/Memory_layout#ARM11_User-land_memory_regions -void *pl_3ds_mmap(unsigned long addr, size_t size, +static void *pl_3ds_mmap(unsigned long addr, size_t size, enum psxMapTag tag, int *can_retry_addr) { - (void)addr; + void *ret = MAP_FAILED; *can_retry_addr = 0; + (void)addr; if (__ctr_svchax) do { @@ -404,7 +410,6 @@ void *pl_3ds_mmap(unsigned long addr, size_t size, u32 found_addr = 0; MemInfo mem_info; PageInfo page_info; - void *ret = NULL; size_t i; int r; @@ -460,10 +465,11 @@ void *pl_3ds_mmap(unsigned long addr, size_t size, } while (0); - return calloc(size, 1); + ret = calloc(size, 1); + return ret ? 
ret : MAP_FAILED; } -void pl_3ds_munmap(void *ptr, size_t size, enum psxMapTag tag) +static void pl_3ds_munmap(void *ptr, size_t size, enum psxMapTag tag) { (void)tag; @@ -503,7 +509,7 @@ typedef struct static void *addr = NULL; -psx_map_t custom_psx_maps[] = { +static psx_map_t custom_psx_maps[] = { { NULL, 0x800000, MAP_TAG_LUTS }, { NULL, 0x080000, MAP_TAG_OTHER }, { NULL, 0x010000, MAP_TAG_OTHER }, @@ -512,7 +518,7 @@ psx_map_t custom_psx_maps[] = { { NULL, 0x210000, MAP_TAG_RAM }, }; -int init_vita_mmap() +static int init_vita_mmap() { int n; void *tmpaddr; @@ -535,7 +541,7 @@ int init_vita_mmap() return 0; } -void deinit_vita_mmap() +static void deinit_vita_mmap() { size_t i; for (i = 0; i < sizeof(custom_psx_maps) / sizeof(custom_psx_maps[0]); i++) { @@ -545,9 +551,10 @@ void deinit_vita_mmap() free(addr); } -void *pl_vita_mmap(unsigned long addr, size_t size, +static void *pl_vita_mmap(unsigned long addr, size_t size, enum psxMapTag tag, int *can_retry_addr) { + void *ret; (void)addr; *can_retry_addr = 0; @@ -562,10 +569,11 @@ void *pl_vita_mmap(unsigned long addr, size_t size, } } - return calloc(size, 1); + ret = calloc(size, 1); + return ret ? ret : MAP_FAILED; } -void pl_vita_munmap(void *ptr, size_t size, enum psxMapTag tag) +static void pl_vita_munmap(void *ptr, size_t size, enum psxMapTag tag) { (void)tag; @@ -3822,7 +3830,7 @@ void retro_init(void) if (vout_buf == NULL) { LogErr("OOM for vout_buf.\n"); - exit(1); + // may be able to continue if we get retro_framebuffer access } vout_buf_ptr = vout_buf; diff --git a/frontend/plat_pollux.c b/frontend/plat_pollux.c index f349cad1c..a27b410dd 100644 --- a/frontend/plat_pollux.c +++ b/frontend/plat_pollux.c @@ -475,12 +475,12 @@ static void *pl_emu_mmap(unsigned long addr, size_t size, } basic_map: - retval = plat_mmap(addr, size, 0, is_fixed); + retval = plat_mmap(addr, size, 0, 0); out: - if (tag == MAP_TAG_VRAM) + if (tag == MAP_TAG_VRAM && retval) psx_vram = retval; - return retval; + return retval ? 
retval : MAP_FAILED; } static void pl_emu_munmap(void *ptr, size_t size, enum psxMapTag tag) diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 21d6863da..c8a6fed45 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -877,7 +877,7 @@ static void *pl_emu_mmap(unsigned long addr, size_t size, enum psxMapTag tag, int *can_retry_addr) { *can_retry_addr = 1; - return plat_mmap(addr, size, 0, is_fixed); + return plat_mmap(addr, size, 0, 0); } static void pl_emu_munmap(void *ptr, size_t size, enum psxMapTag tag) diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index a8dfaa834..ad4725928 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -97,11 +97,11 @@ void *psxMap(unsigned long addr, size_t size, int is_fixed, if (ret != MAP_FAILED) psxUnmap(ret, size, tag); ret = psxMapHook(addr, size, tag, &can_retry_addr); - if (ret == NULL) + if (ret == MAP_FAILED) return MAP_FAILED; if (addr != 0 && ret != (void *)(uintptr_t)addr) { - SysMessage("psxMap: warning: wanted to map @%08x, got %p\n", + SysMessage("psxMap: tried to map @%08x, got %p\n", addr, ret); if (is_fixed) { psxUnmap(ret, size, tag); diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index b2e899999..3f43e431f 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -87,8 +87,9 @@ static void map_enhancement_buffer(void) // to be able to reuse 1024-width code better (triangle setup, // dithering phase, lines). 
egpu.enhancement_buf_ptr = gpu.mmap(ENHANCEMENT_BUF_SIZE); - if (egpu.enhancement_buf_ptr == NULL) { + if (egpu.enhancement_buf_ptr == NULL || egpu.enhancement_buf_ptr == (void *)(intptr_t)-1) { fprintf(stderr, "failed to map enhancement buffer\n"); + egpu.enhancement_buf_ptr = NULL; gpu.get_enhancement_bufer = NULL; } else { diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 6816e2bd9..5cc7792b4 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -201,8 +201,9 @@ static void map_downscale_buffer(void) gpu_unai.downscale_vram = (le16_t*)gpu.mmap(DOWNSCALE_VRAM_SIZE); - if (gpu_unai.downscale_vram == NULL) { + if (gpu_unai.downscale_vram == NULL || gpu_unai.downscale_vram == (le16_t *)(intptr_t)-1) { fprintf(stderr, "failed to map downscale buffer\n"); + gpu_unai.downscale_vram = NULL; gpu.get_downscale_buffer = NULL; } else { diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index f6340e111..70f212933 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -267,6 +267,7 @@ static int map_vram(void) } else { fprintf(stderr, "could not map vram, expect crashes\n"); + gpu.vram = NULL; return -1; } } @@ -285,10 +286,6 @@ long GPUinit(void) gpu.cmd_len = 0; do_reset(); - /*if (gpu.mmap != NULL) { - if (map_vram() != 0) - ret = -1; - }*/ return ret; } From 4db13cabae241c4c93614c9e2e966d6e19adf87d Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 1 Oct 2024 19:43:13 +0300 Subject: [PATCH 564/597] add dedicated mmap functions for libnx the previous code would call svcUnmapPhysicalMemory() on mem possibly allocated by aligned_alloc() which didn't look right --- frontend/libretro.c | 33 ++++++++++++++++++++++++++++++--- frontend/switch/sys/mman.h | 2 ++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index bcd3c615e..52c2c1c93 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -498,6 +498,31 @@ static void pl_3ds_munmap(void *ptr, 
size_t size, enum psxMapTag tag) } #endif +#ifdef HAVE_LIBNX +static void *pl_switch_mmap(unsigned long addr, size_t size, + enum psxMapTag tag, int *can_retry_addr) +{ + void *ret = MAP_FAILED; + *can_retry_addr = 0; + (void)addr; + + // there's svcMapPhysicalMemory() but user logs show it doesn't hand out + // any desired addresses, so don't even bother + ret = aligned_alloc(0x1000, size); + if (!ret) + return MAP_FAILED; + memset(ret, 0, size); + return ret; +} + +static void pl_switch_munmap(void *ptr, size_t size, enum psxMapTag tag) +{ + (void)size; + (void)tag; + free(ptr); +} +#endif + #ifdef VITA typedef struct { @@ -3792,11 +3817,13 @@ void retro_init(void) syscall(SYS_ptrace, 0 /*PTRACE_TRACEME*/, 0, 0, 0); #endif -#ifdef _3DS +#if defined(_3DS) psxMapHook = pl_3ds_mmap; psxUnmapHook = pl_3ds_munmap; -#endif -#ifdef VITA +#elif defined(HAVE_LIBNX) + psxMapHook = pl_switch_mmap; + psxUnmapHook = pl_switch_munmap; +#elif defined(VITA) if (init_vita_mmap() < 0) abort(); psxMapHook = pl_vita_mmap; diff --git a/frontend/switch/sys/mman.h b/frontend/switch/sys/mman.h index 2e084a64e..1d31e2529 100644 --- a/frontend/switch/sys/mman.h +++ b/frontend/switch/sys/mman.h @@ -20,6 +20,7 @@ extern "C" { #define ALIGNMENT 0x1000 +#if 0 // not used static inline void *mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) { (void)fd; @@ -51,6 +52,7 @@ static inline int munmap(void *addr, size_t len) } return 0; } +#endif #ifdef __cplusplus }; From 5612f82e351eac6c7ba9eb011e16f3fbb8637b3c Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 5 Oct 2024 02:07:42 +0300 Subject: [PATCH 565/597] detect bad SysMessage format --- libpcsxcore/misc.c | 2 +- libpcsxcore/psxmem.c | 2 +- libpcsxcore/system.h | 12 ++++++++++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index aafe52217..d4c886f4f 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -585,7 +585,7 @@ int Load(const char *ExePath) { case 0: /* 
End of file */ break; default: - SysPrintf(_("Unknown CPE opcode %02x at position %08x.\n"), opcode, ftell(tmpFile) - 1); + SysPrintf(_("Unknown CPE opcode %02x at position %08zx.\n"), opcode, ftell(tmpFile) - 1); retval = -1; break; } diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index ad4725928..e08bd895f 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -101,7 +101,7 @@ void *psxMap(unsigned long addr, size_t size, int is_fixed, return MAP_FAILED; if (addr != 0 && ret != (void *)(uintptr_t)addr) { - SysMessage("psxMap: tried to map @%08x, got %p\n", + SysMessage("psxMap: tried to map @%08lx, got %p\n", addr, ret); if (is_fixed) { psxUnmap(ret, size, tag); diff --git a/libpcsxcore/system.h b/libpcsxcore/system.h index fe4ab404a..4e65911a3 100644 --- a/libpcsxcore/system.h +++ b/libpcsxcore/system.h @@ -26,8 +26,16 @@ extern "C" { int SysInit(); // Init mem and plugins void SysReset(); // Resets mem -void SysPrintf(const char *fmt, ...); // Printf used by bios syscalls -void SysMessage(const char *fmt, ...); // Message used to print msg to users +void SysPrintf(const char *fmt, ...) +#if defined(__GNUC__) && defined(__x86_64__) // some platforms have int32_t as long + __attribute__((format(printf, 1, 2))) +#endif + ; +void SysMessage(const char *fmt, ...) // Message used to print msg to users +#if defined(__GNUC__) && defined(__x86_64__) + __attribute__((format(printf, 1, 2))) +#endif + ; void *SysLoadLibrary(const char *lib); // Loads Library void *SysLoadSym(void *lib, const char *sym); // Loads Symbol from Library const char *SysLibError(); // Gets previous error loading sysbols From 08d9d25754fcf96bbfed96de440683b95c7e3554 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 3 Oct 2024 02:56:07 +0300 Subject: [PATCH 566/597] new async cdrom + cdrom cleanup Should've split this really, but that's lots of extra work... Beware of breakage, but please report bugs. 
--- Makefile | 11 +- frontend/libretro-rthreads.c | 9 + frontend/libretro.c | 361 +------------------- frontend/libretro_core_options.h | 20 +- frontend/main.c | 102 +----- frontend/menu.c | 12 +- frontend/plugin.c | 48 --- frontend/plugin.h | 2 - libpcsxcore/cdriso.c | 301 ++++++++-------- libpcsxcore/cdriso.h | 17 +- libpcsxcore/cdrom-async.c | 566 +++++++++++++++++++++++++++++++ libpcsxcore/cdrom-async.h | 29 ++ libpcsxcore/cdrom.c | 165 ++++----- libpcsxcore/cdrom.h | 12 +- libpcsxcore/database.c | 6 +- libpcsxcore/misc.c | 27 +- libpcsxcore/misc.h | 2 +- libpcsxcore/plugins.c | 112 +----- libpcsxcore/plugins.h | 57 +--- libpcsxcore/ppf.c | 2 +- libpcsxcore/psxcommon.h | 2 - 21 files changed, 925 insertions(+), 938 deletions(-) create mode 100644 frontend/libretro-rthreads.c create mode 100644 libpcsxcore/cdrom-async.c create mode 100644 libpcsxcore/cdrom-async.h diff --git a/Makefile b/Makefile index 9c27f06c8..61eb95ef3 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,8 @@ CFLAGS += -DPCNT endif # core -OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cheat.o libpcsxcore/database.o \ +OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cdrom-async.o \ + libpcsxcore/cheat.o libpcsxcore/database.o \ libpcsxcore/decode_xa.o libpcsxcore/mdec.o \ libpcsxcore/misc.o libpcsxcore/plugins.o libpcsxcore/ppf.o libpcsxcore/psxbios.o \ libpcsxcore/psxcommon.o libpcsxcore/psxcounters.o libpcsxcore/psxdma.o \ @@ -165,9 +166,6 @@ plugins/gpu_unai/gpulib_if.o: CFLAGS += -DREARMED -DUSE_GPULIB=1 -O3 CC_LINK = $(CXX) endif -# cdrcimg -OBJS += plugins/cdrcimg/cdrcimg.o - # libchdr #ifeq "$(HAVE_CHD)" "1" LCHDR = deps/libchdr @@ -275,10 +273,13 @@ ifeq "$(HAVE_PHYSICAL_CDROM)" "1" OBJS += frontend/libretro-cdrom.o OBJS += deps/libretro-common/lists/string_list.o OBJS += deps/libretro-common/memmap/memalign.o -OBJS += deps/libretro-common/rthreads/rthreads.o OBJS += deps/libretro-common/vfs/vfs_implementation_cdrom.o CFLAGS += -DHAVE_CDROM endif +ifeq 
"$(USE_ASYNC_CDROM)" "1" +OBJS += frontend/libretro-rthreads.o +CFLAGS += -DUSE_ASYNC_CDROM +endif ifeq "$(USE_LIBRETRO_VFS)" "1" OBJS += deps/libretro-common/compat/compat_posix_string.o OBJS += deps/libretro-common/compat/fopen_utf8.o diff --git a/frontend/libretro-rthreads.c b/frontend/libretro-rthreads.c new file mode 100644 index 000000000..96c861d3c --- /dev/null +++ b/frontend/libretro-rthreads.c @@ -0,0 +1,9 @@ +// temporary(?) workaround: +// https://github.com/libretro/libretro-common/pull/216 +#ifdef _3DS +#include <3ds/svc.h> +#include <3ds/services/apt.h> +#include +#endif + +#include "../deps/libretro-common/rthreads/rthreads.c" diff --git a/frontend/libretro.c b/frontend/libretro.c index 52c2c1c93..b5c3b92d0 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -26,6 +26,7 @@ #include "../libpcsxcore/psxmem_map.h" #include "../libpcsxcore/new_dynarec/new_dynarec.h" #include "../libpcsxcore/cdrom.h" +#include "../libpcsxcore/cdrom-async.h" #include "../libpcsxcore/cdriso.h" #include "../libpcsxcore/cheat.h" #include "../libpcsxcore/r3000a.h" @@ -1315,12 +1316,6 @@ static void disk_init(void) } } -#ifdef HAVE_CDROM -static long CALLBACK rcdrom_open(void); -static long CALLBACK rcdrom_close(void); -static void rcdrom_stop_thread(void); -#endif - static bool disk_set_eject_state(bool ejected) { if (ejected != disk_ejected) @@ -1331,12 +1326,12 @@ static bool disk_set_eject_state(bool ejected) LidInterrupt(); #ifdef HAVE_CDROM - if (CDR_open == rcdrom_open && ejected != disk_ejected) { - rcdrom_stop_thread(); + if (cdra_is_physical() && ejected != disk_ejected) { + cdra_stop_thread(); if (!ejected) { // likely the real cd was also changed - rescan - rcdrom_close(); - rcdrom_open(); + cdra_close(); + cdra_open(); } } #endif @@ -1366,7 +1361,7 @@ static bool disk_set_image_index(unsigned int index) if (disks[index].fname == NULL) { LogErr("missing disk #%u\n", index); - CDR_shutdown(); + cdra_shutdown(); // RetroArch specifies "no disk" with index 
== count, // so don't fail here.. @@ -1384,7 +1379,7 @@ static bool disk_set_image_index(unsigned int index) LogErr("failed to load cdr plugin\n"); return false; } - if (CDR_open() < 0) + if (cdra_open() < 0) { LogErr("failed to open cdr plugin\n"); return false; @@ -1593,308 +1588,6 @@ static void extract_directory(char *buf, const char *path, size_t size) } } -// raw cdrom support -#ifdef HAVE_CDROM -#include "vfs/vfs_implementation.h" -#include "vfs/vfs_implementation_cdrom.h" -#include "libretro-cdrom.h" -#include "rthreads/rthreads.h" -#include "retro_timers.h" -struct cached_buf { - unsigned char buf[2352]; - unsigned int lba; -}; -static struct { - libretro_vfs_implementation_file *h; - sthread_t *thread; - slock_t *read_lock; - slock_t *buf_lock; - scond_t *cond; - struct cached_buf *buf; - unsigned int buf_cnt, thread_exit, do_prefetch; - unsigned int total_lba, prefetch_lba; - int check_eject_delay; -} rcdrom; - -static void lbacache_do(unsigned int lba) -{ - unsigned char m, s, f, buf[2352]; - unsigned int i = lba % rcdrom.buf_cnt; - int ret; - - cdrom_lba_to_msf(lba + 150, &m, &s, &f); - slock_lock(rcdrom.read_lock); - ret = cdrom_read_sector(rcdrom.h, lba, buf); - slock_lock(rcdrom.buf_lock); - slock_unlock(rcdrom.read_lock); - //printf("%d:%02d:%02d m%d f%d\n", m, s, f, buf[12+3], ((buf[12+4+2] >> 5) & 1) + 1); - if (ret) { - rcdrom.do_prefetch = 0; - slock_unlock(rcdrom.buf_lock); - LogErr("prefetch: cdrom_read_sector failed for lba %d\n", lba); - return; - } - rcdrom.check_eject_delay = 100; - - if (lba != rcdrom.buf[i].lba) { - memcpy(rcdrom.buf[i].buf, buf, sizeof(rcdrom.buf[i].buf)); - rcdrom.buf[i].lba = lba; - } - slock_unlock(rcdrom.buf_lock); - retro_sleep(0); // why does the main thread stall without this? 
-} - -static int lbacache_get(unsigned int lba, void *buf) -{ - unsigned int i; - int ret = 0; - - i = lba % rcdrom.buf_cnt; - slock_lock(rcdrom.buf_lock); - if (lba == rcdrom.buf[i].lba) { - memcpy(buf, rcdrom.buf[i].buf, 2352); - ret = 1; - } - slock_unlock(rcdrom.buf_lock); - return ret; -} - -static void rcdrom_prefetch_thread(void *unused) -{ - unsigned int buf_cnt, lba, lba_to; - - slock_lock(rcdrom.buf_lock); - while (!rcdrom.thread_exit) - { -#ifdef __GNUC__ - __asm__ __volatile__("":::"memory"); // barrier -#endif - if (!rcdrom.do_prefetch) - scond_wait(rcdrom.cond, rcdrom.buf_lock); - if (!rcdrom.do_prefetch || !rcdrom.h || rcdrom.thread_exit) - continue; - - buf_cnt = rcdrom.buf_cnt; - lba = rcdrom.prefetch_lba; - lba_to = lba + buf_cnt; - if (lba_to > rcdrom.total_lba) - lba_to = rcdrom.total_lba; - for (; lba < lba_to; lba++) { - if (lba != rcdrom.buf[lba % buf_cnt].lba) - break; - } - if (lba == lba_to) { - // caching complete - rcdrom.do_prefetch = 0; - continue; - } - - slock_unlock(rcdrom.buf_lock); - lbacache_do(lba); - slock_lock(rcdrom.buf_lock); - } - slock_unlock(rcdrom.buf_lock); -} - -static void rcdrom_stop_thread(void) -{ - rcdrom.thread_exit = 1; - if (rcdrom.buf_lock) { - slock_lock(rcdrom.buf_lock); - rcdrom.do_prefetch = 0; - if (rcdrom.cond) - scond_signal(rcdrom.cond); - slock_unlock(rcdrom.buf_lock); - } - if (rcdrom.thread) { - sthread_join(rcdrom.thread); - rcdrom.thread = NULL; - } - if (rcdrom.cond) { scond_free(rcdrom.cond); rcdrom.cond = NULL; } - if (rcdrom.buf_lock) { slock_free(rcdrom.buf_lock); rcdrom.buf_lock = NULL; } - if (rcdrom.read_lock) { slock_free(rcdrom.read_lock); rcdrom.read_lock = NULL; } - free(rcdrom.buf); - rcdrom.buf = NULL; -} - -// the thread is optional, if anything fails we can do direct reads -static void rcdrom_start_thread(void) -{ - rcdrom_stop_thread(); - rcdrom.thread_exit = rcdrom.prefetch_lba = rcdrom.do_prefetch = 0; - if (rcdrom.buf_cnt == 0) - return; - rcdrom.buf = calloc(rcdrom.buf_cnt, 
sizeof(rcdrom.buf[0])); - rcdrom.buf_lock = slock_new(); - rcdrom.read_lock = slock_new(); - rcdrom.cond = scond_new(); - if (rcdrom.buf && rcdrom.buf_lock && rcdrom.read_lock && rcdrom.cond) { - rcdrom.thread = sthread_create(rcdrom_prefetch_thread, NULL); - rcdrom.buf[0].lba = ~0; - } - if (!rcdrom.thread) { - LogErr("cdrom precache thread init failed.\n"); - rcdrom_stop_thread(); - } -} - -static long CALLBACK rcdrom_open(void) -{ - const char *name = GetIsoFile(); - //printf("%s %s\n", __func__, name); - rcdrom.h = retro_vfs_file_open_impl(name, RETRO_VFS_FILE_ACCESS_READ, - RETRO_VFS_FILE_ACCESS_HINT_NONE); - if (rcdrom.h) { - int ret = cdrom_set_read_speed_x(rcdrom.h, 4); - if (ret) LogErr("CD speed set failed\n"); - const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc(); - const cdrom_track_t *last = &toc->track[toc->num_tracks - 1]; - unsigned int lba = cdrom_msf_to_lba(last->min, last->sec, last->frame) - 150; - rcdrom.total_lba = lba + last->track_size; - //cdrom_get_current_config_random_readable(rcdrom.h); - //cdrom_get_current_config_multiread(rcdrom.h); - //cdrom_get_current_config_cdread(rcdrom.h); - //cdrom_get_current_config_profiles(rcdrom.h); - rcdrom_start_thread(); - return 0; - } - LogErr("retro_vfs_file_open failed for '%s'\n", name); - return -1; -} - -static long CALLBACK rcdrom_close(void) -{ - //printf("%s\n", __func__); - if (rcdrom.h) { - rcdrom_stop_thread(); - retro_vfs_file_close_impl(rcdrom.h); - rcdrom.h = NULL; - } - return 0; -} - -static long CALLBACK rcdrom_getTN(unsigned char *tn) -{ - const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc(); - tn[0] = 1; - tn[1] = toc->num_tracks; - //printf("%s -> %d %d\n", __func__, tn[0], tn[1]); - return 0; -} - -static long CALLBACK rcdrom_getTD(unsigned char track, unsigned char *rt) -{ - const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc(); - rt[0] = 0, rt[1] = 2, rt[2] = 0; - if (track == 0) { - cdrom_lba_to_msf(rcdrom.total_lba + 150, &rt[2], &rt[1], &rt[0]); - } - else if (track 
<= toc->num_tracks) { - int i = track - 1; - rt[2] = toc->track[i].min; - rt[1] = toc->track[i].sec; - rt[0] = toc->track[i].frame; - } - //printf("%s %d -> %d:%02d:%02d\n", __func__, track, rt[2], rt[1], rt[0]); - return 0; -} - -static long CALLBACK rcdrom_prefetch(unsigned char m, unsigned char s, unsigned char f) -{ - unsigned int lba = cdrom_msf_to_lba(m, s, f) - 150; - if (rcdrom.cond && rcdrom.h) { - rcdrom.prefetch_lba = lba; - rcdrom.do_prefetch = 1; - scond_signal(rcdrom.cond); - } - if (rcdrom.buf) { - unsigned int c = rcdrom.buf_cnt; - if (c) - return rcdrom.buf[lba % c].lba == lba; - } - return 1; -} - -static int rcdrom_read_msf(unsigned char m, unsigned char s, unsigned char f, - void *buf, const char *func) -{ - unsigned int lba = cdrom_msf_to_lba(m, s, f) - 150; - int hit = 0, ret = -1; - if (rcdrom.buf_lock) - hit = lbacache_get(lba, buf); - if (!hit && rcdrom.read_lock) { - // maybe still prefetching - slock_lock(rcdrom.read_lock); - slock_unlock(rcdrom.read_lock); - hit = lbacache_get(lba, buf); - if (hit) - hit = 2; - } - if (!hit) { - slock_t *lock = rcdrom.read_lock; - rcdrom.do_prefetch = 0; - if (lock) - slock_lock(lock); - if (rcdrom.h) { - ret = cdrom_read_sector(rcdrom.h, lba, buf); - if (ret) - LogErr("cdrom_read_sector failed for lba %d\n", lba); - } - if (lock) - slock_unlock(lock); - } - else - ret = 0; - rcdrom.check_eject_delay = ret ? 
0 : 100; - //printf("%s %d:%02d:%02d -> %d hit %d\n", func, m, s, f, ret, hit); - return ret; -} - -static boolean CALLBACK rcdrom_readTrack(unsigned char *time) -{ - unsigned char m = btoi(time[0]), s = btoi(time[1]), f = btoi(time[2]); - return !rcdrom_read_msf(m, s, f, ISOgetBuffer() - 12, __func__); -} - -static long CALLBACK rcdrom_readCDDA(unsigned char m, unsigned char s, unsigned char f, - unsigned char *buffer) -{ - return rcdrom_read_msf(m, s, f, buffer, __func__); -} - -static unsigned char * CALLBACK rcdrom_getBuffer(void) -{ - //printf("%s\n", __func__); - return ISOgetBuffer(); -} - -static unsigned char * CALLBACK rcdrom_getBufferSub(int sector) -{ - //printf("%s %d %d\n", __func__, sector, rcdrom_h->cdrom.last_frame_lba); - return NULL; -} - -static long CALLBACK rcdrom_getStatus(struct CdrStat *stat) -{ - const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc(); - //printf("%s %p\n", __func__, stat); - CDR__getStatus(stat); - stat->Type = toc->track[0].audio ? 2 : 1; - return 0; -} - -static void rcdrom_check_eject(void) -{ - bool media_inserted; - if (!rcdrom.h || rcdrom.do_prefetch || rcdrom.check_eject_delay-- > 0) - return; - rcdrom.check_eject_delay = 100; - media_inserted = cdrom_is_media_inserted(rcdrom.h); // 1-2ms - if (!media_inserted != disk_ejected) - disk_set_eject_state(!media_inserted); -} -#endif // HAVE_CDROM - #if defined(__QNX__) || defined(_WIN32) /* Blackberry QNX doesn't have strcasestr */ @@ -2147,18 +1840,7 @@ bool retro_load_game(const struct retro_game_info *info) } if (!strncmp(info->path, "cdrom:", 6)) { -#ifdef HAVE_CDROM - CDR_open = rcdrom_open; - CDR_close = rcdrom_close; - CDR_getTN = rcdrom_getTN; - CDR_getTD = rcdrom_getTD; - CDR_readTrack = rcdrom_readTrack; - CDR_getBuffer = rcdrom_getBuffer; - CDR_getBufferSub = rcdrom_getBufferSub; - CDR_getStatus = rcdrom_getStatus; - CDR_readCDDA = rcdrom_readCDDA; - CDR_prefetch = rcdrom_prefetch; -#elif !defined(USE_LIBRETRO_VFS) +#if !defined(HAVE_CDROM) && 
!defined(USE_LIBRETRO_VFS) ReleasePlugins(); LogErr("%s\n", "Physical CD-ROM support is not compiled in."); show_notification("Physical CD-ROM support is not compiled in.", 6000, 3); @@ -2233,7 +1915,7 @@ bool retro_load_game(const struct retro_game_info *info) LogErr("failed to reload cdr plugins\n"); return false; } - if (CDR_open() < 0) + if (cdra_open() < 0) { LogErr("failed to open cdr plugin\n"); return false; @@ -2567,23 +2249,12 @@ static void update_variables(bool in_flight) Config.TurboCD = false; } -#ifdef HAVE_CDROM +#if defined(HAVE_CDROM) || defined(USE_ASYNC_CDROM) var.value = NULL; - var.key = "pcsx_rearmed_phys_cd_readahead"; + var.key = "pcsx_rearmed_cd_readahead"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - long newval = strtol(var.value, NULL, 10); - bool changed = rcdrom.buf_cnt != newval; - if (rcdrom.h && changed) - rcdrom_stop_thread(); - rcdrom.buf_cnt = newval; - if (rcdrom.h && changed) { - rcdrom_start_thread(); - if (rcdrom.cond && rcdrom.prefetch_lba) { - rcdrom.do_prefetch = 1; - scond_signal(rcdrom.cond); - } - } + cdra_set_buf_count(strtol(var.value, NULL, 10)); } #endif @@ -3585,8 +3256,12 @@ void retro_run(void) vout_fb_dirty = 0; #ifdef HAVE_CDROM - if (CDR_open == rcdrom_open) - rcdrom_check_eject(); + int inserted; + if (cdra_check_eject(&inserted) > 0) { + bool media_inserted = inserted != 0; + if (!media_inserted != disk_ejected) + disk_set_eject_state(!media_inserted); + } #endif } diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 47a3ed4ef..86fe78344 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -180,19 +180,29 @@ struct retro_core_option_v2_definition option_defs_us[] = { "sync", }, #endif -#ifdef HAVE_CDROM +#if defined(HAVE_CDROM) || defined(USE_ASYNC_CDROM) #define V(x) { #x, NULL } { - "pcsx_rearmed_phys_cd_readahead", - "Physical CD read-ahead", + "pcsx_rearmed_cd_readahead", + "CD read-ahead", NULL, - "(Hardware 
CD-ROM only) Reads the specified amount of sectors ahead of time to try to avoid later stalls. 333000 will try to read the complete disk (requires an additional 750MB of RAM).", + "Reads the specified amount of sectors ahead of time to try to avoid later stalls. " +#ifdef HAVE_CDROM + "Affects both physical CD-ROM and CD images. " +#endif +#if !defined(_3DS) && !defined(VITA) + "333000 will try to read the complete disk (requires an additional 750MB of RAM)." +#endif + , NULL, "system", { V(0), V(1), V(2), V(3), V(4), V(5), V(6), V(7), V(8), V(9), V(10), V(11), V(12), V(13), V(14), V(15), - V(16), V(32), V(64), V(128), V(256), V(512), V(1024), V(333000), + V(16), V(32), V(64), V(128), V(256), V(512), V(1024), +#if !defined(_3DS) && !defined(VITA) + V(333000), +#endif { NULL, NULL}, }, "12", diff --git a/frontend/main.c b/frontend/main.c index 4c051e2c7..61dbf637a 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -25,6 +25,7 @@ #include "../libpcsxcore/cheat.h" #include "../libpcsxcore/sio.h" #include "../libpcsxcore/database.h" +#include "../libpcsxcore/cdrom-async.h" #include "../libpcsxcore/new_dynarec/new_dynarec.h" #include "../plugins/cdrcimg/cdrcimg.h" #include "../plugins/dfsound/spu_config.h" @@ -66,7 +67,7 @@ enum sched_action emu_action, emu_action_old; char hud_msg[64]; int hud_new_msg; -static void make_path(char *buf, size_t size, const char *dir, const char *fname) +static inline void make_path(char *buf, size_t size, const char *dir, const char *fname) { if (fname) snprintf(buf, size, ".%s%s", dir, fname); @@ -95,21 +96,7 @@ static int get_gameid_filename(char *buf, int size, const char *fmt, int i) { void set_cd_image(const char *fname) { - const char *ext = NULL; - - if (fname != NULL) - ext = strrchr(fname, '.'); - - if (ext && ( - strcasecmp(ext, ".z") == 0 || strcasecmp(ext, ".bz") == 0 || - strcasecmp(ext, ".znx") == 0 /*|| strcasecmp(ext, ".pbp") == 0*/)) { - SetIsoFile(NULL); - cdrcimg_set_fname(fname); - strcpy(Config.Cdr, 
"builtin_cdrcimg"); - } else { - SetIsoFile(fname); - strcpy(Config.Cdr, "builtin_cdr"); - } + SetIsoFile(fname); } static void set_default_paths(void) @@ -124,7 +111,6 @@ static void set_default_paths(void) strcpy(Config.PluginsDir, "plugins"); strcpy(Config.Gpu, "builtin_gpu"); strcpy(Config.Spu, "builtin_spu"); - strcpy(Config.Cdr, "builtin_cdr"); strcpy(Config.Pad1, "builtin_pad"); strcpy(Config.Pad2, "builtin_pad"); strcpy(Config.Net, "Disabled"); @@ -819,9 +805,6 @@ void SysReset() { // reset can run code, timing must be set pl_timing_prepare(Config.PsxType); - // hmh core forgets this - CDR_stop(); - EmuReset(); GPU_updateLace = real_lace; @@ -946,7 +929,7 @@ static int _OpenPlugins(void) { signal(SIGPIPE, SignalExit); #endif - ret = CDR_open(); + ret = cdra_open(); if (ret < 0) { SysMessage(_("Error opening CD-ROM plugin!")); return -1; } ret = SPU_open(); if (ret < 0) { SysMessage(_("Error opening SPU plugin!")); return -1; } @@ -960,64 +943,6 @@ static int _OpenPlugins(void) { ret = PAD2_open(&gpuDisp); if (ret < 0) { SysMessage(_("Error opening Controller 2 plugin!")); return -1; } - if (Config.UseNet && !NetOpened) { - netInfo info; - char path[MAXPATHLEN * 2]; - char dotdir[MAXPATHLEN]; - - MAKE_PATH(dotdir, "/.pcsx/plugins/", NULL); - - strcpy(info.EmuName, "PCSX"); - memcpy(info.CdromID, CdromId, 9); /* no \0 trailing character? 
*/ - memcpy(info.CdromLabel, CdromLabel, 9); - info.CdromLabel[9] = '\0'; - info.psxMem = psxM; - info.GPU_showScreenPic = GPU_showScreenPic; - info.GPU_displayText = GPU_displayText; - info.GPU_showScreenPic = GPU_showScreenPic; - info.PAD_setSensitive = PAD1_setSensitive; - sprintf(path, "%s%s", Config.BiosDir, Config.Bios); - strcpy(info.BIOSpath, path); - strcpy(info.MCD1path, Config.Mcd1); - strcpy(info.MCD2path, Config.Mcd2); - sprintf(path, "%s%s", dotdir, Config.Gpu); - strcpy(info.GPUpath, path); - sprintf(path, "%s%s", dotdir, Config.Spu); - strcpy(info.SPUpath, path); - sprintf(path, "%s%s", dotdir, Config.Cdr); - strcpy(info.CDRpath, path); - NET_setInfo(&info); - - ret = NET_open(&gpuDisp); - if (ret < 0) { - if (ret == -2) { - // -2 is returned when something in the info - // changed and needs to be synced - char *ptr; - - PARSEPATH(Config.Bios, info.BIOSpath); - PARSEPATH(Config.Gpu, info.GPUpath); - PARSEPATH(Config.Spu, info.SPUpath); - PARSEPATH(Config.Cdr, info.CDRpath); - - strcpy(Config.Mcd1, info.MCD1path); - strcpy(Config.Mcd2, info.MCD2path); - return -2; - } else { - Config.UseNet = FALSE; - } - } else { - if (NET_queryPlayer() == 1) { - if (SendPcsxInfo() == -1) Config.UseNet = FALSE; - } else { - if (RecvPcsxInfo() == -1) Config.UseNet = FALSE; - } - } - NetOpened = TRUE; - } else if (Config.UseNet) { - NET_resume(); - } - return 0; } @@ -1040,32 +965,25 @@ void ClosePlugins() { signal(SIGPIPE, SIG_DFL); #endif - ret = CDR_close(); - if (ret < 0) { SysMessage(_("Error closing CD-ROM plugin!")); return; } + cdra_close(); ret = SPU_close(); - if (ret < 0) { SysMessage(_("Error closing SPU plugin!")); return; } + if (ret < 0) { SysMessage(_("Error closing SPU plugin!")); } ret = PAD1_close(); - if (ret < 0) { SysMessage(_("Error closing Controller 1 Plugin!")); return; } + if (ret < 0) { SysMessage(_("Error closing Controller 1 Plugin!")); } ret = PAD2_close(); - if (ret < 0) { SysMessage(_("Error closing Controller 2 plugin!")); return; } + 
if (ret < 0) { SysMessage(_("Error closing Controller 2 plugin!")); } // pcsx-rearmed: we handle gpu elsewhere //ret = GPU_close(); //if (ret < 0) { SysMessage(_("Error closing GPU plugin!")); return; } - - if (Config.UseNet) { - NET_pause(); - } } /* we hook statically linked plugins here */ static const char *builtin_plugins[] = { - "builtin_gpu", "builtin_spu", "builtin_cdr", "builtin_pad", - "builtin_cdrcimg", + "builtin_gpu", "builtin_spu", "builtin_pad", }; static const int builtin_plugin_ids[] = { - PLUGIN_GPU, PLUGIN_SPU, PLUGIN_CDR, PLUGIN_PAD, - PLUGIN_CDRCIMG, + PLUGIN_GPU, PLUGIN_SPU, PLUGIN_PAD, }; void *SysLoadLibrary(const char *lib) { diff --git a/frontend/menu.c b/frontend/menu.c index 15034a90f..275028c55 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -38,6 +38,7 @@ #include "libpicofe/plat.h" #include "../libpcsxcore/misc.h" #include "../libpcsxcore/cdrom.h" +#include "../libpcsxcore/cdrom-async.h" #include "../libpcsxcore/cdriso.h" #include "../libpcsxcore/cheat.h" #include "../libpcsxcore/ppf.h" @@ -2285,7 +2286,7 @@ static int swap_cd_image(void) menu_update_msg("failed to load cdr plugin"); return -1; } - if (CDR_open() < 0) { + if (cdra_open() < 0) { menu_update_msg("failed to open cdr plugin"); return -1; } @@ -2303,8 +2304,8 @@ static int swap_cd_multidisk(void) CdromId[0] = '\0'; CdromLabel[0] = '\0'; - CDR_close(); - if (CDR_open() < 0) { + cdra_close(); + if (cdra_open() < 0) { menu_update_msg("failed to open cdr plugin"); return -1; } @@ -2759,11 +2760,6 @@ void menu_prepare_emu(void) menu_sync_config(); psxCpu->ApplyConfig(); - // core doesn't care about Config.Cdda changes, - // so handle them manually here - if (Config.Cdda) - CDR_stop(); - if (cpu_clock > 0) plat_target_cpu_clock_set(cpu_clock); diff --git a/frontend/plugin.c b/frontend/plugin.c index b3ad3bd98..19143251d 100644 --- a/frontend/plugin.c +++ b/frontend/plugin.c @@ -20,29 +20,6 @@ #undef CALLBACK #define CALLBACK -/* CDR */ -struct CdrStat; -static long 
CALLBACK CDRinit(void) { return 0; } -static long CALLBACK CDRshutdown(void) { return 0; } -static long CALLBACK CDRopen(void) { return 0; } -static long CALLBACK CDRclose(void) { return 0; } -static long CALLBACK CDRgetTN(unsigned char *_) { return 0; } -static long CALLBACK CDRgetTD(unsigned char _, unsigned char *__) { return 0; } -static boolean CALLBACK CDRreadTrack(unsigned char *_) { return FALSE; } -static unsigned char * CALLBACK CDRgetBuffer(void) { return NULL; } -static unsigned char * CALLBACK CDRgetBufferSub(int sector) { return NULL; } -static long CALLBACK CDRconfigure(void) { return 0; } -static long CALLBACK CDRtest(void) { return 0; } -static void CALLBACK CDRabout(void) { return; } -static long CALLBACK CDRplay(unsigned char *_) { return 0; } -static long CALLBACK CDRstop(void) { return 0; } -static long CALLBACK CDRsetfilename(char *_) { return 0; } -static long CALLBACK CDRgetStatus(struct CdrStat *_) { return 0; } -static char * CALLBACK CDRgetDriveLetter(void) { return NULL; } -static long CALLBACK CDRreadCDDA(unsigned char _, unsigned char __, unsigned char ___, unsigned char *____) { return 0; } -static long CALLBACK CDRgetTE(unsigned char _, unsigned char *__, unsigned char *___, unsigned char *____) { return 0; } -static long CALLBACK CDRprefetch(unsigned char m, unsigned char s, unsigned char f) { return 1; } - /* GPU */ static void CALLBACK GPUdisplayText(char *_) { return; } @@ -134,7 +111,6 @@ extern void GPUrearmedCallbacks(const struct rearmed_cbs *cbs); #define DIRECT(id, name) \ { id, #name, name } -#define DIRECT_CDR(name) DIRECT(PLUGIN_CDR, name) #define DIRECT_SPU(name) DIRECT(PLUGIN_SPU, name) #define DIRECT_GPU(name) DIRECT(PLUGIN_GPU, name) #define DIRECT_PAD(name) DIRECT(PLUGIN_PAD, name) @@ -144,27 +120,6 @@ static const struct { const char *name; void *func; } plugin_funcs[] = { - /* CDR */ - DIRECT_CDR(CDRinit), - DIRECT_CDR(CDRshutdown), - DIRECT_CDR(CDRopen), - DIRECT_CDR(CDRclose), - DIRECT_CDR(CDRtest), - 
DIRECT_CDR(CDRgetTN), - DIRECT_CDR(CDRgetTD), - DIRECT_CDR(CDRreadTrack), - DIRECT_CDR(CDRgetBuffer), - DIRECT_CDR(CDRgetBufferSub), - DIRECT_CDR(CDRplay), - DIRECT_CDR(CDRstop), - DIRECT_CDR(CDRgetStatus), - DIRECT_CDR(CDRgetDriveLetter), - DIRECT_CDR(CDRconfigure), - DIRECT_CDR(CDRabout), - DIRECT_CDR(CDRsetfilename), - DIRECT_CDR(CDRreadCDDA), - DIRECT_CDR(CDRgetTE), - DIRECT_CDR(CDRprefetch), /* SPU */ DIRECT_SPU(SPUinit), DIRECT_SPU(SPUshutdown), @@ -230,9 +185,6 @@ void *plugin_link(enum builtint_plugins_e id, const char *sym) { int i; - if (id == PLUGIN_CDRCIMG) - return cdrcimg_get_sym(sym); - for (i = 0; i < ARRAY_SIZE(plugin_funcs); i++) { if (id != plugin_funcs[i].id) continue; diff --git a/frontend/plugin.h b/frontend/plugin.h index a96d6098d..996da5574 100644 --- a/frontend/plugin.h +++ b/frontend/plugin.h @@ -10,9 +10,7 @@ enum builtint_plugins_e { PLUGIN_GPU, PLUGIN_SPU, - PLUGIN_CDR, PLUGIN_PAD, - PLUGIN_CDRCIMG, }; void *plugin_link(enum builtint_plugins_e id, const char *sym); diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index 243c03621..ff20731d5 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -20,27 +20,32 @@ ***************************************************************************/ #include "psxcommon.h" -#include "plugins.h" #include "cdrom.h" #include "cdriso.h" #include "ppf.h" +#include +#include +#ifdef HAVE_CHD +#include +#endif + #ifdef _WIN32 #define strcasecmp _stricmp #else #include #include #include -#if P_HAVE_PTHREAD -#include -#include -#endif #endif -#include -#include -#ifdef HAVE_CHD -#include "libchdr/chd.h" +#ifdef USE_LIBRETRO_VFS +#include +#undef fseeko +#undef ftello +#undef rewind +#define ftello rftell +#define fseeko rfseek +#define rewind(f_) rfseek(f_, 0, SEEK_SET) #endif #define OFF_T_MSB ((off_t)1 << (sizeof(off_t) * 8 - 1)) @@ -58,7 +63,6 @@ static boolean subChanRaw = FALSE; static boolean multifile = FALSE; static unsigned char cdbuffer[CD_FRAMESIZE_RAW]; -static unsigned char 
subbuffer[SUB_FRAMESIZE]; static boolean cddaBigEndian = FALSE; /* Frame offset into CD image where pregap data would be found if it was there. @@ -93,16 +97,9 @@ static struct { #endif static int (*cdimg_read_func)(FILE *f, unsigned int base, void *dest, int sector); -static int (*cdimg_read_sub_func)(FILE *f, int sector); - -char* CALLBACK CDR__getDriveLetter(void); -long CALLBACK CDR__configure(void); -long CALLBACK CDR__test(void); -void CALLBACK CDR__about(void); -long CALLBACK CDR__setfilename(char *filename); -long CALLBACK CDR__prefetch(u8 m, u8 s, u8 f); +static int (*cdimg_read_sub_func)(FILE *f, int sector, void *dest); -static void DecodeRawSubData(void); +static void DecodeRawSubData(unsigned char *subbuffer); struct trackinfo { enum {DATA=1, CDDA} type; @@ -118,11 +115,13 @@ static int numtracks = 0; static struct trackinfo ti[MAXTRACKS]; // get a sector from a msf-array -static unsigned int msf2sec(char *msf) { +static unsigned int msf2sec(const void *msf_) { + const unsigned char *msf = msf_; return ((msf[0] * 60 + msf[1]) * 75) + msf[2]; } -static void sec2msf(unsigned int s, char *msf) { +static void sec2msf(unsigned int s, void *msf_) { + unsigned char *msf = msf_; msf[0] = s / 75 / 60; s = s - msf[0] * 75 * 60; msf[1] = s / 75; @@ -1071,7 +1070,7 @@ static int opensbifile(const char *isoname) { strcpy(sbiname + strlen(sbiname) - 4, disknum); } else - strcpy(sbiname + strlen(sbiname) - 4, ".sbi"); + strcpy(sbiname + strlen(sbiname) - 4, ".sbi"); } else { return -1; @@ -1087,6 +1086,8 @@ static int cdread_normal(FILE *f, unsigned int base, void *dest, int sector) int ret; if (!f) return -1; + if (!dest) + dest = cdbuffer; if (fseeko(f, base + sector * CD_FRAMESIZE_RAW, SEEK_SET)) goto fail_io; ret = fread(dest, 1, CD_FRAMESIZE_RAW, f); @@ -1106,6 +1107,8 @@ static int cdread_sub_mixed(FILE *f, unsigned int base, void *dest, int sector) if (!f) return -1; + if (!dest) + dest = cdbuffer; if (fseeko(f, base + sector * (CD_FRAMESIZE_RAW + 
SUB_FRAMESIZE), SEEK_SET)) goto fail_io; ret = fread(dest, 1, CD_FRAMESIZE_RAW, f); @@ -1118,16 +1121,16 @@ static int cdread_sub_mixed(FILE *f, unsigned int base, void *dest, int sector) return -1; } -static int cdread_sub_sub_mixed(FILE *f, int sector) +static int cdread_sub_sub_mixed(FILE *f, int sector, void *buffer) { if (!f) return -1; if (fseeko(f, sector * (CD_FRAMESIZE_RAW + SUB_FRAMESIZE) + CD_FRAMESIZE_RAW, SEEK_SET)) goto fail_io; - if (fread(subbuffer, 1, SUB_FRAMESIZE, f) != SUB_FRAMESIZE) + if (fread(buffer, 1, SUB_FRAMESIZE, f) != SUB_FRAMESIZE) goto fail_io; - return SUB_FRAMESIZE; + return 0; fail_io: SysPrintf("subchannel: file IO error %d, sector %u\n", errno, sector); @@ -1208,7 +1211,7 @@ static int cdread_compressed(FILE *f, unsigned int base, void *dest, int sector) if (fread(is_compressed ? compr_img->buff_compressed : compr_img->buff_raw[0], 1, size, cdHandle) != size) { - SysPrintf("read error for block %d at %x: ", block, start_byte); + SysPrintf("read error for block %d at %zx: ", block, start_byte); perror(NULL); return -1; } @@ -1231,7 +1234,7 @@ static int cdread_compressed(FILE *f, unsigned int base, void *dest, int sector) compr_img->current_block = block; finish: - if (dest != cdbuffer) // copy avoid HACK + if (dest != NULL) memcpy(dest, compr_img->buff_raw[compr_img->sector_in_blk], CD_FRAMESIZE_RAW); return CD_FRAMESIZE_RAW; @@ -1265,13 +1268,13 @@ static int cdread_chd(FILE *f, unsigned int base, void *dest, int sector) chd_img->current_hunk[chd_img->current_buffer] = hunk; } - if (dest != cdbuffer) // copy avoid HACK + if (dest != NULL) memcpy(dest, chd_get_sector(chd_img->current_buffer, chd_img->sector_in_hunk), CD_FRAMESIZE_RAW); return CD_FRAMESIZE_RAW; } -static int cdread_sub_chd(FILE *f, int sector) +static int cdread_sub_chd(FILE *f, int sector, void *buffer_ptr) { unsigned int sector_in_hunk; unsigned int buffer; @@ -1295,41 +1298,48 @@ static int cdread_sub_chd(FILE *f, int sector) chd_img->current_hunk[buffer] = 
hunk; } - memcpy(subbuffer, chd_get_sector(buffer, sector_in_hunk) + CD_FRAMESIZE_RAW, SUB_FRAMESIZE); - return SUB_FRAMESIZE; + memcpy(buffer_ptr, chd_get_sector(buffer, sector_in_hunk) + CD_FRAMESIZE_RAW, SUB_FRAMESIZE); + return 0; } #endif static int cdread_2048(FILE *f, unsigned int base, void *dest, int sector) { + unsigned char *dst = dest ? dest : cdbuffer; int ret; if (!f) return -1; + fseeko(f, base + sector * 2048, SEEK_SET); - ret = fread((char *)dest + 12 * 2, 1, 2048, f); + ret = fread(dst + 12 * 2, 1, 2048, f); // not really necessary, fake mode 2 header - memset(cdbuffer, 0, 12 * 2); - sec2msf(sector + 2 * 75, (char *)&cdbuffer[12]); - cdbuffer[12 + 3] = 1; + memset(dst, 0, 12 * 2); + sec2msf(sector + 2 * 75, dst + 12); + dst[12 + 0] = itob(dst[12 + 0]); + dst[12 + 1] = itob(dst[12 + 1]); + dst[12 + 2] = itob(dst[12 + 2]); + dst[12 + 3] = 1; return 12*2 + ret; } -static unsigned char * CALLBACK ISOgetBuffer_compr(void) { - return compr_img->buff_raw[compr_img->sector_in_blk] + 12; +static void * ISOgetBuffer_normal(void) { + return cdbuffer + 12; +} + +static void * ISOgetBuffer_compr(void) { + return compr_img->buff_raw[compr_img->sector_in_blk] + 12; } #ifdef HAVE_CHD -static unsigned char * CALLBACK ISOgetBuffer_chd(void) { - return chd_get_sector(chd_img->current_buffer, chd_img->sector_in_hunk) + 12; +static void * ISOgetBuffer_chd(void) { + return chd_get_sector(chd_img->current_buffer, chd_img->sector_in_hunk) + 12; } #endif -unsigned char * CALLBACK ISOgetBuffer(void) { - return cdbuffer + 12; -} +void * (*ISOgetBuffer)(void) = ISOgetBuffer_normal; static void PrintTracks(void) { int i; @@ -1345,7 +1355,8 @@ static void PrintTracks(void) { // This function is invoked by the front-end when opening an ISO // file for playback -static long CALLBACK ISOopen(void) { +int ISOopen(const char *fname) +{ boolean isMode1ISO = FALSE; char alt_bin_filename[MAXPATHLEN]; const char *bin_filename; @@ -1356,16 +1367,16 @@ static long CALLBACK ISOopen(void) 
{ return 0; // it's already open } - cdHandle = fopen(GetIsoFile(), "rb"); + cdHandle = fopen(fname, "rb"); if (cdHandle == NULL) { SysPrintf(_("Could't open '%s' for reading: %s\n"), - GetIsoFile(), strerror(errno)); + fname, strerror(errno)); return -1; } size_main = get_size(cdHandle); snprintf(image_str, sizeof(image_str) - 6*4 - 1, - "Loaded CD Image: %s", GetIsoFile()); + "Loaded CD Image: %s", fname); cddaBigEndian = FALSE; subChanMixed = FALSE; @@ -1374,36 +1385,36 @@ static long CALLBACK ISOopen(void) { cdrIsoMultidiskCount = 1; multifile = 0; - CDR_getBuffer = ISOgetBuffer; + ISOgetBuffer = ISOgetBuffer_normal; cdimg_read_func = cdread_normal; cdimg_read_sub_func = NULL; - if (parsetoc(GetIsoFile()) == 0) { + if (parsetoc(fname) == 0) { strcat(image_str, "[+toc]"); } - else if (parseccd(GetIsoFile()) == 0) { + else if (parseccd(fname) == 0) { strcat(image_str, "[+ccd]"); } - else if (parsemds(GetIsoFile()) == 0) { + else if (parsemds(fname) == 0) { strcat(image_str, "[+mds]"); } - else if (parsecue(GetIsoFile()) == 0) { + else if (parsecue(fname) == 0) { strcat(image_str, "[+cue]"); } - if (handlepbp(GetIsoFile()) == 0) { + if (handlepbp(fname) == 0) { strcat(image_str, "[+pbp]"); - CDR_getBuffer = ISOgetBuffer_compr; + ISOgetBuffer = ISOgetBuffer_compr; cdimg_read_func = cdread_compressed; } - else if (handlecbin(GetIsoFile()) == 0) { + else if (handlecbin(fname) == 0) { strcat(image_str, "[+cbin]"); - CDR_getBuffer = ISOgetBuffer_compr; + ISOgetBuffer = ISOgetBuffer_compr; cdimg_read_func = cdread_compressed; } #ifdef HAVE_CHD - else if (handlechd(GetIsoFile()) == 0) { + else if (handlechd(fname) == 0) { strcat(image_str, "[+chd]"); - CDR_getBuffer = ISOgetBuffer_chd; + ISOgetBuffer = ISOgetBuffer_chd; cdimg_read_func = cdread_chd; cdimg_read_sub_func = cdread_sub_chd; fclose(cdHandle); @@ -1411,15 +1422,15 @@ static long CALLBACK ISOopen(void) { } #endif - if (!subChanMixed && opensubfile(GetIsoFile()) == 0) { + if (!subChanMixed && opensubfile(fname) 
== 0) { strcat(image_str, "[+sub]"); } - if (opensbifile(GetIsoFile()) == 0) { + if (opensbifile(fname) == 0) { strcat(image_str, "[+sbi]"); } // maybe user selected metadata file instead of main .bin .. - bin_filename = GetIsoFile(); + bin_filename = fname; if (cdHandle && size_main < 2352 * 0x10) { static const char *exts[] = { ".bin", ".BIN", ".img", ".IMG" }; FILE *tmpf = NULL; @@ -1473,7 +1484,8 @@ static long CALLBACK ISOopen(void) { return 0; } -static long CALLBACK ISOclose(void) { +int ISOclose(void) +{ int i; if (cdHandle != NULL) { @@ -1491,7 +1503,7 @@ static long CALLBACK ISOclose(void) { free(compr_img); compr_img = NULL; } - + #ifdef HAVE_CHD if (chd_img != NULL) { chd_close(chd_img->chd); @@ -1512,28 +1524,31 @@ static long CALLBACK ISOclose(void) { UnloadSBI(); memset(cdbuffer, 0, sizeof(cdbuffer)); - CDR_getBuffer = ISOgetBuffer; + ISOgetBuffer = ISOgetBuffer_normal; return 0; } -static long CALLBACK ISOinit(void) { +int ISOinit(void) +{ assert(cdHandle == NULL); assert(subHandle == NULL); + numtracks = 0; return 0; // do nothing } -static long CALLBACK ISOshutdown(void) { - ISOclose(); - return 0; +int ISOshutdown(void) +{ + return ISOclose(); } // return Starting and Ending Track // buffer: // byte 0 - start track // byte 1 - end track -static long CALLBACK ISOgetTN(unsigned char *buffer) { +int ISOgetTN(unsigned char *buffer) +{ buffer[0] = 1; if (numtracks > 0) { @@ -1548,23 +1563,18 @@ static long CALLBACK ISOgetTN(unsigned char *buffer) { // return Track Time // buffer: -// byte 0 - frame +// byte 0 - minute // byte 1 - second -// byte 2 - minute -static long CALLBACK ISOgetTD(unsigned char track, unsigned char *buffer) { +// byte 2 - frame +int ISOgetTD(int track, unsigned char *buffer) +{ if (track == 0) { unsigned int sect; - unsigned char time[3]; sect = msf2sec(ti[numtracks].start) + msf2sec(ti[numtracks].length); - sec2msf(sect, (char *)time); - buffer[2] = time[0]; - buffer[1] = time[1]; - buffer[0] = time[2]; + sec2msf(sect, buffer); 
} else if (numtracks > 0 && track <= numtracks) { - buffer[2] = ti[track].start[0]; - buffer[1] = ti[track].start[1]; - buffer[0] = ti[track].start[2]; + memcpy(buffer, ti[track].start, 3); } else { buffer[2] = 0; @@ -1576,7 +1586,7 @@ static long CALLBACK ISOgetTD(unsigned char track, unsigned char *buffer) { } // decode 'raw' subchannel data ripped by cdrdao -static void DecodeRawSubData(void) { +static void DecodeRawSubData(unsigned char *subbuffer) { unsigned char subQData[12]; int i; @@ -1592,64 +1602,68 @@ static void DecodeRawSubData(void) { } // read track -// time: byte 0 - minute; byte 1 - second; byte 2 - frame -// uses bcd format -static boolean CALLBACK ISOreadTrack(unsigned char *time) { - int sector = MSF2SECT(btoi(time[0]), btoi(time[1]), btoi(time[2])); +// time: byte 0 - minute; byte 1 - second; byte 2 - frame (non-bcd) +// buf: if NULL, data is kept in internal buffer accessible by ISOgetBuffer() +int ISOreadTrack(const unsigned char *time, void *buf) +{ + int sector = msf2sec(time); long ret; if (!cdHandle && !chd_img) - return 0; + return -1; + + if (numtracks > 1 && sector >= msf2sec(ti[2].start)) + return ISOreadCDDA(time, buf); + sector -= 2 * 75; if (pregapOffset && sector >= pregapOffset) sector -= 2 * 75; - ret = cdimg_read_func(cdHandle, 0, cdbuffer, sector); - if (ret < 12*2 + 2048) - return 0; - - return 1; -} + ret = cdimg_read_func(cdHandle, 0, buf, sector); + if (ret < 12*2 + 2048) { + if (multifile && sector >= msf2sec(ti[1].length)) { + // assume a gap not backed by a file + memset(buf, 0, CD_FRAMESIZE_RAW); + return 0; + } + return -1; + } -// plays cdda audio -// sector: byte 0 - minute; byte 1 - second; byte 2 - frame -// does NOT uses bcd format -static long CALLBACK ISOplay(unsigned char *time) { return 0; } -// stops cdda audio -static long CALLBACK ISOstop(void) { - return 0; -} +// read subchannel data +int ISOreadSub(const unsigned char *time, void *buffer) +{ + int ret, sector = MSF2SECT(time[0], time[1], time[2]); -// 
gets subchannel data -static unsigned char* CALLBACK ISOgetBufferSub(int sector) { if (pregapOffset && sector >= pregapOffset) { sector -= 2 * 75; if (sector < pregapOffset) // ? - return NULL; + return -1; } if (cdimg_read_sub_func != NULL) { - if (cdimg_read_sub_func(cdHandle, sector) != SUB_FRAMESIZE) - return NULL; + if ((ret = cdimg_read_sub_func(cdHandle, sector, buffer))) + return ret; } else if (subHandle != NULL) { if (fseeko(subHandle, sector * SUB_FRAMESIZE, SEEK_SET)) - return NULL; - if (fread(subbuffer, 1, SUB_FRAMESIZE, subHandle) != SUB_FRAMESIZE) - return NULL; + return -1; + if (fread(buffer, 1, SUB_FRAMESIZE, subHandle) != SUB_FRAMESIZE) + return -1; } else { - return NULL; + return -1; } - if (subChanRaw) DecodeRawSubData(); - return subbuffer; + if (subChanRaw) + DecodeRawSubData(buffer); + return 0; } -static long CALLBACK ISOgetStatus(struct CdrStat *stat) { +int ISOgetStatus(struct CdrStat *stat) +{ CDR__getStatus(stat); // BIOS - boot ID (CD type) @@ -1659,14 +1673,14 @@ static long CALLBACK ISOgetStatus(struct CdrStat *stat) { } // read CDDA sector into buffer -long CALLBACK ISOreadCDDA(unsigned char m, unsigned char s, unsigned char f, unsigned char *buffer) { - unsigned char msf[3] = {m, s, f}; +int ISOreadCDDA(const unsigned char *time, void *buffer) +{ unsigned int track, track_start = 0; FILE *handle = cdHandle; unsigned int cddaCurPos; - int ret; + int ret, ret_clear = -1; - cddaCurPos = msf2sec((char *)msf); + cddaCurPos = msf2sec(time); // find current track index for (track = numtracks; ; track--) { @@ -1679,8 +1693,8 @@ long CALLBACK ISOreadCDDA(unsigned char m, unsigned char s, unsigned char f, uns // data tracks play silent if (ti[track].type != CDDA) { - memset(buffer, 0, CD_FRAMESIZE_RAW); - return 0; + ret_clear = 0; + goto clear_return; } if (multifile) { @@ -1693,57 +1707,32 @@ long CALLBACK ISOreadCDDA(unsigned char m, unsigned char s, unsigned char f, uns } } } - if (!handle && !chd_img) { - memset(buffer, 0, 
CD_FRAMESIZE_RAW); - return -1; - } + if (!handle && !chd_img) + goto clear_return; ret = cdimg_read_func(handle, ti[track].start_offset, buffer, cddaCurPos - track_start); if (ret != CD_FRAMESIZE_RAW) { - memset(buffer, 0, CD_FRAMESIZE_RAW); - return -1; + if (multifile && cddaCurPos - track_start >= msf2sec(ti[track].length)) + ret_clear = 0; // gap + goto clear_return; } - if (cddaBigEndian) { + if (cddaBigEndian && buffer) { + unsigned char tmp, *buf = buffer; int i; - unsigned char tmp; for (i = 0; i < CD_FRAMESIZE_RAW / 2; i++) { - tmp = buffer[i * 2]; - buffer[i * 2] = buffer[i * 2 + 1]; - buffer[i * 2 + 1] = tmp; + tmp = buf[i * 2]; + buf[i * 2] = buf[i * 2 + 1]; + buf[i * 2 + 1] = tmp; } } return 0; -} -void cdrIsoInit(void) { - CDR_init = ISOinit; - CDR_shutdown = ISOshutdown; - CDR_open = ISOopen; - CDR_close = ISOclose; - CDR_getTN = ISOgetTN; - CDR_getTD = ISOgetTD; - CDR_readTrack = ISOreadTrack; - CDR_getBuffer = ISOgetBuffer; - CDR_play = ISOplay; - CDR_stop = ISOstop; - CDR_getBufferSub = ISOgetBufferSub; - CDR_getStatus = ISOgetStatus; - CDR_readCDDA = ISOreadCDDA; - - CDR_getDriveLetter = CDR__getDriveLetter; - CDR_configure = CDR__configure; - CDR_test = CDR__test; - CDR_about = CDR__about; - CDR_setfilename = CDR__setfilename; - CDR_prefetch = CDR__prefetch; - - numtracks = 0; -} - -int cdrIsoActive(void) { - return (cdHandle || chd_img); +clear_return: + if (buffer) + memset(buffer, 0, CD_FRAMESIZE_RAW); + return ret_clear; } diff --git a/libpcsxcore/cdriso.h b/libpcsxcore/cdriso.h index 079e0b8c3..ed79d60e7 100644 --- a/libpcsxcore/cdriso.h +++ b/libpcsxcore/cdriso.h @@ -25,9 +25,20 @@ extern "C" { #endif -void cdrIsoInit(void); -int cdrIsoActive(void); -unsigned char * CALLBACK ISOgetBuffer(void); +struct CdrStat; + +int ISOinit(void); +int ISOshutdown(void); +int ISOopen(const char *fname); +int ISOclose(void); +int ISOgetTN(unsigned char *buffer); +int ISOgetTD(int track, unsigned char *buffer); +int ISOreadTrack(const unsigned char *time, 
void *buf); +int ISOreadCDDA(const unsigned char *time, void *buffer); +int ISOreadSub(const unsigned char *time, void *buffer); +int ISOgetStatus(struct CdrStat *stat); + +extern void * (*ISOgetBuffer)(void); extern unsigned int cdrIsoMultidiskCount; extern unsigned int cdrIsoMultidiskSelect; diff --git a/libpcsxcore/cdrom-async.c b/libpcsxcore/cdrom-async.c new file mode 100644 index 000000000..026a34517 --- /dev/null +++ b/libpcsxcore/cdrom-async.c @@ -0,0 +1,566 @@ +/*************************************************************************** + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + ***************************************************************************/ + +#include +#include +#include "system.h" +#include "plugins.h" +#include "cdriso.h" +#include "cdrom.h" +#include "cdrom-async.h" + +#if 0 +#define acdrom_dbg printf +#else +#define acdrom_dbg(...) 
+#endif + +#ifdef HAVE_CDROM + +#include "vfs/vfs_implementation.h" +#include "vfs/vfs_implementation_cdrom.h" +#include "../frontend/libretro-cdrom.h" + +static libretro_vfs_implementation_file *g_cd_handle; + +static int rcdrom_open(const char *name, u32 *total_lba) +{ + g_cd_handle = retro_vfs_file_open_impl(name, RETRO_VFS_FILE_ACCESS_READ, + RETRO_VFS_FILE_ACCESS_HINT_NONE); + if (!g_cd_handle) { + SysPrintf("retro_vfs_file_open failed for '%s'\n", name); + return -1; + } + else { + int ret = cdrom_set_read_speed_x(g_cd_handle, 4); + if (ret) SysPrintf("CD speed set failed\n"); + const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc(); + const cdrom_track_t *last = &toc->track[toc->num_tracks - 1]; + unsigned int lba = MSF2SECT(last->min, last->sec, last->frame); + *total_lba = lba + last->track_size; + //cdrom_get_current_config_random_readable(acdrom.h); + //cdrom_get_current_config_multiread(acdrom.h); + //cdrom_get_current_config_cdread(acdrom.h); + //cdrom_get_current_config_profiles(acdrom.h); + return 0; + } +} + +static void rcdrom_close(void) +{ + if (g_cd_handle) { + retro_vfs_file_close_impl(g_cd_handle); + g_cd_handle = NULL; + } +} + +static int rcdrom_getTN(u8 *tn) +{ + const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc(); + if (toc) { + tn[0] = 1; + tn[1] = toc->num_tracks; + return 0; + } + return -1; +} + +static int rcdrom_getTD(u32 total_lba, u8 track, u8 *rt) +{ + const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc(); + rt[0] = 0, rt[1] = 2, rt[2] = 0; + if (track == 0) { + lba2msf(total_lba + 150, &rt[0], &rt[1], &rt[2]); + } + else if (track <= toc->num_tracks) { + int i = track - 1; + rt[0] = toc->track[i].min; + rt[1] = toc->track[i].sec; + rt[2] = toc->track[i].frame; + } + return 0; +} + +static int rcdrom_getStatus(struct CdrStat *stat) +{ + const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc(); + stat->Type = toc->track[0].audio ? 
2 : 1; + return 0; +} + +#elif defined(USE_ASYNC_CDROM) + +#define g_cd_handle 0 + +static int rcdrom_open(const char *name, u32 *total_lba) { return -1; } +static void rcdrom_close(void) {} +static int rcdrom_getTN(u8 *tn) { return -1; } +static int rcdrom_getTD(u32 total_lba, u8 track, u8 *rt) { return -1; } +static int rcdrom_getStatus(struct CdrStat *stat) { return -1; } + +static int cdrom_read_sector(void *stream, unsigned int lba, void *b) { return -1; } +static int cdrom_is_media_inserted(void *stream) { return 0; } + +#endif + +#ifdef USE_ASYNC_CDROM + +#include "rthreads/rthreads.h" +#include "retro_timers.h" + +struct cached_buf { + u32 lba; + u8 buf[CD_FRAMESIZE_RAW]; + u8 buf_sub[SUB_FRAMESIZE]; +}; +static struct { + sthread_t *thread; + slock_t *read_lock; + slock_t *buf_lock; + scond_t *cond; + struct cached_buf *buf_cache; + u32 buf_cnt, thread_exit, do_prefetch, prefetch_failed, have_subchannel; + u32 total_lba, prefetch_lba; + int check_eject_delay; + u8 buf_local[CD_FRAMESIZE_RAW]; // single sector cache, not touched by the thread +} acdrom; + +static void lbacache_do(u32 lba) +{ + unsigned char msf[3], buf[CD_FRAMESIZE_RAW], buf_sub[SUB_FRAMESIZE]; + u32 i = lba % acdrom.buf_cnt; + int ret; + + lba2msf(lba + 150, &msf[0], &msf[1], &msf[2]); + slock_lock(acdrom.read_lock); + if (g_cd_handle) + ret = cdrom_read_sector(g_cd_handle, lba, buf); + else + ret = ISOreadTrack(msf, buf); + if (acdrom.have_subchannel) + ret |= ISOreadSub(msf, buf_sub); + + slock_lock(acdrom.buf_lock); + slock_unlock(acdrom.read_lock); + acdrom_dbg("c %d:%02d:%02d %2d m%d f%d\n", msf[0], msf[1], msf[2], ret, + buf[12+3], ((buf[12+4+2] >> 5) & 1) + 1); + if (ret) { + acdrom.do_prefetch = 0; + acdrom.prefetch_failed = 1; + slock_unlock(acdrom.buf_lock); + SysPrintf("prefetch: read failed for lba %d: %d\n", lba, ret); + return; + } + acdrom.prefetch_failed = 0; + acdrom.check_eject_delay = 100; + + if (lba != acdrom.buf_cache[i].lba) { + acdrom.buf_cache[i].lba = lba; + 
memcpy(acdrom.buf_cache[i].buf, buf, sizeof(acdrom.buf_cache[i].buf)); + if (acdrom.have_subchannel) + memcpy(acdrom.buf_cache[i].buf_sub, buf_sub, sizeof(buf_sub)); + } + slock_unlock(acdrom.buf_lock); + if (g_cd_handle) + retro_sleep(0); // why does the main thread stall without this? +} + +static int lbacache_get(unsigned int lba, void *buf, void *sub_buf) +{ + unsigned int i; + int ret = 0; + + i = lba % acdrom.buf_cnt; + slock_lock(acdrom.buf_lock); + if (lba == acdrom.buf_cache[i].lba) { + if (!buf) + buf = acdrom.buf_local; + memcpy(buf, acdrom.buf_cache[i].buf, CD_FRAMESIZE_RAW); + if (sub_buf) + memcpy(sub_buf, acdrom.buf_cache[i].buf_sub, SUB_FRAMESIZE); + ret = 1; + } + slock_unlock(acdrom.buf_lock); + return ret; +} + +// note: This has races on some vars but that's ok, main thread can deal +// with it. Only unsafe buffer accesses and simultaneous reads are prevented. +static void cdra_prefetch_thread(void *unused) +{ + u32 buf_cnt, lba, lba_to; + + slock_lock(acdrom.buf_lock); + while (!acdrom.thread_exit) + { +#ifdef __GNUC__ + __asm__ __volatile__("":::"memory"); // barrier +#endif + if (!acdrom.do_prefetch) + scond_wait(acdrom.cond, acdrom.buf_lock); + if (!acdrom.do_prefetch || acdrom.thread_exit) + continue; + + buf_cnt = acdrom.buf_cnt; + lba = acdrom.prefetch_lba; + lba_to = lba + buf_cnt; + if (lba_to > acdrom.total_lba) + lba_to = acdrom.total_lba; + for (; lba < lba_to; lba++) { + if (lba != acdrom.buf_cache[lba % buf_cnt].lba) + break; + } + if (lba == lba_to || lba >= acdrom.total_lba) { + // caching complete + acdrom.do_prefetch = 0; + continue; + } + + slock_unlock(acdrom.buf_lock); + lbacache_do(lba); + slock_lock(acdrom.buf_lock); + } + slock_unlock(acdrom.buf_lock); +} + +void cdra_stop_thread(void) +{ + acdrom.thread_exit = 1; + if (acdrom.buf_lock) { + slock_lock(acdrom.buf_lock); + acdrom.do_prefetch = 0; + if (acdrom.cond) + scond_signal(acdrom.cond); + slock_unlock(acdrom.buf_lock); + } + if (acdrom.thread) { + 
sthread_join(acdrom.thread); + acdrom.thread = NULL; + } + if (acdrom.cond) { scond_free(acdrom.cond); acdrom.cond = NULL; } + if (acdrom.buf_lock) { slock_free(acdrom.buf_lock); acdrom.buf_lock = NULL; } + if (acdrom.read_lock) { slock_free(acdrom.read_lock); acdrom.read_lock = NULL; } + free(acdrom.buf_cache); + acdrom.buf_cache = NULL; +} + +// the thread is optional, if anything fails we can do direct reads +static void cdra_start_thread(void) +{ + cdra_stop_thread(); + acdrom.thread_exit = acdrom.prefetch_lba = acdrom.do_prefetch = 0; + acdrom.prefetch_failed = 0; + if (acdrom.buf_cnt == 0) + return; + acdrom.buf_cache = calloc(acdrom.buf_cnt, sizeof(acdrom.buf_cache[0])); + acdrom.buf_lock = slock_new(); + acdrom.read_lock = slock_new(); + acdrom.cond = scond_new(); + if (acdrom.buf_cache && acdrom.buf_lock && acdrom.read_lock && acdrom.cond) + { + int i; + acdrom.thread = sthread_create(cdra_prefetch_thread, NULL); + for (i = 0; i < acdrom.buf_cnt; i++) + acdrom.buf_cache[i].lba = ~0; + } + if (acdrom.thread) { + SysPrintf("cdrom precache: %d buffers%s\n", + acdrom.buf_cnt, acdrom.have_subchannel ? 
" +sub" : ""); + } + else { + SysPrintf("cdrom precache thread init failed.\n"); + cdra_stop_thread(); + } +} + +int cdra_init(void) +{ + return ISOinit(); +} + +void cdra_shutdown(void) +{ + cdra_close(); +} + +int cdra_open(void) +{ + const char *name = GetIsoFile(); + u8 buf_sub[SUB_FRAMESIZE]; + int ret = -1, ret2; + + acdrom_dbg("%s %s\n", __func__, name); + acdrom.have_subchannel = 0; + if (!strncmp(name, "cdrom:", 6)) + ret = rcdrom_open(name, &acdrom.total_lba); + + // try ISO even if it's cdrom:// as it might work through libretro vfs + if (ret < 0) { + ret = ISOopen(name); + if (ret == 0) { + u8 msf[3]; + ISOgetTD(0, msf); + acdrom.total_lba = MSF2SECT(msf[0], msf[1], msf[2]); + msf[0] = 0; msf[1] = 2; msf[2] = 16; + ret2 = ISOreadSub(msf, buf_sub); + acdrom.have_subchannel = (ret2 == 0); + } + } + if (ret == 0) + cdra_start_thread(); + return ret; +} + +void cdra_close(void) +{ + acdrom_dbg("%s\n", __func__); + cdra_stop_thread(); + if (g_cd_handle) + rcdrom_close(); + else + ISOclose(); +} + +int cdra_getTN(unsigned char *tn) +{ + int ret; + if (g_cd_handle) + ret = rcdrom_getTN(tn); + else + ret = ISOgetTN(tn); + acdrom_dbg("%s -> %d %d\n", __func__, tn[0], tn[1]); + return ret; +} + +int cdra_getTD(int track, unsigned char *rt) +{ + int ret; + if (g_cd_handle) + ret = rcdrom_getTD(acdrom.total_lba, track, rt); + else + ret = ISOgetTD(track, rt); + //acdrom_dbg("%s %d -> %d:%02d:%02d\n", __func__, track, rt[2], rt[1], rt[0]); + return ret; +} + +int cdra_prefetch(unsigned char m, unsigned char s, unsigned char f) +{ + u32 lba = MSF2SECT(m, s, f); + int ret = 1; + if (acdrom.cond) { + acdrom.prefetch_lba = lba; + acdrom.do_prefetch = 1; + scond_signal(acdrom.cond); + } + if (acdrom.buf_cache && !acdrom.prefetch_failed) { + u32 c = acdrom.buf_cnt; + if (c) + ret = acdrom.buf_cache[lba % c].lba == lba; + acdrom_dbg("p %d:%02d:%02d %d\n", m, s, f, ret); + } + return ret; +} + +static int cdra_do_read(const unsigned char *time, int cdda, + void *buf, void 
*buf_sub) +{ + u32 lba = MSF2SECT(time[0], time[1], time[2]); + int hit = 0, ret = -1, read_locked = 0; + do + { + if (acdrom.buf_lock) { + hit = lbacache_get(lba, buf, buf_sub); + if (hit) + break; + } + if (acdrom.read_lock) { + // maybe still prefetching + slock_lock(acdrom.read_lock); + read_locked = 1; + hit = lbacache_get(lba, buf, buf_sub); + if (hit) { + hit = 2; + break; + } + } + acdrom.do_prefetch = 0; + if (!buf) + buf = acdrom.buf_local; + if (g_cd_handle) + ret = cdrom_read_sector(g_cd_handle, lba, buf); + else if (buf_sub) + ret = ISOreadSub(time, buf_sub); + else if (cdda) + ret = ISOreadCDDA(time, buf); + else + ret = ISOreadTrack(time, buf); + if (ret) + SysPrintf("cdrom read failed for lba %d: %d\n", lba, ret); + } + while (0); + if (read_locked) + slock_unlock(acdrom.read_lock); + if (hit) + ret = 0; + acdrom.check_eject_delay = ret ? 0 : 100; + acdrom_dbg("f%c %d:%02d:%02d %d%s\n", + buf_sub ? 's' : (cdda ? 'c' : 'd'), + time[0], time[1], time[2], hit, ret ? " ERR" : ""); + return ret; +} + +// time: msf in non-bcd format +int cdra_readTrack(const unsigned char *time) +{ + if (!acdrom.thread && !g_cd_handle) { + // just forward to ISOreadTrack to avoid extra copying + return ISOreadTrack(time, NULL); + } + return cdra_do_read(time, 0, NULL, NULL); +} + +int cdra_readCDDA(const unsigned char *time, void *buffer) +{ + return cdra_do_read(time, 1, buffer, NULL); +} + +int cdra_readSub(const unsigned char *time, void *buffer) +{ + if (!acdrom.thread && !g_cd_handle) + return ISOreadSub(time, buffer); + if (!acdrom.have_subchannel) + return -1; + acdrom_dbg("s %d:%02d:%02d\n", time[0], time[1], time[2]); + return cdra_do_read(time, 0, NULL, buffer); +} + +// pointer to cached buffer from last cdra_readTrack() call +void *cdra_getBuffer(void) +{ + //acdrom_dbg("%s\n", __func__); + if (!acdrom.thread && !g_cd_handle) + return ISOgetBuffer(); + return acdrom.buf_local + 12; +} + +int cdra_getStatus(struct CdrStat *stat) +{ + int ret; + 
CDR__getStatus(stat); + if (g_cd_handle) + ret = rcdrom_getStatus(stat); + else + ret = ISOgetStatus(stat); + return ret; +} + +int cdra_is_physical(void) +{ + return !!g_cd_handle; +} + +int cdra_check_eject(int *inserted) +{ + if (!g_cd_handle || acdrom.do_prefetch || acdrom.check_eject_delay-- > 0) + return 0; + acdrom.check_eject_delay = 100; + *inserted = cdrom_is_media_inserted(g_cd_handle); // 1-2ms + return 1; +} + +void cdra_set_buf_count(int newcount) +{ + if (acdrom.buf_cnt == newcount) + return; + cdra_stop_thread(); + acdrom.buf_cnt = newcount; + cdra_start_thread(); +} + +#else + +// phys. CD-ROM without a cache is unusable so not implemented +#ifdef HAVE_CDROM +#error "HAVE_CDROM requires USE_ASYNC_CDROM" +#endif + +// just forward to cdriso +int cdra_init(void) +{ + return ISOinit(); +} + +void cdra_shutdown(void) +{ + ISOshutdown(); +} + +int cdra_open(void) +{ + return ISOopen(GetIsoFile()); +} + +void cdra_close(void) +{ + ISOclose(); +} + +int cdra_getTN(unsigned char *tn) +{ + return ISOgetTN(tn); +} + +int cdra_getTD(int track, unsigned char *rt) +{ + return ISOgetTD(track, rt); +} + +int cdra_prefetch(unsigned char m, unsigned char s, unsigned char f) +{ + return 1; // always hit +} + +// time: msf in non-bcd format +int cdra_readTrack(const unsigned char *time) +{ + return ISOreadTrack(time, NULL); +} + +int cdra_readCDDA(const unsigned char *time, void *buffer) +{ + return ISOreadCDDA(time, buffer); +} + +int cdra_readSub(const unsigned char *time, void *buffer) +{ + return ISOreadSub(time, buffer); +} + +// pointer to cached buffer from last cdra_readTrack() call +void *cdra_getBuffer(void) +{ + return ISOgetBuffer(); +} + +int cdra_getStatus(struct CdrStat *stat) +{ + return ISOgetStatus(stat); +} + +int cdra_is_physical(void) { return 0; } +int cdra_check_eject(int *inserted) { return 0; } +void cdra_stop_thread(void) {} +void cdra_set_buf_count(int newcount) {} + +#endif + +// vim:sw=3:ts=3:expandtab diff --git 
a/libpcsxcore/cdrom-async.h b/libpcsxcore/cdrom-async.h new file mode 100644 index 000000000..02fe6b717 --- /dev/null +++ b/libpcsxcore/cdrom-async.h @@ -0,0 +1,29 @@ + +#ifdef __cplusplus +extern "C" { +#endif + +struct CdrStat; + +int cdra_init(void); +void cdra_shutdown(void); +int cdra_open(void); +void cdra_close(void); +int cdra_getTN(unsigned char *tn); +int cdra_getTD(int track, unsigned char *rt); +int cdra_getStatus(struct CdrStat *stat); +int cdra_readTrack(const unsigned char *time); +int cdra_readCDDA(const unsigned char *time, void *buffer); +int cdra_readSub(const unsigned char *time, void *buffer); +int cdra_prefetch(unsigned char m, unsigned char s, unsigned char f); + +int cdra_is_physical(void); +int cdra_check_eject(int *inserted); +void cdra_stop_thread(void); +void cdra_set_buf_count(int count); + +void *cdra_getBuffer(void); + +#ifdef __cplusplus +} +#endif diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 335c2dc58..516ca8ed8 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -23,6 +23,7 @@ #include #include "cdrom.h" +#include "cdrom-async.h" #include "misc.h" #include "ppf.h" #include "psxdma.h" @@ -125,6 +126,18 @@ static struct { } cdr; static s16 read_buf[CD_FRAMESIZE_RAW/2]; +struct SubQ { + char res0[12]; + unsigned char ControlAndADR; + unsigned char TrackNumber; + unsigned char IndexNumber; + unsigned char TrackRelativeAddress[3]; + unsigned char Filler; + unsigned char AbsoluteAddress[3]; + unsigned char CRC[2]; + char res1[72]; +}; + /* CD-ROM magic numbers */ #define CdlSync 0 /* nocash documentation : "Uh, actually, returns error code 40h = Invalid Command...?" */ #define CdlNop 1 @@ -238,19 +251,6 @@ static unsigned int msf2sec(const u8 *msf) { return ((msf[0] * 60 + msf[1]) * 75) + msf[2]; } -// for that weird psemu API.. 
-static unsigned int fsm2sec(const u8 *msf) { - return ((msf[2] * 60 + msf[1]) * 75) + msf[0]; -} - -static void sec2msf(unsigned int s, u8 *msf) { - msf[0] = s / 75 / 60; - s = s - msf[0] * 75 * 60; - msf[1] = s / 75; - s = s - msf[1] * 75; - msf[2] = s; -} - // cdrPlayReadInterrupt #define CDRPLAYREAD_INT(eCycle, isFirst) { \ u32 e_ = eCycle; \ @@ -269,7 +269,6 @@ static void sec2msf(unsigned int s, u8 *msf) { } #define StopCdda() { \ - if (cdr.Play && !Config.Cdda) CDR_stop(); \ cdr.Play = FALSE; \ cdr.FastForward = 0; \ cdr.FastBackward = 0; \ @@ -327,7 +326,7 @@ void cdrLidSeekInterrupt(void) //StopReading(); SetPlaySeekRead(cdr.StatP, 0); - if (CDR_getStatus(&cdr_stat) == -1) + if (cdra_getStatus(&cdr_stat) == -1) return; if (cdr_stat.Status & STATUS_SHELLOPEN) @@ -339,7 +338,7 @@ void cdrLidSeekInterrupt(void) break; case DRIVESTATE_LID_OPEN: - if (CDR_getStatus(&cdr_stat) == -1) + if (cdra_getStatus(&cdr_stat) != 0) cdr_stat.Status &= ~STATUS_SHELLOPEN; // 02, 12, 10 @@ -412,8 +411,8 @@ static void Find_CurTrack(const u8 *time) current = msf2sec(time); for (cdr.CurTrack = 1; cdr.CurTrack < cdr.ResultTN[1]; cdr.CurTrack++) { - CDR_getTD(cdr.CurTrack + 1, cdr.ResultTD); - sect = fsm2sec(cdr.ResultTD); + cdra_getTD(cdr.CurTrack + 1, cdr.ResultTD); + sect = msf2sec(cdr.ResultTD); if (sect - current >= 150) break; } @@ -425,22 +424,20 @@ static void generate_subq(const u8 *time) unsigned int this_s, start_s, next_s, pregap; int relative_s; - CDR_getTD(cdr.CurTrack, start); + cdra_getTD(cdr.CurTrack, start); if (cdr.CurTrack + 1 <= cdr.ResultTN[1]) { pregap = 150; - CDR_getTD(cdr.CurTrack + 1, next); + cdra_getTD(cdr.CurTrack + 1, next); } else { // last track - cd size pregap = 0; - next[0] = cdr.SetSectorEnd[2]; - next[1] = cdr.SetSectorEnd[1]; - next[2] = cdr.SetSectorEnd[0]; + memcpy(next, cdr.SetSectorEnd, 3); } this_s = msf2sec(time); - start_s = fsm2sec(start); - next_s = fsm2sec(next); + start_s = msf2sec(start); + next_s = msf2sec(next); cdr.TrackChanged 
= FALSE; @@ -457,7 +454,8 @@ static void generate_subq(const u8 *time) cdr.subq.Index = 0; relative_s = -relative_s; } - sec2msf(relative_s, cdr.subq.Relative); + lba2msf(relative_s, &cdr.subq.Relative[0], + &cdr.subq.Relative[1], &cdr.subq.Relative[2]); cdr.subq.Track = itob(cdr.CurTrack); cdr.subq.Relative[0] = itob(cdr.subq.Relative[0]); @@ -470,41 +468,37 @@ static void generate_subq(const u8 *time) static int ReadTrack(const u8 *time) { - unsigned char tmp[3]; - int read_ok; + int ret; - tmp[0] = itob(time[0]); - tmp[1] = itob(time[1]); - tmp[2] = itob(time[2]); + CDR_LOG("ReadTrack *** %02d:%02d:%02d\n", tmp[0], tmp[1], tmp[2]); - CDR_LOG("ReadTrack *** %02x:%02x:%02x\n", tmp[0], tmp[1], tmp[2]); - - if (memcmp(cdr.Prev, tmp, 3) == 0) + if (memcmp(cdr.Prev, time, 3) == 0) return 1; - read_ok = CDR_readTrack(tmp); - if (read_ok) - memcpy(cdr.Prev, tmp, 3); - return read_ok; + ret = cdra_readTrack(time); + if (ret != 0) + memcpy(cdr.Prev, time, 3); + return ret == 0; } static void UpdateSubq(const u8 *time) { - const struct SubQ *subq; - int s = MSF2SECT(time[0], time[1], time[2]); + int ret = -1, s = MSF2SECT(time[0], time[1], time[2]); + struct SubQ subq; u16 crc; if (CheckSBI(s)) return; - subq = (struct SubQ *)CDR_getBufferSub(s); - if (subq != NULL && cdr.CurTrack == 1) { - crc = calcCrc((u8 *)subq + 12, 10); - if (crc == (((u16)subq->CRC[0] << 8) | subq->CRC[1])) { - cdr.subq.Track = subq->TrackNumber; - cdr.subq.Index = subq->IndexNumber; - memcpy(cdr.subq.Relative, subq->TrackRelativeAddress, 3); - memcpy(cdr.subq.Absolute, subq->AbsoluteAddress, 3); + if (cdr.CurTrack == 1) + ret = cdra_readSub(time, &subq); + if (ret == 0) { + crc = calcCrc((u8 *)&subq + 12, 10); + if (crc == (((u16)subq.CRC[0] << 8) | subq.CRC[1])) { + cdr.subq.Track = subq.TrackNumber; + cdr.subq.Index = subq.IndexNumber; + memcpy(cdr.subq.Relative, subq.TrackRelativeAddress, 3); + memcpy(cdr.subq.Absolute, subq.AbsoluteAddress, 3); } else { CDR_LOG_I("subq bad crc 
@%02d:%02d:%02d\n", @@ -669,7 +663,7 @@ static int msfiEq(const u8 *a, const u8 *b) void cdrPlayReadInterrupt(void) { - int hit = CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); + int hit = cdra_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); if (!hit && cdr.PhysCdPropagations++ < 222) { // this propagates real cdrom delays to the emulated game CDRPLAYREAD_INT(cdReadTime / 2, 0); @@ -699,7 +693,7 @@ void cdrPlayReadInterrupt(void) cdr.DriveState = DRIVESTATE_PAUSED; } else { - CDR_readCDDA(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2], (u8 *)read_buf); + cdra_readCDDA(cdr.SetSectorPlay, read_buf); } if (!cdr.IrqStat && (cdr.Mode & (MODE_AUTOPAUSE|MODE_REPORT))) @@ -711,7 +705,7 @@ void cdrPlayReadInterrupt(void) } msfiAdd(cdr.SetSectorPlay, 1); - CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); + cdra_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); // update for CdlGetlocP/autopause generate_subq(cdr.SetSectorPlay); @@ -721,7 +715,7 @@ void cdrPlayReadInterrupt(void) static void softReset(void) { - CDR_getStatus(&cdr_stat); + cdra_getStatus(&cdr_stat); if (cdr_stat.Status & STATUS_SHELLOPEN) { cdr.DriveState = DRIVESTATE_LID_OPEN; cdr.StatP = STATUS_SHELLOPEN; @@ -753,7 +747,6 @@ void cdrInterrupt(void) { u32 second_resp_time = 0; const void *buf; u8 ParamC; - u8 set_loc[3]; int read_ok; u16 not_ready = 0; u8 IrqStat = Acknowledge; @@ -768,7 +761,7 @@ void cdrInterrupt(void) { } if (cdr.Irq1Pending) { // hand out the "newest" sector, according to nocash - cdrUpdateTransferBuf(CDR_getBuffer()); + cdrUpdateTransferBuf(cdra_getBuffer()); CDR_LOG_I("%x:%02x:%02x loaded on ack, cmd=%02x res=%02x\n", cdr.Transfer[0], cdr.Transfer[1], cdr.Transfer[2], cdr.CmdInProgress, cdr.Irq1Pending); @@ -833,6 +826,7 @@ void cdrInterrupt(void) { } else { + u8 set_loc[3]; for (i = 0; i < 3; i++) set_loc[i] = btoi(cdr.Param[i]); if 
((msfiEq(cdr.SetSector, set_loc)) //|| msfiEq(cdr.Param, cdr.Transfer)) @@ -866,11 +860,9 @@ void cdrInterrupt(void) { CDR_LOG("PLAY track %d\n", cdr.CurTrack); - if (CDR_getTD((u8)cdr.CurTrack, cdr.ResultTD) != -1) { - for (i = 0; i < 3; i++) - set_loc[i] = cdr.ResultTD[2 - i]; - seekTime = cdrSeekTime(set_loc); - memcpy(cdr.SetSectorPlay, set_loc, 3); + if (cdra_getTD(cdr.CurTrack, cdr.ResultTD) != -1) { + seekTime = cdrSeekTime(cdr.ResultTD); + memcpy(cdr.SetSectorPlay, cdr.ResultTD, 3); } } else if (cdr.SetlocPending) { @@ -903,9 +895,6 @@ void cdrInterrupt(void) { cdr.ReportDelay = 60; cdr.sectorsRead = 0; - if (!Config.Cdda) - CDR_play(cdr.SetSectorPlay); - SetPlaySeekRead(cdr.StatP, STATUS_SEEK | STATUS_ROTATING); // BIOS player - set flag again @@ -950,11 +939,8 @@ void cdrInterrupt(void) { case CdlStop: if (cdr.Play) { // grab time for current track - CDR_getTD((u8)(cdr.CurTrack), cdr.ResultTD); - - cdr.SetSectorPlay[0] = cdr.ResultTD[2]; - cdr.SetSectorPlay[1] = cdr.ResultTD[1]; - cdr.SetSectorPlay[2] = cdr.ResultTD[0]; + cdra_getTD(cdr.CurTrack, cdr.ResultTD); + memcpy(cdr.SetSectorPlay, cdr.ResultTD, 3); } StopCdda(); @@ -1097,7 +1083,7 @@ void cdrInterrupt(void) { break; case CdlGetTN: - if (CDR_getTN(cdr.ResultTN) == -1) { + if (cdra_getTN(cdr.ResultTN) != 0) { assert(0); } SetResultSize_(3); @@ -1107,15 +1093,15 @@ void cdrInterrupt(void) { case CdlGetTD: cdr.Track = btoi(cdr.Param[0]); - if (CDR_getTD(cdr.Track, cdr.ResultTD) == -1) { + if (cdra_getTD(cdr.Track, cdr.ResultTD) != 0) { error = ERROR_BAD_ARGVAL; goto set_error; } SetResultSize_(3); - cdr.Result[1] = itob(cdr.ResultTD[2]); + cdr.Result[1] = itob(cdr.ResultTD[0]); cdr.Result[2] = itob(cdr.ResultTD[1]); // no sector number - //cdr.Result[3] = itob(cdr.ResultTD[0]); + //cdr.Result[3] = itob(cdr.ResultTD[2]); break; case CdlSeekL: @@ -1128,7 +1114,7 @@ void cdrInterrupt(void) { seekTime = cdrSeekTime(cdr.SetSector); memcpy(cdr.SetSectorPlay, cdr.SetSector, 4); cdr.DriveState = 
DRIVESTATE_SEEK; - CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], + cdra_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); /* Crusaders of Might and Magic = 0.5x-4x @@ -1155,7 +1141,7 @@ void cdrInterrupt(void) { Find_CurTrack(cdr.SetSectorPlay); read_ok = ReadTrack(cdr.SetSectorPlay); - if (read_ok && (buf = CDR_getBuffer())) + if (read_ok && (buf = cdra_getBuffer())) memcpy(cdr.LocL, buf, 8); UpdateSubq(cdr.SetSectorPlay); cdr.DriveState = DRIVESTATE_STANDBY; @@ -1193,7 +1179,7 @@ void cdrInterrupt(void) { cdr.Result[3] = 0; // 0x10 - audio | 0x40 - disk missing | 0x80 - unlicensed - if (CDR_getStatus(&cdr_stat) == -1 || cdr_stat.Type == 0 || cdr_stat.Type == 0xff) { + if (cdra_getStatus(&cdr_stat) != 0 || cdr_stat.Type == 0 || cdr_stat.Type == 0xff) { cdr.Result[1] = 0xc0; } else { @@ -1267,7 +1253,7 @@ void cdrInterrupt(void) { cdr.SubqForwardSectors = 1; cdr.sectorsRead = 0; cdr.DriveState = DRIVESTATE_SEEK; - CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], + cdra_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); cycles = (cdr.Mode & MODE_SPEED) ? cdReadTime : cdReadTime * 2; @@ -1385,7 +1371,7 @@ static void cdrReadInterrupt(void) read_ok = ReadTrack(cdr.SetSectorPlay); if (read_ok) - buf = CDR_getBuffer(); + buf = cdra_getBuffer(); if (buf == NULL) read_ok = 0; @@ -1448,7 +1434,7 @@ static void cdrReadInterrupt(void) cdrReadInterruptSetResult(cdr.StatP); msfiAdd(cdr.SetSectorPlay, 1); - CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); + cdra_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); CDRPLAYREAD_INT((cdr.Mode & MODE_SPEED) ? 
(cdReadTime / 2) : cdReadTime, 0); } @@ -1750,13 +1736,8 @@ void cdrDmaInterrupt(void) static void getCdInfo(void) { - u8 tmp; - - CDR_getTN(cdr.ResultTN); - CDR_getTD(0, cdr.SetSectorEnd); - tmp = cdr.SetSectorEnd[0]; - cdr.SetSectorEnd[0] = cdr.SetSectorEnd[2]; - cdr.SetSectorEnd[2] = tmp; + cdra_getTN(cdr.ResultTN); + cdra_getTD(0, cdr.SetSectorEnd); } void cdrReset() { @@ -1781,10 +1762,7 @@ int cdrFreeze(void *f, int Mode) { u32 tmp; u8 tmpp[3]; - if (Mode == 0 && !Config.Cdda) - CDR_stop(); - - cdr.freeze_ver = 0x63647202; + cdr.freeze_ver = 0x63647203; gzfreeze(&cdr, sizeof(cdr)); if (Mode == 1) { @@ -1804,9 +1782,12 @@ int cdrFreeze(void *f, int Mode) { cdr.SubqForwardSectors = SUBQ_FORWARD_SECTORS; // read right sub data - tmpp[0] = btoi(cdr.Prev[0]); - tmpp[1] = btoi(cdr.Prev[1]); - tmpp[2] = btoi(cdr.Prev[2]); + memcpy(tmpp, cdr.Prev, sizeof(tmpp)); + if (cdr.freeze_ver < 0x63647203) { + tmpp[0] = btoi(tmpp[0]); + tmpp[1] = btoi(tmpp[1]); + tmpp[2] = btoi(tmpp[2]); + } cdr.Prev[0]++; ReadTrack(tmpp); @@ -1815,8 +1796,6 @@ int cdrFreeze(void *f, int Mode) { memcpy(cdr.SetSectorPlay, cdr.SetSector, 3); Find_CurTrack(cdr.SetSectorPlay); - if (!Config.Cdda) - CDR_play(cdr.SetSectorPlay); } if (!cdr.Muted) ll = cdr.AttenuatorLeftToLeft, lr = cdr.AttenuatorLeftToLeft, diff --git a/libpcsxcore/cdrom.h b/libpcsxcore/cdrom.h index ee0b4d4bb..b8682b034 100644 --- a/libpcsxcore/cdrom.h +++ b/libpcsxcore/cdrom.h @@ -38,13 +38,21 @@ extern "C" { #define MIN_VALUE(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a < _b ? _a : _b; }) #define MAX_VALUE(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a > _b ? 
_a : _b; }) -#define MSF2SECT(m, s, f) (((m) * 60 + (s) - 2) * 75 + (f)) - #define CD_FRAMESIZE_RAW 2352 #define DATA_SIZE (CD_FRAMESIZE_RAW - 12) #define SUB_FRAMESIZE 96 +#define MSF2SECT(m, s, f) (((m) * 60 + (s) - 2) * 75 + (f)) + +static inline void lba2msf(unsigned int lba, u8 *m, u8 *s, u8 *f) { + *m = lba / 75 / 60; + lba = lba - *m * 75 * 60; + *s = lba / 75; + lba = lba - *s * 75; + *f = lba; +} + void cdrReset(); void cdrInterrupt(void); diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index 2e382e3c1..c05b80cd9 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -1,6 +1,7 @@ #include "misc.h" #include "sio.h" #include "ppf.h" +#include "cdrom-async.h" #include "new_dynarec/new_dynarec.h" #include "lightrec/plugin.h" @@ -274,7 +275,9 @@ static const u16 libcrypt_sectors[16] = { int check_unsatisfied_libcrypt(void) { const char *p = CdromId + 4; + u8 buf_sub[SUB_FRAMESIZE]; u16 id, key = 0; + u8 msf[3]; size_t i; if (strncmp(CdromId, "SCE", 3) && strncmp(CdromId, "SLE", 3)) @@ -289,7 +292,8 @@ int check_unsatisfied_libcrypt(void) return 0; // detected a protected game - if (!CDR_getBufferSub(libcrypt_sectors[0]) && !sbi_sectors) { + lba2msf(libcrypt_sectors[0] + 150, &msf[0], &msf[1], &msf[2]); + if (!sbi_sectors && cdra_readSub(msf, buf_sub) != 0) { SysPrintf("==================================================\n"); SysPrintf("LibCrypt game detected with missing SBI/subchannel\n"); SysPrintf("==================================================\n"); diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index d4c886f4f..68982abda 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -26,6 +26,7 @@ #include #include "misc.h" #include "cdrom.h" +#include "cdrom-async.h" #include "mdec.h" #include "gpu.h" #include "ppf.h" @@ -71,17 +72,12 @@ static void mmssdd( char *b, char *p ) s = block / 75; // seconds d = block - s * 75; // seconds rest - m = ((m / 10) << 4) | m % 10; - s = ((s / 10) << 4) | s % 10; - d = ((d / 10) << 4) | 
d % 10; - p[0] = m; p[1] = s; p[2] = d; } #define incTime() \ - time[0] = btoi(time[0]); time[1] = btoi(time[1]); time[2] = btoi(time[2]); \ time[2]++; \ if(time[2] == 75) { \ time[2] = 0; \ @@ -91,11 +87,10 @@ static void mmssdd( char *b, char *p ) time[0]++; \ } \ } \ - time[0] = itob(time[0]); time[1] = itob(time[1]); time[2] = itob(time[2]); #define READTRACK() \ - if (!CDR_readTrack(time)) return -1; \ - buf = (void *)CDR_getBuffer(); \ + if (cdra_readTrack(time)) return -1; \ + buf = cdra_getBuffer(); \ if (buf == NULL) return -1; \ else CheckPPFCache((u8 *)buf, time[0], time[1], time[2]); @@ -216,7 +211,7 @@ int LoadCdrom() { return 0; } - time[0] = itob(0); time[1] = itob(2); time[2] = itob(0x10); + time[0] = 0; time[1] = 2; time[2] = 0x10; READTRACK(); @@ -320,7 +315,7 @@ int LoadCdromFile(const char *filename, EXE_HEADER *head, u8 *time_bcd_out) { p1++; snprintf(exename, sizeof(exename), "%s", p1); - time[0] = itob(0); time[1] = itob(2); time[2] = itob(0x10); + time[0] = 0; time[1] = 2; time[2] = 0x10; READTRACK(); @@ -374,14 +369,14 @@ int CheckCdrom() { memset(CdromId, 0, sizeof(CdromId)); memset(exename, 0, sizeof(exename)); - time[0] = itob(0); - time[1] = itob(2); - time[2] = itob(0x10); + time[0] = 0; + time[1] = 2; + time[2] = 0x10; if (!Config.HLE && Config.SlowBoot) { // boot to BIOS in case of CDDA or lid is open - CDR_getStatus(&stat); - if ((stat.Status & 0x10) || stat.Type == 2 || !CDR_readTrack(time)) + cdra_getStatus(&stat); + if ((stat.Status & 0x10) || stat.Type == 2 || cdra_readTrack(time)) return 0; } READTRACK(); @@ -967,7 +962,7 @@ static unsigned short crctab[256] = { 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0 }; -u16 calcCrc(u8 *d, int len) { +u16 calcCrc(const u8 *d, int len) { u16 crc = 0; int i; diff --git a/libpcsxcore/misc.h b/libpcsxcore/misc.h index 22245d88a..303af7b77 100644 --- a/libpcsxcore/misc.h +++ b/libpcsxcore/misc.h @@ -72,7 +72,7 @@ int SendPcsxInfo(); int RecvPcsxInfo(); void trim(char *str); -u16 calcCrc(u8 *d, int len); 
+u16 calcCrc(const u8 *d, int len); #ifdef __cplusplus } diff --git a/libpcsxcore/plugins.c b/libpcsxcore/plugins.c index 369ea9743..e59e8c602 100644 --- a/libpcsxcore/plugins.c +++ b/libpcsxcore/plugins.c @@ -23,6 +23,7 @@ #include "plugins.h" #include "cdriso.h" +#include "cdrom-async.h" #include "psxcounters.h" static char IsoFile[MAXPATHLEN] = ""; @@ -49,27 +50,6 @@ GPUshowScreenPic GPU_showScreenPic; GPUvBlank GPU_vBlank; GPUgetScreenInfo GPU_getScreenInfo; -CDRinit CDR_init; -CDRshutdown CDR_shutdown; -CDRopen CDR_open; -CDRclose CDR_close; -CDRtest CDR_test; -CDRgetTN CDR_getTN; -CDRgetTD CDR_getTD; -CDRreadTrack CDR_readTrack; -CDRgetBuffer CDR_getBuffer; -CDRplay CDR_play; -CDRstop CDR_stop; -CDRgetStatus CDR_getStatus; -CDRgetDriveLetter CDR_getDriveLetter; -CDRgetBufferSub CDR_getBufferSub; -CDRconfigure CDR_configure; -CDRabout CDR_about; -CDRsetfilename CDR_setfilename; -CDRreadCDDA CDR_readCDDA; -CDRgetTE CDR_getTE; -CDRprefetch CDR_prefetch; - SPUinit SPU_init; SPUshutdown SPU_shutdown; SPUopen SPU_open; @@ -239,12 +219,7 @@ static int LoadGPUplugin(const char *GPUdll) { return 0; } -void *hCDRDriver = NULL; - -long CALLBACK CDR__play(unsigned char *sector) { return 0; } -long CALLBACK CDR__stop(void) { return 0; } - -long CALLBACK CDR__getStatus(struct CdrStat *stat) { +int CDR__getStatus(struct CdrStat *stat) { if (cdOpenCaseTime < 0 || cdOpenCaseTime > (s64)time(NULL)) stat->Status = 0x10; else @@ -253,61 +228,6 @@ long CALLBACK CDR__getStatus(struct CdrStat *stat) { return 0; } -char* CALLBACK CDR__getDriveLetter(void) { return NULL; } -long CALLBACK CDR__configure(void) { return 0; } -long CALLBACK CDR__test(void) { return 0; } -void CALLBACK CDR__about(void) {} -long CALLBACK CDR__setfilename(char*filename) { return 0; } -long CALLBACK CDR__prefetch(u8 m, u8 s, u8 f) { return 1; } - -#define LoadCdrSym1(dest, name) \ - LoadSym(CDR_##dest, CDR##dest, name, TRUE); - -#define LoadCdrSym0(dest, name) \ - LoadSym(CDR_##dest, CDR##dest, name, FALSE); 
\ - if (CDR_##dest == NULL) CDR_##dest = (CDR##dest) CDR__##dest; - -#define LoadCdrSymN(dest, name) \ - LoadSym(CDR_##dest, CDR##dest, name, FALSE); - -static int LoadCDRplugin(const char *CDRdll) { - void *drv; - - if (CDRdll == NULL) { - cdrIsoInit(); - return 0; - } - - hCDRDriver = SysLoadLibrary(CDRdll); - if (hCDRDriver == NULL) { - CDR_configure = NULL; - SysMessage (_("Could not load CD-ROM plugin %s!"), CDRdll); return -1; - } - drv = hCDRDriver; - LoadCdrSym1(init, "CDRinit"); - LoadCdrSym1(shutdown, "CDRshutdown"); - LoadCdrSym1(open, "CDRopen"); - LoadCdrSym1(close, "CDRclose"); - LoadCdrSym1(getTN, "CDRgetTN"); - LoadCdrSym1(getTD, "CDRgetTD"); - LoadCdrSym1(readTrack, "CDRreadTrack"); - LoadCdrSym1(getBuffer, "CDRgetBuffer"); - LoadCdrSym1(getBufferSub, "CDRgetBufferSub"); - LoadCdrSym0(play, "CDRplay"); - LoadCdrSym0(stop, "CDRstop"); - LoadCdrSym0(getStatus, "CDRgetStatus"); - LoadCdrSym0(getDriveLetter, "CDRgetDriveLetter"); - LoadCdrSym0(configure, "CDRconfigure"); - LoadCdrSym0(test, "CDRtest"); - LoadCdrSym0(about, "CDRabout"); - LoadCdrSym0(setfilename, "CDRsetfilename"); - LoadCdrSymN(readCDDA, "CDRreadCDDA"); - LoadCdrSymN(getTE, "CDRgetTE"); - LoadCdrSym0(prefetch, "CDRprefetch"); - - return 0; -} - static void *hSPUDriver = NULL; static void CALLBACK SPU__registerScheduleCb(void (CALLBACK *cb)(unsigned int)) {} static void CALLBACK SPU__setCDvol(unsigned char ll, unsigned char lr, @@ -1161,19 +1081,12 @@ static int LoadSIO1plugin(const char *SIO1dll) { #endif int LoadPlugins() { - int ret; char Plugin[MAXPATHLEN * 2]; + int ret; ReleasePlugins(); SysLibError(); - if (UsingIso()) { - LoadCDRplugin(NULL); - } else { - sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Cdr); - if (LoadCDRplugin(Plugin) == -1) return -1; - } - sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Gpu); if (LoadGPUplugin(Plugin) == -1) return -1; @@ -1199,7 +1112,7 @@ int LoadPlugins() { if (LoadSIO1plugin(Plugin) == -1) return -1; #endif - ret = CDR_init(); + ret 
= cdra_init(); if (ret < 0) { SysMessage (_("Error initializing CD-ROM plugin: %d"), ret); return -1; } ret = GPU_init(); if (ret < 0) { SysMessage (_("Error initializing GPU plugin: %d"), ret); return -1; } @@ -1231,7 +1144,7 @@ void ReleasePlugins() { } NetOpened = FALSE; - if (hCDRDriver != NULL || cdrIsoActive()) CDR_shutdown(); + cdra_shutdown(); if (hGPUDriver != NULL) GPU_shutdown(); if (hSPUDriver != NULL) SPU_shutdown(); if (hPAD1Driver != NULL) PAD1_shutdown(); @@ -1239,7 +1152,6 @@ void ReleasePlugins() { if (Config.UseNet && hNETDriver != NULL) NET_shutdown(); - if (hCDRDriver != NULL) { SysCloseLibrary(hCDRDriver); hCDRDriver = NULL; } if (hGPUDriver != NULL) { SysCloseLibrary(hGPUDriver); hGPUDriver = NULL; } if (hSPUDriver != NULL) { SysCloseLibrary(hSPUDriver); hSPUDriver = NULL; } if (hPAD1Driver != NULL) { SysCloseLibrary(hPAD1Driver); hPAD1Driver = NULL; } @@ -1261,18 +1173,8 @@ void ReleasePlugins() { // for CD swap int ReloadCdromPlugin() { - if (hCDRDriver != NULL || cdrIsoActive()) CDR_shutdown(); - if (hCDRDriver != NULL) { SysCloseLibrary(hCDRDriver); hCDRDriver = NULL; } - - if (UsingIso()) { - LoadCDRplugin(NULL); - } else { - char Plugin[MAXPATHLEN * 2]; - sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Cdr); - if (LoadCDRplugin(Plugin) == -1) return -1; - } - - return CDR_init(); + cdra_shutdown(); + return cdra_init(); } void SetIsoFile(const char *filename) { diff --git a/libpcsxcore/plugins.h b/libpcsxcore/plugins.h index 4054bf67a..b2efbcea3 100644 --- a/libpcsxcore/plugins.h +++ b/libpcsxcore/plugins.h @@ -97,67 +97,14 @@ extern GPUshowScreenPic GPU_showScreenPic; extern GPUvBlank GPU_vBlank; extern GPUgetScreenInfo GPU_getScreenInfo; -// CD-ROM Functions -typedef long (CALLBACK* CDRinit)(void); -typedef long (CALLBACK* CDRshutdown)(void); -typedef long (CALLBACK* CDRopen)(void); -typedef long (CALLBACK* CDRclose)(void); -typedef long (CALLBACK* CDRgetTN)(unsigned char *); -typedef long (CALLBACK* CDRgetTD)(unsigned char, 
unsigned char *); -typedef boolean (CALLBACK* CDRreadTrack)(unsigned char *); -typedef unsigned char* (CALLBACK* CDRgetBuffer)(void); -typedef unsigned char* (CALLBACK* CDRgetBufferSub)(int sector); -typedef long (CALLBACK* CDRconfigure)(void); -typedef long (CALLBACK* CDRtest)(void); -typedef void (CALLBACK* CDRabout)(void); -typedef long (CALLBACK* CDRplay)(unsigned char *); -typedef long (CALLBACK* CDRstop)(void); -typedef long (CALLBACK* CDRsetfilename)(char *); +// CD-ROM struct CdrStat { uint32_t Type; // DATA, CDDA uint32_t Status; // same as cdr.StatP unsigned char Time_[3]; // unused }; -typedef long (CALLBACK* CDRgetStatus)(struct CdrStat *); -typedef char* (CALLBACK* CDRgetDriveLetter)(void); -struct SubQ { - char res0[12]; - unsigned char ControlAndADR; - unsigned char TrackNumber; - unsigned char IndexNumber; - unsigned char TrackRelativeAddress[3]; - unsigned char Filler; - unsigned char AbsoluteAddress[3]; - unsigned char CRC[2]; - char res1[72]; -}; -typedef long (CALLBACK* CDRreadCDDA)(unsigned char, unsigned char, unsigned char, unsigned char *); -typedef long (CALLBACK* CDRgetTE)(unsigned char, unsigned char *, unsigned char *, unsigned char *); -typedef long (CALLBACK* CDRprefetch)(unsigned char, unsigned char, unsigned char); - -// CD-ROM function pointers -extern CDRinit CDR_init; -extern CDRshutdown CDR_shutdown; -extern CDRopen CDR_open; -extern CDRclose CDR_close; -extern CDRtest CDR_test; -extern CDRgetTN CDR_getTN; -extern CDRgetTD CDR_getTD; -extern CDRreadTrack CDR_readTrack; -extern CDRgetBuffer CDR_getBuffer; -extern CDRgetBufferSub CDR_getBufferSub; -extern CDRplay CDR_play; -extern CDRstop CDR_stop; -extern CDRgetStatus CDR_getStatus; -extern CDRgetDriveLetter CDR_getDriveLetter; -extern CDRconfigure CDR_configure; -extern CDRabout CDR_about; -extern CDRsetfilename CDR_setfilename; -extern CDRreadCDDA CDR_readCDDA; -extern CDRgetTE CDR_getTE; -extern CDRprefetch CDR_prefetch; -long CALLBACK CDR__getStatus(struct CdrStat *stat); +int 
CDR__getStatus(struct CdrStat *stat); // SPU Functions typedef long (CALLBACK* SPUinit)(void); diff --git a/libpcsxcore/ppf.c b/libpcsxcore/ppf.c index 6a88e0536..3dcf416e2 100644 --- a/libpcsxcore/ppf.c +++ b/libpcsxcore/ppf.c @@ -94,7 +94,7 @@ void FreePPFCache() { void CheckPPFCache(unsigned char *pB, unsigned char m, unsigned char s, unsigned char f) { PPF_CACHE *pcstart, *pcend, *pcpos; - int addr = MSF2SECT(btoi(m), btoi(s), btoi(f)), pos, anz, start; + int addr = MSF2SECT(m, s, f), pos, anz, start; if (ppfCache == NULL) return; diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index 0a0bd8633..68c32a91a 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -117,7 +117,6 @@ void __Log(char *fmt, ...); typedef struct { char Gpu[MAXPATHLEN]; char Spu[MAXPATHLEN]; - char Cdr[MAXPATHLEN]; char Pad1[MAXPATHLEN]; char Pad2[MAXPATHLEN]; char Net[MAXPATHLEN]; @@ -132,7 +131,6 @@ typedef struct { boolean Mdec; boolean PsxAuto; boolean Cdda; - boolean AsyncCD; boolean CHD_Precache; /* loads disk image into memory, works with CHD only. 
*/ boolean HLE; boolean SlowBoot; From de0ed23db1caf164f27fb98b3e8a5cdfc0333292 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 6 Oct 2024 02:56:57 +0300 Subject: [PATCH 567/597] deal with some platform-specific warnings --- libpcsxcore/misc.c | 2 +- libpcsxcore/new_dynarec/assem_arm.c | 1 + plugins/dfsound/spu.c | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 68982abda..396658723 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -432,7 +432,7 @@ int CheckCdrom() { for (i = 0; i < len; ++i) { if (exename[i] == ';' || c >= sizeof(CdromId) - 1) break; - if (isalnum(exename[i])) + if (isalnum((int)exename[i])) CdromId[c++] = exename[i]; } } diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 70798effe..308f4a000 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -19,6 +19,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#include // ffs #define FLAGLESS #include "../gte.h" #undef FLAGLESS diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index ed5b4b5c7..bc9cbf114 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -1723,6 +1723,9 @@ void spu_get_debug_info(int *chans_out, int *run_chans, int *fmod_chans_out, int *run_chans = ~spu.dwChannelsAudible & ~spu.dwChannelDead & irq_chans; *fmod_chans_out = fmod_chans; *noise_chans_out = noise_chans; + // sometimes unused + (void)do_samples_skip_fmod; + (void)SkipADSR; } // vim:shiftwidth=1:expandtab From a4c71501b85f6bfd791cea93082beadb8a49cdaa Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 8 Oct 2024 02:46:33 +0300 Subject: [PATCH 568/597] enable lightrec --- .gitmodules | 6 ++++++ Makefile | 56 ++++++++++++++++++++++++++++++++++++++++++++++++-- configure | 33 +++++++++++++++++++---------- deps/libchdr | 2 +- deps/lightning | 1 + deps/lightrec | 1 + 6 files changed, 85 insertions(+), 14 deletions(-) create mode 160000 deps/lightning create mode 160000 deps/lightrec diff --git a/.gitmodules b/.gitmodules index 44495e687..fa655497c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,9 @@ [submodule "libchdr"] path = deps/libchdr url = https://github.com/rtissera/libchdr.git +[submodule "deps/lightrec"] + path = deps/lightrec + url = https://github.com/pcercuei/lightrec.git +[submodule "deps/lightning"] + path = deps/lightning + url = https://github.com/pcercuei/gnu_lightning.git diff --git a/Makefile b/Makefile index 61eb95ef3..1e965a69d 100644 --- a/Makefile +++ b/Makefile @@ -19,6 +19,9 @@ CXXFLAGS += $(CFLAGS) #DRC_DBG = 1 #PCNT = 1 +# Suppress minor warnings for dependencies +deps/%: CFLAGS += -Wno-unused -Wno-unused-function + all: config.mak target_ plugins_ ifndef NO_CONFIG_MAK @@ -67,7 +70,54 @@ endif libpcsxcore/psxbios.o: CFLAGS += -Wno-nonnull # dynarec -ifeq "$(USE_DYNAREC)" "1" +ifeq "$(DYNAREC)" "lightrec" +CFLAGS += 
-Ideps/lightning/include -Ideps/lightrec -Iinclude/lightning -Iinclude/lightrec \ + -DLIGHTREC -DLIGHTREC_STATIC +LIGHTREC_CUSTOM_MAP ?= 0 +LIGHTREC_CUSTOM_MAP_OBJ ?= libpcsxcore/lightrec/mem.o +LIGHTREC_THREADED_COMPILER ?= 0 +LIGHTREC_CODE_INV ?= 0 +CFLAGS += -DLIGHTREC_CUSTOM_MAP=$(LIGHTREC_CUSTOM_MAP) \ + -DLIGHTREC_CODE_INV=$(LIGHTREC_CODE_INV) \ + -DLIGHTREC_ENABLE_THREADED_COMPILER=$(LIGHTREC_THREADED_COMPILER) +ifeq ($(LIGHTREC_CUSTOM_MAP),1) +LDLIBS += -lrt +OBJS += $(LIGHTREC_CUSTOM_MAP_OBJ) +endif +ifeq ($(NEED_SYSCONF),1) +OBJS += libpcsxcore/lightrec/sysconf.o +endif +ifeq ($(LIGHTREC_THREADED_COMPILER),1) +OBJS += deps/lightrec/recompiler.o \ + deps/lightrec/reaper.o +endif +OBJS += deps/lightrec/tlsf/tlsf.o +OBJS += libpcsxcore/lightrec/plugin.o +OBJS += deps/lightning/lib/jit_disasm.o \ + deps/lightning/lib/jit_memory.o \ + deps/lightning/lib/jit_names.o \ + deps/lightning/lib/jit_note.o \ + deps/lightning/lib/jit_print.o \ + deps/lightning/lib/jit_size.o \ + deps/lightning/lib/lightning.o \ + deps/lightrec/blockcache.o \ + deps/lightrec/constprop.o \ + deps/lightrec/disassembler.o \ + deps/lightrec/emitter.o \ + deps/lightrec/interpreter.o \ + deps/lightrec/lightrec.o \ + deps/lightrec/memmanager.o \ + deps/lightrec/optimizer.o \ + deps/lightrec/regcache.o +deps/lightning/%.o: CFLAGS += -DHAVE_MMAP=P_HAVE_MMAP +deps/lightning/%: CFLAGS += -Wno-uninitialized +deps/lightrec/%: CFLAGS += -Wno-uninitialized +libpcsxcore/lightrec/mem.o: CFLAGS += -D_GNU_SOURCE +ifeq ($(MMAP_WIN32),1) +CFLAGS += -Iinclude/mman -I deps/mman +OBJS += deps/mman/mman.o +endif +else ifeq "$(DYNAREC)" "ari64" OBJS += libpcsxcore/new_dynarec/new_dynarec.o OBJS += libpcsxcore/new_dynarec/pcsxmem.o ifeq "$(ARCH)" "arm" @@ -176,7 +226,7 @@ OBJS += $(LCHDR)/src/libchdr_cdrom.o OBJS += $(LCHDR)/src/libchdr_chd.o OBJS += $(LCHDR)/src/libchdr_flac.o OBJS += $(LCHDR)/src/libchdr_huffman.o -$(LCHDR)/src/%.o: CFLAGS += -Wno-unused -std=gnu11 +$(LCHDR)/src/%.o: CFLAGS += -Wno-unused 
-Wno-maybe-uninitialized -std=gnu11 OBJS += $(LCHDR_LZMA)/src/Alloc.o OBJS += $(LCHDR_LZMA)/src/CpuArch.o OBJS += $(LCHDR_LZMA)/src/Delta.o @@ -292,10 +342,12 @@ endif OBJS += frontend/libretro.o CFLAGS += -DFRONTEND_SUPPORTS_RGB565 +ifneq ($(DYNAREC),lightrec) ifeq ($(MMAP_WIN32),1) OBJS += libpcsxcore/memmap_win32.o endif endif +endif ifeq "$(USE_PLUGIN_LIB)" "1" OBJS += frontend/plugin_lib.o diff --git a/configure b/configure index ac7af0e2e..be2c4ff40 100755 --- a/configure +++ b/configure @@ -40,6 +40,7 @@ check_define_val() platform_list="generic pandora maemo caanoo libretro" platform="generic" builtin_gpu_list="neon peops unai unai_old" +dynarec_list="ari64 lightrec none" builtin_gpu="" sound_driver_list="oss alsa pulseaudio sdl libretro" sound_drivers="" @@ -54,7 +55,7 @@ have_arm_neon_asm="" have_tslib="" have_gles="" have_c64x_dsp="" -enable_dynarec="yes" +dynarec="" need_sdl="no" need_xlib="no" need_libpicofe="yes" @@ -145,7 +146,9 @@ for opt do ;; --disable-neon) have_arm_neon="no" ;; - --disable-dynarec) enable_dynarec="no" + --dynarec=*) dynarec="$optarg" + ;; + --disable-dynarec) dynarec="no" ;; *) echo "ERROR: unknown option $opt"; show_help="yes" ;; @@ -163,8 +166,8 @@ if [ "$show_help" = "yes" ]; then echo " available: $sound_driver_list" echo " --enable-neon" echo " --disable-neon enable/disable ARM NEON optimizations [guessed]" - echo " --disable-dynarec disable dynamic recompiler" - echo " (dynarec is only available and enabled on ARM)" + echo " --dynarec=NAME select dynamic recompiler [guessed]" + echo " available: $dynarec_list" echo "influential environment variables:" echo " CROSS_COMPILE CC CXX AS AR CFLAGS ASFLAGS LDFLAGS LDLIBS" exit 1 @@ -245,6 +248,10 @@ arm*) have_armv5=`check_define HAVE_ARMV5 && echo yes` || true fi + if [ "x$dynarec" = "x" ]; then + dynarec="ari64" + fi + if [ "x$builtin_gpu" = "x" ]; then if [ "$have_arm_neon" = "yes" ]; then builtin_gpu="neon" @@ -295,19 +302,25 @@ arm*) aarch64) have_arm_neon="yes" 
have_arm_neon_asm="no" + if [ "x$dynarec" = "x" ]; then + dynarec="ari64" + fi if [ "x$builtin_gpu" = "x" ]; then builtin_gpu="neon" fi ;; x86_64) - enable_dynarec="no" + if [ "x$dynarec" = "x" ]; then + dynarec="lightrec" + fi if [ "x$builtin_gpu" = "x" ]; then builtin_gpu="neon" fi ;; *) - # dynarec only available on ARM - enable_dynarec="no" + if [ "x$dynarec" = "x" ]; then + dynarec="lightrec" + fi ;; esac @@ -545,7 +558,7 @@ echo "C compiler $CC" echo "C compiler flags $CFLAGS" echo "libraries $MAIN_LDLIBS" echo "linker flags $LDFLAGS$MAIN_LDFLAGS" -echo "enable dynarec $enable_dynarec" +echo "dynarec $dynarec" if [ "$ARCH" = "arm" -o "$ARCH" = "aarch64" ]; then echo "enable ARM NEON $have_arm_neon" fi @@ -596,9 +609,7 @@ if [ "$have_gles" = "yes" ]; then echo "CFLAGS_GLES = $CFLAGS_GLES" >> $config_mak echo "LDLIBS_GLES = $LDLIBS_GLES" >> $config_mak fi -if [ "$enable_dynarec" = "yes" ]; then - echo "USE_DYNAREC = 1" >> $config_mak -fi +echo "DYNAREC = $dynarec" >> $config_mak if [ "$drc_cache_base" = "yes" ]; then echo "BASE_ADDR_DYNAMIC = 1" >> $config_mak fi diff --git a/deps/libchdr b/deps/libchdr index 86b272076..aaca599e1 160000 --- a/deps/libchdr +++ b/deps/libchdr @@ -1 +1 @@ -Subproject commit 86b272076d542287d3f03952e7d4efe283e815bf +Subproject commit aaca599e18e43933fc193bd1b715c368c306208b diff --git a/deps/lightning b/deps/lightning new file mode 160000 index 000000000..de026794c --- /dev/null +++ b/deps/lightning @@ -0,0 +1 @@ +Subproject commit de026794c71386983034461bce2df3c63ccd5827 diff --git a/deps/lightrec b/deps/lightrec new file mode 160000 index 000000000..ea20362c9 --- /dev/null +++ b/deps/lightrec @@ -0,0 +1 @@ +Subproject commit ea20362c9542f12fb6a0f27aa7df66b2af06b84d From 0b1633d72a8854f7ee4f62f320ef0ecf8ff71ea1 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 11 Oct 2024 02:34:16 +0300 Subject: [PATCH 569/597] drc: try compiling on another thread --- .gitmodules | 3 + Makefile | 20 +- deps/libretro-common | 1 + 
frontend/libretro-rthreads.c | 11 + frontend/libretro-rthreads.h | 3 + frontend/libretro.c | 32 ++- frontend/libretro_core_options.h | 17 ++ frontend/main.c | 2 +- frontend/menu.c | 12 +- frontend/plugin_lib.c | 4 +- include/compiler_features.h | 4 +- libpcsxcore/cdrom-async.c | 3 +- libpcsxcore/database.c | 4 +- libpcsxcore/misc.c | 4 +- libpcsxcore/new_dynarec/assem_arm.c | 20 +- libpcsxcore/new_dynarec/assem_arm64.c | 20 +- libpcsxcore/new_dynarec/emu_if.c | 345 ++++++++++++++++++++---- libpcsxcore/new_dynarec/emu_if.h | 7 - libpcsxcore/new_dynarec/linkage_arm.S | 15 +- libpcsxcore/new_dynarec/linkage_arm64.S | 27 +- libpcsxcore/new_dynarec/new_dynarec.c | 131 +++++---- libpcsxcore/new_dynarec/new_dynarec.h | 45 +++- libpcsxcore/psxinterpreter.c | 11 + libpcsxcore/r3000a.h | 4 +- 24 files changed, 570 insertions(+), 175 deletions(-) create mode 160000 deps/libretro-common create mode 100644 frontend/libretro-rthreads.h diff --git a/.gitmodules b/.gitmodules index fa655497c..d4665d30f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,3 +13,6 @@ [submodule "deps/lightning"] path = deps/lightning url = https://github.com/pcercuei/gnu_lightning.git +[submodule "deps/libretro-common"] + path = deps/libretro-common + url = https://github.com/libretro/libretro-common.git diff --git a/Makefile b/Makefile index 1e965a69d..0d8991620 100644 --- a/Makefile +++ b/Makefile @@ -129,6 +129,12 @@ OBJS += libpcsxcore/new_dynarec/pcsxmem.o else $(error no dynarec support for architecture $(ARCH)) endif + ifeq "$(NDRC_THREAD)" "1" + libpcsxcore/new_dynarec/new_dynarec.o: CFLAGS += -DNDRC_THREAD + libpcsxcore/new_dynarec/emu_if.o: CFLAGS += -DNDRC_THREAD + frontend/libretro.o: CFLAGS += -DNDRC_THREAD + USE_RTHREADS := 1 + endif else CFLAGS += -DDRC_DISABLE endif @@ -327,8 +333,8 @@ OBJS += deps/libretro-common/vfs/vfs_implementation_cdrom.o CFLAGS += -DHAVE_CDROM endif ifeq "$(USE_ASYNC_CDROM)" "1" -OBJS += frontend/libretro-rthreads.o CFLAGS += -DUSE_ASYNC_CDROM +USE_RTHREADS := 1 
endif ifeq "$(USE_LIBRETRO_VFS)" "1" OBJS += deps/libretro-common/compat/compat_posix_string.o @@ -341,12 +347,24 @@ CFLAGS += -DUSE_LIBRETRO_VFS endif OBJS += frontend/libretro.o CFLAGS += -DFRONTEND_SUPPORTS_RGB565 +CFLAGS += -DHAVE_LIBRETRO +INC_LIBRETRO_COMMON := 1 ifneq ($(DYNAREC),lightrec) ifeq ($(MMAP_WIN32),1) OBJS += libpcsxcore/memmap_win32.o endif endif +endif # $(PLATFORM) == "libretro" + +ifeq "$(USE_RTHREADS)" "1" +OBJS += frontend/libretro-rthreads.o +OBJS += deps/libretro-common/features/features_cpu.o +frontend/main.o: CFLAGS += -DHAVE_CPU_FEATURES +INC_LIBRETRO_COMMON := 1 +endif +ifeq "$(INC_LIBRETRO_COMMON)" "1" +CFLAGS += -Ideps/libretro-common/include endif ifeq "$(USE_PLUGIN_LIB)" "1" diff --git a/deps/libretro-common b/deps/libretro-common new file mode 160000 index 000000000..0abedaac6 --- /dev/null +++ b/deps/libretro-common @@ -0,0 +1 @@ +Subproject commit 0abedaac6a795c093f2e1a22f3028fca9efdf3c9 diff --git a/frontend/libretro-rthreads.c b/frontend/libretro-rthreads.c index 96c861d3c..72784d4d2 100644 --- a/frontend/libretro-rthreads.c +++ b/frontend/libretro-rthreads.c @@ -7,3 +7,14 @@ #endif #include "../deps/libretro-common/rthreads/rthreads.c" + +// an "extension" +int sthread_set_name(sthread_t *thread, const char *name) +{ +#if defined(__GLIBC__) || defined(__MACH__) || \ + (defined(__ANDROID_API__) && __ANDROID_API__ >= 26) + if (thread) + return pthread_setname_np(thread->id, name); +#endif + return -1; +} diff --git a/frontend/libretro-rthreads.h b/frontend/libretro-rthreads.h new file mode 100644 index 000000000..851d448eb --- /dev/null +++ b/frontend/libretro-rthreads.h @@ -0,0 +1,3 @@ +#include "rthreads/rthreads.h" + +int sthread_set_name(sthread_t *thread, const char *name); diff --git a/frontend/libretro.c b/frontend/libretro.c index b5c3b92d0..f7eb64cdb 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2290,7 +2290,7 @@ static void update_variables(bool in_flight) psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, 
NULL); } } -#endif /* !DRC_DISABLE */ +#endif // !DRC_DISABLE var.value = NULL; var.key = "pcsx_rearmed_psxclock"; @@ -2301,14 +2301,28 @@ static void update_variables(bool in_flight) } #if !defined(DRC_DISABLE) && !defined(LIGHTREC) +#ifdef NDRC_THREAD + var.value = NULL; + var.key = "pcsx_rearmed_drc_thread"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + ndrc_g.hacks &= ~(NDHACK_THREAD_FORCE | NDHACK_THREAD_FORCE_ON); + if (strcmp(var.value, "disabled") == 0) + ndrc_g.hacks |= NDHACK_THREAD_FORCE; + else if (strcmp(var.value, "enabled") == 0) + ndrc_g.hacks |= NDHACK_THREAD_FORCE | NDHACK_THREAD_FORCE_ON; + // psxCpu->ApplyConfig(); will start/stop the thread + } +#endif + var.value = NULL; var.key = "pcsx_rearmed_nosmccheck"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "enabled") == 0) - new_dynarec_hacks |= NDHACK_NO_SMC_CHECK; + ndrc_g.hacks |= NDHACK_NO_SMC_CHECK; else - new_dynarec_hacks &= ~NDHACK_NO_SMC_CHECK; + ndrc_g.hacks &= ~NDHACK_NO_SMC_CHECK; } var.value = NULL; @@ -2316,9 +2330,9 @@ static void update_variables(bool in_flight) if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "enabled") == 0) - new_dynarec_hacks |= NDHACK_GTE_UNNEEDED; + ndrc_g.hacks |= NDHACK_GTE_UNNEEDED; else - new_dynarec_hacks &= ~NDHACK_GTE_UNNEEDED; + ndrc_g.hacks &= ~NDHACK_GTE_UNNEEDED; } var.value = NULL; @@ -2326,9 +2340,9 @@ static void update_variables(bool in_flight) if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "enabled") == 0) - new_dynarec_hacks |= NDHACK_GTE_NO_FLAGS; + ndrc_g.hacks |= NDHACK_GTE_NO_FLAGS; else - new_dynarec_hacks &= ~NDHACK_GTE_NO_FLAGS; + ndrc_g.hacks &= ~NDHACK_GTE_NO_FLAGS; } var.value = NULL; @@ -2336,9 +2350,9 @@ static void update_variables(bool in_flight) if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "enabled") == 0) - new_dynarec_hacks 
|= NDHACK_NO_COMPAT_HACKS; + ndrc_g.hacks |= NDHACK_NO_COMPAT_HACKS; else - new_dynarec_hacks &= ~NDHACK_NO_COMPAT_HACKS; + ndrc_g.hacks &= ~NDHACK_NO_COMPAT_HACKS; } #endif /* !DRC_DISABLE && !LIGHTREC */ diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 86fe78344..a4ead77ec 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -224,7 +224,24 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "enabled", }, +#if !defined(LIGHTREC) && defined(NDRC_THREAD) + { + "pcsx_rearmed_drc_thread", + "DynaRec threading", + NULL, + "Run the dynarec on another thread.", + NULL, + "system", + { + { "auto", "Auto" }, + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "auto", + }, #endif +#endif // DRC_DISABLE { "pcsx_rearmed_psxclock", "PSX CPU Clock Speed (%)", diff --git a/frontend/main.c b/frontend/main.c index 61dbf637a..750e5661c 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -159,7 +159,7 @@ void emu_set_default_config(void) spu_config.iTempo = 1; #endif #endif - new_dynarec_hacks = 0; + ndrc_g.hacks = 0; in_type[0] = PSE_PAD_TYPE_STANDARD; in_type[1] = PSE_PAD_TYPE_STANDARD; diff --git a/frontend/menu.c b/frontend/menu.c index 275028c55..49ffed9a6 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -479,7 +479,7 @@ static const struct { CE_INTVAL(in_evdev_allow_abs_only), CE_INTVAL(volume_boost), CE_INTVAL(psx_clock), - CE_INTVAL(new_dynarec_hacks), + CE_INTVAL(ndrc_g.hacks), CE_INTVAL(in_enable_vibration), }; @@ -1630,10 +1630,10 @@ static const char h_cfg_stalls[] = "Will cause some games to run too fast"; static menu_entry e_menu_speed_hacks[] = { #ifndef DRC_DISABLE - mee_onoff_h ("Disable compat hacks", 0, new_dynarec_hacks, NDHACK_NO_COMPAT_HACKS, h_cfg_noch), - mee_onoff_h ("Disable SMC checks", 0, new_dynarec_hacks, NDHACK_NO_SMC_CHECK, h_cfg_nosmc), - mee_onoff_h ("Assume GTE regs unneeded", 0, new_dynarec_hacks, NDHACK_GTE_UNNEEDED, h_cfg_gteunn), - 
mee_onoff_h ("Disable GTE flags", 0, new_dynarec_hacks, NDHACK_GTE_NO_FLAGS, h_cfg_gteflgs), + mee_onoff_h ("Disable compat hacks", 0, ndrc_g.hacks, NDHACK_NO_COMPAT_HACKS, h_cfg_noch), + mee_onoff_h ("Disable SMC checks", 0, ndrc_g.hacks, NDHACK_NO_SMC_CHECK, h_cfg_nosmc), + mee_onoff_h ("Assume GTE regs unneeded", 0, ndrc_g.hacks, NDHACK_GTE_UNNEEDED, h_cfg_gteunn), + mee_onoff_h ("Disable GTE flags", 0, ndrc_g.hacks, NDHACK_GTE_NO_FLAGS, h_cfg_gteflgs), #endif mee_onoff_h ("Disable CPU/GTE stalls", 0, menu_iopts[0], 1, h_cfg_stalls), mee_end, @@ -2243,7 +2243,7 @@ static int romsel_run(void) printf("selected file: %s\n", fname); - new_dynarec_clear_full(); + ndrc_clear_full(); if (run_cd_image(fname) != 0) return -1; diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index c8a6fed45..1b63f241a 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -764,14 +764,14 @@ void pl_frame_limit(void) // recompilation is not that fast and may cause frame skip on // loading screens and such, resulting in flicker or glitches - if (new_dynarec_did_compile) { + if (ndrc_g.did_compile) { if (drc_active_vsyncs < 32) pl_rearmed_cbs.fskip_advice = 0; drc_active_vsyncs++; } else drc_active_vsyncs = 0; - new_dynarec_did_compile = 0; + ndrc_g.did_compile = 0; } pcnt_start(PCNT_ALL); diff --git a/include/compiler_features.h b/include/compiler_features.h index 753706d7f..0ab8468b2 100644 --- a/include/compiler_features.h +++ b/include/compiler_features.h @@ -7,12 +7,12 @@ # else # define noinline __attribute__((noinline,noclone)) # endif -# define unused __attribute__((unused)) +# define attr_unused __attribute__((unused)) #else # define likely(x) (x) # define unlikely(x) (x) # define noinline -# define unused +# define attr_unused #endif #ifndef __has_builtin diff --git a/libpcsxcore/cdrom-async.c b/libpcsxcore/cdrom-async.c index 026a34517..2cb304733 100644 --- a/libpcsxcore/cdrom-async.c +++ b/libpcsxcore/cdrom-async.c @@ -114,7 +114,7 @@ static int 
cdrom_is_media_inserted(void *stream) { return 0; } #ifdef USE_ASYNC_CDROM -#include "rthreads/rthreads.h" +#include "../frontend/libretro-rthreads.h" #include "retro_timers.h" struct cached_buf { @@ -273,6 +273,7 @@ static void cdra_start_thread(void) acdrom.buf_cache[i].lba = ~0; } if (acdrom.thread) { + sthread_set_name(acdrom.thread, "pcsxr-cdrom"); SysPrintf("cdrom precache: %d buffers%s\n", acdrom.buf_cnt, acdrom.have_subchannel ? " +sub" : ""); } diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index c05b80cd9..054e2a66a 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -208,7 +208,7 @@ void Apply_Hacks_Cdrom(void) } /* Dynarec game-specific hacks */ - new_dynarec_hacks_pergame = 0; + ndrc_g.hacks_pergame = 0; Config.cycle_multiplier_override = 0; for (i = 0; i < ARRAY_SIZE(cycle_multiplier_overrides); i++) @@ -220,7 +220,7 @@ void Apply_Hacks_Cdrom(void) if (j < ARRAY_SIZE(cycle_multiplier_overrides[i].id)) { Config.cycle_multiplier_override = cycle_multiplier_overrides[i].mult; - new_dynarec_hacks_pergame |= NDHACK_OVERRIDE_CYCLE_M; + ndrc_g.hacks_pergame |= NDHACK_OVERRIDE_CYCLE_M; SysPrintf("using cycle_multiplier_override: %d\n", Config.cycle_multiplier_override); break; diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 396658723..286510257 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -735,7 +735,7 @@ int SaveState(const char *file) { psxHwFreeze(f, 1); psxRcntFreeze(f, 1); mdecFreeze(f, 1); - new_dyna_freeze(f, 1); + ndrc_freeze(f, 1); padFreeze(f, 1); result = 0; @@ -819,7 +819,7 @@ int LoadState(const char *file) { psxHwFreeze(f, 0); psxRcntFreeze(f, 0); mdecFreeze(f, 0); - new_dyna_freeze(f, 0); + ndrc_freeze(f, 0); padFreeze(f, 0); events_restore(); diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 308f4a000..5b1d6fdb8 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -242,7 +242,7 @@ static void 
alloc_cc_optional(struct regstat *cur, int i) /* Assembler */ -static unused char regname[16][4] = { +static attr_unused char regname[16][4] = { "r0", "r1", "r2", @@ -318,7 +318,7 @@ static u_int genjmp(u_int addr) return ((u_int)offset>>2)&0xffffff; } -static unused void emit_breakpoint(void) +static attr_unused void emit_breakpoint(void) { assem_debug("bkpt #0\n"); //output_w32(0xe1200070); @@ -730,7 +730,7 @@ static void emit_lsls_imm(int rs,int imm,int rt) output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); } -static unused void emit_lslpls_imm(int rs,int imm,int rt) +static attr_unused void emit_lslpls_imm(int rs,int imm,int rt) { assert(imm>0); assert(imm<32); @@ -812,7 +812,7 @@ static void emit_sar(u_int rs,u_int shift,u_int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8)); } -static unused void emit_orrshl(u_int rs,u_int shift,u_int rt) +static attr_unused void emit_orrshl(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -821,7 +821,7 @@ static unused void emit_orrshl(u_int rs,u_int shift,u_int rt) output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8)); } -static unused void emit_orrshr(u_int rs,u_int shift,u_int rt) +static attr_unused void emit_orrshr(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -892,7 +892,7 @@ static void emit_cmovs_imm(int imm,int rt) output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval); } -static unused void emit_cmovne_reg(int rs,int rt) +static attr_unused void emit_cmovne_reg(int rs,int rt) { assem_debug("movne %s,%s\n",regname[rt],regname[rs]); output_w32(0x11a00000|rd_rn_rm(rt,0,rs)); @@ -1089,7 +1089,7 @@ static void *emit_cbz(int rs, const void *a) return ret; } -static unused void emit_callreg(u_int r) +static attr_unused void emit_callreg(u_int r) { assert(r<15); assem_debug("blx %s\n",regname[r]); @@ -1404,7 +1404,7 @@ static void emit_teq(int rs, int rt) output_w32(0xe1300000|rd_rn_rm(0,rs,rt)); } -static unused void emit_rsbimm(int rs, int imm, int rt) +static attr_unused void 
emit_rsbimm(int rs, int imm, int rt) { u_int armval; genimm_checked(imm,&armval); @@ -1462,7 +1462,7 @@ static void emit_callne(int a) } // Used to preload hash table entries -static unused void emit_prefetchreg(int r) +static attr_unused void emit_prefetchreg(int r) { assem_debug("pld %s\n",regname[r]); output_w32(0xf5d0f000|rd_rn_rm(0,r,0)); @@ -1484,7 +1484,7 @@ static void emit_orrne_imm(int rs,int imm,int rt) output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval); } -static unused void emit_addpl_imm(int rs,int imm,int rt) +static attr_unused void emit_addpl_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index bad2854cf..259c8e882 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -119,14 +119,14 @@ static void alloc_cc_optional(struct regstat *cur, int i) /* Assembler */ -static unused const char *regname[32] = { +static attr_unused const char *regname[32] = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23", "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp" }; -static unused const char *regname64[32] = { +static attr_unused const char *regname64[32] = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23", @@ -138,7 +138,7 @@ enum { COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV }; -static unused const char *condname[16] = { +static attr_unused const char *condname[16] = { "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv" }; @@ -356,7 +356,7 @@ static void emit_subs(u_int rs1, u_int rs2, u_int rt) output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt)); } -static unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, 
u_int rt) +static attr_unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) { assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift); output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt)); @@ -618,7 +618,7 @@ static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt) static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt) { - unused const char *st = s ? "s" : ""; + attr_unused const char *st = s ? "s" : ""; s = s ? 0x20000000 : 0; is64 = is64 ? 0x80000000 : 0; if (imm < 4096) { @@ -1293,8 +1293,8 @@ static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs) { u_int op = 0xb9000000; - unused const char *ldst = is_st ? "st" : "ld"; - unused char rp = is64 ? 'x' : 'w'; + attr_unused const char *ldst = is_st ? "st" : "ld"; + attr_unused char rp = is64 ? 'x' : 'w'; assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs); is64 = is64 ? 1 : 0; assert((ofs & ((1 << (2+is64)) - 1)) == 0); @@ -1307,8 +1307,8 @@ static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs) static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs) { u_int op = 0x29000000; - unused const char *ldst = is_st ? "st" : "ld"; - unused char rp = is64 ? 'x' : 'w'; + attr_unused const char *ldst = is_st ? "st" : "ld"; + attr_unused char rp = is64 ? 'x' : 'w'; assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs); is64 = is64 ? 
1 : 0; assert((ofs & ((1 << (2+is64)) - 1)) == 0); @@ -2082,7 +2082,7 @@ static void do_miniht_insert(u_int return_address,u_int rt,int temp) { emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]); } -static unused void clear_cache_arm64(char *start, char *end) +static attr_unused void clear_cache_arm64(char *start, char *end) { // Don't rely on GCC's __clear_cache implementation, as it caches // icache/dcache cache line sizes, that can vary between cores on diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 6c1b48c59..e49580183 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -16,14 +16,37 @@ #include "../r3000a.h" #include "../gte_arm.h" #include "../gte_neon.h" +#include "compiler_features.h" #define FLAGLESS #include "../gte.h" +#ifdef NDRC_THREAD +#include "../../frontend/libretro-rthreads.h" +#include "features/features_cpu.h" +#include "retro_timers.h" +#endif +#ifdef _3DS +#include <3ds_utils.h> +#endif +#ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#endif //#define evprintf printf #define evprintf(...) 
+#if !defined(DRC_DISABLE) && !defined(LIGHTREC) +// reduce global loads/literal pools (maybe) +#include "linkage_offsets.h" +#define dynarec_local_var4(x) dynarec_local[(x) / sizeof(dynarec_local[0])] +#define stop dynarec_local_var4(LO_stop) +#define psxRegs (*(psxRegisters *)((char *)dynarec_local + LO_psxRegs)) +#define next_interupt dynarec_local_var4(LO_next_interupt) +#define pending_exception dynarec_local_var4(LO_pending_exception) +#endif + +static void ari64_thread_sync(void); + void pcsx_mtc0(u32 reg, u32 val) { evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); @@ -41,7 +64,7 @@ void pcsx_mtc0_ds(u32 reg, u32 val) MTC0(&psxRegs, reg, val); } -void new_dyna_freeze(void *f, int mode) +void ndrc_freeze(void *f, int mode) { const char header_save[8] = "ariblks"; uint32_t addrs[1024 * 4]; @@ -49,6 +72,8 @@ void new_dyna_freeze(void *f, int mode) int bytes; char header[8]; + ari64_thread_sync(); + if (mode != 0) { // save size = new_dynarec_save_blocks(addrs, sizeof(addrs)); if (size == 0) @@ -86,8 +111,17 @@ void new_dyna_freeze(void *f, int mode) //printf("drc: %d block info entries %s\n", size/8, mode ? 
"saved" : "loaded"); } +void ndrc_clear_full(void) +{ + ari64_thread_sync(); + new_dynarec_clear_full(); +} + #if !defined(DRC_DISABLE) && !defined(LIGHTREC) +static void ari64_thread_init(void); +static int ari64_thread_check_range(unsigned int start, unsigned int end); + /* GTE stuff */ void *gte_handlers[64]; @@ -189,43 +223,9 @@ const uint64_t gte_reg_writes[64] = { [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27), }; -static int ari64_init() -{ - static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); - size_t i; - - new_dynarec_init(); - new_dyna_pcsx_mem_init(); - - for (i = 0; i < ARRAY_SIZE(gte_handlers); i++) - if (psxCP2[i] != gteNULL) - gte_handlers[i] = psxCP2[i]; - -#if defined(__arm__) && !defined(DRC_DBG) - gte_handlers[0x06] = gteNCLIP_arm; -#ifdef HAVE_ARMV5 - gte_handlers_nf[0x01] = gteRTPS_nf_arm; - gte_handlers_nf[0x30] = gteRTPT_nf_arm; -#endif -#ifdef __ARM_NEON__ - // compiler's _nf version is still a lot slower than neon - // _nf_arm RTPS is roughly the same, RTPT slower - gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon; - gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon; -#endif -#endif -#ifdef DRC_DBG - memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf)); -#endif - psxH_ptr = psxH; - zeromem_ptr = zero_mem; - scratch_buf_ptr = scratch_buf; - - return 0; -} - static void ari64_reset() { + ari64_thread_sync(); new_dyna_pcsx_mem_reset(); new_dynarec_invalidate_all_pages(); new_dyna_pcsx_mem_load_state(); @@ -268,11 +268,16 @@ static void ari64_execute_block(enum blockExecCaller caller) static void ari64_clear(u32 addr, u32 size) { - size *= 4; /* PCSX uses DMA units (words) */ + u32 end = addr + size * 4; /* PCSX uses DMA units (words) */ + + evprintf("ari64_clear %08x %04x\n", addr, size * 4); - evprintf("ari64_clear %08x %04x\n", addr, size); + if (!new_dynarec_quick_check_range(addr, end) && + !ari64_thread_check_range(addr, end)) + return; - new_dynarec_invalidate_range(addr, addr + size); + 
ari64_thread_sync(); + new_dynarec_invalidate_range(addr, end); } static void ari64_notify(enum R3000Anote note, void *data) { @@ -294,22 +299,263 @@ static void ari64_notify(enum R3000Anote note, void *data) { static void ari64_apply_config() { + int thread_changed; + + ari64_thread_sync(); intApplyConfig(); if (Config.DisableStalls) - new_dynarec_hacks |= NDHACK_NO_STALLS; + ndrc_g.hacks |= NDHACK_NO_STALLS; else - new_dynarec_hacks &= ~NDHACK_NO_STALLS; + ndrc_g.hacks &= ~NDHACK_NO_STALLS; - if (Config.cycle_multiplier != cycle_multiplier_old - || new_dynarec_hacks != new_dynarec_hacks_old) + thread_changed = (ndrc_g.hacks ^ ndrc_g.hacks_old) + & (NDHACK_THREAD_FORCE | NDHACK_THREAD_FORCE_ON); + if (Config.cycle_multiplier != ndrc_g.cycle_multiplier_old + || ndrc_g.hacks != ndrc_g.hacks_old) { new_dynarec_clear_full(); } + if (thread_changed) + ari64_thread_init(); +} + +#ifdef NDRC_THREAD +static void clear_local_cache(void) +{ +#ifdef _3DS + if (ndrc_g.thread.cache_dirty) { + ndrc_g.thread.cache_dirty = 0; + ctr_clear_cache(); + } +#else + // hopefully nothing is needed, as tested on r-pi4 and switch +#endif +} + +static noinline void ari64_execute_threaded_slow(enum blockExecCaller block_caller) +{ + if (!ndrc_g.thread.busy) { + memcpy(ndrc_smrv_regs, psxRegs.GPR.r, sizeof(ndrc_smrv_regs)); + slock_lock(ndrc_g.thread.lock); + ndrc_g.thread.addr = psxRegs.pc; + ndrc_g.thread.busy = 1; + slock_unlock(ndrc_g.thread.lock); + scond_signal(ndrc_g.thread.cond); + } + + //ari64_notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); + psxInt.Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); + do + { + psxInt.ExecuteBlock(block_caller); + } + while (!stop && ndrc_g.thread.busy && block_caller == EXEC_CALLER_OTHER); + + psxInt.Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); + //ari64_notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); +} + +static void ari64_execute_threaded_once(enum blockExecCaller block_caller) +{ + psxRegisters *regs = (void *)((char *)dynarec_local + LO_psxRegs); + void *target; 
+ + if (likely(!ndrc_g.thread.busy)) { + ndrc_g.thread.addr = 0; + target = ndrc_get_addr_ht_param(regs->pc, ndrc_cm_no_compile); + if (target) { + clear_local_cache(); + new_dyna_start_at(dynarec_local, target); + return; + } + } + ari64_execute_threaded_slow(block_caller); +} + +static void ari64_execute_threaded() +{ + schedule_timeslice(); + while (!stop) + { + ari64_execute_threaded_once(EXEC_CALLER_OTHER); + + if ((s32)(psxRegs.cycle - next_interupt) >= 0) + schedule_timeslice(); + } +} + +static void ari64_execute_threaded_block(enum blockExecCaller caller) +{ + if (caller == EXEC_CALLER_BOOT) + stop++; + + next_interupt = psxRegs.cycle + 1; + ari64_execute_threaded_once(caller); + + if (caller == EXEC_CALLER_BOOT) + stop--; +} + +static void ari64_thread_sync(void) +{ + if (!ndrc_g.thread.lock || !ndrc_g.thread.busy) + return; + for (;;) { + slock_lock(ndrc_g.thread.lock); + slock_unlock(ndrc_g.thread.lock); + if (!ndrc_g.thread.busy) + break; + retro_sleep(0); + } +} + +static int ari64_thread_check_range(unsigned int start, unsigned int end) +{ + u32 addr = ndrc_g.thread.addr; + if (!addr) + return 0; + + addr &= 0x1fffffff; + start &= 0x1fffffff; + end &= 0x1fffffff; + if (addr >= end) + return 0; + if (addr + MAXBLOCK * 4 <= start) + return 0; + + //SysPrintf("%x hits %x-%x\n", addr, start, end); + return 1; +} + +static void ari64_compile_thread(void *unused) +{ + void *target; + u32 addr; + + slock_lock(ndrc_g.thread.lock); + while (!ndrc_g.thread.exit) + { + if (!ndrc_g.thread.busy) + scond_wait(ndrc_g.thread.cond, ndrc_g.thread.lock); + addr = ndrc_g.thread.addr; + if (!ndrc_g.thread.busy || !addr || ndrc_g.thread.exit) + continue; + + target = ndrc_get_addr_ht_param(addr, ndrc_cm_compile_in_thread); + //printf("c %08x -> %p\n", addr, target); + ndrc_g.thread.busy = 0; + } + slock_unlock(ndrc_g.thread.lock); + (void)target; +} + +static void ari64_thread_shutdown(void) +{ + psxRec.Execute = ari64_execute; + psxRec.ExecuteBlock = ari64_execute_block; 
+ + if (ndrc_g.thread.lock) + slock_lock(ndrc_g.thread.lock); + ndrc_g.thread.exit = 1; + if (ndrc_g.thread.lock) + slock_unlock(ndrc_g.thread.lock); + if (ndrc_g.thread.cond) + scond_signal(ndrc_g.thread.cond); + if (ndrc_g.thread.handle) { + sthread_join(ndrc_g.thread.handle); + ndrc_g.thread.handle = NULL; + } + if (ndrc_g.thread.cond) { + scond_free(ndrc_g.thread.cond); + ndrc_g.thread.cond = NULL; + } + if (ndrc_g.thread.lock) { + slock_free(ndrc_g.thread.lock); + ndrc_g.thread.lock = NULL; + } + ndrc_g.thread.busy = ndrc_g.thread.addr = 0; +} + +static void ari64_thread_init(void) +{ + int enable; + + if (ndrc_g.hacks & NDHACK_THREAD_FORCE) + enable = ndrc_g.hacks & NDHACK_THREAD_FORCE_ON; + else { + u32 cpu_count = cpu_features_get_core_amount(); + enable = cpu_count > 1; + } + + if (!ndrc_g.thread.handle == !enable) + return; + + ari64_thread_shutdown(); + ndrc_g.thread.busy = ndrc_g.thread.addr = ndrc_g.thread.exit = 0; + + if (enable) { + ndrc_g.thread.lock = slock_new(); + ndrc_g.thread.cond = scond_new(); + } + if (ndrc_g.thread.lock && ndrc_g.thread.cond) + ndrc_g.thread.handle = sthread_create(ari64_compile_thread, NULL); + if (ndrc_g.thread.handle) { + psxRec.Execute = ari64_execute_threaded; + psxRec.ExecuteBlock = ari64_execute_threaded_block; + } + else { + // clean up potential incomplete init + ari64_thread_shutdown(); + } + SysPrintf("compiler thread %sabled\n", ndrc_g.thread.handle ? 
"en" : "dis"); +} +#else // if !NDRC_THREAD +static void ari64_thread_init(void) {} +static void ari64_thread_shutdown(void) {} +static int ari64_thread_check_range(unsigned int start, unsigned int end) { return 0; } +#endif + +static int ari64_init() +{ + static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); + size_t i; + + new_dynarec_init(); + new_dyna_pcsx_mem_init(); + + for (i = 0; i < ARRAY_SIZE(gte_handlers); i++) + if (psxCP2[i] != gteNULL) + gte_handlers[i] = psxCP2[i]; + +#if defined(__arm__) && !defined(DRC_DBG) + gte_handlers[0x06] = gteNCLIP_arm; +#ifdef HAVE_ARMV5 + gte_handlers_nf[0x01] = gteRTPS_nf_arm; + gte_handlers_nf[0x30] = gteRTPT_nf_arm; +#endif +#ifdef __ARM_NEON__ + // compiler's _nf version is still a lot slower than neon + // _nf_arm RTPS is roughly the same, RTPT slower + gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon; + gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon; +#endif +#endif +#ifdef DRC_DBG + memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf)); +#endif + psxH_ptr = psxH; + zeromem_ptr = zero_mem; + scratch_buf_ptr = scratch_buf; + + ari64_thread_init(); + + return 0; } static void ari64_shutdown() { + ari64_thread_shutdown(); new_dynarec_cleanup(); new_dyna_pcsx_mem_shutdown(); } @@ -327,14 +573,10 @@ R3000Acpu psxRec = { #else // if DRC_DISABLE +struct ndrc_globals ndrc_g; // dummy unsigned int address; int pending_exception, stop; u32 next_interupt; -int new_dynarec_did_compile; -int cycle_multiplier_old; -int new_dynarec_hacks_pergame; -int new_dynarec_hacks_old; -int new_dynarec_hacks; void *psxH_ptr; void *zeromem_ptr; u32 zero_mem[0x1000/4]; @@ -353,6 +595,11 @@ void new_dyna_pcsx_mem_isolate(int enable) {} void new_dyna_pcsx_mem_shutdown(void) {} int new_dynarec_save_blocks(void *save, int size) { return 0; } void new_dynarec_load_blocks(const void *save, int size) {} + +#endif // DRC_DISABLE + +#ifndef NDRC_THREAD +static void ari64_thread_sync(void) {} #endif #ifdef DRC_DBG @@ 
-624,4 +871,4 @@ void do_insn_cmp(void) badregs_mask_prev = badregs_mask; } -#endif +#endif // DRC_DBG diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index ec307fc4a..1b587661f 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -3,12 +3,6 @@ extern int dynarec_local[]; -/* same as psxRegs.GPR.n.* */ -extern int hi, lo; - -/* same as psxRegs.CP0.n.* */ -extern int reg_cop0[]; - /* COP2/GTE */ enum gte_opcodes { GTE_RTPS = 0x01, @@ -35,7 +29,6 @@ enum gte_opcodes { GTE_NCCT = 0x3f, }; -extern int reg_cop2d[], reg_cop2c[]; extern void *gte_handlers[64]; extern void *gte_handlers_nf[64]; extern const char *gte_regnames[64]; diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 2bcf66549..58e057b58 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -65,8 +65,8 @@ DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) /* psxRegs */ @DRC_VAR(reg, 128) -DRC_VAR(lo, 4) -DRC_VAR(hi, 4) +@DRC_VAR(lo, 4) +@DRC_VAR(hi, 4) DRC_VAR(reg_cop0, 128) DRC_VAR(reg_cop2d, 128) DRC_VAR(reg_cop2c, 128) @@ -155,7 +155,7 @@ FUNCTION(dyna_linker): mov r5, r1 lsl r6, r6, #8 /* must not compile - that might expire the caller block */ - mov r1, #0 + mov r1, #0 /* ndrc_compile_mode */ bl ndrc_get_addr_ht_param movs r8, r0 @@ -404,12 +404,19 @@ invalidate_addr_call: .size invalidate_addr_call, .-invalidate_addr_call .align 2 -FUNCTION(new_dyna_start): +FUNCTION(new_dyna_start_at): /* ip is stored to conform EABI alignment */ + stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr} + mov fp, r0 /* dynarec_local */ + mov r0, r1 + b new_dyna_start_at_e + +FUNCTION(new_dyna_start): stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr} mov fp, r0 /* dynarec_local */ ldr r0, [fp, #LO_pcaddr] bl ndrc_get_addr_ht +new_dyna_start_at_e: ldr r1, [fp, #LO_next_interupt] ldr r10, [fp, #LO_cycle] str r1, [fp, #LO_last_count] diff --git 
a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index fa8a41176..9e61ea1e5 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -67,8 +67,8 @@ DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) /* psxRegs */ #DRC_VAR(reg, 128) -DRC_VAR(lo, 4) -DRC_VAR(hi, 4) +#DRC_VAR(lo, 4) +#DRC_VAR(hi, 4) DRC_VAR(reg_cop0, 128) DRC_VAR(reg_cop2d, 128) DRC_VAR(reg_cop2c, 128) @@ -184,21 +184,28 @@ FUNCTION(jump_to_new_pc): /* stack must be aligned by 16, and include space for save_regs() use */ .align 2 +FUNCTION(new_dyna_start_at): + stp x29, x30, [sp, #-SSP_ALL]! + mov rFP, x0 + b new_dyna_start_at_e + FUNCTION(new_dyna_start): stp x29, x30, [sp, #-SSP_ALL]! - ldr w1, [x0, #LO_next_interupt] - ldr w2, [x0, #LO_cycle] + mov rFP, x0 + ldr w0, [rFP, #LO_pcaddr] + bl ndrc_get_addr_ht + mov x1, x0 +new_dyna_start_at_e: + ldr w3, [rFP, #LO_next_interupt] + ldr w2, [rFP, #LO_cycle] stp x19, x20, [sp, #16*1] stp x21, x22, [sp, #16*2] stp x23, x24, [sp, #16*3] stp x25, x26, [sp, #16*4] stp x27, x28, [sp, #16*5] - mov rFP, x0 - ldr w0, [rFP, #LO_pcaddr] - str w1, [rFP, #LO_last_count] - sub rCC, w2, w1 - bl ndrc_get_addr_ht - br x0 + str w3, [rFP, #LO_last_count] + sub rCC, w2, w3 + br x1 ESIZE(new_dyna_start, .-new_dyna_start) .align 2 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index ed2f4c638..c2899e42f 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -69,6 +69,20 @@ static Jit g_jit; //#define inv_debug printf #define inv_debug(...) +// from linkage_* +extern int cycle_count; // ... 
until end of the timeslice, counts -N -> 0 (CCREG) +extern int last_count; // last absolute target, often = next_interupt +extern int pcaddr; +extern int pending_exception; +extern int branch_target; + +/* same as psxRegs.CP0.n.* */ +extern int reg_cop0[]; +extern int reg_cop2d[], reg_cop2c[]; + +extern uintptr_t ram_offset; +extern uintptr_t mini_ht[32][2]; + #ifdef __i386__ #include "assem_x86.h" #endif @@ -83,7 +97,6 @@ static Jit g_jit; #endif #define RAM_SIZE 0x200000 -#define MAXBLOCK 2048 #define MAX_OUTPUT_BLOCK_SIZE 262144 #define EXPIRITY_OFFSET (MAX_OUTPUT_BLOCK_SIZE * 2) #define PAGE_COUNT 1024 @@ -100,6 +113,8 @@ static Jit g_jit; #define TC_REDUCE_BYTES 0 #endif +struct ndrc_globals ndrc_g; + struct ndrc_tramp { struct tramp_insns ops[2048 / sizeof(struct tramp_insns)]; @@ -269,7 +284,7 @@ static struct compile_info static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs static uint64_t gte_rt[MAXBLOCK]; static uint64_t gte_unneeded[MAXBLOCK]; - static u_int smrv[32]; // speculated MIPS register values + unsigned int ndrc_smrv_regs[32]; // speculated MIPS register values static u_int smrv_strong; // mask or regs that are likely to have correct values static u_int smrv_weak; // same, but somewhat less likely static u_int smrv_strong_next; // same, but after current insn executes @@ -319,20 +334,7 @@ static struct compile_info #define stat_clear(s) #endif - int new_dynarec_hacks; - int new_dynarec_hacks_pergame; - int new_dynarec_hacks_old; - int new_dynarec_did_compile; - - #define HACK_ENABLED(x) ((new_dynarec_hacks | new_dynarec_hacks_pergame) & (x)) - - extern int cycle_count; // ... 
until end of the timeslice, counts -N -> 0 (CCREG) - extern int last_count; // last absolute target, often = next_interupt - extern int pcaddr; - extern int pending_exception; - extern int branch_target; - extern uintptr_t ram_offset; - extern uintptr_t mini_ht[32][2]; + #define HACK_ENABLED(x) ((ndrc_g.hacks | ndrc_g.hacks_pergame) & (x)) /* registers that may be allocated */ /* 1-31 gpr */ @@ -403,7 +405,6 @@ void jump_to_new_pc(); void call_gteStall(); void new_dyna_leave(); -void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile); void *ndrc_get_addr_ht(u_int vaddr); void ndrc_add_jump_out(u_int vaddr, void *src); void ndrc_write_invalidate_one(u_int addr); @@ -494,6 +495,7 @@ static void end_tcache_write(void *start, void *end) sceKernelSyncVMDomain(sceBlock, start, len); #elif defined(_3DS) ctr_flush_invalidate_cache(); + ndrc_g.thread.cache_dirty = 1; #elif defined(HAVE_LIBNX) if (g_jit.type == JitType_CodeMemory) { armDCacheClean(start, len); @@ -502,8 +504,8 @@ static void end_tcache_write(void *start, void *end) __asm__ volatile("isb" ::: "memory"); } #elif defined(__aarch64__) - // as of 2021, __clear_cache() is still broken on arm64 - // so here is a custom one :( + // __clear_cache() doesn't handle differing cacheline sizes on big.LITTLE and + // leaves it to the kernel to virtualize ctr_el0, which some old kernels don't do clear_cache_arm64(start, end); #else __clear_cache(start, end); @@ -597,7 +599,6 @@ static void do_clear_cache(void) #define NO_CYCLE_PENALTY_THR 12 -int cycle_multiplier_old; static int cycle_multiplier_active; static int CLOCK_ADJUST(int x) @@ -726,7 +727,7 @@ static int doesnt_expire_soon(u_char *tcaddr) return diff > EXPIRITY_OFFSET + MAX_OUTPUT_BLOCK_SIZE; } -static unused void check_for_block_changes(u_int start, u_int end) +static attr_unused void check_for_block_changes(u_int start, u_int end) { u_int start_page = get_page_prev(start); u_int end_page = get_page(end - 1); @@ -805,7 +806,7 @@ static noinline u_int 
generate_exception(u_int pc) // Get address from virtual address // This is called from the recompiled JR/JALR instructions -static void noinline *get_addr(u_int vaddr, int can_compile) +static void noinline *get_addr(const u_int vaddr, enum ndrc_compile_mode compile_mode) { u_int start_page = get_page_prev(vaddr); u_int i, page, end_page = get_page(vaddr); @@ -833,18 +834,29 @@ static void noinline *get_addr(u_int vaddr, int can_compile) if (found_clean) return found_clean; - if (!can_compile) + if (compile_mode == ndrc_cm_no_compile) return NULL; +#ifdef NDRC_THREAD + if (ndrc_g.thread.handle && compile_mode == ndrc_cm_compile_live) { + psxRegs.pc = vaddr; + return new_dyna_leave; + } + if (!ndrc_g.thread.handle) +#endif + memcpy(ndrc_smrv_regs, psxRegs.GPR.r, sizeof(ndrc_smrv_regs)); int r = new_recompile_block(vaddr); if (likely(r == 0)) return ndrc_get_addr_ht(vaddr); - return ndrc_get_addr_ht(generate_exception(vaddr)); + if (compile_mode == ndrc_cm_compile_live) + return ndrc_get_addr_ht(generate_exception(vaddr)); + + return NULL; } // Look up address in hash table first -void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile) +void *ndrc_get_addr_ht_param(unsigned int vaddr, enum ndrc_compile_mode compile_mode) { //check_for_block_changes(vaddr, vaddr + MAXBLOCK); const struct ht_entry *ht_bin = hash_table_get(vaddr); @@ -852,12 +864,14 @@ void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile) stat_inc(stat_ht_lookups); if (ht_bin->vaddr[0] == vaddr_a) return ht_bin->tcaddr[0]; if (ht_bin->vaddr[1] == vaddr_a) return ht_bin->tcaddr[1]; - return get_addr(vaddr, can_compile); + return get_addr(vaddr, compile_mode); } +// "usual" addr lookup for indirect branches, etc +// to be used by currently running code only void *ndrc_get_addr_ht(u_int vaddr) { - return ndrc_get_addr_ht_param(vaddr, 1); + return ndrc_get_addr_ht_param(vaddr, ndrc_cm_compile_live); } static void clear_all_regs(signed char regmap[]) @@ -1239,6 +1253,7 @@ static const struct { 
FUNCNAME(cc_interrupt), FUNCNAME(gen_interupt), FUNCNAME(ndrc_get_addr_ht), + FUNCNAME(ndrc_get_addr_ht_param), FUNCNAME(jump_handler_read8), FUNCNAME(jump_handler_read16), FUNCNAME(jump_handler_read32), @@ -1615,6 +1630,24 @@ void new_dynarec_invalidate_range(unsigned int start, unsigned int end) invalidate_range(start, end, NULL, NULL); } +// check if the range may need invalidation (must be thread-safe) +int new_dynarec_quick_check_range(unsigned int start, unsigned int end) +{ + u_int start_page = get_page_prev(start); + u_int end_page = get_page(end - 1); + u_int page; + + if (inv_code_start <= start && end <= inv_code_end) + return 0; + for (page = start_page; page <= end_page; page++) { + if (blocks[page]) { + //SysPrintf("quick hit %x-%x\n", start, end); + return 1; + } + } + return 0; +} + static void ndrc_write_invalidate_many(u_int start, u_int end) { // this check is done by the caller @@ -2845,8 +2878,8 @@ static void *emit_fastpath_cmp_jump(int i, const struct regstat *i_regs, assert(addr >= 0); *offset_reg = -1; if(((smrv_strong|smrv_weak)>>mr)&1) { - type=get_ptr_mem_type(smrv[mr]); - //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type); + type=get_ptr_mem_type(ndrc_smrv_regs[mr]); + //printf("set %08x @%08x r%d %d\n", ndrc_smrv_regs[mr], start+i*4, mr, type); } else { // use the mirror we are running on @@ -4209,28 +4242,27 @@ static void intcall_assemble(int i, const struct regstat *i_regs, int ccadj_) static void speculate_mov(int rs,int rt) { - if(rt!=0) { - smrv_strong_next|=1<=0) { if(get_final_value(hr,i,&value)) - smrv[dops[i].rt1]=value; - else smrv[dops[i].rt1]=constmap[i][hr]; + ndrc_smrv_regs[dops[i].rt1]=value; + else ndrc_smrv_regs[dops[i].rt1]=constmap[i][hr]; smrv_strong_next|=1<>24)==0xa0)) { + if(start<0x2000&&(dops[i].rt1==26||(ndrc_smrv_regs[dops[i].rt1]>>24)==0xa0)) { // special case for BIOS - smrv[dops[i].rt1]=0xa0000000; + ndrc_smrv_regs[dops[i].rt1]=0xa0000000; 
smrv_strong_next|=1<>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst); #endif } @@ -6251,14 +6283,14 @@ void new_dynarec_clear_full(void) stat_clear(stat_blocks); stat_clear(stat_links); - if (cycle_multiplier_old != Config.cycle_multiplier - || new_dynarec_hacks_old != new_dynarec_hacks) + if (ndrc_g.cycle_multiplier_old != Config.cycle_multiplier + || ndrc_g.hacks_old != ndrc_g.hacks) { SysPrintf("ndrc config: mul=%d, ha=%x, pex=%d\n", - get_cycle_multiplier(), new_dynarec_hacks, Config.PreciseExceptions); + get_cycle_multiplier(), ndrc_g.hacks, Config.PreciseExceptions); } - cycle_multiplier_old = Config.cycle_multiplier; - new_dynarec_hacks_old = new_dynarec_hacks; + ndrc_g.cycle_multiplier_old = Config.cycle_multiplier; + ndrc_g.hacks_old = ndrc_g.hacks; } static int pgsize(void) @@ -6516,7 +6548,7 @@ void new_dynarec_load_blocks(const void *save, int size) psxRegs.GPR.r[i] = 0x1f800000; } - ndrc_get_addr_ht(sblocks[b].addr); + ndrc_get_addr_ht_param(sblocks[b].addr, ndrc_cm_compile_offline); for (f = sblocks[b].regflags, i = 0; f; f >>= 1, i++) { if (f & 1) @@ -8368,7 +8400,6 @@ static noinline void pass5a_preallocate1(void) static noinline void pass5b_preallocate2(void) { int i, hr, limit = min(slen - 1, MAXBLOCK - 2); - assert(slen < MAXBLOCK - 1); for (i = 0; i < limit; i++) { if (!i || !dops[i-1].is_jump) @@ -8987,7 +9018,7 @@ static int new_recompile_block(u_int addr) } start = addr; - new_dynarec_did_compile=1; + ndrc_g.did_compile = 1; if (Config.HLE && start == 0x80001000) // hlecall { void *beginning = start_block(); diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index d18ff6309..8c168084b 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -1,11 +1,9 @@ #define NEW_DYNAREC 1 -extern int pcaddr; +#define MAXBLOCK 2048 // in mips instructions + extern int pending_exception; extern int stop; -extern int new_dynarec_did_compile; - -extern int 
cycle_multiplier_old; #define NDHACK_NO_SMC_CHECK (1<<0) #define NDHACK_GTE_UNNEEDED (1<<1) @@ -13,17 +11,48 @@ extern int cycle_multiplier_old; #define NDHACK_OVERRIDE_CYCLE_M (1<<3) #define NDHACK_NO_STALLS (1<<4) #define NDHACK_NO_COMPAT_HACKS (1<<5) -extern int new_dynarec_hacks; -extern int new_dynarec_hacks_pergame; -extern int new_dynarec_hacks_old; +#define NDHACK_THREAD_FORCE (1<<6) +#define NDHACK_THREAD_FORCE_ON (1<<7) + +struct ndrc_globals +{ + int hacks; + int hacks_pergame; + int hacks_old; + int did_compile; + int cycle_multiplier_old; + struct { + void *handle; + void *lock; + void *cond; + unsigned int addr; + int busy; + int exit; + int cache_dirty; // 3ds only + } thread; +}; +extern struct ndrc_globals ndrc_g; void new_dynarec_init(void); void new_dynarec_cleanup(void); void new_dynarec_clear_full(void); -void new_dyna_start(void *context); int new_dynarec_save_blocks(void *save, int size); void new_dynarec_load_blocks(const void *save, int size); void new_dynarec_print_stats(void); +int new_dynarec_quick_check_range(unsigned int start, unsigned int end); void new_dynarec_invalidate_range(unsigned int start, unsigned int end); void new_dynarec_invalidate_all_pages(void); + +void new_dyna_start(void *context); +void new_dyna_start_at(void *context, void *compiled_code); + +enum ndrc_compile_mode { + ndrc_cm_no_compile = 0, + ndrc_cm_compile_live, // from executing code, vaddr is the current pc + ndrc_cm_compile_offline, + ndrc_cm_compile_in_thread, +}; +void *ndrc_get_addr_ht_param(unsigned int vaddr, enum ndrc_compile_mode compile_mode); + +extern unsigned int ndrc_smrv_regs[32]; diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index 306085351..fadbf050b 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -1228,6 +1228,15 @@ void intExecuteBlock(enum blockExecCaller caller) { execI_(memRLUT, regs_); } +static void intExecuteBlockBp(enum blockExecCaller caller) { + psxRegisters *regs_ = 
&psxRegs; + u8 **memRLUT = psxMemRLUT; + + branchSeen = 0; + while (!branchSeen) + execIbp(memRLUT, regs_); +} + static void intClear(u32 Addr, u32 Size) { } @@ -1316,6 +1325,7 @@ void intApplyConfig() { psxSPC[0x08] = psxJRe; psxSPC[0x09] = psxJALRe; psxInt.Execute = intExecuteBp; + psxInt.ExecuteBlock = intExecuteBlockBp; } else { psxBSC[0x20] = psxLB; psxBSC[0x21] = psxLH; @@ -1333,6 +1343,7 @@ void intApplyConfig() { psxSPC[0x08] = psxJR; psxSPC[0x09] = psxJALR; psxInt.Execute = intExecute; + psxInt.ExecuteBlock = intExecuteBlock; } // the dynarec may occasionally call the interpreter, in such a case the diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 03aeee196..93a53ced0 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -49,6 +49,7 @@ enum R3000Anote { enum blockExecCaller { EXEC_CALLER_BOOT, EXEC_CALLER_HLE, + EXEC_CALLER_OTHER, }; typedef struct { @@ -213,7 +214,8 @@ typedef struct { extern psxRegisters psxRegs; /* new_dynarec stuff */ -void new_dyna_freeze(void *f, int mode); +void ndrc_freeze(void *f, int mode); +void ndrc_clear_full(void); int psxInit(); void psxReset(); From fad9f7555c6a156928510fbed14a3221d23a0874 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 13 Oct 2024 21:37:10 +0300 Subject: [PATCH 570/597] standalone: enable the new threaded stuff --- Makefile | 25 ++++++++++++++++++------- configure | 34 +++++++++++++++++++--------------- frontend/main.c | 6 ++++++ frontend/menu.c | 9 +++++++++ libpcsxcore/cdrom-async.c | 5 +++++ libpcsxcore/cdrom-async.h | 1 + 6 files changed, 58 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index 0d8991620..f30816fa9 100644 --- a/Makefile +++ b/Makefile @@ -4,13 +4,20 @@ TARGET ?= pcsx CFLAGS += -Wall -ggdb -Iinclude -ffast-math ifndef DEBUG -CFLAGS += -O2 -DNDEBUG +CFLAGS += -O2 +ifndef ASSERTS +CFLAGS += -DNDEBUG +endif endif ifeq ($(DEBUG_ASAN), 1) CFLAGS += -fsanitize=address LDFLAGS += -fsanitize=address #LDFLAGS += -static-libasan endif +ifneq 
($(NO_FSECTIONS), 1) +CFLAGS += -ffunction-sections -fdata-sections +LDFLAGS += -Wl,--gc-sections +endif CFLAGS += -DP_HAVE_MMAP=$(if $(NO_MMAP),0,1) \ -DP_HAVE_PTHREAD=$(if $(NO_PTHREAD),0,1) \ -DP_HAVE_POSIX_MEMALIGN=$(if $(NO_POSIX_MEMALIGN),0,1) \ @@ -69,6 +76,13 @@ OBJS += libpcsxcore/gte_neon.o endif libpcsxcore/psxbios.o: CFLAGS += -Wno-nonnull +ifeq "$(USE_ASYNC_CDROM)" "1" +libpcsxcore/cdrom-async.o: CFLAGS += -DUSE_ASYNC_CDROM +frontend/libretro.o: CFLAGS += -DUSE_ASYNC_CDROM +frontend/menu.o: CFLAGS += -DUSE_ASYNC_CDROM +USE_RTHREADS := 1 +endif + # dynarec ifeq "$(DYNAREC)" "lightrec" CFLAGS += -Ideps/lightning/include -Ideps/lightrec -Iinclude/lightning -Iinclude/lightrec \ @@ -297,8 +311,7 @@ frontend/main.o frontend/menu.o: CFLAGS += -include frontend/pandora/ui_feat.h frontend/libpicofe/linux/plat.o: CFLAGS += -DPANDORA USE_PLUGIN_LIB = 1 USE_FRONTEND = 1 -CFLAGS += -gdwarf-3 -ffunction-sections -fdata-sections -LDFLAGS += -Wl,--gc-sections +CFLAGS += -gdwarf-3 endif ifeq "$(PLATFORM)" "caanoo" OBJS += frontend/libpicofe/gp2x/in_gp2x.o frontend/warm/warm.o @@ -332,10 +345,6 @@ OBJS += deps/libretro-common/memmap/memalign.o OBJS += deps/libretro-common/vfs/vfs_implementation_cdrom.o CFLAGS += -DHAVE_CDROM endif -ifeq "$(USE_ASYNC_CDROM)" "1" -CFLAGS += -DUSE_ASYNC_CDROM -USE_RTHREADS := 1 -endif ifeq "$(USE_LIBRETRO_VFS)" "1" OBJS += deps/libretro-common/compat/compat_posix_string.o OBJS += deps/libretro-common/compat/fopen_utf8.o @@ -398,6 +407,8 @@ frontend/main.o: CFLAGS += -DBUILTIN_GPU=$(BUILTIN_GPU) frontend/menu.o frontend/main.o: frontend/revision.h frontend/plat_sdl.o frontend/libretro.o: frontend/revision.h +CFLAGS += $(CFLAGS_LAST) + frontend/libpicofe/%.c: @echo "libpicofe module is missing, please run:" @echo "git submodule init && git submodule update" diff --git a/configure b/configure index be2c4ff40..0d315b687 100755 --- a/configure +++ b/configure @@ -24,25 +24,25 @@ compile_binary() check_define() { - $CC -E -dD $CFLAGS 
include/arm_features.h | grep -q "$1" || return 1 + $CC -E -dD $CFLAGS include/arm_features.h | grep -v '#undef' | grep -q "$1" || return 1 return 0 } check_define_val() { - $CC -E -dD $CFLAGS include/arm_features.h | grep "$1" | awk '{print $3}' + $CC -E -dD $CFLAGS include/arm_features.h | grep -v '#undef' | grep "$1" | awk '{print $3}' return 0 } # setting options to "yes" or "no" will make that choice default, # "" means "autodetect". -platform_list="generic pandora maemo caanoo libretro" +platform_list="generic pandora maemo caanoo" platform="generic" builtin_gpu_list="neon peops unai unai_old" dynarec_list="ari64 lightrec none" builtin_gpu="" -sound_driver_list="oss alsa pulseaudio sdl libretro" +sound_driver_list="oss alsa pulseaudio sdl" sound_drivers="" plugins="plugins/spunull/spunull.so \ plugins/dfxvideo/gpu_peops.so plugins/gpu_unai_old/gpu_unai_old.so plugins/gpu_unai/gpu_unai.so" @@ -56,6 +56,7 @@ have_tslib="" have_gles="" have_c64x_dsp="" dynarec="" +multithreading="yes" need_sdl="no" need_xlib="no" need_libpicofe="yes" @@ -109,21 +110,20 @@ set_platform() optimize_cortexa8="yes" have_arm_neon="yes" need_xlib="yes" + multithreading="no" ;; maemo) drc_cache_base="yes" optimize_cortexa8="yes" have_arm_neon="yes" + multithreading="no" ;; caanoo) sound_drivers="oss" drc_cache_base="yes" optimize_arm926ej="yes" need_warm="yes" - ;; - libretro) - sound_drivers="libretro" - need_libpicofe="no" + multithreading="no" ;; *) fail "unsupported platform: $platform" @@ -146,6 +146,10 @@ for opt do ;; --disable-neon) have_arm_neon="no" ;; + --enable-threads) multithreading="yes" + ;; + --disable-threads) multithreading="no" + ;; --dynarec=*) dynarec="$optarg" ;; --disable-dynarec) dynarec="no" @@ -166,6 +170,8 @@ if [ "$show_help" = "yes" ]; then echo " available: $sound_driver_list" echo " --enable-neon" echo " --disable-neon enable/disable ARM NEON optimizations [guessed]" + echo " --enable-threads" + echo " --disable-threads enable/disable multithreaded 
features [guessed]" echo " --dynarec=NAME select dynamic recompiler [guessed]" echo " available: $dynarec_list" echo "influential environment variables:" @@ -340,10 +346,6 @@ generic) maemo) CFLAGS="$CFLAGS -DMAEMO -DMAEMO_CHANGES" ;; -libretro) - CFLAGS="$CFLAGS -fPIC" - MAIN_LDFLAGS="$MAIN_LDFLAGS -shared -Wl,--no-undefined" - ;; esac # header/library presence tests @@ -570,6 +572,7 @@ echo "tslib support $have_tslib" if [ "$platform" = "generic" ]; then echo "OpenGL ES output $have_gles" fi +echo "multithreading $multithreading" echo "# Automatically generated by configure" > $config_mak printf "# Configured with:" >> $config_mak @@ -587,9 +590,6 @@ echo "MAIN_LDLIBS += $MAIN_LDLIBS" >> $config_mak echo "PLUGIN_CFLAGS += $PLUGIN_CFLAGS" >> $config_mak echo >> $config_mak -if [ "$platform" = "libretro" ]; then - echo "TARGET = libretro.so" >> $config_mak -fi echo "ARCH = $ARCH" >> $config_mak echo "PLATFORM = $platform" >> $config_mak echo "BUILTIN_GPU = $builtin_gpu" >> $config_mak @@ -616,6 +616,10 @@ fi if [ "$have_c64x_dsp" = "yes" ]; then echo "HAVE_C64_TOOLS = 1" >> $config_mak fi +if [ "$multithreading" = "yes" ]; then + echo "USE_ASYNC_CDROM = 1" >> $config_mak + echo "NDRC_THREAD = 1" >> $config_mak +fi # use pandora's skin (for now) test -e skin || ln -s frontend/pandora/skin skin diff --git a/frontend/main.c b/frontend/main.c index 750e5661c..0bb4a6df3 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -14,6 +14,9 @@ #if !defined(_WIN32) && !defined(NO_DYLIB) #include #endif +#ifdef HAVE_CPU_FEATURES +#include "features/features_cpu.h" +#endif #include "main.h" #include "plugin.h" @@ -510,6 +513,9 @@ int emu_core_preinit(void) int emu_core_init(void) { SysPrintf("Starting PCSX-ReARMed " REV "%s\n", get_build_info()); +#ifdef HAVE_CPU_FEATURES + SysPrintf("%d cpu core(s) detected\n", cpu_features_get_core_amount()); +#endif #ifndef NO_FRONTEND check_profile(); diff --git a/frontend/menu.c b/frontend/menu.c index 49ffed9a6..8549279ac 100644 --- 
a/frontend/menu.c +++ b/frontend/menu.c @@ -104,6 +104,7 @@ static char last_selected_fname[MAXPATHLEN]; static int config_save_counter, region, in_type_sel1, in_type_sel2; static int psx_clock; static int memcard1_sel = -1, memcard2_sel = -1; +static int cd_buf_count; extern int g_autostateld_opt; static int menu_iopts[8]; int g_opts, g_scaler, g_gamma = 100; @@ -436,6 +437,7 @@ static const struct { CE_INTVAL(memcard1_sel), CE_INTVAL(memcard2_sel), CE_INTVAL(g_autostateld_opt), + CE_INTVAL(cd_buf_count), CE_INTVAL_N("adev0_is_nublike", in_adev_is_nublike[0]), CE_INTVAL_N("adev1_is_nublike", in_adev_is_nublike[1]), CE_INTVAL_V(frameskip, 4), @@ -530,6 +532,8 @@ static int menu_write_config(int is_game) return -1; } + cd_buf_count = cdra_get_buf_count(); + for (i = 0; i < ARRAY_SIZE(config_data); i++) { fprintf(f, "%s = ", config_data[i].name); switch (config_data[i].len) { @@ -691,6 +695,7 @@ int menu_load_config(int is_game) } keys_load_all(cfg); + cdra_set_buf_count(cd_buf_count); ret = 0; fail_read: free(cfg); @@ -1687,6 +1692,9 @@ static menu_entry e_menu_adv_options[] = mee_enum_h ("GPU l-list slow walking",0, menu_iopts[AMO_GPUL], men_autooo, h_cfg_gpul), mee_enum_h ("Fractional framerate", 0, menu_iopts[AMO_FFPS], men_autooo, h_cfg_ffps), mee_onoff_h ("Turbo CD-ROM ", 0, menu_iopts[AMO_TCD], 1, h_cfg_tcd), +#ifdef USE_ASYNC_CDROM + mee_range ("CD-ROM read-ahead", 0, cd_buf_count, 0, 1024), +#endif #if !defined(DRC_DISABLE) || defined(LIGHTREC) mee_onoff_h ("Disable dynarec (slow!)",0, menu_iopts[AMO_CPU], 1, h_cfg_nodrc), #endif @@ -1721,6 +1729,7 @@ static int menu_loop_adv_options(int id, int keys) *opts[i].opt = *opts[i].mopt; Config.GpuListWalking = menu_iopts[AMO_GPUL] - 1; Config.FractionalFramerate = menu_iopts[AMO_FFPS] - 1; + cdra_set_buf_count(cd_buf_count); return 0; } diff --git a/libpcsxcore/cdrom-async.c b/libpcsxcore/cdrom-async.c index 2cb304733..4da0f3744 100644 --- a/libpcsxcore/cdrom-async.c +++ b/libpcsxcore/cdrom-async.c @@ -487,6 
+487,10 @@ void cdra_set_buf_count(int newcount) cdra_start_thread(); } +int cdra_get_buf_count(void) +{ + return acdrom.buf_cnt; +} #else // phys. CD-ROM without a cache is unusable so not implemented @@ -561,6 +565,7 @@ int cdra_is_physical(void) { return 0; } int cdra_check_eject(int *inserted) { return 0; } void cdra_stop_thread(void) {} void cdra_set_buf_count(int newcount) {} +int cdra_get_buf_count(void) { return 0; } #endif diff --git a/libpcsxcore/cdrom-async.h b/libpcsxcore/cdrom-async.h index 02fe6b717..22747a85b 100644 --- a/libpcsxcore/cdrom-async.h +++ b/libpcsxcore/cdrom-async.h @@ -21,6 +21,7 @@ int cdra_is_physical(void); int cdra_check_eject(int *inserted); void cdra_stop_thread(void); void cdra_set_buf_count(int count); +int cdra_get_buf_count(void); void *cdra_getBuffer(void); From d135f6d2ee8f4d19c4bc5ed13b86496ea293528d Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 15 Oct 2024 01:49:22 +0300 Subject: [PATCH 571/597] 3ds: customize thread creation --- Makefile | 2 +- frontend/3ds/3ds_utils.h | 3 +- frontend/3ds/pthread.h | 3 + frontend/libretro-rthreads.c | 96 +++++++++++++++++++++++++++++--- frontend/libretro-rthreads.h | 18 +++++- frontend/libretro.c | 12 ++++ frontend/libretro_core_options.h | 24 ++++++++ frontend/main.c | 8 +-- libpcsxcore/cdrom-async.c | 3 +- libpcsxcore/new_dynarec/emu_if.c | 2 +- 10 files changed, 151 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index f30816fa9..69b7ab3d3 100644 --- a/Makefile +++ b/Makefile @@ -369,7 +369,7 @@ endif # $(PLATFORM) == "libretro" ifeq "$(USE_RTHREADS)" "1" OBJS += frontend/libretro-rthreads.o OBJS += deps/libretro-common/features/features_cpu.o -frontend/main.o: CFLAGS += -DHAVE_CPU_FEATURES +frontend/main.o: CFLAGS += -DHAVE_RTHREADS INC_LIBRETRO_COMMON := 1 endif ifeq "$(INC_LIBRETRO_COMMON)" "1" diff --git a/frontend/3ds/3ds_utils.h b/frontend/3ds/3ds_utils.h index 75ab63b91..f7c8ddc24 100644 --- a/frontend/3ds/3ds_utils.h +++ b/frontend/3ds/3ds_utils.h @@ -3,7 +3,8 
@@ #include #include -#include <3ds.h> +#include <3ds/os.h> +#include <3ds/svc.h> #ifdef OS_HEAP_AREA_BEGIN // defined in libctru 2.0+ #define USE_CTRULIB_2 1 diff --git a/frontend/3ds/pthread.h b/frontend/3ds/pthread.h index 76f1681c9..63afa32ce 100644 --- a/frontend/3ds/pthread.h +++ b/frontend/3ds/pthread.h @@ -23,8 +23,11 @@ #ifndef _CTR_PTHREAD_WRAP_CTR_ #define _CTR_PTHREAD_WRAP_CTR_ +#include <3ds/thread.h> +#include <3ds/services/apt.h> #include "3ds_utils.h" +#include #include #include diff --git a/frontend/libretro-rthreads.c b/frontend/libretro-rthreads.c index 72784d4d2..245075087 100644 --- a/frontend/libretro-rthreads.c +++ b/frontend/libretro-rthreads.c @@ -1,20 +1,98 @@ -// temporary(?) workaround: -// https://github.com/libretro/libretro-common/pull/216 +#ifndef _GNU_SOURCE +#define _GNU_SOURCE // *_np +#endif #ifdef _3DS #include <3ds/svc.h> +#include <3ds/os.h> #include <3ds/services/apt.h> #include #endif #include "../deps/libretro-common/rthreads/rthreads.c" +#include "features/features_cpu.h" +#include "libretro-rthreads.h" + +// pcsxr "extensions" +extern void SysPrintf(const char *fmt, ...); + +#ifdef _3DS +static bool is_new_3ds; +#endif -// an "extension" -int sthread_set_name(sthread_t *thread, const char *name) +void pcsxr_sthread_init(void) { -#if defined(__GLIBC__) || defined(__MACH__) || \ - (defined(__ANDROID_API__) && __ANDROID_API__ >= 26) - if (thread) - return pthread_setname_np(thread->id, name); + SysPrintf("%d cpu core(s) detected\n", cpu_features_get_core_amount()); +#ifdef _3DS + int64_t version = 0; + APT_CheckNew3DS(&is_new_3ds); + svcGetSystemInfo(&version, 0x10000, 0); + + APT_SetAppCpuTimeLimit(35); + u32 percent = -1; + APT_GetAppCpuTimeLimit(&percent); + + SysPrintf("%s3ds detected, v%d.%d, AppCpuTimeLimit=%ld\n", + is_new_3ds ? 
"new" : "old", (int)GET_VERSION_MAJOR(version), + (int)GET_VERSION_MINOR(version), percent); +#endif +} + +sthread_t *pcsxr_sthread_create(void (*thread_func)(void *), + enum pcsxr_thread_type type) +{ + sthread_t *h = NULL; +#ifdef _3DS + Thread ctr_thread; + int core_id = 0; + s32 prio = 0x30; + + h = calloc(1, sizeof(*h)); + if (!h) + return NULL; + + svcGetThreadPriority(&prio, CUR_THREAD_HANDLE); + + switch (type) { + case PCSXRT_CDR: + case PCSXRT_SPU: + core_id = 1; + break; + case PCSXRT_DRC: + case PCSXRT_GPU: + core_id = is_new_3ds ? 2 : 1; + break; + case PCSXRT_COUNT: + break; + } + + ctr_thread = threadCreate(thread_func, NULL, STACKSIZE, prio, core_id, false); + if (!ctr_thread) { + if (core_id == 1) { + SysPrintf("threadCreate pcsxt %d core %d failed\n", + type, core_id); + core_id = is_new_3ds ? 2 : -1; + ctr_thread = threadCreate(thread_func, NULL, STACKSIZE, + prio, core_id, false); + } + } + if (!ctr_thread) { + SysPrintf("threadCreate pcsxt %d core %d failed\n", type, core_id); + free(h); + return NULL; + } + h->id = (pthread_t)ctr_thread; +#else + h = sthread_create(thread_func, NULL); + #if defined(__GLIBC__) || defined(__MACH__) || \ + (defined(__ANDROID_API__) && __ANDROID_API__ >= 26) + if (h && (unsigned int)type < (unsigned int)PCSXRT_COUNT) + { + const char * const pcsxr_tnames[PCSXRT_COUNT] = { + "pcsxr-cdrom", "pcsxr-drc", "pcsxr-gpu", "pcsxr-spu" + }; + pthread_setname_np(h->id, pcsxr_tnames[type]); + } + #endif #endif - return -1; + return h; } diff --git a/frontend/libretro-rthreads.h b/frontend/libretro-rthreads.h index 851d448eb..6a2d004b2 100644 --- a/frontend/libretro-rthreads.h +++ b/frontend/libretro-rthreads.h @@ -1,3 +1,19 @@ +#ifndef __LIBRETRO_PCSXR_RTHREADS_H__ +#define __LIBRETRO_PCSXR_RTHREADS_H__ + #include "rthreads/rthreads.h" -int sthread_set_name(sthread_t *thread, const char *name); +enum pcsxr_thread_type +{ + PCSXRT_CDR = 0, + PCSXRT_DRC, + PCSXRT_GPU, + PCSXRT_SPU, + PCSXRT_COUNT // must be last +}; + +void 
pcsxr_sthread_init(void); +sthread_t *pcsxr_sthread_create(void (*thread_func)(void*), + enum pcsxr_thread_type type); + +#endif // __LIBRETRO_PCSXR_RTHREADS_H__ diff --git a/frontend/libretro.c b/frontend/libretro.c index f7eb64cdb..ce0e7427a 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -50,6 +50,9 @@ #endif #ifdef _3DS +#include <3ds/svc.h> +#include <3ds/services/apt.h> +#include <3ds/allocator/linear.h> #include "3ds/3ds_utils.h" #endif @@ -2778,6 +2781,15 @@ static void update_variables(bool in_flight) mouse_sensitivity = atof(var.value); } +#ifdef _3DS + var.value = NULL; + var.key = "pcsx_rearmed_3ds_appcputime"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + APT_SetAppCpuTimeLimit(strtol(var.value, NULL, 10)); + } +#endif + if (found_bios) { var.value = NULL; diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index a4ead77ec..8910ad230 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -1612,6 +1612,30 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled", }, +#ifdef _3DS +#define V(x) { #x, NULL } + { + "pcsx_rearmed_3ds_appcputime", + "3DS AppCpuTimeLimit", + NULL, + "% of syscore (core #1) CPU time allocated to the emulator", + NULL, + "speed_hack", + { + V( 5), V(10), + V(15), V(20), + V(25), V(30), + V(35), V(40), + V(45), V(50), + V(55), V(60), + V(65), V(70), + V(75), V(80), + { NULL, NULL}, + }, + "35", + }, +#undef V +#endif // _3DS { "pcsx_rearmed_cd_turbo", "Turbo CD", diff --git a/frontend/main.c b/frontend/main.c index 0bb4a6df3..949ba65b6 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -14,8 +14,8 @@ #if !defined(_WIN32) && !defined(NO_DYLIB) #include #endif -#ifdef HAVE_CPU_FEATURES -#include "features/features_cpu.h" +#ifdef HAVE_RTHREADS +#include "../frontend/libretro-rthreads.h" #endif #include "main.h" @@ -513,10 +513,8 @@ int emu_core_preinit(void) int emu_core_init(void) { SysPrintf("Starting 
PCSX-ReARMed " REV "%s\n", get_build_info()); -#ifdef HAVE_CPU_FEATURES - SysPrintf("%d cpu core(s) detected\n", cpu_features_get_core_amount()); -#endif + pcsxr_sthread_init(); #ifndef NO_FRONTEND check_profile(); check_memcards(); diff --git a/libpcsxcore/cdrom-async.c b/libpcsxcore/cdrom-async.c index 4da0f3744..81b2e12ad 100644 --- a/libpcsxcore/cdrom-async.c +++ b/libpcsxcore/cdrom-async.c @@ -268,12 +268,11 @@ static void cdra_start_thread(void) if (acdrom.buf_cache && acdrom.buf_lock && acdrom.read_lock && acdrom.cond) { int i; - acdrom.thread = sthread_create(cdra_prefetch_thread, NULL); + acdrom.thread = pcsxr_sthread_create(cdra_prefetch_thread, PCSXRT_CDR); for (i = 0; i < acdrom.buf_cnt; i++) acdrom.buf_cache[i].lba = ~0; } if (acdrom.thread) { - sthread_set_name(acdrom.thread, "pcsxr-cdrom"); SysPrintf("cdrom precache: %d buffers%s\n", acdrom.buf_cnt, acdrom.have_subchannel ? " +sub" : ""); } diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index e49580183..57fe663d0 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -499,7 +499,7 @@ static void ari64_thread_init(void) ndrc_g.thread.cond = scond_new(); } if (ndrc_g.thread.lock && ndrc_g.thread.cond) - ndrc_g.thread.handle = sthread_create(ari64_compile_thread, NULL); + ndrc_g.thread.handle = pcsxr_sthread_create(ari64_compile_thread, PCSXRT_DRC); if (ndrc_g.thread.handle) { psxRec.Execute = ari64_execute_threaded; psxRec.ExecuteBlock = ari64_execute_threaded_block; From 8a1f23ef24f2cb2daba6132e882f22d0804f9130 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 15 Oct 2024 03:10:44 +0300 Subject: [PATCH 572/597] libretro: show the actual fps, some extra stats --- frontend/libretro.c | 63 ++++++++++++++++++--------- frontend/libretro_core_options.h | 20 +-------- frontend/main.c | 1 + libpcsxcore/cdrom-async.c | 20 +++++++++ libpcsxcore/cdrom-async.h | 1 + libpcsxcore/new_dynarec/new_dynarec.c | 2 +- 6 files changed, 67 insertions(+), 40 
deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index ce0e7427a..c932625f0 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -72,8 +72,6 @@ #define ISHEXDEC ((buf[cursor] >= '0') && (buf[cursor] <= '9')) || ((buf[cursor] >= 'a') && (buf[cursor] <= 'f')) || ((buf[cursor] >= 'A') && (buf[cursor] <= 'F')) -#define INTERNAL_FPS_SAMPLE_PERIOD 64 - //hack to prevent retroarch freezing when reseting in the menu but not while running with the hot key static int rebootemu = 0; @@ -102,8 +100,7 @@ static int vout_fb_dirty; static int psx_w, psx_h; static bool vout_can_dupe; static bool found_bios; -static bool display_internal_fps = false; -static unsigned frame_count = 0; +static int display_internal_fps; static bool libretro_supports_bitmasks = false; static bool libretro_supports_option_categories = false; static bool show_input_settings = true; @@ -1769,8 +1766,6 @@ bool retro_load_game(const struct retro_game_info *info) { 0 }, }; - frame_count = 0; - environ_cb(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, desc); #ifdef FRONTEND_SUPPORTS_RGB565 @@ -2232,14 +2227,16 @@ static void update_variables(bool in_flight) #endif var.value = NULL; - var.key = "pcsx_rearmed_display_internal_fps"; + var.key = "pcsx_rearmed_display_fps_v2"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - if (strcmp(var.value, "disabled") == 0) - display_internal_fps = false; + if (strcmp(var.value, "extra") == 0) + display_internal_fps = 2; else if (strcmp(var.value, "enabled") == 0) - display_internal_fps = true; + display_internal_fps = 1; + else + display_internal_fps = 0; } var.value = NULL; @@ -3164,24 +3161,50 @@ static void print_internal_fps(void) { if (display_internal_fps) { - frame_count++; + static u32 fps, frame_count_s; + static time_t last_time; + static u32 psx_vsync_count; + u32 psx_vsync_rate = is_pal_mode ? 
50 : 60; + time_t now; - if (frame_count % INTERNAL_FPS_SAMPLE_PERIOD == 0) + psx_vsync_count++; + frame_count_s++; + now = time(NULL); + if (now != last_time) { - unsigned internal_fps = pl_rearmed_cbs.flip_cnt * (is_pal_mode ? 50 : 60) / INTERNAL_FPS_SAMPLE_PERIOD; - char str[64]; - const char *strc = (const char *)str; + fps = frame_count_s; + frame_count_s = 0; + last_time = now; + } - str[0] = '\0'; + if (psx_vsync_count >= psx_vsync_rate) + { + int pos = 0, cd_count; + char str[64]; - snprintf(str, sizeof(str), "Internal FPS: %2d", internal_fps); + if (display_internal_fps > 1) { +#if !defined(DRC_DISABLE) && !defined(LIGHTREC) + if (ndrc_g.did_compile) { + pos = snprintf(str, sizeof(str), "DRC: %d ", ndrc_g.did_compile); + ndrc_g.did_compile = 0; + } +#endif + cd_count = cdra_get_buf_count(); + if (cd_count) { + pos += snprintf(str + pos, sizeof(str) - pos, "CD: %2d/%d ", + cdra_get_buf_cached_approx(), cd_count); + } + } + snprintf(str + pos, sizeof(str) - pos, "FPS: %2d/%2d", + pl_rearmed_cbs.flip_cnt, fps); pl_rearmed_cbs.flip_cnt = 0; + psx_vsync_count = 0; if (msg_interface_version >= 1) { struct retro_message_ext msg = { - strc, + str, 3000, 1, RETRO_LOG_INFO, @@ -3194,15 +3217,13 @@ static void print_internal_fps(void) else { struct retro_message msg = { - strc, + str, 180 }; environ_cb(RETRO_ENVIRONMENT_SET_MESSAGE, &msg); } } } - else - frame_count = 0; } void retro_run(void) diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 8910ad230..384c58a5b 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -163,23 +163,6 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled", }, -#if 0 // ndef _WIN32 // currently disabled, see USE_READ_THREAD in libpcsxcore/cdriso.c - { - "pcsx_rearmed_async_cd", - "CD Access Method (Restart)", - NULL, - "Select method used to read data from content disk images. 'Synchronous' mimics original hardware. 
'Asynchronous' can reduce stuttering on devices with slow storage. 'Pre-Cache (CHD)' loads disk image into memory for faster access (CHD files only).", - NULL, - "system", - { - { "sync", "Synchronous" }, - { "async", "Asynchronous" }, - { "precache", "Pre-Cache (CHD)" }, - { NULL, NULL}, - }, - "sync", - }, -#endif #if defined(HAVE_CDROM) || defined(USE_ASYNC_CDROM) #define V(x) { #x, NULL } { @@ -437,7 +420,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { "3" }, { - "pcsx_rearmed_display_internal_fps", + "pcsx_rearmed_display_fps_v2", "Display Internal FPS", NULL, "Show the internal frame rate at which the emulated PlayStation system is rendering content. Note: Requires on-screen notifications to be enabled in the libretro frontend.", @@ -446,6 +429,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { { { "disabled", NULL }, { "enabled", NULL }, + { "extra", NULL }, { NULL, NULL }, }, "disabled", diff --git a/frontend/main.c b/frontend/main.c index 949ba65b6..607426b88 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -513,6 +513,7 @@ int emu_core_preinit(void) int emu_core_init(void) { SysPrintf("Starting PCSX-ReARMed " REV "%s\n", get_build_info()); + SysPrintf("build time: " __DATE__ " " __TIME__ "\n"); pcsxr_sthread_init(); #ifndef NO_FRONTEND diff --git a/libpcsxcore/cdrom-async.c b/libpcsxcore/cdrom-async.c index 81b2e12ad..fadb774a3 100644 --- a/libpcsxcore/cdrom-async.c +++ b/libpcsxcore/cdrom-async.c @@ -490,6 +490,25 @@ int cdra_get_buf_count(void) { return acdrom.buf_cnt; } + +int cdra_get_buf_cached_approx(void) +{ + u32 buf_cnt = acdrom.buf_cnt, lba = acdrom.prefetch_lba; + u32 total = acdrom.total_lba; + u32 left = buf_cnt; + int buf_use = 0; + + if (left > total) + left = total; + for (; lba < total && left > 0; lba++, left--) + if (lba == acdrom.buf_cache[lba % buf_cnt].lba) + buf_use++; + for (lba = 0; left > 0; lba++, left--) + if (lba == acdrom.buf_cache[lba % buf_cnt].lba) + buf_use++; + + return buf_use; +} 
#else // phys. CD-ROM without a cache is unusable so not implemented @@ -565,6 +584,7 @@ int cdra_check_eject(int *inserted) { return 0; } void cdra_stop_thread(void) {} void cdra_set_buf_count(int newcount) {} int cdra_get_buf_count(void) { return 0; } +int cdra_get_buf_cached_approx(void) { return 0; } #endif diff --git a/libpcsxcore/cdrom-async.h b/libpcsxcore/cdrom-async.h index 22747a85b..c72ca9c75 100644 --- a/libpcsxcore/cdrom-async.h +++ b/libpcsxcore/cdrom-async.h @@ -22,6 +22,7 @@ int cdra_check_eject(int *inserted); void cdra_stop_thread(void); void cdra_set_buf_count(int count); int cdra_get_buf_count(void); +int cdra_get_buf_cached_approx(void); void *cdra_getBuffer(void); diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index c2899e42f..6243db46d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -9018,7 +9018,7 @@ static int new_recompile_block(u_int addr) } start = addr; - ndrc_g.did_compile = 1; + ndrc_g.did_compile++; if (Config.HLE && start == 0x80001000) // hlecall { void *beginning = start_block(); From 6ac061ecdea878502491b4e5e5bfe8ebef2a71f7 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 15 Oct 2024 03:39:25 +0300 Subject: [PATCH 573/597] 3ds: adjust for newer toolchain --- libpcsxcore/psxcounters.c | 4 ++-- libpcsxcore/psxcounters.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c index 887fe8a4c..9ff295e09 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c @@ -71,8 +71,8 @@ static const u32 HSyncTotal[] = { 263, 314 }; #ifdef DRC_DISABLE Rcnt rcnts[ CounterQuantity ]; #endif -u32 hSyncCount = 0; -u32 frame_counter = 0; +unsigned int hSyncCount = 0; +unsigned int frame_counter = 0; static u32 hsync_steps = 0; u32 psxNextCounter = 0, psxNextsCounter = 0; diff --git a/libpcsxcore/psxcounters.h b/libpcsxcore/psxcounters.h index 77025a617..618d74de5 100644 --- 
a/libpcsxcore/psxcounters.h +++ b/libpcsxcore/psxcounters.h @@ -31,7 +31,7 @@ extern "C" { extern u32 psxNextCounter, psxNextsCounter; -extern u32 hSyncCount, frame_counter; +extern unsigned int hSyncCount, frame_counter; typedef struct Rcnt { From 20a25fd7f8eb01193c37a61c88d5e36e62235c1b Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 16 Oct 2024 01:45:51 +0300 Subject: [PATCH 574/597] 3ds: try partial linking --- Makefile | 12 +++++++++- frontend/libretro-extern | 25 ++++++++++++++++++++ frontend/{link.T => libretro-version-script} | 0 3 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 frontend/libretro-extern rename frontend/{link.T => libretro-version-script} (100%) diff --git a/Makefile b/Makefile index 69b7ab3d3..ac5eab9da 100644 --- a/Makefile +++ b/Makefile @@ -429,10 +429,20 @@ frontend/revision.h: FORCE target_: $(TARGET) $(TARGET): $(OBJS) +ifeq ($(PARTIAL_LINKING), 1) + sed -e 's/.*/EXTERN(\0)/' frontend/libretro-extern > frontend/libretro-extern.T + $(LD) -o $(basename $(TARGET))1.o -r --gc-sections -T frontend/libretro-extern.T $^ + $(OBJCOPY) --keep-global-symbols=frontend/libretro-extern $(basename $(TARGET))1.o $(basename $(TARGET)).o + $(AR) rcs $@ $(basename $(TARGET)).o +else ifeq ($(STATIC_LINKING), 1) + $(AR) rcs $@ $^ +else $(CC_LINK) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LDLIBS) $(EXTRA_LDFLAGS) +endif clean: $(PLAT_CLEAN) clean_plugins - $(RM) $(TARGET) $(OBJS) $(TARGET).map frontend/revision.h + $(RM) $(TARGET) *.o $(OBJS) $(TARGET).map frontend/revision.h + $(RM) frontend/libretro-extern.T ifneq ($(PLUGINS),) plugins_: $(PLUGINS) diff --git a/frontend/libretro-extern b/frontend/libretro-extern new file mode 100644 index 000000000..c3c490c2d --- /dev/null +++ b/frontend/libretro-extern @@ -0,0 +1,25 @@ +retro_api_version +retro_cheat_reset +retro_cheat_set +retro_deinit +retro_get_memory_data +retro_get_memory_size +retro_get_region +retro_get_system_av_info +retro_get_system_info +retro_init +retro_load_game 
+retro_load_game_special +retro_reset +retro_run +retro_serialize +retro_serialize_size +retro_set_audio_sample +retro_set_audio_sample_batch +retro_set_controller_port_device +retro_set_environment +retro_set_input_poll +retro_set_input_state +retro_set_video_refresh +retro_unload_game +retro_unserialize diff --git a/frontend/link.T b/frontend/libretro-version-script similarity index 100% rename from frontend/link.T rename to frontend/libretro-version-script From bdc1bbb5c780967e4dec4f8adabc77227d355d1c Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 17 Oct 2024 02:46:38 +0300 Subject: [PATCH 575/597] cdrom: adjust the PhysCdPropagations hack --- libpcsxcore/cdrom.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 516ca8ed8..5fcc54cdc 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -664,12 +664,12 @@ static int msfiEq(const u8 *a, const u8 *b) void cdrPlayReadInterrupt(void) { int hit = cdra_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); - if (!hit && cdr.PhysCdPropagations++ < 222) { - // this propagates real cdrom delays to the emulated game + if (!hit && cdr.PhysCdPropagations < 75/2) { + // this propagates the real cdrom delays to the emulated game CDRPLAYREAD_INT(cdReadTime / 2, 0); + cdr.PhysCdPropagations++; return; } - cdr.PhysCdPropagations = 0; cdr.LastReadSeekCycles = psxRegs.cycle; @@ -711,6 +711,9 @@ void cdrPlayReadInterrupt(void) generate_subq(cdr.SetSectorPlay); CDRPLAYREAD_INT(cdReadTime, 0); + + // stop propagation since it breaks streaming + cdr.PhysCdPropagations = 0xff; } static void softReset(void) @@ -900,6 +903,7 @@ void cdrInterrupt(void) { // BIOS player - set flag again cdr.Play = TRUE; cdr.DriveState = DRIVESTATE_PLAY_READ; + cdr.PhysCdPropagations = 0; CDRPLAYREAD_INT(cdReadTime + seekTime, 1); start_rotating = 1; @@ -1253,6 +1257,7 @@ void cdrInterrupt(void) { cdr.SubqForwardSectors = 1; cdr.sectorsRead = 0; 
cdr.DriveState = DRIVESTATE_SEEK; + cdr.PhysCdPropagations = 0; cdra_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]); From 41846131f9579147fb6bb1d5e32516ac21746e0f Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 20 Oct 2024 00:19:50 +0300 Subject: [PATCH 576/597] cdriso: try some different buffering --- Makefile | 12 ++++++++++-- libpcsxcore/cdriso.c | 40 +++++++++++++++++++++++++++++++++------- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index ac5eab9da..7c534224d 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,16 @@ # default stuff goes here, so that config can override TARGET ?= pcsx -CFLAGS += -Wall -ggdb -Iinclude -ffast-math -ifndef DEBUG +CFLAGS += -Wall -Iinclude -ffast-math + +DEBUG ?= 0 +DEBUG_SYMS ?= 0 +ASSERTS ?= 0 +HAVE_CHD ?= 1 +ifneq ($(DEBUG)$(DEBUG_SYMS), 00) +CFLAGS += -ggdb +endif +ifneq ($(DEBUG), 1) CFLAGS += -O2 ifndef ASSERTS CFLAGS += -DNDEBUG diff --git a/libpcsxcore/cdriso.c b/libpcsxcore/cdriso.c index ff20731d5..9b98fbe5d 100644 --- a/libpcsxcore/cdriso.c +++ b/libpcsxcore/cdriso.c @@ -54,7 +54,6 @@ unsigned int cdrIsoMultidiskCount; unsigned int cdrIsoMultidiskSelect; static FILE *cdHandle = NULL; -static FILE *cddaHandle = NULL; static FILE *subHandle = NULL; static boolean subChanMixed = FALSE; @@ -173,6 +172,24 @@ static off_t get_size(FILE *f) return size; } +// Some c libs like newlib default buffering to just 1k which is less than +// cd sector size which is bad for performance. +// Note that NULL setvbuf() is implemented differently by different libs +// (newlib mallocs a buffer of given size and glibc ignores size and uses it's own). 
+static void set_static_stdio_buffer(FILE *f) +{ +#if !defined(fopen) // no stdio redirect + static char buf[16 * 1024]; + if (f) { + int r; + errno = 0; + r = setvbuf(f, buf, _IOFBF, sizeof(buf)); + if (r) + SysPrintf("cdriso: setvbuf %d %d\n", r, errno); + } +#endif +} + // this function tries to get the .toc file of the given .bin // the necessary data is put into the ti (trackinformation)-array static int parsetoc(const char *isofile) { @@ -493,6 +510,7 @@ static int parsecue(const char *isofile) { fclose(cdHandle); cdHandle = ti[1].handle; ti[1].handle = NULL; + set_static_stdio_buffer(cdHandle); } return 0; } @@ -952,13 +970,23 @@ static int handlecbin(const char *isofile) { static int handlechd(const char *isofile) { int frame_offset = 150; int file_offset = 0; + int is_chd_ext = 0; + chd_error err; + if (strlen(isofile) >= 3) { + const char *ext = isofile + strlen(isofile) - 3; + is_chd_ext = !strcasecmp(ext, "chd"); + } chd_img = calloc(1, sizeof(*chd_img)); if (chd_img == NULL) goto fail_io; - if(chd_open(isofile, CHD_OPEN_READ, NULL, &chd_img->chd) != CHDERR_NONE) + err = chd_open_file(cdHandle, CHD_OPEN_READ, NULL, &chd_img->chd); + if (err != CHDERR_NONE) { + if (is_chd_ext) + SysPrintf("chd_open: %d\n", err); goto fail_io; + } if (Config.CHD_Precache && (chd_precache(chd_img->chd) != CHDERR_NONE)) goto fail_io; @@ -1051,9 +1079,8 @@ static int opensubfile(const char *isoname) { } subHandle = fopen(subname, "rb"); - if (subHandle == NULL) { + if (subHandle == NULL) return -1; - } return 0; } @@ -1373,6 +1400,7 @@ int ISOopen(const char *fname) fname, strerror(errno)); return -1; } + set_static_stdio_buffer(cdHandle); size_main = get_size(cdHandle); snprintf(image_str, sizeof(image_str) - 6*4 - 1, @@ -1417,8 +1445,6 @@ int ISOopen(const char *fname) ISOgetBuffer = ISOgetBuffer_chd; cdimg_read_func = cdread_chd; cdimg_read_sub_func = cdread_sub_chd; - fclose(cdHandle); - cdHandle = NULL; } #endif @@ -1452,6 +1478,7 @@ int ISOopen(const char *fname) 
bin_filename = alt_bin_filename; fclose(cdHandle); cdHandle = tmpf; + set_static_stdio_buffer(cdHandle); size_main = get_size(cdHandle); } } @@ -1496,7 +1523,6 @@ int ISOclose(void) fclose(subHandle); subHandle = NULL; } - cddaHandle = NULL; if (compr_img != NULL) { free(compr_img->index_table); From dd4595107c3b99beb515d91e4b661a9ae2da3adc Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 20 Oct 2024 03:04:51 +0300 Subject: [PATCH 577/597] update Android.mk no idea if it compiles a working library though... --- Makefile | 1 - frontend/libretro-extern.T | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 frontend/libretro-extern.T diff --git a/Makefile b/Makefile index 7c534224d..1219b097a 100644 --- a/Makefile +++ b/Makefile @@ -450,7 +450,6 @@ endif clean: $(PLAT_CLEAN) clean_plugins $(RM) $(TARGET) *.o $(OBJS) $(TARGET).map frontend/revision.h - $(RM) frontend/libretro-extern.T ifneq ($(PLUGINS),) plugins_: $(PLUGINS) diff --git a/frontend/libretro-extern.T b/frontend/libretro-extern.T new file mode 100644 index 000000000..22555b32f --- /dev/null +++ b/frontend/libretro-extern.T @@ -0,0 +1,25 @@ +EXTERN(retro_api_version) +EXTERN(retro_cheat_reset) +EXTERN(retro_cheat_set) +EXTERN(retro_deinit) +EXTERN(retro_get_memory_data) +EXTERN(retro_get_memory_size) +EXTERN(retro_get_region) +EXTERN(retro_get_system_av_info) +EXTERN(retro_get_system_info) +EXTERN(retro_init) +EXTERN(retro_load_game) +EXTERN(retro_load_game_special) +EXTERN(retro_reset) +EXTERN(retro_run) +EXTERN(retro_serialize) +EXTERN(retro_serialize_size) +EXTERN(retro_set_audio_sample) +EXTERN(retro_set_audio_sample_batch) +EXTERN(retro_set_controller_port_device) +EXTERN(retro_set_environment) +EXTERN(retro_set_input_poll) +EXTERN(retro_set_input_state) +EXTERN(retro_set_video_refresh) +EXTERN(retro_unload_game) +EXTERN(retro_unserialize) From 2b540629bca02d96eab845f47afddf929abbfcce Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 21 Oct 2024 00:38:07 +0300 
Subject: [PATCH 578/597] 3ds: update ctr_clear_cache DMB looks insufficient before invalidate, also doesn't match what Linux does, so adjust. Hopefully will solve some weird rare crashes. --- frontend/3ds/3ds_utils.h | 4 ++-- frontend/3ds/utils.S | 24 +++++++++++++++++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/frontend/3ds/3ds_utils.h b/frontend/3ds/3ds_utils.h index f7c8ddc24..25bcc42a3 100644 --- a/frontend/3ds/3ds_utils.h +++ b/frontend/3ds/3ds_utils.h @@ -17,6 +17,8 @@ #define DEBUG_HOLD() do{printf("%s@%s:%d.\n",__FUNCTION__, __FILE__, __LINE__);fflush(stdout);wait_for_input();}while(0) void wait_for_input(void); +void ctr_clear_cache(void); +//void ctr_invalidate_icache(void); // only icache extern __attribute__((weak)) int __ctr_svchax; @@ -36,8 +38,6 @@ static inline void check_rosalina() { } } -void ctr_clear_cache(void); - typedef int32_t (*ctr_callback_type)(void); static inline void ctr_invalidate_ICache_kernel(void) diff --git a/frontend/3ds/utils.S b/frontend/3ds/utils.S index c8df651a7..61da15548 100644 --- a/frontend/3ds/utils.S +++ b/frontend/3ds/utils.S @@ -7,9 +7,8 @@ ctr_clear_cache_kernel: cpsid aif mov r0, #0 mcr p15, 0, r0, c7, c10, 0 @ Clean entire data cache - mcr p15, 0, r0, c7, c10, 5 @ Data Memory Barrier - mcr p15, 0, r0, c7, c5, 0 @ Invalidate entire instruction cache / Flush BTB mcr p15, 0, r0, c7, c10, 4 @ Data Sync Barrier + mcr p15, 0, r0, c7, c5, 0 @ Invalidate entire instruction cache / Flush BTB bx lr .endfunc @@ -19,7 +18,26 @@ ctr_clear_cache_kernel: .global ctr_clear_cache .func ctr_clear_cache ctr_clear_cache: - ldr r0, =ctr_clear_cache_kernel + adr r0, ctr_clear_cache_kernel + svc 0x80 @ svcCustomBackdoor + bx lr + .endfunc + +#if 0 + .func ctr_invalidate_icache_kernel +ctr_invalidate_icache_kernel: + cpsid aif + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ Data Sync Barrier + mcr p15, 0, r0, c7, c5, 0 @ Invalidate entire instruction cache / Flush BTB + bx lr + .endfunc + + .global 
ctr_invalidate_icache + .func ctr_invalidate_icache +ctr_invalidate_icache: + adr r0, ctr_invalidate_icache_kernel svc 0x80 @ svcCustomBackdoor bx lr .endfunc +#endif From b06f78f199f6fe13fd91f97db9d6c2465c4b6f68 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 22 Oct 2024 19:29:26 +0300 Subject: [PATCH 579/597] arm: provide bgr888_to_rgb565 for v6 --- Makefile | 2 +- frontend/cspace.c | 7 ++---- frontend/cspace_arm.S | 49 +++++++++++++++++++++++++++++++++++++ include/compiler_features.h | 2 ++ 4 files changed, 54 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 1219b097a..25cbbdf2d 100644 --- a/Makefile +++ b/Makefile @@ -285,7 +285,7 @@ CFLAGS += -DHAVE_CHD -I$(LCHDR)/include OBJS += frontend/cspace.o ifeq "$(HAVE_NEON_ASM)" "1" OBJS += frontend/cspace_neon.o -frontend/cspace.o: CFLAGS += -DHAVE_bgr555_to_rgb565 -DHAVE_bgr888_to_x +frontend/cspace.o: CFLAGS += -DHAVE_bgr555_to_rgb565 else ifeq "$(ARCH)" "arm" OBJS += frontend/cspace_arm.o diff --git a/frontend/cspace.c b/frontend/cspace.c index a3e3301fb..b4e4a7101 100644 --- a/frontend/cspace.c +++ b/frontend/cspace.c @@ -10,6 +10,7 @@ #include #include "cspace.h" +#include "compiler_features.h" /* * note: these are intended for testing and should be avoided @@ -111,9 +112,7 @@ void bgr555_to_rgb565(void *dst_, const void *src_, int bytes) #endif -#ifndef HAVE_bgr888_to_x - -void bgr888_to_rgb565(void *dst_, const void *src_, int bytes) +void attr_weak bgr888_to_rgb565(void *dst_, const void *src_, int bytes) { const unsigned char *src = src_; unsigned int *dst = dst_; @@ -140,8 +139,6 @@ void bgr888_to_rgb565(void *dst_, const void *src_, int bytes) void rgb888_to_rgb565(void *dst, const void *src, int bytes) {} void bgr888_to_rgb888(void *dst, const void *src, int bytes) {} -#endif // __ARM_NEON__ - /* YUV stuff */ static int yuv_ry[32], yuv_gy[32], yuv_by[32]; static unsigned char yuv_u[32 * 2], yuv_v[32 * 2]; diff --git a/frontend/cspace_arm.S b/frontend/cspace_arm.S index 
177b08583..3ef5083b4 100644 --- a/frontend/cspace_arm.S +++ b/frontend/cspace_arm.S @@ -68,3 +68,52 @@ FUNCTION(bgr555_to_rgb565): @ void *dst, const void *src, int bytes bgt 2b pop {r4-r11,pc} + + +#ifdef HAVE_ARMV6 /* v6-only due to potential misaligned reads */ + +# r1b0g0r0 g2r2b1g1 b3g3r3b2 +FUNCTION(bgr888_to_rgb565): + pld [r1] + push {r4-r10,lr} + + mov r10, #0x001f @ b mask + mov r12, #0x07e0 @ g mask + mov lr, #0xf800 @ r mask + +0: + ldr r3, [r1], #4 @ may be unaligned + ldr r4, [r1], #4 + ldr r5, [r1], #4 + pld [r1, #32*1] + and r6, r10,r3, lsr #16+3 @ b0 + and r7, r12,r3, lsr #5 @ g0 + and r8, lr, r3, lsl #8 @ r0 + and r9, lr, r3, lsr #16 @ r1 + orr r6, r6, r7 + orr r6, r6, r8 @ r0g0b0 + + and r7, r12,r4, lsl #3 @ g1 + and r8, r10,r4, lsr #11 @ b1 + orr r9, r9, r7 + orr r9, r9, r8 @ r1g1b1 + and r7, lr, r4, lsr #8 @ r2 + and r8, r12,r4, lsr #21 @ g2 + pkhbt r9, r6, r9, lsl #16 + str r9, [r0], #4 + + and r6, r10,r5, lsr #3 @ b2 + orr r7, r7, r8 + orr r6, r6, r7 @ r2g2b2 + and r7, lr, r5 @ r3 + and r8, r12,r5, lsr #13 @ g3 + orr r7, r7, r5, lsr #27 @ r3b3 + orr r7, r7, r8 @ r3g3b3 + pkhbt r7, r6, r7, lsl #16 + str r7, [r0], #4 + subs r2, r2, #12 + bgt 0b + + pop {r4-r10,pc} + +#endif /* HAVE_ARMV6 */ diff --git a/include/compiler_features.h b/include/compiler_features.h index 0ab8468b2..d6983632e 100644 --- a/include/compiler_features.h +++ b/include/compiler_features.h @@ -8,11 +8,13 @@ # define noinline __attribute__((noinline,noclone)) # endif # define attr_unused __attribute__((unused)) +# define attr_weak __attribute__((weak)) #else # define likely(x) (x) # define unlikely(x) (x) # define noinline # define attr_unused +# define attr_weak #endif #ifndef __has_builtin From 9672b97e7f62b17c530de0d2fefdb008655e1abb Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 22 Oct 2024 20:45:03 +0300 Subject: [PATCH 580/597] gpulib: try to reduce flips --- plugins/gpulib/gpu.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff 
--git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 70f212933..d1be12a6b 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -311,6 +311,7 @@ long GPUshutdown(void) void GPUwriteStatus(uint32_t data) { uint32_t cmd = data >> 24; + uint32_t fb_dirty = 1; int src_x, src_y; if (cmd < ARRAY_SIZE(gpu.regs)) { @@ -319,14 +320,13 @@ void GPUwriteStatus(uint32_t data) gpu.regs[cmd] = data; } - gpu.state.fb_dirty = 1; - switch (cmd) { case 0x00: do_reset(); break; case 0x01: do_cmd_reset(); + fb_dirty = 0; break; case 0x03: if (data & 1) { @@ -339,6 +339,7 @@ void GPUwriteStatus(uint32_t data) case 0x04: gpu.status &= ~PSX_GPU_STATUS_DMA_MASK; gpu.status |= PSX_GPU_STATUS_DMA(data & 3); + fb_dirty = 0; break; case 0x05: src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff; @@ -374,9 +375,12 @@ void GPUwriteStatus(uint32_t data) default: if ((cmd & 0xf0) == 0x10) get_gpu_info(data); + fb_dirty = 0; break; } + gpu.state.fb_dirty |= fb_dirty; + #ifdef GPUwriteStatus_ext GPUwriteStatus_ext(data); #endif @@ -516,7 +520,20 @@ static void finish_vram_transfer(int is_read) if (is_read) gpu.status &= ~PSX_GPU_STATUS_IMG; else { - gpu.state.fb_dirty = 1; + int32_t screen_r = gpu.screen.src_x + gpu.screen.hres; + int32_t screen_b = gpu.screen.src_y + gpu.screen.vres; + int32_t dma_r = gpu.dma_start.x + gpu.dma_start.w; + int32_t dma_b = gpu.dma_start.y + gpu.dma_start.h; + int32_t not_dirty; + not_dirty = screen_r - gpu.dma_start.x - 1; + not_dirty |= screen_b - gpu.dma_start.y - 1; + not_dirty |= dma_r - gpu.screen.src_x - 1; + not_dirty |= dma_b - gpu.screen.src_y - 1; + not_dirty >>= 31; + log_io("dma %3d,%3d %dx%d scr %3d,%3d %3dx%3d -> dirty %d\n", + gpu.dma_start.x, gpu.dma_start.y, gpu.dma_start.w, gpu.dma_start.h, + gpu.screen.src_x, gpu.screen.src_y, gpu.screen.hres, gpu.screen.vres, !not_dirty); + gpu.state.fb_dirty |= !not_dirty; renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y, gpu.dma_start.w, gpu.dma_start.h, 0); } @@ -646,7 +663,7 @@ static 
noinline int do_cmd_buffer(uint32_t *data, int count, for (pos = 0; pos < count; ) { if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify - vram_dirty = 1; + // vram_dirty = 1; // handled in finish_vram_transfer() pos += do_vram_io(data + pos, count - pos, 0); if (pos == count) break; @@ -677,8 +694,9 @@ static noinline int do_cmd_buffer(uint32_t *data, int count, pos += 4; continue; } - else if (cmd == 0x1f) { - log_anomaly("irq1?\n"); + else if (cmd < 0x20 && cmd != 2) { + if (cmd == 0x1f) + log_anomaly("irq1?\n"); pos++; continue; } From 4adbe7bb40f9693bb6f9ef7d236b3a7e0ffdb476 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 22 Oct 2024 21:43:03 +0300 Subject: [PATCH 581/597] chd: override config relies on -Iinclude being before -Ideps/libchdr/include --- include/libchdr/chdconfig.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/libchdr/chdconfig.h diff --git a/include/libchdr/chdconfig.h b/include/libchdr/chdconfig.h new file mode 100644 index 000000000..a1f9d041b --- /dev/null +++ b/include/libchdr/chdconfig.h @@ -0,0 +1,15 @@ +#ifndef __CHDCONFIG_H__ +#define __CHDCONFIG_H__ + +/* this overrides deps/libchdr/include/libchdr/chdconfig.h */ +#define WANT_SUBCODE 1 +#define NEED_CACHE_HUNK 1 + +#if defined(__x86_64__) || defined(__aarch64__) +#define WANT_RAW_DATA_SECTOR 1 +#define VERIFY_BLOCK_CRC 1 +#else +// assume some slower hw so no ecc that most (all?) 
games don't need +#endif + +#endif From 5498b8ad6777c4367194b971e5a78a12790161e4 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 22 Oct 2024 21:44:27 +0300 Subject: [PATCH 582/597] standalone: neon dithering should be on by default on switch too --- frontend/libretro_core_options.h | 2 +- frontend/main.c | 1 + frontend/menu.c | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 384c58a5b..7fe908122 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -330,7 +330,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { { "enabled", NULL }, { NULL, NULL }, }, -#if defined HAVE_LIBNX || defined _3DS +#if defined(_3DS) "disabled", #else "enabled", diff --git a/frontend/main.c b/frontend/main.c index 607426b88..3ead1b08d 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -130,6 +130,7 @@ void emu_set_default_config(void) Config.FractionalFramerate = -1; pl_rearmed_cbs.gpu_neon.allow_interlace = 2; // auto + pl_rearmed_cbs.gpu_neon.allow_dithering = 1; pl_rearmed_cbs.gpu_neon.enhancement_enable = pl_rearmed_cbs.gpu_neon.enhancement_no_main = 0; pl_rearmed_cbs.gpu_neon.enhancement_tex_adj = 1; diff --git a/frontend/menu.c b/frontend/menu.c index 8549279ac..95d4edf6b 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -1427,10 +1427,11 @@ static const char *men_gpu_interlace[] = { "Off", "On", "Auto", NULL }; static menu_entry e_menu_plugin_gpu_neon[] = { - mee_enum ("Enable interlace mode", 0, pl_rearmed_cbs.gpu_neon.allow_interlace, men_gpu_interlace), mee_onoff_h ("Enhanced resolution", 0, pl_rearmed_cbs.gpu_neon.enhancement_enable, 1, h_gpu_neon_enhanced), mee_onoff_h ("Enhanced res. speed hack", 0, pl_rearmed_cbs.gpu_neon.enhancement_no_main, 1, h_gpu_neon_enhanced_hack), mee_onoff_h ("Enh. res. 
texture adjust", 0, pl_rearmed_cbs.gpu_neon.enhancement_tex_adj, 1, h_gpu_neon_enhanced_texadj), + mee_enum ("Enable interlace mode", 0, pl_rearmed_cbs.gpu_neon.allow_interlace, men_gpu_interlace), + mee_onoff ("Enable dithering", 0, pl_rearmed_cbs.gpu_neon.allow_dithering, 1), mee_end, }; From 3c3a80b2408d5a98521746c26502cd7addf3b365 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 22 Oct 2024 23:53:38 +0300 Subject: [PATCH 583/597] mdec: compiler-friendlier clamp that whole thing begs to be rewritten but ohwell --- libpcsxcore/mdec.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/libpcsxcore/mdec.c b/libpcsxcore/mdec.c index 38b03e1bb..d02a419a2 100644 --- a/libpcsxcore/mdec.c +++ b/libpcsxcore/mdec.c @@ -323,11 +323,22 @@ static const unsigned short *rl2blk(int *blk, const unsigned short *mdec_rl) { #define SCALE8(c) SCALER(c, 20) #define SCALE5(c) SCALER(c, 23) -#define CLAMP5(c) ( ((c) < -16) ? 0 : (((c) > (31 - 16)) ? 31 : ((c) + 16)) ) -#define CLAMP8(c) ( ((c) < -128) ? 0 : (((c) > (255 - 128)) ? 255 : ((c) + 128)) ) +static inline int clamp5(int v) +{ + v += 16; + v = v < 0 ? 0 : (v > 31 ? 31 : v); + return v; +} + +static inline int clamp8(int v) +{ + v += 128; + v = v < 0 ? 0 : (v > 255 ? 
255 : v); + return v; +} -#define CLAMP_SCALE8(a) (CLAMP8(SCALE8(a))) -#define CLAMP_SCALE5(a) (CLAMP5(SCALE5(a))) +#define CLAMP_SCALE8(a) (clamp8(SCALE8(a))) +#define CLAMP_SCALE5(a) (clamp5(SCALE5(a))) static inline void putlinebw15(u16 *image, int *Yblk) { int i; @@ -336,7 +347,7 @@ static inline void putlinebw15(u16 *image, int *Yblk) { for (i = 0; i < 8; i++, Yblk++) { int Y = *Yblk; // missing rounding - image[i] = SWAP16((CLAMP5(Y >> 3) * 0x421) | A); + image[i] = SWAP16((clamp5(Y >> 3) * 0x421) | A); } } @@ -385,7 +396,7 @@ static inline void putlinebw24(u8 * image, int *Yblk) { int i; unsigned char Y; for (i = 0; i < 8 * 3; i += 3, Yblk++) { - Y = CLAMP8(*Yblk); + Y = clamp8(*Yblk); image[i + 0] = Y; image[i + 1] = Y; image[i + 2] = Y; From e7e802c7db305fb8551c31d2975de61116f593c2 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 24 Oct 2024 02:25:47 +0300 Subject: [PATCH 584/597] 3ds: use range clearing for small ranges Seems to be wastefull to throw away the entire icache when just a few new insns are compiled. Not that this gives any noticeable perf difference though. 
--- frontend/3ds/3ds_utils.h | 1 + frontend/3ds/utils.S | 38 +++++++++++++++++++++++++++ libpcsxcore/new_dynarec/new_dynarec.c | 6 ++++- 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/frontend/3ds/3ds_utils.h b/frontend/3ds/3ds_utils.h index 25bcc42a3..2fc44f2a7 100644 --- a/frontend/3ds/3ds_utils.h +++ b/frontend/3ds/3ds_utils.h @@ -18,6 +18,7 @@ void wait_for_input(void); void ctr_clear_cache(void); +void ctr_clear_cache_range(void *start, void *end); //void ctr_invalidate_icache(void); // only icache extern __attribute__((weak)) int __ctr_svchax; diff --git a/frontend/3ds/utils.S b/frontend/3ds/utils.S index 61da15548..6f7a6a2ff 100644 --- a/frontend/3ds/utils.S +++ b/frontend/3ds/utils.S @@ -4,11 +4,39 @@ .func ctr_clear_cache_kernel ctr_clear_cache_kernel: + @ this less than what B2.7.3 of DDI0100I_ARM_ARM recommends, but so is Linux + mrs r3, cpsr cpsid aif mov r0, #0 mcr p15, 0, r0, c7, c10, 0 @ Clean entire data cache mcr p15, 0, r0, c7, c10, 4 @ Data Sync Barrier mcr p15, 0, r0, c7, c5, 0 @ Invalidate entire instruction cache / Flush BTB + msr cpsr, r3 + bx lr + .endfunc + + .func ctr_clear_cache_range_kernel +ctr_clear_cache_range_kernel: + bic r0, r0, #31 + mov r12, r0 + mov r2, #0 + mrs r3, cpsr + cpsid aif +0: + mcr p15, 0, r0, c7, c10, 1 @ Clean Data Cache Line (using MVA) + add r0, r0, #32 + cmp r0, r1 + blo 0b + mcr p15, 0, r2, c7, c10, 4 @ Data Sync Barrier + mov r0, r12 +0: + mcr p15, 0, r0, c7, c5, 1 @ Invalidate Instruction Cache Line (using MVA) + add r0, r0, #32 + cmp r0, r1 + blo 0b + mcr p15, 0, r2, c7, c5, 6 @ Flush Entire Branch Target Cache + + msr cpsr, r3 bx lr .endfunc @@ -23,6 +51,16 @@ ctr_clear_cache: bx lr .endfunc + .global ctr_clear_cache_range + .func ctr_clear_cache_range +ctr_clear_cache_range: + mov r2, r1 + mov r1, r0 + adr r0, ctr_clear_cache_range_kernel + svc 0x80 @ svcCustomBackdoor + bx lr + .endfunc + #if 0 .func ctr_invalidate_icache_kernel ctr_invalidate_icache_kernel: diff --git 
a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 6243db46d..37caf4737 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -494,7 +494,11 @@ static void end_tcache_write(void *start, void *end) #elif defined(VITA) sceKernelSyncVMDomain(sceBlock, start, len); #elif defined(_3DS) - ctr_flush_invalidate_cache(); + // tuned for old3ds' 16k:16k cache (in it's mostly clean state...) + if ((char *)end - (char *)start <= 2*1024) + ctr_clear_cache_range(start, end); + else + ctr_flush_invalidate_cache(); ndrc_g.thread.cache_dirty = 1; #elif defined(HAVE_LIBNX) if (g_jit.type == JitType_CodeMemory) { From 4d987004e3b82a3f26d1d59cb69e18672ec09043 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 24 Oct 2024 02:33:38 +0300 Subject: [PATCH 585/597] 3ds: drop pre-rosalina code no reason not to run an up-to-date cfw --- frontend/3ds/3ds_utils.h | 63 +-------------------------- frontend/3ds/sys/mman.h | 1 + libpcsxcore/new_dynarec/new_dynarec.c | 5 +-- 3 files changed, 4 insertions(+), 65 deletions(-) diff --git a/frontend/3ds/3ds_utils.h b/frontend/3ds/3ds_utils.h index 2fc44f2a7..bde9c4912 100644 --- a/frontend/3ds/3ds_utils.h +++ b/frontend/3ds/3ds_utils.h @@ -1,13 +1,8 @@ #ifndef _3DS_UTILS_H #define _3DS_UTILS_H -#include -#include -#include <3ds/os.h> -#include <3ds/svc.h> - -#ifdef OS_HEAP_AREA_BEGIN // defined in libctru 2.0+ -#define USE_CTRULIB_2 1 +#ifndef USE_CTRULIB_2 +#error CTRULIB_2 is required #endif #define MEMOP_PROT 6 @@ -23,58 +18,4 @@ void ctr_clear_cache_range(void *start, void *end); extern __attribute__((weak)) int __ctr_svchax; -static bool has_rosalina; - -static inline void check_rosalina() { - int64_t version; - uint32_t major; - - has_rosalina = false; - - if (!svcGetSystemInfo(&version, 0x10000, 0)) { - major = GET_VERSION_MAJOR(version); - - if (major >= 8) - has_rosalina = true; - } -} - -typedef int32_t (*ctr_callback_type)(void); - -static inline void 
ctr_invalidate_ICache_kernel(void) -{ - __asm__ volatile( - "cpsid aif\n\t" - "mov r0, #0\n\t" - "mcr p15, 0, r0, c7, c5, 0\n\t"); -} - -static inline void ctr_flush_DCache_kernel(void) -{ - __asm__ volatile( - "cpsid aif\n\t" - "mov r0, #0\n\t" - "mcr p15, 0, r0, c7, c10, 0\n\t"); -} - -static inline void ctr_invalidate_ICache(void) -{ - svcBackdoor((ctr_callback_type)ctr_invalidate_ICache_kernel); -} - -static inline void ctr_flush_DCache(void) -{ - svcBackdoor((ctr_callback_type)ctr_flush_DCache_kernel); -} - -static inline void ctr_flush_invalidate_cache(void) -{ - if (has_rosalina) { - ctr_clear_cache(); - } else { - ctr_flush_DCache(); - ctr_invalidate_ICache(); - } -} - #endif // _3DS_UTILS_H diff --git a/frontend/3ds/sys/mman.h b/frontend/3ds/sys/mman.h index 4ba90db21..e53968c5b 100644 --- a/frontend/3ds/sys/mman.h +++ b/frontend/3ds/sys/mman.h @@ -8,6 +8,7 @@ extern "C" { #include #include +#include <3ds/svc.h> #include "3ds_utils.h" #define PROT_READ 0b001 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 37caf4737..2b2e66346 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -498,7 +498,7 @@ static void end_tcache_write(void *start, void *end) if ((char *)end - (char *)start <= 2*1024) ctr_clear_cache_range(start, end); else - ctr_flush_invalidate_cache(); + ctr_clear_cache(); ndrc_g.thread.cache_dirty = 1; #elif defined(HAVE_LIBNX) if (g_jit.type == JitType_CodeMemory) { @@ -6314,9 +6314,6 @@ void new_dynarec_init(void) SysPrintf("Init new dynarec, ndrc size %x, pgsize %d\n", (int)sizeof(*ndrc), align + 1); -#ifdef _3DS - check_rosalina(); -#endif #ifdef BASE_ADDR_DYNAMIC #ifdef VITA sceBlock = getVMBlock(); //sceKernelAllocMemBlockForVM("code", sizeof(*ndrc)); From 4f674a2f42bc63e6ffeb56a8ed559494d134f93e Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 25 Oct 2024 00:18:33 +0300 Subject: [PATCH 586/597] drc: update the debug patches --- 
libpcsxcore/new_dynarec/patches/trace_drc_chk | 177 +++++++++++++++--- libpcsxcore/new_dynarec/patches/trace_intr | 157 ++++++++-------- 2 files changed, 234 insertions(+), 100 deletions(-) diff --git a/libpcsxcore/new_dynarec/patches/trace_drc_chk b/libpcsxcore/new_dynarec/patches/trace_drc_chk index da8616988..c8fc23b83 100644 --- a/libpcsxcore/new_dynarec/patches/trace_drc_chk +++ b/libpcsxcore/new_dynarec/patches/trace_drc_chk @@ -1,16 +1,16 @@ diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c -index 74f32ee3..4eec8a83 100644 +index dad7625d..ad8dc383 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c -@@ -325,7 +325,7 @@ static struct compile_info - int new_dynarec_hacks_old; - int new_dynarec_did_compile; +@@ -334,7 +334,7 @@ static struct compile_info + #define stat_clear(s) + #endif -- #define HACK_ENABLED(x) ((new_dynarec_hacks | new_dynarec_hacks_pergame) & (x)) +- #define HACK_ENABLED(x) ((ndrc_g.hacks | ndrc_g.hacks_pergame) & (x)) + #define HACK_ENABLED(x) ((NDHACK_NO_STALLS) & (x)) - extern int cycle_count; // ... 
until end of the timeslice, counts -N -> 0 (CCREG) - extern int last_count; // last absolute target, often = next_interupt + /* registers that may be allocated */ + /* 1-31 gpr */ @@ -603,6 +603,7 @@ static int cycle_multiplier_active; static int CLOCK_ADJUST(int x) @@ -21,7 +21,7 @@ index 74f32ee3..4eec8a83 100644 return (x * m + s * 50) / 100; @@ -808,6 +809,9 @@ static noinline u_int generate_exception(u_int pc) // This is called from the recompiled JR/JALR instructions - static void noinline *get_addr(u_int vaddr, int can_compile) + static void noinline *get_addr(const u_int vaddr, enum ndrc_compile_mode compile_mode) { +#ifdef DRC_DBG +printf("get_addr %08x, pc=%08x\n", vaddr, psxRegs.pc); @@ -29,7 +29,7 @@ index 74f32ee3..4eec8a83 100644 u_int start_page = get_page_prev(vaddr); u_int i, page, end_page = get_page(vaddr); void *found_clean = NULL; -@@ -7213,7 +7217,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) +@@ -7258,7 +7262,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) // R0 is always unneeded u|=1; // Save it @@ -38,15 +38,15 @@ index 74f32ee3..4eec8a83 100644 gte_unneeded[i]=gte_u; /* printf("ur (%d,%d) %x: ",istart,iend,start+i*4); -@@ -8355,6 +8359,7 @@ static noinline void pass5a_preallocate1(void) +@@ -8399,6 +8403,7 @@ static noinline void pass5a_preallocate1(void) + // to use, which can avoid a load-use penalty on certain CPUs. 
static noinline void pass5b_preallocate2(void) { - int i, hr; + return; - for(i=0;in.Cause &= ~0x400; ++ u32 c2 = cp0->n.Cause & ~0x400; + if (psxHu32(0x1070) & psxHu32(0x1074)) +- cp0->n.Cause |= 0x400; +- if (((cp0->n.Cause | 1) & cp0->n.SR & 0x401) == 0x401) { ++ c2 |= 0x400; ++ if (((c2 | 1) & cp0->n.SR & 0x401) == 0x401) { ++ cp0->n.Cause = c2; + psxException(0, 0, cp0); + pending_exception = 1; + } diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c -index f6ff2e8b..2f7147c3 100644 +index f979d5c7..9bb1df07 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -245,7 +245,7 @@ static inline void addCycle(psxRegisters *regs) @@ -124,13 +143,121 @@ index f6ff2e8b..2f7147c3 100644 regs->subCycle &= 0xffff; } -@@ -1348,8 +1348,15 @@ static void intShutdown() { +@@ -442,7 +442,9 @@ static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) { + regs->CP0.n.Target = pc_final; + regs->branching = 0; + ++ psxRegs.cycle += 2; + psxBranchTest(); ++ psxRegs.cycle -= 2; + } + + static void doBranchReg(psxRegisters *regs, u32 tar) { +@@ -971,7 +973,7 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) { + } + } + +-OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); } ++OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); psxBranchTest(); } + + // no exception + static inline void psxNULLne(psxRegisters *regs) { +@@ -1130,6 +1132,7 @@ OP(psxHLE) { + dloadFlush(regs_); + psxHLEt[hleCode](); + branchSeen = 1; ++ psxRegs.cycle -= 2; + } + + static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = { +@@ -1179,18 +1182,20 @@ static void intReset() { + static inline void execI_(u8 **memRLUT, psxRegisters *regs) { + u32 pc = regs->pc; + +- addCycle(regs); ++ //addCycle(regs); + dloadStep(regs); + + regs->pc += 4; + regs->code = fetch(regs, memRLUT, pc); + psxBSC[regs->code >> 26](regs, regs->code); ++ psxRegs.cycle += 2; ++ fetchNoCache(regs, memRLUT, regs->pc); // bus err check + } + + static inline void execIbp(u8 
**memRLUT, psxRegisters *regs) { + u32 pc = regs->pc; + +- addCycle(regs); ++ //addCycle(regs); + dloadStep(regs); + + if (execBreakCheck(regs, pc)) +@@ -1199,6 +1204,8 @@ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) { + regs->pc += 4; + regs->code = fetch(regs, memRLUT, pc); + psxBSC[regs->code >> 26](regs, regs->code); ++ psxRegs.cycle += 2; ++ fetchNoCache(regs, memRLUT, regs->pc); // bus err check + } + + static void intExecute() { +@@ -1219,22 +1226,30 @@ static void intExecuteBp() { + execIbp(memRLUT, regs_); + } + ++ extern int last_count; ++ void do_insn_cmp(void); + void intExecuteBlock(enum blockExecCaller caller) { + psxRegisters *regs_ = &psxRegs; + u8 **memRLUT = psxMemRLUT; + ++ last_count = 0; + branchSeen = 0; +- while (!branchSeen || (regs_->dloadReg[0] || regs_->dloadReg[1])) ++ while (!branchSeen || (regs_->dloadReg[0] || regs_->dloadReg[1])) { ++ do_insn_cmp(); + execI_(memRLUT, regs_); ++ } + } + + static void intExecuteBlockBp(enum blockExecCaller caller) { + psxRegisters *regs_ = &psxRegs; + u8 **memRLUT = psxMemRLUT; + ++ last_count = 0; + branchSeen = 0; +- while (!branchSeen || (regs_->dloadReg[0] || regs_->dloadReg[1])) ++ while (!branchSeen || (regs_->dloadReg[0] || regs_->dloadReg[1])) { ++ do_insn_cmp(); + execIbp(memRLUT, regs_); ++ } + } + + static void intClear(u32 Addr, u32 Size) { +@@ -1265,7 +1280,7 @@ static void setupCop(u32 sr) + else + psxBSC[17] = psxCOPd; + if (sr & (1u << 30)) +- psxBSC[18] = Config.DisableStalls ? 
psxCOP2 : psxCOP2_stall; ++ psxBSC[18] = psxCOP2; + else + psxBSC[18] = psxCOPd; + if (sr & (1u << 31)) +@@ -1284,7 +1299,7 @@ void intApplyConfig() { + assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall); + assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall); + +- if (Config.DisableStalls) { ++ if (1) { + psxBSC[18] = psxCOP2; + psxBSC[50] = gteLWC2; + psxBSC[58] = gteSWC2; +@@ -1365,8 +1380,13 @@ static void intShutdown() { // single step (may do several ops in case of a branch or load delay) // called by asm/dynarec void execI(psxRegisters *regs) { -+ extern int last_count; + extern u32 next_interupt; -+ void do_insn_cmp(void); + printf("execI %08x c %u, ni %u\n", regs->pc, regs->cycle, next_interupt); + last_count = 0; do { diff --git a/libpcsxcore/new_dynarec/patches/trace_intr b/libpcsxcore/new_dynarec/patches/trace_intr index 3f01180d5..dcea98009 100644 --- a/libpcsxcore/new_dynarec/patches/trace_intr +++ b/libpcsxcore/new_dynarec/patches/trace_intr @@ -1,9 +1,17 @@ diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c -index f879ad8c..0ec366d0 100644 +index 3ab83ddf..514e349e 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c -@@ -323,13 +323,18 @@ static void ari64_shutdown() - { +@@ -5,6 +5,7 @@ + * See the COPYING file in the top-level directory. 
+ */ + ++#undef NDRC_THREAD + #include + + #include "emu_if.h" +@@ -578,13 +579,18 @@ static void ari64_shutdown() + ari64_thread_shutdown(); new_dynarec_cleanup(); new_dyna_pcsx_mem_shutdown(); + (void)ari64_execute; @@ -23,7 +31,7 @@ index f879ad8c..0ec366d0 100644 ari64_clear, ari64_notify, ari64_apply_config, -@@ -398,7 +403,7 @@ static u32 memcheck_read(u32 a) +@@ -654,7 +660,7 @@ static u32 memcheck_read(u32 a) return *(u32 *)(psxM + (a & 0x1ffffc)); } @@ -33,10 +41,10 @@ index f879ad8c..0ec366d0 100644 { static psxRegisters oldregs; diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c -index 1f37dc29..357f753e 100644 +index 151fb4bb..0238f30f 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c -@@ -289,6 +289,8 @@ static void write_biu(u32 value) +@@ -242,6 +242,8 @@ static void write_biu(u32 value) return; } @@ -46,10 +54,10 @@ index 1f37dc29..357f753e 100644 psxRegs.biuReg = value; } diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c -index 18bd6a4e..bc2eb3f6 100644 +index 9ff295e0..dcd0022b 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c -@@ -389,9 +389,12 @@ void psxRcntUpdate() +@@ -457,9 +457,12 @@ void psxRcntUpdate() /******************************************************************************/ @@ -62,7 +70,7 @@ index 18bd6a4e..bc2eb3f6 100644 _psxRcntWcount( index, value ); psxRcntSet(); -@@ -400,6 +403,7 @@ void psxRcntWcount( u32 index, u32 value ) +@@ -468,6 +471,7 @@ void psxRcntWcount( u32 index, u32 value ) void psxRcntWmode( u32 index, u32 value ) { verboseLog( 1, "[RCNT %i] wmode: %x\n", index, value ); @@ -70,7 +78,7 @@ index 18bd6a4e..bc2eb3f6 100644 _psxRcntWmode( index, value ); _psxRcntWcount( index, 0 ); -@@ -411,6 +415,7 @@ void psxRcntWmode( u32 index, u32 value ) +@@ -479,6 +483,7 @@ void psxRcntWmode( u32 index, u32 value ) void psxRcntWtarget( u32 index, u32 value ) { verboseLog( 1, "[RCNT %i] wtarget: %x\n", index, value ); 
@@ -78,67 +86,66 @@ index 18bd6a4e..bc2eb3f6 100644 rcnts[index].target = value; -@@ -423,6 +428,7 @@ void psxRcntWtarget( u32 index, u32 value ) - u32 psxRcntRcount( u32 index ) +@@ -492,6 +497,7 @@ u32 psxRcntRcount0() { + u32 index = 0; u32 count; +handler_cycle = psxRegs.cycle; - count = _psxRcntRcount( index ); + if ((rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset) || + (rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset2)) +diff --git a/libpcsxcore/psxevents.c b/libpcsxcore/psxevents.c +index 28c1b5df..fdcf98eb 100644 +--- a/libpcsxcore/psxevents.c ++++ b/libpcsxcore/psxevents.c +@@ -70,10 +70,11 @@ void irq_test(psxCP0Regs *cp0) + } + } +- cp0->n.Cause &= ~0x400; ++ u32 c2 = cp0->n.Cause & ~0x400; + if (psxHu32(0x1070) & psxHu32(0x1074)) +- cp0->n.Cause |= 0x400; +- if (((cp0->n.Cause | 1) & cp0->n.SR & 0x401) == 0x401) { ++ c2 |= 0x400; ++ if (((c2 | 1) & cp0->n.SR & 0x401) == 0x401) { ++ cp0->n.Cause = c2; + psxException(0, 0, cp0); + pending_exception = 1; + } diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c -index 10a2695f..7e4a64da 100644 +index b96db97a..12ac2b80 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c -@@ -437,13 +437,14 @@ void psxHwWrite8(u32 add, u8 value) { - return; - } +@@ -301,6 +301,7 @@ void psxHwWrite8(u32 add, u32 value) { + log_unhandled("unhandled w8 %08x %08x @%08x\n", + add, value, psxRegs.pc); + } ++ if (add < 0x1f802000) + psxHu8(add) = value; + } -+ if (add < 0x1f802000) - psxHu8(add) = value; - #ifdef PSXHW_LOG - PSXHW_LOG("*Unknown 8bit write at address %x value %x\n", add, value); - #endif - return; +@@ -374,6 +375,7 @@ void psxHwWrite16(u32 add, u32 value) { + log_unhandled("unhandled w16 %08x %08x @%08x\n", + add, value, psxRegs.pc); } -- psxHu8(add) = value; -+ //psxHu8(add) = value; - #ifdef PSXHW_LOG - PSXHW_LOG("*Known 8bit write at address %x value %x\n", add, value); - #endif -@@ -565,6 +566,7 @@ void psxHwWrite16(u32 add, u16 value) { - return; - } ++ if (add < 0x1f802000) + 
psxHu16ref(add) = SWAPu16(value); + } -+ if (add < 0x1f802000) - psxHu16ref(add) = SWAPu16(value); - #ifdef PSXHW_LOG - PSXHW_LOG("*Unknown 16bit write at address %x value %x\n", add, value); -@@ -756,9 +758,9 @@ void psxHwWrite32(u32 add, u32 value) { +@@ -429,6 +431,7 @@ void psxHwWrite32(u32 add, u32 value) { return; + } + } ++ if (add < 0x1f802000) + psxHu32ref(add) = SWAPu32(value); + } - case 0x1f801820: -- mdecWrite0(value); break; -+ mdecWrite0(value); return; - case 0x1f801824: -- mdecWrite1(value); break; -+ mdecWrite1(value); return; - - case 0x1f801100: - #ifdef PSXHW_LOG -@@ -826,6 +828,7 @@ void psxHwWrite32(u32 add, u32 value) { - return; - } - -+ if (add < 0x1f802000) - psxHu32ref(add) = SWAPu32(value); - #ifdef PSXHW_LOG - PSXHW_LOG("*Unknown 32bit write at address %x value %x\n", add, value); diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c -index 5756bee5..4bf9248d 100644 +index f979d5c7..583aed2f 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c -@@ -238,7 +238,7 @@ static inline void addCycle(psxRegisters *regs) +@@ -245,7 +245,7 @@ static inline void addCycle(psxRegisters *regs) { assert(regs->subCycleStep >= 0x10000); regs->subCycle += regs->subCycleStep; @@ -147,7 +154,7 @@ index 5756bee5..4bf9248d 100644 regs->subCycle &= 0xffff; } -@@ -435,7 +435,9 @@ static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) { +@@ -442,7 +442,9 @@ static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) { regs->CP0.n.Target = pc_final; regs->branching = 0; @@ -157,7 +164,7 @@ index 5756bee5..4bf9248d 100644 } static void doBranchReg(psxRegisters *regs, u32 tar) { -@@ -960,7 +962,7 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) { +@@ -971,7 +973,7 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) { } } @@ -166,15 +173,15 @@ index 5756bee5..4bf9248d 100644 // no exception static inline void psxNULLne(psxRegisters *regs) { -@@ -1120,6 +1122,7 @@ OP(psxHLE) { - } +@@ -1130,6 
+1132,7 @@ OP(psxHLE) { + dloadFlush(regs_); psxHLEt[hleCode](); branchSeen = 1; + psxRegs.cycle -= 2; } static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = { -@@ -1169,18 +1172,20 @@ static void intReset() { +@@ -1179,18 +1182,20 @@ static void intReset() { static inline void execI_(u8 **memRLUT, psxRegisters *regs) { u32 pc = regs->pc; @@ -197,7 +204,7 @@ index 5756bee5..4bf9248d 100644 dloadStep(regs); if (execBreakCheck(regs, pc)) -@@ -1189,6 +1194,8 @@ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) { +@@ -1199,6 +1204,8 @@ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) { regs->pc += 4; regs->code = fetch(regs, memRLUT, pc); psxBSC[regs->code >> 26](regs, regs->code); @@ -206,8 +213,8 @@ index 5756bee5..4bf9248d 100644 } static void intExecute() { -@@ -1218,6 +1225,30 @@ void intExecuteBlock(enum blockExecCaller caller) { - execI_(memRLUT, regs_); +@@ -1237,6 +1244,30 @@ static void intExecuteBlockBp(enum blockExecCaller caller) { + execIbp(memRLUT, regs_); } +extern void do_insn_trace(void); @@ -237,7 +244,7 @@ index 5756bee5..4bf9248d 100644 static void intClear(u32 Addr, u32 Size) { } -@@ -1246,7 +1277,7 @@ static void setupCop(u32 sr) +@@ -1265,7 +1296,7 @@ static void setupCop(u32 sr) else psxBSC[17] = psxCOPd; if (sr & (1u << 30)) @@ -246,7 +253,7 @@ index 5756bee5..4bf9248d 100644 else psxBSC[18] = psxCOPd; if (sr & (1u << 31)) -@@ -1265,7 +1296,7 @@ void intApplyConfig() { +@@ -1284,7 +1315,7 @@ void intApplyConfig() { assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall); assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall); @@ -256,10 +263,10 @@ index 5756bee5..4bf9248d 100644 psxBSC[50] = gteLWC2; psxBSC[58] = gteSWC2; diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c -index 42755e52..4fa4316b 100644 +index e08bd895..8ffb882c 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c -@@ -289,10 +289,13 @@ void psxMemOnIsolate(int enable) +@@ -315,10 +315,13 @@ void psxMemOnIsolate(int 
enable) : R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL); } @@ -273,7 +280,7 @@ index 42755e52..4fa4316b 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -318,6 +321,7 @@ u16 psxMemRead16(u32 mem) { +@@ -344,6 +347,7 @@ u16 psxMemRead16(u32 mem) { char *p; u32 t; @@ -281,7 +288,7 @@ index 42755e52..4fa4316b 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -343,6 +347,7 @@ u32 psxMemRead32(u32 mem) { +@@ -369,6 +373,7 @@ u32 psxMemRead32(u32 mem) { char *p; u32 t; @@ -289,7 +296,7 @@ index 42755e52..4fa4316b 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -370,6 +375,7 @@ void psxMemWrite8(u32 mem, u8 value) { +@@ -396,6 +401,7 @@ void psxMemWrite8(u32 mem, u32 value) { char *p; u32 t; @@ -297,7 +304,7 @@ index 42755e52..4fa4316b 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -397,6 +403,7 @@ void psxMemWrite16(u32 mem, u16 value) { +@@ -423,6 +429,7 @@ void psxMemWrite16(u32 mem, u32 value) { char *p; u32 t; @@ -305,7 +312,7 @@ index 42755e52..4fa4316b 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -424,6 +431,7 @@ void psxMemWrite32(u32 mem, u32 value) { +@@ -450,6 +457,7 @@ void psxMemWrite32(u32 mem, u32 value) { char *p; u32 t; @@ -313,7 +320,7 @@ index 42755e52..4fa4316b 100644 // if ((mem&0x1fffff) == 0x71E18 || value == 0x48088800) SysPrintf("t2fix!!\n"); t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { -@@ -442,6 +450,8 @@ void psxMemWrite32(u32 mem, u32 value) { +@@ -468,6 +476,8 @@ void psxMemWrite32(u32 mem, u32 value) { #endif } else { if (mem == 0xfffe0130) { @@ -323,10 +330,10 @@ index 42755e52..4fa4316b 100644 return; } diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c -index 48881068..47c40940 100644 +index 0c29dba7..3af7e156 100644 --- a/libpcsxcore/r3000a.c +++ 
b/libpcsxcore/r3000a.c -@@ -127,6 +127,8 @@ void psxException(u32 cause, enum R3000Abdt bdt, psxCP0Regs *cp0) { +@@ -126,6 +126,8 @@ void psxException(u32 cause, enum R3000Abdt bdt, psxCP0Regs *cp0) { } void psxBranchTest() { From cd6cec3cd05a0163bc0d98ab99e175fe2ff0deae Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 25 Oct 2024 00:07:54 +0300 Subject: [PATCH 587/597] drc: add another sync after running the interpreter --- libpcsxcore/new_dynarec/emu_if.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 57fe663d0..a42118f1b 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -84,8 +84,6 @@ void ndrc_freeze(void *f, int mode) SaveFuncs.write(f, addrs, size); } else { - new_dyna_pcsx_mem_load_state(); - bytes = SaveFuncs.read(f, header, sizeof(header)); if (bytes != sizeof(header) || strcmp(header, header_save)) { if (bytes > 0) @@ -280,6 +278,14 @@ static void ari64_clear(u32 addr, u32 size) new_dynarec_invalidate_range(addr, end); } +static void ari64_on_ext_change(int ram_replaced, int other_cpu_emu_exec) +{ + if (ram_replaced) + ari64_reset(); + else if (other_cpu_emu_exec) + new_dyna_pcsx_mem_load_state(); +} + static void ari64_notify(enum R3000Anote note, void *data) { switch (note) { @@ -290,8 +296,7 @@ static void ari64_notify(enum R3000Anote note, void *data) { case R3000ACPU_NOTIFY_BEFORE_SAVE: break; case R3000ACPU_NOTIFY_AFTER_LOAD: - if (data == NULL) - ari64_reset(); + ari64_on_ext_change(data == NULL, 0); psxInt.Notify(note, data); break; } @@ -354,6 +359,7 @@ static noinline void ari64_execute_threaded_slow(enum blockExecCaller block_call psxInt.Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); //ari64_notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); + ari64_on_ext_change(0, 1); } static void ari64_execute_threaded_once(enum blockExecCaller block_caller) From e7172b26dbdd51a91360693fb1c54eede81cd4a9 Mon Sep 17 00:00:00 
2001 From: notaz Date: Fri, 25 Oct 2024 00:14:40 +0300 Subject: [PATCH 588/597] drc: drop pending_exception just causing confusion --- libpcsxcore/new_dynarec/emu_if.c | 8 +------- libpcsxcore/new_dynarec/emu_if.h | 1 - libpcsxcore/new_dynarec/linkage_arm.S | 20 ++++++++---------- libpcsxcore/new_dynarec/linkage_arm64.S | 16 +++++++-------- libpcsxcore/new_dynarec/linkage_offsets.h | 4 ++-- libpcsxcore/new_dynarec/new_dynarec.c | 25 +++++++++-------------- libpcsxcore/new_dynarec/new_dynarec.h | 1 - libpcsxcore/psxevents.c | 7 +------ 8 files changed, 31 insertions(+), 51 deletions(-) diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index a42118f1b..986899980 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -42,7 +42,6 @@ #define stop dynarec_local_var4(LO_stop) #define psxRegs (*(psxRegisters *)((char *)dynarec_local + LO_psxRegs)) #define next_interupt dynarec_local_var4(LO_next_interupt) -#define pending_exception dynarec_local_var4(LO_pending_exception) #endif static void ari64_thread_sync(void); @@ -52,10 +51,6 @@ void pcsx_mtc0(u32 reg, u32 val) evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); MTC0(&psxRegs, reg, val); gen_interupt(&psxRegs.CP0); - - //if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.SR & 0x0300) // possible sw irq - if ((psxRegs.pc & 0x803ffeff) == 0x80000080) - pending_exception = 1; } void pcsx_mtc0_ds(u32 reg, u32 val) @@ -227,7 +222,6 @@ static void ari64_reset() new_dyna_pcsx_mem_reset(); new_dynarec_invalidate_all_pages(); new_dyna_pcsx_mem_load_state(); - pending_exception = 1; } // execute until predefined leave points @@ -581,7 +575,7 @@ R3000Acpu psxRec = { struct ndrc_globals ndrc_g; // dummy unsigned int address; -int pending_exception, stop; +int stop; u32 next_interupt; void *psxH_ptr; void *zeromem_ptr; diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index 1b587661f..433455b10 100644 --- 
a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -67,7 +67,6 @@ extern u32 inv_code_start, inv_code_end; /* cycles/irqs */ extern u32 next_interupt; -extern int pending_exception; /* called by drc */ void pcsx_mtc0(u32 reg, u32 val); diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 58e057b58..bac1f299b 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -56,7 +56,7 @@ dynarec_local: DRC_VAR(next_interupt, 4) DRC_VAR(cycle_count, 4) DRC_VAR(last_count, 4) -DRC_VAR(pending_exception, 4) +@DRC_VAR(unused1, 4) DRC_VAR(stop, 4) DRC_VAR(branch_target, 4) DRC_VAR(address, 4) @@ -235,26 +235,24 @@ FUNCTION(jump_vaddr_r0): .align 2 FUNCTION(cc_interrupt): ldr r0, [fp, #LO_last_count] - mov r1, #0 - add r10, r0, r10 - str r1, [fp, #LO_pending_exception] - str r10, [fp, #LO_cycle] /* PCSX cycles */ + ldr r9, [fp, #LO_pcaddr] + add r1, r0, r10 + str r1, [fp, #LO_cycle] /* PCSX cycles */ mov r10, lr add r0, fp, #LO_reg_cop0 /* CP0 */ bl gen_interupt mov lr, r10 ldr r10, [fp, #LO_cycle] - ldr r0, [fp, #LO_next_interupt] - ldr r1, [fp, #LO_pending_exception] + ldr r0, [fp, #LO_pcaddr] + ldr r1, [fp, #LO_next_interupt] ldr r2, [fp, #LO_stop] - str r0, [fp, #LO_last_count] - sub r10, r10, r0 + str r1, [fp, #LO_last_count] + sub r10, r10, r1 tst r2, r2 ldmfdne sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} - tst r1, r1 + cmp r0, r9 moveq pc, lr - ldr r0, [fp, #LO_pcaddr] bl ndrc_get_addr_ht mov pc, r0 .size cc_interrupt, .-cc_interrupt diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 9e61ea1e5..9e38bb96c 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -58,7 +58,7 @@ dynarec_local: DRC_VAR(next_interupt, 4) DRC_VAR(cycle_count, 4) DRC_VAR(last_count, 4) -DRC_VAR(pending_exception, 4) +@DRC_VAR(unused1, 4) DRC_VAR(stop, 4) DRC_VAR(branch_target, 4) 
DRC_VAR(address, 4) @@ -104,8 +104,8 @@ FUNCTION(dyna_linker): .align 2 FUNCTION(cc_interrupt): ldr w0, [rFP, #LO_last_count] + ldr w22, [rFP, #LO_pcaddr] add rCC, w0, rCC - str wzr, [rFP, #LO_pending_exception] str rCC, [rFP, #LO_cycle] /* PCSX cycles */ mov x21, lr 1: @@ -113,16 +113,16 @@ FUNCTION(cc_interrupt): bl gen_interupt mov lr, x21 ldr rCC, [rFP, #LO_cycle] - ldr w0, [rFP, #LO_next_interupt] - ldr w1, [rFP, #LO_pending_exception] + ldr w0, [rFP, #LO_pcaddr] + ldr w1, [rFP, #LO_next_interupt] ldr w2, [rFP, #LO_stop] - str w0, [rFP, #LO_last_count] - sub rCC, rCC, w0 + str w1, [rFP, #LO_last_count] + sub rCC, rCC, w1 cbnz w2, new_dyna_leave - cbnz w1, 2f + cmp w0, w22 + bne 2f ret 2: - ldr w0, [rFP, #LO_pcaddr] bl ndrc_get_addr_ht br x0 ESIZE(cc_interrupt, .-cc_interrupt) diff --git a/libpcsxcore/new_dynarec/linkage_offsets.h b/libpcsxcore/new_dynarec/linkage_offsets.h index 541325acd..75521aa83 100644 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ b/libpcsxcore/new_dynarec/linkage_offsets.h @@ -4,8 +4,8 @@ #define LO_next_interupt 64 #define LO_cycle_count (LO_next_interupt + 4) #define LO_last_count (LO_cycle_count + 4) -#define LO_pending_exception (LO_last_count + 4) -#define LO_stop (LO_pending_exception + 4) +#define LO_unused1 (LO_last_count + 4) +#define LO_stop (LO_unused1 + 4) #define LO_branch_target (LO_stop + 4) #define LO_address (LO_branch_target + 4) #define LO_hack_addr (LO_address + 4) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 2b2e66346..bbf0d35be 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -73,7 +73,6 @@ static Jit g_jit; extern int cycle_count; // ... 
until end of the timeslice, counts -N -> 0 (CCREG) extern int last_count; // last absolute target, often = next_interupt extern int pcaddr; -extern int pending_exception; extern int branch_target; /* same as psxRegs.CP0.n.* */ @@ -1311,7 +1310,6 @@ static const char *fpofs_name(u_int ofs) ofscase(next_interupt); ofscase(cycle_count); ofscase(last_count); - ofscase(pending_exception); ofscase(stop); ofscase(address); ofscase(lo); @@ -3608,8 +3606,6 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) } emit_movimm(start+i*4+4,HOST_TEMPREG); emit_writeword(HOST_TEMPREG,&pcaddr); - emit_movimm(0,HOST_TEMPREG); - emit_writeword(HOST_TEMPREG,&pending_exception); } if( s != 1) emit_mov(s, 1); @@ -3621,11 +3617,11 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); //emit_writeword(HOST_TEMPREG,&last_count); assert(!is_delayslot); - emit_readword(&pending_exception,HOST_TEMPREG); - emit_test(HOST_TEMPREG,HOST_TEMPREG); + emit_readword(&pcaddr, 0); + emit_movimm(start+i*4+4, HOST_TEMPREG); + emit_cmp(HOST_TEMPREG, 0); void *jaddr = out; emit_jeq(0); - emit_readword(&pcaddr, 0); emit_far_call(ndrc_get_addr_ht); emit_jmpreg(0); set_jump_target(jaddr, out); @@ -5152,7 +5148,8 @@ static void do_ccstub(int n) literal_pool(256); assem_debug("do_ccstub %x\n",start+(u_int)stubs[n].b*4); set_jump_target(stubs[n].addr, out); - int i=stubs[n].b; + int i = stubs[n].b; + int r_pc = -1; if (stubs[n].d != TAKEN) { wb_dirtys(branch_regs[i].regmap,branch_regs[i].dirty); } @@ -5163,8 +5160,7 @@ static void do_ccstub(int n) if(stubs[n].c!=-1) { // Save PC as return address - emit_movimm(stubs[n].c,0); - emit_writeword(0,&pcaddr); + emit_movimm(stubs[n].c, (r_pc = 0)); } else { @@ -5288,19 +5284,19 @@ static void do_ccstub(int n) else emit_movimm((dops[i].opcode2 & 1) ? 
cinfo[i].ba : start + i*4 + 8, addr); } - emit_writeword(addr, &pcaddr); + r_pc = addr; } else if(dops[i].itype==RJUMP) { - int r=get_reg(branch_regs[i].regmap,dops[i].rs1); + r_pc = get_reg(branch_regs[i].regmap, dops[i].rs1); if (ds_writes_rjump_rs(i)) { - r=get_reg(branch_regs[i].regmap,RTEMP); + r_pc = get_reg(branch_regs[i].regmap, RTEMP); } - emit_writeword(r,&pcaddr); } else {SysPrintf("Unknown branch type in do_ccstub\n");abort();} } + emit_writeword(r_pc, &pcaddr); // Update cycle count assert(branch_regs[i].regmap[HOST_CCREG]==CCREG||branch_regs[i].regmap[HOST_CCREG]==-1); if(stubs[n].a) emit_addimm(HOST_CCREG,(int)stubs[n].a,HOST_CCREG); @@ -6272,7 +6268,6 @@ void new_dynarec_clear_full(void) mini_ht_clear(); copy=shadow; expirep = EXPIRITY_OFFSET; - pending_exception=0; literalcount=0; stop_after_jal=0; inv_code_start=inv_code_end=~0; diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index 8c168084b..5b27c86a9 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -2,7 +2,6 @@ #define MAXBLOCK 2048 // in mips instructions -extern int pending_exception; extern int stop; #define NDHACK_NO_SMC_CHECK (1<<0) diff --git a/libpcsxcore/psxevents.c b/libpcsxcore/psxevents.c index 28c1b5dfd..4d13cfb97 100644 --- a/libpcsxcore/psxevents.c +++ b/libpcsxcore/psxevents.c @@ -5,8 +5,6 @@ #include "mdec.h" #include "psxevents.h" -extern int pending_exception; - //#define evprintf printf #define evprintf(...) 
@@ -73,10 +71,8 @@ void irq_test(psxCP0Regs *cp0) cp0->n.Cause &= ~0x400; if (psxHu32(0x1070) & psxHu32(0x1074)) cp0->n.Cause |= 0x400; - if (((cp0->n.Cause | 1) & cp0->n.SR & 0x401) == 0x401) { + if (((cp0->n.Cause | 1) & cp0->n.SR & 0x401) == 0x401) psxException(0, 0, cp0); - pending_exception = 1; - } } void gen_interupt(psxCP0Regs *cp0) @@ -85,7 +81,6 @@ void gen_interupt(psxCP0Regs *cp0) next_interupt, next_interupt - psxRegs.cycle); irq_test(cp0); - //pending_exception = 1; schedule_timeslice(); From c87406ff726aa6ca927e3e73831a09a9ccb0667d Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 25 Oct 2024 02:29:53 +0300 Subject: [PATCH 589/597] move more globals to psxRegs more locality, less literal pools on ARM --- frontend/libretro.c | 6 +- frontend/main.c | 6 +- frontend/main.h | 12 --- frontend/plugin_lib.c | 13 ++- libpcsxcore/lightrec/plugin.c | 20 ++-- libpcsxcore/misc.c | 2 + libpcsxcore/new_dynarec/emu_if.c | 120 +++++++++++----------- libpcsxcore/new_dynarec/emu_if.h | 5 +- libpcsxcore/new_dynarec/linkage_arm.S | 9 +- libpcsxcore/new_dynarec/linkage_arm64.S | 9 +- libpcsxcore/new_dynarec/linkage_offsets.h | 23 +++-- libpcsxcore/new_dynarec/new_dynarec.c | 36 +++---- libpcsxcore/new_dynarec/new_dynarec.h | 2 - libpcsxcore/psxbios.c | 10 +- libpcsxcore/psxevents.c | 43 +++++--- libpcsxcore/psxevents.h | 12 +-- libpcsxcore/psxinterpreter.c | 42 +++----- libpcsxcore/psxinterpreter.h | 9 +- libpcsxcore/r3000a.c | 2 +- libpcsxcore/r3000a.h | 19 ++-- 20 files changed, 200 insertions(+), 200 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index c932625f0..3edcc2c9e 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -659,7 +659,7 @@ struct rearmed_cbs pl_rearmed_cbs = { void pl_frame_limit(void) { /* called once per frame, make psxCpu->Execute() above return */ - stop++; + psxRegs.stop++; } void pl_timing_prepare(int is_pal) @@ -3288,8 +3288,8 @@ void retro_run(void) if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &updated) && 
updated) update_variables(true); - stop = 0; - psxCpu->Execute(); + psxRegs.stop = 0; + psxCpu->Execute(&psxRegs); if (pl_rearmed_cbs.fskip_dirty == 1) { if (frameskip_counter < frameskip_interval) diff --git a/frontend/main.c b/frontend/main.c index 3ead1b08d..48006a67b 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -538,7 +538,7 @@ int emu_core_init(void) void emu_core_ask_exit(void) { - stop++; + psxRegs.stop++; g_emu_want_quit = 1; } @@ -734,10 +734,10 @@ int main(int argc, char *argv[]) while (!g_emu_want_quit) { - stop = 0; + psxRegs.stop = 0; emu_action = SACTION_NONE; - psxCpu->Execute(); + psxCpu->Execute(&psxRegs); if (emu_action != SACTION_NONE) do_emu_action(); } diff --git a/frontend/main.h b/frontend/main.h index 98b0f370e..1c2493543 100644 --- a/frontend/main.h +++ b/frontend/main.h @@ -84,16 +84,4 @@ enum sched_action { #define SACTION_GUN_MASK (0x0f << SACTION_GUN_TRIGGER) -static inline void emu_set_action(enum sched_action action_) -{ - extern enum sched_action emu_action, emu_action_old; - extern int stop; - - if (action_ == SACTION_NONE) - emu_action_old = 0; - else if (action_ != emu_action_old) - stop++; - emu_action = action_; -} - #endif /* __FRONTEND_MAIN_H__ */ diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 1b63f241a..e12a79815 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -610,6 +610,17 @@ static void update_analogs(void) } } +static void emu_set_action(enum sched_action action_) +{ + extern enum sched_action emu_action, emu_action_old; + + if (action_ == SACTION_NONE) + emu_action_old = 0; + else if (action_ != emu_action_old) + psxRegs.stop++; + emu_action = action_; +} + static void update_input(void) { int actions[IN_BINDTYPE_COUNT] = { 0, }; @@ -834,7 +845,7 @@ static void *watchdog_thread(void *unused) { sleep(sleep_time); - if (stop) { + if (psxRegs.stop) { seen_dead = 0; sleep_time = 5; continue; diff --git a/libpcsxcore/lightrec/plugin.c b/libpcsxcore/lightrec/plugin.c index 
7f500fd27..d62f35bdf 100644 --- a/libpcsxcore/lightrec/plugin.c +++ b/libpcsxcore/lightrec/plugin.c @@ -69,8 +69,9 @@ void* code_buffer; static struct lightrec_state *lightrec_state; static bool use_lightrec_interpreter; -static bool use_pcsx_interpreter; static bool block_stepping; +//static bool use_pcsx_interpreter; +#define use_pcsx_interpreter 0 extern u32 lightrec_hacks; @@ -161,7 +162,7 @@ static void lightrec_tansition_to_pcsx(struct lightrec_state *state) static void lightrec_tansition_from_pcsx(struct lightrec_state *state) { - s32 cycles_left = next_interupt - psxRegs.cycle; + s32 cycles_left = psxRegs.next_interupt - psxRegs.cycle; if (block_stepping || cycles_left <= 0 || has_interrupt()) lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT); @@ -490,10 +491,10 @@ static void lightrec_plugin_execute_internal(bool block_only) regs = lightrec_get_registers(lightrec_state); gen_interupt((psxCP0Regs *)regs->cp0); - if (!block_only && stop) + if (!block_only && psxRegs.stop) return; - cycles_pcsx = next_interupt - psxRegs.cycle; + cycles_pcsx = psxRegs.next_interupt - psxRegs.cycle; assert((s32)cycles_pcsx > 0); // step during early boot so that 0x80030000 fastboot hack works @@ -502,7 +503,7 @@ static void lightrec_plugin_execute_internal(bool block_only) cycles_pcsx = 0; if (use_pcsx_interpreter) { - intExecuteBlock(0); + psxInt.ExecuteBlock(&psxRegs, 0); } else { u32 cycles_lightrec = cycles_pcsx * 1024; if (unlikely(use_lightrec_interpreter)) { @@ -548,13 +549,14 @@ static void lightrec_plugin_execute_internal(bool block_only) } } -static void lightrec_plugin_execute(void) +static void lightrec_plugin_execute(psxRegisters *regs) { - while (!stop) + while (!regs->stop) lightrec_plugin_execute_internal(false); } -static void lightrec_plugin_execute_block(enum blockExecCaller caller) +static void lightrec_plugin_execute_block(psxRegisters *regs, + enum blockExecCaller caller) { lightrec_plugin_execute_internal(true); } @@ -603,6 +605,8 @@ static 
void lightrec_plugin_apply_config() } cycles_per_op_old = cycles_per_op; lightrec_set_cycles_per_opcode(lightrec_state, cycles_per_op); + + intApplyConfig(); } static void lightrec_plugin_shutdown(void) diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 286510257..6ba8d72ce 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -779,7 +779,9 @@ int LoadState(const char *file) { SaveFuncs.read(f, psxH, 0x00010000); SaveFuncs.read(f, &psxRegs, offsetof(psxRegisters, gteBusyCycle)); psxRegs.gteBusyCycle = psxRegs.cycle; + psxRegs.branching = 0; psxRegs.biosBranchCheck = ~0; + psxRegs.cpuInRecursion = 0; psxRegs.gpuIdleAfter = psxRegs.cycle - 1; HW_GPU_STATUS &= SWAP32(~PSXGPU_nBUSY); if (misc->magic == MISC_MAGIC) { diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 986899980..8ebf27463 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -35,30 +35,8 @@ //#define evprintf printf #define evprintf(...) -#if !defined(DRC_DISABLE) && !defined(LIGHTREC) -// reduce global loads/literal pools (maybe) -#include "linkage_offsets.h" -#define dynarec_local_var4(x) dynarec_local[(x) / sizeof(dynarec_local[0])] -#define stop dynarec_local_var4(LO_stop) -#define psxRegs (*(psxRegisters *)((char *)dynarec_local + LO_psxRegs)) -#define next_interupt dynarec_local_var4(LO_next_interupt) -#endif - static void ari64_thread_sync(void); -void pcsx_mtc0(u32 reg, u32 val) -{ - evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); - MTC0(&psxRegs, reg, val); - gen_interupt(&psxRegs.CP0); -} - -void pcsx_mtc0_ds(u32 reg, u32 val) -{ - evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); - MTC0(&psxRegs, reg, val); -} - void ndrc_freeze(void *f, int mode) { const char header_save[8] = "ariblks"; @@ -111,10 +89,24 @@ void ndrc_clear_full(void) } #if !defined(DRC_DISABLE) && !defined(LIGHTREC) +#include "linkage_offsets.h" static void ari64_thread_init(void); static int 
ari64_thread_check_range(unsigned int start, unsigned int end); +void pcsx_mtc0(psxRegisters *regs, u32 reg, u32 val) +{ + evprintf("MTC0 %d #%x @%08x %u\n", reg, val, regs->pc, regs->cycle); + MTC0(regs, reg, val); + gen_interupt(&regs->CP0); +} + +void pcsx_mtc0_ds(psxRegisters *regs, u32 reg, u32 val) +{ + evprintf("MTC0 %d #%x @%08x %u\n", reg, val, regs->pc, regs->cycle); + MTC0(regs, reg, val); +} + /* GTE stuff */ void *gte_handlers[64]; @@ -226,36 +218,39 @@ static void ari64_reset() // execute until predefined leave points // (HLE softcall exit and BIOS fastboot end) -static void ari64_execute_until() +static void ari64_execute_until(psxRegisters *regs) { - evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc, - psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); + void *drc_local = (char *)regs - LO_psxRegs; + + assert(drc_local == dynarec_local); + evprintf("ari64_execute %08x, %u->%u (%d)\n", regs->pc, + regs->cycle, regs->next_interupt, regs->next_interupt - regs->cycle); - new_dyna_start(dynarec_local); + new_dyna_start(drc_local); - evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc, - psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); + evprintf("ari64_execute end %08x, %u->%u (%d)\n", regs->pc, + regs->cycle, regs->next_interupt, regs->next_interupt - regs->cycle); } -static void ari64_execute() +static void ari64_execute(struct psxRegisters *regs) { - while (!stop) { - schedule_timeslice(); - ari64_execute_until(); - evprintf("drc left @%08x\n", psxRegs.pc); + while (!regs->stop) { + schedule_timeslice(regs); + ari64_execute_until(regs); + evprintf("drc left @%08x\n", regs->pc); } } -static void ari64_execute_block(enum blockExecCaller caller) +static void ari64_execute_block(struct psxRegisters *regs, enum blockExecCaller caller) { if (caller == EXEC_CALLER_BOOT) - stop++; + regs->stop++; - next_interupt = psxRegs.cycle + 1; - ari64_execute_until(); + regs->next_interupt = regs->cycle + 1; + 
ari64_execute_until(regs); if (caller == EXEC_CALLER_BOOT) - stop--; + regs->stop--; } static void ari64_clear(u32 addr, u32 size) @@ -332,12 +327,13 @@ static void clear_local_cache(void) #endif } -static noinline void ari64_execute_threaded_slow(enum blockExecCaller block_caller) +static noinline void ari64_execute_threaded_slow(struct psxRegisters *regs, + enum blockExecCaller block_caller) { if (!ndrc_g.thread.busy) { - memcpy(ndrc_smrv_regs, psxRegs.GPR.r, sizeof(ndrc_smrv_regs)); + memcpy(ndrc_smrv_regs, regs->GPR.r, sizeof(ndrc_smrv_regs)); slock_lock(ndrc_g.thread.lock); - ndrc_g.thread.addr = psxRegs.pc; + ndrc_g.thread.addr = regs->pc; ndrc_g.thread.busy = 1; slock_unlock(ndrc_g.thread.lock); scond_signal(ndrc_g.thread.cond); @@ -347,18 +343,19 @@ static noinline void ari64_execute_threaded_slow(enum blockExecCaller block_call psxInt.Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); do { - psxInt.ExecuteBlock(block_caller); + psxInt.ExecuteBlock(regs, block_caller); } - while (!stop && ndrc_g.thread.busy && block_caller == EXEC_CALLER_OTHER); + while (!regs->stop && ndrc_g.thread.busy && block_caller == EXEC_CALLER_OTHER); psxInt.Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); //ari64_notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); ari64_on_ext_change(0, 1); } -static void ari64_execute_threaded_once(enum blockExecCaller block_caller) +static void ari64_execute_threaded_once(struct psxRegisters *regs, + enum blockExecCaller block_caller) { - psxRegisters *regs = (void *)((char *)dynarec_local + LO_psxRegs); + void *drc_local = (char *)regs - LO_psxRegs; void *target; if (likely(!ndrc_g.thread.busy)) { @@ -366,35 +363,36 @@ static void ari64_execute_threaded_once(enum blockExecCaller block_caller) target = ndrc_get_addr_ht_param(regs->pc, ndrc_cm_no_compile); if (target) { clear_local_cache(); - new_dyna_start_at(dynarec_local, target); + new_dyna_start_at(drc_local, target); return; } } - ari64_execute_threaded_slow(block_caller); + ari64_execute_threaded_slow(regs, 
block_caller); } -static void ari64_execute_threaded() +static void ari64_execute_threaded(struct psxRegisters *regs) { - schedule_timeslice(); - while (!stop) + schedule_timeslice(regs); + while (!regs->stop) { - ari64_execute_threaded_once(EXEC_CALLER_OTHER); + ari64_execute_threaded_once(regs, EXEC_CALLER_OTHER); - if ((s32)(psxRegs.cycle - next_interupt) >= 0) - schedule_timeslice(); + if ((s32)(regs->cycle - regs->next_interupt) >= 0) + schedule_timeslice(regs); } } -static void ari64_execute_threaded_block(enum blockExecCaller caller) +static void ari64_execute_threaded_block(struct psxRegisters *regs, + enum blockExecCaller caller) { if (caller == EXEC_CALLER_BOOT) - stop++; + regs->stop++; - next_interupt = psxRegs.cycle + 1; - ari64_execute_threaded_once(caller); + regs->next_interupt = regs->cycle + 1; + ari64_execute_threaded_once(regs, caller); if (caller == EXEC_CALLER_BOOT) - stop--; + regs->stop--; } static void ari64_thread_sync(void) @@ -574,9 +572,6 @@ R3000Acpu psxRec = { #else // if DRC_DISABLE struct ndrc_globals ndrc_g; // dummy -unsigned int address; -int stop; -u32 next_interupt; void *psxH_ptr; void *zeromem_ptr; u32 zero_mem[0x1000/4]; @@ -861,7 +856,8 @@ void do_insn_cmp(void) for (i = 0; i < 8; i++) printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i], i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]); - printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc, psxRegs.cycle, next_interupt); + printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc, + psxRegs.cycle, psxRegs.next_interupt); //dump_mem("/tmp/psxram.dump", psxM, 0x200000); //dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000); exit(1); diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index 433455b10..03440c491 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -69,8 +69,9 @@ extern u32 inv_code_start, inv_code_end; extern u32 next_interupt; /* called by drc 
*/ -void pcsx_mtc0(u32 reg, u32 val); -void pcsx_mtc0_ds(u32 reg, u32 val); +struct psxRegisters; +void pcsx_mtc0(struct psxRegisters *regs, u32 reg, u32 val); +void pcsx_mtc0_ds(struct psxRegisters *regs, u32 reg, u32 val); /* misc */ extern void SysPrintf(const char *fmt, ...); diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index bac1f299b..1d8880ae9 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -53,24 +53,19 @@ dynarec_local: #define DRC_VAR(name, size_) \ DRC_VAR_(name, ESYM(name), size_) -DRC_VAR(next_interupt, 4) +@DRC_VAR(next_interupt, 4) DRC_VAR(cycle_count, 4) DRC_VAR(last_count, 4) -@DRC_VAR(unused1, 4) -DRC_VAR(stop, 4) -DRC_VAR(branch_target, 4) +@DRC_VAR(stop, 4) DRC_VAR(address, 4) DRC_VAR(hack_addr, 4) DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) /* psxRegs */ -@DRC_VAR(reg, 128) @DRC_VAR(lo, 4) @DRC_VAR(hi, 4) -DRC_VAR(reg_cop0, 128) DRC_VAR(reg_cop2d, 128) DRC_VAR(reg_cop2c, 128) -DRC_VAR(pcaddr, 4) @DRC_VAR(code, 4) @DRC_VAR(cycle, 4) @DRC_VAR(interrupt, 4) diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 9e38bb96c..155e0e2ff 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -55,24 +55,19 @@ dynarec_local: #define DRC_VAR(name, size_) \ DRC_VAR_(name, ESYM(name), size_) -DRC_VAR(next_interupt, 4) +#DRC_VAR(next_interupt, 4) DRC_VAR(cycle_count, 4) DRC_VAR(last_count, 4) -@DRC_VAR(unused1, 4) -DRC_VAR(stop, 4) -DRC_VAR(branch_target, 4) +#DRC_VAR(stop, 4) DRC_VAR(address, 4) DRC_VAR(hack_addr, 4) DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) /* psxRegs */ -#DRC_VAR(reg, 128) #DRC_VAR(lo, 4) #DRC_VAR(hi, 4) -DRC_VAR(reg_cop0, 128) DRC_VAR(reg_cop2d, 128) DRC_VAR(reg_cop2c, 128) -DRC_VAR(pcaddr, 4) #DRC_VAR(code, 4) #DRC_VAR(cycle, 4) #DRC_VAR(interrupt, 4) diff --git a/libpcsxcore/new_dynarec/linkage_offsets.h 
b/libpcsxcore/new_dynarec/linkage_offsets.h index 75521aa83..2f0de6af6 100644 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ b/libpcsxcore/new_dynarec/linkage_offsets.h @@ -1,17 +1,16 @@ #define PTRSZ __SIZEOF_POINTER__ -#define LO_next_interupt 64 -#define LO_cycle_count (LO_next_interupt + 4) +#define LO_unused0 64 +#define LO_unused1 (LO_unused0 + 4) +#define LO_unused2 (LO_unused1 + 4) +#define LO_unused3 (LO_unused2 + 4) +#define LO_cycle_count (LO_unused3 + 4) #define LO_last_count (LO_cycle_count + 4) -#define LO_unused1 (LO_last_count + 4) -#define LO_stop (LO_unused1 + 4) -#define LO_branch_target (LO_stop + 4) -#define LO_address (LO_branch_target + 4) +#define LO_address (LO_last_count + 4) #define LO_hack_addr (LO_address + 4) #define LO_psxRegs (LO_hack_addr + 4) -#define LO_reg (LO_psxRegs) -#define LO_lo (LO_reg + 128) +#define LO_lo (LO_psxRegs + 128) #define LO_hi (LO_lo + 4) #define LO_reg_cop0 (LO_hi + 4) #define LO_reg_cop2d (LO_reg_cop0 + 128) @@ -22,12 +21,14 @@ #define LO_cycle (LO_code + 4) #define LO_interrupt (LO_cycle + 4) #define LO_intCycle (LO_interrupt + 4) -#define LO_gteBusyCycle (LO_intCycle + 256) +#define LO_next_interupt (LO_intCycle + 4*2*31) +#define LO_unused4 (LO_next_interupt + 4) +#define LO_gteBusyCycle (LO_unused4 + 4) #define LO_muldivBusyCycle (LO_gteBusyCycle + 4) #define LO_psxRegs_subCycle (LO_muldivBusyCycle + 4) #define LO_psxRegs_biuReg (LO_psxRegs_subCycle + 4*2) -#define LO_psxRegs_reserved (LO_psxRegs_biuReg + 4) -#define LO_psxRegs_end (LO_psxRegs_reserved + 4*7) +#define LO_stop (LO_psxRegs_biuReg + 4) +#define LO_psxRegs_end (LO_stop + 4*7) #define LO_rcnts (LO_psxRegs_end) #define LO_rcnts_end (LO_rcnts + 7*4*4) #define LO_inv_code_start (LO_rcnts_end) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index bbf0d35be..87a82d01f 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -72,11 +72,7 @@ static Jit g_jit; // 
from linkage_* extern int cycle_count; // ... until end of the timeslice, counts -N -> 0 (CCREG) extern int last_count; // last absolute target, often = next_interupt -extern int pcaddr; -extern int branch_target; -/* same as psxRegs.CP0.n.* */ -extern int reg_cop0[]; extern int reg_cop2d[], reg_cop2c[]; extern uintptr_t ram_offset; @@ -3444,7 +3440,7 @@ static void store_assemble(int i, const struct regstat *i_regs, int ccadj_) load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i); wb_dirtys(regs[i].regmap_entry,regs[i].wasdirty); emit_movimm(start+i*4+4,0); - emit_writeword(0,&pcaddr); + emit_writeword(0,&psxRegs.pc); emit_addimm(HOST_CCREG,2,HOST_CCREG); emit_far_call(ndrc_get_addr_ht); emit_jmpreg(0); @@ -3574,7 +3570,7 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) signed char t=get_reg_w(i_regs->regmap, dops[i].rt1); u_int copr=(source[i]>>11)&0x1f; if(t>=0&&dops[i].rt1!=0) { - emit_readword(&reg_cop0[copr],t); + emit_readword(&psxRegs.CP0.r[copr],t); } } else if(dops[i].opcode2==4) // MTC0 @@ -3598,18 +3594,20 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) emit_writeword(HOST_CCREG,&last_count); emit_movimm(0,HOST_CCREG); emit_storereg(CCREG,HOST_CCREG); - emit_loadreg(dops[i].rs1,1); - emit_movimm(copr,0); + emit_loadreg(dops[i].rs1, 2); + emit_movimm(copr, 1); + emit_addimm_ptr(FP, (u_char *)&psxRegs - (u_char *)&dynarec_local, 0); emit_far_call(pcsx_mtc0_ds); emit_loadreg(dops[i].rs1,s); return; } emit_movimm(start+i*4+4,HOST_TEMPREG); - emit_writeword(HOST_TEMPREG,&pcaddr); + emit_writeword(HOST_TEMPREG,&psxRegs.pc); } - if( s != 1) - emit_mov(s, 1); - emit_movimm(copr, 0); + if (s != 2) + emit_mov(s, 2); + emit_movimm(copr, 1); + emit_addimm_ptr(FP, (u_char *)&psxRegs - (u_char *)&dynarec_local, 0); emit_far_call(pcsx_mtc0); if (copr == 12 || copr == 13) { emit_readword(&psxRegs.cycle,HOST_CCREG); @@ -3617,7 +3615,7 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) 
emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); //emit_writeword(HOST_TEMPREG,&last_count); assert(!is_delayslot); - emit_readword(&pcaddr, 0); + emit_readword(&psxRegs.pc, 0); emit_movimm(start+i*4+4, HOST_TEMPREG); emit_cmp(HOST_TEMPREG, 0); void *jaddr = out; @@ -5009,7 +5007,7 @@ static void drc_dbg_emit_do_cmp(int i, int ccadj_) emit_storereg(dops[i].rt1, 0); } emit_movimm(start+i*4,0); - emit_writeword(0,&pcaddr); + emit_writeword(0,&psxRegs.pc); int cc = get_reg(regs[i].regmap_entry, CCREG); if (cc < 0) emit_loadreg(CCREG, cc = 0); @@ -5296,7 +5294,7 @@ static void do_ccstub(int n) } else {SysPrintf("Unknown branch type in do_ccstub\n");abort();} } - emit_writeword(r_pc, &pcaddr); + emit_writeword(r_pc, &psxRegs.pc); // Update cycle count assert(branch_regs[i].regmap[HOST_CCREG]==CCREG||branch_regs[i].regmap[HOST_CCREG]==-1); if(stubs[n].a) emit_addimm(HOST_CCREG,(int)stubs[n].a,HOST_CCREG); @@ -5307,7 +5305,7 @@ static void do_ccstub(int n) load_needed_regs(branch_regs[i].regmap,regs[(cinfo[i].ba-start)>>2].regmap_entry); else if(dops[i].itype==RJUMP) { if(get_reg(branch_regs[i].regmap,RTEMP)>=0) - emit_readword(&pcaddr,get_reg(branch_regs[i].regmap,RTEMP)); + emit_readword(&psxRegs.pc,get_reg(branch_regs[i].regmap,RTEMP)); else emit_loadreg(dops[i].rs1,get_reg(branch_regs[i].regmap,dops[i].rs1)); } @@ -9020,7 +9018,7 @@ static int new_recompile_block(u_int addr) void *beginning = start_block(); emit_movimm(start,0); - emit_writeword(0,&pcaddr); + emit_writeword(0,&psxRegs.pc); emit_far_jump(new_dyna_leave); literal_pool(0); end_block(beginning); @@ -9134,13 +9132,13 @@ static int new_recompile_block(u_int addr) // for BiosBootBypass() to work // io address var abused as a "already been here" flag emit_readword(&address, 1); - emit_writeword(0, &pcaddr); + emit_writeword(0, &psxRegs.pc); emit_writeword(0, &address); emit_cmp(0, 1); } else { emit_readword(&psxRegs.cpuInRecursion, 1); - emit_writeword(0, &pcaddr); + emit_writeword(0, &psxRegs.pc); emit_test(1, 
1); } #ifdef __aarch64__ diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index 5b27c86a9..dcfc4215f 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -2,8 +2,6 @@ #define MAXBLOCK 2048 // in mips instructions -extern int stop; - #define NDHACK_NO_SMC_CHECK (1<<0) #define NDHACK_GTE_UNNEEDED (1<<1) #define NDHACK_GTE_NO_FLAGS (1<<2) diff --git a/libpcsxcore/psxbios.c b/libpcsxcore/psxbios.c index 64a04b853..7682c92fa 100644 --- a/libpcsxcore/psxbios.c +++ b/libpcsxcore/psxbios.c @@ -420,7 +420,7 @@ static inline void softCall(u32 pc) { psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, PTR_1); while (pc0 != 0x80001000 && ++lim < 0x100000) - psxCpu->ExecuteBlock(EXEC_CALLER_HLE); + psxCpu->ExecuteBlock(&psxRegs, EXEC_CALLER_HLE); psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, PTR_1); psxRegs.cpuInRecursion--; @@ -445,7 +445,7 @@ static inline void softCallInException(u32 pc) { psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, PTR_1); while (!returned_from_exception() && pc0 != 0x80001000 && ++lim < 0x100000) - psxCpu->ExecuteBlock(EXEC_CALLER_HLE); + psxCpu->ExecuteBlock(&psxRegs, EXEC_CALLER_HLE); psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, PTR_1); psxRegs.cpuInRecursion--; @@ -2270,8 +2270,8 @@ static void psxBios_WaitEvent() { // 0a // retrigger this hlecall after the next emulation event pc0 -= 4; - if ((s32)(next_interupt - psxRegs.cycle) > 0) - psxRegs.cycle = next_interupt; + if ((s32)(psxRegs.next_interupt - psxRegs.cycle) > 0) + psxRegs.cycle = psxRegs.next_interupt; psxBranchTest(); } @@ -4564,7 +4564,7 @@ void psxBiosCheckBranch(void) if (cycles_passed < 10 || cycles_passed > 50 || v0 != v0_expect) return; - waste_cycles = schedule_timeslice() - psxRegs.cycle; + waste_cycles = schedule_timeslice(&psxRegs) - psxRegs.cycle; loops = waste_cycles / cycles_passed; if (loops > v0) loops = v0; diff --git a/libpcsxcore/psxevents.c b/libpcsxcore/psxevents.c index 4d13cfb97..d90804ed8 
100644 --- a/libpcsxcore/psxevents.c +++ b/libpcsxcore/psxevents.c @@ -1,3 +1,4 @@ +#include <stddef.h> #include <stdio.h> #include "r3000a.h" #include "cdrom.h" @@ -10,10 +11,20 @@ u32 event_cycles[PSXINT_COUNT]; -u32 schedule_timeslice(void) +static psxRegisters *cp0TOpsxRegs(psxCP0Regs *cp0) { - u32 i, c = psxRegs.cycle; - u32 irqs = psxRegs.interrupt; +#ifndef LIGHTREC + return (void *)((char *)cp0 - offsetof(psxRegisters, CP0)); +#else + // lightrec has it's own cp0 + return &psxRegs; +#endif +} + +u32 schedule_timeslice(psxRegisters *regs) +{ + u32 i, c = regs->cycle; + u32 irqs = regs->interrupt; s32 min, dif; min = PSXCLK; @@ -25,8 +36,8 @@ u32 schedule_timeslice(void) if (0 < dif && dif < min) min = dif; } - next_interupt = c + min; - return next_interupt; + regs->next_interupt = c + min; + return regs->next_interupt; } static void irqNoOp() { @@ -55,15 +66,16 @@ static irq_func * const irq_funcs[] = { /* local dupe of psxBranchTest, using event_cycles */ void irq_test(psxCP0Regs *cp0) { - u32 cycle = psxRegs.cycle; + psxRegisters *regs = cp0TOpsxRegs(cp0); + u32 cycle = regs->cycle; u32 irq, irq_bits; - for (irq = 0, irq_bits = psxRegs.interrupt; irq_bits != 0; irq++, irq_bits >>= 1) { + for (irq = 0, irq_bits = regs->interrupt; irq_bits != 0; irq++, irq_bits >>= 1) { if (!(irq_bits & 1)) continue; if ((s32)(cycle - event_cycles[irq]) >= 0) { - // note: irq_funcs() also modify psxRegs.interrupt - psxRegs.interrupt &= ~(1u << irq); + // note: irq_funcs() also modify regs->interrupt + regs->interrupt &= ~(1u << irq); irq_funcs[irq](); } } @@ -77,15 +89,16 @@ void irq_test(psxCP0Regs *cp0) void gen_interupt(psxCP0Regs *cp0) { - evprintf(" +ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle, - next_interupt, next_interupt - psxRegs.cycle); + psxRegisters *regs = cp0TOpsxRegs(cp0); - irq_test(cp0); + evprintf(" +ge %08x, %u->%u (%d)\n", regs->pc, regs->cycle, + regs->next_interupt, regs->next_interupt - regs->cycle); - schedule_timeslice(); + irq_test(cp0); + 
schedule_timeslice(regs); - evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle, - next_interupt, next_interupt - psxRegs.cycle); + evprintf(" -ge %08x, %u->%u (%d)\n", regs->pc, regs->cycle, + regs->next_interupt, regs->next_interupt - regs->cycle); } void events_restore(void) diff --git a/libpcsxcore/psxevents.h b/libpcsxcore/psxevents.h index 1f1067efa..1a72c330c 100644 --- a/libpcsxcore/psxevents.h +++ b/libpcsxcore/psxevents.h @@ -23,16 +23,14 @@ enum { }; extern u32 event_cycles[PSXINT_COUNT]; -extern u32 next_interupt; -extern int stop; #define set_event_raw_abs(e, abs) { \ u32 abs_ = abs; \ - s32 di_ = next_interupt - abs_; \ + s32 di_ = psxRegs.next_interupt - abs_; \ event_cycles[e] = abs_; \ if (di_ > 0) { \ - /*printf("%u: next_interupt %u -> %u\n", psxRegs.cycle, next_interupt, abs_);*/ \ - next_interupt = abs_; \ + /*printf("%u: next_interupt %u -> %u\n", psxRegs.cycle, psxRegs.next_interupt, abs_);*/ \ + psxRegs.next_interupt = abs_; \ } \ } @@ -44,7 +42,9 @@ extern int stop; } while (0) union psxCP0Regs_; -u32 schedule_timeslice(void); +struct psxRegisters; + +u32 schedule_timeslice(struct psxRegisters *regs); void irq_test(union psxCP0Regs_ *cp0); void gen_interupt(union psxCP0Regs_ *cp0); void events_restore(void); diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index fadbf050b..c19f1c21b 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -36,8 +36,6 @@ #define DO_EXCEPTION_RESERVEDI #define HANDLE_LOAD_DELAY -static int branchSeen = 0; - #ifdef __i386__ #define INT_ATTR __attribute__((regparm(2))) #else @@ -156,7 +154,7 @@ static void intExceptionDebugBp(psxRegisters *regs, u32 pc) cp0->n.Cause |= (regs->branching << 30) | (R3000E_Bp << 2); cp0->n.SR = (cp0->n.SR & ~0x3f) | ((cp0->n.SR & 0x0f) << 2); cp0->n.EPC = regs->branching ? 
pc - 4 : pc; - psxRegs.pc = 0x80000040; + regs->pc = 0x80000040; } static int execBreakCheck(psxRegisters *regs, u32 pc) @@ -412,7 +410,7 @@ static void psxDoDelayBranch(psxRegisters *regs, u32 tar1, u32 code1) { static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) { u32 code, pc, pc_final; - branchSeen = regs->branching = taken; + regs->branchSeen = regs->branching = taken; pc_final = taken == R3000A_BRANCH_TAKEN ? tar : regs->pc + 4; // fetch the delay slot @@ -1129,7 +1127,7 @@ OP(psxHLE) { } dloadFlush(regs_); psxHLEt[hleCode](); - branchSeen = 1; + regs_->branchSeen = 1; } static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = { @@ -1201,40 +1199,34 @@ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) { psxBSC[regs->code >> 26](regs, regs->code); } -static void intExecute() { - psxRegisters *regs_ = &psxRegs; +static void intExecute(psxRegisters *regs) { u8 **memRLUT = psxMemRLUT; - extern int stop; - while (!stop) - execI_(memRLUT, regs_); + while (!regs->stop) + execI_(memRLUT, regs); } -static void intExecuteBp() { - psxRegisters *regs_ = &psxRegs; +static void intExecuteBp(psxRegisters *regs) { u8 **memRLUT = psxMemRLUT; - extern int stop; - while (!stop) - execIbp(memRLUT, regs_); + while (!regs->stop) + execIbp(memRLUT, regs); } -void intExecuteBlock(enum blockExecCaller caller) { - psxRegisters *regs_ = &psxRegs; +static void intExecuteBlock(psxRegisters *regs, enum blockExecCaller caller) { u8 **memRLUT = psxMemRLUT; - branchSeen = 0; - while (!branchSeen) - execI_(memRLUT, regs_); + regs->branchSeen = 0; + while (!regs->branchSeen) + execI_(memRLUT, regs); } -static void intExecuteBlockBp(enum blockExecCaller caller) { - psxRegisters *regs_ = &psxRegs; +static void intExecuteBlockBp(psxRegisters *regs, enum blockExecCaller caller) { u8 **memRLUT = psxMemRLUT; - branchSeen = 0; - while (!branchSeen) - execIbp(memRLUT, regs_); + regs->branchSeen = 0; + while (!regs->branchSeen) + execIbp(memRLUT, regs); } 
static void intClear(u32 Addr, u32 Size) { diff --git a/libpcsxcore/psxinterpreter.h b/libpcsxcore/psxinterpreter.h index 2c3f3943b..bc219a49d 100644 --- a/libpcsxcore/psxinterpreter.h +++ b/libpcsxcore/psxinterpreter.h @@ -1,17 +1,16 @@ #ifndef __PSXINTERPRETER_H__ #define __PSXINTERPRETER_H__ +struct psxRegisters; + // get an opcode without triggering exceptions or affecting cache u32 intFakeFetch(u32 pc); // called by "new_dynarec" -void execI(psxRegisters *regs); +void execI(struct psxRegisters *regs); void intApplyConfig(); -void MTC0(psxRegisters *regs_, int reg, u32 val); +void MTC0(struct psxRegisters *regs, int reg, u32 val); void gteNULL(struct psxCP2Regs *regs); extern void (*psxCP2[64])(struct psxCP2Regs *regs); -// called by lightrec -void intExecuteBlock(enum blockExecCaller caller); - #endif // __PSXINTERPRETER_H__ diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index 0c29dba73..b1b819e43 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -169,7 +169,7 @@ void psxJumpTest() { void psxExecuteBios() { int i; for (i = 0; i < 5000000; i++) { - psxCpu->ExecuteBlock(EXEC_CALLER_BOOT); + psxCpu->ExecuteBlock(&psxRegs, EXEC_CALLER_BOOT); if ((psxRegs.pc & 0xff800000) == 0x80000000) break; } diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 93a53ced0..025cfa44b 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -52,11 +52,14 @@ enum blockExecCaller { EXEC_CALLER_OTHER, }; +struct psxRegisters; + typedef struct { int (*Init)(); void (*Reset)(); - void (*Execute)(); - void (*ExecuteBlock)(enum blockExecCaller caller); /* executes up to a jump */ + void (*Execute)(struct psxRegisters *regs); + /* executes up to a jump */ + void (*ExecuteBlock)(struct psxRegisters *regs, enum blockExecCaller caller); void (*Clear)(u32 Addr, u32 Size); void (*Notify)(enum R3000Anote note, void *data); void (*ApplyConfig)(); @@ -177,7 +180,7 @@ typedef struct psxCP2Regs { psxCP2Ctrl CP2C; /* Cop2 control registers */ } psxCP2Regs; 
-typedef struct { +typedef struct psxRegisters { // note: some cores like lightrec don't keep their data here, // so use R3000ACPU_NOTIFY_BEFORE_SAVE to sync psxGPRRegs GPR; /* General Purpose Registers */ @@ -193,22 +196,26 @@ typedef struct { u32 code; /* The instruction */ u32 cycle; u32 interrupt; - struct { u32 sCycle, cycle; } intCycle[32]; + struct { u32 sCycle, cycle; } intCycle[31]; + u32 next_interupt; /* cycle */ + u32 unused; u32 gteBusyCycle; u32 muldivBusyCycle; u32 subCycle; /* interpreter cycle counting */ u32 subCycleStep; u32 biuReg; + u8 stop; + u8 branchSeen; /* interp. */ u8 branching; /* interp. R3000A_BRANCH_TAKEN / not, 0 if not branch */ u8 dloadSel; /* interp. delay load state */ u8 dloadReg[2]; + u8 unused2[2]; u32 dloadVal[2]; u32 biosBranchCheck; u32 cpuInRecursion; u32 gpuIdleAfter; - u32 reserved[1]; // warning: changing anything in psxRegisters requires update of all - // asm in libpcsxcore/new_dynarec/ + // asm in libpcsxcore/new_dynarec/ and may break savestates } psxRegisters; extern psxRegisters psxRegs; From e81ab3102285a24ed01ee1ef1071df737ebc15df Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 25 Oct 2024 22:25:09 +0300 Subject: [PATCH 590/597] 3ds: only invalidate icache on non-compiler core seems enough, or is it? 
--- frontend/3ds/3ds_utils.h | 2 +- frontend/3ds/utils.S | 4 ++-- libpcsxcore/new_dynarec/emu_if.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/frontend/3ds/3ds_utils.h b/frontend/3ds/3ds_utils.h index bde9c4912..5c5ad3c5b 100644 --- a/frontend/3ds/3ds_utils.h +++ b/frontend/3ds/3ds_utils.h @@ -14,7 +14,7 @@ void wait_for_input(void); void ctr_clear_cache(void); void ctr_clear_cache_range(void *start, void *end); -//void ctr_invalidate_icache(void); // only icache +void ctr_invalidate_icache(void); // only icache extern __attribute__((weak)) int __ctr_svchax; diff --git a/frontend/3ds/utils.S b/frontend/3ds/utils.S index 6f7a6a2ff..be4eb97fb 100644 --- a/frontend/3ds/utils.S +++ b/frontend/3ds/utils.S @@ -61,13 +61,14 @@ ctr_clear_cache_range: bx lr .endfunc -#if 0 .func ctr_invalidate_icache_kernel ctr_invalidate_icache_kernel: + mrs r3, cpsr cpsid aif mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ Data Sync Barrier mcr p15, 0, r0, c7, c5, 0 @ Invalidate entire instruction cache / Flush BTB + msr cpsr, r3 bx lr .endfunc @@ -78,4 +79,3 @@ ctr_invalidate_icache: svc 0x80 @ svcCustomBackdoor bx lr .endfunc -#endif diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 8ebf27463..62e984b6f 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -320,7 +320,7 @@ static void clear_local_cache(void) #ifdef _3DS if (ndrc_g.thread.cache_dirty) { ndrc_g.thread.cache_dirty = 0; - ctr_clear_cache(); + ctr_invalidate_icache(); } #else // hopefully nothing is needed, as tested on r-pi4 and switch From 9b592b3ff912367255dba1ddeaffb57df9946c69 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 26 Oct 2024 22:57:57 +0300 Subject: [PATCH 591/597] platforms: try to fix build many lack weak symbols, apple has different pthread_setname_np... 
--- Makefile | 2 +- frontend/cspace.c | 4 ++++ frontend/libretro-rthreads.c | 2 +- include/compiler_features.h | 7 ++++++- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 25cbbdf2d..1219b097a 100644 --- a/Makefile +++ b/Makefile @@ -285,7 +285,7 @@ CFLAGS += -DHAVE_CHD -I$(LCHDR)/include OBJS += frontend/cspace.o ifeq "$(HAVE_NEON_ASM)" "1" OBJS += frontend/cspace_neon.o -frontend/cspace.o: CFLAGS += -DHAVE_bgr555_to_rgb565 +frontend/cspace.o: CFLAGS += -DHAVE_bgr555_to_rgb565 -DHAVE_bgr888_to_x else ifeq "$(ARCH)" "arm" OBJS += frontend/cspace_arm.o diff --git a/frontend/cspace.c b/frontend/cspace.c index b4e4a7101..b45cee2e6 100644 --- a/frontend/cspace.c +++ b/frontend/cspace.c @@ -112,6 +112,8 @@ void bgr555_to_rgb565(void *dst_, const void *src_, int bytes) #endif +#ifndef HAVE_bgr888_to_x + void attr_weak bgr888_to_rgb565(void *dst_, const void *src_, int bytes) { const unsigned char *src = src_; @@ -139,6 +141,8 @@ void attr_weak bgr888_to_rgb565(void *dst_, const void *src_, int bytes) void rgb888_to_rgb565(void *dst, const void *src, int bytes) {} void bgr888_to_rgb888(void *dst, const void *src, int bytes) {} +#endif // HAVE_bgr888_to_x + /* YUV stuff */ static int yuv_ry[32], yuv_gy[32], yuv_by[32]; static unsigned char yuv_u[32 * 2], yuv_v[32 * 2]; diff --git a/frontend/libretro-rthreads.c b/frontend/libretro-rthreads.c index 245075087..82af3ef23 100644 --- a/frontend/libretro-rthreads.c +++ b/frontend/libretro-rthreads.c @@ -83,7 +83,7 @@ sthread_t *pcsxr_sthread_create(void (*thread_func)(void *), h->id = (pthread_t)ctr_thread; #else h = sthread_create(thread_func, NULL); - #if defined(__GLIBC__) || defined(__MACH__) || \ + #if defined(__GLIBC__) || \ (defined(__ANDROID_API__) && __ANDROID_API__ >= 26) if (h && (unsigned int)type < (unsigned int)PCSXRT_COUNT) { diff --git a/include/compiler_features.h b/include/compiler_features.h index d6983632e..21549ddf3 100644 --- a/include/compiler_features.h +++ 
b/include/compiler_features.h @@ -8,12 +8,17 @@ # define noinline __attribute__((noinline,noclone)) # endif # define attr_unused __attribute__((unused)) -# define attr_weak __attribute__((weak)) #else # define likely(x) (x) # define unlikely(x) (x) # define noinline # define attr_unused +#endif + +// doesn't work on Android, mingw... +#if defined(__GNUC__) && !defined(ANDROID) && !defined(__MINGW32__) +# define attr_weak __attribute__((weak)) +#else # define attr_weak #endif From e6f13b4724f5950473b3a0a3b4da02ad9d85273c Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 26 Oct 2024 23:12:21 +0300 Subject: [PATCH 592/597] try to fix apple build --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1219b097a..f3730ee20 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,8 @@ LDFLAGS += -fsanitize=address endif ifneq ($(NO_FSECTIONS), 1) CFLAGS += -ffunction-sections -fdata-sections -LDFLAGS += -Wl,--gc-sections +FSECTIONS_LDFLAGS ?= -Wl,--gc-sections +LDFLAGS += $(FSECTIONS_LDFLAGS) endif CFLAGS += -DP_HAVE_MMAP=$(if $(NO_MMAP),0,1) \ -DP_HAVE_PTHREAD=$(if $(NO_PTHREAD),0,1) \ From a6a0a4d316ca2aef3dba1ccf76b9639d61c51247 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 26 Oct 2024 23:42:05 +0300 Subject: [PATCH 593/597] try to support older binutils EXTERN in a linker script doesn't work on ld 2.34, works on 2.43.1 --- Makefile | 3 +-- frontend/main.c | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index f3730ee20..9cc6088f1 100644 --- a/Makefile +++ b/Makefile @@ -439,8 +439,7 @@ target_: $(TARGET) $(TARGET): $(OBJS) ifeq ($(PARTIAL_LINKING), 1) - sed -e 's/.*/EXTERN(\0)/' frontend/libretro-extern > frontend/libretro-extern.T - $(LD) -o $(basename $(TARGET))1.o -r --gc-sections -T frontend/libretro-extern.T $^ + $(LD) -o $(basename $(TARGET))1.o -r --gc-sections $(addprefix -u , $(shell cat frontend/libretro-extern)) $^ $(OBJCOPY) --keep-global-symbols=frontend/libretro-extern 
$(basename $(TARGET))1.o $(basename $(TARGET)).o $(AR) rcs $@ $(basename $(TARGET)).o else ifeq ($(STATIC_LINKING), 1) diff --git a/frontend/main.c b/frontend/main.c index 48006a67b..963f6c81a 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -516,7 +516,9 @@ int emu_core_init(void) SysPrintf("Starting PCSX-ReARMed " REV "%s\n", get_build_info()); SysPrintf("build time: " __DATE__ " " __TIME__ "\n"); +#ifdef HAVE_RTHREADS pcsxr_sthread_init(); +#endif #ifndef NO_FRONTEND check_profile(); check_memcards(); From 5ddc3a7a2e0bcfd9e587a895468635bce598228b Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 27 Oct 2024 01:32:40 +0300 Subject: [PATCH 594/597] merge gpu_unai with gpu_unai_old Can now switch between them during gameplay. Specify GPU_UNAI_NO_OLD=1 to prevent compiling this in to save memory or whatever. No big endian support. --- Makefile | 21 ++++--- configure | 6 +- frontend/libretro.c | 11 ++++ frontend/libretro_core_options.h | 16 ++++++ frontend/main.c | 4 +- frontend/menu.c | 27 +-------- frontend/plugin_lib.h | 6 +- plugins/gpu_unai/Makefile | 2 +- plugins/gpu_unai/gpu.h | 1 + plugins/gpu_unai/gpu_fixedpoint.h | 4 +- plugins/gpu_unai/gpulib_if.cpp | 22 +++++++- .../{gpu_unai_old => gpu_unai/old}/debug.h | 0 .../{gpu_unai_old => gpu_unai/old}/gpu.cpp | 0 plugins/{gpu_unai_old => gpu_unai/old}/gpu.h | 0 .../{gpu_unai_old => gpu_unai/old}/gpu_arm.h | 0 .../{gpu_unai_old => gpu_unai/old}/gpu_arm.s | 0 .../{gpu_unai_old => gpu_unai/old}/gpu_blit.h | 0 .../old}/gpu_command.h | 0 .../old}/gpu_fixedpoint.h | 2 +- .../old}/gpu_inner.h | 0 .../old}/gpu_inner_blend.h | 0 .../old}/gpu_inner_light.h | 0 .../old}/gpu_raster_image.h | 0 .../old}/gpu_raster_line.h | 0 .../old}/gpu_raster_polygon.h | 0 .../old}/gpu_raster_sprite.h | 0 .../gpulib_if.cpp => gpu_unai/old/if.cpp} | 56 ++++++------------- plugins/gpu_unai/old/if.h | 18 ++++++ plugins/{gpu_unai_old => gpu_unai/old}/port.h | 0 .../{gpu_unai_old => gpu_unai/old}/profiler.h | 0 plugins/gpu_unai_old/Makefile | 
16 ------ readme.txt | 2 - 32 files changed, 107 insertions(+), 107 deletions(-) rename plugins/{gpu_unai_old => gpu_unai/old}/debug.h (100%) rename plugins/{gpu_unai_old => gpu_unai/old}/gpu.cpp (100%) rename plugins/{gpu_unai_old => gpu_unai/old}/gpu.h (100%) rename plugins/{gpu_unai_old => gpu_unai/old}/gpu_arm.h (100%) rename plugins/{gpu_unai_old => gpu_unai/old}/gpu_arm.s (100%) rename plugins/{gpu_unai_old => gpu_unai/old}/gpu_blit.h (100%) rename plugins/{gpu_unai_old => gpu_unai/old}/gpu_command.h (100%) rename plugins/{gpu_unai_old => gpu_unai/old}/gpu_fixedpoint.h (99%) rename plugins/{gpu_unai_old => gpu_unai/old}/gpu_inner.h (100%) rename plugins/{gpu_unai_old => gpu_unai/old}/gpu_inner_blend.h (100%) rename plugins/{gpu_unai_old => gpu_unai/old}/gpu_inner_light.h (100%) rename plugins/{gpu_unai_old => gpu_unai/old}/gpu_raster_image.h (100%) rename plugins/{gpu_unai_old => gpu_unai/old}/gpu_raster_line.h (100%) rename plugins/{gpu_unai_old => gpu_unai/old}/gpu_raster_polygon.h (100%) rename plugins/{gpu_unai_old => gpu_unai/old}/gpu_raster_sprite.h (100%) rename plugins/{gpu_unai_old/gpulib_if.cpp => gpu_unai/old/if.cpp} (94%) create mode 100644 plugins/gpu_unai/old/if.h rename plugins/{gpu_unai_old => gpu_unai/old}/port.h (100%) rename plugins/{gpu_unai_old => gpu_unai/old}/profiler.h (100%) delete mode 100644 plugins/gpu_unai_old/Makefile diff --git a/Makefile b/Makefile index 9cc6088f1..d29779131 100644 --- a/Makefile +++ b/Makefile @@ -227,21 +227,24 @@ plugins/dfxvideo/gpulib_if.o: CFLAGS += -fno-strict-aliasing plugins/dfxvideo/gpulib_if.o: plugins/dfxvideo/prim.c plugins/dfxvideo/soft.c OBJS += plugins/dfxvideo/gpulib_if.o endif -ifeq "$(BUILTIN_GPU)" "unai_old" -OBJS += plugins/gpu_unai_old/gpulib_if.o -ifeq "$(ARCH)" "arm" -OBJS += plugins/gpu_unai_old/gpu_arm.o -endif -plugins/gpu_unai_old/gpulib_if.o: CFLAGS += -DREARMED -O3 -CC_LINK = $(CXX) -endif ifeq "$(BUILTIN_GPU)" "unai" OBJS += plugins/gpu_unai/gpulib_if.o ifeq "$(ARCH)" "arm" OBJS 
+= plugins/gpu_unai/gpu_arm.o endif -plugins/gpu_unai/gpulib_if.o: CFLAGS += -DREARMED -DUSE_GPULIB=1 -O3 +ifeq "$(THREAD_RENDERING)" "1" +CFLAGS += -DTHREAD_RENDERING +OBJS += plugins/gpulib/gpulib_thread_if.o +endif +ifneq "$(GPU_UNAI_NO_OLD)" "1" +OBJS += plugins/gpu_unai/old/if.o +else +CFLAGS += -DGPU_UNAI_NO_OLD +endif +plugins/gpu_unai/gpulib_if.o: CFLAGS += -DREARMED -DUSE_GPULIB=1 +plugins/gpu_unai/gpulib_if.o \ +plugins/gpu_unai/old/if.o: CFLAGS += -O3 CC_LINK = $(CXX) endif diff --git a/configure b/configure index 0d315b687..3bc436e55 100755 --- a/configure +++ b/configure @@ -39,13 +39,13 @@ check_define_val() platform_list="generic pandora maemo caanoo" platform="generic" -builtin_gpu_list="neon peops unai unai_old" +builtin_gpu_list="neon peops unai" dynarec_list="ari64 lightrec none" builtin_gpu="" sound_driver_list="oss alsa pulseaudio sdl" sound_drivers="" plugins="plugins/spunull/spunull.so \ -plugins/dfxvideo/gpu_peops.so plugins/gpu_unai_old/gpu_unai_old.so plugins/gpu_unai/gpu_unai.so" +plugins/dfxvideo/gpu_peops.so plugins/gpu_unai/gpu_unai.so" drc_cache_base="no" have_armv5="" have_armv6="" @@ -263,7 +263,7 @@ arm*) builtin_gpu="neon" elif [ "$have_armv7" != "yes" ]; then # pre-ARMv7 hardware is usually not fast enough for peops - builtin_gpu="unai_old" + builtin_gpu="unai" else builtin_gpu="peops" fi diff --git a/frontend/libretro.c b/frontend/libretro.c index 3edcc2c9e..68ec65ff8 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2641,6 +2641,17 @@ static void update_variables(bool in_flight) * pcsx_rearmed_gpu_unai_scale_hires */ pl_rearmed_cbs.gpu_unai.pixel_skip = 0; + var.key = "pcsx_rearmed_gpu_unai_old_renderer"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, "enabled") == 0) + pl_rearmed_cbs.gpu_unai.old_renderer = 1; + else + pl_rearmed_cbs.gpu_unai.old_renderer = 0; + } + var.key = "pcsx_rearmed_gpu_unai_lighting"; var.value = NULL; diff --git 
a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 7fe908122..ed2f41c31 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -726,6 +726,22 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled", }, +#ifndef GPU_UNAI_NO_OLD + { + "pcsx_rearmed_gpu_unai_old_renderer", + "(GPU) Old renderer", + "Old renderer", + "This enables faster, but less accurate code.", + NULL, + "gpu_unai", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL}, + }, + "disabled", + }, +#endif { "pcsx_rearmed_gpu_unai_blending", "(GPU) Texture Blending", diff --git a/frontend/main.c b/frontend/main.c index 963f6c81a..abf97e69f 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -136,15 +136,13 @@ void emu_set_default_config(void) pl_rearmed_cbs.gpu_neon.enhancement_tex_adj = 1; pl_rearmed_cbs.gpu_peops.iUseDither = 0; pl_rearmed_cbs.gpu_peops.dwActFixes = 1<<7; + pl_rearmed_cbs.gpu_unai.old_renderer = 0; pl_rearmed_cbs.gpu_unai.ilace_force = 0; pl_rearmed_cbs.gpu_unai.pixel_skip = 0; pl_rearmed_cbs.gpu_unai.lighting = 1; pl_rearmed_cbs.gpu_unai.fast_lighting = 0; pl_rearmed_cbs.gpu_unai.blending = 1; pl_rearmed_cbs.gpu_unai.dithering = 0; - pl_rearmed_cbs.gpu_unai_old.abe_hack = - pl_rearmed_cbs.gpu_unai_old.no_light = - pl_rearmed_cbs.gpu_unai_old.no_blend = 0; memset(&pl_rearmed_cbs.gpu_peopsgl, 0, sizeof(pl_rearmed_cbs.gpu_peopsgl)); pl_rearmed_cbs.gpu_peopsgl.iVRamSize = 64; pl_rearmed_cbs.gpu_peopsgl.iTexGarbageCollection = 1; diff --git a/frontend/menu.c b/frontend/menu.c index 95d4edf6b..532ab714a 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -443,10 +443,7 @@ static const struct { CE_INTVAL_V(frameskip, 4), CE_INTVAL_P(gpu_peops.iUseDither), CE_INTVAL_P(gpu_peops.dwActFixes), - CE_INTVAL_P(gpu_unai_old.lineskip), - CE_INTVAL_P(gpu_unai_old.abe_hack), - CE_INTVAL_P(gpu_unai_old.no_light), - CE_INTVAL_P(gpu_unai_old.no_blend), + CE_INTVAL_P(gpu_unai.old_renderer), 
CE_INTVAL_P(gpu_unai.ilace_force), CE_INTVAL_P(gpu_unai.pixel_skip), CE_INTVAL_P(gpu_unai.lighting), @@ -1444,24 +1441,9 @@ static int menu_loop_plugin_gpu_neon(int id, int keys) #endif -static menu_entry e_menu_plugin_gpu_unai_old[] = -{ - mee_onoff ("Skip every 2nd line", 0, pl_rearmed_cbs.gpu_unai_old.lineskip, 1), - mee_onoff ("Abe's Odyssey hack", 0, pl_rearmed_cbs.gpu_unai_old.abe_hack, 1), - mee_onoff ("Disable lighting", 0, pl_rearmed_cbs.gpu_unai_old.no_light, 1), - mee_onoff ("Disable blending", 0, pl_rearmed_cbs.gpu_unai_old.no_blend, 1), - mee_end, -}; - -static int menu_loop_plugin_gpu_unai_old(int id, int keys) -{ - int sel = 0; - me_loop(e_menu_plugin_gpu_unai_old, &sel); - return 0; -} - static menu_entry e_menu_plugin_gpu_unai[] = { + mee_onoff ("Old renderer", 0, pl_rearmed_cbs.gpu_unai.old_renderer, 1), mee_onoff ("Interlace", 0, pl_rearmed_cbs.gpu_unai.ilace_force, 1), mee_onoff ("Dithering", 0, pl_rearmed_cbs.gpu_unai.dithering, 1), mee_onoff ("Lighting", 0, pl_rearmed_cbs.gpu_unai.lighting, 1), @@ -1579,15 +1561,13 @@ static const char h_plugin_gpu[] = "builtin_gpu is the NEON GPU, very fast and accurate\n" #endif "gpu_peops is Pete's soft GPU, slow but accurate\n" - "gpu_unai_old is from old PCSX4ALL, fast but glitchy\n" - "gpu_unai is newer, more accurate but slower\n" + "gpu_unai is the GPU renderer from PCSX4ALL\n" "gpu_gles Pete's hw GPU, uses 3D chip but is glitchy\n" "must save config and reload the game if changed"; static const char h_plugin_spu[] = "spunull effectively disables sound\n" "must save config and reload the game if changed"; static const char h_gpu_peops[] = "Configure P.E.Op.S. SoftGL Driver V1.17"; static const char h_gpu_peopsgl[]= "Configure P.E.Op.S. MesaGL Driver V1.78"; -static const char h_gpu_unai_old[] = "Configure Unai/PCSX4ALL Team GPU plugin (old)"; static const char h_gpu_unai[] = "Configure Unai/PCSX4ALL Team plugin (new)"; static const char h_spu[] = "Configure built-in P.E.Op.S. 
Sound Driver V1.7"; @@ -1600,7 +1580,6 @@ static menu_entry e_menu_plugin_options[] = mee_handler_h ("Configure built-in GPU plugin", menu_loop_plugin_gpu_neon, h_gpu_neon), #endif mee_handler_h ("Configure gpu_peops plugin", menu_loop_plugin_gpu_peops, h_gpu_peops), - mee_handler_h ("Configure gpu_unai_old GPU plugin", menu_loop_plugin_gpu_unai_old, h_gpu_unai_old), mee_handler_h ("Configure gpu_unai GPU plugin", menu_loop_plugin_gpu_unai, h_gpu_unai), mee_handler_h ("Configure gpu_gles GPU plugin", menu_loop_plugin_gpu_peopsgl, h_gpu_peopsgl), mee_handler_h ("Configure built-in SPU plugin", menu_loop_plugin_spu, h_spu), diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index c7ca247e2..b2ba69af2 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -94,11 +94,7 @@ struct rearmed_cbs { int dwFrameRateTicks; } gpu_peops; struct { - int abe_hack; - int no_light, no_blend; - int lineskip; - } gpu_unai_old; - struct { + int old_renderer; int ilace_force; int pixel_skip; int lighting; diff --git a/plugins/gpu_unai/Makefile b/plugins/gpu_unai/Makefile index 756d19aa8..8fe90cb50 100644 --- a/plugins/gpu_unai/Makefile +++ b/plugins/gpu_unai/Makefile @@ -8,7 +8,7 @@ CFLAGS += -DUSE_GPULIB=1 include ../../config.mak SRC_STANDALONE += gpu.cpp -SRC_GPULIB += gpulib_if.cpp +SRC_GPULIB += gpulib_if.cpp old/if.cpp ifeq "$(ARCH)" "arm" SRC += gpu_arm.S diff --git a/plugins/gpu_unai/gpu.h b/plugins/gpu_unai/gpu.h index f5eb69b4b..17f80eb3c 100644 --- a/plugins/gpu_unai/gpu.h +++ b/plugins/gpu_unai/gpu.h @@ -46,6 +46,7 @@ struct gpu_unai_config_t { uint8_t fast_lighting:1; uint8_t blending:1; uint8_t dithering:1; + uint8_t old_renderer:1; //senquack Only PCSX Rearmed's version of gpu_unai had this, and I // don't think it's necessary. 
It would require adding 'AH' flag to diff --git a/plugins/gpu_unai/gpu_fixedpoint.h b/plugins/gpu_unai/gpu_fixedpoint.h index 5df42cf00..f809905ee 100644 --- a/plugins/gpu_unai/gpu_fixedpoint.h +++ b/plugins/gpu_unai/gpu_fixedpoint.h @@ -75,12 +75,14 @@ INLINE float FloatInv(const float x) /////////////////////////////////////////////////////////////////////////// // --- BEGIN INVERSE APPROXIMATION SECTION --- /////////////////////////////////////////////////////////////////////////// -#ifdef GPU_UNAI_USE_INT_DIV_MULTINV +#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || !defined(GPU_UNAI_NO_OLD) // big precision inverse table. #define TABLE_BITS 16 s32 s_invTable[(1< #include #include "../gpulib/gpu.h" +#include "old/if.h" #ifdef THREAD_RENDERING #include "../gpulib/gpulib_thread_if.h" @@ -68,6 +69,12 @@ ///////////////////////////////////////////////////////////////////////////// +#ifndef GPU_UNAI_NO_OLD +#define IS_OLD_RENDERER() gpu_unai.config.old_renderer +#else +#define IS_OLD_RENDERER() false +#endif + #define DOWNSCALE_VRAM_SIZE (1024 * 512 * 2 * 2 + 4096) INLINE void scale_640_to_320(le16_t *dest, const le16_t *src, bool isRGB24) { @@ -247,7 +254,7 @@ int renderer_init(void) //gpu_unai.config.enableAbbeyHack = gpu_unai_config_ext.abe_hack; gpu_unai.ilace_mask = gpu_unai.config.ilace_force; -#ifdef GPU_UNAI_USE_INT_DIV_MULTINV +#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || !defined(GPU_UNAI_NO_OLD) // s_invTable for(int i=1;i<=(1<gpu_unai.old_renderer; gpu_unai.config.ilace_force = cbs->gpu_unai.ilace_force; gpu_unai.config.pixel_skip = cbs->gpu_unai.pixel_skip; gpu_unai.config.lighting = cbs->gpu_unai.lighting; @@ -860,6 +875,7 @@ void renderer_set_config(const struct rearmed_cbs *cbs) } else { unmap_downscale_buffer(); } + oldunai_renderer_set_config(cbs); } void renderer_sync(void) diff --git a/plugins/gpu_unai_old/debug.h b/plugins/gpu_unai/old/debug.h similarity index 100% rename from plugins/gpu_unai_old/debug.h rename to plugins/gpu_unai/old/debug.h diff 
--git a/plugins/gpu_unai_old/gpu.cpp b/plugins/gpu_unai/old/gpu.cpp similarity index 100% rename from plugins/gpu_unai_old/gpu.cpp rename to plugins/gpu_unai/old/gpu.cpp diff --git a/plugins/gpu_unai_old/gpu.h b/plugins/gpu_unai/old/gpu.h similarity index 100% rename from plugins/gpu_unai_old/gpu.h rename to plugins/gpu_unai/old/gpu.h diff --git a/plugins/gpu_unai_old/gpu_arm.h b/plugins/gpu_unai/old/gpu_arm.h similarity index 100% rename from plugins/gpu_unai_old/gpu_arm.h rename to plugins/gpu_unai/old/gpu_arm.h diff --git a/plugins/gpu_unai_old/gpu_arm.s b/plugins/gpu_unai/old/gpu_arm.s similarity index 100% rename from plugins/gpu_unai_old/gpu_arm.s rename to plugins/gpu_unai/old/gpu_arm.s diff --git a/plugins/gpu_unai_old/gpu_blit.h b/plugins/gpu_unai/old/gpu_blit.h similarity index 100% rename from plugins/gpu_unai_old/gpu_blit.h rename to plugins/gpu_unai/old/gpu_blit.h diff --git a/plugins/gpu_unai_old/gpu_command.h b/plugins/gpu_unai/old/gpu_command.h similarity index 100% rename from plugins/gpu_unai_old/gpu_command.h rename to plugins/gpu_unai/old/gpu_command.h diff --git a/plugins/gpu_unai_old/gpu_fixedpoint.h b/plugins/gpu_unai/old/gpu_fixedpoint.h similarity index 99% rename from plugins/gpu_unai_old/gpu_fixedpoint.h rename to plugins/gpu_unai/old/gpu_fixedpoint.h index e72fda12f..5dae806d0 100644 --- a/plugins/gpu_unai_old/gpu_fixedpoint.h +++ b/plugins/gpu_unai/old/gpu_fixedpoint.h @@ -39,7 +39,7 @@ typedef s32 fixed; #define fixed_HALF ((fixed)((1<>1)) // big precision inverse table. 
-s32 s_invTable[(1<>FIXED_BITS); } diff --git a/plugins/gpu_unai_old/gpu_inner.h b/plugins/gpu_unai/old/gpu_inner.h similarity index 100% rename from plugins/gpu_unai_old/gpu_inner.h rename to plugins/gpu_unai/old/gpu_inner.h diff --git a/plugins/gpu_unai_old/gpu_inner_blend.h b/plugins/gpu_unai/old/gpu_inner_blend.h similarity index 100% rename from plugins/gpu_unai_old/gpu_inner_blend.h rename to plugins/gpu_unai/old/gpu_inner_blend.h diff --git a/plugins/gpu_unai_old/gpu_inner_light.h b/plugins/gpu_unai/old/gpu_inner_light.h similarity index 100% rename from plugins/gpu_unai_old/gpu_inner_light.h rename to plugins/gpu_unai/old/gpu_inner_light.h diff --git a/plugins/gpu_unai_old/gpu_raster_image.h b/plugins/gpu_unai/old/gpu_raster_image.h similarity index 100% rename from plugins/gpu_unai_old/gpu_raster_image.h rename to plugins/gpu_unai/old/gpu_raster_image.h diff --git a/plugins/gpu_unai_old/gpu_raster_line.h b/plugins/gpu_unai/old/gpu_raster_line.h similarity index 100% rename from plugins/gpu_unai_old/gpu_raster_line.h rename to plugins/gpu_unai/old/gpu_raster_line.h diff --git a/plugins/gpu_unai_old/gpu_raster_polygon.h b/plugins/gpu_unai/old/gpu_raster_polygon.h similarity index 100% rename from plugins/gpu_unai_old/gpu_raster_polygon.h rename to plugins/gpu_unai/old/gpu_raster_polygon.h diff --git a/plugins/gpu_unai_old/gpu_raster_sprite.h b/plugins/gpu_unai/old/gpu_raster_sprite.h similarity index 100% rename from plugins/gpu_unai_old/gpu_raster_sprite.h rename to plugins/gpu_unai/old/gpu_raster_sprite.h diff --git a/plugins/gpu_unai_old/gpulib_if.cpp b/plugins/gpu_unai/old/if.cpp similarity index 94% rename from plugins/gpu_unai_old/gpulib_if.cpp rename to plugins/gpu_unai/old/if.cpp index e0d2005f9..8c67694e5 100644 --- a/plugins/gpu_unai_old/gpulib_if.cpp +++ b/plugins/gpu_unai/old/if.cpp @@ -22,8 +22,9 @@ #include #include #include -#include "../gpulib/gpu.h" +#include "../../gpulib/gpu.h" #include "arm_features.h" +#include "if.h" #define u8 uint8_t 
#define s8 int8_t @@ -50,7 +51,8 @@ static bool blend = true; /* blending */ static bool FrameToRead = false; /* load image in progress */ static bool FrameToWrite = false; /* store image in progress */ -static bool enableAbbeyHack = false; /* Abe's Odyssey hack */ +//static bool enableAbbeyHack = false; /* Abe's Odyssey hack */ +#define enableAbbeyHack false static u8 BLEND_MODE; static u8 TEXT_MODE; @@ -136,10 +138,11 @@ static u32 GPU_GP1; ///////////////////////////////////////////////////////////////////////////// -int renderer_init(void) +void oldunai_renderer_init(void) { GPU_FrameBuffer = (u16 *)gpu.vram; +#if 0 // shared with "new" unai // s_invTable for(int i=1;i<=(1<gpu_unai_old.lineskip; - enableAbbeyHack = cbs->gpu_unai_old.abe_hack; - light = !cbs->gpu_unai_old.no_light; - blend = !cbs->gpu_unai_old.no_blend; + force_interlace = cbs->gpu_unai.ilace_force; + //enableAbbeyHack = cbs->gpu_unai_old.abe_hack; + light = cbs->gpu_unai.lighting; + blend = cbs->gpu_unai.blending; GPU_FrameBuffer = (u16 *)gpu.vram; } diff --git a/plugins/gpu_unai/old/if.h b/plugins/gpu_unai/old/if.h new file mode 100644 index 000000000..f7faaff0a --- /dev/null +++ b/plugins/gpu_unai/old/if.h @@ -0,0 +1,18 @@ +#ifndef GPU_UNAI_NO_OLD + +struct rearmed_cbs; + +void oldunai_renderer_init(void); +int oldunai_do_cmd_list(uint32_t *list, int list_len, + int *cycles_sum_out, int *cycles_last, int *last_cmd); +void oldunai_renderer_sync_ecmds(uint32_t *ecmds); +void oldunai_renderer_set_config(const struct rearmed_cbs *cbs); + +#else + +#define oldunai_renderer_init() +#define oldunai_do_cmd_list(...) 
0 +#define oldunai_renderer_sync_ecmds(x) +#define oldunai_renderer_set_config(x) + +#endif diff --git a/plugins/gpu_unai_old/port.h b/plugins/gpu_unai/old/port.h similarity index 100% rename from plugins/gpu_unai_old/port.h rename to plugins/gpu_unai/old/port.h diff --git a/plugins/gpu_unai_old/profiler.h b/plugins/gpu_unai/old/profiler.h similarity index 100% rename from plugins/gpu_unai_old/profiler.h rename to plugins/gpu_unai/old/profiler.h diff --git a/plugins/gpu_unai_old/Makefile b/plugins/gpu_unai_old/Makefile deleted file mode 100644 index ed3eff0ad..000000000 --- a/plugins/gpu_unai_old/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -CFLAGS += -ggdb -Wall -O3 -ffast-math -CFLAGS += -DREARMED -CFLAGS += -I../../include - -include ../../config.mak - -SRC_STANDALONE += gpu.cpp -SRC_GPULIB += gpulib_if.cpp - -ifeq "$(ARCH)" "arm" -SRC += gpu_arm.s -endif - -#BIN_STANDALONE = gpuPCSX4ALL.so -BIN_GPULIB = gpu_unai_old.so -include ../gpulib/gpulib.mak diff --git a/readme.txt b/readme.txt index 1f1f2d52e..142ce08d9 100644 --- a/readme.txt +++ b/readme.txt @@ -89,8 +89,6 @@ builtin_gpu - this is either Exophase's ARM NEON GPU (accurate and fast, gpu_peops or gpu_unai (depends on compile options). gpu_peops.so - P.E.Op.S. soft GPU, reasonably accurate but slow (also found with older emulators on PC) -gpu_unai_old.so- Unai's plugin from PCSX4ALL project. Faster than P.E.Op.S. - but has some glitches (old version). gpu_gles.so - experimental port of P.E.Op.S. MesaGL plugin to OpenGL ES. Occasionally faster but has lots of glitches and seems to be rather unstable (may crash the driver/system). 
From 75b5628c89f8ed6ab07d26a3e9788abb4ff8d69f Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 27 Oct 2024 02:41:16 +0300 Subject: [PATCH 595/597] gpu_unai: simplify gpuClearImage no need for alignment checks as it's not possible on real psx --- plugins/gpu_unai/gpu_raster_image.h | 37 ++++++++----------------- plugins/gpu_unai/old/gpu_raster_image.h | 37 ++++++++----------------- 2 files changed, 22 insertions(+), 52 deletions(-) diff --git a/plugins/gpu_unai/gpu_raster_image.h b/plugins/gpu_unai/gpu_raster_image.h index 7c9eb4d92..02d519e01 100644 --- a/plugins/gpu_unai/gpu_raster_image.h +++ b/plugins/gpu_unai/gpu_raster_image.h @@ -162,6 +162,9 @@ void gpuClearImage(PtrUnion packet) w0 = le16_to_s16(packet.U2[4]) & 0x3ff; h0 = le16_to_s16(packet.U2[5]) & 0x1ff; + x0 &= ~0xF; + w0 = ((w0 + 0xF) & ~0xF); + w0 += x0; if (x0 < 0) x0 = 0; if (w0 > FRAME_WIDTH) w0 = FRAME_WIDTH; @@ -177,40 +180,22 @@ void gpuClearImage(PtrUnion packet) fprintf(stdout,"gpuClearImage(x0=%d,y0=%d,w0=%d,h0=%d)\n",x0,y0,w0,h0); #endif - if (x0&1) - { - le16_t* pixel = gpu_unai.vram + FRAME_OFFSET(x0, y0); - le16_t rgb = u16_to_le16(GPU_RGB16(le32_to_u32(packet.U4[0]))); - y0 = FRAME_WIDTH - w0; - do { - x0=w0; - do { *pixel++ = rgb; } while (--x0); - pixel += y0; - } while (--h0); - } - else { le32_t* pixel = (le32_t*)gpu_unai.vram + ((FRAME_OFFSET(x0, y0))>>1); u32 _rgb = GPU_RGB16(le32_to_u32(packet.U4[0])); le32_t rgb = u32_to_le32(_rgb | (_rgb << 16)); - if (w0&1) - { - y0 = (FRAME_WIDTH - w0 +1)>>1; - w0>>=1; - do { - x0=w0; - do { *pixel++ = rgb; } while (--x0); - *((u16*)pixel) = (u16)le32_raw(rgb); - pixel += y0; - } while (--h0); - } - else { y0 = (FRAME_WIDTH - w0)>>1; - w0>>=1; + w0>>=3; do { x0=w0; - do { *pixel++ = rgb; } while (--x0); + do { + pixel[0] = rgb; + pixel[1] = rgb; + pixel[2] = rgb; + pixel[3] = rgb; + pixel += 4; + } while (--x0); pixel += y0; } while (--h0); } diff --git a/plugins/gpu_unai/old/gpu_raster_image.h b/plugins/gpu_unai/old/gpu_raster_image.h index 
0c82aa976..92d5a6d33 100644 --- a/plugins/gpu_unai/old/gpu_raster_image.h +++ b/plugins/gpu_unai/old/gpu_raster_image.h @@ -151,6 +151,9 @@ INLINE void gpuClearImage(void) w0 = PacketBuffer.S2[4] & 0x3ff; h0 = PacketBuffer.S2[5] & 0x3ff; + x0 &= ~0xF; + w0 = ((w0 + 0xF) & ~0xF); + w0 += x0; if (x0 < 0) x0 = 0; if (w0 > FRAME_WIDTH) w0 = FRAME_WIDTH; @@ -162,40 +165,22 @@ INLINE void gpuClearImage(void) h0 -= y0; if (h0 <= 0) return; - if (x0&1) - { - u16* pixel = (u16*)GPU_FrameBuffer + FRAME_OFFSET(x0, y0); - u16 rgb = GPU_RGB16(PacketBuffer.S4[0]); - y0 = FRAME_WIDTH - w0; - do { - x0=w0; - do { *pixel++ = rgb; } while (--x0); - pixel += y0; - } while (--h0); - } - else { u32* pixel = (u32*)(void*)GPU_FrameBuffer + ((FRAME_OFFSET(x0, y0))>>1); u32 rgb = GPU_RGB16(PacketBuffer.S4[0]); rgb |= (rgb<<16); - if (w0&1) - { - y0 = (FRAME_WIDTH - w0 +1)>>1; - w0>>=1; - do { - x0=w0; - do { *pixel++ = rgb; } while (--x0); - *((u16*)pixel) = (u16)rgb; - pixel += y0; - } while (--h0); - } - else { y0 = (FRAME_WIDTH - w0)>>1; - w0>>=1; + w0>>=3; do { x0=w0; - do { *pixel++ = rgb; } while (--x0); + do { + pixel[0] = rgb; + pixel[1] = rgb; + pixel[2] = rgb; + pixel[3] = rgb; + pixel += 4; + } while (--x0); pixel += y0; } while (--h0); } From bd701916cca181f9eb7b6e9306ea59a319eab29a Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 27 Oct 2024 03:21:18 +0300 Subject: [PATCH 596/597] gpu_unai: don't typedef le32_t as structs Experiments show it prevents autovectorization on some compilers, so do it in asserts build only. 
--- plugins/gpu_unai/gpu_unai.h | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/plugins/gpu_unai/gpu_unai.h b/plugins/gpu_unai/gpu_unai.h index 330620228..844a8fd40 100644 --- a/plugins/gpu_unai/gpu_unai.h +++ b/plugins/gpu_unai/gpu_unai.h @@ -22,6 +22,7 @@ #ifndef GPU_UNAI_H #define GPU_UNAI_H +#include #include "gpu.h" // Header shared between both standalone gpu_unai (gpu.cpp) and new @@ -62,6 +63,8 @@ typedef union { u64 raw; } gcol_t; +#ifndef NDEBUG + typedef struct { u32 v; } le32_t; @@ -70,44 +73,54 @@ typedef struct { u16 v; } le16_t; +#define LExRead(v_) (v_.v) + +#else + +typedef u32 le32_t; +typedef u16 le16_t; +#define LExRead(v) (v) + +#endif + static inline u32 le32_to_u32(le32_t le) { - return LE32TOH(le.v); + return LE32TOH(LExRead(le)); } static inline s32 le32_to_s32(le32_t le) { - return (int32_t) LE32TOH(le.v); + return (int32_t) LE32TOH(LExRead(le)); } static inline u32 le32_raw(le32_t le) { - return le.v; + return LExRead(le); } static inline le32_t u32_to_le32(u32 u) { - return (le32_t){ .v = HTOLE32(u) }; + return (le32_t){ HTOLE32(u) }; } static inline u16 le16_to_u16(le16_t le) { - return LE16TOH(le.v); + return LE16TOH(LExRead(le)); } static inline s16 le16_to_s16(le16_t le) { - return (int16_t) LE16TOH(le.v); + return (int16_t) LE16TOH(LExRead(le)); } static inline u16 le16_raw(le16_t le) { - return le.v; + return LExRead(le); } static inline le16_t u16_to_le16(u16 u) { - return (le16_t){ .v = HTOLE16(u) }; + return (le16_t){ HTOLE16(u) }; } union PtrUnion From 79c4d4343ef401c5d5489972c7a7e434f441578a Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 27 Oct 2024 21:52:25 +0200 Subject: [PATCH 597/597] merge from libretro got tired of all the conflicts --- .gitmodules | 3 + Makefile | 36 +- Makefile.libretro | 691 +++++++++++++++++++++++++++++- deps/mman | 1 + include/mman/sys/mman.h | 1 + jni/Android.mk | 265 +++++++++++- plugins/dfsound/out.c | 2 +- plugins/dfxvideo/gpulib_if.c | 24 ++ 
plugins/gpu-gles/gpulib_if.c | 8 + plugins/gpu_neon/psx_gpu_if.c | 8 + plugins/gpulib/gpu.c | 18 +- plugins/gpulib/gpu.h | 5 + plugins/gpulib/gpulib_thread_if.c | 563 ++++++++++++++++++++++++ plugins/gpulib/gpulib_thread_if.h | 42 ++ 14 files changed, 1660 insertions(+), 7 deletions(-) create mode 160000 deps/mman create mode 100644 include/mman/sys/mman.h create mode 100644 plugins/gpulib/gpulib_thread_if.c create mode 100644 plugins/gpulib/gpulib_thread_if.h diff --git a/.gitmodules b/.gitmodules index d4665d30f..e96248d92 100644 --- a/.gitmodules +++ b/.gitmodules @@ -16,3 +16,6 @@ [submodule "deps/libretro-common"] path = deps/libretro-common url = https://github.com/libretro/libretro-common.git +[submodule "deps/mman"] + path = deps/mman + url = https://github.com/witwall/mman-win32 diff --git a/Makefile b/Makefile index d29779131..39b5fbaf8 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ CFLAGS += -ggdb endif ifneq ($(DEBUG), 1) CFLAGS += -O2 -ifndef ASSERTS +ifneq ($(ASSERTS), 1) CFLAGS += -DNDEBUG endif endif @@ -77,6 +77,28 @@ OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cdrom-async.o \ libpcsxcore/sio.o libpcsxcore/spu.o libpcsxcore/gpu.o OBJS += libpcsxcore/gte.o libpcsxcore/gte_nf.o libpcsxcore/gte_divider.o #OBJS += libpcsxcore/debug.o libpcsxcore/socket.o libpcsxcore/disr3000a.o + +ifeq ($(WANT_ZLIB),1) +ZLIB_DIR = deps/libchdr/deps/zlib-1.3.1 +CFLAGS += -I$(ZLIB_DIR) +OBJS += $(ZLIB_DIR)/adler32.o \ + $(ZLIB_DIR)/compress.o \ + $(ZLIB_DIR)/crc32.o \ + $(ZLIB_DIR)/deflate.o \ + $(ZLIB_DIR)/gzclose.o \ + $(ZLIB_DIR)/gzlib.o \ + $(ZLIB_DIR)/gzread.o \ + $(ZLIB_DIR)/gzwrite.o \ + $(ZLIB_DIR)/infback.o \ + $(ZLIB_DIR)/inffast.o \ + $(ZLIB_DIR)/inflate.o \ + $(ZLIB_DIR)/inftrees.o \ + $(ZLIB_DIR)/trees.o \ + $(ZLIB_DIR)/uncompr.o \ + $(ZLIB_DIR)/zutil.o +$(ZLIB_DIR)/%.o: CFLAGS += -DHAVE_UNISTD_H +endif + ifeq "$(ARCH)" "arm" OBJS += libpcsxcore/gte_arm.o endif @@ -209,6 +231,7 @@ endif # builtin gpu OBJS += plugins/gpulib/gpu.o 
plugins/gpulib/vout_pl.o ifeq "$(BUILTIN_GPU)" "neon" +CFLAGS += -DGPU_NEON OBJS += plugins/gpu_neon/psx_gpu_if.o plugins/gpu_neon/psx_gpu_if.o: CFLAGS += -DNEON_BUILD -DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP plugins/gpu_neon/psx_gpu_if.o: plugins/gpu_neon/psx_gpu/*.c @@ -222,13 +245,20 @@ frontend/menu.o frontend/plugin_lib.o: CFLAGS += -DBUILTIN_GPU_NEON endif endif ifeq "$(BUILTIN_GPU)" "peops" +CFLAGS += -DGPU_PEOPS # note: code is not safe for strict-aliasing? (Castlevania problems) plugins/dfxvideo/gpulib_if.o: CFLAGS += -fno-strict-aliasing plugins/dfxvideo/gpulib_if.o: plugins/dfxvideo/prim.c plugins/dfxvideo/soft.c OBJS += plugins/dfxvideo/gpulib_if.o +ifeq "$(THREAD_RENDERING)" "1" +CFLAGS += -DTHREAD_RENDERING +OBJS += plugins/gpulib/gpulib_thread_if.o +endif endif ifeq "$(BUILTIN_GPU)" "unai" +CFLAGS += -DGPU_UNAI +CFLAGS += -DUSE_GPULIB=1 OBJS += plugins/gpu_unai/gpulib_if.o ifeq "$(ARCH)" "arm" OBJS += plugins/gpu_unai/gpu_arm.o @@ -249,7 +279,7 @@ CC_LINK = $(CXX) endif # libchdr -#ifeq "$(HAVE_CHD)" "1" +ifeq "$(HAVE_CHD)" "1" LCHDR = deps/libchdr LCHDR_LZMA = $(LCHDR)/deps/lzma-24.05 LCHDR_ZSTD = $(LCHDR)/deps/zstd-1.5.6/lib @@ -283,7 +313,7 @@ $(LCHDR_ZSTD)/decompress/%.o: CFLAGS += -I$(LCHDR_ZSTD) $(LCHDR)/src/%.o: CFLAGS += -I$(LCHDR_ZSTD) libpcsxcore/cdriso.o: CFLAGS += -Wno-unused-function CFLAGS += -DHAVE_CHD -I$(LCHDR)/include -#endif +endif # frontend/gui OBJS += frontend/cspace.o diff --git a/Makefile.libretro b/Makefile.libretro index 03ccff7ea..6bacb3f47 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -1 +1,690 @@ -$(error This file is unmaintained. 
Please use the libretro fork: https://github.com/libretro/pcsx_rearmed) +# Makefile for PCSX ReARMed (libretro) + +DEBUG ?= 0 +WANT_ZLIB ?= 1 +HAVE_CHD ?= 1 +HAVE_PHYSICAL_CDROM ?= 1 +USE_ASYNC_CDROM ?= 1 +USE_LIBRETRO_VFS ?= 0 +NDRC_THREAD ?= 1 + +# Dynarec options: lightrec, ari64 +DYNAREC ?= lightrec + +ifeq ($(platform),) + platform = unix + ifeq ($(shell uname -a),) + platform = win + else ifneq ($(findstring MINGW,$(shell uname -a)),) + platform = win + else ifneq ($(findstring Darwin,$(shell uname -a)),) + platform = osx + else ifneq ($(findstring win,$(shell uname -a)),) + platform = win + endif +endif + +CC ?= gcc +CXX ?= g++ +AS ?= as +LD ?= ld +CFLAGS ?= + +# early compiler overrides +ifeq ($(platform),ios-arm64) + CC = cc -arch arm64 -isysroot $(IOSSDK) + CXX = c++ -arch arm64 -isysroot $(IOSSDK) +else ifneq (,$(findstring ios,$(platform))) + CC = cc -arch armv7 -isysroot $(IOSSDK) + CXX = c++ -arch armv7 -isysroot $(IOSSDK) +else ifeq ($(platform), tvos-arm64) + ifeq ($(IOSSDK),) + IOSSDK := $(shell xcodebuild -version -sdk appletvos Path) + endif + CC = cc -arch arm64 -isysroot $(IOSSDK) + CXX = c++ -arch arm64 -isysroot $(IOSSDK) +else ifeq ($(platform), osx) + ifeq ($(CROSS_COMPILE),1) + TARGET_RULE = -target $(LIBRETRO_APPLE_PLATFORM) -isysroot $(LIBRETRO_APPLE_ISYSROOT) + CFLAGS += $(TARGET_RULE) + CXXFLAGS += $(TARGET_RULE) + LDFLAGS += $(TARGET_RULE) + endif +else ifeq ($(platform), psl1ght) + ifeq ($(strip $(PS3DEV)),) + $(error "PS3DEV env var is not set") + endif + CC = $(PS3DEV)/ppu/bin/ppu-gcc$(EXE_EXT) + AR = $(PS3DEV)/ppu/bin/ppu-ar$(EXE_EXT) +else ifeq ($(platform), psp1) + CC = psp-gcc$(EXE_EXT) + AR = psp-ar$(EXE_EXT) + LD = psp-ld$(EXE_EXT) +else ifeq ($(platform), vita) + CC = arm-vita-eabi-gcc$(EXE_EXT) + AR = arm-vita-eabi-ar$(EXE_EXT) + LD = arm-vita-eabi-ld$(EXE_EXT) + OBJCOPY = arm-vita-eabi-objcopy$(EXE_EXT) +else ifeq ($(platform), ctr) + ifeq ($(strip $(DEVKITARM)),) + $(error "DEVKITARM env var is not set") + endif + CC = 
$(DEVKITARM)/bin/arm-none-eabi-gcc$(EXE_EXT) + CXX = $(DEVKITARM)/bin/arm-none-eabi-g++$(EXE_EXT) + AR = $(DEVKITARM)/bin/arm-none-eabi-ar$(EXE_EXT) + LD = $(DEVKITARM)/bin/arm-none-eabi-ld$(EXE_EXT) + OBJCOPY = $(DEVKITARM)/bin/arm-none-eabi-objcopy$(EXE_EXT) +else ifeq ($(platform), libnx) + export DEPSDIR := $(CURDIR)/ + ifeq ($(strip $(DEVKITPRO)),) + $(error "DEVKITPRO env var is not set") + endif + include $(DEVKITPRO)/libnx/switch_rules + SHELL := PATH=$(PATH) $(SHELL) + LD = $(PREFIX)ld +else ifeq ($(platform), xenon) + CC = xenon-gcc$(EXE_EXT) + AR = xenon-ar$(EXE_EXT) + LD = xenon-ld$(EXE_EXT) +else ifneq (,$(filter $(platform),ngc wii wiiu)) + ifeq ($(strip $(DEVKITPPC)),) + $(error "DEVKITPPC env var is not set") + endif + CC = $(DEVKITPPC)/bin/powerpc-eabi-gcc$(EXE_EXT) + CXX = $(DEVKITPPC)/bin/powerpc-eabi-g++$(EXE_EXT) + AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) + LD = $(DEVKITPPC)/bin/powerpc-eabi-ld$(EXE_EXT) +else ifeq ($(platform), qnx) + CC = qcc -Vgcc_ntoarmv7le +else ifeq ($(platform), miyoo) + CC = /opt/miyoo/usr/bin/arm-linux-gcc + CXX = /opt/miyoo/usr/bin/arm-linux-g++ +endif +CC_AS ?= $(CC) + +# workaround wrong owner in libretro infra +GIT_VERSION1 := $(shell test -d /builds/libretro/pcsx_rearmed && git rev-parse --short HEAD 2>&1) +ifneq ($(findstring dubious ownership,$(GIT_VERSION1)),) +DUMMY := $(shell git config --global --add safe.directory /builds/libretro/pcsx_rearmed) +endif + +TARGET_NAME := pcsx_rearmed +ARCH_DETECTED := $(shell $(CC) $(CFLAGS) -dumpmachine | awk -F- '{print $$1}') +GIT_VERSION := " $(shell git rev-parse --short HEAD || echo unknown)" +ifneq ($(GIT_VERSION)," unknown") + CFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" +endif +ifneq ($(WANT_ZLIB),1) +LIBZ := -lz +endif +LIBPTHREAD := -lpthread +ifneq ($(findstring Haiku,$(shell uname -s)),) +LIBDL := -lroot -lnetwork +else +LIBDL := -ldl +endif +LIBM := -lm +MMAP_WIN32 = 0 +EXTRA_LDFLAGS = + +# select some defaults +ifneq (,$(findstring $(ARCH_DETECTED),arm 
aarch64)) + DYNAREC = ari64 +ifneq (,$(shell $(CC) -E -dD $(CFLAGS) include/arm_features.h | grep 'define __thumb__')) + # must disable thumb as ari64 can't handle it + CFLAGS += -marm +endif +ifneq (,$(shell $(CC) -E -dD $(CFLAGS) include/arm_features.h | grep 'HAVE_NEON32')) + BUILTIN_GPU = neon +endif +endif +ifneq (,$(filter $(ARCH_DETECTED),i686 x86_64 arm64 aarch64)) + BUILTIN_GPU = neon +endif + +# platform specific options + +# Unix +ifeq ($(platform), unix) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC + THREAD_RENDERING = 1 +ifeq ($(shell uname),Linux) + LIGHTREC_CUSTOM_MAP := 1 +endif + +# ODROIDN2 +else ifneq (,$(findstring CortexA73_G12B,$(platform))) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC + ARCH = arm64 + BUILTIN_GPU = neon + HAVE_NEON = 1 + DYNAREC = ari64 + CFLAGS += -fomit-frame-pointer -ffast-math -DARM + CFLAGS += -march=armv8-a+crc -mcpu=cortex-a73 -mtune=cortex-a73.cortex-a53 + +# ALLWINNER H5 +else ifneq (,$(findstring h5,$(platform))) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC + ARCH = arm64 + BUILTIN_GPU = neon + HAVE_NEON = 1 + DYNAREC = ari64 + CFLAGS += -fomit-frame-pointer -ffast-math -DARM + CFLAGS += -march=armv8-a+crc -mcpu=cortex-a53 -mtune=cortex-a53 + +else ifeq ($(platform), linux-portable) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC -nostdlib + EXTRA_LDFLAGS += -fPIC -nostdlib + LIBZ := + LIBPTHREAD := + LIBDL := + LIBM := + NO_UNDEF_CHECK = 1 + LIGHTREC_CUSTOM_MAP := 1 + +# OS X +else ifeq ($(platform), osx) + DYNAREC := 0 + TARGET := $(TARGET_NAME)_libretro.dylib + fpic := -fPIC + MACSOSVER = `sw_vers -productVersion | cut -d. -f 1` + OSXVER = `sw_vers -productVersion | cut -d. 
-f 2` + OSX_LT_MAVERICKS = `(( $(OSXVER) <= 9)) && echo "YES"` + ifeq ($(OSX_LT_MAVERICKS),YES) + fpic += -mmacosx-version-min=10.1 + endif + CFLAGS += $(ARCHFLAGS) + CXXFLAGS += $(ARCHFLAGS) + LDFLAGS += $(ARCHFLAGS) + HAVE_PHYSICAL_CDROM = 0 + FSECTIONS_LDFLAGS = -Wl,-dead_strip + +# iOS +else ifneq (,$(findstring ios,$(platform))) + TARGET := $(TARGET_NAME)_libretro_ios.dylib + MINVERSION := +ifeq ($(DYNAREC),lightrec) + # Override + DYNAREC := 0 +endif + fpic := -fPIC + + ifeq ($(IOSSDK),) + IOSSDK := $(shell xcodebuild -version -sdk iphoneos Path) + endif + + CFLAGS += -DIOS +ifeq ($(platform),ios-arm64) + ARCH := arm64 + BUILTIN_GPU = neon + HAVE_NEON = 1 + DYNAREC = 0 +else + ARCH := arm + HAVE_NEON = 1 + HAVE_NEON_ASM = 1 + BUILTIN_GPU = neon + CFLAGS += -marm -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon + ASFLAGS += -marm -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon +endif + HAVE_PHYSICAL_CDROM = 0 + CC_AS = perl ./tools/gas-preprocessor.pl $(CC) +ifeq ($(platform),$(filter $(platform),ios9 ios-arm64)) + MINVERSION = -miphoneos-version-min=8.0 +else + MINVERSION = -miphoneos-version-min=5.0 +endif + CFLAGS += $(MINVERSION) + FSECTIONS_LDFLAGS = -Wl,-dead_strip + +else ifeq ($(platform), tvos-arm64) + TARGET := $(TARGET_NAME)_libretro_tvos.dylib + MINVERSION := +ifeq ($(DYNAREC),lightrec) + # Override + DYNAREC := 0 +endif + fpic := -fPIC + + ifeq ($(IOSSDK),) + IOSSDK := $(shell xcodebuild -version -sdk appletvos Path) + endif + + CFLAGS += -DIOS -DTVOS + ARCH := arm64 + BUILTIN_GPU = neon + HAVE_NEON = 1 + DYNAREC = 0 + HAVE_PHYSICAL_CDROM = 0 + CC_AS = perl ./tools/gas-preprocessor.pl $(CC) + MINVERSION = -mappletvos-version-min=11.0 + CFLAGS += $(MINVERSION) + FSECTIONS_LDFLAGS = -Wl,-dead_strip + +# Nintendo Switch (libnx) +else ifeq ($(platform), libnx) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + ARCH := arm64 + HAVE_VFS_FD = 0 + CFLAGS += -O3 -fomit-frame-pointer -ffast-math -I$(DEVKITPRO)/libnx/include/ -fPIE + CFLAGS += 
-specs=$(DEVKITPRO)/libnx/switch.specs -DNO_DYLIB -D__arm64__ -D__ARM_NEON__ + CFLAGS += -D__SWITCH__ -DSWITCH -DHAVE_LIBNX + CFLAGS += -DARM -D__aarch64__=1 -march=armv8-a -mtune=cortex-a57 -mtp=soft -DHAVE_INTTYPES -DLSB_FIRST -ffast-math -mcpu=cortex-a57+crc+fp+simd + CFLAGS += -ftree-vectorize + CFLAGS += -Ifrontend/switch + NO_POSIX_MEMALIGN := 1 + NO_PTHREAD=1 + NO_MMAP := 1 # for psxmem + LIBPTHREAD := + WANT_ZLIB = 0 + PARTIAL_LINKING = 1 + BUILTIN_GPU = neon + HAVE_NEON = 1 + DYNAREC = ari64 + HAVE_PHYSICAL_CDROM = 0 + +# Lakka Switch (arm64) +else ifeq ($(platform), arm64) + TARGET := $(TARGET_NAME)_libretro.so + ARCH := arm64 + BUILTIN_GPU = neon + HAVE_NEON = 1 + DYNAREC = ari64 + HAVE_PHYSICAL_CDROM = 0 + fpic := -fPIC + CFLAGS := $(filter-out -O2, $(CFLAGS)) + CFLAGS += -O3 -ftree-vectorize + +# Lightweight PS3 Homebrew SDK +else ifeq ($(platform), psl1ght) + TARGET := $(TARGET_NAME)_libretro_psl1ght.a + CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ + CFLAGS += -DNO_DYLIB + NO_UNDEF_CHECK := 1 + STATIC_LINKING := 1 + NO_MMAP := 1 + NO_PTHREAD := 1 + LIBPTHREAD := + LIBDL := + NEED_SYSCONF := 1 + HAVE_PHYSICAL_CDROM = 0 + USE_ASYNC_CDROM = 0 + +# PSP +else ifeq ($(platform), psp1) + TARGET := $(TARGET_NAME)_libretro_psp1.a + CFLAGS += -DPSP -G0 + HAVE_PHYSICAL_CDROM = 0 + +# Vita +else ifeq ($(platform), vita) + TARGET := $(TARGET_NAME)_libretro_vita.a + CFLAGS += -DVITA + CFLAGS += -mcpu=cortex-a9 -mtune=cortex-a9 -mfpu=neon -marm + CFLAGS += -fsingle-precision-constant -mword-relocations -fno-unwind-tables + CFLAGS += -fno-asynchronous-unwind-tables -ftree-vectorize -funroll-loops + CFLAGS += -fno-optimize-sibling-calls + CFLAGS += -I$(VITASDK)/include -Ifrontend/vita + CFLAGS += -DNO_DYLIB + CFLAGS_LAST += -O3 + ASFLAGS += -mcpu=cortex-a9 -mtune=cortex-a9 -mfpu=neon + + HAVE_NEON = 1 + HAVE_NEON_ASM = 1 + BUILTIN_GPU = neon + + DYNAREC = ari64 + ARCH = arm + PARTIAL_LINKING = 1 + NO_PTHREAD=1 + NO_POSIX_MEMALIGN := 1 + HAVE_PHYSICAL_CDROM = 0 + +# 
CTR(3DS) +else ifeq ($(platform), ctr) + ifeq ($(strip $(CTRULIB)),) + $(error "CTRULIB env var is not set") + endif + TARGET := $(TARGET_NAME)_libretro_ctr.a + CFLAGS += -DARM11 -D_3DS -D__3DS__ + CFLAGS += -DNO_DYLIB -DGPU_UNAI_USE_FLOATMATH -DGPU_UNAI_USE_FLOAT_DIV_MULTINV + CFLAGS += -march=armv6k -mtune=mpcore -mfloat-abi=hard -marm -mfpu=vfp -mtp=soft + CFLAGS += -Wall -mword-relocations + CFLAGS += -fomit-frame-pointer + CFLAGS_LAST += -O3 + # CFLAGS += -funroll-loops # ~500K of bloat + CFLAGS += -Ifrontend/3ds -I$(CTRULIB)/include + CFLAGS += -Werror=implicit-function-declaration + CFLAGS += -DHAVE_UNISTD_H + CFLAGS += -DZ7_DECL_Int32_AS_long + CFLAGS += -DUSE_CTRULIB_2 + + OBJS += deps/arm-mem/memcpymove-v6l.o + OBJS += deps/arm-mem/memset-v6l.o + OBJS += frontend/3ds/utils.o + + BUILTIN_GPU = unai + THREAD_RENDERING = 1 + DYNAREC = ari64 + ARCH = arm + HAVE_NEON = 0 + PARTIAL_LINKING = 1 + WANT_ZLIB = 0 + NO_POSIX_MEMALIGN := 1 + NO_MMAP := 1 # for psxmem + HAVE_PHYSICAL_CDROM = 0 + +# Xbox 360 +else ifeq ($(platform), xenon) + TARGET := $(TARGET_NAME)_libretro_xenon360.a + CFLAGS += -D__LIBXENON__ -m32 -D__ppc__ + HAVE_PHYSICAL_CDROM = 0 + +# Nintendo GC/Wii/WiiU +else ifneq (,$(filter $(platform),ngc wii wiiu)) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + ifeq ($(platform), ngc) + CFLAGS += -DHW_DOL -mogc + NEED_SYSCONF := 1 + else ifeq ($(platform), wii) + CFLAGS += -DHW_RVL -mrvl + NEED_SYSCONF := 1 + else ifeq ($(platform), wiiu) + # -mwup was removed in newer devkitPPC versions + CFLAGS += -DHW_WUP + CFLAGS += -I frontend/wiiu + CFLAGS += -DZ7_DECL_Int32_AS_long + LIGHTREC_CUSTOM_MAP := 1 + LIGHTREC_CUSTOM_MAP_OBJ := libpcsxcore/lightrec/mem_wiiu.o + LIGHTREC_CODE_INV := 1 + endif + ARCH = powerpc + BUILTIN_GPU = peops + CFLAGS += -D__ppc__ -D__powerpc__ + CFLAGS += -DGEKKO -mcpu=750 -meabi -mhard-float + CFLAGS += -DNO_DYLIB + STATIC_LINKING := 1 + THREAD_RENDERING := 0 + NO_PTHREAD := 1 + NO_MMAP := 1 + NO_POSIX_MEMALIGN := 1 + LIBDL := + 
LIBPTHREAD := + LIBRT := + HAVE_PHYSICAL_CDROM = 0 + USE_ASYNC_CDROM = 0 + +# QNX +else ifeq ($(platform), qnx) + TARGET := $(TARGET_NAME)_libretro_qnx.so + fpic := -fPIC + HAVE_NEON = 1 + HAVE_NEON_ASM = 1 + DYNAREC = ari64 + BUILTIN_GPU = neon + ARCH = arm + CFLAGS += -D__BLACKBERRY_QNX__ -marm -mcpu=cortex-a9 -mtune=cortex-a9 -mfpu=neon -mfloat-abi=softfp + ASFLAGS += -mcpu=cortex-a9 -mfpu=neon -mfloat-abi=softfp + MAIN_LDLIBS += -lsocket + LIBPTHREAD := + LIBDL := + LIBM := + HAVE_PHYSICAL_CDROM = 0 + +#Raspberry Pi 1 +else ifeq ($(platform), rpi1) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC + CFLAGS += -marm -mcpu=arm1176jzf-s -mfpu=vfp -mfloat-abi=hard + ASFLAGS += -mcpu=arm1176jzf-s -mfpu=vfp -mfloat-abi=hard + HAVE_NEON = 0 + ARCH = arm + BUILTIN_GPU = unai + DYNAREC = ari64 + +#Raspberry Pi 2 +else ifeq ($(platform), rpi2) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC + CFLAGS += -marm -mcpu=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard + ASFLAGS += -mcpu=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard + HAVE_NEON = 1 + HAVE_NEON_ASM = 1 + ARCH = arm + BUILTIN_GPU = neon + DYNAREC = ari64 + +#Raspberry Pi 3 +else ifeq ($(platform), rpi3) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC + CFLAGS += -marm -mcpu=cortex-a53 -mfpu=neon-fp-armv8 -mfloat-abi=hard + ASFLAGS += -mcpu=cortex-a53 -mfpu=neon-fp-armv8 -mfloat-abi=hard + HAVE_NEON = 1 + HAVE_NEON_ASM = 1 + ARCH = arm + BUILTIN_GPU = neon + DYNAREC = ari64 + +#Raspberry Pi 3 with a 64bit GNU/Linux OS +else ifeq ($(platform), rpi3_64) + TARGET := $(TARGET_NAME)_libretro.so + ARCH := arm64 + BUILTIN_GPU = neon + HAVE_NEON = 1 + DYNAREC = ari64 + fpic := -fPIC + CFLAGS += -march=armv8-a+crc+simd -mtune=cortex-a53 -ftree-vectorize + +#Raspberry Pi 4 with a 32bit GNU/Linux OS +else ifeq ($(platform), rpi4) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC + CFLAGS += -marm -mcpu=cortex-a72 -mfpu=neon-fp-armv8 -mfloat-abi=hard + ASFLAGS += -mcpu=cortex-a72 -mfpu=neon-fp-armv8 
-mfloat-abi=hard + HAVE_NEON = 1 + HAVE_NEON_ASM = 1 + ARCH = arm + BUILTIN_GPU = neon + DYNAREC = ari64 + +#Raspberry Pi 4 with a 64bit GNU/Linux OS +else ifeq ($(platform), rpi4_64) + TARGET := $(TARGET_NAME)_libretro.so + ARCH := arm64 + BUILTIN_GPU = neon + HAVE_NEON = 1 + DYNAREC = ari64 + fpic := -fPIC + CFLAGS += -march=armv8-a+crc+simd -mtune=cortex-a72 -ftree-vectorize + +# Classic Platforms #################### +# Platform affix = classic_<ISA>_<µARCH> +# Help at https://modmyclassic.com/comp + +# (armv7 a7, hard float, neon based) ### +# NESC, SNESC, C64 mini +else ifeq ($(platform), classic_armv7_a7) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC + CFLAGS += -Ofast \ + -flto=auto -fuse-linker-plugin \ + -fno-stack-protector -fno-ident -fomit-frame-pointer \ + -falign-functions=1 -falign-jumps=1 -falign-loops=1 \ + -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-unroll-loops \ + -fmerge-all-constants -fno-math-errno \ + -marm -mtune=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard + CXXFLAGS += $(CFLAGS) + CPPFLAGS += $(CFLAGS) + ASFLAGS += $(CFLAGS) + HAVE_NEON = 1 + HAVE_NEON_ASM = 1 + ARCH = arm + BUILTIN_GPU = neon + DYNAREC = ari64 + ifeq ($(shell echo `$(CC) -dumpversion` "< 4.9" | bc -l), 1) + CFLAGS += -march=armv7-a + else + CFLAGS += -march=armv7ve + # If gcc is 5.0 or later + ifeq ($(shell echo `$(CC) -dumpversion` ">= 5" | bc -l), 1) + LDFLAGS += -static-libgcc -static-libstdc++ + endif + endif + +# (armv8 a35, hard float, neon based) ### +# PlayStation Classic +else ifeq ($(platform), classic_armv8_a35) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC + CFLAGS += -Ofast \ + -fmerge-all-constants -fno-math-errno \ + -fno-stack-protector -fomit-frame-pointer \ + -marm -mcpu=cortex-a35 -mtune=cortex-a35 -mfpu=neon-fp-armv8 -mfloat-abi=hard + HAVE_NEON = 1 + HAVE_NEON_ASM = 1 + ARCH = arm + BUILTIN_GPU = neon + DYNAREC = ari64 + LDFLAGS += -static-libgcc -static-libstdc++ -fPIC + +####################################### + +# ARM 
+else ifneq (,$(findstring armv,$(platform))) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC + HAVE_NEON = 0 + BUILTIN_GPU = peops + ifneq (,$(findstring cortexa8,$(platform))) + CFLAGS += -mcpu=cortex-a8 + ASFLAGS += -mcpu=cortex-a8 + else ifneq (,$(findstring cortexa7,$(platform))) + CFLAGS += -mcpu=cortex-a7 + ASFLAGS += -mcpu=cortex-a7 + LIBZ := + else ifneq (,$(findstring cortexa9,$(platform))) + CFLAGS += -mcpu=cortex-a9 + ASFLAGS += -mcpu=cortex-a9 + endif + CFLAGS += -marm + ifneq (,$(findstring neon,$(platform))) + CFLAGS += -mfpu=neon + ASFLAGS += -mfpu=neon + HAVE_NEON = 1 + HAVE_NEON_ASM = 1 + BUILTIN_GPU = neon + endif + ifneq (,$(findstring softfloat,$(platform))) + CFLAGS += -mfloat-abi=softfp + ASFLAGS += -mfloat-abi=softfp + else ifneq (,$(findstring hardfloat,$(platform))) + CFLAGS += -mfloat-abi=hard + ASFLAGS += -mfloat-abi=hard + endif + ARCH = arm + DYNAREC = ari64 + +else ifeq ($(platform), miyoo) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC + CFLAGS += -mcpu=arm926ej-s -fsingle-precision-constant + CFLAGS += -DGPU_UNAI_USE_INT_DIV_MULTINV -D_MIYOO + ARCH = arm + BUILTIN_GPU = unai + DYNAREC = ari64 + HAVE_NEON = 0 + +# Emscripten +else ifeq ($(platform), emscripten) + TARGET := $(TARGET_NAME)_libretro_$(platform).bc + fpic := -fPIC + NO_MMAP = 1 + CFLAGS += -DNO_DYLIB -DNO_SOCKET + CFLAGS += -msimd128 -ftree-vectorize + LIBPTHREAD := + NO_PTHREAD=1 + DYNAREC = + STATIC_LINKING = 1 + HAVE_PHYSICAL_CDROM = 0 + +# Windows +else + TARGET := $(TARGET_NAME)_libretro.dll + PLATFORM = libretro + MAIN_LDFLAGS += -static-libgcc -static-libstdc++ +ifneq ($(DEBUG),1) + MAIN_LDFLAGS += -s +endif + CFLAGS += -D__WIN32__ -DNO_DYLIB + MMAP_WIN32=1 + NO_PTHREAD=1 + MAIN_LDLIBS += -lws2_32 + LIBPTHREAD := + LIBDL := + LIBM := + USE_LIBRETRO_VFS = 1 +endif + +CFLAGS += $(fpic) +MAIN_LDFLAGS += -shared +MAIN_LDLIBS += $(LIBPTHREAD) $(LIBM) $(LIBDL) $(LIBZ) + +# enable large file support if available +ifeq ($(shell $(CC) -E -dD $(CFLAGS) 
include/arm_features.h | grep __SIZEOF_LONG__ | awk '{print $$3}'),4) +CFLAGS += -D_FILE_OFFSET_BITS=64 +endif + +# try to autodetect stuff for the lazy +ifndef ARCH +ARCH = $(ARCH_DETECTED) +endif +ifndef HAVE_NEON_ASM +# asm for 32bit only +HAVE_NEON_ASM = $(shell $(CC) -E -dD $(CFLAGS) include/arm_features.h | grep -q HAVE_NEON32 && echo 1 || echo 0) +endif +ifeq ($(NO_UNDEF_CHECK)$(shell $(LD) -v 2> /dev/null | awk '{print $$1}'),GNU) + ifeq (,$(findstring $(platform),win32)) + MAIN_LDFLAGS += -Wl,-version-script=frontend/libretro-version-script + endif +MAIN_LDFLAGS += -Wl,--no-undefined +endif +ifdef ALLOW_LIGHTREC_ON_ARM +CFLAGS += -DALLOW_LIGHTREC_ON_ARM +endif +ifeq ($(BUILTIN_GPU),neon) +ifneq (,$(findstring $(ARCH),x86 i686)) + CFLAGS_GPU_NEON ?= -msse2 # required +endif +ifeq ($(ARCH),x86_64) + CFLAGS_GPU_NEON ?= -mssse3 # optional, for more perf +endif +CFLAGS += $(CFLAGS_GPU_NEON) +endif + +TARGET ?= libretro.so +PLATFORM = libretro +BUILTIN_GPU ?= peops +SOUND_DRIVERS = libretro +PLUGINS = +NO_CONFIG_MAK = yes + +$(info TARGET: $(TARGET)) +$(info platform: $(platform)) +$(info ARCH: $(ARCH)) +$(info DYNAREC: $(DYNAREC)) +$(info BUILTIN_GPU: $(BUILTIN_GPU)) +$(info CC: $(CC) : $(shell $(CC) --version | head -1)) +$(info CFLAGS: $(CFLAGS)) +$(info MAIN_LDLIBS: $(MAIN_LDLIBS)) +$(info ) + +include Makefile + +# no special AS needed for gpu_neon +plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.o: plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S + $(CC) $(CFLAGS) -c $^ -o $@ diff --git a/deps/mman b/deps/mman new file mode 160000 index 000000000..2d1c576e6 --- /dev/null +++ b/deps/mman @@ -0,0 +1 @@ +Subproject commit 2d1c576e62b99e85d99407e1a88794c6e44c3310 diff --git a/include/mman/sys/mman.h b/include/mman/sys/mman.h new file mode 100644 index 000000000..55f7ea8ae --- /dev/null +++ b/include/mman/sys/mman.h @@ -0,0 +1 @@ +#include <mman.h> diff --git a/jni/Android.mk b/jni/Android.mk index 03ccff7ea..aacf89f5e 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -1 +1,264 
@@ -$(error This file is unmaintained. Please use the libretro fork: https://github.com/libretro/pcsx_rearmed) +LOCAL_PATH := $(call my-dir) + +$(shell cd "$(LOCAL_PATH)" && ((git describe --always || echo) | sed -e 's/.*/#define REV "\0"/' > ../frontend/revision.h_)) +$(shell cd "$(LOCAL_PATH)" && (diff -q ../frontend/revision.h_ ../frontend/revision.h > /dev/null 2>&1 || cp ../frontend/revision.h_ ../frontend/revision.h)) +$(shell cd "$(LOCAL_PATH)" && (rm ../frontend/revision.h_)) + +USE_LIBRETRO_VFS ?= 0 +USE_ASYNC_CDROM ?= 1 +USE_RTHREADS ?= 0 +NDRC_THREAD ?= 1 + +ROOT_DIR := $(LOCAL_PATH)/.. +CORE_DIR := $(ROOT_DIR)/libpcsxcore +SPU_DIR := $(ROOT_DIR)/plugins/dfsound +GPU_DIR := $(ROOT_DIR)/plugins/gpulib +CDR_DIR := $(ROOT_DIR)/plugins/cdrcimg +FRONTEND_DIR := $(ROOT_DIR)/frontend +NEON_DIR := $(ROOT_DIR)/plugins/gpu_neon +UNAI_DIR := $(ROOT_DIR)/plugins/gpu_unai +PEOPS_DIR := $(ROOT_DIR)/plugins/dfxvideo +DYNAREC_DIR := $(ROOT_DIR)/libpcsxcore/new_dynarec +DEPS_DIR := $(ROOT_DIR)/deps +LIBRETRO_COMMON := $(DEPS_DIR)/libretro-common +EXTRA_INCLUDES := + +# core +SOURCES_C := $(CORE_DIR)/cdriso.c \ + $(CORE_DIR)/cdrom.c \ + $(CORE_DIR)/cdrom-async.c \ + $(CORE_DIR)/cheat.c \ + $(CORE_DIR)/database.c \ + $(CORE_DIR)/decode_xa.c \ + $(CORE_DIR)/mdec.c \ + $(CORE_DIR)/misc.c \ + $(CORE_DIR)/plugins.c \ + $(CORE_DIR)/ppf.c \ + $(CORE_DIR)/psxbios.c \ + $(CORE_DIR)/psxcommon.c \ + $(CORE_DIR)/psxcounters.c \ + $(CORE_DIR)/psxdma.c \ + $(CORE_DIR)/psxevents.c \ + $(CORE_DIR)/psxhw.c \ + $(CORE_DIR)/psxinterpreter.c \ + $(CORE_DIR)/psxmem.c \ + $(CORE_DIR)/r3000a.c \ + $(CORE_DIR)/sio.c \ + $(CORE_DIR)/spu.c \ + $(CORE_DIR)/gpu.c \ + $(CORE_DIR)/gte.c \ + $(CORE_DIR)/gte_nf.c \ + $(CORE_DIR)/gte_divider.c + +# spu +SOURCES_C += $(SPU_DIR)/dma.c \ + $(SPU_DIR)/freeze.c \ + $(SPU_DIR)/registers.c \ + $(SPU_DIR)/spu.c \ + $(SPU_DIR)/out.c \ + $(SPU_DIR)/nullsnd.c + +# gpu +SOURCES_C += $(GPU_DIR)/gpu.c \ + $(GPU_DIR)/vout_pl.c + +# cdrcimg +SOURCES_C += 
$(CDR_DIR)/cdrcimg.c + +# frontend +SOURCES_C += $(FRONTEND_DIR)/main.c \ + $(FRONTEND_DIR)/plugin.c \ + $(FRONTEND_DIR)/cspace.c \ + $(FRONTEND_DIR)/libretro.c + +# libchdr +LCHDR = $(DEPS_DIR)/libchdr +LCHDR_LZMA = $(LCHDR)/deps/lzma-24.05 +LCHDR_ZSTD = $(LCHDR)/deps/zstd-1.5.6/lib +SOURCES_C += \ + $(LCHDR)/src/libchdr_bitstream.c \ + $(LCHDR)/src/libchdr_cdrom.c \ + $(LCHDR)/src/libchdr_chd.c \ + $(LCHDR)/src/libchdr_flac.c \ + $(LCHDR)/src/libchdr_huffman.c \ + $(LCHDR_LZMA)/src/Alloc.c \ + $(LCHDR_LZMA)/src/CpuArch.c \ + $(LCHDR_LZMA)/src/Delta.c \ + $(LCHDR_LZMA)/src/LzFind.c \ + $(LCHDR_LZMA)/src/LzmaDec.c \ + $(LCHDR_LZMA)/src/LzmaEnc.c \ + $(LCHDR_LZMA)/src/Sort.c \ + $(LCHDR_ZSTD)/common/entropy_common.c \ + $(LCHDR_ZSTD)/common/error_private.c \ + $(LCHDR_ZSTD)/common/fse_decompress.c \ + $(LCHDR_ZSTD)/common/xxhash.c \ + $(LCHDR_ZSTD)/common/zstd_common.c \ + $(LCHDR_ZSTD)/decompress/huf_decompress.c \ + $(LCHDR_ZSTD)/decompress/zstd_ddict.c \ + $(LCHDR_ZSTD)/decompress/zstd_decompress_block.c \ + $(LCHDR_ZSTD)/decompress/zstd_decompress.c +SOURCES_ASM := +EXTRA_INCLUDES += $(LCHDR)/include $(LCHDR_LZMA)/include $(LCHDR_ZSTD) +COREFLAGS += -DHAVE_CHD -DZ7_ST -DZSTD_DISABLE_ASM +ifeq (,$(call gte,$(APP_PLATFORM_LEVEL),18)) +ifneq ($(TARGET_ARCH_ABI),arm64-v8a) +# HACK +COREFLAGS += -Dgetauxval=0* +endif +endif + +COREFLAGS += -ffast-math -funroll-loops -DHAVE_LIBRETRO -DNO_FRONTEND -DFRONTEND_SUPPORTS_RGB565 -DANDROID -DREARMED +COREFLAGS += -DP_HAVE_MMAP=1 -DP_HAVE_PTHREAD=1 -DP_HAVE_POSIX_MEMALIGN=1 + +ifeq ($(USE_LIBRETRO_VFS),1) +SOURCES_C += \ + $(LIBRETRO_COMMON)/compat/compat_posix_string.c \ + $(LIBRETRO_COMMON)/compat/fopen_utf8.c \ + $(LIBRETRO_COMMON)/encodings/compat_strl.c \ + $(LIBRETRO_COMMON)/encodings/encoding_utf.c \ + $(LIBRETRO_COMMON)/file/file_path.c \ + $(LIBRETRO_COMMON)/streams/file_stream.c \ + $(LIBRETRO_COMMON)/streams/file_stream_transforms.c \ + $(LIBRETRO_COMMON)/string/stdstring.c \ + $(LIBRETRO_COMMON)/time/rtime.c \ + 
$(LIBRETRO_COMMON)/vfs/vfs_implementation.c +COREFLAGS += -DUSE_LIBRETRO_VFS +endif +EXTRA_INCLUDES += $(LIBRETRO_COMMON)/include + +USE_RTHREADS=0 +HAVE_ARI64=0 +HAVE_LIGHTREC=0 +LIGHTREC_CUSTOM_MAP=0 +LIGHTREC_THREADED_COMPILER=0 +HAVE_GPU_NEON=0 +ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) + HAVE_ARI64=1 + HAVE_GPU_NEON=1 +else ifeq ($(TARGET_ARCH_ABI),armeabi) + HAVE_ARI64=1 +else ifeq ($(TARGET_ARCH_ABI),arm64-v8a) + HAVE_ARI64=1 + HAVE_GPU_NEON=1 +else ifeq ($(TARGET_ARCH_ABI),x86_64) + HAVE_LIGHTREC=1 + HAVE_GPU_NEON=1 +else ifeq ($(TARGET_ARCH_ABI),x86) + HAVE_LIGHTREC=1 + HAVE_GPU_NEON=1 +else + COREFLAGS += -DDRC_DISABLE +endif + COREFLAGS += -DLIGHTREC_CUSTOM_MAP=$(LIGHTREC_CUSTOM_MAP) + COREFLAGS += -DLIGHTREC_ENABLE_THREADED_COMPILER=$(LIGHTREC_THREADED_COMPILER) + +ifeq ($(HAVE_ARI64),1) + SOURCES_C += $(DYNAREC_DIR)/new_dynarec.c \ + $(DYNAREC_DIR)/pcsxmem.c + ifeq ($(TARGET_ARCH_ABI),arm64-v8a) + SOURCES_ASM += $(DYNAREC_DIR)/linkage_arm64.S + else + SOURCES_ASM += $(CORE_DIR)/gte_arm.S \ + $(SPU_DIR)/arm_utils.S \ + $(DYNAREC_DIR)/linkage_arm.S + endif + ifeq ($(NDRC_THREAD),1) + COREFLAGS += -DNDRC_THREAD + USE_RTHREADS := 1 + endif +endif + SOURCES_C += $(DYNAREC_DIR)/emu_if.c + +ifeq ($(HAVE_LIGHTREC),1) + COREFLAGS += -DLIGHTREC -DLIGHTREC_STATIC -DLIGHTREC_CODE_INV=0 + EXTRA_INCLUDES += $(DEPS_DIR)/lightning/include \ + $(DEPS_DIR)/lightrec \ + $(DEPS_DIR)/lightrec/tlsf \ + $(ROOT_DIR)/include/lightning \ + $(ROOT_DIR)/include/lightrec + SOURCES_C += $(DEPS_DIR)/lightrec/blockcache.c \ + $(DEPS_DIR)/lightrec/constprop.c \ + $(DEPS_DIR)/lightrec/disassembler.c \ + $(DEPS_DIR)/lightrec/emitter.c \ + $(DEPS_DIR)/lightrec/interpreter.c \ + $(DEPS_DIR)/lightrec/lightrec.c \ + $(DEPS_DIR)/lightrec/memmanager.c \ + $(DEPS_DIR)/lightrec/optimizer.c \ + $(DEPS_DIR)/lightrec/regcache.c \ + $(DEPS_DIR)/lightrec/recompiler.c \ + $(DEPS_DIR)/lightrec/reaper.c \ + $(DEPS_DIR)/lightrec/tlsf/tlsf.c + SOURCES_C += $(DEPS_DIR)/lightning/lib/jit_disasm.c \ + 
$(DEPS_DIR)/lightning/lib/jit_memory.c \ + $(DEPS_DIR)/lightning/lib/jit_names.c \ + $(DEPS_DIR)/lightning/lib/jit_note.c \ + $(DEPS_DIR)/lightning/lib/jit_print.c \ + $(DEPS_DIR)/lightning/lib/jit_size.c \ + $(DEPS_DIR)/lightning/lib/lightning.c + SOURCES_C += $(CORE_DIR)/lightrec/plugin.c +ifeq ($(LIGHTREC_CUSTOM_MAP),1) + SOURCES_C += $(CORE_DIR)/lightrec/mem.c +endif +endif + + +ifeq ($(HAVE_GPU_NEON),1) + COREFLAGS += -DNEON_BUILD -DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP -DGPU_NEON + ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) + SOURCES_ASM += $(NEON_DIR)/psx_gpu/psx_gpu_arm_neon.S + else + COREFLAGS += -DSIMD_BUILD + SOURCES_C += $(NEON_DIR)/psx_gpu/psx_gpu_simd.c + endif + SOURCES_C += $(NEON_DIR)/psx_gpu_if.c +else ifeq ($(TARGET_ARCH_ABI),armeabi) + COREFLAGS += -DUSE_GPULIB=1 -DGPU_UNAI + COREFLAGS += -DHAVE_bgr555_to_rgb565 + SOURCES_ASM += $(UNAI_DIR)/gpu_arm.S \ + $(FRONTEND_DIR)/cspace_arm.S + SOURCES_C += $(UNAI_DIR)/gpulib_if.cpp +else + COREFLAGS += -fno-strict-aliasing -DGPU_PEOPS + SOURCES_C += $(PEOPS_DIR)/gpulib_if.c +endif + +ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) + COREFLAGS += -DHAVE_bgr555_to_rgb565 -DHAVE_bgr888_to_x + SOURCES_ASM += $(CORE_DIR)/gte_neon.S \ + $(FRONTEND_DIR)/cspace_neon.S +endif + +ifeq ($(USE_ASYNC_CDROM),1) +COREFLAGS += -DUSE_ASYNC_CDROM +USE_RTHREADS := 1 +endif +ifeq ($(USE_RTHREADS),1) +SOURCES_C += \ + $(FRONTEND_DIR)/libretro-rthreads.c \ + $(LIBRETRO_COMMON)/features/features_cpu.c +COREFLAGS += -DHAVE_RTHREADS +endif + +GIT_VERSION := " $(shell git rev-parse --short HEAD || echo unknown)" +ifneq ($(GIT_VERSION)," unknown") + COREFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" +endif + +include $(CLEAR_VARS) +LOCAL_MODULE := retro +LOCAL_SRC_FILES := $(SOURCES_C) $(SOURCES_ASM) +LOCAL_CFLAGS := $(COREFLAGS) +LOCAL_C_INCLUDES := $(ROOT_DIR)/include +LOCAL_C_INCLUDES += $(DEPS_DIR)/crypto +LOCAL_C_INCLUDES += $(EXTRA_INCLUDES) +LOCAL_LDFLAGS += -Wl,-version-script=$(FRONTEND_DIR)/libretro-version-script +LOCAL_LDFLAGS += 
-Wl,--script=$(FRONTEND_DIR)/libretro-extern.T +LOCAL_LDFLAGS += -Wl,--gc-sections +LOCAL_LDLIBS := -lz -llog +LOCAL_ARM_MODE := arm + +ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) + LOCAL_ARM_NEON := true +endif + +include $(BUILD_SHARED_LIBRARY) diff --git a/plugins/dfsound/out.c b/plugins/dfsound/out.c index 5ddd3f402..4443dae37 100644 --- a/plugins/dfsound/out.c +++ b/plugins/dfsound/out.c @@ -47,6 +47,6 @@ void SetupSound(void) } out_current = &out_drivers[i]; - printf("selected sound output driver: %s\n", out_current->name); + // printf("selected sound output driver: %s\n", out_current->name); } diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index af35f3cba..8a3f2f9a9 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -22,6 +22,22 @@ #pragma GCC diagnostic ignored "-Wmisleading-indentation" #endif +#ifdef THREAD_RENDERING +#include "../gpulib/gpulib_thread_if.h" +#define do_cmd_list real_do_cmd_list +#define renderer_init real_renderer_init +#define renderer_finish real_renderer_finish +#define renderer_sync_ecmds real_renderer_sync_ecmds +#define renderer_update_caches real_renderer_update_caches +#define renderer_flush_queues real_renderer_flush_queues +#define renderer_set_interlace real_renderer_set_interlace +#define renderer_set_config real_renderer_set_config +#define renderer_notify_res_change real_renderer_notify_res_change +#define renderer_notify_update_lace real_renderer_notify_update_lace +#define renderer_sync real_renderer_sync +#define ex_regs scratch_ex_regs +#endif + #define u32 uint32_t #define INFO_TW 0 @@ -468,6 +484,14 @@ void renderer_set_interlace(int enable, int is_odd) { } +void renderer_sync(void) +{ +} + +void renderer_notify_update_lace(int updated) +{ +} + #include "../../frontend/plugin_lib.h" void renderer_set_config(const struct rearmed_cbs *cbs) diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c index f00c4592d..ab95c6417 100644 --- 
a/plugins/gpu-gles/gpulib_if.c +++ b/plugins/gpu-gles/gpulib_if.c @@ -775,3 +775,11 @@ static void fps_update(void) DisplayText(buf, 1); } } + +void renderer_sync(void) +{ +} + +void renderer_notify_update_lace(int updated) +{ +} diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index 3f43e431f..021045861 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -244,4 +244,12 @@ void renderer_set_config(const struct rearmed_cbs *cbs) } } +void renderer_sync(void) +{ +} + +void renderer_notify_update_lace(int updated) +{ +} + // vim:ts=2:sw=2:expandtab diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index d1be12a6b..03be13d00 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -43,6 +43,7 @@ static void finish_vram_transfer(int is_read); static noinline void do_cmd_reset(void) { int dummy = 0; + renderer_sync(); if (unlikely(gpu.cmd_len > 0)) do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy); gpu.cmd_len = 0; @@ -167,6 +168,8 @@ static noinline void update_height(void) static noinline void decide_frameskip(void) { + *gpu.frameskip.dirty = 1; + if (gpu.frameskip.active) gpu.frameskip.cnt++; else { @@ -174,7 +177,9 @@ static noinline void decide_frameskip(void) gpu.frameskip.frame_ready = 1; } - if (!gpu.frameskip.active && *gpu.frameskip.advice) + if (*gpu.frameskip.force) + gpu.frameskip.active = 1; + else if (!gpu.frameskip.active && *gpu.frameskip.advice) gpu.frameskip.active = 1; else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set) gpu.frameskip.active = 1; @@ -448,6 +453,8 @@ static int do_vram_io(uint32_t *data, int count, int is_read) int l; count *= 2; // operate in 16bpp pixels + renderer_sync(); + if (gpu.dma.offset) { l = w - gpu.dma.offset; if (count < l) @@ -687,6 +694,7 @@ static noinline int do_cmd_buffer(uint32_t *data, int count, cmd = -1; // incomplete cmd, can't consume yet break; } + renderer_sync(); *cycles_sum += *cycles_last; *cycles_last = 0; 
do_vram_copy(data + pos + 1, cycles_last); @@ -890,12 +898,15 @@ long GPUfreeze(uint32_t type, struct GPUFreeze *freeze) case 1: // save if (gpu.cmd_len > 0) flush_cmd_buffer(); + + renderer_sync(); memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2); memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs)); memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs)); freeze->ulStatus = gpu.status; break; case 0: // load + renderer_sync(); memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2); memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs)); memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs)); @@ -929,6 +940,8 @@ void GPUupdateLace(void) return; } + renderer_notify_update_lace(0); + if (!gpu.state.fb_dirty) return; #endif @@ -948,6 +961,7 @@ void GPUupdateLace(void) gpu.state.enhancement_was_active = gpu.state.enhancement_active; gpu.state.fb_dirty = 0; gpu.state.blanked = 0; + renderer_notify_update_lace(1); } void GPUvBlank(int is_vblank, int lcf) @@ -984,6 +998,8 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs) { gpu.frameskip.set = cbs->frameskip; gpu.frameskip.advice = &cbs->fskip_advice; + gpu.frameskip.force = &cbs->fskip_force; + gpu.frameskip.dirty = (void *)&cbs->fskip_dirty; gpu.frameskip.active = 0; gpu.frameskip.frame_ready = 1; gpu.state.hcnt = (uint32_t *)cbs->gpu_hcnt; diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index ec7e05754..2083224e8 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -105,9 +105,12 @@ struct psx_gpu { uint32_t allow:1; uint32_t frame_ready:1; const int *advice; + const int *force; + int *dirty; uint32_t last_flip_frame; uint32_t pending_fill[3]; } frameskip; + uint32_t scratch_ex_regs[8]; // for threaded rendering void *(*get_enhancement_bufer) (int *x, int *y, int *w, int *h, int *vram_h); uint16_t *(*get_downscale_buffer) @@ -134,6 +137,8 @@ void renderer_flush_queues(void); void renderer_set_interlace(int enable, int is_odd); void renderer_set_config(const struct 
rearmed_cbs *config); void renderer_notify_res_change(void); +void renderer_notify_update_lace(int updated); +void renderer_sync(void); void renderer_notify_scanout_change(int x, int y); int vout_init(void); diff --git a/plugins/gpulib/gpulib_thread_if.c b/plugins/gpulib/gpulib_thread_if.c new file mode 100644 index 000000000..0b28fe338 --- /dev/null +++ b/plugins/gpulib/gpulib_thread_if.c @@ -0,0 +1,563 @@ +/************************************************************************** +* Copyright (C) 2020 The RetroArch Team * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. 
* +***************************************************************************/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include "../gpulib/gpu.h" +#include "../../frontend/plugin_lib.h" +#include "gpu.h" +#include "gpu_timing.h" +#include "gpulib_thread_if.h" + +extern void SysPrintf(const char *fmt, ...); + +#define FALSE 0 +#define TRUE 1 +#define BOOL unsigned short + +typedef struct { + uint32_t *cmd_list; + int count; + int last_cmd; +} video_thread_cmd; + +#define QUEUE_SIZE 0x2000 + +typedef struct { + size_t start; + size_t end; + size_t used; + video_thread_cmd queue[QUEUE_SIZE]; +} video_thread_queue; + +typedef struct { + pthread_t thread; + pthread_mutex_t queue_lock; + pthread_cond_t cond_msg_avail; + pthread_cond_t cond_msg_done; + pthread_cond_t cond_queue_empty; + video_thread_queue *queue; + video_thread_queue *bg_queue; + BOOL running; +} video_thread_state; + +static video_thread_state thread; +static video_thread_queue queues[2]; +static int thread_rendering; +static BOOL hold_cmds; +static BOOL needs_display; +static BOOL flushed; + +extern const unsigned char cmd_lengths[]; + +static void *video_thread_main(void *arg) { + video_thread_cmd *cmd; + int i; + +#ifdef _3DS + static int processed = 0; +#endif /* _3DS */ + + while(1) { + int result, cycles_dummy = 0, last_cmd, start, end; + video_thread_queue *queue; + pthread_mutex_lock(&thread.queue_lock); + + while (!thread.queue->used && thread.running) { + pthread_cond_wait(&thread.cond_msg_avail, &thread.queue_lock); + } + + if (!thread.running) { + pthread_mutex_unlock(&thread.queue_lock); + break; + } + + queue = thread.queue; + start = queue->start; + end = queue->end > queue->start ? 
queue->end : QUEUE_SIZE; + queue->start = end % QUEUE_SIZE; + pthread_mutex_unlock(&thread.queue_lock); + + for (i = start; i < end; i++) { + cmd = &queue->queue[i]; + result = real_do_cmd_list(cmd->cmd_list, cmd->count, + &cycles_dummy, &cycles_dummy, &last_cmd); + if (result != cmd->count) { + fprintf(stderr, "Processed wrong cmd count: expected %d, got %d\n", cmd->count, result); + } + +#ifdef _3DS + /* Periodically yield so as not to starve other threads */ + processed += cmd->count; + if (processed >= 512) { + svcSleepThread(1); + processed %= 512; + } +#endif /* _3DS */ + } + + pthread_mutex_lock(&thread.queue_lock); + queue->used -= (end - start); + + if (!queue->used) + pthread_cond_signal(&thread.cond_queue_empty); + + pthread_cond_signal(&thread.cond_msg_done); + pthread_mutex_unlock(&thread.queue_lock); + } + + return 0; +} + +static void cmd_queue_swap() { + video_thread_queue *tmp; + if (!thread.bg_queue->used) return; + + pthread_mutex_lock(&thread.queue_lock); + if (!thread.queue->used) { + tmp = thread.queue; + thread.queue = thread.bg_queue; + thread.bg_queue = tmp; + pthread_cond_signal(&thread.cond_msg_avail); + } + pthread_mutex_unlock(&thread.queue_lock); +} + +/* Waits for the main queue to completely finish. */ +void renderer_wait() { + if (!thread.running) return; + + /* Not completely safe, but should be fine since the render thread + * only decreases used, and we check again inside the lock. */ + if (!thread.queue->used) { + return; + } + + pthread_mutex_lock(&thread.queue_lock); + + while (thread.queue->used) { + pthread_cond_wait(&thread.cond_queue_empty, &thread.queue_lock); + } + + pthread_mutex_unlock(&thread.queue_lock); +} + +/* Waits for all GPU commands in both queues to finish, bringing VRAM + * completely up-to-date. */ +void renderer_sync(void) { + if (!thread.running) return; + + /* Not completely safe, but should be fine since the render thread + * only decreases used, and we check again inside the lock. 
*/ + if (!thread.queue->used && !thread.bg_queue->used) { + return; + } + + if (thread.bg_queue->used) { + /* When we flush the background queue, the vblank handler can't + * know that we had a frame pending, and we delay rendering too + * long. Force it. */ + flushed = TRUE; + } + + /* Flush both queues. This is necessary because gpulib could be + * trying to process a DMA write that a command in the queue should + * run beforehand. For example, Xenogears sprites write a black + * rectangle over the to-be-DMA'd spot in VRAM -- if this write + * happens after the DMA, it will clear the DMA, resulting in + * flickering sprites. We need to be totally up-to-date. This may + * drop a frame. */ + renderer_wait(); + cmd_queue_swap(); + hold_cmds = FALSE; + renderer_wait(); +} + +static void video_thread_stop() { + int i; + renderer_sync(); + + if (thread.running) { + thread.running = FALSE; + pthread_cond_signal(&thread.cond_msg_avail); + pthread_join(thread.thread, NULL); + } + + pthread_mutex_destroy(&thread.queue_lock); + pthread_cond_destroy(&thread.cond_msg_avail); + pthread_cond_destroy(&thread.cond_msg_done); + pthread_cond_destroy(&thread.cond_queue_empty); + + for (i = 0; i < QUEUE_SIZE; i++) { + video_thread_cmd *cmd = &thread.queue->queue[i]; + free(cmd->cmd_list); + cmd->cmd_list = NULL; + } + + for (i = 0; i < QUEUE_SIZE; i++) { + video_thread_cmd *cmd = &thread.bg_queue->queue[i]; + free(cmd->cmd_list); + cmd->cmd_list = NULL; + } +} + +static void video_thread_start() { + SysPrintf("Starting render thread\n"); + + thread.queue = &queues[0]; + thread.bg_queue = &queues[1]; + thread.running = TRUE; + + if (pthread_cond_init(&thread.cond_msg_avail, NULL) || + pthread_cond_init(&thread.cond_msg_done, NULL) || + pthread_cond_init(&thread.cond_queue_empty, NULL) || + pthread_mutex_init(&thread.queue_lock, NULL) || + pthread_create(&thread.thread, NULL, video_thread_main, &thread)) { + goto error; + } + + return; + + error: + SysPrintf("Failed to start rendering 
thread\n"); + thread.running = FALSE; + video_thread_stop(); +} + +static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) { + video_thread_cmd *cmd; + uint32_t *cmd_list; + video_thread_queue *queue; + BOOL lock; + + cmd_list = (uint32_t *)calloc(count, sizeof(uint32_t)); + + if (!cmd_list) { + /* Out of memory, disable the thread and run sync from now on */ + SysPrintf("Failed to allocate render thread command list, stopping thread\n"); + video_thread_stop(); + } + + memcpy(cmd_list, list, count * sizeof(uint32_t)); + + if (hold_cmds && thread.bg_queue->used >= QUEUE_SIZE) { + /* If the bg queue is full, do a full sync to empty both queues + * and clear space. This should be very rare, I've only seen it in + * Tekken 3 post-battle-replay. */ + renderer_sync(); + } + + if (hold_cmds) { + queue = thread.bg_queue; + lock = FALSE; + } else { + queue = thread.queue; + lock = TRUE; + } + + if (lock) { + pthread_mutex_lock(&thread.queue_lock); + + while (queue->used >= QUEUE_SIZE) { + pthread_cond_wait(&thread.cond_msg_done, &thread.queue_lock); + } + } + + cmd = &queue->queue[queue->end]; + free(cmd->cmd_list); + cmd->cmd_list = cmd_list; + cmd->count = count; + cmd->last_cmd = last_cmd; + queue->end = (queue->end + 1) % QUEUE_SIZE; + queue->used++; + + if (lock) { + pthread_cond_signal(&thread.cond_msg_avail); + pthread_mutex_unlock(&thread.queue_lock); + } +} + +/* Slice off just the part of the list that can be handled async, and + * update ex_regs. 
*/
+/* Walks the `count` words at `data` one command at a time without
+ * rendering anything. Adds the estimated cost of the scanned commands to
+ * *cycles_sum_out, stores the running per-command value in *cycles_last,
+ * and mirrors texture-page / 0xE0..0xE7 state into gpu.ex_regs.
+ * Scanning stops at the first image i/o command (0x80..0xdf) or at the
+ * first command that does not fit in `count`; in the latter case
+ * *last_cmd is set to -1, otherwise to the last command byte seen.
+ * Returns the number of words covered by complete commands.
+ * The gput_* helpers come from gpu_timing.h and are not visible here. */
+static int scan_cmd_list(uint32_t *data, int count,
+  int *cycles_sum_out, int *cycles_last, int *last_cmd)
+{
+  int cpu_cycles_sum = 0, cpu_cycles = *cycles_last;
+  int cmd = 0, pos = 0, len, v;
+
+  while (pos < count) {
+    uint32_t *list = data + pos;
+    short *slist = (void *)list;
+    cmd = LE32TOH(list[0]) >> 24;
+    len = 1 + cmd_lengths[cmd];
+
+    /* Make sure the fixed part of the command is present before any of
+     * its operand words are read below. Without this, a truncated
+     * command is read past `count`, and its cycles and ex_regs updates
+     * are applied once now and again when the command is rescanned. */
+    if (pos + len > count) {
+      cmd = -1;
+      break; /* incomplete cmd */
+    }
+
+    switch (cmd) {
+      case 0x02:
+        gput_sum(cpu_cycles_sum, cpu_cycles,
+            gput_fill(LE16TOH(slist[4]) & 0x3ff,
+                      LE16TOH(slist[5]) & 0x1ff));
+        break;
+      case 0x20 ... 0x23:
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base());
+        break;
+      case 0x24 ... 0x27:
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t());
+        gpu.ex_regs[1] &= ~0x1ff;
+        gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
+        break;
+      case 0x28 ... 0x2b:
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base());
+        break;
+      case 0x2c ... 0x2f:
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t());
+        gpu.ex_regs[1] &= ~0x1ff;
+        gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
+        break;
+      case 0x30 ... 0x33:
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
+        break;
+      case 0x34 ... 0x37:
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
+        gpu.ex_regs[1] &= ~0x1ff;
+        gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
+        break;
+      case 0x38 ... 0x3b:
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
+        break;
+      case 0x3c ... 0x3f:
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
+        gpu.ex_regs[1] &= ~0x1ff;
+        gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
+        break;
+      case 0x40 ... 0x47:
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+        break;
+      case 0x48 ... 0x4F:
+        /* Poly-line: keep going until the terminator word, one word per
+         * extra vertex. The word comes from the command stream, so it is
+         * converted like every other operand read in this function. */
+        for (v = 3; pos + v < count; v++)
+        {
+          gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+          if ((LE32TOH(list[v]) & 0xf000f000) == 0x50005000)
+            break;
+        }
+        len += v - 3;
+        break;
+      case 0x50 ... 0x57:
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+        break;
+      case 0x58 ... 0x5F:
+        /* Shaded poly-line: two words per extra vertex. */
+        for (v = 4; pos + v < count; v += 2)
+        {
+          gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+          if ((LE32TOH(list[v]) & 0xf000f000) == 0x50005000)
+            break;
+        }
+        len += v - 4;
+        break;
+      case 0x60 ... 0x63:
+        gput_sum(cpu_cycles_sum, cpu_cycles,
+            gput_sprite(LE16TOH(slist[4]) & 0x3ff,
+                        LE16TOH(slist[5]) & 0x1ff));
+        break;
+      case 0x64 ... 0x67:
+        gput_sum(cpu_cycles_sum, cpu_cycles,
+            gput_sprite(LE16TOH(slist[6]) & 0x3ff,
+                        LE16TOH(slist[7]) & 0x1ff));
+        break;
+      case 0x68 ... 0x6b:
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1));
+        break;
+      case 0x70 ... 0x77:
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(8, 8));
+        break;
+      case 0x78 ... 0x7f:
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(16, 16));
+        break;
+      default:
+        if ((cmd & 0xf8) == 0xe0)
+          gpu.ex_regs[cmd & 7] = list[0];
+        break;
+    }
+
+    /* Poly-lines may have grown `len` inside the switch, so the length
+     * has to be checked again here. */
+    if (pos + len > count) {
+      cmd = -1;
+      break; /* incomplete cmd */
+    }
+    if (0x80 <= cmd && cmd <= 0xdf)
+      break; /* image i/o */
+
+    pos += len;
+  }
+
+  *cycles_sum_out += cpu_cycles_sum;
+  *cycles_last = cpu_cycles;
+  *last_cmd = cmd;
+  return pos;
+}
+
+/* gpulib entry point for a command list. With the render thread running
+ * the list is only scanned here and the scanned prefix is queued for the
+ * thread; otherwise it is rendered immediately and gpu.ex_regs is
+ * refreshed from gpu.scratch_ex_regs. Returns the number of words
+ * consumed. */
+int do_cmd_list(uint32_t *list, int count,
+ int *cycles_sum, int *cycles_last, int *last_cmd)
+{
+  int pos = 0;
+
+  if (thread.running) {
+    pos = scan_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
+    video_thread_queue_cmd(list, pos, *last_cmd);
+  } else {
+    pos = real_do_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
+    memcpy(gpu.ex_regs, gpu.scratch_ex_regs, sizeof(gpu.ex_regs));
+  }
+  return pos;
+}
+
+/* Starts the render thread when threaded rendering is configured, then
+ * initialises the wrapped renderer and returns its result. */
+int renderer_init(void) {
+  if (thread_rendering) {
+    video_thread_start();
+  }
+  return real_renderer_init();
+}
+
+/* Shuts the wrapped renderer down, then stops the render thread if one
+ * is running. */
+void renderer_finish(void) {
+  real_renderer_finish();
+
+  if (thread_rendering && thread.running) {
+    video_thread_stop();
+  }
+}
+
+/* Replays six words starting at ecmds[1] through do_cmd_list when the
+ * thread is running; otherwise forwards to the wrapped renderer. */
+void renderer_sync_ecmds(uint32_t * ecmds) {
+  if (thread.running) {
+    int dummy = 0;
+    do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
+  } else {
+    real_renderer_sync_ecmds(ecmds);
+  }
+}
+
+/* Drains both queues before forwarding to the wrapped renderer. */
+void renderer_update_caches(int x, int y, int w, int h, int state_changed) {
+  renderer_sync();
+  real_renderer_update_caches(x, y, w, h, state_changed);
+}
+
+void renderer_flush_queues(void) {
+  /* Called during DMA and updateLace. We want to sync if it's DMA,
+   * but not if it's updateLace. Instead of syncing here, there's a
+   * renderer_sync call during DMA. */
+  real_renderer_flush_queues();
+}
+
+/*
+ * Normally all GPU commands are processed before rendering the
+ * frame. For games that naturally run < 50/60fps, this is unnecessary
+ * -- it forces the game to render as if it was 60fps and leaves the
+ * GPU idle half the time on a 30fps game, for example.
+ *
+ * Allowing the renderer to wait until a frame is done before
+ * rendering it would give it double, triple, or quadruple the amount
+ * of time to finish before we have to wait for it.
+ *
+ * We can use a heuristic to figure out when to force a render.
+ *
+ * - If a frame isn't done when we're asked to render, wait for it and
+ * put future GPU commands in a separate buffer (for the next frame)
+ *
+ * - If the frame is done, and had no future GPU commands, render it.
+ *
+ * - If we do have future GPU commands, it meant the frame took too
+ * long to render and there's another frame waiting. Stop until the
+ * first frame finishes, render it, and start processing the next
+ * one.
+ *
+ * This may possibly add a frame or two of latency that shouldn't be
+ * different than the real device. It may skip rendering a frame
+ * entirely if a VRAM transfer happens while a frame is waiting, or in
+ * games that natively run at 60fps if frames are coming in too
+ * quickly to process. Depending on how the game treats "60fps," this
+ * may not be noticeable.
+ */
+void renderer_notify_update_lace(int updated) {
+  /* Nothing to schedule when rendering is synchronous. */
+  if (!thread.running) return;
+
+  /* In SYNC mode every call drains both queues. */
+  if (thread_rendering == THREAD_RENDERING_SYNC) {
+    renderer_sync();
+    return;
+  }
+
+  /* When `updated` is set, let the commands held back in the background
+   * queue start rendering (the swap only happens once the active queue
+   * is empty). */
+  if (updated) {
+    cmd_queue_swap();
+    return;
+  }
+
+  pthread_mutex_lock(&thread.queue_lock);
+  if (thread.bg_queue->used || flushed) {
+    /* We have commands for a future frame to run. Force a wait until
+     * the current frame is finished, and start processing the next
+     * frame after it's drawn (see the `updated` clause above). */
+    /* The lock is dropped around renderer_wait because renderer_wait
+     * takes queue_lock itself. */
+    pthread_mutex_unlock(&thread.queue_lock);
+    renderer_wait();
+    pthread_mutex_lock(&thread.queue_lock);
+
+    /* We are no longer holding commands back, so the next frame may
+     * get mixed into the following frame. This is usually fine, but can
+     * result in frameskip-like effects for 60fps games. */
+    flushed = FALSE;
+    hold_cmds = FALSE;
+    needs_display = TRUE;
+    gpu.state.fb_dirty = TRUE;
+  } else if (thread.queue->used) {
+    /* We are still drawing during a vblank. Cut off the current frame
+     * by sending new commands to the background queue and skip
+     * drawing our partly rendered frame to the display. */
+    hold_cmds = TRUE;
+    needs_display = TRUE;
+    gpu.state.fb_dirty = FALSE;
+  } else if (needs_display && !thread.queue->used) {
+    /* We have processed all commands in the queue, render the
+     * buffer. We know we have something to render, because
+     * needs_display is TRUE. */
+    hold_cmds = FALSE;
+    needs_display = FALSE;
+    gpu.state.fb_dirty = TRUE;
+  } else {
+    /* Everything went normally, so do the normal thing. */
+  }
+
+  pthread_mutex_unlock(&thread.queue_lock);
+}
+
+/* Plain pass-through to the wrapped renderer; no queue interaction. */
+void renderer_set_interlace(int enable, int is_odd) {
+  real_renderer_set_interlace(enable, is_odd);
+}
+
+/* Drains both queues, adopts cbs->thread_rendering, and starts or stops
+ * the render thread so that it runs exactly when the mode is not
+ * THREAD_RENDERING_OFF. The config is then forwarded to the wrapped
+ * renderer. */
+void renderer_set_config(const struct rearmed_cbs *cbs) {
+  renderer_sync();
+  thread_rendering = cbs->thread_rendering;
+  if (!thread.running && thread_rendering != THREAD_RENDERING_OFF) {
+    video_thread_start();
+  } else if (thread.running && thread_rendering == THREAD_RENDERING_OFF) {
+    video_thread_stop();
+  }
+  real_renderer_set_config(cbs);
+}
+
+/* Drains both queues before telling the wrapped renderer about the
+ * resolution change. */
+void renderer_notify_res_change(void) {
+  renderer_sync();
+  real_renderer_notify_res_change();
+}
diff --git a/plugins/gpulib/gpulib_thread_if.h b/plugins/gpulib/gpulib_thread_if.h
new file mode 100644
index 000000000..45fdfe01e
--- /dev/null
+++ b/plugins/gpulib/gpulib_thread_if.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+* Copyright (C) 2020 The RetroArch Team *
+* *
+* This program is free software; you can redistribute it and/or modify *
+* it under the terms of the GNU General Public License as published by *
+* the Free Software Foundation; either version 2 of the License, or *
+* (at your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+* GNU General Public License for more details. *
+* *
+* You should have received a copy of the GNU General Public License *
+* along with this program; if not, write to the *
+* Free Software Foundation, Inc., *
+* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.
*
+***************************************************************************/
+
+#ifndef __GPULIB_THREAD_H__
+#define __GPULIB_THREAD_H__
+
+/* uint32_t is used in the prototypes below. */
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Only a pointer to the struct is needed here. Declaring the tag at file
+ * scope keeps it from being scoped to the prototype when this header is
+ * included before the struct's definition. */
+struct rearmed_cbs;
+
+/* The real_* functions are the underlying renderer entry points that
+ * gpulib_thread_if.c calls from its same-named wrappers (do_cmd_list,
+ * renderer_init, ...). Their definitions are not part of this header;
+ * presumably each renderer plugin supplies them -- verify against the
+ * build setup. */
+int real_do_cmd_list(uint32_t *list, int count,
+ int *cycles_sum_out, int *cycles_last, int *last_cmd);
+int real_renderer_init(void);
+void real_renderer_finish(void);
+void real_renderer_sync_ecmds(uint32_t * ecmds);
+void real_renderer_update_caches(int x, int y, int w, int h, int state_changed);
+void real_renderer_flush_queues(void);
+void real_renderer_set_interlace(int enable, int is_odd);
+void real_renderer_set_config(const struct rearmed_cbs *config);
+void real_renderer_notify_res_change(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __GPULIB_THREAD_H__ */