diff --git a/configure.ac b/configure.ac index 321f2f4de6..384582fd38 100644 --- a/configure.ac +++ b/configure.ac @@ -33,6 +33,7 @@ AC_CHECK_LIB([z], [compress]) AC_C_CONST AC_C_BIGENDIAN +AC_ARG_WITH(gprofflags, [ --with-gprofflags build with gprof flag included], [], [--with_gprofflags=no]) AC_ARG_WITH(pthread, [ --with-pthread build with pthread support], [], [with_pthread=yes]) # Checks for library functions. @@ -73,6 +74,9 @@ AC_SUBST(PSPSDK_LIBDIR) # CFLAGS and CXXFLAGS used to build pspsdk libraries. PSPSDK_CFLAGS="$CFLAGS -mno-gpopt -Wall -Werror -D_PSP_FW_VERSION=600" +if test "$with_gprofflags" = yes ; then + PSPSDK_CFLAGS="$PSPSDK_CFLAGS -pg -g" +fi PSPSDK_CXXFLAGS="$PSPSDK_CFLAGS -fno-exceptions -fno-rtti" if test "$with_pthread" = no ; then PSPSDK_CFLAGS="$PSPSDK_CFLAGS -DPSP_WITHOUT_PTHREAD" diff --git a/src/prof/Makefile.am b/src/prof/Makefile.am index 1da619a3a2..93bcc1d6d5 100644 --- a/src/prof/Makefile.am +++ b/src/prof/Makefile.am @@ -11,7 +11,7 @@ CFLAGS = @PSPSDK_CFLAGS@ -std=gnu99 -Wall -Wmissing-prototypes CCASFLAGS = $(CFLAGS) -I$(top_srcdir)/src/base -I$(top_srcdir)/src/kernel libpspprofincludedir = @PSPSDK_INCLUDEDIR@ -libpspprofinclude_HEADERS = +libpspprofinclude_HEADERS = pspprof.h lib_LIBRARIES = libpspprof.a libpspprof_a_SOURCES = prof.c mcount.s diff --git a/src/prof/mcount.s b/src/prof/mcount.s index 3cb207d328..620c5571d3 100644 --- a/src/prof/mcount.s +++ b/src/prof/mcount.s @@ -17,30 +17,12 @@ _mcount: sd $6, 32($29) sd $7, 40($29) -# Make sure we're not recursively called when compiling __mcount() -# With -pg - la $4, _busy - lw $5, 0($4) - bnez $5, done - nop - -# Mark busy - li $5, 1 - sw $5, 0($4) - # Call internal C handler move $4, $1 move $5, $31 jal __mcount nop -# Unmark busy - la $4, _busy - li $5, 0 - sw $5, 0($4) - - done: - # Restore registers ld $31, 0($29) ld $1, 8($29) @@ -52,9 +34,6 @@ _mcount: j $31 move $31, $1 # restore caller's ra -_busy: - .space 4 - .end _mcount .set reorder diff --git a/src/prof/prof.c b/src/prof/prof.c index f4d08410e6..f389a72e7b 100644 --- a/src/prof/prof.c +++ b/src/prof/prof.c @@ -13,6 +13,8 @@ #include #include +#include + #define GMON_PROF_ON 0 #define GMON_PROF_BUSY 1 #define GMON_PROF_ERROR 2 @@ -78,8 +80,11 @@ extern int _ftext; extern int _etext; /* forward declarations */ +__attribute__((__no_instrument_function__, __no_profile_instrument_function__)) void __gprof_cleanup(void); +__attribute__((__no_instrument_function__, __no_profile_instrument_function__)) void __mcount(unsigned int, unsigned int); +__attribute__((__no_instrument_function__, __no_profile_instrument_function__)) static SceUInt timer_handler(SceUID uid, SceKernelSysClock *c1, SceKernelSysClock *c2, void *common); /** Initializes pg library @@ -89,6 +94,7 @@ static SceUInt timer_handler(SceUID uid, SceKernelSysClock *c1, SceKernelSysCloc for sampling statistics. Note that this also installs a timer that runs at 1000 hert. */ +__attribute__((__no_instrument_function__, __no_profile_instrument_function__)) static void initialize() { initialized = 1; @@ -122,6 +128,15 @@ static void initialize() memset((void *)gp.samples, '\0', gp.nsamples * (sizeof(unsigned int ))); gp.timer = sceKernelCreateVTimer("gprof timer", NULL); + if (gp.timer < 0) + { + free(gp.arcs); + free(gp.samples); + gp.arcs = 0; + gp.samples = 0; + gp.state = GMON_PROF_ERROR; + return; + } SceKernelSysClock sc; sc.hi = 0; @@ -151,12 +166,17 @@ static void initialize() } } -/** Writes gmon.out dump file and stops profiling +__attribute__((__no_instrument_function__, __no_profile_instrument_function__)) +void gprof_start(void) { + // There is already a profiling session running, let's stop it and ignore the result + if (gp.state == GMON_PROF_ON) { + gprof_stop(NULL, 0); + } + initialize(); +} - Called from atexit() handler; will dump out a host:gmon.out file - with all collected information. -*/ -void __gprof_cleanup() +__attribute__((__no_instrument_function__, __no_profile_instrument_function__)) +void gprof_stop(const char* filename, int should_dump) { FILE *fp; int i; @@ -171,29 +191,47 @@ void __gprof_cleanup() /* disable profiling before we make plenty of libc calls */ gp.state = GMON_PROF_OFF; + // Delete timer sceKernelStopVTimer(gp.timer); - - fp = fopen("gmon.out", "wb"); - hdr.lpc = gp.lowpc; - hdr.hpc = gp.highpc; - hdr.ncnt = sizeof(hdr) + (sizeof(unsigned int) * gp.nsamples); - hdr.version = GMONVERSION; - hdr.profrate = SAMPLE_FREQ; - hdr.resv[0] = 0; - hdr.resv[1] = 0; - hdr.resv[2] = 0; - fwrite(&hdr, 1, sizeof(hdr), fp); - fwrite(gp.samples, gp.nsamples, sizeof(unsigned int), fp); - - for (i=0; i 0) + sceKernelDeleteVTimer(gp.timer); + + if (should_dump) { + fp = fopen(filename, "wb"); + hdr.lpc = gp.lowpc; + hdr.hpc = gp.highpc; + hdr.ncnt = sizeof(hdr) + (sizeof(unsigned int) * gp.nsamples); + hdr.version = GMONVERSION; + hdr.profrate = SAMPLE_FREQ; + hdr.resv[0] = 0; + hdr.resv[1] = 0; + hdr.resv[2] = 0; + fwrite(&hdr, 1, sizeof(hdr), fp); + fwrite(gp.samples, gp.nsamples, sizeof(unsigned int), fp); + + for (i=0; i 0) + { + fwrite(gp.arcs + i, sizeof(struct rawarc), 1, fp); + } } + + fclose(fp); } - fclose(fp); + // Free memory + free(gp.arcs); + free(gp.samples); +} + +/** Writes gmon.out dump file and stops profiling + Called from atexit() handler; will dump out a gmon.out file + at cwd with all collected information. +*/ +__attribute__((__no_instrument_function__, __no_profile_instrument_function__)) +void __gprof_cleanup() +{ + gprof_stop("gmon.out", 1); } /** Internal C handler for _mcount() @@ -205,6 +243,7 @@ void __gprof_cleanup() beginning of each compiled routine, which eventually brings the control to here. */ +__attribute__((__no_instrument_function__, __no_profile_instrument_function__)) void __mcount(unsigned int frompc, unsigned int selfpc) { int e; @@ -238,6 +277,7 @@ void __mcount(unsigned int frompc, unsigned int selfpc) /** Internal timer handler */ +__attribute__((__no_instrument_function__, __no_profile_instrument_function__)) static SceUInt timer_handler(SceUID uid, SceKernelSysClock *requested, SceKernelSysClock *actual, void *common) { unsigned int frompc = gp.pc; diff --git a/src/prof/pspprof.h b/src/prof/pspprof.h new file mode 100644 index 0000000000..449ae636a2 --- /dev/null +++ b/src/prof/pspprof.h @@ -0,0 +1,41 @@ +/* + * PSP Software Development Kit - https://github.com/pspdev + * ----------------------------------------------------------------------- + * Licensed under the BSD license, see LICENSE in PSPSDK root for details. + * + * pspprof.h - Prototypes for the profiler library + * + * Copyright (c) 2006 Urchin + * + */ +#ifndef __PSPPROF_H__ +#define __PSPPROF_H__ + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Start the profiler. + * If the profiler is already running, this function stop previous one, + * and ignore the result. + * Finally, it initializes a new profiler session. + */ +__attribute__((__no_instrument_function__, __no_profile_instrument_function__)) +void gprof_start(void); +/** + * Stop the profiler. + * If the profiler is not running, this function does nothing. + * @param filename The name of the file to write the profiling data to. + * @param should_dump If 1, the profiling data will be written to the file. + * If 0, the profiling data will be discarded. + */ +__attribute__((__no_instrument_function__, __no_profile_instrument_function__)) +void gprof_stop(const char* filename, int should_dump); + +#ifdef __cplusplus +} +#endif + +#endif /* __PSPPROF_H__ */ diff --git a/src/samples/Makefile.am b/src/samples/Makefile.am index ba6f7dd924..2db7137b7d 100644 --- a/src/samples/Makefile.am +++ b/src/samples/Makefile.am @@ -12,6 +12,8 @@ SAMPLES = \ debug/debugkb \ debug/sio \ debug/gdb \ + gprof/basic \ + gprof/custom \ gu/beginobject \ gu/blend \ gu/blit \ diff --git a/src/samples/Makefile.samples b/src/samples/Makefile.samples index b7f181cccb..e8c88ffc3d 100644 --- a/src/samples/Makefile.samples +++ b/src/samples/Makefile.samples @@ -9,6 +9,8 @@ SAMPLES = \ debug/debugkb \ debug/sio \ debug/gdb \ + gprof/basic \ + gprof/custom \ gu/beginobject \ gu/blend \ gu/blit \ diff --git a/src/samples/gprof/basic/Makefile.sample b/src/samples/gprof/basic/Makefile.sample new file mode 100644 index 0000000000..54e8ff026b --- /dev/null +++ b/src/samples/gprof/basic/Makefile.sample @@ -0,0 +1,16 @@ +TARGET = gprofbasic +OBJS = main.o + +INCDIR = +CFLAGS = -O2 -Wall -pg -g +CXXFLAGS = $(CFLAGS) -fno-exceptions -fno-rtti +ASFLAGS = $(CFLAGS) + +LIBDIR = +LDFLAGS = -pg -g + +EXTRA_TARGETS = EBOOT.PBP +PSP_EBOOT_TITLE = GProf Basic Example + +PSPSDK=$(shell psp-config --pspsdk-path) +include $(PSPSDK)/lib/build.mak diff --git a/src/samples/gprof/basic/README.md b/src/samples/gprof/basic/README.md new file mode 100644 index 0000000000..cbe842ca33 --- /dev/null +++ b/src/samples/gprof/basic/README.md @@ -0,0 +1,66 @@ +Sample program to show how to use the `gprof` feature. + +The requiremnts are quite easy, just adding `-g -pg` flags to the `CFLAGS` and `LDFLAGS` is enough to make things to work out of the box. + +Firstly execute your program, then once program ends it will automatically generates a `gmon.out` file at CWD level. + +In order to inspect the content of the generated file you need to use the `psp-gprof` binary. + +For instance, following the next syntax: +``` +psp-gprof -b {binary.elf} gmon.out +``` + +like: +``` +psp-gprof -b gprofbasic.elf gmon.out +``` + + +It will show something like: +``` +Flat profile: + +Each sample counts as 0.001 seconds. + % cumulative self self total + time seconds seconds calls ms/call ms/call name + 95.98 0.17 0.17 104728 0.00 0.00 is_prime + 4.02 0.17 0.01 1 7.00 7.00 dummy_function + 0.00 0.17 0.00 1 0.00 174.00 main + 0.00 0.17 0.00 1 0.00 167.00 sum_of_square_roots + + + Call graph + + +granularity: each sample hit covers 2 byte(s) for 0.57% of 0.17 seconds + +index % time self children called name + 0.00 0.17 1/1 _main [2] +[1] 100.0 0.00 0.17 1 main [1] + 0.00 0.17 1/1 sum_of_square_roots [4] + 0.01 0.00 1/1 dummy_function [5] +----------------------------------------------- + +[2] 100.0 0.00 0.17 _main [2] + 0.00 0.17 1/1 main [1] +----------------------------------------------- + 0.17 0.00 104728/104728 sum_of_square_roots [4] +[3] 96.0 0.17 0.00 104728 is_prime [3] +----------------------------------------------- + 0.00 0.17 1/1 main [1] +[4] 96.0 0.00 0.17 1 sum_of_square_roots [4] + 0.17 0.00 104728/104728 is_prime [3] +----------------------------------------------- + 0.01 0.00 1/1 main [1] +[5] 4.0 0.01 0.00 1 dummy_function [5] +----------------------------------------------- + + +Index by function name + + [5] dummy_function [1] main + [3] is_prime [4] sum_of_square_roots +``` + +Cheers \ No newline at end of file diff --git a/src/samples/gprof/basic/main.c b/src/samples/gprof/basic/main.c new file mode 100644 index 0000000000..b2806df62d --- /dev/null +++ b/src/samples/gprof/basic/main.c @@ -0,0 +1,78 @@ +/* + * PSP Software Development Kit - https://github.com/pspdev + * ----------------------------------------------------------------------- + * Licensed under the BSD license, see LICENSE in PSPSDK root for details. + * + * main.c - A basic example for checking the GProf profiler. + * + * Copyright (c) 2024 Francisco Javier Trujillo Mata - fjtrujy@gmail.com + * + */ +#include +#include +#include +#include + +#include +#include + +PSP_MODULE_INFO("GProf Basic Example", 0, 1, 1); +PSP_MAIN_THREAD_ATTR(THREAD_ATTR_USER | THREAD_ATTR_VFPU); + +// Function to check if a number is prime +int is_prime(int num) +{ + if (num <= 1) + return 0; + if (num <= 3) + return 1; + if (num % 2 == 0 || num % 3 == 0) + return 0; + for (int i = 5; i * i <= num; i += 6) + { + if (num % i == 0 || num % (i + 2) == 0) + return 0; + } + return 1; +} + +// Function to compute the sum of square roots of the first N prime numbers +double sum_of_square_roots(int N) +{ + int count = 0; + int num = 2; + double sum = 0.0; + + while (count < N) + { + if (is_prime(num)) + { + sum += sqrt(num); + count++; + } + num++; + } + return sum; +} + +int dummy_function() +{ + int i; + for (i = 0; i < 10000; i++) + { + printf("."); + } + printf("\n"); + return 0; +} + +int main(int argc, char *argv[]) +{ + printf("Hello, world!\n"); + dummy_function(); + int N = 10000; // Large number of primes to compute + printf("Sum of square roots of the first %d prime numbers is %lf\n", N, sum_of_square_roots(N)); + printf("Goodbye, world!\n"); + + return 0; +} \ No newline at end of file diff --git a/src/samples/gprof/custom/Makefile.sample b/src/samples/gprof/custom/Makefile.sample new file mode 100644 index 0000000000..3338be0c1a --- /dev/null +++ b/src/samples/gprof/custom/Makefile.sample @@ -0,0 +1,16 @@ +TARGET = gprofcustom +OBJS = main.o + +INCDIR = +CFLAGS = -O2 -Wall -pg -g +CXXFLAGS = $(CFLAGS) -fno-exceptions -fno-rtti +ASFLAGS = $(CFLAGS) + +LIBDIR = +LDFLAGS = -pg -g + +EXTRA_TARGETS = EBOOT.PBP +PSP_EBOOT_TITLE = GProf Custom Example + +PSPSDK=$(shell psp-config --pspsdk-path) +include $(PSPSDK)/lib/build.mak diff --git a/src/samples/gprof/custom/README.md b/src/samples/gprof/custom/README.md new file mode 100644 index 0000000000..16aaa5a62e --- /dev/null +++ b/src/samples/gprof/custom/README.md @@ -0,0 +1,57 @@ +More advance example about how to use the `gprof` feature. + +The requiremnts are quite easy, just adding `-g -pg` flags to the `CFLAGS` and `LDFLAGS` is enough to make things to work out of the box. + +This example shows how to enable profiling just around some specific piece of code. +How `gprof` by default start profiling from the very beginning of the app we must discard that result, this is why we do `gprof_stop(NULL, false);`. +Then we just need to call `gprof_start();` whenever we want to start meassuring our piece of code and `gprof_stop("gmon_custom.out", true);` whenver we want to stop the profiling. + +Firstly execute your program, then once program ends it will automatically generates the output with the given names. + +In order to inspect the content of the generated file you need to use the `psp-gprof` binary. + +For instance, following the next syntax: +``` +psp-gprof -b {binary.elf} {gmon_custom.out} +``` +like: +``` +psp-gprof -b gprofcustom.elf gmon_custom.out +``` + +Output in this example: +``` +Flat profile: + +Each sample counts as 0.001 seconds. + % cumulative self self total + time seconds seconds calls s/call s/call name + 89.78 5.60 5.60 1 5.60 5.60 heavy_operation_3 + 10.22 6.24 0.64 1 0.64 0.64 heavy_operation_2 + + + Call graph + + +granularity: each sample hit covers 2 byte(s) for 0.02% of 6.24 seconds + +index % time self children called name + +[1] 100.0 0.00 6.24 main [1] + 5.60 0.00 1/1 heavy_operation_3 [2] + 0.64 0.00 1/1 heavy_operation_2 [3] +----------------------------------------------- + 5.60 0.00 1/1 main [1] +[2] 89.8 5.60 0.00 1 heavy_operation_3 [2] +----------------------------------------------- + 0.64 0.00 1/1 main [1] +[3] 10.2 0.64 0.00 1 heavy_operation_2 [3] +----------------------------------------------- + + +Index by function name + + [3] heavy_operation_2 [2] heavy_operation_3 +``` + +Cheers. \ No newline at end of file diff --git a/src/samples/gprof/custom/main.c b/src/samples/gprof/custom/main.c new file mode 100644 index 0000000000..7bd73799fb --- /dev/null +++ b/src/samples/gprof/custom/main.c @@ -0,0 +1,78 @@ +/* + * PSP Software Development Kit - https://github.com/pspdev + * ----------------------------------------------------------------------- + * Licensed under the BSD license, see LICENSE in PSPSDK root for details. + * + * main.c - A more advadced example for checking the GProf profiler. + * This example demonstrates how to start and stop the profiler multiple times + * + * Copyright (c) 2024 Francisco Javier Trujillo Mata - fjtrujy@gmail.com + * + */ +#include +#include +#include +#include +#include + +#include +#include +#include + +PSP_MODULE_INFO("GProf Custom Example", 0, 1, 1); +PSP_MAIN_THREAD_ATTR(THREAD_ATTR_USER | THREAD_ATTR_VFPU); + +// Simulates a CPU-bound heavy operation by calculating large numbers of square roots +void heavy_operation_1() { + double result = 0.0; + for (long i = 1; i < 100000; i++) { + result += sqrt((double)i); + } + printf("Result of heavy_operation_1: %f\n", result); +} + +// Simulates a different CPU-bound heavy operation by calculating factorials +void heavy_operation_2() { + unsigned long long result = 1; + for (int i = 1; i < 20; i++) { + result = 1; + for (int j = 1; j <= i * 10000; j++) { + result *= j; + result %= 100000; // Prevent overflow by keeping the result manageable + } + } + printf("Result of heavy_operation_2: %llu\n", result); +} + +// Simulates a mixed heavy operation (CPU + IO-bound) by performing some calculations and sleeping +void heavy_operation_3() { + double result = 1.0; + for (long i = 1; i < 50000; i++) { + result *= log((double)i); + if (i % 10000 == 0) { + printf("Heavy operation 3, part %ld: sleeping for 1 second...\n", i / 1000000); + sleep(1); // Simulate some IO-bound activity + } + } + printf("Result of heavy_operation_3: %f\n", result); +} + +int main() { + // Initial heavy operation (this part will be ignored in profiling) + heavy_operation_1(); + + // Stop profiling (ignore the above operation in gprof output) + gprof_stop(NULL, false); + + // Start profiling again + gprof_start(); + + // Operations to be profiled + heavy_operation_2(); + heavy_operation_3(); + + // Stop profiling and save the output to a custom file + gprof_stop("gmon_custom.out", true); + + return 0; +} \ No newline at end of file