Skip to content

Commit

Permalink
Merge pull request #231 from fjtrujy/gprof_improvements
Browse files Browse the repository at this point in the history
Some `gprof` improvements
  • Loading branch information
fjtrujy authored Aug 24, 2024
2 parents 62c71d2 + 8c477e9 commit d1d5dcf
Show file tree
Hide file tree
Showing 13 changed files with 424 additions and 45 deletions.
4 changes: 4 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ AC_CHECK_LIB([z], [compress])
AC_C_CONST
AC_C_BIGENDIAN

AC_ARG_WITH(gprofflags, [ --with-gprofflags build with gprof flag included], [], [--with_gprofflags=no])
AC_ARG_WITH(pthread, [ --with-pthread build with pthread support], [], [with_pthread=yes])

# Checks for library functions.
Expand Down Expand Up @@ -73,6 +74,9 @@ AC_SUBST(PSPSDK_LIBDIR)

# CFLAGS and CXXFLAGS used to build pspsdk libraries.
PSPSDK_CFLAGS="$CFLAGS -mno-gpopt -Wall -Werror -D_PSP_FW_VERSION=600"
if test "$with_gprofflags" = yes ; then
PSPSDK_CFLAGS="$PSPSDK_CFLAGS -pg -g"
fi
PSPSDK_CXXFLAGS="$PSPSDK_CFLAGS -fno-exceptions -fno-rtti"
if test "$with_pthread" = no ; then
PSPSDK_CFLAGS="$PSPSDK_CFLAGS -DPSP_WITHOUT_PTHREAD"
Expand Down
2 changes: 1 addition & 1 deletion src/prof/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ CFLAGS = @PSPSDK_CFLAGS@ -std=gnu99 -Wall -Wmissing-prototypes
CCASFLAGS = $(CFLAGS) -I$(top_srcdir)/src/base -I$(top_srcdir)/src/kernel

libpspprofincludedir = @PSPSDK_INCLUDEDIR@
libpspprofinclude_HEADERS =
libpspprofinclude_HEADERS = pspprof.h

lib_LIBRARIES = libpspprof.a
libpspprof_a_SOURCES = prof.c mcount.s
Expand Down
21 changes: 0 additions & 21 deletions src/prof/mcount.s
Original file line number Diff line number Diff line change
Expand Up @@ -17,30 +17,12 @@ _mcount:
sd $6, 32($29)
sd $7, 40($29)

# Make sure we're not recursively called when compiling __mcount()
# With -pg
la $4, _busy
lw $5, 0($4)
bnez $5, done
nop

# Mark busy
li $5, 1
sw $5, 0($4)

# Call internal C handler
move $4, $1
move $5, $31
jal __mcount
nop

# Unmark busy
la $4, _busy
li $5, 0
sw $5, 0($4)

done:

# Restore registers
ld $31, 0($29)
ld $1, 8($29)
Expand All @@ -52,9 +34,6 @@ _mcount:
j $31
move $31, $1 # restore caller's ra

_busy:
.space 4

.end _mcount

.set reorder
Expand Down
86 changes: 63 additions & 23 deletions src/prof/prof.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include <stdio.h>
#include <string.h>

#include <pspprof.h>

#define GMON_PROF_ON 0
#define GMON_PROF_BUSY 1
#define GMON_PROF_ERROR 2
Expand Down Expand Up @@ -78,8 +80,11 @@ extern int _ftext;
extern int _etext;

/* forward declarations */
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void __gprof_cleanup(void);
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void __mcount(unsigned int, unsigned int);
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
static SceUInt timer_handler(SceUID uid, SceKernelSysClock *c1, SceKernelSysClock *c2, void *common);

/** Initializes pg library
Expand All @@ -89,6 +94,7 @@ static SceUInt timer_handler(SceUID uid, SceKernelSysClock *c1, SceKernelSysCloc
for sampling statistics. Note that this also installs a timer that
runs at 1000 hert.
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
static void initialize()
{
initialized = 1;
Expand Down Expand Up @@ -122,6 +128,15 @@ static void initialize()
memset((void *)gp.samples, '\0', gp.nsamples * (sizeof(unsigned int )));

gp.timer = sceKernelCreateVTimer("gprof timer", NULL);
if (gp.timer < 0)
{
free(gp.arcs);
free(gp.samples);
gp.arcs = 0;
gp.samples = 0;
gp.state = GMON_PROF_ERROR;
return;
}

SceKernelSysClock sc;
sc.hi = 0;
Expand Down Expand Up @@ -151,12 +166,17 @@ static void initialize()
}
}

/** Writes gmon.out dump file and stops profiling
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void gprof_start(void) {
// There is already a profiling session running, let's stop it and ignore the result
if (gp.state == GMON_PROF_ON) {
gprof_stop(NULL, 0);
}
initialize();
}

Called from atexit() handler; will dump out a host:gmon.out file
with all collected information.
*/
void __gprof_cleanup()
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void gprof_stop(const char* filename, int should_dump)
{
FILE *fp;
int i;
Expand All @@ -171,29 +191,47 @@ void __gprof_cleanup()
/* disable profiling before we make plenty of libc calls */
gp.state = GMON_PROF_OFF;

// Delete timer
sceKernelStopVTimer(gp.timer);

fp = fopen("gmon.out", "wb");
hdr.lpc = gp.lowpc;
hdr.hpc = gp.highpc;
hdr.ncnt = sizeof(hdr) + (sizeof(unsigned int) * gp.nsamples);
hdr.version = GMONVERSION;
hdr.profrate = SAMPLE_FREQ;
hdr.resv[0] = 0;
hdr.resv[1] = 0;
hdr.resv[2] = 0;
fwrite(&hdr, 1, sizeof(hdr), fp);
fwrite(gp.samples, gp.nsamples, sizeof(unsigned int), fp);

for (i=0; i<gp.narcs; i++)
{
if (gp.arcs[i].count > 0)
sceKernelDeleteVTimer(gp.timer);

if (should_dump) {
fp = fopen(filename, "wb");
hdr.lpc = gp.lowpc;
hdr.hpc = gp.highpc;
hdr.ncnt = sizeof(hdr) + (sizeof(unsigned int) * gp.nsamples);
hdr.version = GMONVERSION;
hdr.profrate = SAMPLE_FREQ;
hdr.resv[0] = 0;
hdr.resv[1] = 0;
hdr.resv[2] = 0;
fwrite(&hdr, 1, sizeof(hdr), fp);
fwrite(gp.samples, gp.nsamples, sizeof(unsigned int), fp);

for (i=0; i<gp.narcs; i++)
{
fwrite(gp.arcs + i, sizeof(struct rawarc), 1, fp);
if (gp.arcs[i].count > 0)
{
fwrite(gp.arcs + i, sizeof(struct rawarc), 1, fp);
}
}

fclose(fp);
}

fclose(fp);
// Free memory
free(gp.arcs);
free(gp.samples);
}

/** Writes gmon.out dump file and stops profiling
Called from atexit() handler; will dump out a gmon.out file
at cwd with all collected information.
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void __gprof_cleanup()
{
gprof_stop("gmon.out", 1);
}

/** Internal C handler for _mcount()
Expand All @@ -205,6 +243,7 @@ void __gprof_cleanup()
beginning of each compiled routine, which eventually brings the
control to here.
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void __mcount(unsigned int frompc, unsigned int selfpc)
{
int e;
Expand Down Expand Up @@ -238,6 +277,7 @@ void __mcount(unsigned int frompc, unsigned int selfpc)

/** Internal timer handler
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
static SceUInt timer_handler(SceUID uid, SceKernelSysClock *requested, SceKernelSysClock *actual, void *common)
{
unsigned int frompc = gp.pc;
Expand Down
41 changes: 41 additions & 0 deletions src/prof/pspprof.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* PSP Software Development Kit - https://github.com/pspdev
* -----------------------------------------------------------------------
* Licensed under the BSD license, see LICENSE in PSPSDK root for details.
*
* pspprof.h - Prototypes for the profiler library
*
* Copyright (c) 2006 Urchin
*
*/
#ifndef __PSPPROF_H__
#define __PSPPROF_H__

#ifdef __cplusplus
extern "C" {
#endif


/**
* Start the profiler.
* If the profiler is already running, this function stop previous one,
* and ignore the result.
* Finally, it initializes a new profiler session.
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void gprof_start(void);
/**
* Stop the profiler.
* If the profiler is not running, this function does nothing.
* @param filename The name of the file to write the profiling data to.
* @param should_dump If 1, the profiling data will be written to the file.
* If 0, the profiling data will be discarded.
*/
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
void gprof_stop(const char* filename, int should_dump);

#ifdef __cplusplus
}
#endif

#endif /* __PSPPROF_H__ */
2 changes: 2 additions & 0 deletions src/samples/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ SAMPLES = \
debug/debugkb \
debug/sio \
debug/gdb \
gprof/basic \
gprof/custom \
gu/beginobject \
gu/blend \
gu/blit \
Expand Down
2 changes: 2 additions & 0 deletions src/samples/Makefile.samples
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ SAMPLES = \
debug/debugkb \
debug/sio \
debug/gdb \
gprof/basic \
gprof/custom \
gu/beginobject \
gu/blend \
gu/blit \
Expand Down
16 changes: 16 additions & 0 deletions src/samples/gprof/basic/Makefile.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
TARGET = gprofbasic
OBJS = main.o

INCDIR =
CFLAGS = -O2 -Wall -pg -g
CXXFLAGS = $(CFLAGS) -fno-exceptions -fno-rtti
ASFLAGS = $(CFLAGS)

LIBDIR =
LDFLAGS = -pg -g

EXTRA_TARGETS = EBOOT.PBP
PSP_EBOOT_TITLE = GProf Basic Example

PSPSDK=$(shell psp-config --pspsdk-path)
include $(PSPSDK)/lib/build.mak
66 changes: 66 additions & 0 deletions src/samples/gprof/basic/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
Sample program to show how to use the `gprof` feature.

The requiremnts are quite easy, just adding `-g -pg` flags to the `CFLAGS` and `LDFLAGS` is enough to make things to work out of the box.

Firstly execute your program, then once program ends it will automatically generates a `gmon.out` file at CWD level.

In order to inspect the content of the generated file you need to use the `psp-gprof` binary.

For instance, following the next syntax:
```
psp-gprof -b {binary.elf} gmon.out
```

like:
```
psp-gprof -b gprofbasic.elf gmon.out
```


It will show something like:
```
Flat profile:
Each sample counts as 0.001 seconds.
% cumulative self self total
time seconds seconds calls ms/call ms/call name
95.98 0.17 0.17 104728 0.00 0.00 is_prime
4.02 0.17 0.01 1 7.00 7.00 dummy_function
0.00 0.17 0.00 1 0.00 174.00 main
0.00 0.17 0.00 1 0.00 167.00 sum_of_square_roots
Call graph
granularity: each sample hit covers 2 byte(s) for 0.57% of 0.17 seconds
index % time self children called name
0.00 0.17 1/1 _main [2]
[1] 100.0 0.00 0.17 1 main [1]
0.00 0.17 1/1 sum_of_square_roots [4]
0.01 0.00 1/1 dummy_function [5]
-----------------------------------------------
<spontaneous>
[2] 100.0 0.00 0.17 _main [2]
0.00 0.17 1/1 main [1]
-----------------------------------------------
0.17 0.00 104728/104728 sum_of_square_roots [4]
[3] 96.0 0.17 0.00 104728 is_prime [3]
-----------------------------------------------
0.00 0.17 1/1 main [1]
[4] 96.0 0.00 0.17 1 sum_of_square_roots [4]
0.17 0.00 104728/104728 is_prime [3]
-----------------------------------------------
0.01 0.00 1/1 main [1]
[5] 4.0 0.01 0.00 1 dummy_function [5]
-----------------------------------------------
Index by function name
[5] dummy_function [1] main
[3] is_prime [4] sum_of_square_roots
```

Cheers
Loading

0 comments on commit d1d5dcf

Please sign in to comment.