Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

miscellaneous fixes #71

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions validation_tests/papi-rocm/Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
PAPI_ROCM_ROOT ?= $(ROCM_ROOT) #/path/to/rocm/

CC = $(PAPI_ROCM_ROOT)/bin/hipcc
CXX = $(PAPI_ROCM_ROOT)/bin/hipcc
CXXFLAGS += -g -O2 -fopenmp
CC = $(HIP_ROOT)/bin/hipcc
CXX = $(HIP_ROOT)/bin/hipcc
CXXFLAGS += -g -O2
CPPFLAGS += -I$(PAPI_ROCM_ROOT)/include -I$(PAPI_ROOT)/include
LDFLAGS += -L$(PAPI_ROOT)/lib -lpapi -fopenmp
LDFLAGS += -L$(PAPI_ROOT)/lib -lpapi

ALL: single_monitor multi_monitor overflow

Expand Down
2 changes: 1 addition & 1 deletion validation_tests/papi-rocm/compile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
set -e
set -x

make PAPIROOT=$PAPI_ROOT
make PAPIROOT=$PAPI_ROOT
230 changes: 130 additions & 100 deletions validation_tests/papi-rocm/multi_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,119 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <omp.h>
#include <pthread.h>
#include "papi.h"
#include "matmul.h"

#define NUM_EVENTS 2
const char *events[NUM_EVENTS] = {
"rocm:::SQ_WAVES",
"rocm:::SQ_WAVES_RESTORED",
};

typedef struct {
int num_thread;
} thread_arg_t;

void *run(void *arg)
{
int eventset = PAPI_NULL;
int papi_errno = PAPI_create_eventset(&eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_create_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}

int thread_num = ((thread_arg_t *) arg)->num_thread;
for (int j = 0; j < NUM_EVENTS; ++j) {
char named_event[PAPI_MAX_STR_LEN] = { 0 };
sprintf(named_event, "%s:device=%d", events[j], thread_num);
papi_errno = PAPI_add_named_event(eventset, (const char *) named_event);
if (papi_errno != PAPI_OK && papi_errno != PAPI_ENOEVNT) {
fprintf(stderr, "ERROR: PAPI_add_named_event: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}
}

papi_errno = PAPI_start(eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_start: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}

hipError_t hip_errno = hipSetDevice(thread_num);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipSetDevice: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

hipStream_t stream;
hip_errno = hipStreamCreate(&stream);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipStreamCreate: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

void *handle;
int matmul_errno;
matmul_errno = matmul_init(&handle);
if (matmul_errno != MATMUL_SUCCESS) {
fprintf(stderr, "ERROR: matmul_init: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

matmul_errno = matmul_run(handle, stream);
if (matmul_errno != MATMUL_SUCCESS) {
fprintf(stderr, "ERROR: matmul_run: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

hip_errno = hipStreamSynchronize(stream);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipStreamSynchronize: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

hip_errno = hipStreamDestroy(stream);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipStreamDestroy: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

matmul_errno = matmul_finalize(&handle);
if (matmul_errno != MATMUL_SUCCESS) {
fprintf(stderr, "ERROR: matmul_finalize: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

long long counters[NUM_EVENTS] = { 0 };
papi_errno = PAPI_stop(eventset, counters);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_stop: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}

for (int i = 0; i < NUM_EVENTS; ++i) {
fprintf(stdout, "[tid:%d] %s:device=%d : %lld\n",
thread_num, events[i], thread_num,
counters[i]);
}

papi_errno = PAPI_cleanup_eventset(eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_cleanup_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}

papi_errno = PAPI_destroy_eventset(&eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_destroy_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}

pthread_exit(NULL);
}

int main(int argc, char *argv[])
{
int papi_errno;
Expand All @@ -30,7 +139,7 @@ int main(int argc, char *argv[])
exit(EXIT_FAILURE);
}

papi_errno = PAPI_thread_init((unsigned long (*)(void)) omp_get_thread_num);
papi_errno = PAPI_thread_init((unsigned long (*)(void)) pthread_self);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_thread_init: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
Expand All @@ -44,112 +153,33 @@ int main(int argc, char *argv[])
}

num_threads = (num_threads < num_devices) ? num_threads : num_devices;
omp_set_num_threads(num_threads);
fprintf(stdout, "Run rocm test with %d threads\n", num_threads);

#define NUM_EVENTS 2
const char *events[NUM_EVENTS] = {
"rocm:::SQ_WAVES",
"rocm:::SQ_WAVES_RESTORED",
};

#pragma omp parallel
{
int eventset = PAPI_NULL;
papi_errno = PAPI_create_eventset(&eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_create_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}

int thread_num = omp_get_thread_num();
for (int j = 0; j < NUM_EVENTS; ++j) {
char named_event[PAPI_MAX_STR_LEN] = { 0 };
sprintf(named_event, "%s:device=%d", events[j], thread_num);
papi_errno = PAPI_add_named_event(eventset, (const char *) named_event);
if (papi_errno != PAPI_OK && papi_errno != PAPI_ENOEVNT) {
fprintf(stderr, "ERROR: PAPI_add_named_event: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}
}

papi_errno = PAPI_start(eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_start: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}

hip_errno = hipSetDevice(thread_num);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipSetDevice: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

hipStream_t stream;
hip_errno = hipStreamCreate(&stream);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipStreamCreate: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

void *handle;
int matmul_errno;
matmul_errno = matmul_init(&handle);
if (matmul_errno != MATMUL_SUCCESS) {
fprintf(stderr, "ERROR: matmul_init: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

matmul_errno = matmul_run(handle, stream);
if (matmul_errno != MATMUL_SUCCESS) {
fprintf(stderr, "ERROR: matmul_run: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

hip_errno = hipStreamSynchronize(stream);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipStreamSynchronize: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

hip_errno = hipStreamDestroy(stream);
if (hip_errno != hipSuccess) {
fprintf(stderr, "ERROR: hipStreamDestroy: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}

matmul_errno = matmul_finalize(&handle);
if (matmul_errno != MATMUL_SUCCESS) {
fprintf(stderr, "ERROR: matmul_finalize: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
exit(EXIT_FAILURE);
}
pthread_t *thread = (pthread_t *)malloc(num_threads * sizeof(*thread));
if (thread == NULL) {
return EXIT_FAILURE;
}

long long counters[NUM_EVENTS] = { 0 };
papi_errno = PAPI_stop(eventset, counters);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_stop: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}
thread_arg_t *arg = (thread_arg_t *)malloc(num_threads * sizeof(*arg));
if (arg == NULL) {
return EXIT_FAILURE;
}

for (int i = 0; i < NUM_EVENTS; ++i) {
fprintf(stdout, "[tid:%d] %s:device=%d : %lld\n",
omp_get_thread_num(), events[i], thread_num,
counters[i]);
}
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

papi_errno = PAPI_cleanup_eventset(eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_cleanup_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}
for (int i = 0; i < num_threads; ++i) {
arg[i].num_thread = i;
pthread_create(&thread[i], &attr, run, &arg[i]);
}

papi_errno = PAPI_destroy_eventset(&eventset);
if (papi_errno != PAPI_OK) {
fprintf(stderr, "ERROR: PAPI_destroy_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
exit(EXIT_FAILURE);
}
for (int i = 0; i < num_threads; ++i) {
pthread_join(thread[i], NULL);
}

free(thread);
free(arg);
PAPI_shutdown();

return EXIT_SUCCESS;
Expand Down
17 changes: 10 additions & 7 deletions validation_tests/papi-rocm/run_rocm_tests.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
#!/bin/bash

HSA_ROOT=$PAPI_ROCM_ROOT
PAPI_ROOT="$(dirname $(dirname `which papi_component_avail`))"

echo "=== Run rocm test in sampling mode"
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./single_monitor
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./multi_monitor --threads=1
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./multi_monitor --threads=2
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./overflow
echo "single monitor" && ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$HSA_ROOT/lib:$LD_LIBRARY_PATH ./single_monitor && echo ""
echo "multi monitor" && ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$HSA_ROOT/lib:$LD_LIBRARY_PATH ./multi_monitor --threads=1 && echo ""
echo "multi monitor" && ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$HSA_ROOT/lib:$LD_LIBRARY_PATH ./multi_monitor --threads=2 && echo ""
echo "overflow" && ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$HSA_ROOT/lib:$LD_LIBRARY_PATH ./overflow && echo ""

echo "=== Run rocm test in intercept mode"
ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./single_monitor
ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./multi_monitor --threads=1
ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./multi_monitor --threads=2
echo "single monitor" && ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$HSA_ROOT/lib:$LD_LIBRARY_PATH ./single_monitor && echo ""
echo "multi monitor" && ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$HSA_ROOT/lib:$LD_LIBRARY_PATH ./multi_monitor --threads=1 && echo ""
echo "multi monitor" && ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$HSA_ROOT/lib:$LD_LIBRARY_PATH ./multi_monitor --threads=2 && echo ""
2 changes: 1 addition & 1 deletion validation_tests/papi-rocm/setup.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/bash
. ../../setup.sh
spackLoadUnique papi+rocm@6.0.0.2:
spackLoadUnique papi+rocm@master: