Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamic number of threads using HVM_NUM_THREADS. #358

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ hvm gen-cu <file.hvm> # compile to standalone CUDA
All modes produce the same output. The compiled modes require you to compile the
generated file (with `gcc file.c -o file`, for example), but are faster to run.
The CUDA versions have much higher peak performance but are less stable. As a
rule of thumb, `gen-c` should be used in production.
rule of thumb, `gen-c` should be used in production.
The environment variable `HVM_NUM_THREADS` sets the number of threads used by `run-c`.

Language
--------
Expand Down
20 changes: 16 additions & 4 deletions src/hvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ typedef _Atomic(u64) a64;
#ifndef TPC_L2
#define TPC_L2 0
#endif
#define TPC (1ul << TPC_L2)
unsigned long TPC = (1ul << TPC_L2);

// Types
// -----
Expand Down Expand Up @@ -127,7 +127,7 @@ static const f32 I24_MIN = (f32) (i32) ((-1u) << 23);
typedef struct Net {
APair node_buf[G_NODE_LEN]; // global node buffer
APort vars_buf[G_VARS_LEN]; // global vars buffer
APair rbag_buf[G_RBAG_LEN]; // global rbag buffer
APair* rbag_buf; // global rbag buffer
a64 itrs; // interaction count
a32 idle; // idle thread counter
} Net;
Expand Down Expand Up @@ -609,7 +609,7 @@ static inline u32 rbag_len(Net* net, TM* tm) {
// TM
// --

static TM* tm[TPC];
static TM** tm;

TM* tm_new(u32 tid) {
TM* tm = malloc(sizeof(TM));
Expand All @@ -624,6 +624,7 @@ TM* tm_new(u32 tid) {
}

void alloc_static_tms() {
tm = malloc(sizeof(TM*) * TPC);
for (u32 t = 0; t < TPC; ++t) {
tm[t] = tm_new(t);
}
Expand All @@ -633,6 +634,7 @@ void free_static_tms() {
for (u32 t = 0; t < TPC; ++t) {
free(tm[t]);
}
free(tm);
}

// Net
Expand Down Expand Up @@ -689,11 +691,17 @@ static inline Port vars_take(Net* net, u32 var) {

// Initializes a net.
//
// Allocates the global rbag buffer (G_RBAG_LEN entries of APair) on the heap
// and resets the interaction and idle-thread counters to zero. The buffer is
// owned by the net and is released by net_free().
//
// Aborts the process if the buffer cannot be allocated.
static inline void net_init(Net* net) {
  net->rbag_buf = malloc(sizeof(*net->rbag_buf) * G_RBAG_LEN);
  // Fail fast on allocation failure instead of leaving a NULL buffer behind
  // for a later access to dereference.
  if (net->rbag_buf == NULL) {
    abort();
  }
  // is that needed?
  atomic_store(&net->itrs, 0);
  atomic_store(&net->idle, 0);
}

// Frees a heap-allocated net together with its rbag buffer.
//
// Passing NULL is a no-op, matching the behavior of free(), so callers that
// previously used a plain free(net) keep the same NULL tolerance.
static inline void net_free(Net* net) {
  if (net == NULL) {
    return;
  }
  // The buffer must be released first: it is only reachable through net.
  free(net->rbag_buf);
  free(net);
}

// Allocator
// ---------

Expand Down Expand Up @@ -1761,6 +1769,10 @@ void do_run_io(Net* net, Book* book, Port port);
// ----

void hvm_c(u32* book_buffer) {
char* hvm_num_threads = getenv("HVM_NUM_THREADS");
if (hvm_num_threads) {
TPC = strtoul(hvm_num_threads, NULL, 10);
}
// Creates static TMs
alloc_static_tms();

Expand Down Expand Up @@ -1803,7 +1815,7 @@ void hvm_c(u32* book_buffer) {

// Frees everything
free_static_tms();
free(net);
net_free(net);
free(book);
}

Expand Down