From e6b82acabfd51ae2e66ee85ff408b520df02db81 Mon Sep 17 00:00:00 2001 From: Catherine Guelque Date: Tue, 28 May 2024 17:01:56 +0200 Subject: [PATCH] Dynamic number of threads using HVM_NUM_THREADS. --- README.md | 3 ++- src/hvm.c | 20 ++++++++++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 694aef88..6dd39c3e 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,8 @@ hvm gen-cu # compile to standalone CUDA All modes produce the same output. The compiled modes require you to compile the generated file (with `gcc file.c -o file`, for example), but are faster to run. The CUDA versions have much higher peak performance but are less stable. As a -rule of thumb, `gen-c` should be used in production. +rule of thumb, `gen-c` should be used in production. +The environment variable `HVM_NUM_THREADS` sets the number of threads used when running with `run-c`. Language -------- diff --git a/src/hvm.c b/src/hvm.c index a1bb0217..8f2da99b 100644 --- a/src/hvm.c +++ b/src/hvm.c @@ -34,7 +34,7 @@ typedef _Atomic(u64) a64; #ifndef TPC_L2 #define TPC_L2 0 #endif -#define TPC (1ul << TPC_L2) +unsigned long TPC = (1ul << TPC_L2); // Types // ----- @@ -117,7 +117,7 @@ typedef u32 Numb; // Numb ::= 29-bit (rounded up to u32) typedef struct Net { APair node_buf[G_NODE_LEN]; // global node buffer APort vars_buf[G_VARS_LEN]; // global vars buffer - APair rbag_buf[G_RBAG_LEN]; // global rbag buffer + APair* rbag_buf; // global rbag buffer a64 itrs; // interaction count a32 idle; // idle thread counter } Net; @@ -565,7 +565,7 @@ static inline u32 rbag_len(Net* net, TM* tm) { // TM // -- -static TM* tm[TPC]; +static TM** tm; TM* tm_new(u32 tid) { TM* tm = malloc(sizeof(TM)); @@ -580,6 +580,7 @@ TM* tm_new(u32 tid) { } void alloc_static_tms() { + tm = malloc(sizeof(TM*) * TPC); for (u32 t = 0; t < TPC; ++t) { tm[t] = tm_new(t); } @@ -589,6 +590,7 @@ void free_static_tms() { for (u32 t = 0; t < TPC; ++t) { free(tm[t]); } + free(tm); } // Net @@ 
-650,11 +652,17 @@ static inline Port vars_take(Net* net, u32 var) { // Initializes a net. static inline void net_init(Net* net) { + net->rbag_buf = malloc(sizeof(APair) * G_RBAG_LEN); // is that needed? atomic_store(&net->itrs, 0); atomic_store(&net->idle, 0); } +static inline void net_free(Net* net) { + free(net->rbag_buf); + free(net); +} + // Allocator // --------- @@ -1931,6 +1939,10 @@ void pretty_print_port(Net* net, Book* book, Port port) { // ---- void hvm_c(u32* book_buffer) { + char* hvm_num_threads = getenv("HVM_NUM_THREADS"); + if (hvm_num_threads) { + TPC = strtoul(hvm_num_threads, NULL, 10); + } // Creates static TMs alloc_static_tms(); @@ -1971,7 +1983,7 @@ void hvm_c(u32* book_buffer) { // Frees everything free_static_tms(); - free(net); + net_free(net); free(book); }