diff --git a/README.md b/README.md index 3e9999e0..8483453a 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,8 @@ hvm gen-cu # compile to standalone CUDA All modes produce the same output. The compiled modes require you to compile the generated file (with `gcc file.c -o file`, for example), but are faster to run. The CUDA versions have much higher peak performance but are less stable. As a -rule of thumb, `gen-c` should be used in production. +rule of thumb, `gen-c` should be used in production. +The environment variable `HVM_NUM_THREADS` sets the number of threads use when using `run-c` Language -------- diff --git a/src/hvm.c b/src/hvm.c index f60c000d..96d3d9ff 100644 --- a/src/hvm.c +++ b/src/hvm.c @@ -40,7 +40,7 @@ typedef _Atomic(u64) a64; #ifndef TPC_L2 #define TPC_L2 0 #endif -#define TPC (1ul << TPC_L2) +unsigned long TPC = (1ul << TPC_L2); // Types // ----- @@ -127,7 +127,7 @@ static const f32 I24_MIN = (f32) (i32) ((-1u) << 23); typedef struct Net { APair node_buf[G_NODE_LEN]; // global node buffer APort vars_buf[G_VARS_LEN]; // global vars buffer - APair rbag_buf[G_RBAG_LEN]; // global rbag buffer + APair* rbag_buf; // global rbag buffer a64 itrs; // interaction count a32 idle; // idle thread counter } Net; @@ -609,7 +609,7 @@ static inline u32 rbag_len(Net* net, TM* tm) { // TM // -- -static TM* tm[TPC]; +static TM** tm; TM* tm_new(u32 tid) { TM* tm = malloc(sizeof(TM)); @@ -624,6 +624,7 @@ TM* tm_new(u32 tid) { } void alloc_static_tms() { + tm = malloc(sizeof(TM*) * TPC); for (u32 t = 0; t < TPC; ++t) { tm[t] = tm_new(t); } @@ -633,6 +634,7 @@ void free_static_tms() { for (u32 t = 0; t < TPC; ++t) { free(tm[t]); } + free(tm); } // Net @@ -689,11 +691,17 @@ static inline Port vars_take(Net* net, u32 var) { // Initializes a net. static inline void net_init(Net* net) { + net->rbag_buf = malloc(sizeof(APair) * G_RBAG_LEN); // is that needed? atomic_store(&net->itrs, 0); atomic_store(&net->idle, 0); } +static inline void net_free(Net* net) { + free(net->rbag_buf); + free(net); +} + // Allocator // --------- @@ -1761,6 +1769,10 @@ void do_run_io(Net* net, Book* book, Port port); // ---- void hvm_c(u32* book_buffer) { + char* hvm_num_threads = getenv("HVM_NUM_THREADS"); + if (hvm_num_threads) { + TPC = strtoul(hvm_num_threads, NULL, 10); + } // Creates static TMs alloc_static_tms(); @@ -1803,7 +1815,7 @@ void hvm_c(u32* book_buffer) { // Frees everything free_static_tms(); - free(net); + net_free(net); free(book); }