-
Notifications
You must be signed in to change notification settings - Fork 7
Open
Labels
enhancement (New feature or request), help wanted (Extra attention is needed)
Description
We should add support for setting the CPU affinity on Windows and macOS, which would make the -cpu/-core options more useful and should result in a performance improvement.
For Windows, when built without the hwloc library, this would need some Windows-specific code; when built with hwloc, the existing Linux code would need to be refactored so that it runs on both Linux and Windows:
Lines 304 to 423 in b6dda52
| #elif defined(OS_TYPE_LINUX) && !defined(__MINGW32__) | |
| #if 0 | |
| /* | |
| // This is the affinity API tied to pthread library ... interestingly, it's less portable than the | |
| // Linux system-centric one below; e.g. GCC gives "error: unknown type name ‘cpuset_t’; did you mean ‘cpu_set_t’?" here: | |
| int i,errcode; | |
| cpuset_t *cset; | |
| pthread_t pth; | |
| cset = cpuset_create(); | |
| if (cset == NULL) { | |
| err(EXIT_FAILURE, "cpuset_create"); | |
| } | |
| i = my_id % pool->num_of_cores; // get cpu mask using sequential thread ID modulo #available cores | |
| cpuset_set((cpuid_t)i, cset); | |
| pth = pthread_self(); | |
| errcode = pthread_setaffinity_np(pth, cpuset_size(cset), cset); | |
| if (errcode) { | |
| perror("pthread_setaffinity_np"); | |
| } | |
| cpuset_destroy(cset); | |
| */ | |
| #else | |
| cpu_set_t cpu_set; | |
| int i,errcode; | |
| pid_t thread_id = syscall (__NR_gettid); | |
| #if THREAD_POOL_DEBUG | |
| printf("executing worker thread id %u, syscall_id = %u\n", my_id, thread_id); | |
| #endif | |
| i = my_id % pool->num_of_cores; | |
| i = mi64_ith_set_bit(CORE_SET, i+1, MAX_CORES>>6); // Remember, [i]th-bit index in arglist is *unit* offset, i.e. must be in [1,MAX_CORES] | |
| if(i < 0) { | |
| fprintf(stderr,"Affinity CORE_SET does not have a [%u]th set bit!",my_id % pool->num_of_cores); | |
| ASSERT(0, "Aborting."); | |
| } | |
| #if INCLUDE_HWLOC | |
| if(HWLOC_AFFINITY) { // Global, declared in Mdata.h, defined in Mlucas.c, set in util.c::host_init() | |
| hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); | |
| hwloc_obj_t obj = hwloc_get_obj_by_type(hw_topology, HWLOC_OBJ_PU, i); | |
| if (obj) { | |
| hwloc_bitmap_or(cpuset, cpuset, obj->cpuset); | |
| } else { | |
| snprintf(cbuf,STR_MAX_LEN*2,"[hwloc] Error: HWLOC_OBJ_PU[%u] not found.\n",i); | |
| fprintf(stderr,"%s",cbuf); | |
| } | |
| // Set affinity to specified logical CPUs: | |
| if (hwloc_set_cpubind(hw_topology, cpuset, HWLOC_CPUBIND_THREAD)) { | |
| int error = errno; | |
| hwloc_bitmap_snprintf (str, sizeof (str), cpuset); | |
| snprintf(cbuf,STR_MAX_LEN*2,"[hwloc] Warning: Unable to set affinity to cpuset %s: %s; leaving up to OS to manage thread/core binding.\n",str,strerror(error)); | |
| fprintf(stderr,"%s",cbuf); | |
| #if THREAD_POOL_DEBUG | |
| } else { | |
| printf("[hwloc] tid = %d: HWLOC_OBJ_PU[%u], lidx %u, pidx %u: setaffinity[%d] to cpuset %s\n",my_id,i,obj->logical_index,obj->os_index,str); | |
| #endif | |
| } | |
| hwloc_bitmap_free(cpuset); | |
| } // HWLOC_AFFINITY = True? | |
| #else // INCLUDE_HWLOC = False: | |
| // get cpu mask using sequential thread ID modulo #available cores in runtime-specified affinity set: | |
| CPU_ZERO (&cpu_set); | |
| CPU_SET(i, &cpu_set); | |
| errcode = sched_setaffinity(thread_id, sizeof(cpu_set), &cpu_set); | |
| #if THREAD_POOL_DEBUG | |
| printf("syscall_id = %u, tid = %d, setaffinity[%d] = %d, ISSET[%d] = %d\n", thread_id,my_id,i,errcode,i,CPU_ISSET(i, &cpu_set)); | |
| #endif | |
| if (errcode) { | |
| perror("sched_setaffinity"); | |
| fprintf(stderr,"INFO: Your run should be OK, but leaving up to OS to manage thread/core binding.\n"); | |
| } | |
| #endif | |
| #endif // INCLUDE_HWLOC? | |
| #elif defined(OS_TYPE_MACOSX) | |
| thread_t thr = mach_thread_self(); | |
| thread_extended_policy_data_t epolicy; | |
| epolicy.timeshare = FALSE; | |
| kern_return_t ret = thread_policy_set( | |
| thr, THREAD_EXTENDED_POLICY, | |
| (thread_policy_t) &epolicy, THREAD_EXTENDED_POLICY_COUNT); | |
| if (ret != KERN_SUCCESS) { | |
| printf("thread_policy_set returned %d", ret); | |
| exit(-1); | |
| } | |
| thread_affinity_policy_data_t apolicy; | |
| int i = my_id % pool->num_of_cores; // get cpu mask using sequential thread ID modulo #available cores | |
| i = mi64_ith_set_bit(CORE_SET, i+1, MAX_CORES>>6); // Remember, [i]th-bit index in arglist is *unit* offset, i.e. must be in [1,MAX_CORES] | |
| if(i < 0) { | |
| fprintf(stderr,"Affinity CORE_SET does not have a [%u]th set bit!",my_id % pool->num_of_cores); | |
| ASSERT(0, "Aborting."); | |
| } | |
| apolicy.affinity_tag = i; // set affinity tag | |
| #if THREAD_POOL_DEBUG | |
| printf("Setting CPU = %d affinity of worker thread id %u, mach_id = %u\n", i, my_id, thr); | |
| #endif | |
| ret = thread_policy_set( | |
| thr, THREAD_EXTENDED_POLICY, | |
| (thread_policy_t) &apolicy, THREAD_EXTENDED_POLICY_COUNT); | |
| if (ret != KERN_SUCCESS) { | |
| printf("thread_policy_set returned %d", ret); | |
| exit(-1); | |
| } | |
| #else | |
| printf("executing worker thread id %u, #cores = %u\n", my_id, pool->num_of_cores); | |
| #endif |
macOS does not support setting the CPU affinity directly, but @Artoria2e5 suggested a workaround in this forum thread. See their PR for Prime95: primesearch/Prime95#2
Metadata
Metadata
Assignees
Labels
enhancement (New feature or request), help wanted (Extra attention is needed)