Skip to content

Add support for setting the CPU affinity on Windows and macOS #59

@tdulcet

Description

@tdulcet

We should add support for setting the CPU affinity on Windows and macOS, which would make the -cpu/-core options more useful and should result in a performance improvement.

For Windows, when built without the hwloc library, this would need some Windows-specific code, and when built with hwloc, the existing Linux code would need to be refactored so that it runs on both Linux and Windows:

Mlucas/src/threadpool.c

Lines 304 to 423 in b6dda52

#elif defined(OS_TYPE_LINUX) && !defined(__MINGW32__)
// Linux (non-MinGW) branch: pin the calling worker thread to a single core chosen from CORE_SET.
// The "#if 0" section below is a disabled pthread-based variant kept for reference; the live code
// is the "#else" section that follows it.
#if 0
/*
// This is the affinity API tied to pthread library ... interestingly, it's less portable than the
// Linux system-centric one below; e.g. GCC gives "error: unknown type name ‘cpuset_t’; did you mean ‘cpu_set_t’?" here:
int i,errcode;
cpuset_t *cset;
pthread_t pth;
cset = cpuset_create();
if (cset == NULL) {
err(EXIT_FAILURE, "cpuset_create");
}
i = my_id % pool->num_of_cores; // get cpu mask using sequential thread ID modulo #available cores
cpuset_set((cpuid_t)i, cset);
pth = pthread_self();
errcode = pthread_setaffinity_np(pth, cpuset_size(cset), cset);
if (errcode) {
perror("pthread_setaffinity_np");
}
cpuset_destroy(cset);
*/
#else
// Live path. Within this fragment, cpu_set and errcode are used only by the non-hwloc
// (sched_setaffinity) branch further down.
cpu_set_t cpu_set;
int i,errcode;
// Thread ID obtained via the raw gettid syscall; passed to sched_setaffinity() in the non-hwloc branch.
pid_t thread_id = syscall (__NR_gettid);
#if THREAD_POOL_DEBUG
printf("executing worker thread id %u, syscall_id = %u\n", my_id, thread_id);
#endif
// Reduce the worker's sequential ID modulo the pool's core count, then translate that ordinal
// into the index of the corresponding set bit of CORE_SET.
i = my_id % pool->num_of_cores;
i = mi64_ith_set_bit(CORE_SET, i+1, MAX_CORES>>6); // Remember, [i]th-bit index in arglist is *unit* offset, i.e. must be in [1,MAX_CORES]
// A negative result means CORE_SET has no such set bit: report it and trigger ASSERT(0, ...).
if(i < 0) {
fprintf(stderr,"Affinity CORE_SET does not have a [%u]th set bit!",my_id % pool->num_of_cores);
ASSERT(0, "Aborting.");
}
#if INCLUDE_HWLOC
// hwloc build: binding is attempted only when the HWLOC_AFFINITY global is true; there is no
// else-branch, so otherwise this fragment sets no affinity.
if(HWLOC_AFFINITY) { // Global, declared in Mdata.h, defined in Mlucas.c, set in util.c::host_init()
hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
// Look up the processing-unit (PU) object with index i in the topology.
hwloc_obj_t obj = hwloc_get_obj_by_type(hw_topology, HWLOC_OBJ_PU, i);
if (obj) {
// OR the PU's cpuset into the freshly allocated bitmap.
hwloc_bitmap_or(cpuset, cpuset, obj->cpuset);
} else {
// Lookup failed: report it.
// NOTE(review): execution still falls through to hwloc_set_cpubind() below with whatever
// cpuset holds at this point — confirm this is intended.
snprintf(cbuf,STR_MAX_LEN*2,"[hwloc] Error: HWLOC_OBJ_PU[%u] not found.\n",i);
fprintf(stderr,"%s",cbuf);
}
// Set affinity to specified logical CPUs:
if (hwloc_set_cpubind(hw_topology, cpuset, HWLOC_CPUBIND_THREAD)) {
// Nonzero return: capture errno before the formatting calls below, then warn and carry on
// without binding (non-fatal).
int error = errno;
hwloc_bitmap_snprintf (str, sizeof (str), cpuset);
snprintf(cbuf,STR_MAX_LEN*2,"[hwloc] Warning: Unable to set affinity to cpuset %s: %s; leaving up to OS to manage thread/core binding.\n",str,strerror(error));
fprintf(stderr,"%s",cbuf);
#if THREAD_POOL_DEBUG
} else {
// NOTE(review): within this fragment str is written by hwloc_bitmap_snprintf() only in the
// failure branch above, so its contents on this success path are not established here.
// obj is also dereferenced; it would be NULL if the lookup above failed — verify.
printf("[hwloc] tid = %d: HWLOC_OBJ_PU[%u], lidx %u, pidx %u: setaffinity[%d] to cpuset %s\n",my_id,i,obj->logical_index,obj->os_index,str);
#endif
}
hwloc_bitmap_free(cpuset);
} // HWLOC_AFFINITY = True?
#else // INCLUDE_HWLOC = False:
// get cpu mask using sequential thread ID modulo #available cores in runtime-specified affinity set:
// Build a mask containing only core i and apply it to this thread via sched_setaffinity().
CPU_ZERO (&cpu_set);
CPU_SET(i, &cpu_set);
errcode = sched_setaffinity(thread_id, sizeof(cpu_set), &cpu_set);
#if THREAD_POOL_DEBUG
printf("syscall_id = %u, tid = %d, setaffinity[%d] = %d, ISSET[%d] = %d\n", thread_id,my_id,i,errcode,i,CPU_ISSET(i, &cpu_set));
#endif
// Failure is reported but not fatal: the thread simply runs unpinned.
if (errcode) {
perror("sched_setaffinity");
fprintf(stderr,"INFO: Your run should be OK, but leaving up to OS to manage thread/core binding.\n");
}
// The #endif below closes the "#if INCLUDE_HWLOC ... #else" pair.
#endif
#endif // closes the "#if 0 ... #else" pair opened at the top of this branch
#elif defined(OS_TYPE_MACOSX)
// macOS branch: adjust Mach thread policies for the calling worker thread.
thread_t thr = mach_thread_self();
// First request: extended policy with timeshare set to FALSE. A failure here prints the
// return code and terminates the process via exit(-1).
thread_extended_policy_data_t epolicy;
epolicy.timeshare = FALSE;
kern_return_t ret = thread_policy_set(
thr, THREAD_EXTENDED_POLICY,
(thread_policy_t) &epolicy, THREAD_EXTENDED_POLICY_COUNT);
if (ret != KERN_SUCCESS) {
printf("thread_policy_set returned %d", ret);
exit(-1);
}
// Second request: fill in an affinity policy whose tag is the core index selected from CORE_SET.
thread_affinity_policy_data_t apolicy;
int i = my_id % pool->num_of_cores; // get cpu mask using sequential thread ID modulo #available cores
i = mi64_ith_set_bit(CORE_SET, i+1, MAX_CORES>>6); // Remember, [i]th-bit index in arglist is *unit* offset, i.e. must be in [1,MAX_CORES]
// A negative result means CORE_SET has no such set bit: report it and trigger ASSERT(0, ...).
if(i < 0) {
fprintf(stderr,"Affinity CORE_SET does not have a [%u]th set bit!",my_id % pool->num_of_cores);
ASSERT(0, "Aborting.");
}
apolicy.affinity_tag = i; // set affinity tag
#if THREAD_POOL_DEBUG
printf("Setting CPU = %d affinity of worker thread id %u, mach_id = %u\n", i, my_id, thr);
#endif
// NOTE(review): apolicy is a thread_affinity_policy_data_t, yet this call passes the
// THREAD_EXTENDED_POLICY flavor and THREAD_EXTENDED_POLICY_COUNT, exactly as the first call
// above does. THREAD_AFFINITY_POLICY / THREAD_AFFINITY_POLICY_COUNT were presumably intended —
// confirm against the Mach thread_policy_set documentation before relying on this for affinity.
ret = thread_policy_set(
thr, THREAD_EXTENDED_POLICY,
(thread_policy_t) &apolicy, THREAD_EXTENDED_POLICY_COUNT);
// As with the first call, any failure prints the return code and exits the process.
if (ret != KERN_SUCCESS) {
printf("thread_policy_set returned %d", ret);
exit(-1);
}
#else
// None of the preceding platform conditions matched: no affinity call is made in this branch;
// it only prints the worker's ID and the pool's core count.
printf("executing worker thread id %u, #cores = %u\n", my_id, pool->num_of_cores);
#endif

macOS does not support setting the CPU affinity directly, but @Artoria2e5 suggested a workaround in this forum thread. See their PR for Prime95: primesearch/Prime95#2

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancement (New feature or request), help wanted (Extra attention is needed)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions