From 4cc1084ebb456b722777e806ead2d4a21d883dc6 Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Mon, 27 Oct 2025 14:34:49 +0100 Subject: [PATCH 1/2] Add support for free-threaded Python (PEP 703). The significant change here is the use of a thread local instead of a volatile global for the switching_thread_state global (which is otherwise protected by the GIL). There's some overhead to using a thread local, so only do this in the free-threaded build. The only other two bits of shared mutable data are `G_TOTAL_MAIN_GREENLETS` and `ThreadState::_clocks_used_doing_gc`. Modify the latter to use a std::atomic with relaxed memory order, which should be good enough, and performance probably matters for those updates. For G_TOTAL_MAIN_GREENLETS, switch to a std::atomic without changing the inc/dec operations (which means they use sequential consistency), because they're rare enough that performance doesn't really matter. Also mark the main extension module and the two test extensions as supporting free-threading (without switching to multi-phase init). The GIL will still temporarily be enabled during module import, but that probably won't matter (modules are usually imported before starting threads). If it does, switching to multi-phase init is always an option. The existing test suite covers threads extensively enough that no extra tests are necessary. There is an intermittent failure (<0.2% of runs) that shows up when running the testsuite in a tight loop, but this happens in regular Python builds (and before 3.14) too. ThreadSanitizer can't be used on greenlet, from what I can tell because of how it gets confused by the stack switching. This is the case for GILful Python builds as well. 
--- src/greenlet/PyModule.cpp | 8 ++--- src/greenlet/TMainGreenlet.cpp | 13 ++++++-- src/greenlet/TThreadState.hpp | 39 +++++++++++++++++++--- src/greenlet/greenlet.cpp | 3 ++ src/greenlet/greenlet_slp_switch.hpp | 4 +++ src/greenlet/tests/_test_extension.c | 3 ++ src/greenlet/tests/_test_extension_cpp.cpp | 3 ++ 7 files changed, 62 insertions(+), 11 deletions(-) diff --git a/src/greenlet/PyModule.cpp b/src/greenlet/PyModule.cpp index 6adcb5c3..1a320389 100644 --- a/src/greenlet/PyModule.cpp +++ b/src/greenlet/PyModule.cpp @@ -144,7 +144,7 @@ PyDoc_STRVAR(mod_get_clocks_used_doing_optional_cleanup_doc, static PyObject* mod_get_clocks_used_doing_optional_cleanup(PyObject* UNUSED(module)) { - std::clock_t& clocks = ThreadState::clocks_used_doing_gc(); + std::clock_t clocks = ThreadState::clocks_used_doing_gc(); if (clocks == std::clock_t(-1)) { Py_RETURN_NONE; @@ -168,15 +168,15 @@ mod_enable_optional_cleanup(PyObject* UNUSED(module), PyObject* flag) return nullptr; } - std::clock_t& clocks = ThreadState::clocks_used_doing_gc(); + std::clock_t clocks = ThreadState::clocks_used_doing_gc(); if (is_true) { // If we already have a value, we don't want to lose it. if (clocks == std::clock_t(-1)) { - clocks = 0; + ThreadState::set_clocks_used_doing_gc(0); } } else { - clocks = std::clock_t(-1); + ThreadState::set_clocks_used_doing_gc(std::clock_t(-1)); } Py_RETURN_NONE; } diff --git a/src/greenlet/TMainGreenlet.cpp b/src/greenlet/TMainGreenlet.cpp index a2a9cfe4..ee014812 100644 --- a/src/greenlet/TMainGreenlet.cpp +++ b/src/greenlet/TMainGreenlet.cpp @@ -14,11 +14,18 @@ #include "TGreenlet.hpp" +#ifdef Py_GIL_DISABLED +#include +#endif - -// Protected by the GIL. Incremented when we create a main greenlet, -// in a new thread, decremented when it is destroyed. +// Incremented when we create a main greenlet, in a new thread, decremented +// when it is destroyed. +#ifdef Py_GIL_DISABLED +static std::atomic G_TOTAL_MAIN_GREENLETS(0); +#else +// Protected by the GIL. 
static Py_ssize_t G_TOTAL_MAIN_GREENLETS; +#endif namespace greenlet { greenlet::PythonAllocator MainGreenlet::allocator; diff --git a/src/greenlet/TThreadState.hpp b/src/greenlet/TThreadState.hpp index e4e6f6cb..f8bbfe22 100644 --- a/src/greenlet/TThreadState.hpp +++ b/src/greenlet/TThreadState.hpp @@ -3,6 +3,7 @@ #include #include +#include #include "greenlet_internal.hpp" #include "greenlet_refs.hpp" @@ -118,7 +119,11 @@ class ThreadState { void* exception_state; #endif +#ifdef Py_GIL_DISABLED + static std::atomic _clocks_used_doing_gc; +#else static std::clock_t _clocks_used_doing_gc; +#endif static ImmortalString get_referrers_name; static PythonAllocator allocator; @@ -160,7 +165,7 @@ class ThreadState { static void init() { ThreadState::get_referrers_name = "get_referrers"; - ThreadState::_clocks_used_doing_gc = 0; + ThreadState::set_clocks_used_doing_gc(0); } ThreadState() @@ -349,9 +354,31 @@ class ThreadState { /** * Set to std::clock_t(-1) to disable. */ - inline static std::clock_t& clocks_used_doing_gc() + inline static std::clock_t clocks_used_doing_gc() { +#ifdef Py_GIL_DISABLED + return ThreadState::_clocks_used_doing_gc.load(std::memory_order_relaxed); +#else return ThreadState::_clocks_used_doing_gc; +#endif + } + + inline static void set_clocks_used_doing_gc(std::clock_t value) + { +#ifdef Py_GIL_DISABLED + ThreadState::_clocks_used_doing_gc.store(value, std::memory_order_relaxed); +#else + ThreadState::_clocks_used_doing_gc = value; +#endif + } + + inline static void add_clocks_used_doing_gc(std::clock_t value) + { +#ifdef Py_GIL_DISABLED + ThreadState::_clocks_used_doing_gc.fetch_add(value, std::memory_order_relaxed); +#else + ThreadState::_clocks_used_doing_gc += value; +#endif } ~ThreadState() @@ -390,7 +417,7 @@ class ThreadState { PyGreenlet* old_main_greenlet = this->main_greenlet.borrow(); Py_ssize_t cnt = this->main_greenlet.REFCNT(); this->main_greenlet.CLEAR(); - if (ThreadState::_clocks_used_doing_gc != std::clock_t(-1) + if 
(ThreadState::clocks_used_doing_gc() != std::clock_t(-1) && cnt == 2 && Py_REFCNT(old_main_greenlet) == 1) { // Highly likely that the reference is somewhere on // the stack, not reachable by GC. Verify. @@ -444,7 +471,7 @@ class ThreadState { } } std::clock_t end = std::clock(); - ThreadState::_clocks_used_doing_gc += (end - begin); + ThreadState::add_clocks_used_doing_gc(end - begin); } } } @@ -486,7 +513,11 @@ class ThreadState { ImmortalString ThreadState::get_referrers_name(nullptr); PythonAllocator ThreadState::allocator; +#ifdef Py_GIL_DISABLED +std::atomic ThreadState::_clocks_used_doing_gc(0); +#else std::clock_t ThreadState::_clocks_used_doing_gc(0); +#endif diff --git a/src/greenlet/greenlet.cpp b/src/greenlet/greenlet.cpp index e8d92a00..7722bd00 100644 --- a/src/greenlet/greenlet.cpp +++ b/src/greenlet/greenlet.cpp @@ -291,6 +291,9 @@ greenlet_internal_mod_init() noexcept // << "\n\tPyGreenlet : " << sizeof(PyGreenlet) // << endl; +#ifdef Py_GIL_DISABLED + PyUnstable_Module_SetGIL(m.borrow(), Py_MOD_GIL_NOT_USED); +#endif return m.borrow(); // But really it's the main reference. } catch (const LockInitError& e) { diff --git a/src/greenlet/greenlet_slp_switch.hpp b/src/greenlet/greenlet_slp_switch.hpp index bd4b7ae1..bdffccae 100644 --- a/src/greenlet/greenlet_slp_switch.hpp +++ b/src/greenlet/greenlet_slp_switch.hpp @@ -36,7 +36,11 @@ // running this code, the thread isn't exiting. This also nets us a // 10-12% speed improvement. 
+#if Py_GIL_DISABLED +thread_local greenlet::Greenlet* switching_thread_state = nullptr; +#else static greenlet::Greenlet* volatile switching_thread_state = nullptr; +#endif extern "C" { diff --git a/src/greenlet/tests/_test_extension.c b/src/greenlet/tests/_test_extension.c index 05e81c03..5731ed23 100644 --- a/src/greenlet/tests/_test_extension.c +++ b/src/greenlet/tests/_test_extension.c @@ -227,5 +227,8 @@ PyInit__test_extension(void) } PyGreenlet_Import(); +#ifdef Py_GIL_DISABLED + PyUnstable_Module_SetGIL(module, Py_MOD_GIL_NOT_USED); +#endif return module; } diff --git a/src/greenlet/tests/_test_extension_cpp.cpp b/src/greenlet/tests/_test_extension_cpp.cpp index 5cbe6a76..b933bcaa 100644 --- a/src/greenlet/tests/_test_extension_cpp.cpp +++ b/src/greenlet/tests/_test_extension_cpp.cpp @@ -221,6 +221,9 @@ PyInit__test_extension_cpp(void) p_test_exception_throw_nonstd = test_exception_throw_nonstd; p_test_exception_throw_std = test_exception_throw_std; p_test_exception_switch_recurse = test_exception_switch_recurse; +#ifdef Py_GIL_DISABLED + PyUnstable_Module_SetGIL(module, Py_MOD_GIL_NOT_USED); +#endif return module; } From 1eca97a8c0d5d8d0fbe8c3680b1c5cf1663e440b Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Mon, 27 Oct 2025 15:50:21 +0100 Subject: [PATCH 2/2] Address reviewer comments. --- src/greenlet/PyModule.cpp | 2 +- tox.ini | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/greenlet/PyModule.cpp b/src/greenlet/PyModule.cpp index 1a320389..a999dc97 100644 --- a/src/greenlet/PyModule.cpp +++ b/src/greenlet/PyModule.cpp @@ -168,8 +168,8 @@ mod_enable_optional_cleanup(PyObject* UNUSED(module), PyObject* flag) return nullptr; } - std::clock_t clocks = ThreadState::clocks_used_doing_gc(); if (is_true) { + std::clock_t clocks = ThreadState::clocks_used_doing_gc(); // If we already have a value, we don't want to lose it. 
if (clocks == std::clock_t(-1)) { ThreadState::set_clocks_used_doing_gc(0); diff --git a/tox.ini b/tox.ini index 818ba057..7ec0bb00 100644 --- a/tox.ini +++ b/tox.ini @@ -1,11 +1,11 @@ [tox] envlist = - py{37,38,39,310,311,312,313,314},py{310,311,312,313,314}-ns,docs + py{37,38,39,310,311,312,313,314},py{310,311,312,313,314}-ns,docs,py314t,tsan-314#,tsan-314t [testenv] commands = python -c 'import greenlet._greenlet as G; assert G.GREENLET_USE_STANDARD_THREADING' - python -m unittest discover -v greenlet.tests + python -m unittest discover greenlet.tests sphinx-build -b doctest -d docs/_build/doctrees-{envname} docs docs/_build/doctest-{envname} sitepackages = False extras = @@ -21,3 +21,13 @@ commands = sphinx-build -b html -d docs/_build/doctrees docs docs/_build/html sphinx-build -b doctest -d docs/_build/doctrees docs docs/_build/doctest extras = docs + +[testenv:tsan-314t] +basepython = /usr/local/python-builds/tsan/bin/python3.14t +passenv = + TSAN_OPTIONS + +[testenv:tsan-314] +basepython = /usr/local/python-builds/default-tsan/bin/python3.14 +passenv = + TSAN_OPTIONS