From 0e2cdd313ba5c67c5e2e21d993399b890e687c63 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Tue, 7 Oct 2025 13:29:18 +0100 Subject: [PATCH 1/5] gh-139436: Remove ``dist-pdf`` from the docs archives rebuild target (#139437) --- Doc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/Makefile b/Doc/Makefile index 84578c5c57f478..f6f4c721080c42 100644 --- a/Doc/Makefile +++ b/Doc/Makefile @@ -184,7 +184,7 @@ venv: fi .PHONY: dist-no-html -dist-no-html: dist-text dist-pdf dist-epub dist-texinfo +dist-no-html: dist-text dist-epub dist-texinfo .PHONY: dist dist: From 7094f09f547dc1a520c666dc6ce11b7b69a1b8da Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 7 Oct 2025 14:04:37 +0100 Subject: [PATCH 2/5] GH-139291: Fix C stack limits by factoring out finding hardware stack limits (GH-139294) --- Python/ceval.c | 60 +++++++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 0ccaacaf3ed5b1..1b52128c858ecb 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -438,31 +438,26 @@ int pthread_attr_destroy(pthread_attr_t *a) #endif - -void -_Py_InitializeRecursionLimits(PyThreadState *tstate) +static void +hardware_stack_limits(uintptr_t *top, uintptr_t *base) { - _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; #ifdef WIN32 ULONG_PTR low, high; GetCurrentThreadStackLimits(&low, &high); - _tstate->c_stack_top = (uintptr_t)high; + *top = (uintptr_t)high; ULONG guarantee = 0; SetThreadStackGuarantee(&guarantee); - _tstate->c_stack_hard_limit = ((uintptr_t)low) + guarantee + _PyOS_STACK_MARGIN_BYTES; - _tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES; + *base = (uintptr_t)low + guarantee; #elif defined(__APPLE__) pthread_t this_thread = pthread_self(); void *stack_addr = pthread_get_stackaddr_np(this_thread); // top of the stack size_t stack_size = pthread_get_stacksize_np(this_thread); - _tstate->c_stack_top = (uintptr_t)stack_addr; - _tstate->c_stack_hard_limit = _tstate->c_stack_top - stack_size; - _tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES; + *top = (uintptr_t)stack_addr; + *base = ((uintptr_t)stack_addr) - stack_size; #else - uintptr_t here_addr = _Py_get_machine_stack_pointer(); -/// XXX musl supports HAVE_PTHRED_GETATTR_NP, but the resulting stack size -/// (on alpine at least) is much smaller than expected and imposes undue limits -/// compared to the old stack size estimation. (We assume musl is not glibc.) + /// XXX musl supports HAVE_PTHRED_GETATTR_NP, but the resulting stack size + /// (on alpine at least) is much smaller than expected and imposes undue limits + /// compared to the old stack size estimation. (We assume musl is not glibc.) # if defined(HAVE_PTHREAD_GETATTR_NP) && !defined(_AIX) && \ !defined(__NetBSD__) && (defined(__GLIBC__) || !defined(__linux__)) size_t stack_size, guard_size; @@ -475,26 +470,35 @@ _Py_InitializeRecursionLimits(PyThreadState *tstate) err |= pthread_attr_destroy(&attr); } if (err == 0) { - uintptr_t base = ((uintptr_t)stack_addr) + guard_size; - _tstate->c_stack_top = base + stack_size; -#ifdef _Py_THREAD_SANITIZER - // Thread sanitizer crashes if we use a bit more than half the stack. 
- _tstate->c_stack_soft_limit = base + (stack_size / 2); -#else - _tstate->c_stack_soft_limit = base + _PyOS_STACK_MARGIN_BYTES * 2; -#endif - _tstate->c_stack_hard_limit = base + _PyOS_STACK_MARGIN_BYTES; - assert(_tstate->c_stack_soft_limit < here_addr); - assert(here_addr < _tstate->c_stack_top); + *base = ((uintptr_t)stack_addr) + guard_size; + *top = (uintptr_t)stack_addr + stack_size; return; } # endif - _tstate->c_stack_top = _Py_SIZE_ROUND_UP(here_addr, 4096); - _tstate->c_stack_soft_limit = _tstate->c_stack_top - Py_C_STACK_SIZE; - _tstate->c_stack_hard_limit = _tstate->c_stack_top - (Py_C_STACK_SIZE + _PyOS_STACK_MARGIN_BYTES); + uintptr_t here_addr = _Py_get_machine_stack_pointer(); + uintptr_t top_addr = _Py_SIZE_ROUND_UP(here_addr, 4096); + *top = top_addr; + *base = top_addr - Py_C_STACK_SIZE; #endif } +void +_Py_InitializeRecursionLimits(PyThreadState *tstate) +{ + uintptr_t top; + uintptr_t base; + hardware_stack_limits(&top, &base); +#ifdef _Py_THREAD_SANITIZER + // Thread sanitizer crashes if we use more than half the stack. + uintptr_t stacksize = top - base; + base += stacksize/2; +#endif + _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; + _tstate->c_stack_top = top; + _tstate->c_stack_hard_limit = base + _PyOS_STACK_MARGIN_BYTES; + _tstate->c_stack_soft_limit = base + _PyOS_STACK_MARGIN_BYTES * 2; +} + /* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall() if the recursion_depth reaches recursion_limit. */ int From 96c59a6e427fab32d0bca89b77febca8cba8aada Mon Sep 17 00:00:00 2001 From: danigm Date: Tue, 7 Oct 2025 16:54:31 +0200 Subject: [PATCH 3/5] gh-138497: Support LLVM_VERSION configuration via env (#138498) Co-authored-by: Savannah Ostrowski --- ...-09-04-12-16-31.gh-issue-138497.Y_5YXh.rst | 4 ++ Tools/jit/README.md | 2 +- Tools/jit/_llvm.py | 49 ++++++++++++------- Tools/jit/_targets.py | 19 +++++-- Tools/jit/build.py | 3 ++ configure | 2 +- configure.ac | 2 +- 7 files changed, 56 insertions(+), 25 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2025-09-04-12-16-31.gh-issue-138497.Y_5YXh.rst diff --git a/Misc/NEWS.d/next/Build/2025-09-04-12-16-31.gh-issue-138497.Y_5YXh.rst b/Misc/NEWS.d/next/Build/2025-09-04-12-16-31.gh-issue-138497.Y_5YXh.rst new file mode 100644 index 00000000000000..7eb0770996877a --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-09-04-12-16-31.gh-issue-138497.Y_5YXh.rst @@ -0,0 +1,4 @@ +The LLVM version used by the JIT at build time can now be modified using +the ``LLVM_VERSION`` environment variable. Use this at your own risk, as +there is only one officially supported LLVM version. For more information, +please check ``Tools/jit/README.md``. diff --git a/Tools/jit/README.md b/Tools/jit/README.md index ffc762d3828bfb..35c7ffd7a283f8 100644 --- a/Tools/jit/README.md +++ b/Tools/jit/README.md @@ -9,7 +9,7 @@ Python 3.11 or newer is required to build the JIT. The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon). -LLVM version 19 is required. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code. +LLVM version 19 is the officially supported version. 
You can modify if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code. It's easy to install all of the required tools: diff --git a/Tools/jit/_llvm.py b/Tools/jit/_llvm.py index f09a8404871b24..bc3b50ffe61634 100644 --- a/Tools/jit/_llvm.py +++ b/Tools/jit/_llvm.py @@ -10,8 +10,8 @@ import _targets -_LLVM_VERSION = 19 -_LLVM_VERSION_PATTERN = re.compile(rf"version\s+{_LLVM_VERSION}\.\d+\.\d+\S*\s+") + +_LLVM_VERSION = "19" _EXTERNALS_LLVM_TAG = "llvm-19.1.7.0" _P = typing.ParamSpec("_P") @@ -56,53 +56,66 @@ async def _run(tool: str, args: typing.Iterable[str], echo: bool = False) -> str @_async_cache -async def _check_tool_version(name: str, *, echo: bool = False) -> bool: +async def _check_tool_version( + name: str, llvm_version: str, *, echo: bool = False +) -> bool: output = await _run(name, ["--version"], echo=echo) - return bool(output and _LLVM_VERSION_PATTERN.search(output)) + _llvm_version_pattern = re.compile(rf"version\s+{llvm_version}\.\d+\.\d+\S*\s+") + return bool(output and _llvm_version_pattern.search(output)) @_async_cache -async def _get_brew_llvm_prefix(*, echo: bool = False) -> str | None: - output = await _run("brew", ["--prefix", f"llvm@{_LLVM_VERSION}"], echo=echo) +async def _get_brew_llvm_prefix(llvm_version: str, *, echo: bool = False) -> str | None: + output = await _run("brew", ["--prefix", f"llvm@{llvm_version}"], echo=echo) return output and output.removesuffix("\n") @_async_cache -async def _find_tool(tool: str, *, echo: bool = False) -> str | None: +async def _find_tool(tool: str, llvm_version: str, *, echo: bool = False) -> str | None: # Unversioned executables: path = tool - if await _check_tool_version(path, echo=echo): + if await _check_tool_version(path, llvm_version, echo=echo): return path # Versioned executables: - path = f"{tool}-{_LLVM_VERSION}" - if await _check_tool_version(path, echo=echo): + path = f"{tool}-{llvm_version}" + if await _check_tool_version(path, llvm_version, echo=echo): return path # PCbuild externals: externals = os.environ.get("EXTERNALS_DIR", _targets.EXTERNALS) path = os.path.join(externals, _EXTERNALS_LLVM_TAG, "bin", tool) - if await _check_tool_version(path, echo=echo): + if await _check_tool_version(path, llvm_version, echo=echo): return path # Homebrew-installed executables: - prefix = await _get_brew_llvm_prefix(echo=echo) + prefix = await _get_brew_llvm_prefix(llvm_version, echo=echo) if prefix is not None: path = os.path.join(prefix, "bin", tool) - if await _check_tool_version(path, echo=echo): + if await _check_tool_version(path, llvm_version, echo=echo): return path # Nothing found: return None async def maybe_run( - tool: str, args: typing.Iterable[str], echo: bool = False + tool: str, + args: typing.Iterable[str], + echo: bool = False, + llvm_version: str = _LLVM_VERSION, ) -> str | None: """Run an LLVM tool if it can be found. 
Otherwise, return None.""" - path = await _find_tool(tool, echo=echo) + + path = await _find_tool(tool, llvm_version, echo=echo) return path and await _run(path, args, echo=echo) -async def run(tool: str, args: typing.Iterable[str], echo: bool = False) -> str: +async def run( + tool: str, + args: typing.Iterable[str], + echo: bool = False, + llvm_version: str = _LLVM_VERSION, +) -> str: """Run an LLVM tool if it can be found. Otherwise, raise RuntimeError.""" - output = await maybe_run(tool, args, echo=echo) + + output = await maybe_run(tool, args, echo=echo, llvm_version=llvm_version) if output is None: - raise RuntimeError(f"Can't find {tool}-{_LLVM_VERSION}!") + raise RuntimeError(f"Can't find {tool}-{llvm_version}!") return output diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 2f3969e7d0540c..9fc3522d23d982 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -50,6 +50,7 @@ class _Target(typing.Generic[_S, _R]): debug: bool = False verbose: bool = False cflags: str = "" + llvm_version: str = _llvm._LLVM_VERSION known_symbols: dict[str, int] = dataclasses.field(default_factory=dict) pyconfig_dir: pathlib.Path = pathlib.Path.cwd().resolve() @@ -81,7 +82,9 @@ def _compute_digest(self) -> str: async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: group = _stencils.StencilGroup() args = ["--disassemble", "--reloc", f"{path}"] - output = await _llvm.maybe_run("llvm-objdump", args, echo=self.verbose) + output = await _llvm.maybe_run( + "llvm-objdump", args, echo=self.verbose, llvm_version=self.llvm_version + ) if output is not None: # Make sure that full paths don't leak out (for reproducibility): long, short = str(path), str(path.name) @@ -99,7 +102,9 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: "--sections", f"{path}", ] - output = await _llvm.run("llvm-readobj", args, echo=self.verbose) + output = await _llvm.run( + "llvm-readobj", args, echo=self.verbose, llvm_version=self.llvm_version + ) # --elf-output-style=JSON is only *slightly* broken on Mach-O... output = output.replace("PrivateExtern\n", "\n") output = output.replace("Extern\n", "\n") @@ -175,12 +180,16 @@ async def _compile( # Allow user-provided CFLAGS to override any defaults *shlex.split(self.cflags), ] - await _llvm.run("clang", args_s, echo=self.verbose) + await _llvm.run( + "clang", args_s, echo=self.verbose, llvm_version=self.llvm_version + ) self.optimizer( s, label_prefix=self.label_prefix, symbol_prefix=self.symbol_prefix ).run() args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"] - await _llvm.run("clang", args_o, echo=self.verbose) + await _llvm.run( + "clang", args_o, echo=self.verbose, llvm_version=self.llvm_version + ) return await self._parse(o) async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: @@ -224,6 +233,8 @@ def build( if not self.stable: warning = f"JIT support for {self.triple} is still experimental!" request = "Please report any issues you encounter.".center(len(warning)) + if self.llvm_version != _llvm._LLVM_VERSION: + request = f"Warning! Building with an LLVM version other than {_llvm._LLVM_VERSION} is not supported." 
outline = "=" * len(warning) print("\n".join(["", outline, warning, request, outline, ""])) digest = f"// {self._compute_digest()}\n" diff --git a/Tools/jit/build.py b/Tools/jit/build.py index a0733005929bf2..127d93b317fb09 100644 --- a/Tools/jit/build.py +++ b/Tools/jit/build.py @@ -42,6 +42,7 @@ parser.add_argument( "--cflags", help="additional flags to pass to the compiler", default="" ) + parser.add_argument("--llvm-version", help="LLVM version to use") args = parser.parse_args() for target in args.target: target.debug = args.debug @@ -49,6 +50,8 @@ target.verbose = args.verbose target.cflags = args.cflags target.pyconfig_dir = args.pyconfig_dir + if args.llvm_version: + target.llvm_version = args.llvm_version target.build( comment=comment, force=args.force, diff --git a/configure b/configure index 0d1f6a29e9b432..d80340e3015bee 100755 --- a/configure +++ b/configure @@ -10875,7 +10875,7 @@ then : else case e in #( e) as_fn_append CFLAGS_NODIST " $jit_flags" - REGEN_JIT_COMMAND="\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir . --cflags=\"$CFLAGS_JIT\"" + REGEN_JIT_COMMAND="\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir . --cflags=\"$CFLAGS_JIT\" --llvm-version=\"$LLVM_VERSION\"" if test "x$Py_DEBUG" = xtrue then : as_fn_append REGEN_JIT_COMMAND " --debug" diff --git a/configure.ac b/configure.ac index 7b5da6e0d15682..1e0c0f71b7c281 100644 --- a/configure.ac +++ b/configure.ac @@ -2786,7 +2786,7 @@ AS_VAR_IF([jit_flags], [], [AS_VAR_APPEND([CFLAGS_NODIST], [" $jit_flags"]) AS_VAR_SET([REGEN_JIT_COMMAND], - ["\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir . --cflags=\"$CFLAGS_JIT\""]) + ["\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir . 
--cflags=\"$CFLAGS_JIT\" --llvm-version=\"$LLVM_VERSION\""]) AS_VAR_IF([Py_DEBUG], [true], [AS_VAR_APPEND([REGEN_JIT_COMMAND], [" --debug"])], From 539461d9ec8e5322ead638f7be733fd196aa6c79 Mon Sep 17 00:00:00 2001 From: Tomasz Pytel Date: Tue, 7 Oct 2025 12:28:15 -0400 Subject: [PATCH 4/5] gh-139516: Fix lambda colon start format spec in f-string in tokenizer (#139657) --- Lib/test/test_fstring.py | 7 +++++++ Lib/test/test_tokenize.py | 17 +++++++++++++++++ ...25-10-06-13-15-26.gh-issue-139516.d9Pkur.rst | 1 + Parser/lexer/lexer.c | 2 +- Parser/lexer/state.h | 2 ++ 5 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-10-06-13-15-26.gh-issue-139516.d9Pkur.rst diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 41cefe0e286d50..05d0cbd2445c4c 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1859,6 +1859,13 @@ def __format__(self, format): # Test multiple format specs in same raw f-string self.assertEqual(rf"{UnchangedFormat():\xFF} {UnchangedFormat():\n}", '\\xFF \\n') + def test_gh139516(self): + with temp_cwd(): + script = 'script.py' + with open(script, 'wb') as f: + f.write('''def f(a): pass\nf"{f(a=lambda: 'à'\n)}"'''.encode()) + assert_python_ok(script) + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index d90a7659c4237c..8fdd03f347b632 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1216,6 +1216,23 @@ def test_multiline_non_ascii_fstring_with_expr(self): FSTRING_END "\'\'\'" (3, 1) (3, 4) """) + # gh-139516, the '\n' is explicit to ensure no trailing whitespace which would invalidate the test + self.check_tokenize('''f"{f(a=lambda: 'à'\n)}"''', """\ + FSTRING_START \'f"\' (1, 0) (1, 2) + OP '{' (1, 2) (1, 3) + NAME 'f' (1, 3) (1, 4) + OP '(' (1, 4) (1, 5) + NAME 'a' (1, 5) (1, 6) + OP '=' (1, 6) (1, 7) + NAME 'lambda' (1, 7) (1, 13) + OP ':' (1, 13) (1, 14) + STRING "\'à\'" (1, 15) (1, 18) + NL '\\n' (1, 18) (1, 19) + OP ')' (2, 0) (2, 1) + OP '}' (2, 1) (2, 2) + FSTRING_END \'"\' (2, 2) (2, 3) + """) + class GenerateTokensTest(TokenizeTest): def check_tokenize(self, s, expected): # Format the tokens in s in a table format. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-06-13-15-26.gh-issue-139516.d9Pkur.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-06-13-15-26.gh-issue-139516.d9Pkur.rst new file mode 100644 index 00000000000000..a709112306025f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-06-13-15-26.gh-issue-139516.d9Pkur.rst @@ -0,0 +1 @@ +Fix lambda colon erroneously start format spec in f-string in tokenizer. 
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 81363cf8e810fe..a69994e9b3d005 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -1376,7 +1376,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c)); } - if( c == '=' && INSIDE_FSTRING_EXPR(current_tok)) { + if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) { current_tok->in_debug = 1; } diff --git a/Parser/lexer/state.h b/Parser/lexer/state.h index 5e8cac7249b21c..877127125a7652 100644 --- a/Parser/lexer/state.h +++ b/Parser/lexer/state.h @@ -9,6 +9,8 @@ #define INSIDE_FSTRING(tok) (tok->tok_mode_stack_index > 0) #define INSIDE_FSTRING_EXPR(tok) (tok->curly_bracket_expr_start_depth >= 0) +#define INSIDE_FSTRING_EXPR_AT_TOP(tok) \ + (tok->curly_bracket_depth - tok->curly_bracket_expr_start_depth == 1) enum decoding_state { STATE_INIT, From 162997bb70e067668c039700141770687bc8f267 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 7 Oct 2025 20:15:26 +0300 Subject: [PATCH 5/5] gh-139700: Check consistency of the zip64 end of central directory record (GH-139702) Support records with "zip64 extensible data" if there are no bytes prepended to the ZIP file. --- Lib/test/test_zipfile/test_core.py | 82 ++++++++++++++++++- Lib/zipfile/__init__.py | 51 +++++++----- ...-10-07-19-31-34.gh-issue-139700.vNHU1O.rst | 3 + 3 files changed, 113 insertions(+), 23 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index c033059a515db6..6acfefc74d6665 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -898,6 +898,8 @@ def make_zip64_file( self, file_size_64_set=False, file_size_extra=False, compress_size_64_set=False, compress_size_extra=False, header_offset_64_set=False, header_offset_extra=False, + extensible_data=b'', + end_of_central_dir_size=None, offset_to_end_of_central_dir=None, ): """Generate bytes sequence for a zip with (incomplete) zip64 data. @@ -951,6 +953,12 @@ def make_zip64_file( central_dir_size = struct.pack(' 2: inferred = concat + offset_cd @@ -289,16 +286,15 @@ def _EndRecData64(fpin, offset, endrec): """ Read the ZIP64 end-of-archive records and use that to update endrec """ - try: - fpin.seek(offset - sizeEndCentDir64Locator, 2) - except OSError: - # If the seek fails, the file is not large enough to contain a ZIP64 + offset -= sizeEndCentDir64Locator + if offset < 0: + # The file is not large enough to contain a ZIP64 # end-of-archive record, so just return the end record we were given. return endrec - + fpin.seek(offset) data = fpin.read(sizeEndCentDir64Locator) if len(data) != sizeEndCentDir64Locator: - return endrec + raise OSError("Unknown I/O error") sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) if sig != stringEndArchive64Locator: return endrec @@ -306,16 +302,33 @@ def _EndRecData64(fpin, offset, endrec): if diskno != 0 or disks > 1: raise BadZipFile("zipfiles that span multiple disks are not supported") - # Assume no 'zip64 extensible data' - fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) + offset -= sizeEndCentDir64 + if reloff > offset: + raise BadZipFile("Corrupt zip64 end of central directory locator") + # First, check the assumption that there is no prepended data. 
+ fpin.seek(reloff) + extrasz = offset - reloff data = fpin.read(sizeEndCentDir64) if len(data) != sizeEndCentDir64: - return endrec + raise OSError("Unknown I/O error") + if not data.startswith(stringEndArchive64) and reloff != offset: + # Since we already have seen the Zip64 EOCD Locator, it's + # possible we got here because there is prepended data. + # Assume no 'zip64 extensible data' + fpin.seek(offset) + extrasz = 0 + data = fpin.read(sizeEndCentDir64) + if len(data) != sizeEndCentDir64: + raise OSError("Unknown I/O error") + if not data.startswith(stringEndArchive64): + raise BadZipFile("Zip64 end of central directory record not found") + sig, sz, create_version, read_version, disk_num, disk_dir, \ dircount, dircount2, dirsize, diroffset = \ struct.unpack(structEndArchive64, data) - if sig != stringEndArchive64: - return endrec + if (diroffset + dirsize != reloff or + sz + 12 != sizeEndCentDir64 + extrasz): + raise BadZipFile("Corrupt zip64 end of central directory record") # Update the original endrec using data from the ZIP64 record endrec[_ECD_SIGNATURE] = sig @@ -325,6 +338,7 @@ def _EndRecData64(fpin, offset, endrec): endrec[_ECD_ENTRIES_TOTAL] = dircount2 endrec[_ECD_SIZE] = dirsize endrec[_ECD_OFFSET] = diroffset + endrec[_ECD_LOCATION] = offset - extrasz return endrec @@ -358,7 +372,7 @@ def _EndRecData(fpin): endrec.append(filesize - sizeEndCentDir) # Try to read the "Zip64 end of central directory" structure - return _EndRecData64(fpin, -sizeEndCentDir, endrec) + return _EndRecData64(fpin, filesize - sizeEndCentDir, endrec) # Either this is not a ZIP file, or it is a ZIP file with an archive # comment. Search the end of the file for the "end of central directory" @@ -382,8 +396,7 @@ def _EndRecData(fpin): endrec.append(maxCommentStart + start) # Try to read the "Zip64 end of central directory" structure - return _EndRecData64(fpin, maxCommentStart + start - filesize, - endrec) + return _EndRecData64(fpin, maxCommentStart + start, endrec) # Unable to find a valid end of central directory structure return None @@ -2142,7 +2155,7 @@ def _write_end_record(self): " would require ZIP64 extensions") zip64endrec = struct.pack( structEndArchive64, stringEndArchive64, - 44, 45, 45, 0, 0, centDirCount, centDirCount, + sizeEndCentDir64 - 12, 45, 45, 0, 0, centDirCount, centDirCount, centDirSize, centDirOffset) self.fp.write(zip64endrec) diff --git a/Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst b/Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst new file mode 100644 index 00000000000000..a8e7a1f1878c6b --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst @@ -0,0 +1,3 @@ +Check consistency of the zip64 end of central directory record. Support +records with "zip64 extensible data" if there are no bytes prepended to the +ZIP file.
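
For reference (not part of the patch), here is a minimal sketch of the two consistency conditions the reworked _EndRecData64() now enforces, using zipfile's module-level names and purely illustrative numbers: the zip64 end-of-central-directory record must begin exactly where the central directory ends, and its stored size field (which, per the ZIP specification, excludes the 12-byte signature-plus-size prefix) must account for any "zip64 extensible data" (extrasz) sitting between the record and its locator.

    import struct

    # Layout of the zip64 end-of-central-directory (EOCD64) record as zipfile
    # defines it; the numeric field values below are illustrative only.
    structEndArchive64 = "<4sQ2H2L4Q"
    stringEndArchive64 = b"PK\x06\x06"
    sizeEndCentDir64 = struct.calcsize(structEndArchive64)  # 56 bytes

    dirsize, diroffset, count = 98, 4096, 3
    record = struct.pack(
        structEndArchive64, stringEndArchive64,
        sizeEndCentDir64 - 12,   # stored size excludes signature + size field
        45, 45, 0, 0, count, count, dirsize, diroffset)

    reloff = diroffset + dirsize   # where the EOCD64 record must begin
    extrasz = 0                    # length of optional zip64 extensible data
    sig, sz, *_mid, dirsize2, diroffset2 = struct.unpack(structEndArchive64, record)
    assert diroffset2 + dirsize2 == reloff        # central directory ends at the record
    assert sz + 12 == sizeEndCentDir64 + extrasz  # size field is self-consistent

If either condition fails for a real archive, the new code raises BadZipFile ("Corrupt zip64 end of central directory record") instead of silently falling back to the 32-bit end record as before.
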