From c2bf11e6011b1e78024839e3810f79bf1dd5c1d5 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 01/28] [mypyc] feat: improve LoadLiteral annotation determinism --- mypyc/codegen/emit.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 4ef53296ef0d1..df9d8c8143c1b 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -208,6 +208,15 @@ def object_annotation(self, obj: object, line: str) -> str: if any(x in formatted for x in ("/*", "*/", "\0")): return "" + # make frozenset annotations deterministic + if formatted.startswith("frozenset({"): + frozenset_items = formatted[11:-2] + # if our frozenset contains another frozenset or a tuple, we will need better logic + # here, but this redimentary logic will still vastly improve codegen determinism. + if "(" not in frozenset_items: + sorted_items = ", ".join(sorted(frozenset_items.split(", "))) + formatted = "frozenset({" + sorted_items + "})" + if "\n" in formatted: first_line, rest = formatted.split("\n", maxsplit=1) comment_continued = textwrap.indent(rest, (line_width + 3) * " ") From d750ec12df4d4696288c3905eb9cbcec8c749afa Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 02/28] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index df9d8c8143c1b..c8706f19e5483 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -212,7 +212,7 @@ def object_annotation(self, obj: object, line: str) -> str: if formatted.startswith("frozenset({"): frozenset_items = formatted[11:-2] # if our frozenset contains another frozenset or a tuple, we will need better logic - # here, but this redimentary logic will still vastly improve codegen determinism. + # here, but this rudimentary logic will still vastly improve codegen determinism. if "(" not in frozenset_items: sorted_items = ", ".join(sorted(frozenset_items.split(", "))) formatted = "frozenset({" + sorted_items + "})" From 486f9f01b253c31bbd0b2b3faa227adc820c691a Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 03/28] new sort key --- mypyc/codegen/emit.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index c8706f19e5483..e33b39c480950 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1235,3 +1235,11 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str # Multi-line result res.append(indent + ", ".join(current)) return "{\n " + ",\n ".join(res) + "\n" + indent + "}" + + +class _mypyc_safe_key(pprint._safe_key): + """A custom sort key implementation for pprint that makes the output deterministic + for all literal types supported by mypyc + """ + def __lt__(self, other: "_mypyc_safe_key") -> bool: + return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) From c30a72735180daed5e5b141dc95fa6e35f58b444 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 04/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/codegen/emit.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index e33b39c480950..10b831be5623b 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1241,5 +1241,6 @@ class _mypyc_safe_key(pprint._safe_key): """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc """ - def __lt__(self, other: "_mypyc_safe_key") -> bool: + + def __lt__(self, other: _mypyc_safe_key) -> bool: return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) From d174f4eeb3538ef046c05a79c44a975bc8c321ee Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 05/28] use new safe key --- mypyc/codegen/emit.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 10b831be5623b..ac4ccb665f488 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -204,19 +204,20 @@ def object_annotation(self, obj: object, line: str) -> str: If it contains illegal characters, an empty string is returned.""" line_width = self._indent + len(line) + + # temporarily override pprint._safe_key + default_safe_key = pprint._safe_key + pprint._safe_key = _mypyc_safe_key + + # pretty print the object formatted = pprint.pformat(obj, compact=True, width=max(90 - line_width, 20)) + + # replace the _safe_key + pprint._safe_key = default_safe_key + if any(x in formatted for x in ("/*", "*/", "\0")): return "" - # make frozenset annotations deterministic - if formatted.startswith("frozenset({"): - frozenset_items = formatted[11:-2] - # if our frozenset contains another frozenset or a tuple, we will need better logic - # here, but this rudimentary logic will still vastly improve codegen determinism. - if "(" not in frozenset_items: - sorted_items = ", ".join(sorted(frozenset_items.split(", "))) - formatted = "frozenset({" + sorted_items + "})" - if "\n" in formatted: first_line, rest = formatted.split("\n", maxsplit=1) comment_continued = textwrap.indent(rest, (line_width + 3) * " ") @@ -1239,8 +1240,10 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str class _mypyc_safe_key(pprint._safe_key): """A custom sort key implementation for pprint that makes the output deterministic - for all literal types supported by mypyc - """ + for all literal types supported by mypyc. - def __lt__(self, other: _mypyc_safe_key) -> bool: + This is NOT safe for use as a sort key for other types, so we MUST replace the + original pprint._safe_key once we've pprinted our object. + """ + def __lt__(self, other: "_mypyc_safe_key") -> bool: return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) From 28c0399bd04939af69a1445de0d34ea845b89556 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 06/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/codegen/emit.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index ac4ccb665f488..1d8b8ef5022f9 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -214,7 +214,7 @@ def object_annotation(self, obj: object, line: str) -> str: # replace the _safe_key pprint._safe_key = default_safe_key - + if any(x in formatted for x in ("/*", "*/", "\0")): return "" @@ -1245,5 +1245,6 @@ class _mypyc_safe_key(pprint._safe_key): This is NOT safe for use as a sort key for other types, so we MUST replace the original pprint._safe_key once we've pprinted our object. """ - def __lt__(self, other: "_mypyc_safe_key") -> bool: + + def __lt__(self, other: _mypyc_safe_key) -> bool: return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) From be0de786c953ed44bc8d964de28be1ac016321aa Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 07/28] Update emit.py --- mypyc/codegen/emit.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 1d8b8ef5022f9..849322ef26e93 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -206,14 +206,14 @@ def object_annotation(self, obj: object, line: str) -> str: line_width = self._indent + len(line) # temporarily override pprint._safe_key - default_safe_key = pprint._safe_key - pprint._safe_key = _mypyc_safe_key + default_safe_key = pprint._safe_key # type: ignore [attr-defined] + pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] # pretty print the object formatted = pprint.pformat(obj, compact=True, width=max(90 - line_width, 20)) # replace the _safe_key - pprint._safe_key = default_safe_key + pprint._safe_key = default_safe_key # type: ignore [attr-defined] if any(x in formatted for x in ("/*", "*/", "\0")): return "" @@ -1238,7 +1238,7 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -class _mypyc_safe_key(pprint._safe_key): +def _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, misc] """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. From d33d2d979cfab684058ac968f5afdd4cc8de15fb Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 08/28] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 849322ef26e93..394ae0d116441 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1238,7 +1238,7 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -def _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, misc] +class _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, misc] """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. From 650d99d6b74e5433de399f7231d07cd7054dd3bb Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 09/28] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 394ae0d116441..f2bc07d6587fb 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1238,7 +1238,7 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -class _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, misc] +class _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, name-defined, misc] """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. From da2278578d3431b5dcf8ba9f1360aa3e4bb31ccd Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 10/28] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index f2bc07d6587fb..24df8e6ecf007 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1238,7 +1238,7 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -class _mypyc_safe_key(pprint._safe_key): # type: ignore [attr-defined, name-defined, misc] +class _mypyc_safe_key(pprint._safe_key): # type: ignore [name-defined, misc] """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. From 96aa63e6818ce44da0d0da724523d37d2da185fc Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:00:41 +0000 Subject: [PATCH 11/28] refactor --- mypyc/codegen/emit.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 24df8e6ecf007..9db22cc75c79a 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1238,13 +1238,11 @@ def c_array_initializer(components: list[str], *, indented: bool = False) -> str return "{\n " + ",\n ".join(res) + "\n" + indent + "}" -class _mypyc_safe_key(pprint._safe_key): # type: ignore [name-defined, misc] +def _mypyc_safe_key(obj: object) -> str: """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. This is NOT safe for use as a sort key for other types, so we MUST replace the original pprint._safe_key once we've pprinted our object. """ - - def __lt__(self, other: _mypyc_safe_key) -> bool: - return str(type(self.obj)) + repr(self.obj) < str(type(other.obj)) + repr(other.obj) + return str(type(obj)) + repr(obj) From 58e4b6752ff39e15dcb4634db3f94c46207831ab Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:32:40 -0400 Subject: [PATCH 12/28] Update emit.py --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 9db22cc75c79a..ebb152923ff64 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1245,4 +1245,4 @@ def _mypyc_safe_key(obj: object) -> str: This is NOT safe for use as a sort key for other types, so we MUST replace the original pprint._safe_key once we've pprinted our object. """ - return str(type(obj)) + repr(obj) + return str(type(obj)) + pprint.pformat(obj) From 629f1cf99bd7febced2ba9f4f07f6fc045c1f1b3 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 06:11:39 +0000 Subject: [PATCH 13/28] extract func pformat_deterministic --- mypyc/codegen/emit.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 44f78f6510c3f..bb813a6c52fdc 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -210,16 +210,7 @@ def object_annotation(self, obj: object, line: str) -> str: If it contains illegal characters, an empty string is returned.""" line_width = self._indent + len(line) - - # temporarily override pprint._safe_key - default_safe_key = pprint._safe_key # type: ignore [attr-defined] - pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] - - # pretty print the object - formatted = pprint.pformat(obj, compact=True, width=max(90 - line_width, 20)) - - # replace the _safe_key - pprint._safe_key = default_safe_key # type: ignore [attr-defined] + formatted = pformat_deterministic(obj, line_width) if any(x in formatted for x in ("/*", "*/", "\0")): return "" @@ -1283,6 +1274,19 @@ def native_function_doc_initializer(func: FuncIR) -> str: return c_string_initializer(docstring.encode("ascii", errors="backslashreplace")) +def pformat_deterministic(obj: object, line_width: int) -> str: + """Pretty-print `obj` with deterministic sorting for mypyc literal types.""" + # Temporarily override pprint._safe_key + default_safe_key = pprint._safe_key # type: ignore [attr-defined] + pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] + + try: + return pprint.pformat(obj, compact=True, width=max(90 - line_width, 20)) + finally: + # Always restore the original key to avoid affecting other pprint users. + pprint._safe_key = default_safe_key # type: ignore [attr-defined] + + def _mypyc_safe_key(obj: object) -> str: """A custom sort key implementation for pprint that makes the output deterministic for all literal types supported by mypyc. From ac8f3ffc15abc90a304d144bfd47fed273b7f73f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 6 Jan 2026 06:15:37 +0000 Subject: [PATCH 14/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index bb813a6c52fdc..0631ecd479c0b 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1279,7 +1279,7 @@ def pformat_deterministic(obj: object, line_width: int) -> str: # Temporarily override pprint._safe_key default_safe_key = pprint._safe_key # type: ignore [attr-defined] pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] - + try: return pprint.pformat(obj, compact=True, width=max(90 - line_width, 20)) finally: From 866d86e5d46d1ee01edcb52fd022f8969c49a524 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 06:27:51 +0000 Subject: [PATCH 15/28] add tests --- mypyc/codegen/emit.py | 10 +++++----- mypyc/test/test_emit.py | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index bb813a6c52fdc..a59e4850cd185 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -210,7 +210,7 @@ def object_annotation(self, obj: object, line: str) -> str: If it contains illegal characters, an empty string is returned.""" line_width = self._indent + len(line) - formatted = pformat_deterministic(obj, line_width) + formatted = pformat_deterministic(obj, max(90 - line_width, 20)) if any(x in formatted for x in ("/*", "*/", "\0")): return "" @@ -1274,14 +1274,14 @@ def native_function_doc_initializer(func: FuncIR) -> str: return c_string_initializer(docstring.encode("ascii", errors="backslashreplace")) -def pformat_deterministic(obj: object, line_width: int) -> str: +def pformat_deterministic(obj: object, width: int) -> str: """Pretty-print `obj` with deterministic sorting for mypyc literal types.""" - # Temporarily override pprint._safe_key + # Temporarily override pprint._safe_key to get deterministic ordering of containers. default_safe_key = pprint._safe_key # type: ignore [attr-defined] pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] - + try: - return pprint.pformat(obj, compact=True, width=max(90 - line_width, 20)) + return pprint.pformat(obj, compact=True, width=width) finally: # Always restore the original key to avoid affecting other pprint users. pprint._safe_key = default_safe_key # type: ignore [attr-defined] diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py index 1baed3964299e..7fef63aa744c8 100644 --- a/mypyc/test/test_emit.py +++ b/mypyc/test/test_emit.py @@ -1,8 +1,9 @@ from __future__ import annotations +import pprint import unittest -from mypyc.codegen.emit import Emitter, EmitterContext +from mypyc.codegen.emit import Emitter, EmitterContext, pformat_deterministic from mypyc.common import HAVE_IMMORTAL from mypyc.ir.class_ir import ClassIR from mypyc.ir.ops import BasicBlock, Register, Value @@ -21,6 +22,37 @@ from mypyc.namegen import NameGenerator +class TestPformatDeterministic(unittest.TestCase): + def test_frozenset_elements_sorted(self) -> None: + fs_small = frozenset({("a", 1)}) + fs_large = frozenset({("a", 1), ("b", 2)}) + literal_a = frozenset({fs_large, fs_small}) + literal_b = frozenset({fs_small, fs_large}) + expected = "frozenset({frozenset({('a', 1)}), frozenset({('a', 1), ('b', 2)})})" + + assert pformat_deterministic(literal_a, 80) == expected + assert pformat_deterministic(literal_b, 80) == expected + + def test_nested_supported_literals(self) -> None: + nested_frozen = frozenset({("m", 0), ("n", 1)}) + item_a = ("outer", 1, nested_frozen) + item_b = ("outer", 2, frozenset({("x", 3)})) + literal_a = frozenset({item_a, item_b}) + literal_b = frozenset({item_b, item_a}) + expected = ( + "frozenset({('outer', 1, frozenset({('m', 0), ('n', 1)})), " + "('outer', 2, frozenset({('x', 3)}))})" + ) + + assert pformat_deterministic(literal_a, 120) == expected + assert pformat_deterministic(literal_b, 120) == expected + + def test_restores_default_safe_key(self) -> None: + original_safe_key = pprint._safe_key + pformat_deterministic({"key": "value"}, 80) + assert pprint._safe_key is original_safe_key + + class TestEmitter(unittest.TestCase): def setUp(self) -> None: self.n = Register(int_rprimitive, "n") From 6e0153a749f03fb6a8a9372aa6e6c87571744e57 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 06:49:32 +0000 Subject: [PATCH 16/28] fix test --- mypyc/test/test_emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py index 7fef63aa744c8..b197ab036424c 100644 --- a/mypyc/test/test_emit.py +++ b/mypyc/test/test_emit.py @@ -28,7 +28,7 @@ def test_frozenset_elements_sorted(self) -> None: fs_large = frozenset({("a", 1), ("b", 2)}) literal_a = frozenset({fs_large, fs_small}) literal_b = frozenset({fs_small, fs_large}) - expected = "frozenset({frozenset({('a', 1)}), frozenset({('a', 1), ('b', 2)})})" + expected = "frozenset({frozenset({('a', 1), ('b', 2)}), frozenset({('a', 1)})})" assert pformat_deterministic(literal_a, 80) == expected assert pformat_deterministic(literal_b, 80) == expected From 5aa3d4023d70904d9cab455b022af59ef51b39a9 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 07:05:36 +0000 Subject: [PATCH 17/28] fix test --- mypyc/test/test_emit.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py index b197ab036424c..ef0f0ae9c2920 100644 --- a/mypyc/test/test_emit.py +++ b/mypyc/test/test_emit.py @@ -39,10 +39,7 @@ def test_nested_supported_literals(self) -> None: item_b = ("outer", 2, frozenset({("x", 3)})) literal_a = frozenset({item_a, item_b}) literal_b = frozenset({item_b, item_a}) - expected = ( - "frozenset({('outer', 1, frozenset({('m', 0), ('n', 1)})), " - "('outer', 2, frozenset({('x', 3)}))})" - ) + expected = "frozenset({('outer', 2, frozenset({('x', 3)})), ('outer', 1, frozenset({('m', 0), ('n', 1)}))})" assert pformat_deterministic(literal_a, 120) == expected assert pformat_deterministic(literal_b, 120) == expected From ab520a241b7f5b735d8035fb07e26631f4aa5d28 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 07:17:43 +0000 Subject: [PATCH 18/28] fix test --- mypyc/test/test_emit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py index ef0f0ae9c2920..c084b47584761 100644 --- a/mypyc/test/test_emit.py +++ b/mypyc/test/test_emit.py @@ -28,7 +28,7 @@ def test_frozenset_elements_sorted(self) -> None: fs_large = frozenset({("a", 1), ("b", 2)}) literal_a = frozenset({fs_large, fs_small}) literal_b = frozenset({fs_small, fs_large}) - expected = "frozenset({frozenset({('a', 1), ('b', 2)}), frozenset({('a', 1)})})" + expected = "frozenset({frozenset({('b', 2), ('a', 1)}), frozenset({('a', 1)})})" assert pformat_deterministic(literal_a, 80) == expected assert pformat_deterministic(literal_b, 80) == expected @@ -39,7 +39,7 @@ def test_nested_supported_literals(self) -> None: item_b = ("outer", 2, frozenset({("x", 3)})) literal_a = frozenset({item_a, item_b}) literal_b = frozenset({item_b, item_a}) - expected = "frozenset({('outer', 2, frozenset({('x', 3)})), ('outer', 1, frozenset({('m', 0), ('n', 1)}))})" + expected = "frozenset({('outer', 1, frozenset({('m', 0), ('n', 1)})), ('outer', 2, frozenset({('x', 3)}))})" assert pformat_deterministic(literal_a, 120) == expected assert pformat_deterministic(literal_b, 120) == expected From 99a07eadb37f85a75de4c3b96b5d63e518f0d1cc Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 08:09:00 +0000 Subject: [PATCH 19/28] fix recursion --- mypyc/codegen/emit.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index a59e4850cd185..554f802fa344d 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1281,7 +1281,7 @@ def pformat_deterministic(obj: object, width: int) -> str: pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] try: - return pprint.pformat(obj, compact=True, width=width) + return pprint.pformat(_normalize_sets(obj), compact=True, width=width) finally: # Always restore the original key to avoid affecting other pprint users. pprint._safe_key = default_safe_key # type: ignore [attr-defined] @@ -1293,5 +1293,22 @@ def _mypyc_safe_key(obj: object) -> str: This is NOT safe for use as a sort key for other types, so we MUST replace the original pprint._safe_key once we've pprinted our object. + + Since this is a bit hacky, see for context https://github.com/python/mypy/pull/20012 + """ + return str(type(obj)) + pprint.pformat(obj, compact=True, sort_dicts=True) + + +def _normalize_sets(obj: object) -> object: + """Recursively normalize sets/frozensets so pprint sees a stable order. + + We rebuild each set/frozenset from a deterministically sorted list of + elements (using _mypyc_safe_key), recursing into tuples those sets contain. + This keeps repr (used internally) output deterministic without otherwise changing content. """ - return str(type(obj)) + pprint.pformat(obj) + if isinstance(obj, frozenset): + return frozenset(map(_normalize_sets, sorted(obj, key=_mypyc_safe_key))) + elif isinstance(obj, tuple): + return tuple(map(_normalize_sets, obj)) + else: + return obj From 065692613959ba27fba90ff460d7619ef6e31da5 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 08:51:26 +0000 Subject: [PATCH 20/28] use subclass --- mypyc/codegen/emit.py | 50 +++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 554f802fa344d..63d3c27b28d18 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -6,7 +6,7 @@ import sys import textwrap from collections.abc import Callable -from typing import Final +from typing import TYPE_CHECKING, Final from mypyc.codegen.cstring import c_string_initializer from mypyc.codegen.literals import Literals @@ -59,6 +59,9 @@ from mypyc.namegen import NameGenerator, exported_name from mypyc.sametype import is_same_type +if TYPE_CHECKING: + from _typeshed import SupportsWrite + # Whether to insert debug asserts for all error handling, to quickly # catch errors propagating without exceptions set. DEBUG_ERRORS: Final = False @@ -1281,7 +1284,8 @@ def pformat_deterministic(obj: object, width: int) -> str: pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] try: - return pprint.pformat(_normalize_sets(obj), compact=True, width=width) + printer = _DeterministicPrettyPrinter(width=width, compact=True, sort_dicts=True) + return printer.pformat(obj) finally: # Always restore the original key to avoid affecting other pprint users. pprint._safe_key = default_safe_key # type: ignore [attr-defined] @@ -1299,16 +1303,34 @@ def _mypyc_safe_key(obj: object) -> str: return str(type(obj)) + pprint.pformat(obj, compact=True, sort_dicts=True) -def _normalize_sets(obj: object) -> object: - """Recursively normalize sets/frozensets so pprint sees a stable order. +class _DeterministicPrettyPrinter(pprint.PrettyPrinter): + """PrettyPrinter that sorts set/frozenset elements deterministically.""" - We rebuild each set/frozenset from a deterministically sorted list of - elements (using _mypyc_safe_key), recursing into tuples those sets contain. - This keeps repr (used internally) output deterministic without otherwise changing content. - """ - if isinstance(obj, frozenset): - return frozenset(map(_normalize_sets, sorted(obj, key=_mypyc_safe_key))) - elif isinstance(obj, tuple): - return tuple(map(_normalize_sets, obj)) - else: - return obj + _dispatch = pprint.PrettyPrinter._dispatch.copy() + + def _pprint_set( + self, + object: set[object] | frozenset[object], + stream: "SupportsWrite[str]", + indent: int, + allowance: int, + context: dict[int, int], + level: int, + ) -> None: + if not object: + stream.write(repr(object)) + return + typ = type(object) + if typ is set: + stream.write("{") + endchar = "}" + else: + stream.write("frozenset({") + endchar = "})" + indent += len("frozenset(") + items = sorted(object, key=_mypyc_safe_key) + self._format_items(items, stream, indent, allowance + len(endchar), context, level) + stream.write(endchar) + + _dispatch[set.__repr__] = _pprint_set + _dispatch[frozenset.__repr__] = _pprint_set From 3b19181d96c03d589f6f9738a9567fa612b69962 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 6 Jan 2026 08:53:45 +0000 Subject: [PATCH 21/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/codegen/emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 63d3c27b28d18..cf648001b8245 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1311,7 +1311,7 @@ class _DeterministicPrettyPrinter(pprint.PrettyPrinter): def _pprint_set( self, object: set[object] | frozenset[object], - stream: "SupportsWrite[str]", + stream: SupportsWrite[str], indent: int, allowance: int, context: dict[int, int], From 09328d5affd274d1371c6746e5161f6efe9fcc85 Mon Sep 17 00:00:00 2001 From: BobTheBuidler Date: Tue, 6 Jan 2026 09:06:06 +0000 Subject: [PATCH 22/28] fix test --- mypyc/test/test_emit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py index c084b47584761..f52da1cd8757b 100644 --- a/mypyc/test/test_emit.py +++ b/mypyc/test/test_emit.py @@ -39,7 +39,7 @@ def test_nested_supported_literals(self) -> None: item_b = ("outer", 2, frozenset({("x", 3)})) literal_a = frozenset({item_a, item_b}) literal_b = frozenset({item_b, item_a}) - expected = "frozenset({('outer', 1, frozenset({('m', 0), ('n', 1)})), ('outer', 2, frozenset({('x', 3)}))})" + expected = "frozenset({('outer', 2, frozenset({('x', 3)})), ('outer', 1, frozenset({('n', 1), ('m', 0)}))})" assert pformat_deterministic(literal_a, 120) == expected assert pformat_deterministic(literal_b, 120) == expected From a76a89c7c6e75242bbadb1214c7ffc6c3d5ee908 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Fri, 20 Feb 2026 17:43:45 -0400 Subject: [PATCH 23/28] test(mypyc): add hash-seed stability checks for deterministic pformat output --- mypyc/test/test_emit.py | 62 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py index f52da1cd8757b..75f2358d5354b 100644 --- a/mypyc/test/test_emit.py +++ b/mypyc/test/test_emit.py @@ -1,6 +1,10 @@ from __future__ import annotations +import os import pprint +import subprocess +import sys +import textwrap import unittest from mypyc.codegen.emit import Emitter, EmitterContext, pformat_deterministic @@ -23,15 +27,31 @@ class TestPformatDeterministic(unittest.TestCase): + HASH_SEEDS = (1, 2, 3, 4, 5, 11, 19, 27) + + def run_with_hash_seed(self, script: str, seed: int) -> str: + env = dict(os.environ) + env["PYTHONHASHSEED"] = str(seed) + proc = subprocess.run( + [sys.executable, "-c", script], + capture_output=True, + check=True, + text=True, + env=env, + ) + return proc.stdout.strip() + def test_frozenset_elements_sorted(self) -> None: fs_small = frozenset({("a", 1)}) fs_large = frozenset({("a", 1), ("b", 2)}) literal_a = frozenset({fs_large, fs_small}) literal_b = frozenset({fs_small, fs_large}) - expected = "frozenset({frozenset({('b', 2), ('a', 1)}), frozenset({('a', 1)})})" + out_a = pformat_deterministic(literal_a, 80) + out_b = pformat_deterministic(literal_b, 80) - assert pformat_deterministic(literal_a, 80) == expected - assert pformat_deterministic(literal_b, 80) == expected + assert out_a == out_b + assert "frozenset({('a', 1)})" in out_a + assert "frozenset({('a', 1), ('b', 2)})" in out_a def test_nested_supported_literals(self) -> None: nested_frozen = frozenset({("m", 0), ("n", 1)}) @@ -39,16 +59,46 @@ def test_nested_supported_literals(self) -> None: item_b = ("outer", 2, frozenset({("x", 3)})) literal_a = frozenset({item_a, item_b}) literal_b = frozenset({item_b, item_a}) - expected = "frozenset({('outer', 2, frozenset({('x', 3)})), ('outer', 1, frozenset({('n', 1), ('m', 0)}))})" + out_a = pformat_deterministic(literal_a, 120) + out_b = pformat_deterministic(literal_b, 120) - assert pformat_deterministic(literal_a, 120) == expected - assert pformat_deterministic(literal_b, 120) == expected + assert out_a == out_b + assert "frozenset({('m', 0), ('n', 1)})" in out_a def test_restores_default_safe_key(self) -> None: original_safe_key = pprint._safe_key pformat_deterministic({"key": "value"}, 80) assert pprint._safe_key is original_safe_key + def test_frozenset_output_is_stable_across_hash_seeds(self) -> None: + script = textwrap.dedent( + """ + from mypyc.codegen.emit import pformat_deterministic + + fs_small = frozenset({("a", 1)}) + fs_large = frozenset({("a", 1), ("b", 2)}) + literal = frozenset({fs_small, fs_large}) + print(pformat_deterministic(literal, 80)) + """ + ) + outputs = {self.run_with_hash_seed(script, seed) for seed in self.HASH_SEEDS} + assert len(outputs) == 1 + + def test_nested_output_is_stable_across_hash_seeds(self) -> None: + script = textwrap.dedent( + """ + from mypyc.codegen.emit import pformat_deterministic + + nested_frozen = frozenset({("m", 0), ("n", 1)}) + item_a = ("outer", 1, nested_frozen) + item_b = ("outer", 2, frozenset({("x", 3)})) + literal = frozenset({item_a, item_b}) + print(pformat_deterministic(literal, 120)) + """ + ) + outputs = {self.run_with_hash_seed(script, seed) for seed in self.HASH_SEEDS} + assert len(outputs) == 1 + class TestEmitter(unittest.TestCase): def setUp(self) -> None: From 16931f90efa67a5ce6d9a7fdf126bdba29c6e304 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 20 Feb 2026 21:45:55 +0000 Subject: [PATCH 24/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/test/test_emit.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py index 75f2358d5354b..48df593579e19 100644 --- a/mypyc/test/test_emit.py +++ b/mypyc/test/test_emit.py @@ -33,11 +33,7 @@ def run_with_hash_seed(self, script: str, seed: int) -> str: env = dict(os.environ) env["PYTHONHASHSEED"] = str(seed) proc = subprocess.run( - [sys.executable, "-c", script], - capture_output=True, - check=True, - text=True, - env=env, + [sys.executable, "-c", script], capture_output=True, check=True, text=True, env=env ) return proc.stdout.strip() @@ -71,22 +67,19 @@ def test_restores_default_safe_key(self) -> None: assert pprint._safe_key is original_safe_key def test_frozenset_output_is_stable_across_hash_seeds(self) -> None: - script = textwrap.dedent( - """ + script = textwrap.dedent(""" from mypyc.codegen.emit import pformat_deterministic fs_small = frozenset({("a", 1)}) fs_large = frozenset({("a", 1), ("b", 2)}) literal = frozenset({fs_small, fs_large}) print(pformat_deterministic(literal, 80)) - """ - ) + """) outputs = {self.run_with_hash_seed(script, seed) for seed in self.HASH_SEEDS} assert len(outputs) == 1 def test_nested_output_is_stable_across_hash_seeds(self) -> None: - script = textwrap.dedent( - """ + script = textwrap.dedent(""" from mypyc.codegen.emit import pformat_deterministic nested_frozen = frozenset({("m", 0), ("n", 1)}) @@ -94,8 +87,7 @@ def test_nested_output_is_stable_across_hash_seeds(self) -> None: item_b = ("outer", 2, frozenset({("x", 3)})) literal = frozenset({item_a, item_b}) print(pformat_deterministic(literal, 120)) - """ - ) + """) outputs = {self.run_with_hash_seed(script, seed) for seed in self.HASH_SEEDS} assert len(outputs) == 1 From 9dd8b752b09515ac4b7dd4a3c64e3fc59361f62e Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Fri, 20 Feb 2026 17:47:17 -0400 Subject: [PATCH 25/28] fix(mypyc): stabilize nested frozenset literal formatting deterministically --- mypyc/codegen/emit.py | 140 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 114 insertions(+), 26 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 4633da2130f8a..01147570794cc 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1290,48 +1290,135 @@ def native_function_doc_initializer(func: FuncIR) -> str: def pformat_deterministic(obj: object, width: int) -> str: """Pretty-print `obj` with deterministic sorting for mypyc literal types.""" - # Temporarily override pprint._safe_key to get deterministic ordering of containers. - default_safe_key = pprint._safe_key # type: ignore [attr-defined] - pprint._safe_key = _mypyc_safe_key # type: ignore [attr-defined] - - try: - printer = _DeterministicPrettyPrinter(width=width, compact=True, sort_dicts=True) - return printer.pformat(obj) - finally: - # Always restore the original key to avoid affecting other pprint users. - pprint._safe_key = default_safe_key # type: ignore [attr-defined] - - -def _mypyc_safe_key(obj: object) -> str: - """A custom sort key implementation for pprint that makes the output deterministic - for all literal types supported by mypyc. + printer = _DeterministicPrettyPrinter(width=width, compact=True, sort_dicts=True) + return printer.pformat(obj) + + +def _mypyc_safe_key(obj: object) -> tuple[str, tuple[object, ...] | str]: + """Build a deterministic recursive key for sorting mypyc literal values.""" + typ = type(obj) + if typ is tuple: + return ("tuple", tuple(_mypyc_safe_key(item) for item in obj)) + if typ is list: + return ("list", tuple(_mypyc_safe_key(item) for item in obj)) + if typ is dict: + items = tuple( + sorted((_mypyc_safe_key(key), _mypyc_safe_key(value)) for key, value in obj.items()) + ) + return ("dict", items) + if typ is set: + return ("set", tuple(sorted(_mypyc_safe_key(item) for item in obj))) + if typ is frozenset: + return ("frozenset", tuple(sorted(_mypyc_safe_key(item) for item in obj))) + return (f"{typ.__module__}.{typ.__qualname__}", repr(obj)) - This is NOT safe for use as a sort key for other types, so we MUST replace the - original pprint._safe_key once we've pprinted our object. - Since this is a bit hacky, see for context https://github.com/python/mypy/pull/20012 - """ - return str(type(obj)) + pprint.pformat(obj, compact=True, sort_dicts=True) +def _recursion_repr(obj: object) -> str: + return f"" class _DeterministicPrettyPrinter(pprint.PrettyPrinter): - """PrettyPrinter that sorts set/frozenset elements deterministically.""" + """PrettyPrinter that uses deterministic sorting for literal containers.""" _dispatch = pprint.PrettyPrinter._dispatch.copy() + def _safe_repr( + self, obj: object, context: dict[int, int], maxlevels: int | None, level: int + ) -> tuple[str, bool, bool]: + typ = type(obj) + repr_fn = getattr(typ, "__repr__", None) + + if isinstance(obj, dict) and repr_fn is dict.__repr__: + if not obj: + return "{}", True, False + obj_id = id(obj) + if maxlevels and level >= maxlevels: + return "{...}", False, obj_id in context + if obj_id in context: + return _recursion_repr(obj), False, True + context[obj_id] = 1 + readable = True + recursive = False + components: list[str] = [] + level += 1 + items = ( + sorted(obj.items(), key=lambda item: _mypyc_safe_key(item[0])) + if self._sort_dicts + else obj.items() + ) + for key, value in items: + key_repr, key_readable, key_recursive = self.format(key, context, maxlevels, level) + value_repr, value_readable, value_recursive = self.format( + value, context, maxlevels, level + ) + components.append(f"{key_repr}: {value_repr}") + readable = readable and key_readable and value_readable + recursive = recursive or key_recursive or value_recursive + del context[obj_id] + return "{%s}" % ", ".join(components), readable, recursive + + if isinstance(obj, (set, frozenset)) and repr_fn is typ.__repr__: + if not obj: + return repr(obj), True, False + obj_id = id(obj) + if maxlevels and level >= maxlevels: + if typ is set: + return "{...}", False, obj_id in context + return "frozenset({...})", False, obj_id in context + if obj_id in context: + return _recursion_repr(obj), False, True + context[obj_id] = 1 + readable = True + recursive = False + components: list[str] = [] + level += 1 + for item in sorted(obj, key=_mypyc_safe_key): + item_repr, item_readable, item_recursive = self.format(item, context, maxlevels, level) + components.append(item_repr) + readable = readable and item_readable + recursive = recursive or item_recursive + del context[obj_id] + if typ is set: + return "{%s}" % ", ".join(components), readable, recursive + return "frozenset({%s})" % ", ".join(components), readable, recursive + + return super()._safe_repr(obj, context, maxlevels, level) + + def _pprint_dict( + self, + obj: dict[object, object], + stream: SupportsWrite[str], + indent: int, + allowance: int, + context: dict[int, int], + level: int, + ) -> None: + write = stream.write + write("{") + if self._indent_per_level > 1: + write((self._indent_per_level - 1) * " ") + if obj: + items = ( + sorted(obj.items(), key=lambda item: _mypyc_safe_key(item[0])) + if self._sort_dicts + else obj.items() + ) + self._format_dict_items(items, stream, indent, allowance + 1, context, level) + write("}") + def _pprint_set( self, - object: set[object] | frozenset[object], + obj: set[object] | frozenset[object], stream: SupportsWrite[str], indent: int, allowance: int, context: dict[int, int], level: int, ) -> None: - if not object: - stream.write(repr(object)) + if not obj: + stream.write(repr(obj)) return - typ = type(object) + typ = type(obj) if typ is set: stream.write("{") endchar = "}" @@ -1339,9 +1426,10 @@ def _pprint_set( stream.write("frozenset({") endchar = "})" indent += len("frozenset(") - items = sorted(object, key=_mypyc_safe_key) + items = sorted(obj, key=_mypyc_safe_key) self._format_items(items, stream, indent, allowance + len(endchar), context, level) stream.write(endchar) + _dispatch[dict.__repr__] = _pprint_dict _dispatch[set.__repr__] = _pprint_set _dispatch[frozenset.__repr__] = _pprint_set From fa97ab613d2c46bfd99a539f8c92152b57af663c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 20 Feb 2026 21:50:36 +0000 Subject: [PATCH 26/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/codegen/emit.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 01147570794cc..754a371af5a3a 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1373,7 +1373,9 @@ def _safe_repr( components: list[str] = [] level += 1 for item in sorted(obj, key=_mypyc_safe_key): - item_repr, item_readable, item_recursive = self.format(item, context, maxlevels, level) + item_repr, item_readable, item_recursive = self.format( + item, context, maxlevels, level + ) components.append(item_repr) readable = readable and item_readable recursive = recursive or item_recursive From a4f125357d7d52c8f0a828a5746286f757b33001 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Sat, 21 Feb 2026 00:03:04 +0000 Subject: [PATCH 27/28] fix(mypyc): type deterministic pprint hooks to unblock CI self-check --- mypyc/codegen/emit.py | 104 ++++++++++++++++++++++++++++++---------- mypyc/test/test_emit.py | 18 ++++--- 2 files changed, 91 insertions(+), 31 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 754a371af5a3a..aaea518aa01a7 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1294,21 +1294,24 @@ def pformat_deterministic(obj: object, width: int) -> str: return printer.pformat(obj) -def _mypyc_safe_key(obj: object) -> tuple[str, tuple[object, ...] | str]: +def _mypyc_safe_key(obj: object) -> tuple[str, object]: """Build a deterministic recursive key for sorting mypyc literal values.""" typ = type(obj) - if typ is tuple: + if isinstance(obj, tuple): return ("tuple", tuple(_mypyc_safe_key(item) for item in obj)) - if typ is list: + if isinstance(obj, list): return ("list", tuple(_mypyc_safe_key(item) for item in obj)) - if typ is dict: + if isinstance(obj, dict): items = tuple( - sorted((_mypyc_safe_key(key), _mypyc_safe_key(value)) for key, value in obj.items()) + sorted( + ((_mypyc_safe_key(key), _mypyc_safe_key(value)) for key, value in obj.items()), + key=lambda item: item[0], + ) ) return ("dict", items) - if typ is set: + if isinstance(obj, set): return ("set", tuple(sorted(_mypyc_safe_key(item) for item in obj))) - if typ is frozenset: + if isinstance(obj, frozenset): return ("frozenset", tuple(sorted(_mypyc_safe_key(item) for item in obj))) return (f"{typ.__module__}.{typ.__qualname__}", repr(obj)) @@ -1320,19 +1323,39 @@ def _recursion_repr(obj: object) -> str: class _DeterministicPrettyPrinter(pprint.PrettyPrinter): """PrettyPrinter that uses deterministic sorting for literal containers.""" - _dispatch = pprint.PrettyPrinter._dispatch.copy() + def __init__( + self, + indent: int = 1, + width: int = 80, + depth: int | None = None, + *, + compact: bool = False, + sort_dicts: bool = True, + ) -> None: + super().__init__( + indent=indent, width=width, depth=depth, compact=compact, sort_dicts=sort_dicts + ) + self.mypyc_indent_per_level = indent + self.mypyc_sort_dicts = sort_dicts + self.mypyc_width = width + + def format( + self, obj: object, context: dict[int, int], maxlevels: object, level: int + ) -> tuple[str, bool, bool]: + return self._safe_repr(obj, context, maxlevels, level) def _safe_repr( - self, obj: object, context: dict[int, int], maxlevels: int | None, level: int + self, obj: object, context: dict[int, int], maxlevels: object, level: int ) -> tuple[str, bool, bool]: typ = type(obj) repr_fn = getattr(typ, "__repr__", None) + maxlevels_int = maxlevels if isinstance(maxlevels, int) else 0 if isinstance(obj, dict) and repr_fn is dict.__repr__: if not obj: return "{}", True, False obj_id = id(obj) - if maxlevels and level >= maxlevels: + if maxlevels_int and level >= maxlevels_int: return "{...}", False, obj_id in context if obj_id in context: return _recursion_repr(obj), False, True @@ -1343,7 +1366,7 @@ def _safe_repr( level += 1 items = ( sorted(obj.items(), key=lambda item: _mypyc_safe_key(item[0])) - if self._sort_dicts + if self.mypyc_sort_dicts else obj.items() ) for key, value in items: @@ -1361,7 +1384,7 @@ def _safe_repr( if not obj: return repr(obj), True, False obj_id = id(obj) - if maxlevels and level >= maxlevels: + if maxlevels_int and level >= maxlevels_int: if typ is set: return "{...}", False, obj_id in context return "frozenset({...})", False, obj_id in context @@ -1370,21 +1393,56 @@ def _safe_repr( context[obj_id] = 1 readable = True recursive = False - components: list[str] = [] + set_components: list[str] = [] level += 1 for item in sorted(obj, key=_mypyc_safe_key): item_repr, item_readable, item_recursive = self.format( item, context, maxlevels, level ) - components.append(item_repr) + set_components.append(item_repr) readable = readable and item_readable recursive = recursive or item_recursive del context[obj_id] if typ is set: - return "{%s}" % ", ".join(components), readable, recursive - return "frozenset({%s})" % ", ".join(components), readable, recursive + return "{%s}" % ", ".join(set_components), readable, recursive + return "frozenset({%s})" % ", ".join(set_components), readable, recursive - return super()._safe_repr(obj, context, maxlevels, level) + return super()._safe_repr(obj, context, maxlevels_int, level) + + def _format( + self, + obj: object, + stream: SupportsWrite[str], + indent: int, + allowance: int, + context: dict[int, int], + level: int, + ) -> None: + typ = type(obj) + if typ not in (dict, set, frozenset): + super()._format(obj, stream, indent, allowance, context, level) + return + + obj_id = id(obj) + if obj_id in context: + stream.write(_recursion_repr(obj)) + return + + rep = self._repr(obj, context, level) + max_width = self.mypyc_width - indent - allowance + if len(rep) > max_width: + context[obj_id] = 1 + try: + if isinstance(obj, dict): + self._pprint_dict(obj, stream, indent, allowance, context, level + 1) + elif isinstance(obj, (set, frozenset)): + self._pprint_set(obj, stream, indent, allowance, context, level + 1) + else: + assert False, "unreachable: _format only handles dict/set/frozenset here" + finally: + del context[obj_id] + return + stream.write(rep) def _pprint_dict( self, @@ -1397,13 +1455,13 @@ def _pprint_dict( ) -> None: write = stream.write write("{") - if self._indent_per_level > 1: - write((self._indent_per_level - 1) * " ") + if self.mypyc_indent_per_level > 1: + write((self.mypyc_indent_per_level - 1) * " ") if obj: items = ( sorted(obj.items(), key=lambda item: _mypyc_safe_key(item[0])) - if self._sort_dicts - else obj.items() + if self.mypyc_sort_dicts + else list(obj.items()) ) self._format_dict_items(items, stream, indent, allowance + 1, context, level) write("}") @@ -1431,7 +1489,3 @@ def _pprint_set( items = sorted(obj, key=_mypyc_safe_key) self._format_items(items, stream, indent, allowance + len(endchar), context, level) stream.write(endchar) - - _dispatch[dict.__repr__] = _pprint_dict - _dispatch[set.__repr__] = _pprint_set - _dispatch[frozenset.__repr__] = _pprint_set diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py index 48df593579e19..7523958af42bf 100644 --- a/mypyc/test/test_emit.py +++ b/mypyc/test/test_emit.py @@ -62,24 +62,29 @@ def test_nested_supported_literals(self) -> None: assert "frozenset({('m', 0), ('n', 1)})" in out_a def test_restores_default_safe_key(self) -> None: - original_safe_key = pprint._safe_key + sample = {"beta": [2, 1], "alpha": [3, 4]} + before = pprint.pformat(sample, width=80, compact=True, sort_dicts=True) pformat_deterministic({"key": "value"}, 80) - assert pprint._safe_key is original_safe_key + after = pprint.pformat(sample, width=80, compact=True, sort_dicts=True) + assert after == before def test_frozenset_output_is_stable_across_hash_seeds(self) -> None: - script = textwrap.dedent(""" + script = textwrap.dedent( + """ from mypyc.codegen.emit import pformat_deterministic fs_small = frozenset({("a", 1)}) fs_large = frozenset({("a", 1), ("b", 2)}) literal = frozenset({fs_small, fs_large}) print(pformat_deterministic(literal, 80)) - """) + """ + ) outputs = {self.run_with_hash_seed(script, seed) for seed in self.HASH_SEEDS} assert len(outputs) == 1 def test_nested_output_is_stable_across_hash_seeds(self) -> None: - script = textwrap.dedent(""" + script = textwrap.dedent( + """ from mypyc.codegen.emit import pformat_deterministic nested_frozen = frozenset({("m", 0), ("n", 1)}) @@ -87,7 +92,8 @@ def test_nested_output_is_stable_across_hash_seeds(self) -> None: item_b = ("outer", 2, frozenset({("x", 3)})) literal = frozenset({item_a, item_b}) print(pformat_deterministic(literal, 120)) - """) + """ + ) outputs = {self.run_with_hash_seed(script, seed) for seed in self.HASH_SEEDS} assert len(outputs) == 1 From 38113c36a3f32a4d7c299c7080f0cc2e197b7766 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 21 Feb 2026 00:10:30 +0000 Subject: [PATCH 28/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/test/test_emit.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py index 7523958af42bf..c743dced359a4 100644 --- a/mypyc/test/test_emit.py +++ b/mypyc/test/test_emit.py @@ -69,22 +69,19 @@ def test_restores_default_safe_key(self) -> None: assert after == before def test_frozenset_output_is_stable_across_hash_seeds(self) -> None: - script = textwrap.dedent( - """ + script = textwrap.dedent(""" from mypyc.codegen.emit import pformat_deterministic fs_small = frozenset({("a", 1)}) fs_large = frozenset({("a", 1), ("b", 2)}) literal = frozenset({fs_small, fs_large}) print(pformat_deterministic(literal, 80)) - """ - ) + """) outputs = {self.run_with_hash_seed(script, seed) for seed in self.HASH_SEEDS} assert len(outputs) == 1 def test_nested_output_is_stable_across_hash_seeds(self) -> None: - script = textwrap.dedent( - """ + script = textwrap.dedent(""" from mypyc.codegen.emit import pformat_deterministic nested_frozen = frozenset({("m", 0), ("n", 1)}) @@ -92,8 +89,7 @@ def test_nested_output_is_stable_across_hash_seeds(self) -> None: item_b = ("outer", 2, frozenset({("x", 3)})) literal = frozenset({item_a, item_b}) print(pformat_deterministic(literal, 120)) - """ - ) + """) outputs = {self.run_with_hash_seed(script, seed) for seed in self.HASH_SEEDS} assert len(outputs) == 1