From add8b255832d6ec9352e1cc2e82490a37e6c0cca Mon Sep 17 00:00:00 2001 From: beauxq Date: Sat, 1 Nov 2025 09:36:16 -0700 Subject: [PATCH 1/8] start towards patching `float | int` into typeshed --- build/py3_8/generate_docstubs.py | 194 ++++++++++++++++++++++++++++++- 1 file changed, 193 insertions(+), 1 deletion(-) diff --git a/build/py3_8/generate_docstubs.py b/build/py3_8/generate_docstubs.py index 2d1ba719e9..4b6dd4b831 100644 --- a/build/py3_8/generate_docstubs.py +++ b/build/py3_8/generate_docstubs.py @@ -1,9 +1,200 @@ from __future__ import annotations +import ast from pathlib import Path from shutil import copytree, rmtree from docify import main as docify # pyright:ignore[reportMissingTypeStubs] +from typing_extensions import override + +KEEP_FLOAT = frozenset(( + "stdlib/math.pyi/sqrt", + "stdlib/math.pyi/e", + "stdlib/math.pyi/pi", + "stdlib/math.pyi/inf", + "stdlib/math.pyi/nan", + "stdlib/math.pyi/tau", +)) + + +def name_for_target(node: ast.AnnAssign) -> str: + return ( + node.target.id + if isinstance(node.target, ast.Name) + else node.target.attr + if isinstance(node.target, ast.Attribute) + else "subscript" + ) + + +def name_for_node( + node: ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef | ast.arg | ast.Name | ast.AnnAssign +) -> str: + return ( + node.name + if isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)) + else node.arg + if isinstance(node, ast.arg) + else name_for_target(node) + if isinstance(node, ast.AnnAssign) + else node.id + ) + + +def has_int_child(node: ast.BinOp) -> bool: + if isinstance(node.right, ast.Name) and node.right.id == "int": + return True + if isinstance(node.left, ast.Name) and node.left.id == "int": + return True + if isinstance(node.right, ast.BinOp): + # assuming "|" is the only BinOp in annotations + assert isinstance(node.right.op, ast.BitOr), node.right.op + if has_int_child(node.right): + return True + if isinstance(node.left, ast.BinOp): + assert isinstance(node.left.op, ast.BitOr), node.left.op + if has_int_child(node.left): + return True + return False + + +class AnnotationTrackingVisitor(ast.NodeVisitor): + parent_stack: list[ast.AST] + in_ann: str | None = None + floats: list[ast.Name] + module: str + + def __init__(self, module: str) -> None: + self.parent_stack = [] + self.floats = [] + self.module = module + + @override + def visit(self, node: ast.AST) -> None: + self.parent_stack.append(node) + super().visit(node) + _ = self.parent_stack.pop() + + @override + def visit_AnnAssign(self, node: ast.AnnAssign) -> None: + assert len(ns := list(ast.iter_fields(node))) == 4 and not isinstance(ns[3], ast.AST), ns + # I don't know what the 4th field "simple" is, but it's not an AST. + + self.visit(node.target) + + self.in_ann = name_for_target(node) + self.visit(node.annotation) + self.in_ann = None + + if node.value: + self.visit(node.value) + + @override + def visit_arg(self, node: ast.arg) -> None: + # arg name str, annotation, type comment str + assert len(list(ast.iter_fields(node))) == 3, list(ast.iter_fields(node)) + + self.in_ann = node.arg + if node.annotation: + self.visit(node.annotation) + self.in_ann = None + + # NOTE: assuming function return values are actually float if annotated as such. + # If we don't want to assume that, uncomment this: + + # @override + # def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + # # Copied from implementation of base generic_visit + # # and modified for "returns" + # for field, value in ast.iter_fields(node): + # if isinstance(value, list): + # for item in value: + # if isinstance(item, ast.AST): + # self.visit(item) + # elif isinstance(value, ast.AST): + # if field == "returns": + # self.in_ann = "returns" + # self.visit(value) + # if field == "returns": + # self.in_ann = None + + def _node_path(self) -> str: + strs = [ + name_for_node(n) + for n in self.parent_stack + if isinstance(n, ( + ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef, ast.arg, ast.Name, ast.AnnAssign + )) + ] + if len(strs) > 0 and strs[-1] == "float": + _ = strs.pop() + return self.module + "/" + ".".join(strs) + + def _with_int(self) -> bool: + assert isinstance(self.parent_stack[-1], ast.Name) and self.parent_stack[-1].id == "float", self.parent_stack + index = len(self.parent_stack) - 2 + while index >= 0: + traverse_node = self.parent_stack[index] + if not isinstance(traverse_node, ast.BinOp): + return False + # assuming "|" is the only BinOp in annotations + assert isinstance(traverse_node.op, ast.BitOr), traverse_node.op + if has_int_child(traverse_node): + return True + index -= 1 + return False + + def _is_final(self) -> bool: + assert isinstance(self.parent_stack[-1], ast.Name) and self.parent_stack[-1].id == "float", self.parent_stack + if len(self.parent_stack) > 1: + parent = self.parent_stack[-2] + if isinstance(parent, ast.Subscript) and isinstance(parent.value, ast.Name) and parent.value.id == "Final": + assert parent.slice is self.parent_stack[-1], parent.slice + return True + return False + + @override + def generic_visit(self, node: ast.AST) -> None: + if self.in_ann is not None and isinstance(node, ast.Name) and node.id == "float": + assert node is self.parent_stack[-1], self.parent_stack + + if self._with_int() or self._is_final(): + # There's already some already float | int in typeshed + # and assuming `Final[float]` is really float + # print(self._node_path()) + pass + else: + node_path = self._node_path() + # if node_path == "stubs/aiofiles/aiofiles/ospath.pyi/float": + # print("break point for debugging") + + if node_path not in KEEP_FLOAT: + self.floats.append(node) + super().generic_visit(node) + + +def float_expand(stubs_with_docs_path: Path) -> None: + """ change stubs in the given directory from `float` to `float | int` """ + import os + + for dir_path, _dir_names, file_names in os.walk(stubs_with_docs_path): + for file_name in file_names: + if not file_name.endswith(".pyi"): + continue + file_path = os.path.join(dir_path, file_name) + rel_path = Path(os.path.relpath(file_path, stubs_with_docs_path)).as_posix() + file_bytes = Path(file_path).read_bytes() + file_parsed = ast.parse(file_bytes) + v = AnnotationTrackingVisitor(rel_path) + v.visit(file_parsed) + + if len(v.floats) > 0: + print(file_path) + v.floats.sort(key=lambda n: (n.lineno, n.col_offset)) + for fl in v.floats: + assert fl.end_lineno == fl.lineno, fl # always within 1 line + assert fl.end_col_offset == fl.col_offset + 5, fl # always "float" 5 chars + print(f"{file_path}:{fl.lineno}") def main(*, overwrite: bool): @@ -29,7 +220,8 @@ def main(*, overwrite: bool): copytree(stubs_path, stubs_with_docs_path, dirs_exist_ok=False) elif not stubs_with_docs_path.exists(): copytree(stubs_path, stubs_with_docs_path, dirs_exist_ok=True) - docify([str(stubs_with_docs_path / "stdlib"), "--if-needed", "--in-place"]) + float_expand(stubs_with_docs_path) + # docify([str(stubs_with_docs_path / "stdlib"), "--if-needed", "--in-place"]) if __name__ == "__main__": From 8903491e872c40ee89967d754205ce4be5ee73a2 Mon Sep 17 00:00:00 2001 From: beauxq Date: Sat, 1 Nov 2025 10:53:53 -0700 Subject: [PATCH 2/8] file modification and cleaning --- build/py3_8/generate_docstubs.py | 48 +++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/build/py3_8/generate_docstubs.py b/build/py3_8/generate_docstubs.py index 4b6dd4b831..af648d99a5 100644 --- a/build/py3_8/generate_docstubs.py +++ b/build/py3_8/generate_docstubs.py @@ -8,13 +8,15 @@ from typing_extensions import override KEEP_FLOAT = frozenset(( - "stdlib/math.pyi/sqrt", - "stdlib/math.pyi/e", - "stdlib/math.pyi/pi", - "stdlib/math.pyi/inf", - "stdlib/math.pyi/nan", - "stdlib/math.pyi/tau", + # Example: + # If we didn't want to change the type of the `priority` parameter + # of the `register` function in the `Registry` class of markdown/util.pyi + + # "stubs/Markdown/markdown/util.pyi/Registry.register.priority", + + # See implementation `_node_path` for details on this identifier string. )) +""" identifiers for `float` that we don't want to change to `float | int` """ def name_for_target(node: ast.AnnAssign) -> str: @@ -77,7 +79,7 @@ def visit(self, node: ast.AST) -> None: @override def visit_AnnAssign(self, node: ast.AnnAssign) -> None: - assert len(ns := list(ast.iter_fields(node))) == 4 and not isinstance(ns[3], ast.AST), ns + assert len(list(ast.iter_fields(node))) == 4, list(ast.iter_fields(node)) # I don't know what the 4th field "simple" is, but it's not an AST. self.visit(node.target) @@ -101,6 +103,7 @@ def visit_arg(self, node: ast.arg) -> None: # NOTE: assuming function return values are actually float if annotated as such. # If we don't want to assume that, uncomment this: + # (probably would also want `visit_AsyncFunctionDef`) # @override # def visit_FunctionDef(self, node: ast.FunctionDef) -> None: @@ -119,6 +122,7 @@ def visit_arg(self, node: ast.arg) -> None: # self.in_ann = None def _node_path(self) -> str: + """ a string that identifies the current node (from `self.parent_stack`) """ strs = [ name_for_node(n) for n in self.parent_stack @@ -131,6 +135,7 @@ def _node_path(self) -> str: return self.module + "/" + ".".join(strs) def _with_int(self) -> bool: + """ `float | int` already """ assert isinstance(self.parent_stack[-1], ast.Name) and self.parent_stack[-1].id == "float", self.parent_stack index = len(self.parent_stack) - 2 while index >= 0: @@ -190,11 +195,28 @@ def float_expand(stubs_with_docs_path: Path) -> None: if len(v.floats) > 0: print(file_path) - v.floats.sort(key=lambda n: (n.lineno, n.col_offset)) - for fl in v.floats: - assert fl.end_lineno == fl.lineno, fl # always within 1 line - assert fl.end_col_offset == fl.col_offset + 5, fl # always "float" 5 chars - print(f"{file_path}:{fl.lineno}") + # compute start offset of each line in file + lines = file_bytes.split(b"\n") + line_starts = [0] + for line in lines: + line_starts.append(line_starts[-1] + len(line) + 1) # +1 for newline + + # process in reverse order to avoid offset changes affecting subsequent replacements + v.floats.sort(key=lambda n: (n.lineno, n.col_offset), reverse=True) + for fl in v.floats: + assert fl.end_lineno == fl.lineno, fl # always within 1 line + assert fl.end_col_offset and fl.end_col_offset == fl.col_offset + 5, fl # always "float" 5 chars + + # calculate offsets in file (from offsets in line) + line_start = line_starts[fl.lineno - 1] + start_offset = line_start + fl.col_offset + end_offset = line_start + fl.end_col_offset + + assert file_bytes[start_offset:end_offset] == b"float", file_bytes[start_offset:end_offset] + + file_bytes = file_bytes[:start_offset] + b"float | int" + file_bytes[end_offset:] + + _ = Path(file_path).write_bytes(file_bytes) def main(*, overwrite: bool): @@ -221,7 +243,7 @@ def main(*, overwrite: bool): elif not stubs_with_docs_path.exists(): copytree(stubs_path, stubs_with_docs_path, dirs_exist_ok=True) float_expand(stubs_with_docs_path) - # docify([str(stubs_with_docs_path / "stdlib"), "--if-needed", "--in-place"]) + docify([str(stubs_with_docs_path / "stdlib"), "--if-needed", "--in-place"]) if __name__ == "__main__": From 112981748dc2069dad3a6eda2d1a30ba08ab8885 Mon Sep 17 00:00:00 2001 From: beauxq Date: Sat, 1 Nov 2025 10:58:44 -0700 Subject: [PATCH 3/8] ruff format --- build/py3_8/generate_docstubs.py | 55 +++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/build/py3_8/generate_docstubs.py b/build/py3_8/generate_docstubs.py index af648d99a5..77a3dd2e75 100644 --- a/build/py3_8/generate_docstubs.py +++ b/build/py3_8/generate_docstubs.py @@ -11,9 +11,7 @@ # Example: # If we didn't want to change the type of the `priority` parameter # of the `register` function in the `Registry` class of markdown/util.pyi - # "stubs/Markdown/markdown/util.pyi/Registry.register.priority", - # See implementation `_node_path` for details on this identifier string. )) """ identifiers for `float` that we don't want to change to `float | int` """ @@ -30,7 +28,12 @@ def name_for_target(node: ast.AnnAssign) -> str: def name_for_node( - node: ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef | ast.arg | ast.Name | ast.AnnAssign + node: ast.ClassDef + | ast.FunctionDef + | ast.AsyncFunctionDef + | ast.arg + | ast.Name + | ast.AnnAssign, ) -> str: return ( node.name @@ -122,21 +125,31 @@ def visit_arg(self, node: ast.arg) -> None: # self.in_ann = None def _node_path(self) -> str: - """ a string that identifies the current node (from `self.parent_stack`) """ + """a string that identifies the current node (from `self.parent_stack`)""" strs = [ name_for_node(n) for n in self.parent_stack - if isinstance(n, ( - ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef, ast.arg, ast.Name, ast.AnnAssign - )) + if isinstance( + n, + ( + ast.ClassDef, + ast.FunctionDef, + ast.AsyncFunctionDef, + ast.arg, + ast.Name, + ast.AnnAssign, + ), + ) ] if len(strs) > 0 and strs[-1] == "float": _ = strs.pop() return self.module + "/" + ".".join(strs) def _with_int(self) -> bool: - """ `float | int` already """ - assert isinstance(self.parent_stack[-1], ast.Name) and self.parent_stack[-1].id == "float", self.parent_stack + """`float | int` already""" + assert ( + isinstance(self.parent_stack[-1], ast.Name) and self.parent_stack[-1].id == "float" + ), self.parent_stack index = len(self.parent_stack) - 2 while index >= 0: traverse_node = self.parent_stack[index] @@ -150,10 +163,16 @@ def _with_int(self) -> bool: return False def _is_final(self) -> bool: - assert isinstance(self.parent_stack[-1], ast.Name) and self.parent_stack[-1].id == "float", self.parent_stack + assert ( + isinstance(self.parent_stack[-1], ast.Name) and self.parent_stack[-1].id == "float" + ), self.parent_stack if len(self.parent_stack) > 1: parent = self.parent_stack[-2] - if isinstance(parent, ast.Subscript) and isinstance(parent.value, ast.Name) and parent.value.id == "Final": + if ( + isinstance(parent, ast.Subscript) + and isinstance(parent.value, ast.Name) + and parent.value.id == "Final" + ): assert parent.slice is self.parent_stack[-1], parent.slice return True return False @@ -179,7 +198,7 @@ def generic_visit(self, node: ast.AST) -> None: def float_expand(stubs_with_docs_path: Path) -> None: - """ change stubs in the given directory from `float` to `float | int` """ + """change stubs in the given directory from `float` to `float | int`""" import os for dir_path, _dir_names, file_names in os.walk(stubs_with_docs_path): @@ -205,16 +224,22 @@ def float_expand(stubs_with_docs_path: Path) -> None: v.floats.sort(key=lambda n: (n.lineno, n.col_offset), reverse=True) for fl in v.floats: assert fl.end_lineno == fl.lineno, fl # always within 1 line - assert fl.end_col_offset and fl.end_col_offset == fl.col_offset + 5, fl # always "float" 5 chars + assert ( + fl.end_col_offset and fl.end_col_offset == fl.col_offset + 5 + ) # always "float" 5 chars # calculate offsets in file (from offsets in line) line_start = line_starts[fl.lineno - 1] start_offset = line_start + fl.col_offset end_offset = line_start + fl.end_col_offset - assert file_bytes[start_offset:end_offset] == b"float", file_bytes[start_offset:end_offset] + assert file_bytes[start_offset:end_offset] == b"float", file_bytes[ + start_offset:end_offset + ] - file_bytes = file_bytes[:start_offset] + b"float | int" + file_bytes[end_offset:] + file_bytes = ( + file_bytes[:start_offset] + b"float | int" + file_bytes[end_offset:] + ) _ = Path(file_path).write_bytes(file_bytes) From b2467817d06e59c996d7a3e3819c9b38b3ce2cdc Mon Sep 17 00:00:00 2001 From: beauxq Date: Sat, 1 Nov 2025 11:51:25 -0700 Subject: [PATCH 4/8] annotate `KEEP_FLOAT` --- build/py3_8/generate_docstubs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build/py3_8/generate_docstubs.py b/build/py3_8/generate_docstubs.py index 77a3dd2e75..70977addb7 100644 --- a/build/py3_8/generate_docstubs.py +++ b/build/py3_8/generate_docstubs.py @@ -3,11 +3,12 @@ import ast from pathlib import Path from shutil import copytree, rmtree +from typing import Final from docify import main as docify # pyright:ignore[reportMissingTypeStubs] from typing_extensions import override -KEEP_FLOAT = frozenset(( +KEEP_FLOAT: Final[frozenset[str]] = frozenset(( # Example: # If we didn't want to change the type of the `priority` parameter # of the `register` function in the `Registry` class of markdown/util.pyi From e1b1a9b7185ed684115bc18e01d7ef2e0f0cfe60 Mon Sep 17 00:00:00 2001 From: beauxq Date: Sat, 1 Nov 2025 13:01:58 -0700 Subject: [PATCH 5/8] some of what ruff finds --- build/py3_8/generate_docstubs.py | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/build/py3_8/generate_docstubs.py b/build/py3_8/generate_docstubs.py index 70977addb7..8e4f0b840c 100644 --- a/build/py3_8/generate_docstubs.py +++ b/build/py3_8/generate_docstubs.py @@ -1,6 +1,7 @@ from __future__ import annotations import ast +import os from pathlib import Path from shutil import copytree, rmtree from typing import Final @@ -148,9 +149,8 @@ def _node_path(self) -> str: def _with_int(self) -> bool: """`float | int` already""" - assert ( - isinstance(self.parent_stack[-1], ast.Name) and self.parent_stack[-1].id == "float" - ), self.parent_stack + assert isinstance(self.parent_stack[-1], ast.Name), self.parent_stack + assert self.parent_stack[-1].id == "float", self.parent_stack[-1].id index = len(self.parent_stack) - 2 while index >= 0: traverse_node = self.parent_stack[index] @@ -164,9 +164,8 @@ def _with_int(self) -> bool: return False def _is_final(self) -> bool: - assert ( - isinstance(self.parent_stack[-1], ast.Name) and self.parent_stack[-1].id == "float" - ), self.parent_stack + assert isinstance(self.parent_stack[-1], ast.Name), self.parent_stack + assert self.parent_stack[-1].id == "float", self.parent_stack if len(self.parent_stack) > 1: parent = self.parent_stack[-2] if ( @@ -186,13 +185,9 @@ def generic_visit(self, node: ast.AST) -> None: if self._with_int() or self._is_final(): # There's already some already float | int in typeshed # and assuming `Final[float]` is really float - # print(self._node_path()) pass else: node_path = self._node_path() - # if node_path == "stubs/aiofiles/aiofiles/ospath.pyi/float": - # print("break point for debugging") - if node_path not in KEEP_FLOAT: self.floats.append(node) super().generic_visit(node) @@ -200,13 +195,11 @@ def generic_visit(self, node: ast.AST) -> None: def float_expand(stubs_with_docs_path: Path) -> None: """change stubs in the given directory from `float` to `float | int`""" - import os - for dir_path, _dir_names, file_names in os.walk(stubs_with_docs_path): for file_name in file_names: if not file_name.endswith(".pyi"): continue - file_path = os.path.join(dir_path, file_name) + file_path = Path(dir_path) / file_name rel_path = Path(os.path.relpath(file_path, stubs_with_docs_path)).as_posix() file_bytes = Path(file_path).read_bytes() file_parsed = ast.parse(file_bytes) @@ -214,7 +207,7 @@ def float_expand(stubs_with_docs_path: Path) -> None: v.visit(file_parsed) if len(v.floats) > 0: - print(file_path) + print(file_path.as_posix()) # compute start offset of each line in file lines = file_bytes.split(b"\n") line_starts = [0] @@ -225,9 +218,8 @@ def float_expand(stubs_with_docs_path: Path) -> None: v.floats.sort(key=lambda n: (n.lineno, n.col_offset), reverse=True) for fl in v.floats: assert fl.end_lineno == fl.lineno, fl # always within 1 line - assert ( - fl.end_col_offset and fl.end_col_offset == fl.col_offset + 5 - ) # always "float" 5 chars + assert fl.end_col_offset is not None + assert fl.end_col_offset == fl.col_offset + 5 # always "float" 5 chars # calculate offsets in file (from offsets in line) line_start = line_starts[fl.lineno - 1] From 3421c1f85afe1f40cde9c2bc905317e07db966a4 Mon Sep 17 00:00:00 2001 From: beauxq Date: Mon, 3 Nov 2025 19:29:22 -0800 Subject: [PATCH 6/8] remaining ruff rules --- build/py3_8/generate_docstubs.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/build/py3_8/generate_docstubs.py b/build/py3_8/generate_docstubs.py index 8e4f0b840c..8832c874d9 100644 --- a/build/py3_8/generate_docstubs.py +++ b/build/py3_8/generate_docstubs.py @@ -2,6 +2,7 @@ import ast import os +from collections.abc import Iterable from pathlib import Path from shutil import copytree, rmtree from typing import Final @@ -107,24 +108,24 @@ def visit_arg(self, node: ast.arg) -> None: self.in_ann = None # NOTE: assuming function return values are actually float if annotated as such. - # If we don't want to assume that, uncomment this: + # If we don't want to assume that, rename this to `visit_FunctionDef`: # (probably would also want `visit_AsyncFunctionDef`) # @override - # def visit_FunctionDef(self, node: ast.FunctionDef) -> None: - # # Copied from implementation of base generic_visit - # # and modified for "returns" - # for field, value in ast.iter_fields(node): - # if isinstance(value, list): - # for item in value: - # if isinstance(item, ast.AST): - # self.visit(item) - # elif isinstance(value, ast.AST): - # if field == "returns": - # self.in_ann = "returns" - # self.visit(value) - # if field == "returns": - # self.in_ann = None + def _unused(self, node: ast.FunctionDef) -> None: + # Copied from implementation of base generic_visit + # and modified for "returns" + for field, value in ast.iter_fields(node): # pyright: ignore[reportAny] + if isinstance(value, ast.AST): + if field == "returns": + self.in_ann = "returns" + self.visit(value) + if field == "returns": + self.in_ann = None + elif isinstance(value, Iterable): + for item in value: + if isinstance(item, ast.AST): + self.visit(item) def _node_path(self) -> str: """a string that identifies the current node (from `self.parent_stack`)""" From 91a1aab9ace129c8b98389abcdb875dfb3c9db81 Mon Sep 17 00:00:00 2001 From: beauxq Date: Mon, 3 Nov 2025 19:40:12 -0800 Subject: [PATCH 7/8] move to new file --- build/py3_8/float_int_stubs.py | 241 +++++++++++++++++++++++++++++++ build/py3_8/generate_docstubs.py | 233 ------------------------------ 2 files changed, 241 insertions(+), 233 deletions(-) create mode 100644 build/py3_8/float_int_stubs.py diff --git a/build/py3_8/float_int_stubs.py b/build/py3_8/float_int_stubs.py new file mode 100644 index 0000000000..cc7685c034 --- /dev/null +++ b/build/py3_8/float_int_stubs.py @@ -0,0 +1,241 @@ +from __future__ import annotations + +import ast +import os +from collections.abc import Iterable +from pathlib import Path +from typing import Final + +from typing_extensions import override + +KEEP_FLOAT: Final[frozenset[str]] = frozenset(( + # Example: + # If we didn't want to change the type of the `priority` parameter + # of the `register` function in the `Registry` class of markdown/util.pyi + # "stubs/Markdown/markdown/util.pyi/Registry.register.priority", + # See implementation `_node_path` for details on this identifier string. +)) +""" identifiers for `float` that we don't want to change to `float | int` """ + + +def name_for_target(node: ast.AnnAssign) -> str: + return ( + node.target.id + if isinstance(node.target, ast.Name) + else node.target.attr + if isinstance(node.target, ast.Attribute) + else "subscript" + ) + + +def name_for_node( + node: ast.ClassDef + | ast.FunctionDef + | ast.AsyncFunctionDef + | ast.arg + | ast.Name + | ast.AnnAssign, +) -> str: + return ( + node.name + if isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)) + else node.arg + if isinstance(node, ast.arg) + else name_for_target(node) + if isinstance(node, ast.AnnAssign) + else node.id + ) + + +def has_int_child(node: ast.BinOp) -> bool: + if isinstance(node.right, ast.Name) and node.right.id == "int": + return True + if isinstance(node.left, ast.Name) and node.left.id == "int": + return True + if isinstance(node.right, ast.BinOp): + # assuming "|" is the only BinOp in annotations + assert isinstance(node.right.op, ast.BitOr), node.right.op + if has_int_child(node.right): + return True + if isinstance(node.left, ast.BinOp): + assert isinstance(node.left.op, ast.BitOr), node.left.op + if has_int_child(node.left): + return True + return False + + +class AnnotationTrackingVisitor(ast.NodeVisitor): + parent_stack: list[ast.AST] + in_ann: str | None = None + floats: list[ast.Name] + module: str + + def __init__(self, module: str) -> None: + self.parent_stack = [] + self.floats = [] + self.module = module + + @override + def visit(self, node: ast.AST) -> None: + self.parent_stack.append(node) + super().visit(node) + _ = self.parent_stack.pop() + + @override + def visit_AnnAssign(self, node: ast.AnnAssign) -> None: + assert len(list(ast.iter_fields(node))) == 4, list(ast.iter_fields(node)) + # I don't know what the 4th field "simple" is, but it's not an AST. + + self.visit(node.target) + + self.in_ann = name_for_target(node) + self.visit(node.annotation) + self.in_ann = None + + if node.value: + self.visit(node.value) + + @override + def visit_arg(self, node: ast.arg) -> None: + # arg name str, annotation, type comment str + assert len(list(ast.iter_fields(node))) == 3, list(ast.iter_fields(node)) + + self.in_ann = node.arg + if node.annotation: + self.visit(node.annotation) + self.in_ann = None + + # NOTE: assuming function return values are actually float if annotated as such. + # If we don't want to assume that, rename this to `visit_FunctionDef`: + # (probably would also want `visit_AsyncFunctionDef`) + + # @override + def _unused(self, node: ast.FunctionDef) -> None: + # Copied from implementation of base generic_visit + # and modified for "returns" + for field, value in ast.iter_fields(node): # pyright: ignore[reportAny] + if isinstance(value, ast.AST): + if field == "returns": + self.in_ann = "returns" + self.visit(value) + if field == "returns": + self.in_ann = None + elif isinstance(value, Iterable): + for item in value: + if isinstance(item, ast.AST): + self.visit(item) + + def _node_path(self) -> str: + """a string that identifies the current node (from `self.parent_stack`)""" + strs = [ + name_for_node(n) + for n in self.parent_stack + if isinstance( + n, + ( + ast.ClassDef, + ast.FunctionDef, + ast.AsyncFunctionDef, + ast.arg, + ast.Name, + ast.AnnAssign, + ), + ) + ] + if len(strs) > 0 and strs[-1] == "float": + _ = strs.pop() + return self.module + "/" + ".".join(strs) + + def _with_int(self) -> bool: + """`float | int` already""" + assert isinstance(self.parent_stack[-1], ast.Name), self.parent_stack + assert self.parent_stack[-1].id == "float", self.parent_stack[-1].id + index = len(self.parent_stack) - 2 + while index >= 0: + traverse_node = self.parent_stack[index] + if not isinstance(traverse_node, ast.BinOp): + return False + # assuming "|" is the only BinOp in annotations + assert isinstance(traverse_node.op, ast.BitOr), traverse_node.op + if has_int_child(traverse_node): + return True + index -= 1 + return False + + def _is_final(self) -> bool: + assert isinstance(self.parent_stack[-1], ast.Name), self.parent_stack + assert self.parent_stack[-1].id == "float", self.parent_stack + if len(self.parent_stack) > 1: + parent = self.parent_stack[-2] + if ( + isinstance(parent, ast.Subscript) + and isinstance(parent.value, ast.Name) + and parent.value.id == "Final" + ): + assert parent.slice is self.parent_stack[-1], parent.slice + return True + return False + + @override + def generic_visit(self, node: ast.AST) -> None: + if self.in_ann is not None and isinstance(node, ast.Name) and node.id == "float": + assert node is self.parent_stack[-1], self.parent_stack + + if self._with_int() or self._is_final(): + # There's already some already float | int in typeshed + # and assuming `Final[float]` is really float + pass + else: + node_path = self._node_path() + if node_path not in KEEP_FLOAT: + self.floats.append(node) + super().generic_visit(node) + + +def float_expand(stubs_with_docs_path: Path) -> None: + """change stubs in the given directory from `float` to `float | int`""" + for dir_path, _dir_names, file_names in os.walk(stubs_with_docs_path): + for file_name in file_names: + if not file_name.endswith(".pyi"): + continue + file_path = Path(dir_path) / file_name + rel_path = Path(os.path.relpath(file_path, stubs_with_docs_path)).as_posix() + file_bytes = Path(file_path).read_bytes() + file_parsed = ast.parse(file_bytes) + v = AnnotationTrackingVisitor(rel_path) + v.visit(file_parsed) + + if len(v.floats) > 0: + print(file_path.as_posix()) + # compute start offset of each line in file + lines = file_bytes.split(b"\n") + line_starts = [0] + for line in lines: + line_starts.append(line_starts[-1] + len(line) + 1) # +1 for newline + + # process in reverse order to avoid offset changes affecting subsequent replacements + v.floats.sort(key=lambda n: (n.lineno, n.col_offset), reverse=True) + for fl in v.floats: + assert fl.end_lineno == fl.lineno, fl # always within 1 line + assert fl.end_col_offset is not None + assert fl.end_col_offset == fl.col_offset + 5 # always "float" 5 chars + + # calculate offsets in file (from offsets in line) + line_start = line_starts[fl.lineno - 1] + start_offset = line_start + fl.col_offset + end_offset = line_start + fl.end_col_offset + + assert file_bytes[start_offset:end_offset] == b"float", file_bytes[ + start_offset:end_offset + ] + + file_bytes = ( + file_bytes[:start_offset] + b"float | int" + file_bytes[end_offset:] + ) + + _ = Path(file_path).write_bytes(file_bytes) + + +if __name__ == "__main__": + stubs_with_docs_path = Path("docstubs") + float_expand(stubs_with_docs_path) diff --git a/build/py3_8/generate_docstubs.py b/build/py3_8/generate_docstubs.py index 8832c874d9..2d1ba719e9 100644 --- a/build/py3_8/generate_docstubs.py +++ b/build/py3_8/generate_docstubs.py @@ -1,241 +1,9 @@ from __future__ import annotations -import ast -import os -from collections.abc import Iterable from pathlib import Path from shutil import copytree, rmtree -from typing import Final from docify import main as docify # pyright:ignore[reportMissingTypeStubs] -from typing_extensions import override - -KEEP_FLOAT: Final[frozenset[str]] = frozenset(( - # Example: - # If we didn't want to change the type of the `priority` parameter - # of the `register` function in the `Registry` class of markdown/util.pyi - # "stubs/Markdown/markdown/util.pyi/Registry.register.priority", - # See implementation `_node_path` for details on this identifier string. -)) -""" identifiers for `float` that we don't want to change to `float | int` """ - - -def name_for_target(node: ast.AnnAssign) -> str: - return ( - node.target.id - if isinstance(node.target, ast.Name) - else node.target.attr - if isinstance(node.target, ast.Attribute) - else "subscript" - ) - - -def name_for_node( - node: ast.ClassDef - | ast.FunctionDef - | ast.AsyncFunctionDef - | ast.arg - | ast.Name - | ast.AnnAssign, -) -> str: - return ( - node.name - if isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)) - else node.arg - if isinstance(node, ast.arg) - else name_for_target(node) - if isinstance(node, ast.AnnAssign) - else node.id - ) - - -def has_int_child(node: ast.BinOp) -> bool: - if isinstance(node.right, ast.Name) and node.right.id == "int": - return True - if isinstance(node.left, ast.Name) and node.left.id == "int": - return True - if isinstance(node.right, ast.BinOp): - # assuming "|" is the only BinOp in annotations - assert isinstance(node.right.op, ast.BitOr), node.right.op - if has_int_child(node.right): - return True - if isinstance(node.left, ast.BinOp): - assert isinstance(node.left.op, ast.BitOr), node.left.op - if has_int_child(node.left): - return True - return False - - -class AnnotationTrackingVisitor(ast.NodeVisitor): - parent_stack: list[ast.AST] - in_ann: str | None = None - floats: list[ast.Name] - module: str - - def __init__(self, module: str) -> None: - self.parent_stack = [] - self.floats = [] - self.module = module - - @override - def visit(self, node: ast.AST) -> None: - self.parent_stack.append(node) - super().visit(node) - _ = self.parent_stack.pop() - - @override - def visit_AnnAssign(self, node: ast.AnnAssign) -> None: - assert len(list(ast.iter_fields(node))) == 4, list(ast.iter_fields(node)) - # I don't know what the 4th field "simple" is, but it's not an AST. - - self.visit(node.target) - - self.in_ann = name_for_target(node) - self.visit(node.annotation) - self.in_ann = None - - if node.value: - self.visit(node.value) - - @override - def visit_arg(self, node: ast.arg) -> None: - # arg name str, annotation, type comment str - assert len(list(ast.iter_fields(node))) == 3, list(ast.iter_fields(node)) - - self.in_ann = node.arg - if node.annotation: - self.visit(node.annotation) - self.in_ann = None - - # NOTE: assuming function return values are actually float if annotated as such. - # If we don't want to assume that, rename this to `visit_FunctionDef`: - # (probably would also want `visit_AsyncFunctionDef`) - - # @override - def _unused(self, node: ast.FunctionDef) -> None: - # Copied from implementation of base generic_visit - # and modified for "returns" - for field, value in ast.iter_fields(node): # pyright: ignore[reportAny] - if isinstance(value, ast.AST): - if field == "returns": - self.in_ann = "returns" - self.visit(value) - if field == "returns": - self.in_ann = None - elif isinstance(value, Iterable): - for item in value: - if isinstance(item, ast.AST): - self.visit(item) - - def _node_path(self) -> str: - """a string that identifies the current node (from `self.parent_stack`)""" - strs = [ - name_for_node(n) - for n in self.parent_stack - if isinstance( - n, - ( - ast.ClassDef, - ast.FunctionDef, - ast.AsyncFunctionDef, - ast.arg, - ast.Name, - ast.AnnAssign, - ), - ) - ] - if len(strs) > 0 and strs[-1] == "float": - _ = strs.pop() - return self.module + "/" + ".".join(strs) - - def _with_int(self) -> bool: - """`float | int` already""" - assert isinstance(self.parent_stack[-1], ast.Name), self.parent_stack - assert self.parent_stack[-1].id == "float", self.parent_stack[-1].id - index = len(self.parent_stack) - 2 - while index >= 0: - traverse_node = self.parent_stack[index] - if not isinstance(traverse_node, ast.BinOp): - return False - # assuming "|" is the only BinOp in annotations - assert isinstance(traverse_node.op, ast.BitOr), traverse_node.op - if has_int_child(traverse_node): - return True - index -= 1 - return False - - def _is_final(self) -> bool: - assert isinstance(self.parent_stack[-1], ast.Name), self.parent_stack - assert self.parent_stack[-1].id == "float", self.parent_stack - if len(self.parent_stack) > 1: - parent = self.parent_stack[-2] - if ( - isinstance(parent, ast.Subscript) - and isinstance(parent.value, ast.Name) - and parent.value.id == "Final" - ): - assert parent.slice is self.parent_stack[-1], parent.slice - return True - return False - - @override - def generic_visit(self, node: ast.AST) -> None: - if self.in_ann is not None and isinstance(node, ast.Name) and node.id == "float": - assert node is self.parent_stack[-1], self.parent_stack - - if self._with_int() or self._is_final(): - # There's already some already float | int in typeshed - # and assuming `Final[float]` is really float - pass - else: - node_path = self._node_path() - if node_path not in KEEP_FLOAT: - self.floats.append(node) - super().generic_visit(node) - - -def float_expand(stubs_with_docs_path: Path) -> None: - """change stubs in the given directory from `float` to `float | int`""" - for dir_path, _dir_names, file_names in os.walk(stubs_with_docs_path): - for file_name in file_names: - if not file_name.endswith(".pyi"): - continue - file_path = Path(dir_path) / file_name - rel_path = Path(os.path.relpath(file_path, stubs_with_docs_path)).as_posix() - file_bytes = Path(file_path).read_bytes() - file_parsed = ast.parse(file_bytes) - v = AnnotationTrackingVisitor(rel_path) - v.visit(file_parsed) - - if len(v.floats) > 0: - print(file_path.as_posix()) - # compute start offset of each line in file - lines = file_bytes.split(b"\n") - line_starts = [0] - for line in lines: - line_starts.append(line_starts[-1] + len(line) + 1) # +1 for newline - - # process in reverse order to avoid offset changes affecting subsequent replacements - v.floats.sort(key=lambda n: (n.lineno, n.col_offset), reverse=True) - for fl in v.floats: - assert fl.end_lineno == fl.lineno, fl # always within 1 line - assert fl.end_col_offset is not None - assert fl.end_col_offset == fl.col_offset + 5 # always "float" 5 chars - - # calculate offsets in file (from offsets in line) - line_start = line_starts[fl.lineno - 1] - start_offset = line_start + fl.col_offset - end_offset = line_start + fl.end_col_offset - - assert file_bytes[start_offset:end_offset] == b"float", file_bytes[ - start_offset:end_offset - ] - - file_bytes = ( - file_bytes[:start_offset] + b"float | int" + file_bytes[end_offset:] - ) - - _ = Path(file_path).write_bytes(file_bytes) def main(*, overwrite: bool): @@ -261,7 +29,6 @@ def main(*, overwrite: bool): copytree(stubs_path, stubs_with_docs_path, dirs_exist_ok=False) elif not stubs_with_docs_path.exists(): copytree(stubs_path, stubs_with_docs_path, dirs_exist_ok=True) - float_expand(stubs_with_docs_path) docify([str(stubs_with_docs_path / "stdlib"), "--if-needed", "--in-place"]) From a4e338d826dd2f6263351a489a211b168e93577b Mon Sep 17 00:00:00 2001 From: beauxq Date: Wed, 5 Nov 2025 09:11:02 -0800 Subject: [PATCH 8/8] a pylint issue --- build/py3_8/float_int_stubs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build/py3_8/float_int_stubs.py b/build/py3_8/float_int_stubs.py index cc7685c034..fa0bef39c3 100644 --- a/build/py3_8/float_int_stubs.py +++ b/build/py3_8/float_int_stubs.py @@ -192,14 +192,14 @@ def generic_visit(self, node: ast.AST) -> None: super().generic_visit(node) -def float_expand(stubs_with_docs_path: Path) -> None: +def float_expand(stubs: Path) -> None: """change stubs in the given directory from `float` to `float | int`""" - for dir_path, _dir_names, file_names in os.walk(stubs_with_docs_path): + for dir_path, _dir_names, file_names in os.walk(stubs): for file_name in file_names: if not file_name.endswith(".pyi"): continue file_path = Path(dir_path) / file_name - rel_path = Path(os.path.relpath(file_path, stubs_with_docs_path)).as_posix() + rel_path = Path(os.path.relpath(file_path, stubs)).as_posix() file_bytes = Path(file_path).read_bytes() file_parsed = ast.parse(file_bytes) v = AnnotationTrackingVisitor(rel_path)