From f5a52cb00f9623c51a7a6310bbce488a6cd955e4 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 4 Aug 2025 15:54:47 +0100 Subject: [PATCH 01/13] Give credit to Mike (@mwtoews) and Ben (@musicinmybrain) --- changelog.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/changelog.txt b/changelog.txt index bf7eb92..42ea4cb 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,6 @@ + Improvements: + * LICENSE.TXT re-encoded in UTF-8 (@musicinmybrain) + VERSION 3.0.0-alpha 2025-08-03 @@ -15,13 +18,13 @@ VERSION 3.0.0-alpha Code quality * Statically typed and checked with Mypy - * Checked with Ruff. + * Checked with Ruff. (@mwtoews) * Type hints * f-strings * Remove Python 2 specific functions. * Run doctests against wheels. * Testing of wheels before publishing them - * pyproject.toml src layout + * Updated metadata, changed build backend to Hatch, and restructured repor into pyproject.toml src layout (@mwtoews) * Slow test marked. Improvements: From 58f244fc77c0ed03e617da545e89d29b0228cdc1 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 4 Aug 2025 16:24:58 +0100 Subject: [PATCH 02/13] Add return type hints to the BinStream Protocol methods --- src/shapefile.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 316db0c..90bd08e 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -130,30 +130,30 @@ class WriteableBinStream(Protocol): - def write(self, b: bytes): ... + def write(self, b: bytes) -> int: ... class ReadableBinStream(Protocol): - def read(self, size: int = -1): ... + def read(self, size: int = -1) -> bytes: ... class WriteSeekableBinStream(Protocol): - def write(self, b: bytes): ... - def seek(self, offset: int, whence: int = 0): ... - def tell(self): ... + def write(self, b: bytes) -> int: ... + def seek(self, offset: int, whence: int = 0) -> int: ... + def tell(self) -> int: ... class ReadSeekableBinStream(Protocol): - def seek(self, offset: int, whence: int = 0): ... - def tell(self): ... - def read(self, size: int = -1): ... + def seek(self, offset: int, whence: int = 0) -> int: ... + def tell(self) -> int: ... + def read(self, size: int = -1) -> bytes: ... class ReadWriteSeekableBinStream(Protocol): - def write(self, b: bytes): ... - def seek(self, offset: int, whence: int = 0): ... - def tell(self): ... - def read(self, size: int = -1): ... + def write(self, b: bytes) -> int: ... + def seek(self, offset: int, whence: int = 0) -> int: ... + def tell(self) -> int: ... + def read(self, size: int = -1) -> bytes: ... # File name, file object or anything with a read() method that returns bytes. From cef5aeead8778013607a4a5d450f719bf0123534 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 4 Aug 2025 17:07:44 +0100 Subject: [PATCH 03/13] Run mypy --strict in CI with continue-on-error: true --- .github/workflows/run_checks_build_and_test.yml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index 0d47126..3dc9642 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -13,14 +13,29 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - uses: pre-commit/action@v3.0.1 + mypy-strict: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + - name: Install mypy + run: pip install mypy + - name: Run mypy --strict + continue-on-error: true + run: mypy --strict . + + build_wheel_and_sdist: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 + with: + python-version: "3.13" - name: Build wheel from the project repo uses: ./.github/actions/build_wheel_and_sdist From 1e274ac77783f6365927ef601ae6843e27e504fd Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 4 Aug 2025 18:52:42 +0100 Subject: [PATCH 04/13] Bound generic type var for _Array to int or float --- src/shapefile.py | 132 +++++++++++++++++++++++++++-------------------- 1 file changed, 75 insertions(+), 57 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 90bd08e..f15cd11 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -21,6 +21,7 @@ import zipfile from datetime import date from struct import Struct, calcsize, error, pack, unpack +from types import TracebackType from typing import ( IO, Any, @@ -336,10 +337,10 @@ class GeoJSONFeatureCollectionWithBBox(GeoJSONFeatureCollection): @overload -def fsdecode_if_pathlike(path: os.PathLike) -> str: ... +def fsdecode_if_pathlike(path: os.PathLike[Any]) -> str: ... @overload def fsdecode_if_pathlike(path: T) -> T: ... -def fsdecode_if_pathlike(path): +def fsdecode_if_pathlike(path: Any) -> Any: if isinstance(path, os.PathLike): return os.fsdecode(path) # str @@ -348,12 +349,13 @@ def fsdecode_if_pathlike(path): # Begin +ARR_TYPE = TypeVar("ARR_TYPE", int, float) #Literal["i"], Literal["d"]) -class _Array(array.array, Generic[T]): +class _Array(array.array[ARR_TYPE], Generic[ARR_TYPE]): """Converts python tuples to lists of the appropriate type. Used to unpack different shapefile header parts.""" - def __repr__(self): + def __repr__(self) -> str: return str(self.tolist()) @@ -369,7 +371,7 @@ def signed_area( xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values xs.append(xs[1]) ys.append(ys[1]) - area2 = sum(xs[i] * (ys[i + 1] - ys[i - 1]) for i in range(1, len(coords))) + area2: float = sum(xs[i] * (ys[i + 1] - ys[i - 1]) for i in range(1, len(coords))) if fast: return area2 @@ -471,7 +473,7 @@ def ring_sample(coords: PointsT, ccw: bool = False) -> Point2D: """ triplet = [] - def itercoords(): + def itercoords() -> Iterator[PointT]: # iterate full closed ring yield from coords # finally, yield the second coordinate to the end to allow checking the last triplet @@ -655,6 +657,7 @@ class _NoShapeTypeSentinel: preserve old behaviour for anyone who explictly called Shape(shapeType=None). """ +_NO_SHAPE_TYPE_SENTINEL: Final = _NoShapeTypeSentinel() def _m_from_point(point: Union[PointMT, PointZT], mpos: int) -> Optional[float]: @@ -694,7 +697,7 @@ class CanHaveBboxNoLinesKwargs(TypedDict, total=False): class Shape: def __init__( self, - shapeType: Union[int, _NoShapeTypeSentinel] = _NoShapeTypeSentinel(), + shapeType: Union[int, _NoShapeTypeSentinel] = _NO_SHAPE_TYPE_SENTINEL, points: Optional[PointsT] = None, parts: Optional[Sequence[int]] = None, # index of start point of each part lines: Optional[list[PointsT]] = None, @@ -720,8 +723,8 @@ def __init__( """ # Preserve previous behaviour for anyone who set self.shapeType = None - if not isinstance(shapeType, _NoShapeTypeSentinel): - self.shapeType = shapeType + if shapeType is not _NO_SHAPE_TYPE_SENTINEL: + self.shapeType = cast(int, shapeType) else: class_name = self.__class__.__name__ self.shapeType = SHAPETYPENUM_LOOKUP.get(class_name.upper(), NULL) @@ -1043,7 +1046,7 @@ def oid(self) -> int: def shapeTypeName(self) -> str: return SHAPETYPE_LOOKUP[self.shapeType] - def __repr__(self): + def __repr__(self) -> str: class_name = self.__class__.__name__ if class_name == "Shape": return f"Shape #{self.__oid}: {self.shapeTypeName}" @@ -1123,7 +1126,8 @@ def _write_bbox_to_byte_stream( @staticmethod def _read_npoints_from_byte_stream(b_io: ReadableBinStream) -> int: - return unpack(" int: @@ -1290,7 +1294,8 @@ class _CanHaveParts(_CanHaveBBox): @staticmethod def _read_nparts_from_byte_stream(b_io: ReadableBinStream) -> int: - return unpack(" int: @@ -1325,7 +1330,7 @@ def __init__( Shape.__init__(self, points=[(x, y)], oid=oid) @staticmethod - def _x_y_from_byte_stream(b_io: ReadableBinStream): + def _x_y_from_byte_stream(b_io: ReadableBinStream) -> tuple[float, float]: # Unpack _Array too x, y = unpack("<2d", b_io.read(16)) # Convert to tuple @@ -1964,7 +1969,7 @@ def __init__( } -class _Record(list): +class _Record(list[RecordValue]): """ A class to hold a record. Subclasses list to ensure compatibility with former work and to reuse all the optimizations of the builtin list. @@ -2023,7 +2028,7 @@ def __getattr__(self, item: str) -> RecordValue: f"{item} found as a field but not enough values available." ) - def __setattr__(self, key: str, value: RecordValue): + def __setattr__(self, key: str, value: RecordValue) -> None: """ Sets a value of a field attribute :param key: The field name @@ -2060,7 +2065,7 @@ def __getitem__(self, item): raise IndexError(f'"{item}" is not a field name and not an int') - def __setitem__(self, key, value): + def __setitem__(self, key, value) -> None: """ Extends the normal list item access with access using a fieldname @@ -2095,7 +2100,7 @@ def as_dict(self, date_strings: bool = False) -> dict[str, RecordValue]: dct[k] = f"{v.year:04d}{v.month:02d}{v.day:02d}" return dct - def __repr__(self): + def __repr__(self) -> str: return f"Record #{self.__oid}: {list(self)}" def __dir__(self) -> list[str]: @@ -2113,7 +2118,7 @@ def __dir__(self) -> list[str]: ) # plus field names (random order if Python version < 3.6) return default + fnames - def __eq__(self, other): + def __eq__(self, other) -> bool: if isinstance(other, self.__class__): if self.__field_positions != other.__field_positions: return False @@ -2147,7 +2152,7 @@ class Shapes(list[Optional[Shape]]): In addition to the list interface, this also provides the GeoJSON __geo_interface__ to return a GeometryCollection dictionary.""" - def __repr__(self): + def __repr__(self) -> str: return f"Shapes: {list(self)}" @property @@ -2167,7 +2172,7 @@ class ShapeRecords(list[ShapeRecord]): In addition to the list interface, this also provides the GeoJSON __geo_interface__ to return a FeatureCollection dictionary.""" - def __repr__(self): + def __repr__(self) -> str: return f"ShapeRecords: {list(self)}" @property @@ -2188,7 +2193,7 @@ class _NoShpSentinel: in the **kwargs dict) in case someone explictly called Reader(shp=None) to load self.shx. """ - +_NO_SHP_SENTINEL = _NoShpSentinel() class Reader: """Reads the three files of a shapefile as a unit or @@ -2214,21 +2219,21 @@ class Reader: CONSTITUENT_FILE_EXTS = ["shp", "shx", "dbf"] assert all(ext.islower() for ext in CONSTITUENT_FILE_EXTS) - def _assert_ext_is_supported(self, ext: str): + def _assert_ext_is_supported(self, ext: str) -> None: assert ext in self.CONSTITUENT_FILE_EXTS def __init__( self, - shapefile_path: Union[str, os.PathLike] = "", + shapefile_path: Union[str, os.PathLike[Any]] = "", /, *, encoding: str = "utf-8", encodingErrors: str = "strict", - shp: Union[_NoShpSentinel, Optional[BinaryFileT]] = _NoShpSentinel(), + shp: Union[_NoShpSentinel, Optional[BinaryFileT]] = _NO_SHP_SENTINEL, shx: Optional[BinaryFileT] = None, dbf: Optional[BinaryFileT] = None, # Keep kwargs even though unused, to preserve PyShp 2.4 API - **kwargs, + **kwargs: Any, ): self.shp = None self.shx = None @@ -2263,7 +2268,7 @@ def __init__( zpath = path[: path.find(".zip") + 4] shapefile = path[path.find(".zip") + 4 + 1 :] - zipfileobj: Union[tempfile._TemporaryFileWrapper, io.BufferedReader] + zipfileobj: Union[tempfile._TemporaryFileWrapper[bytes], io.BufferedReader] # Create a zip file handle if zpath.startswith("http"): # Zipfile is from a url @@ -2380,7 +2385,8 @@ def __init__( self.load(path) return - if not isinstance(shp, _NoShpSentinel): + if shp is not _NO_SHP_SENTINEL: + shp = cast(Union[str, IO[bytes], None], shp) self.shp = self.__seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) self.shx = self.__seek_0_on_file_obj_wrap_or_open_from_name("shx", shx) @@ -2417,7 +2423,7 @@ def __seek_0_on_file_obj_wrap_or_open_from_name( f"Could not load shapefile constituent file from: {file_}" ) - def __str__(self): + def __str__(self) -> str: """ Use some general info on the shapefile as __str__ """ @@ -2430,26 +2436,32 @@ def __str__(self): info.append(f" {len(self)} records ({len(self.fields)} fields)") return "\n".join(info) - def __enter__(self): + def __enter__(self) -> Reader: """ Enter phase of context manager. """ return self - def __exit__(self, exc_type, exc_val, exc_tb): + # def __exit__(self, exc_type, exc_val, exc_tb) -> None: + def __exit__(self, + exc_type: Optional[BaseException], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType]) -> Optional[bool]: """ Exit phase of context manager, close opened files. """ self.close() + return None - def __len__(self): + def __len__(self) -> int: """Returns the number of shapes/records in the shapefile.""" if self.dbf: # Preferably use dbf record count if self.numRecords is None: self.__dbfHeader() - return self.numRecords + # .__dbfHeader sets self.numRecords or raises Exception + return cast(int, self.numRecords) if self.shp: # Otherwise use shape count @@ -2457,7 +2469,8 @@ def __len__(self): if self.numShapes is None: self.__shxHeader() - return self.numShapes + # .__shxHeader sets self.numShapes or raises Exception + return cast(int, self.numShapes) # Index file not available, iterate all shapes to get total count if self.numShapes is None: @@ -2488,7 +2501,7 @@ def __len__(self): # No file loaded yet, treat as 'empty' shapefile return 0 - def __iter__(self): + def __iter__(self) -> Iterator[ShapeRecord]: """Iterates through the shapes/records in the shapefile.""" yield from self.iterShapeRecords() @@ -2505,7 +2518,7 @@ def __geo_interface__(self) -> GeoJSONFeatureCollectionWithBBox: def shapeTypeName(self) -> str: return SHAPETYPE_LOOKUP[self.shapeType] - def load(self, shapefile=None): + def load(self, shapefile=None) -> None: """Opens a shapefile from a filename or file-like object. Normally this method would be called by the constructor with the file name as an argument.""" @@ -2521,7 +2534,7 @@ def load(self, shapefile=None): ) self._try_to_set_constituent_file_headers() - def _try_to_set_constituent_file_headers(self): + def _try_to_set_constituent_file_headers(self) -> None: if self.shp: self.__shpHeader() if self.dbf: @@ -2567,28 +2580,28 @@ def _load_constituent_file( self._files_to_close.append(shp_dbf_or_dhx_file) return shp_dbf_or_dhx_file - def load_shp(self, shapefile_name): + def load_shp(self, shapefile_name) -> None: """ Attempts to load file with .shp extension as both lower and upper case """ self.shp = self._load_constituent_file(shapefile_name, "shp") - def load_shx(self, shapefile_name): + def load_shx(self, shapefile_name) -> None: """ Attempts to load file with .shx extension as both lower and upper case """ self.shx = self._load_constituent_file(shapefile_name, "shx") - def load_dbf(self, shapefile_name): + def load_dbf(self, shapefile_name) -> None: """ Attempts to load file with .dbf extension as both lower and upper case """ self.dbf = self._load_constituent_file(shapefile_name, "dbf") - def __del__(self): + def __del__(self) -> None: self.close() - def close(self): + def close(self) -> None: # Close any files that the reader opened (but not those given by user) for attribute in self._files_to_close: if hasattr(attribute, "close"): @@ -2692,7 +2705,7 @@ def __shape( return shape - def __shxHeader(self): + def __shxHeader(self) -> None: """Reads the header information from a .shx file.""" shx = self.shx if not shx: @@ -2704,7 +2717,7 @@ def __shxHeader(self): shxRecordLength = (unpack(">i", shx.read(4))[0] * 2) - 100 self.numShapes = shxRecordLength // 8 - def __shxOffsets(self): + def __shxOffsets(self) -> None: """Reads the shape offset positions from a .shx file""" shx = self.shx if not shx: @@ -3195,7 +3208,7 @@ class Writer: def __init__( self, - target: Union[str, os.PathLike, None] = None, + target: Union[str, os.PathLike[Any], None] = None, shapeType: Optional[int] = None, autoBalance: bool = False, *, @@ -3252,28 +3265,32 @@ def __init__( self.encoding = encoding self.encodingErrors = encodingErrors - def __len__(self): + def __len__(self) -> int: """Returns the current number of features written to the shapefile. If shapes and records are unbalanced, the length is considered the highest of the two.""" return max(self.recNum, self.shpNum) - def __enter__(self): + def __enter__(self) -> Writer: """ Enter phase of context manager. """ return self - def __exit__(self, exc_type, exc_val, exc_tb): + def __exit__(self, + exc_type: Optional[BaseException], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType]) -> Optional[bool]: """ Exit phase of context manager, finish writing and close the files. """ self.close() + return None - def __del__(self): + def __del__(self) -> None: self.close() - def close(self): + def close(self) -> None: """ Write final shp, shx, and dbf headers, close opened files. """ @@ -3327,7 +3344,7 @@ def __getFileObj(self, f: str) -> WriteSeekableBinStream: ... def __getFileObj(self, f: None) -> NoReturn: ... @overload def __getFileObj(self, f: WriteSeekableBinStream) -> WriteSeekableBinStream: ... - def __getFileObj(self, f): + def __getFileObj(self, f: Union[str,None,WriteSeekableBinStream]) -> WriteSeekableBinStream: """Safety handler to verify file-like objects""" if not f: raise ShapefileException("No file-like object available.") @@ -3359,7 +3376,7 @@ def __shpFileLength(self) -> int: shp.seek(start) return size - def _update_file_bbox(self, s: Shape): + def _update_file_bbox(self, s: Shape) -> None: if s.shapeType == NULL: shape_bbox = None elif s.shapeType in _CanHaveBBox_shapeTypes: @@ -3369,7 +3386,7 @@ def _update_file_bbox(self, s: Shape): shape_bbox = (x, y, x, y) if shape_bbox is None: - return + return None if self._bbox: # compare with existing @@ -3382,8 +3399,9 @@ def _update_file_bbox(self, s: Shape): else: # first time bbox is being set self._bbox = shape_bbox + return None - def _update_file_zbox(self, s: Union[_HasZ, PointZ]): + def _update_file_zbox(self, s: Union[_HasZ, PointZ]) -> None: if self._zbox: # compare with existing self._zbox = (min(s.zbox[0], self._zbox[0]), max(s.zbox[1], self._zbox[1])) @@ -3391,7 +3409,7 @@ def _update_file_zbox(self, s: Union[_HasZ, PointZ]): # first time zbox is being set self._zbox = s.zbox - def _update_file_mbox(self, s: Union[_HasM, PointM]): + def _update_file_mbox(self, s: Union[_HasM, PointM]) -> None: mbox = s.mbox if self._mbox: # compare with existing @@ -3542,7 +3560,7 @@ def __dbfHeader(self) -> None: def shape( self, - s: Union[Shape, HasGeoInterface, dict], + s: Union[Shape, HasGeoInterface, dict[str,PointsT]], ) -> None: # Balance if already not balanced if self.autoBalance and self.recNum < self.shpNum: @@ -3978,7 +3996,7 @@ def _replace_remote_url( if path is None: path = old_parsed.path.rpartition("/")[2] - if port not in (None, ""): + if port not in (None, ""): # type: ignore[comparison-overlap] netloc = f"{netloc}:{port}" new_parsed = old_parsed._replace( @@ -4043,7 +4061,7 @@ def _test(args: list[str] = sys.argv[1:], verbosity: bool = False) -> int: return failure_count -def main(): +def main() -> None: """ Doctests are contained in the file 'README.md', and are tested using the built-in testing libraries. From 3c68a9a6d1b123b4c675937f067b54b816f5e7e0 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 4 Aug 2025 18:57:01 +0100 Subject: [PATCH 05/13] Just need to type hint Shape._from_geo_json and _Record methods --- src/shapefile.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index f15cd11..692204e 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2518,7 +2518,7 @@ def __geo_interface__(self) -> GeoJSONFeatureCollectionWithBBox: def shapeTypeName(self) -> str: return SHAPETYPE_LOOKUP[self.shapeType] - def load(self, shapefile=None) -> None: + def load(self, shapefile: Optional[str]=None) -> None: """Opens a shapefile from a filename or file-like object. Normally this method would be called by the constructor with the file name as an argument.""" @@ -2580,19 +2580,19 @@ def _load_constituent_file( self._files_to_close.append(shp_dbf_or_dhx_file) return shp_dbf_or_dhx_file - def load_shp(self, shapefile_name) -> None: + def load_shp(self, shapefile_name: str) -> None: """ Attempts to load file with .shp extension as both lower and upper case """ self.shp = self._load_constituent_file(shapefile_name, "shp") - def load_shx(self, shapefile_name) -> None: + def load_shx(self, shapefile_name: str) -> None: """ Attempts to load file with .shx extension as both lower and upper case """ self.shx = self._load_constituent_file(shapefile_name, "shx") - def load_dbf(self, shapefile_name) -> None: + def load_dbf(self, shapefile_name: str) -> None: """ Attempts to load file with .dbf extension as both lower and upper case """ @@ -3218,7 +3218,7 @@ def __init__( shx: Optional[WriteSeekableBinStream] = None, dbf: Optional[WriteSeekableBinStream] = None, # Keep kwargs even though unused, to preserve PyShp 2.4 API - **kwargs, + **kwargs: Any, ): self.target = target self.autoBalance = autoBalance From d81e131eb521753173bd983e961fb01956f00fbc Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 4 Aug 2025 20:12:53 +0100 Subject: [PATCH 06/13] Type hint run_benchmarks. mypy --strict shapefile.py only --- .../workflows/run_checks_build_and_test.yml | 2 +- run_benchmarks.py | 7 ++-- src/shapefile.py | 42 ++++++++++++------- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index 3dc9642..b9c463c 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -26,7 +26,7 @@ jobs: run: pip install mypy - name: Run mypy --strict continue-on-error: true - run: mypy --strict . + run: mypy --strict ./src/shapefile.py build_wheel_and_sdist: diff --git a/run_benchmarks.py b/run_benchmarks.py index ac31e1c..961357f 100644 --- a/run_benchmarks.py +++ b/run_benchmarks.py @@ -9,7 +9,7 @@ from collections.abc import Callable from pathlib import Path from tempfile import TemporaryFile as TempF -from typing import Union +from typing import Iterable, Union, cast import shapefile @@ -63,8 +63,9 @@ def write_shapefile_with_PyShp(target: Union[str, os.PathLike]): for field_info_tuple in fields[target]: w.field(*field_info_tuple) for shapeRecord in shapeRecords[target]: - w.shape(shapeRecord.shape) - w.record(*shapeRecord.record) + w.shape(cast(shapefile.Shape, shapeRecord.shape)) + record = cast(Iterable, shapeRecord.record) + w.record(*record) SHAPEFILES = { diff --git a/src/shapefile.py b/src/shapefile.py index 692204e..cfdc61b 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -349,7 +349,8 @@ def fsdecode_if_pathlike(path: Any) -> Any: # Begin -ARR_TYPE = TypeVar("ARR_TYPE", int, float) #Literal["i"], Literal["d"]) +ARR_TYPE = TypeVar("ARR_TYPE", int, float) # Literal["i"], Literal["d"]) + class _Array(array.array[ARR_TYPE], Generic[ARR_TYPE]): """Converts python tuples to lists of the appropriate type. @@ -657,6 +658,8 @@ class _NoShapeTypeSentinel: preserve old behaviour for anyone who explictly called Shape(shapeType=None). """ + + _NO_SHAPE_TYPE_SENTINEL: Final = _NoShapeTypeSentinel() @@ -2193,8 +2196,11 @@ class _NoShpSentinel: in the **kwargs dict) in case someone explictly called Reader(shp=None) to load self.shx. """ + + _NO_SHP_SENTINEL = _NoShpSentinel() + class Reader: """Reads the three files of a shapefile as a unit or separately. If one of the three files (.shp, .shx, @@ -2268,7 +2274,9 @@ def __init__( zpath = path[: path.find(".zip") + 4] shapefile = path[path.find(".zip") + 4 + 1 :] - zipfileobj: Union[tempfile._TemporaryFileWrapper[bytes], io.BufferedReader] + zipfileobj: Union[ + tempfile._TemporaryFileWrapper[bytes], io.BufferedReader + ] # Create a zip file handle if zpath.startswith("http"): # Zipfile is from a url @@ -2443,10 +2451,12 @@ def __enter__(self) -> Reader: return self # def __exit__(self, exc_type, exc_val, exc_tb) -> None: - def __exit__(self, - exc_type: Optional[BaseException], - exc_val: Optional[BaseException], - exc_tb: Optional[TracebackType]) -> Optional[bool]: + def __exit__( + self, + exc_type: Optional[BaseException], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> Optional[bool]: """ Exit phase of context manager, close opened files. """ @@ -2518,7 +2528,7 @@ def __geo_interface__(self) -> GeoJSONFeatureCollectionWithBBox: def shapeTypeName(self) -> str: return SHAPETYPE_LOOKUP[self.shapeType] - def load(self, shapefile: Optional[str]=None) -> None: + def load(self, shapefile: Optional[str] = None) -> None: """Opens a shapefile from a filename or file-like object. Normally this method would be called by the constructor with the file name as an argument.""" @@ -3277,10 +3287,12 @@ def __enter__(self) -> Writer: """ return self - def __exit__(self, - exc_type: Optional[BaseException], - exc_val: Optional[BaseException], - exc_tb: Optional[TracebackType]) -> Optional[bool]: + def __exit__( + self, + exc_type: Optional[BaseException], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> Optional[bool]: """ Exit phase of context manager, finish writing and close the files. """ @@ -3344,7 +3356,9 @@ def __getFileObj(self, f: str) -> WriteSeekableBinStream: ... def __getFileObj(self, f: None) -> NoReturn: ... @overload def __getFileObj(self, f: WriteSeekableBinStream) -> WriteSeekableBinStream: ... - def __getFileObj(self, f: Union[str,None,WriteSeekableBinStream]) -> WriteSeekableBinStream: + def __getFileObj( + self, f: Union[str, None, WriteSeekableBinStream] + ) -> WriteSeekableBinStream: """Safety handler to verify file-like objects""" if not f: raise ShapefileException("No file-like object available.") @@ -3560,7 +3574,7 @@ def __dbfHeader(self) -> None: def shape( self, - s: Union[Shape, HasGeoInterface, dict[str,PointsT]], + s: Union[Shape, HasGeoInterface, dict[str, PointsT]], ) -> None: # Balance if already not balanced if self.autoBalance and self.recNum < self.shpNum: @@ -3996,7 +4010,7 @@ def _replace_remote_url( if path is None: path = old_parsed.path.rpartition("/")[2] - if port not in (None, ""): # type: ignore[comparison-overlap] + if port not in (None, ""): # type: ignore[comparison-overlap] netloc = f"{netloc}:{port}" new_parsed = old_parsed._replace( From b8cf0ceeb7e0b6b4fd9756700195d634bee1ac07 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 4 Aug 2025 20:33:45 +0100 Subject: [PATCH 07/13] Make _Array inherit from array.array, not concrete impl of it --- src/shapefile.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index cfdc61b..91f221d 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -349,10 +349,12 @@ def fsdecode_if_pathlike(path: Any) -> Any: # Begin -ARR_TYPE = TypeVar("ARR_TYPE", int, float) # Literal["i"], Literal["d"]) +# ARR_TYPE = TypeVar("ARR_TYPE", int, float) +ARR_TYPE = TypeVar("ARR_TYPE", Literal['i'], Literal['d']) #int, float) - -class _Array(array.array[ARR_TYPE], Generic[ARR_TYPE]): +# In Python 3.12 we can do: +# class _Array(array.array[ARR_TYPE], Generic[ARR_TYPE]): +class _Array(array.array, Generic[ARR_TYPE]): # """Converts python tuples to lists of the appropriate type. Used to unpack different shapefile header parts.""" @@ -1334,7 +1336,6 @@ def __init__( @staticmethod def _x_y_from_byte_stream(b_io: ReadableBinStream) -> tuple[float, float]: - # Unpack _Array too x, y = unpack("<2d", b_io.read(16)) # Convert to tuple return x, y From 83337cafcf86f86dd8e6fa613809cc9927639748 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 4 Aug 2025 20:40:36 +0100 Subject: [PATCH 08/13] Set ARR_TYPE to be int or float again. Keep array.array base class generic --- src/shapefile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 91f221d..c2cb4ca 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -349,12 +349,12 @@ def fsdecode_if_pathlike(path: Any) -> Any: # Begin -# ARR_TYPE = TypeVar("ARR_TYPE", int, float) -ARR_TYPE = TypeVar("ARR_TYPE", Literal['i'], Literal['d']) #int, float) +ARR_TYPE = TypeVar("ARR_TYPE", int, float) + # In Python 3.12 we can do: -# class _Array(array.array[ARR_TYPE], Generic[ARR_TYPE]): -class _Array(array.array, Generic[ARR_TYPE]): # +# class _Array(array.array[ARR_TYPE], Generic[ARR_TYPE]): +class _Array(array.array, Generic[ARR_TYPE]): # type: ignore[type-arg] """Converts python tuples to lists of the appropriate type. Used to unpack different shapefile header parts.""" From d78fb61a4d67c9e03dc8cded212e73bf8f81ba8c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 4 Aug 2025 20:48:14 +0100 Subject: [PATCH 09/13] Type hint _Record.__eq__ as other: Any --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index c2cb4ca..42af9c5 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2122,8 +2122,8 @@ def __dir__(self) -> list[str]: ) # plus field names (random order if Python version < 3.6) return default + fnames - def __eq__(self, other) -> bool: - if isinstance(other, self.__class__): + def __eq__(self, other: Any) -> bool: + if isinstance(other, _Record): if self.__field_positions != other.__field_positions: return False return list.__eq__(self, other) From 92c12121d966b5901cbd38d4793caf70d53c3006 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 4 Aug 2025 20:49:56 +0100 Subject: [PATCH 10/13] Update run_checks_build_and_test.yml --- .github/workflows/run_checks_build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index b9c463c..ea63da1 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -16,6 +16,7 @@ jobs: - uses: pre-commit/action@v3.0.1 mypy-strict: + continue-on-error: true runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -25,7 +26,6 @@ jobs: - name: Install mypy run: pip install mypy - name: Run mypy --strict - continue-on-error: true run: mypy --strict ./src/shapefile.py From 5342d0eda936f083fbad17061a98c2596591ebc6 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 4 Aug 2025 21:12:01 +0100 Subject: [PATCH 11/13] Add overloads with lots of type: ignores for _Record overridden list methods --- src/shapefile.py | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 42af9c5..2a8f093 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -37,6 +37,7 @@ Protocol, Reversible, Sequence, + SupportsIndex, TypedDict, TypeVar, Union, @@ -231,7 +232,7 @@ def __repr__(self) -> str: return f'Field(name="{self.name}", field_type=FieldType.{self.field_type}, size={self.size}, decimal={self.decimal})' -RecordValueNotDate = Union[bool, int, float, str, date] +RecordValueNotDate = Union[bool, int, float, str] # A Possible value in a Shapefile dbf record, i.e. L, N, M, F, C, or D types RecordValue = Union[RecordValueNotDate, date] @@ -2048,7 +2049,15 @@ def __setattr__(self, key: str, value: RecordValue) -> None: except KeyError: raise AttributeError(f"{key} is not a field name") - def __getitem__(self, item): + @overload + def __getitem__(self, i: SupportsIndex) -> RecordValue: ... + @overload + def __getitem__(self, s: slice) -> list[RecordValue]: ... + @overload + def __getitem__(self, s: str) -> RecordValue: ... + def __getitem__( + self, item: Union[SupportsIndex, slice, str] + ) -> Union[RecordValue, list[RecordValue]]: """ Extends the normal list item access with access using a fieldname @@ -2058,10 +2067,10 @@ def __getitem__(self, item): :return: the value of the field """ try: - return list.__getitem__(self, item) + return list.__getitem__(self, item) # type: ignore[index] except TypeError: try: - index = self.__field_positions[item] + index = self.__field_positions[item] # type: ignore[index] except KeyError: index = None if index is not None: @@ -2069,7 +2078,17 @@ def __getitem__(self, item): raise IndexError(f'"{item}" is not a field name and not an int') - def __setitem__(self, key, value) -> None: + @overload + def __setitem__(self, key: SupportsIndex, value: RecordValue) -> None: ... + @overload + def __setitem__(self, key: slice, value: Iterable[RecordValue]) -> None: ... + @overload + def __setitem__(self, key: str, value: RecordValue) -> None: ... + def __setitem__( + self, + key: Union[SupportsIndex, slice, str], + value: Union[RecordValue, Iterable[RecordValue]], + ) -> None: """ Extends the normal list item access with access using a fieldname @@ -2079,11 +2098,11 @@ def __setitem__(self, key, value) -> None: :param value: the new value of the field """ try: - return list.__setitem__(self, key, value) + return list.__setitem__(self, key, value) # type: ignore[misc,assignment] except TypeError: - index = self.__field_positions.get(key) + index = self.__field_positions.get(key) # type: ignore[arg-type] if index is not None: - return list.__setitem__(self, index, value) + return list.__setitem__(self, index, value) # type: ignore[misc] raise IndexError(f"{key} is not a field name and not an int") From a3a5ac4bde7274c68c07c4e987d38959d46c8a62 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 4 Aug 2025 21:33:06 +0100 Subject: [PATCH 12/13] Type hint _from_geojson with more judicious uses of cast() --- src/shapefile.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 2a8f093..f7e6e88 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -982,7 +982,7 @@ def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: ) @staticmethod - def _from_geojson(geoj) -> Shape: + def _from_geojson(geoj: GeoJSONHomogeneousGeometryObject) -> Shape: # create empty shape # set shapeType geojType = geoj["type"] if geoj else "Null" @@ -991,18 +991,26 @@ def _from_geojson(geoj) -> Shape: else: raise GeoJSON_Error(f"Cannot create Shape from GeoJSON type '{geojType}'") + coordinates = geoj["coordinates"] + + if coordinates == (): + raise GeoJSON_Error(f"Cannot create non-Null Shape from: {coordinates=}") + + points: PointsT + parts: list[int] + # set points and parts if geojType == "Point": - points = [geoj["coordinates"]] + points = [cast(PointT, coordinates)] parts = [0] elif geojType in ("MultiPoint", "LineString"): - points = geoj["coordinates"] + points = cast(PointsT, coordinates) parts = [0] elif geojType == "Polygon": points = [] parts = [] index = 0 - for i, ext_or_hole in enumerate(geoj["coordinates"]): + for i, ext_or_hole in enumerate(cast(list[PointsT], coordinates)): # although the latest GeoJSON spec states that exterior rings should have # counter-clockwise orientation, we explicitly check orientation since older # GeoJSONs might not enforce this. @@ -1019,7 +1027,7 @@ def _from_geojson(geoj) -> Shape: points = [] parts = [] index = 0 - for linestring in geoj["coordinates"]: + for linestring in cast(list[PointsT], coordinates): points.extend(linestring) parts.append(index) index += len(linestring) @@ -1027,7 +1035,7 @@ def _from_geojson(geoj) -> Shape: points = [] parts = [] index = 0 - for polygon in geoj["coordinates"]: + for polygon in cast(list[list[PointsT]], coordinates): for i, ext_or_hole in enumerate(polygon): # although the latest GeoJSON spec states that exterior rings should have # counter-clockwise orientation, we explicitly check orientation since older @@ -3604,7 +3612,7 @@ def shape( if hasattr(s, "__geo_interface__"): s = s.__geo_interface__ # type: ignore [assignment] if isinstance(s, dict): - s = Shape._from_geojson(s) + s = Shape._from_geojson(cast(GeoJSONHomogeneousGeometryObject, s)) else: raise TypeError( "Can only write Shape objects, GeoJSON dictionaries, " From 2b029c89ad64fc56b08f2c5623cab7c6937f0dd5 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 4 Aug 2025 21:35:30 +0100 Subject: [PATCH 13/13] Require mypy-strict --- .github/workflows/run_checks_build_and_test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/run_checks_build_and_test.yml b/.github/workflows/run_checks_build_and_test.yml index ea63da1..abdca0c 100644 --- a/.github/workflows/run_checks_build_and_test.yml +++ b/.github/workflows/run_checks_build_and_test.yml @@ -16,7 +16,6 @@ jobs: - uses: pre-commit/action@v3.0.1 mypy-strict: - continue-on-error: true runs-on: ubuntu-latest steps: - uses: actions/checkout@v4