From 54a49a306402a9cb1207a5f6493308786f709858 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 14:12:01 +0100 Subject: [PATCH 01/16] Rename pathlike_obj and define specific and generic overloads for it --- src/shapefile.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 5ba3c61..27e1c65 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -21,7 +21,18 @@ import zipfile from datetime import date from struct import Struct, calcsize, error, pack, unpack -from typing import IO, Any, Iterable, Iterator, Optional, Reversible, TypedDict, Union +from typing import ( + IO, + Any, + Iterable, + Iterator, + Optional, + Reversible, + TypedDict, + TypeVar, + Union, + overload, +) from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -158,9 +169,16 @@ def is_string(v: Any) -> bool: return isinstance(v, str) -def pathlike_obj(path: Any) -> Any: +T = TypeVar("T") + + +@overload +def fsdecode_if_pathlike(path: os.PathLike) -> str: ... +@overload +def fsdecode_if_pathlike(path: T) -> T: ... +def fsdecode_if_pathlike(path): if isinstance(path, os.PathLike): - return os.fsdecode(path) + return os.fsdecode(path) # str return path @@ -999,7 +1017,7 @@ def __init__( self.encodingErrors = encodingErrors # See if a shapefile name was passed as the first argument if shapefile_path: - path = pathlike_obj(shapefile_path) + path = fsdecode_if_pathlike(shapefile_path) if is_string(path): if ".zip" in path: # Shapefile is inside a zipfile @@ -2001,7 +2019,7 @@ def __init__( self.shp = self.shx = self.dbf = None self._files_to_close = [] if target: - target = pathlike_obj(target) + target = fsdecode_if_pathlike(target) if not is_string(target): raise TypeError( f"The target filepath {target!r} must be of type str/unicode or path-like, not {type(target)}." From 4c1498d1a7ec58ce595e73e26c81180a4481f01c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 14:16:44 +0100 Subject: [PATCH 02/16] Add int and bool to RecordValue --- src/shapefile.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 27e1c65..344da23 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -115,7 +115,9 @@ BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO] -RecordValue = Union[float, str, date] +RecordValue = Union[ + bool, int, float, str, date +] # A Possible value in a Shapefile record, e.g. L, N, F, C, D types class GeoJsonShapeT(TypedDict): From c8334c5a4b9424a126ab44f612d56272a912b28e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 14:47:51 +0100 Subject: [PATCH 03/16] Make Reader.__getFileObj generic --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 344da23..117643f 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -993,7 +993,7 @@ def _assert_ext_is_supported(self, ext: str): def __init__( self, - shapefile_path: str = "", + shapefile_path: Union[str, os.PathLike] = "", /, *, encoding: str = "utf-8", @@ -1369,7 +1369,7 @@ def close(self): pass self._files_to_close = [] - def __getFileObj(self, f): + def __getFileObj(self, f: Optional[T]) -> T: """Checks to see if the requested shapefile file object is available. If not a ShapefileException is raised.""" if not f: From 350752ebebb1a86cf163bef74f89a9239e19ee6e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 14:56:37 +0100 Subject: [PATCH 04/16] Type hint Reader.__shapeIndex --- src/shapefile.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 117643f..df06cf2 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1425,6 +1425,7 @@ def __shpHeader(self): # pylint: enable=attribute-defined-outside-init + # def __shape(self, oid: Optional[int] = None, bbox: Optional[BBox] = None) -> Shape: def __shape(self, oid=None, bbox=None): """Returns the header info and geometry for a single shape.""" @@ -1540,9 +1541,9 @@ def __shxOffsets(self): shxRecords = _Array("i", shx.read(2 * self.numShapes * 4)) if sys.byteorder != "big": shxRecords.byteswap() - self._offsets = [2 * el for el in shxRecords[::2]] + self._offsets: list[int] = [2 * el for el in shxRecords[::2]] - def __shapeIndex(self, i=None): + def __shapeIndex(self, i: Optional[int] = None) -> Optional[int]: """Returns the offset in a .shp file for a shape based on information in the .shx index file.""" shx = self.shx @@ -1554,7 +1555,7 @@ def __shapeIndex(self, i=None): self.__shxOffsets() return self._offsets[i] - def shape(self, i=0, bbox=None): + def shape(self, i: int = 0, bbox: Optional[BBox] = None): """Returns a shape object for a shape in the geometry record file. If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), From dc94a2cfc8cccb37891482cdf0fd226cbeb5f420 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 15:36:39 +0100 Subject: [PATCH 05/16] Type hint Reader.__shape --- src/shapefile.py | 106 +++++++++++++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 41 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index df06cf2..eb750ae 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -24,10 +24,13 @@ from typing import ( IO, Any, + Collection, + Generic, Iterable, Iterator, Optional, Reversible, + Sequence, TypedDict, TypeVar, Union, @@ -99,8 +102,9 @@ 5: "RING", } -# Custom type variables +## Custom type variables +T = TypeVar("T") Point2D = tuple[float, float] PointZ = tuple[float, float, float] PointZM = tuple[float, float, float, float] @@ -171,9 +175,6 @@ def is_string(v: Any) -> bool: return isinstance(v, str) -T = TypeVar("T") - - @overload def fsdecode_if_pathlike(path: os.PathLike) -> str: ... @overload @@ -188,7 +189,7 @@ def fsdecode_if_pathlike(path): # Begin -class _Array(array.array): +class _Array(array.array, Generic[T]): """Converts python tuples to lists of the appropriate type. Used to unpack different shapefile header parts.""" @@ -235,7 +236,7 @@ def ring_bbox(coords: Coords) -> BBox: return bbox -def bbox_overlap(bbox1: BBox, bbox2: BBox) -> bool: +def bbox_overlap(bbox1: BBox, bbox2: Collection[float]) -> bool: """Tests whether two bounding boxes overlap.""" xmin1, ymin1, xmax1, ymax1 = bbox1 xmin2, ymin2, xmax2, ymax2 = bbox2 @@ -492,8 +493,8 @@ def __init__( self, shapeType: int = NULL, points: Optional[list[Coord]] = None, - parts: Optional[list[int]] = None, - partTypes: Optional[list[int]] = None, + parts: Optional[Sequence[int]] = None, + partTypes: Optional[Sequence[int]] = None, oid: Optional[int] = None, ): """Stores the geometry of the different shape types @@ -522,6 +523,10 @@ def __init__( else: self.__oid = -1 + self.z: Optional[Union[list[Optional[float]], _Array[float]]] = None + self.m: Optional[list[Optional[float]]] = None + self.bbox: Optional[_Array[float]] = None + @property def __geo_interface__(self) -> GeoJsonShapeT: if self.shapeType in [POINT, POINTM, POINTZ]: @@ -1425,15 +1430,17 @@ def __shpHeader(self): # pylint: enable=attribute-defined-outside-init - # def __shape(self, oid: Optional[int] = None, bbox: Optional[BBox] = None) -> Shape: - def __shape(self, oid=None, bbox=None): + def __shape( + self, oid: Optional[int] = None, bbox: Optional[BBox] = None + ) -> Optional[Shape]: """Returns the header info and geometry for a single shape.""" # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) record = Shape(oid=oid) - # Formerly we also set __zmin = __zmax = __mmin = __mmax = None - nParts = nPoints = None + # Previously, we also set __zmin = __zmax = __mmin = __mmax = None + nParts: Optional[int] = None + nPoints: Optional[int] = None (__recNum, recLength) = unpack(">2i", f.read(8)) # Determine the start of the next record next_shape = f.tell() + (2 * recLength) @@ -1444,7 +1451,7 @@ def __shape(self, oid=None, bbox=None): record.points = [] # All shape types capable of having a bounding box elif shapeType in (3, 5, 8, 13, 15, 18, 23, 25, 28, 31): - record.bbox = _Array("d", unpack("<4d", f.read(32))) + record.bbox = _Array[float]("d", unpack("<4d", f.read(32))) # if bbox specified and no overlap, skip this shape if bbox is not None and not bbox_overlap(bbox, record.bbox): # because we stop parsing this shape, skip to beginning of @@ -1454,40 +1461,52 @@ def __shape(self, oid=None, bbox=None): # Shape types with parts if shapeType in (3, 5, 13, 15, 23, 25, 31): nParts = unpack("= 16: - __mmin, __mmax = unpack("<2d", f.read(16)) - # Measure values less than -10e38 are nodata values according to the spec - if next_shape - f.tell() >= nPoints * 8: - record.m = [] - for m in _Array("d", unpack(f"<{nPoints}d", f.read(nPoints * 8))): - if m > NODATA: - record.m.append(m) - else: - record.m.append(None) - else: - record.m = [None for _ in range(nPoints)] + + # Read z extremes and values + if shapeType in (13, 15, 18, 31): + __zmin, __zmax = unpack("<2d", f.read(16)) + record.z = _Array[float]( + "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) + ) + + # Read m extremes and values + if shapeType in (13, 15, 18, 23, 25, 28, 31): + if next_shape - f.tell() >= 16: + __mmin, __mmax = unpack("<2d", f.read(16)) + # Measure values less than -10e38 are nodata values according to the spec + if next_shape - f.tell() >= nPoints * 8: + record.m = [] + for m in _Array[float]( + "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) + ): + if m > NODATA: + record.m.append(m) + else: + record.m.append(None) + else: + record.m = [None for _ in range(nPoints)] + # Read a single point if shapeType in (1, 11, 21): - record.points = [_Array("d", unpack("<2d", f.read(16)))] + array_2D = _Array[float]("d", unpack("<2d", f.read(16))) + + record.points = [tuple(array_2D)] if bbox is not None: # create bounding box for Point by duplicating coordinates point_bbox = list(record.points[0] + record.points[0]) @@ -1495,9 +1514,11 @@ def __shape(self, oid=None, bbox=None): if not bbox_overlap(bbox, point_bbox): f.seek(next_shape) return None + # Read a single Z value if shapeType == 11: record.z = list(unpack("= 8: @@ -1509,11 +1530,14 @@ def __shape(self, oid=None, bbox=None): record.m = [m] else: record.m = [None] + # pylint: enable=attribute-defined-outside-init # Seek to the end of this record as defined by the record header because # the shapefile spec doesn't require the actual content to meet the header # definition. Probably allowed for lazy feature deletion. + f.seek(next_shape) + return record def __shxHeader(self): @@ -1538,7 +1562,7 @@ def __shxOffsets(self): # Jump to the first record. shx.seek(100) # Each index record consists of two nrs, we only want the first one - shxRecords = _Array("i", shx.read(2 * self.numShapes * 4)) + shxRecords = _Array[int]("i", shx.read(2 * self.numShapes * 4)) if sys.byteorder != "big": shxRecords.byteswap() self._offsets: list[int] = [2 * el for el in shxRecords[::2]] From cfcc376f8c0414130b7b69f10c6093a23844ee53 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:01:23 +0100 Subject: [PATCH 06/16] Suppress ..bbox, z, .m attr-defined mypy errors on Shape --- src/shapefile.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index eb750ae..9cf8622 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -523,9 +523,9 @@ def __init__( else: self.__oid = -1 - self.z: Optional[Union[list[Optional[float]], _Array[float]]] = None - self.m: Optional[list[Optional[float]]] = None - self.bbox: Optional[_Array[float]] = None + # self.z: Optional[Union[list[Optional[float]], _Array[float]]] = None + # self.m: Optional[list[Optional[float]]] = None + # self.bbox: Optional[_Array[float]] = None @property def __geo_interface__(self) -> GeoJsonShapeT: @@ -1451,9 +1451,9 @@ def __shape( record.points = [] # All shape types capable of having a bounding box elif shapeType in (3, 5, 8, 13, 15, 18, 23, 25, 28, 31): - record.bbox = _Array[float]("d", unpack("<4d", f.read(32))) + record.bbox = _Array[float]("d", unpack("<4d", f.read(32))) # type: ignore [attr-defined] # if bbox specified and no overlap, skip this shape - if bbox is not None and not bbox_overlap(bbox, record.bbox): + if bbox is not None and not bbox_overlap(bbox, record.bbox): # type: ignore [attr-defined] # because we stop parsing this shape, skip to beginning of # next shape before we return f.seek(next_shape) @@ -1462,6 +1462,12 @@ def __shape( if shapeType in (3, 5, 13, 15, 23, 25, 31): nParts = unpack("= nPoints * 8: - record.m = [] + record.m = [] # type: ignore [attr-defined] for m in _Array[float]( "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) ): if m > NODATA: - record.m.append(m) + record.m.append(m) # type: ignore [attr-defined] else: - record.m.append(None) + record.m.append(None) # type: ignore [attr-defined] else: - record.m = [None for _ in range(nPoints)] + record.m = [None for _ in range(nPoints)] # type: ignore [attr-defined] # Read a single point if shapeType in (1, 11, 21): @@ -1517,7 +1519,7 @@ def __shape( # Read a single Z value if shapeType == 11: - record.z = list(unpack(" NODATA: - record.m = [m] + record.m = [m] # type: ignore [attr-defined] else: - record.m = [None] + record.m = [None] # type: ignore [attr-defined] # pylint: enable=attribute-defined-outside-init # Seek to the end of this record as defined by the record header because From 8c0187532d236d369668e7c5d0e10a81a59e6648 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:08:12 +0100 Subject: [PATCH 07/16] Reorder shapetype codes to group according to attributes. --- src/shapefile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 9cf8622..5543739 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -1450,7 +1450,7 @@ def __shape( if shapeType == 0: record.points = [] # All shape types capable of having a bounding box - elif shapeType in (3, 5, 8, 13, 15, 18, 23, 25, 28, 31): + elif shapeType in (3, 13, 23, 5, 15, 25, 8, 18, 28, 31): record.bbox = _Array[float]("d", unpack("<4d", f.read(32))) # type: ignore [attr-defined] # if bbox specified and no overlap, skip this shape if bbox is not None and not bbox_overlap(bbox, record.bbox): # type: ignore [attr-defined] @@ -1459,11 +1459,11 @@ def __shape( f.seek(next_shape) return None # Shape types with parts - if shapeType in (3, 5, 13, 15, 23, 25, 31): + if shapeType in (3, 13, 23, 5, 15, 25, 31): nParts = unpack("= 16: __mmin, __mmax = unpack("<2d", f.read(16)) # Measure values less than -10e38 are nodata values according to the spec From aa5560c058c47fe800766b5ae49b9a980f5bd278 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:28:16 +0100 Subject: [PATCH 08/16] Type hint FieldTuples --- src/shapefile.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 5543739..3c70d2b 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -25,6 +25,7 @@ IO, Any, Collection, + Container, Generic, Iterable, Iterator, @@ -119,6 +120,7 @@ BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO] +FieldTuple = tuple[str, str, int, bool] RecordValue = Union[ bool, int, float, str, date ] # A Possible value in a Shapefile record, e.g. L, N, F, C, D types @@ -1017,7 +1019,7 @@ def __init__( self.shpLength: Optional[int] = None self.numRecords: Optional[int] = None self.numShapes: Optional[int] = None - self.fields: list[list[str]] = [] + self.fields: list[FieldTuple] = [] self.__dbfHdrLength = 0 self.__fieldLookup: dict[str, int] = {} self.encoding = encoding @@ -1581,7 +1583,7 @@ def __shapeIndex(self, i: Optional[int] = None) -> Optional[int]: self.__shxOffsets() return self._offsets[i] - def shape(self, i: int = 0, bbox: Optional[BBox] = None): + def shape(self, i: int = 0, bbox: Optional[BBox] = None) -> Optional[Shape]: """Returns a shape object for a shape in the geometry record file. If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), @@ -1619,7 +1621,7 @@ def shape(self, i: int = 0, bbox: Optional[BBox] = None): shp.seek(offset) return self.__shape(oid=i, bbox=bbox) - def shapes(self, bbox=None): + def shapes(self, bbox: Optional[BBox] = None) -> Shapes: """Returns all shapes in a shapefile. To only read shapes within a given spatial region, specify the 'bbox' arg as a list or tuple of xmin,ymin,xmax,ymax. @@ -1628,7 +1630,7 @@ def shapes(self, bbox=None): shapes.extend(self.iterShapes(bbox=bbox)) return shapes - def iterShapes(self, bbox=None): + def iterShapes(self, bbox: Optional[BBox] = None) -> Iterator[Optional[Shape]]: """Returns a generator of shapes in a shapefile. Useful for handling large shapefiles. To only read shapes within a given spatial region, specify the 'bbox' @@ -1722,7 +1724,7 @@ def __dbfHeader(self): # pylint: enable=attribute-defined-outside-init - def __recordFmt(self, fields=None): + def __recordFmt(self, fields: Optional[Container[str]] = None) -> tuple[str, int]: """Calculates the format and size of a .dbf record. Optional 'fields' arg specifies which fieldnames to unpack and which to ignore. Note that this always includes the DeletionFlag at index 0, regardless of the 'fields' arg. @@ -1748,7 +1750,9 @@ def __recordFmt(self, fields=None): fmtSize += 1 return (fmt, fmtSize) - def __recordFields(self, fields=None): + def __recordFields( + self, fields: Optional[Iterable[str]] = None + ) -> tuple[list[FieldTuple], dict[str, int], Struct]: """Returns the necessary info required to unpack a record's fields, restricted to a subset of fieldnames 'fields' if specified. Returns a list of field info tuples, a name-index lookup dict, @@ -1758,19 +1762,19 @@ def __recordFields(self, fields=None): if fields is not None: # restrict info to the specified fields # first ignore repeated field names (order doesn't matter) - fields = list(set(fields)) + unique_fields = list(set(fields)) # get the struct - fmt, __fmtSize = self.__recordFmt(fields=fields) + fmt, __fmtSize = self.__recordFmt(fields=unique_fields) recStruct = Struct(fmt) # make sure the given fieldnames exist - for name in fields: + for name in unique_fields: if name not in self.__fieldLookup or name == "DeletionFlag": raise ValueError(f'"{name}" is not a valid field name') # fetch relevant field info tuples fieldTuples = [] for fieldinfo in self.fields[1:]: name = fieldinfo[0] - if name in fields: + if name in unique_fields: fieldTuples.append(fieldinfo) # store the field positions recLookup = {f[0]: i for i, f in enumerate(fieldTuples)} @@ -1783,7 +1787,7 @@ def __recordFields(self, fields=None): def __record( self, - fieldTuples: list[tuple[str, str, int, bool]], + fieldTuples: list[FieldTuple], recLookup: dict[str, int], recStruct: Struct, oid: Optional[int] = None, From b8c84551a09a4e44f926be5a5eb7519a4e9369e3 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:45:03 +0100 Subject: [PATCH 09/16] Type hint Writer.__init__ --- src/shapefile.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 3c70d2b..2a0b65a 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2034,23 +2034,23 @@ class Writer: def __init__( self, - target=None, - shapeType=None, - autoBalance=False, - encoding="utf-8", - encodingErrors="strict", + target: Union[str, os.PathLike, None] = None, + shapeType: Optional[int] = None, + autoBalance: bool = False, *, - shp=None, - shx=None, - dbf=None, + encoding: str = "utf-8", + encodingErrors: str = "strict", + shp: Optional[BinaryFileT] = None, + shx: Optional[BinaryFileT] = None, + dbf: Optional[BinaryFileT] = None, **kwargs, # pylint: disable=unused-argument ): self.target = target self.autoBalance = autoBalance - self.fields = [] + self.fields: list[FieldTuple] = [] self.shapeType = shapeType self.shp = self.shx = self.dbf = None - self._files_to_close = [] + self._files_to_close: list[BinaryFileStreamT] = [] if target: target = fsdecode_if_pathlike(target) if not is_string(target): From b91d6d7b4b5f635c8c3ce3081f683c2d40e8308a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 18:11:18 +0100 Subject: [PATCH 10/16] Type Writer.__getFileObj as generic bounded to Protocol. I couldn't make my Readable Protocol idea work for the particular implementation of __seek_0_on_file_obj_wrap_or_open_from_name - the generic for Seekable absorbs it, so it requires type negations / algebra. --- src/shapefile.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 2a0b65a..e6392cc 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -29,7 +29,9 @@ Generic, Iterable, Iterator, + NoReturn, Optional, + Protocol, Reversible, Sequence, TypedDict, @@ -115,8 +117,12 @@ BBox = tuple[float, float, float, float] + +class BinaryWritable(Protocol): + def write(self, data: bytes): ... + + # File name, file object or anything with a read() method that returns bytes. -# TODO: Create simple Protocol with a read() method pylint: disable=fixme BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO] @@ -2157,7 +2163,15 @@ def close(self): pass self._files_to_close = [] - def __getFileObj(self, f: Union[IO[bytes], str]) -> IO[bytes]: + W = TypeVar("W", bound=BinaryWritable) + + @overload + def __getFileObj(self, f: str) -> IO[bytes]: ... + @overload + def __getFileObj(self, f: None) -> NoReturn: ... + @overload + def __getFileObj(self, f: W) -> W: ... + def __getFileObj(self, f): """Safety handler to verify file-like objects""" if not f: raise ShapefileException("No file-like object available.") From 98b094b5b819fc4bef52b8209ca1c83c001a6ce3 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 18:19:44 +0100 Subject: [PATCH 11/16] Type Writer.shapeTypeName, and make return "NULL" if self.shapeType is None --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index e6392cc..1641dcb 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2275,8 +2275,8 @@ def __mbox(self, s): return mbox @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] + def shapeTypeName(self) -> str: + return SHAPETYPE_LOOKUP[self.shapeType or 0] def bbox(self): """Returns the current bounding box for the shapefile which is From b999d8ab7a54b55fdee25fd3af6017067a7df692 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 20:17:17 +0100 Subject: [PATCH 12/16] Type hint Writer.shp, .dbf & .shx --- src/shapefile.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 1641dcb..0f0529b 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -122,6 +122,10 @@ class BinaryWritable(Protocol): def write(self, data: bytes): ... +class BinaryWritableSeekable(BinaryWritable): + def seek(self, i: int): ... + + # File name, file object or anything with a read() method that returns bytes. BinaryFileT = Union[str, IO[bytes]] BinaryFileStreamT = Union[IO[bytes], io.BytesIO] @@ -139,6 +143,11 @@ class GeoJsonShapeT(TypedDict): ] +class HasGeoInterface(Protocol): + @property + def __geo_interface__(self) -> Any: ... + + # Helpers MISSING = [None, ""] @@ -2046,16 +2055,18 @@ def __init__( *, encoding: str = "utf-8", encodingErrors: str = "strict", - shp: Optional[BinaryFileT] = None, - shx: Optional[BinaryFileT] = None, - dbf: Optional[BinaryFileT] = None, + shp: Optional[BinaryWritableSeekable] = None, + shx: Optional[BinaryWritableSeekable] = None, + dbf: Optional[BinaryWritableSeekable] = None, **kwargs, # pylint: disable=unused-argument ): self.target = target self.autoBalance = autoBalance self.fields: list[FieldTuple] = [] self.shapeType = shapeType - self.shp = self.shx = self.dbf = None + self.shp: Optional[Union[BinaryFileStreamT, BinaryWritableSeekable]] = None + self.shx: Optional[Union[BinaryFileStreamT, BinaryWritableSeekable]] = None + self.dbf: Optional[Union[BinaryFileStreamT, BinaryWritableSeekable]] = None self._files_to_close: list[BinaryFileStreamT] = [] if target: target = fsdecode_if_pathlike(target) @@ -2163,7 +2174,7 @@ def close(self): pass self._files_to_close = [] - W = TypeVar("W", bound=BinaryWritable) + W = TypeVar("W", bound=BinaryWritableSeekable) @overload def __getFileObj(self, f: str) -> IO[bytes]: ... @@ -2292,7 +2303,11 @@ def mbox(self): """Returns the current m extremes for the shapefile.""" return self._mbox - def __shapefileHeader(self, fileObj, headerType="shp"): + def __shapefileHeader( + self, + fileObj: Union[str, BinaryWritableSeekable], + headerType: str = "shp", + ): """Writes the specified header type to the specified file-like object. Several of the shapefile formats are so similar that a single generic method to read or write them is warranted.""" @@ -2404,14 +2419,17 @@ def __dbfHeader(self): # Terminator f.write(b"\r") - def shape(self, s): + def shape( + self, + s: Union[Shape, HasGeoInterface, dict], + ): # Balance if already not balanced if self.autoBalance and self.recNum < self.shpNum: self.balance() # Check is shape or import from geojson if not isinstance(s, Shape): if hasattr(s, "__geo_interface__"): - s = s.__geo_interface__ + s = s.__geo_interface__ # type: ignore [assignment] if isinstance(s, dict): s = Shape._from_geojson(s) else: From c0e88605d5729184d448f2235aa43acc07df86df Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 20:36:22 +0100 Subject: [PATCH 13/16] Simplify type hints. --- src/shapefile.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 0f0529b..88a7680 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -123,12 +123,13 @@ def write(self, data: bytes): ... class BinaryWritableSeekable(BinaryWritable): - def seek(self, i: int): ... + def seek(self, i: int): ... # pylint: disable=unused-argument + def tell(self): ... # File name, file object or anything with a read() method that returns bytes. BinaryFileT = Union[str, IO[bytes]] -BinaryFileStreamT = Union[IO[bytes], io.BytesIO] +BinaryFileStreamT = Union[IO[bytes], io.BytesIO, BinaryWritableSeekable] FieldTuple = tuple[str, str, int, bool] RecordValue = Union[ @@ -2064,9 +2065,9 @@ def __init__( self.autoBalance = autoBalance self.fields: list[FieldTuple] = [] self.shapeType = shapeType - self.shp: Optional[Union[BinaryFileStreamT, BinaryWritableSeekable]] = None - self.shx: Optional[Union[BinaryFileStreamT, BinaryWritableSeekable]] = None - self.dbf: Optional[Union[BinaryFileStreamT, BinaryWritableSeekable]] = None + self.shp: Optional[BinaryFileStreamT] = None + self.shx: Optional[BinaryFileStreamT] = None + self.dbf: Optional[BinaryFileStreamT] = None self._files_to_close: list[BinaryFileStreamT] = [] if target: target = fsdecode_if_pathlike(target) @@ -2305,7 +2306,7 @@ def mbox(self): def __shapefileHeader( self, - fileObj: Union[str, BinaryWritableSeekable], + fileObj: Optional[BinaryWritableSeekable], headerType: str = "shp", ): """Writes the specified header type to the specified file-like object. From 9d089596c0eafbefff5fba14c3d8622481a7e593 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 20:40:04 +0100 Subject: [PATCH 14/16] Type hint Writer.record --- src/shapefile.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/shapefile.py b/src/shapefile.py index 88a7680..1cc7ea6 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2645,7 +2645,9 @@ def __shxRecord(self, offset, length): # pylint: enable=raise-missing-from - def record(self, *recordList, **recordDict): + def record( + self, *recordList: Iterable[RecordValue], **recordDict: dict[str, RecordValue] + ): """Creates a dbf attribute record. You can submit either a sequence of field values or keyword arguments of field names and values. Before adding records you must add fields for the record values using the From df75bd023664580ce547af0a6f31a10ff500a068 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 20:45:36 +0100 Subject: [PATCH 15/16] Type hint Writer.point --- src/shapefile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 1cc7ea6..9b586d7 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2775,11 +2775,11 @@ def null(self): """Creates a null shape.""" self.shape(Shape(NULL)) - def point(self, x, y): + def point(self, x: float, y: float): """Creates a POINT shape.""" shapeType = POINT pointShape = Shape(shapeType) - pointShape.points.append([x, y]) + pointShape.points.append((x, y)) self.shape(pointShape) def pointm(self, x, y, m=None): From a0ba5839dcd1e54761d6c28070a60ec59f7c5393 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 26 Jul 2025 20:56:36 +0100 Subject: [PATCH 16/16] Type hint Writer.multipoint. Simplify Writer.multipointm --- src/shapefile.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 9b586d7..41f6359 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -2799,14 +2799,12 @@ def pointz(self, x, y, z=0, m=None): pointShape.points.append([x, y, z, m]) self.shape(pointShape) - def multipoint(self, points): + def multipoint(self, points: Coords): """Creates a MULTIPOINT shape. Points is a list of xy values.""" shapeType = MULTIPOINT - points = [ - points - ] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) + # nest the points inside a list to be compatible with the generic shapeparts method + self._shapeparts(parts=[points], shapeType=shapeType) def multipointm(self, points): """Creates a MULTIPOINTM shape. @@ -2921,9 +2919,8 @@ def _shapeparts(self, parts, shapeType): # add points for point in part: # Ensure point is list - if not isinstance(point, list): - point = list(point) - polyShape.points.append(point) + point_list = list(point) + polyShape.points.append(point_list) # write the shape self.shape(polyShape)