From b35482c60c7a96edef76225565495b7a4ed185cd Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 14:10:19 +0100 Subject: [PATCH 01/10] Define Shape subclasses for each supported Shape type --- src/shapefile.py | 143 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 111 insertions(+), 32 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 8008fe1..60ca315 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -110,14 +110,14 @@ T = TypeVar("T") Point2D = tuple[float, float] Point3D = tuple[float, float, float] -PointM = tuple[float, float, Optional[float]] -PointZ = tuple[float, float, float, Optional[float]] +PointMT = tuple[float, float, Optional[float]] +PointZT = tuple[float, float, float, Optional[float]] Coord = Union[Point2D, Point3D] Coords = list[Coord] -Point = Union[Point2D, PointM, PointZ] -Points = list[Point] +PointT = Union[Point2D, PointMT, PointZT] +PointsT = list[PointT] BBox = tuple[float, float, float, float] @@ -153,36 +153,36 @@ class GeoJSONPoint(TypedDict): # elements. " # RFC7946 also requires long/lat easting/northing which we do not enforce, # and despite the SHOULD NOT, we may use a 4th element for Shapefile M Measures. - coordinates: Union[Point, tuple[()]] + coordinates: Union[PointT, tuple[()]] class GeoJSONMultiPoint(TypedDict): type: Literal["MultiPoint"] - coordinates: Points + coordinates: PointsT class GeoJSONLineString(TypedDict): type: Literal["LineString"] # "Two or more positions" not enforced by type checker # https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.4 - coordinates: Points + coordinates: PointsT class GeoJSONMultiLineString(TypedDict): type: Literal["MultiLineString"] - coordinates: list[Points] + coordinates: list[PointsT] class GeoJSONPolygon(TypedDict): type: Literal["Polygon"] # Other requirements for Polygon not enforced by type checker # https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.6 - coordinates: list[Points] + coordinates: list[PointsT] class GeoJSONMultiPolygon(TypedDict): type: Literal["MultiPolygon"] - coordinates: list[list[Points]] + coordinates: list[list[PointsT]] GeoJSONHomogeneousGeometryObject = Union[ @@ -293,7 +293,7 @@ def __repr__(self): def signed_area( - coords: Points, + coords: PointsT, fast: bool = False, ) -> float: """Return the signed area enclosed by a ring using the linear time @@ -311,7 +311,7 @@ def signed_area( return area2 / 2.0 -def is_cw(coords: Points) -> bool: +def is_cw(coords: PointsT) -> bool: """Returns True if a polygon ring has clockwise orientation, determined by a negatively signed area. """ @@ -319,12 +319,12 @@ def is_cw(coords: Points) -> bool: return area2 < 0 -def rewind(coords: Reversible[Point]) -> Points: +def rewind(coords: Reversible[PointT]) -> PointsT: """Returns the input coords in reversed order.""" return list(reversed(coords)) -def ring_bbox(coords: Points) -> BBox: +def ring_bbox(coords: PointsT) -> BBox: """Calculates and returns the bounding box of a ring.""" xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values bbox = min(xs), min(ys), max(xs), max(ys) @@ -347,7 +347,7 @@ def bbox_contains(bbox1: BBox, bbox2: BBox) -> bool: return contains -def ring_contains_point(coords: Points, p: Point2D) -> bool: +def ring_contains_point(coords: PointsT, p: Point2D) -> bool: """Fast point-in-polygon crossings algorithm, MacMartin optimization. Adapted from code by Eric Haynes @@ -396,7 +396,7 @@ class RingSamplingError(Exception): pass -def ring_sample(coords: Points, ccw: bool = False) -> Point2D: +def ring_sample(coords: PointsT, ccw: bool = False) -> Point2D: """Return a sample point guaranteed to be within a ring, by efficiently finding the first centroid of a coordinate triplet whose orientation matches the orientation of the ring and passes the point-in-ring test. @@ -446,15 +446,15 @@ def itercoords(): ) -def ring_contains_ring(coords1: Points, coords2: list[Point]) -> bool: +def ring_contains_ring(coords1: PointsT, coords2: list[PointT]) -> bool: """Returns True if all vertexes in coords2 are fully inside coords1.""" # Ignore Z and M values in coords2 return all(ring_contains_point(coords1, p2[:2]) for p2 in coords2) def organize_polygon_rings( - rings: Iterable[Points], return_errors: Optional[dict[str, int]] = None -) -> list[list[Points]]: + rings: Iterable[PointsT], return_errors: Optional[dict[str, int]] = None +) -> list[list[PointsT]]: """Organize a list of coordinate rings into one or more polygons with holes. Returns a list of polygons, where each polygon is composed of a single exterior ring, and one or more interior holes. If a return_errors dict is provided (optional), @@ -585,10 +585,12 @@ class GeoJSON_Error(Exception): class Shape: + shapeType = NULL + def __init__( self, shapeType: int = NULL, - points: Optional[Points] = None, + points: Optional[PointsT] = None, parts: Optional[Sequence[int]] = None, partTypes: Optional[Sequence[int]] = None, oid: Optional[int] = None, @@ -604,7 +606,8 @@ def __init__( list of shapes. For MultiPatch geometry, partTypes designates the patch type of each of the parts. """ - self.shapeType = shapeType + if self.shapeType != shapeType: + self.shapeType = shapeType self.points = points or [] self.parts = parts or [] if partTypes: @@ -838,6 +841,82 @@ def __repr__(self): return f"Shape #{self.__oid}: {self.shapeTypeName}" +class NullShape(Shape): + # Shape.shapeType = NULL already, + # to preserve handling of default args in Shape.__init__ + # Repeated for clarity. + shapeType = NULL + + +class _CanHaveBBox(Shape): + # Not a BBox because the legacy implementation was a list, not a 4-tuple. + bbox: Optional[list[float]] = None + + +class Point(Shape): + shapeType = 1 + + +class Polyline(_CanHaveBBox): + shapeType = 3 + + +class Polygon(_CanHaveBBox): + shapeType = 5 + + +class MultiPoint(_CanHaveBBox): + shapeType = 8 + + +class _HasM(Shape): + m: Sequence[Optional[float]] + + +class _HasZ(Shape): + z: Sequence[float] + + +class MultiPatch(_HasM, _HasZ, _CanHaveBBox): + shapeType = 31 + + +class PointM(Point, _HasM): + # same default as in Writer.__shpRecord (if s.shapeType in (11, 21):) + # PyShp encodes None m values as NODATA + m = (None,) + shapeType = 21 + + +class PolylineM(Polyline, _HasM): + shapeType = 23 + + +class PolygonM(Polygon, _HasM): + shapeType = 25 + + +class MultiPointM(MultiPoint, _HasM): + shapeType = 28 + + +class PointZ(PointM, _HasZ): + shapeType = 11 + z = (0,) # same default as in Writer.__shpRecord (if s.shapeType == 11:) + + +class PolylineZ(PolylineM, _HasZ): + shapeType = 13 + + +class PolygonZ(PolygonM, _HasZ): + shapeType = 15 + + +class MultiPointZ(MultiPointM, _HasZ): + shapeType = 18 + + class _Record(list): """ A class to hold a record. Subclasses list to ensure compatibility with @@ -2879,14 +2958,14 @@ def pointz(self, x: float, y: float, z: float = 0.0, m: Optional[float] = None): pointShape.points.append((x, y, z, m)) self.shape(pointShape) - def multipoint(self, points: Points): + def multipoint(self, points: PointsT): """Creates a MULTIPOINT shape. Points is a list of xy values.""" shapeType = MULTIPOINT # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=[points], shapeType=shapeType) - def multipointm(self, points: Points): + def multipointm(self, points: PointsT): """Creates a MULTIPOINTM shape. Points is a list of xym values. If the m (measure) value is not included, it defaults to None (NoData).""" @@ -2894,7 +2973,7 @@ def multipointm(self, points: Points): # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=[points], shapeType=shapeType) - def multipointz(self, points: Points): + def multipointz(self, points: PointsT): """Creates a MULTIPOINTZ shape. Points is a list of xyzm values. If the z (elevation) value is not included, it defaults to 0. @@ -2903,20 +2982,20 @@ def multipointz(self, points: Points): # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=[points], shapeType=shapeType) - def line(self, lines: list[Points]): + def line(self, lines: list[PointsT]): """Creates a POLYLINE shape. Lines is a collection of lines, each made up of a list of xy values.""" shapeType = POLYLINE self._shapeparts(parts=lines, shapeType=shapeType) - def linem(self, lines: list[Points]): + def linem(self, lines: list[PointsT]): """Creates a POLYLINEM shape. Lines is a collection of lines, each made up of a list of xym values. If the m (measure) value is not included, it defaults to None (NoData).""" shapeType = POLYLINEM self._shapeparts(parts=lines, shapeType=shapeType) - def linez(self, lines: list[Points]): + def linez(self, lines: list[PointsT]): """Creates a POLYLINEZ shape. Lines is a collection of lines, each made up of a list of xyzm values. If the z (elevation) value is not included, it defaults to 0. @@ -2924,7 +3003,7 @@ def linez(self, lines: list[Points]): shapeType = POLYLINEZ self._shapeparts(parts=lines, shapeType=shapeType) - def poly(self, polys: list[Points]): + def poly(self, polys: list[PointsT]): """Creates a POLYGON shape. Polys is a collection of polygons, each made up of a list of xy values. Note that for ordinary polygons the coordinates must run in a clockwise direction. @@ -2932,7 +3011,7 @@ def poly(self, polys: list[Points]): shapeType = POLYGON self._shapeparts(parts=polys, shapeType=shapeType) - def polym(self, polys: list[Points]): + def polym(self, polys: list[PointsT]): """Creates a POLYGONM shape. Polys is a collection of polygons, each made up of a list of xym values. Note that for ordinary polygons the coordinates must run in a clockwise direction. @@ -2941,7 +3020,7 @@ def polym(self, polys: list[Points]): shapeType = POLYGONM self._shapeparts(parts=polys, shapeType=shapeType) - def polyz(self, polys: list[Points]): + def polyz(self, polys: list[PointsT]): """Creates a POLYGONZ shape. Polys is a collection of polygons, each made up of a list of xyzm values. Note that for ordinary polygons the coordinates must run in a clockwise direction. @@ -2951,7 +3030,7 @@ def polyz(self, polys: list[Points]): shapeType = POLYGONZ self._shapeparts(parts=polys, shapeType=shapeType) - def multipatch(self, parts: list[Points], partTypes: list[int]): + def multipatch(self, parts: list[PointsT], partTypes: list[int]): """Creates a MULTIPATCH shape. Parts is a collection of 3D surface patches, each made up of a list of xyzm values. PartTypes is a list of types that define each of the surface patches. @@ -2977,7 +3056,7 @@ def multipatch(self, parts: list[Points], partTypes: list[int]): # write the shape self.shape(polyShape) - def _shapeparts(self, parts: list[Points], shapeType: int): + def _shapeparts(self, parts: list[PointsT], shapeType: int): """Internal method for adding a shape that has multiple collections of points (parts): lines, polygons, and multipoint shapes. """ From c5a186f6e939a724ef3a67c65f94d0cf7e9de4e6 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 15:56:53 +0100 Subject: [PATCH 02/10] Use Shape subclasses to avoid the need for # type: ignore [attr-defined] --- src/shapefile.py | 142 ++++++++++++++++++++++++++++------------------- 1 file changed, 85 insertions(+), 57 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index 60ca315..c8ebf24 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -584,12 +584,19 @@ class GeoJSON_Error(Exception): pass +class _NoShapeTypeSentinel: + """For use as a default value for Shape.__init__ to + preserve old behaviour for anyone who explictly + called Shape(shapeType=None). + """ + + class Shape: - shapeType = NULL + shapeType: int = NULL def __init__( self, - shapeType: int = NULL, + shapeType: Union[int, _NoShapeTypeSentinel] = _NoShapeTypeSentinel(), points: Optional[PointsT] = None, parts: Optional[Sequence[int]] = None, partTypes: Optional[Sequence[int]] = None, @@ -606,7 +613,7 @@ def __init__( list of shapes. For MultiPatch geometry, partTypes designates the patch type of each of the parts. """ - if self.shapeType != shapeType: + if not isinstance(shapeType, _NoShapeTypeSentinel): self.shapeType = shapeType self.points = points or [] self.parts = parts or [] @@ -850,7 +857,7 @@ class NullShape(Shape): class _CanHaveBBox(Shape): # Not a BBox because the legacy implementation was a list, not a 4-tuple. - bbox: Optional[list[float]] = None + bbox: Optional[Sequence[float]] = None class Point(Shape): @@ -882,10 +889,10 @@ class MultiPatch(_HasM, _HasZ, _CanHaveBBox): class PointM(Point, _HasM): + shapeType = 21 # same default as in Writer.__shpRecord (if s.shapeType in (11, 21):) # PyShp encodes None m values as NODATA m = (None,) - shapeType = 21 class PolylineM(Polyline, _HasM): @@ -902,7 +909,8 @@ class MultiPointM(MultiPoint, _HasM): class PointZ(PointM, _HasZ): shapeType = 11 - z = (0,) # same default as in Writer.__shpRecord (if s.shapeType == 11:) + # same default as in Writer.__shpRecord (if s.shapeType == 11:) + z: Sequence[float] = (0.0,) class PolylineZ(PolylineM, _HasZ): @@ -917,6 +925,24 @@ class MultiPointZ(MultiPointM, _HasZ): shapeType = 18 +SHAPE_CLASS_FROM_SHAPETYPE: dict[int, type[Shape]] = { + NULL: NullShape, + POINT: Point, + POLYLINE: Polyline, + POLYGON: Polygon, + MULTIPOINT: MultiPoint, + POINTZ: PointZ, + POLYLINEZ: PolylineZ, + POLYGONZ: PolygonZ, + MULTIPOINTZ: MultiPointZ, + POINTM: PointM, + POLYLINEM: PolylineM, + POLYGONM: PolygonM, + MULTIPOINTM: MultiPointM, + MULTIPATCH: MultiPatch, +} + + class _Record(list): """ A class to hold a record. Subclasses list to ensure compatibility with @@ -1615,7 +1641,8 @@ def __shape( # pylint: disable=attribute-defined-outside-init f = self.__getFileObj(self.shp) - record = Shape(oid=oid) + record = SHAPE_CLASS_FROM_SHAPETYPE[self.shapeType](oid=oid) + # record = Shape(oid=oid) # Previously, we also set __zmin = __zmax = __mmin = __mmax = None nParts: Optional[int] = None nPoints: Optional[int] = None @@ -1625,23 +1652,28 @@ def __shape( shapeType = unpack("= 16: __mmin, __mmax = unpack("<2d", f.read(16)) # Measure values less than -10e38 are nodata values according to the spec if next_shape - f.tell() >= nPoints * 8: - record.m = [] # type: ignore [attr-defined] + record.m = [] for m in _Array[float]( "d", unpack(f"<{nPoints}d", f.read(nPoints * 8)) ): if m > NODATA: - record.m.append(m) # type: ignore [attr-defined] + record.m.append(m) else: - record.m.append(None) # type: ignore [attr-defined] + record.m.append(None) else: - record.m = [None for _ in range(nPoints)] # type: ignore [attr-defined] + record.m = [None for _ in range(nPoints)] # Read a single point - if shapeType in (1, 11, 21): + # if shapeType in (1, 11, 21): + if isinstance(record, Point): x, y = _Array[float]("d", unpack("<2d", f.read(16))) record.points = [(x, y)] @@ -1695,20 +1731,22 @@ def __shape( return None # Read a single Z value - if shapeType == 11: - record.z = list(unpack("= 8: (m,) = unpack(" NODATA: - record.m = [m] # type: ignore [attr-defined] + record.m = (m,) else: - record.m = [None] # type: ignore [attr-defined] + record.m = (None,) # pylint: enable=attribute-defined-outside-init # Seek to the end of this record as defined by the record header because @@ -2932,20 +2970,18 @@ def balance(self): def null(self): """Creates a null shape.""" - self.shape(Shape(NULL)) + self.shape(NullShape()) def point(self, x: float, y: float): """Creates a POINT shape.""" - shapeType = POINT - pointShape = Shape(shapeType) + pointShape = Point() pointShape.points.append((x, y)) self.shape(pointShape) def pointm(self, x: float, y: float, m: Optional[float] = None): """Creates a POINTM shape. If the m (measure) value is not set, it defaults to NoData.""" - shapeType = POINTM - pointShape = Shape(shapeType) + pointShape = PointM() pointShape.points.append((x, y, m)) self.shape(pointShape) @@ -2953,63 +2989,55 @@ def pointz(self, x: float, y: float, z: float = 0.0, m: Optional[float] = None): """Creates a POINTZ shape. If the z (elevation) value is not set, it defaults to 0. If the m (measure) value is not set, it defaults to NoData.""" - shapeType = POINTZ - pointShape = Shape(shapeType) + pointShape = PointZ() pointShape.points.append((x, y, z, m)) self.shape(pointShape) def multipoint(self, points: PointsT): """Creates a MULTIPOINT shape. Points is a list of xy values.""" - shapeType = MULTIPOINT # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=[points], shapeType=shapeType) + self._shapeparts(parts=[points], polyShape=MultiPoint()) def multipointm(self, points: PointsT): """Creates a MULTIPOINTM shape. Points is a list of xym values. If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPOINTM # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=[points], shapeType=shapeType) + self._shapeparts(parts=[points], polyShape=MultiPointM()) def multipointz(self, points: PointsT): """Creates a MULTIPOINTZ shape. Points is a list of xyzm values. If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPOINTZ # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=[points], shapeType=shapeType) + self._shapeparts(parts=[points], polyShape=MultiPointZ()) def line(self, lines: list[PointsT]): """Creates a POLYLINE shape. Lines is a collection of lines, each made up of a list of xy values.""" - shapeType = POLYLINE - self._shapeparts(parts=lines, shapeType=shapeType) + self._shapeparts(parts=lines, polyShape=Polyline()) def linem(self, lines: list[PointsT]): """Creates a POLYLINEM shape. Lines is a collection of lines, each made up of a list of xym values. If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYLINEM - self._shapeparts(parts=lines, shapeType=shapeType) + self._shapeparts(parts=lines, polyShape=PolylineM()) def linez(self, lines: list[PointsT]): """Creates a POLYLINEZ shape. Lines is a collection of lines, each made up of a list of xyzm values. If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYLINEZ - self._shapeparts(parts=lines, shapeType=shapeType) + self._shapeparts(parts=lines, polyShape=PolylineZ()) def poly(self, polys: list[PointsT]): """Creates a POLYGON shape. Polys is a collection of polygons, each made up of a list of xy values. Note that for ordinary polygons the coordinates must run in a clockwise direction. If some of the polygons are holes, these must run in a counterclockwise direction.""" - shapeType = POLYGON - self._shapeparts(parts=polys, shapeType=shapeType) + self._shapeparts(parts=polys, polyShape=Polygon()) def polym(self, polys: list[PointsT]): """Creates a POLYGONM shape. @@ -3017,8 +3045,7 @@ def polym(self, polys: list[PointsT]): Note that for ordinary polygons the coordinates must run in a clockwise direction. If some of the polygons are holes, these must run in a counterclockwise direction. If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYGONM - self._shapeparts(parts=polys, shapeType=shapeType) + self._shapeparts(parts=polys, polyShape=PolygonM()) def polyz(self, polys: list[PointsT]): """Creates a POLYGONZ shape. @@ -3027,8 +3054,7 @@ def polyz(self, polys: list[PointsT]): If some of the polygons are holes, these must run in a counterclockwise direction. If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYGONZ - self._shapeparts(parts=polys, shapeType=shapeType) + self._shapeparts(parts=polys, polyShape=PolygonZ()) def multipatch(self, parts: list[PointsT], partTypes: list[int]): """Creates a MULTIPATCH shape. @@ -3038,8 +3064,7 @@ def multipatch(self, parts: list[PointsT], partTypes: list[int]): TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPATCH - polyShape = Shape(shapeType) + polyShape = MultiPatch() polyShape.parts = [] polyShape.points = [] for part in parts: @@ -3056,15 +3081,18 @@ def multipatch(self, parts: list[PointsT], partTypes: list[int]): # write the shape self.shape(polyShape) - def _shapeparts(self, parts: list[PointsT], shapeType: int): + def _shapeparts( + self, parts: list[PointsT], polyShape: Union[Polyline, Polygon, MultiPoint] + ): """Internal method for adding a shape that has multiple collections of points (parts): lines, polygons, and multipoint shapes. """ - polyShape = Shape(shapeType) polyShape.parts = [] polyShape.points = [] # Make sure polygon rings (parts) are closed - if shapeType in (5, 15, 25, 31): + + # if shapeType in (5, 15, 25, 31): + if isinstance(polyShape, Polygon): for part in parts: if part[0] != part[-1]: part.append(part[0]) From fe4e0c7efbc11b17c4a4a65285c11687166876c5 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 16:23:27 +0100 Subject: [PATCH 03/10] Replace big if elif block with dict --- src/shapefile.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/shapefile.py b/src/shapefile.py index c8ebf24..14da4bf 100644 --- a/src/shapefile.py +++ b/src/shapefile.py @@ -194,6 +194,16 @@ class GeoJSONMultiPolygon(TypedDict): GeoJSONMultiPolygon, ] +GEOJSON_TO_SHAPETYPE: dict[str, int] = { + "Null": NULL, + "Point": POINT, + "LineString": POLYLINE, + "Polygon": POLYGON, + "MultiPoint": MULTIPOINT, + "MultiLineString": POLYLINE, + "MultiPolygon": POLYGON, +} + class GeoJSONGeometryCollection(TypedDict): type: Literal["GeometryCollection"] @@ -759,23 +769,10 @@ def _from_geojson(geoj) -> Shape: shape = Shape() # set shapeType geojType = geoj["type"] if geoj else "Null" - if geojType == "Null": - shapeType = NULL - elif geojType == "Point": - shapeType = POINT - elif geojType == "LineString": - shapeType = POLYLINE - elif geojType == "Polygon": - shapeType = POLYGON - elif geojType == "MultiPoint": - shapeType = MULTIPOINT - elif geojType == "MultiLineString": - shapeType = POLYLINE - elif geojType == "MultiPolygon": - shapeType = POLYGON + if geojType in GEOJSON_TO_SHAPETYPE: + shape.shapeType = GEOJSON_TO_SHAPETYPE[geojType] else: raise GeoJSON_Error(f"Cannot create Shape from GeoJSON type '{geojType}'") - shape.shapeType = shapeType # set points and parts if geojType == "Point": From b90d8f117c17ba2df2ab40c5c50385c7a8ba9850 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:01:37 +0100 Subject: [PATCH 04/10] Add speed test --- .github/actions/test/action.yml | 2 +- .github/workflows/speed_test.yml | 70 +++++++++++++++++++++++++++ speed_test.py | 82 ++++++++++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/speed_test.yml create mode 100644 speed_test.py diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 4a2fd99..e1dc3ca 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -72,7 +72,7 @@ runs: name: PyShp_wheel_and_sdist path: dist - - name: Install PyShp from the wheel (built in prev step) + - name: Install PyShp from the wheel (downloaded in prev step) shell: bash working-directory: dist/ run: | diff --git a/.github/workflows/speed_test.yml b/.github/workflows/speed_test.yml new file mode 100644 index 0000000..822982b --- /dev/null +++ b/.github/workflows/speed_test.yml @@ -0,0 +1,70 @@ + +name: Run the speed tests + +on: + push: + pull_request: + workflow_dispatch: + +jobs: + + + build_wheel_and_sdist: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: Build wheel from the project repo + uses: ./.github/actions/build_wheel_and_sdist + + run_speed_tests: + needs: build_wheel_and_sdist + strategy: + fail-fast: false + matrix: + python-version: [ + "3.9", + "3.13", + ] + os: [ + "windows-latest", + "ubuntu-24.04", + ] + + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Download wheel and sdist (built in previous job) + uses: actions/download-artifact@v4 + with: + name: PyShp_wheel_and_sdist + path: dist + + - name: Install PyShp + test deps from the wheel (downloaded in prev step) + shell: bash + working-directory: dist/ + run: | + WHEEL_NAME=$(ls pyshp-*py3-none-any.whl) + python -m pip install $WHEEL_NAME[test] + + - uses: actions/checkout@v4 + with: + path: ./Pyshp + + - name: Checkout shapefiles and zip file artefacts repo + uses: actions/checkout@v4 + with: + repository: JamesParrott/PyShp_test_shapefile + path: ./PyShp_test_shapefile + + - name: Run Benchmarks + env: + PYSHP_TEST_REPO: ./PyShp_test_shapefile + run: python speed_test.py + + + diff --git a/speed_test.py b/speed_test.py new file mode 100644 index 0000000..951ee59 --- /dev/null +++ b/speed_test.py @@ -0,0 +1,82 @@ +# Based on Taneli Hukkinen's https://github.com/hukkin/tomli-w/blob/master/benchmark/run.py + +from __future__ import annotations + +from collections.abc import Callable +import functools +import os +from pathlib import Path +import timeit + +import shapefile as shp + +import test_shapefile + +DEFAULT_PYSHP_TEST_REPO = fr"{os.getenv('USERPROFILE')}\Coding\repos\PyShp_test_shapefile" +PYSHP_TEST_REPO = Path(os.getenv("PYSHP_TEST_REPO", DEFAULT_PYSHP_TEST_REPO)) +REPO_ROOT = Path(__file__).parent + + +blockgroups_file = REPO_ROOT / "shapefiles" / "blockgroups.shp" +edit_file = REPO_ROOT / "shapefiles" / "test" / "edit.shp" +merge_file = REPO_ROOT / "shapefiles" / "test" / "merge.shp" +states_provinces_file = PYSHP_TEST_REPO / "ne_10m_admin_1_states_provinces.shp" +tiny_countries_file = PYSHP_TEST_REPO / "ne_110m_admin_0_tiny_countries.shp" +gis_osm_natural_file = PYSHP_TEST_REPO / "gis_osm_natural_a_free_1.zip" + + +def benchmark( + name: str, + run_count: int, + func: Callable, + col_width: tuple, + compare_to: float | None = None, +) -> float: + placeholder = "Running..." + print(f"{name:>{col_width[0]}} | {placeholder}", end="", flush=True) + time_taken = timeit.timeit(func, number=run_count) + print("\b" * len(placeholder), end="") + time_suffix = " s" + print(f"{time_taken:{col_width[1]-len(time_suffix)}.3g}{time_suffix}", end="") + print() + return time_taken + + + +def open_shapefile_with_PyShp(target: Union[str, os.PathLike]): + with shp.Reader(target) as r: + for shapeRecord in r.iterShapeRecords(): + pass + + +READER_TESTS = { + "Blockgroups": blockgroups_file, + "Edit": edit_file, + "Merge": merge_file, + "States_35MB": states_provinces_file, + "Tiny Countries": tiny_countries_file, + "GIS_OSM_zip_10MB": gis_osm_natural_file, +} + + +def run(run_count: int) -> None: + col_width = (21, 10) + col_head = ("parser", "exec time", "performance (more is better)") + # Load files to avoid one off delays that only affect first disk seek + for file_path in READER_TESTS.values(): + file_path.read_bytes() + print(f"Running benchmarks {run_count} times:") + print("-" * col_width[0] + "---" + "-" * col_width[1]) + print( + f"{col_head[0]:>{col_width[0]}} | {col_head[1]:>{col_width[1]}}" + ) + print("-" * col_width[0] + "-+-" + "-" * col_width[1]) + for test_name, target in READER_TESTS.items(): + benchmark(f"Read {test_name}", run_count, functools.partial(open_shapefile_with_PyShp, target=target), col_width) + + benchmark(f"Slow test", 1, test_shapefile.test_iterRecords_start_stop, col_width) + + + +if __name__ == "__main__": + run(1) \ No newline at end of file From b827fbea426014c6b7932e72b72ef0722ad51e2b Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:08:01 +0100 Subject: [PATCH 05/10] Correct type checking and path to speed_test.py in custom repo location --- .github/workflows/speed_test.yml | 4 ++-- speed_test.py | 27 +++++++++++++++------------ 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/.github/workflows/speed_test.yml b/.github/workflows/speed_test.yml index 822982b..818f347 100644 --- a/.github/workflows/speed_test.yml +++ b/.github/workflows/speed_test.yml @@ -64,7 +64,7 @@ jobs: - name: Run Benchmarks env: PYSHP_TEST_REPO: ./PyShp_test_shapefile - run: python speed_test.py - + run: python ./Pyshp/speed_test.py + diff --git a/speed_test.py b/speed_test.py index 951ee59..9befbfe 100644 --- a/speed_test.py +++ b/speed_test.py @@ -2,17 +2,19 @@ from __future__ import annotations -from collections.abc import Callable import functools import os -from pathlib import Path import timeit +from collections.abc import Callable +from pathlib import Path +from typing import Union import shapefile as shp - import test_shapefile -DEFAULT_PYSHP_TEST_REPO = fr"{os.getenv('USERPROFILE')}\Coding\repos\PyShp_test_shapefile" +DEFAULT_PYSHP_TEST_REPO = ( + rf"{os.getenv('USERPROFILE')}\Coding\repos\PyShp_test_shapefile" +) PYSHP_TEST_REPO = Path(os.getenv("PYSHP_TEST_REPO", DEFAULT_PYSHP_TEST_REPO)) REPO_ROOT = Path(__file__).parent @@ -42,7 +44,6 @@ def benchmark( return time_taken - def open_shapefile_with_PyShp(target: Union[str, os.PathLike]): with shp.Reader(target) as r: for shapeRecord in r.iterShapeRecords(): @@ -55,7 +56,7 @@ def open_shapefile_with_PyShp(target: Union[str, os.PathLike]): "Merge": merge_file, "States_35MB": states_provinces_file, "Tiny Countries": tiny_countries_file, - "GIS_OSM_zip_10MB": gis_osm_natural_file, + "GIS_OSM_zip_10MB": gis_osm_natural_file, } @@ -67,16 +68,18 @@ def run(run_count: int) -> None: file_path.read_bytes() print(f"Running benchmarks {run_count} times:") print("-" * col_width[0] + "---" + "-" * col_width[1]) - print( - f"{col_head[0]:>{col_width[0]}} | {col_head[1]:>{col_width[1]}}" - ) + print(f"{col_head[0]:>{col_width[0]}} | {col_head[1]:>{col_width[1]}}") print("-" * col_width[0] + "-+-" + "-" * col_width[1]) for test_name, target in READER_TESTS.items(): - benchmark(f"Read {test_name}", run_count, functools.partial(open_shapefile_with_PyShp, target=target), col_width) + benchmark( + f"Read {test_name}", + run_count, + functools.partial(open_shapefile_with_PyShp, target=target), + col_width, + ) benchmark(f"Slow test", 1, test_shapefile.test_iterRecords_start_stop, col_width) - if __name__ == "__main__": - run(1) \ No newline at end of file + run(1) From dd2da7467948bbb582ddc43b316aaa386e06006e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:09:51 +0100 Subject: [PATCH 06/10] Remove slow test from speed tests --- speed_test.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/speed_test.py b/speed_test.py index 9befbfe..10b7fd0 100644 --- a/speed_test.py +++ b/speed_test.py @@ -10,7 +10,6 @@ from typing import Union import shapefile as shp -import test_shapefile DEFAULT_PYSHP_TEST_REPO = ( rf"{os.getenv('USERPROFILE')}\Coding\repos\PyShp_test_shapefile" @@ -78,8 +77,5 @@ def run(run_count: int) -> None: col_width, ) - benchmark(f"Slow test", 1, test_shapefile.test_iterRecords_start_stop, col_width) - - if __name__ == "__main__": run(1) From 1958afae1e72d28328f0a228712cf670af2bbe5e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:12:03 +0100 Subject: [PATCH 07/10] Update speed_test.py --- speed_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/speed_test.py b/speed_test.py index 10b7fd0..3374c63 100644 --- a/speed_test.py +++ b/speed_test.py @@ -11,6 +11,7 @@ import shapefile as shp +# For shapefiles from https://github.com/JamesParrott/PyShp_test_shapefile DEFAULT_PYSHP_TEST_REPO = ( rf"{os.getenv('USERPROFILE')}\Coding\repos\PyShp_test_shapefile" ) From 1af47c335855402588bd5806155b5135e34a2cf5 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:13:56 +0100 Subject: [PATCH 08/10] Update speed_test.yml --- .github/workflows/speed_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/speed_test.yml b/.github/workflows/speed_test.yml index 818f347..7f95020 100644 --- a/.github/workflows/speed_test.yml +++ b/.github/workflows/speed_test.yml @@ -61,7 +61,7 @@ jobs: repository: JamesParrott/PyShp_test_shapefile path: ./PyShp_test_shapefile - - name: Run Benchmarks + - name: Run Speed tests. env: PYSHP_TEST_REPO: ./PyShp_test_shapefile run: python ./Pyshp/speed_test.py From 92eb277fedbdce0b76e0f207452038e6cdb768ad Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:15:17 +0100 Subject: [PATCH 09/10] Reformat --- speed_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/speed_test.py b/speed_test.py index 3374c63..0b8b828 100644 --- a/speed_test.py +++ b/speed_test.py @@ -78,5 +78,6 @@ def run(run_count: int) -> None: col_width, ) + if __name__ == "__main__": run(1) From 5e40f2e590c6b80d0814894657919dac52f80de0 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:32:59 +0100 Subject: [PATCH 10/10] Rename speed_test.py to match that of PyShp 2.4.0 --- .github/workflows/speed_test.yml | 2 +- speed_test.py => run_benchmarks.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename speed_test.py => run_benchmarks.py (100%) diff --git a/.github/workflows/speed_test.yml b/.github/workflows/speed_test.yml index 7f95020..5a03722 100644 --- a/.github/workflows/speed_test.yml +++ b/.github/workflows/speed_test.yml @@ -64,7 +64,7 @@ jobs: - name: Run Speed tests. env: PYSHP_TEST_REPO: ./PyShp_test_shapefile - run: python ./Pyshp/speed_test.py + run: python ./Pyshp/run_benchmarks.py diff --git a/speed_test.py b/run_benchmarks.py similarity index 100% rename from speed_test.py rename to run_benchmarks.py