From ce8b059bca6f078107ed1b52ab741dd61705e5a7 Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Sun, 6 Jul 2025 15:09:31 +0000 Subject: [PATCH 01/22] Update to python 3.9. - Some hand editing - `pyupgrade --py39-plus` --- pyproject.toml | 3 +- setup.cfg | 3 +- shapefile.py | 243 ++++++++++++++++------------------------------ test_shapefile.py | 6 +- 4 files changed, 89 insertions(+), 166 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index df8e737f..fed78f76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ line-length = 88 indent-width = 4 # Assume Python 3.9 -target-version = "py37" +target-version = "py39" [tool.ruff.lint] # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. @@ -67,7 +67,6 @@ skip-magic-trailing-comma = false line-ending = "auto" - [tool.pylint.MASTER] load-plugins=[ "pylint_per_file_ignores", diff --git a/setup.cfg b/setup.cfg index 906abd3a..d13d43bb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,7 +16,6 @@ keywords = gis, geospatial, geographic, shapefile, shapefiles classifiers = Development Status :: 5 - Production/Stable Programming Language :: Python - Programming Language :: Python :: 2.7 Programming Language :: Python :: 3 Topic :: Scientific/Engineering :: GIS Topic :: Software Development :: Libraries @@ -24,7 +23,7 @@ classifiers = [options] py_modules = shapefile -python_requires = >=2.7 +python_requires = >=3.9 [bdist_wheel] universal=1 diff --git a/shapefile.py b/shapefile.py index 211fd48f..fdd49cd7 100644 --- a/shapefile.py +++ b/shapefile.py @@ -3,21 +3,25 @@ Provides read and write support for ESRI Shapefiles. authors: jlawheadgeospatialpython.com maintainer: karim.bahgat.norwaygmail.com -Compatible with Python versions 2.7-3.x +Compatible with Python versions >=3.9 """ __version__ = "2.4.0" import array +from datetime import date import io import logging import os import sys import tempfile import time -import zipfile -from datetime import date from struct import Struct, calcsize, error, pack, unpack +import zipfile + +from urllib.error import HTTPError +from urllib.parse import urlparse, urlunparse +from urllib.request import Request, urlopen # Create named logger logger = logging.getLogger(__name__) @@ -79,118 +83,48 @@ 5: "RING", } - -# Python 2-3 handling - -PYTHON3 = sys.version_info[0] == 3 - -if PYTHON3: - xrange = range - izip = zip - - from urllib.error import HTTPError - from urllib.parse import urlparse, urlunparse - from urllib.request import Request, urlopen - -else: - from itertools import izip - - from urllib2 import HTTPError, Request, urlopen - from urlparse import urlparse, urlunparse - - # Helpers MISSING = [None, ""] NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. -if PYTHON3: - - def b(v, encoding="utf-8", encodingErrors="strict"): - if isinstance(v, str): - # For python 3 encode str to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return b"" - else: - # Force string representation. - return str(v).encode(encoding, encodingErrors) - - def u(v, encoding="utf-8", encodingErrors="strict"): - if isinstance(v, bytes): - # For python 3 decode bytes to str. - return v.decode(encoding, encodingErrors) - elif isinstance(v, str): - # Already str. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) - - def is_string(v): - return isinstance(v, str) - -else: - - def b(v, encoding="utf-8", encodingErrors="strict"): - if isinstance(v, unicode): - # For python 2 encode unicode to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return unicode(v).encode(encoding, encodingErrors) - - def u(v, encoding="utf-8", encodingErrors="strict"): - if isinstance(v, bytes): - # For python 2 decode bytes to unicode. - return v.decode(encoding, encodingErrors) - elif isinstance(v, unicode): - # Already unicode. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) - - def is_string(v): - return isinstance(v, basestring) - -if sys.version_info[0:2] >= (3, 6): +def b(v, encoding="utf-8", encodingErrors="strict"): + if isinstance(v, str): + # For python 3 encode str to bytes. + return v.encode(encoding, encodingErrors) + elif isinstance(v, bytes): + # Already bytes. + return v + elif v is None: + # Since we're dealing with text, interpret None as "" + return b"" + else: + # Force string representation. + return str(v).encode(encoding, encodingErrors) + +def u(v, encoding="utf-8", encodingErrors="strict"): + if isinstance(v, bytes): + # For python 3 decode bytes to str. + return v.decode(encoding, encodingErrors) + elif isinstance(v, str): + # Already str. + return v + elif v is None: + # Since we're dealing with text, interpret None as "" + return "" + else: + # Force string representation. + return bytes(v).decode(encoding, encodingErrors) - def pathlike_obj(path): - if isinstance(path, os.PathLike): - return os.fsdecode(path) - else: - return path -else: - - def pathlike_obj(path): - if is_string(path): - return path - elif hasattr(path, "__fspath__"): - return path.__fspath__() - else: - try: - return str(path) - except: - return path +def is_string(v): + return isinstance(v, str) +def pathlike_obj(path): + if isinstance(path, os.PathLike): + return os.fsdecode(path) + else: + return path # Begin @@ -311,8 +245,7 @@ def ring_sample(coords, ccw=False): def itercoords(): # iterate full closed ring - for p in coords: - yield p + yield from coords # finally, yield the second coordinate to the end to allow checking the last triplet yield coords[1] @@ -350,7 +283,7 @@ def itercoords(): def ring_contains_ring(coords1, coords2): """Returns True if all vertexes in coords2 are fully inside coords1.""" - return all((ring_contains_point(coords1, p2) for p2 in coords2)) + return all(ring_contains_point(coords1, p2) for p2 in coords2) def organize_polygon_rings(rings, return_errors=None): @@ -398,7 +331,7 @@ def organize_polygon_rings(rings, return_errors=None): return polys # first determine each hole's candidate exteriors based on simple bbox contains test - hole_exteriors = dict([(hole_i, []) for hole_i in xrange(len(holes))]) + hole_exteriors = {hole_i: [] for hole_i in range(len(holes))} exterior_bboxes = [ring_bbox(ring) for ring in exteriors] for hole_i in hole_exteriors.keys(): hole_bbox = ring_bbox(holes[hole_i]) @@ -478,7 +411,7 @@ def organize_polygon_rings(rings, return_errors=None): return polys -class Shape(object): +class Shape: def __init__( self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None ): @@ -566,7 +499,7 @@ def __geo_interface__(self): else: # get all polygon rings rings = [] - for i in xrange(len(self.parts)): + for i in range(len(self.parts)): # get indexes of start and end points of the ring start = self.parts[i] try: @@ -712,7 +645,7 @@ def shapeTypeName(self): return SHAPETYPE_LOOKUP[self.shapeType] def __repr__(self): - return "Shape #{}: {}".format(self.__oid, self.shapeTypeName) + return f"Shape #{self.__oid}: {self.shapeTypeName}" class _Record(list): @@ -763,10 +696,10 @@ def __getattr__(self, item): index = self.__field_positions[item] return list.__getitem__(self, index) except KeyError: - raise AttributeError("{} is not a field name".format(item)) + raise AttributeError(f"{item} is not a field name") except IndexError: raise IndexError( - "{} found as a field but not enough values available.".format(item) + f"{item} found as a field but not enough values available." ) def __setattr__(self, key, value): @@ -783,7 +716,7 @@ def __setattr__(self, key, value): index = self.__field_positions[key] return list.__setitem__(self, index, value) except KeyError: - raise AttributeError("{} is not a field name".format(key)) + raise AttributeError(f"{key} is not a field name") def __getitem__(self, item): """ @@ -804,7 +737,7 @@ def __getitem__(self, item): if index is not None: return list.__getitem__(self, index) else: - raise IndexError('"{}" is not a field name and not an int'.format(item)) + raise IndexError(f'"{item}" is not a field name and not an int') def __setitem__(self, key, value): """ @@ -822,7 +755,7 @@ def __setitem__(self, key, value): if index is not None: return list.__setitem__(self, index, value) else: - raise IndexError("{} is not a field name and not an int".format(key)) + raise IndexError(f"{key} is not a field name and not an int") @property def oid(self): @@ -834,15 +767,15 @@ def as_dict(self, date_strings=False): Returns this Record as a dictionary using the field names as keys :return: dict """ - dct = dict((f, self[i]) for f, i in self.__field_positions.items()) + dct = {f: self[i] for f, i in self.__field_positions.items()} if date_strings: for k, v in dct.items(): if isinstance(v, date): - dct[k] = "{:04d}{:02d}{:02d}".format(v.year, v.month, v.day) + dct[k] = f"{v.year:04d}{v.month:02d}{v.day:02d}" return dct def __repr__(self): - return "Record #{}: {}".format(self.__oid, list(self)) + return f"Record #{self.__oid}: {list(self)}" def __dir__(self): """ @@ -866,7 +799,7 @@ def __eq__(self, other): return list.__eq__(self, other) -class ShapeRecord(object): +class ShapeRecord: """A ShapeRecord object containing a shape along with its attributes. Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" @@ -892,7 +825,7 @@ class Shapes(list): to return a GeometryCollection dictionary.""" def __repr__(self): - return "Shapes: {}".format(list(self)) + return f"Shapes: {list(self)}" @property def __geo_interface__(self): @@ -912,7 +845,7 @@ class ShapeRecords(list): to return a FeatureCollection dictionary.""" def __repr__(self): - return "ShapeRecords: {}".format(list(self)) + return f"ShapeRecords: {list(self)}" @property def __geo_interface__(self): @@ -929,7 +862,7 @@ class ShapefileException(Exception): pass -class Reader(object): +class Reader: """Reads the three files of a shapefile as a unit or separately. If one of the three files (.shp, .shx, .dbf) is missing no exception is thrown until you try @@ -1157,7 +1090,7 @@ def __str__(self): ) if self.dbf: info.append( - " {} records ({} fields)".format(len(self), len(self.fields)) + f" {len(self)} records ({len(self.fields)} fields)" ) return "\n".join(info) @@ -1224,8 +1157,7 @@ def __len__(self): def __iter__(self): """Iterates through the shapes/records in the shapefile.""" - for shaperec in self.iterShapeRecords(): - yield shaperec + yield from self.iterShapeRecords() @property def __geo_interface__(self): @@ -1250,7 +1182,7 @@ def load(self, shapefile=None): self.load_dbf(shapeName) if not (self.shp or self.dbf): raise ShapefileException( - "Unable to open %s.dbf or %s.shp." % (shapeName, shapeName) + f"Unable to open {shapeName}.dbf or {shapeName}.shp." ) if self.shp: self.__shpHeader() @@ -1265,13 +1197,13 @@ def load_shp(self, shapefile_name): """ shp_ext = "shp" try: - self.shp = open("%s.%s" % (shapefile_name, shp_ext), "rb") + self.shp = open(f"{shapefile_name}.{shp_ext}", "rb") self._files_to_close.append(self.shp) - except IOError: + except OSError: try: - self.shp = open("%s.%s" % (shapefile_name, shp_ext.upper()), "rb") + self.shp = open(f"{shapefile_name}.{shp_ext.upper()}", "rb") self._files_to_close.append(self.shp) - except IOError: + except OSError: pass def load_shx(self, shapefile_name): @@ -1280,13 +1212,13 @@ def load_shx(self, shapefile_name): """ shx_ext = "shx" try: - self.shx = open("%s.%s" % (shapefile_name, shx_ext), "rb") + self.shx = open(f"{shapefile_name}.{shx_ext}", "rb") self._files_to_close.append(self.shx) - except IOError: + except OSError: try: - self.shx = open("%s.%s" % (shapefile_name, shx_ext.upper()), "rb") + self.shx = open(f"{shapefile_name}.{shx_ext.upper()}", "rb") self._files_to_close.append(self.shx) - except IOError: + except OSError: pass def load_dbf(self, shapefile_name): @@ -1295,13 +1227,13 @@ def load_dbf(self, shapefile_name): """ dbf_ext = "dbf" try: - self.dbf = open("%s.%s" % (shapefile_name, dbf_ext), "rb") + self.dbf = open(f"{shapefile_name}.{dbf_ext}", "rb") self._files_to_close.append(self.dbf) - except IOError: + except OSError: try: - self.dbf = open("%s.%s" % (shapefile_name, dbf_ext.upper()), "rb") + self.dbf = open(f"{shapefile_name}.{dbf_ext.upper()}", "rb") self._files_to_close.append(self.dbf) - except IOError: + except OSError: pass def __del__(self): @@ -1313,7 +1245,7 @@ def close(self): if hasattr(attribute, "close"): try: attribute.close() - except IOError: + except OSError: pass self._files_to_close = [] @@ -1337,7 +1269,7 @@ def __restrictIndex(self, i): rmax = self.numRecords - 1 if abs(i) > rmax: raise IndexError( - "Shape or Record index: %s out of range. Max index: %s" % (i, rmax) + f"Shape or Record index: {i} out of range. Max index: {rmax}" ) if i < 0: i = range(self.numRecords)[i] @@ -1561,7 +1493,7 @@ def iterShapes(self, bbox=None): if self.numShapes: # Iterate exactly the number of shapes from shx header - for i in xrange(self.numShapes): + for i in range(self.numShapes): # MAYBE: check if more left of file or exit early? shape = self.__shape(oid=i, bbox=bbox) if shape: @@ -1624,7 +1556,7 @@ def __dbfHeader(self): # store all field positions for easy lookups # note: fieldLookup gives the index position of a field inside Reader.fields - self.__fieldLookup = dict((f[0], i) for i, f in enumerate(self.fields)) + self.__fieldLookup = {f[0]: i for i, f in enumerate(self.fields)} # by default, read all fields except the deletion flag, hence "[1:]" # note: recLookup gives the index position of a field inside a _Record list @@ -1676,7 +1608,7 @@ def __recordFields(self, fields=None): # make sure the given fieldnames exist for name in fields: if name not in self.__fieldLookup or name == "DeletionFlag": - raise ValueError('"{}" is not a valid field name'.format(name)) + raise ValueError(f'"{name}" is not a valid field name') # fetch relevant field info tuples fieldTuples = [] for fieldinfo in self.fields[1:]: @@ -1684,7 +1616,7 @@ def __recordFields(self, fields=None): if name in fields: fieldTuples.append(fieldinfo) # store the field positions - recLookup = dict((f[0], i) for i, f in enumerate(fieldTuples)) + recLookup = {f[0]: i for i, f in enumerate(fieldTuples)} else: # use all the dbf fields fieldTuples = self.fields[1:] # sans deletion flag @@ -1850,7 +1782,7 @@ def iterRecords(self, fields=None, start=0, stop=None): recSize = self.__recordLength f.seek(self.__dbfHdrLength + (start * recSize)) fieldTuples, recLookup, recStruct = self.__recordFields(fields) - for i in xrange(start, stop): + for i in range(start, stop): r = self.__record( oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct ) @@ -1891,7 +1823,7 @@ def iterShapeRecords(self, fields=None, bbox=None): """ if bbox is None: # iterate through all shapes and records - for shape, record in izip( + for shape, record in zip( self.iterShapes(), self.iterRecords(fields=fields) ): yield ShapeRecord(shape=shape, record=record) @@ -1908,7 +1840,7 @@ def iterShapeRecords(self, fields=None, bbox=None): yield ShapeRecord(shape=shape, record=record) -class Writer(object): +class Writer: """Provides write support for ESRI Shapefiles.""" def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): @@ -2015,7 +1947,7 @@ def close(self): ): try: attribute.flush() - except IOError: + except OSError: pass # Close any files that the writer opened (but not those given by user) @@ -2023,7 +1955,7 @@ def close(self): if hasattr(attribute, "close"): try: attribute.close() - except IOError: + except OSError: pass self._files_to_close = [] @@ -2494,7 +2426,7 @@ def record(self, *recordList, **recordDict): if self.autoBalance and self.recNum > self.shpNum: self.balance() - fieldCount = sum((1 for field in self.fields if field[0] != "DeletionFlag")) + fieldCount = sum(1 for field in self.fields if field[0] != "DeletionFlag") if recordList: record = list(recordList) while len(record) < fieldCount: @@ -2909,9 +2841,6 @@ def _test(args=sys.argv[1:], verbosity=0): class Py23DocChecker(doctest.OutputChecker): def check_output(self, want, got, optionflags): - if sys.version_info[0] == 2: - got = re.sub("u'(.*?)'", "'\\1'", got) - got = re.sub('u"(.*?)"', '"\\1"', got) res = doctest.OutputChecker.check_output(self, want, got, optionflags) return res diff --git a/test_shapefile.py b/test_shapefile.py index 1b7182f9..b55c1f7b 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -6,11 +6,7 @@ import json import os.path -try: - from pathlib import Path -except ImportError: - # pathlib2 is a dependency of pytest >= 3.7 - from pathlib2 import Path +from pathlib import Path # third party imports import pytest From 3e3462089712f8d4f49415ac5baee0eb360cf0d3 Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Sun, 6 Jul 2025 15:12:52 +0000 Subject: [PATCH 02/22] =?UTF-8?q?izip=20=E2=86=92=20zip?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shapefile.py b/shapefile.py index fdd49cd7..57f3630c 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1338,7 +1338,7 @@ def __shape(self, oid=None, bbox=None): # Read points - produces a list of [x,y] values if nPoints: flat = unpack("<%sd" % (2 * nPoints), f.read(16 * nPoints)) - record.points = list(izip(*(iter(flat),) * 2)) + record.points = list(zip(*(iter(flat),) * 2)) # Read z extremes and values if shapeType in (13, 15, 18, 31): (zmin, zmax) = unpack("<2d", f.read(16)) From 61505d121d2f7a0aee2e9c7ca7c87b3438d32faf Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Sun, 6 Jul 2025 16:05:39 +0000 Subject: [PATCH 03/22] pyproject.toml: Set target-version to py39 to match comment. --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fed78f76..697b6e67 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,6 @@ requires = ["setuptools"] build-backend = "setuptools.build_meta" - [tool.ruff] # Exclude a variety of commonly ignored directories. exclude = [ From 2c9aeaa5f1bbbc419d5dc0282bcee75951adc577 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 15:47:48 +0100 Subject: [PATCH 04/22] Run ruff format and pre-commit hooks --- shapefile.py | 13 +++++++------ test_shapefile.py | 1 - 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/shapefile.py b/shapefile.py index 57f3630c..9e62edca 100644 --- a/shapefile.py +++ b/shapefile.py @@ -9,16 +9,15 @@ __version__ = "2.4.0" import array -from datetime import date import io import logging import os import sys import tempfile import time -from struct import Struct, calcsize, error, pack, unpack import zipfile - +from datetime import date +from struct import Struct, calcsize, error, pack, unpack from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -103,6 +102,7 @@ def b(v, encoding="utf-8", encodingErrors="strict"): # Force string representation. return str(v).encode(encoding, encodingErrors) + def u(v, encoding="utf-8", encodingErrors="strict"): if isinstance(v, bytes): # For python 3 decode bytes to str. @@ -117,15 +117,18 @@ def u(v, encoding="utf-8", encodingErrors="strict"): # Force string representation. return bytes(v).decode(encoding, encodingErrors) + def is_string(v): return isinstance(v, str) + def pathlike_obj(path): if isinstance(path, os.PathLike): return os.fsdecode(path) else: return path + # Begin @@ -1089,9 +1092,7 @@ def __str__(self): ) ) if self.dbf: - info.append( - f" {len(self)} records ({len(self.fields)} fields)" - ) + info.append(f" {len(self)} records ({len(self.fields)} fields)") return "\n".join(info) def __enter__(self): diff --git a/test_shapefile.py b/test_shapefile.py index b55c1f7b..5f9b855d 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -5,7 +5,6 @@ import datetime import json import os.path - from pathlib import Path # third party imports From 48a6a47b44189b9a5f0f6968876906ff870c34a6 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 15:57:58 +0100 Subject: [PATCH 05/22] Remove reference to removed constant --- shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shapefile.py b/shapefile.py index 9e62edca..5dff293f 100644 --- a/shapefile.py +++ b/shapefile.py @@ -2802,7 +2802,7 @@ def _replace_remote_url( fragment=fragment, ) - new_url = urlunparse(new_parsed) if PYTHON3 else urlunparse(list(new_parsed)) + new_url = urlunparse(new_parsed) return new_url From 8954a9766729531066328d5e23a53caf144d7249 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 16:04:49 +0100 Subject: [PATCH 06/22] Set continue-on-error: true on tests of Python 2.7, ..., 3.8 --- .github/workflows/run_tests_hooks_and_tools.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run_tests_hooks_and_tools.yml b/.github/workflows/run_tests_hooks_and_tools.yml index 468b2e2b..548d9dc7 100644 --- a/.github/workflows/run_tests_hooks_and_tools.yml +++ b/.github/workflows/run_tests_hooks_and_tools.yml @@ -31,6 +31,7 @@ jobs: pylint --disable=R,C test_shapefile.py test_on_EOL_Pythons: + continue-on-error: true strategy: fail-fast: false matrix: From 5e1beec08f8d24e11e6bf38a5cecbd6cbb8d913f Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 16:34:31 +0100 Subject: [PATCH 07/22] Update .pre-commit-config.yaml --- .pre-commit-config.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ffe59bf6..85e04eba 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,3 +13,7 @@ repos: hooks: - id: check-yaml - id: trailing-whitespace +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.17.0 + hooks: + - id: mypy \ No newline at end of file From cb0c527dee4993e426fe0929e4e280e612ca22d3 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 16:56:03 +0100 Subject: [PATCH 08/22] Add type hints to doctest runner and filter --- shapefile.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/shapefile.py b/shapefile.py index 5dff293f..ba2f4fac 100644 --- a/shapefile.py +++ b/shapefile.py @@ -9,6 +9,7 @@ __version__ = "2.4.0" import array +import doctest import io import logging import os @@ -18,6 +19,7 @@ import zipfile from datetime import date from struct import Struct, calcsize, error, pack, unpack +from typing import Iterable, Iterator from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -25,6 +27,8 @@ # Create named logger logger = logging.getLogger(__name__) +doctest.NORMALIZE_WHITESPACE = 1 + # Module settings VERBOSE = True @@ -2714,11 +2718,7 @@ def field(self, name, fieldType="C", size="50", decimal=0): # Begin Testing -def _get_doctests(): - import doctest - - doctest.NORMALIZE_WHITESPACE = 1 - +def _get_doctests() -> doctest.DocTest: # run tests with open("README.md", "rb") as fobj: tests = doctest.DocTestParser().get_doctest( @@ -2732,7 +2732,11 @@ def _get_doctests(): return tests -def _filter_network_doctests(examples, include_network=False, include_non_network=True): +def _filter_network_doctests( + examples: Iterable[doctest.Example], + include_network: bool = False, + include_non_network: bool = True, +) -> Iterator[doctest.Example]: globals_from_network_doctests = set() if not (include_network or include_non_network): @@ -2773,16 +2777,16 @@ def _filter_network_doctests(examples, include_network=False, include_non_networ def _replace_remote_url( - old_url, + old_url: str, # Default port of Python http.server and Python 2's SimpleHttpServer - port=8000, - scheme="http", - netloc="localhost", - path=None, - params="", - query="", - fragment="", -): + port: int = 8000, + scheme: str = "http", + netloc: str = "localhost", + path: str | None = None, + params: str = "", + query: str = "", + fragment: str = "", +) -> str: old_parsed = urlparse(old_url) # Strip subpaths, so an artefacts @@ -2806,15 +2810,12 @@ def _replace_remote_url( return new_url -def _test(args=sys.argv[1:], verbosity=0): +def _test(args: list[str] = sys.argv[1:], verbosity: bool = False) -> int: if verbosity == 0: print("Getting doctests...") - import doctest import re - doctest.NORMALIZE_WHITESPACE = 1 - tests = _get_doctests() if len(args) >= 2 and args[0] == "-m": From 2457a953f5a4e26ac7607a8f31bda7ce60b74818 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 17:02:10 +0100 Subject: [PATCH 09/22] Use typing.Optional instead of T | None 3.10 syntax --- .github/workflows/run_tests_hooks_and_tools.yml | 2 -- shapefile.py | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run_tests_hooks_and_tools.yml b/.github/workflows/run_tests_hooks_and_tools.yml index 548d9dc7..b6dd3330 100644 --- a/.github/workflows/run_tests_hooks_and_tools.yml +++ b/.github/workflows/run_tests_hooks_and_tools.yml @@ -36,8 +36,6 @@ jobs: fail-fast: false matrix: python-version: [ - "2.7", - "3.5", "3.6", "3.7", "3.8", diff --git a/shapefile.py b/shapefile.py index ba2f4fac..1399aa35 100644 --- a/shapefile.py +++ b/shapefile.py @@ -19,7 +19,7 @@ import zipfile from datetime import date from struct import Struct, calcsize, error, pack, unpack -from typing import Iterable, Iterator +from typing import Iterable, Iterator, Optional from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -2782,7 +2782,7 @@ def _replace_remote_url( port: int = 8000, scheme: str = "http", netloc: str = "localhost", - path: str | None = None, + path: Optional[str] = None, params: str = "", query: str = "", fragment: str = "", From 72bdfeb7cb10031a8385255e183b0dcbdc19d88c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 17:05:09 +0100 Subject: [PATCH 10/22] Remove EOL Python tests, so that we can use list[] 3.9 type syntax --- .../workflows/run_tests_hooks_and_tools.yml | 34 ------------------- 1 file changed, 34 deletions(-) diff --git a/.github/workflows/run_tests_hooks_and_tools.yml b/.github/workflows/run_tests_hooks_and_tools.yml index b6dd3330..42c981e1 100644 --- a/.github/workflows/run_tests_hooks_and_tools.yml +++ b/.github/workflows/run_tests_hooks_and_tools.yml @@ -30,40 +30,6 @@ jobs: run: | pylint --disable=R,C test_shapefile.py - test_on_EOL_Pythons: - continue-on-error: true - strategy: - fail-fast: false - matrix: - python-version: [ - "3.6", - "3.7", - "3.8", - ] - - runs-on: ubuntu-latest - container: - image: python:${{ matrix.python-version }} - - steps: - - uses: actions/checkout@v4 - with: - path: ./Pyshp - - - name: Non-network tests - uses: ./Pyshp/.github/actions/test - with: - pyshp_repo_directory: ./Pyshp - python-version: ${{ matrix.python-version }} - - - name: Network tests - uses: ./Pyshp/.github/actions/test - with: - extra_args: '-m network' - replace_remote_urls_with_localhost: 'yes' - pyshp_repo_directory: ./Pyshp - python-version: ${{ matrix.python-version }} - test_on_supported_Pythons: strategy: fail-fast: false From d199fa34691551643038aee32b2f7981f5ecd95b Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 17:15:00 +0100 Subject: [PATCH 11/22] Type hint b and u helper and other functions --- shapefile.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/shapefile.py b/shapefile.py index 1399aa35..271d7382 100644 --- a/shapefile.py +++ b/shapefile.py @@ -19,7 +19,7 @@ import zipfile from datetime import date from struct import Struct, calcsize, error, pack, unpack -from typing import Iterable, Iterator, Optional +from typing import Any, Iterable, Iterator, Optional, Union from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -92,7 +92,9 @@ NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. -def b(v, encoding="utf-8", encodingErrors="strict"): +def b( + v: Union[str, bytes], encoding: str = "utf-8", encodingErrors: str = "strict" +) -> bytes: if isinstance(v, str): # For python 3 encode str to bytes. return v.encode(encoding, encodingErrors) @@ -107,7 +109,9 @@ def b(v, encoding="utf-8", encodingErrors="strict"): return str(v).encode(encoding, encodingErrors) -def u(v, encoding="utf-8", encodingErrors="strict"): +def u( + v: Union[str, bytes], encoding: str = "utf-8", encodingErrors: str = "strict" +) -> str: if isinstance(v, bytes): # For python 3 decode bytes to str. return v.decode(encoding, encodingErrors) @@ -122,11 +126,11 @@ def u(v, encoding="utf-8", encodingErrors="strict"): return bytes(v).decode(encoding, encodingErrors) -def is_string(v): +def is_string(v: Any) -> bool: return isinstance(v, str) -def pathlike_obj(path): +def pathlike_obj(path: Any) -> Any: if isinstance(path, os.PathLike): return os.fsdecode(path) else: From 7a895ce4b95ddfa855367d240b3f003fc6012cd7 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 17:24:08 +0100 Subject: [PATCH 12/22] Type hint signed_area --- shapefile.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/shapefile.py b/shapefile.py index 271d7382..ddf53fcc 100644 --- a/shapefile.py +++ b/shapefile.py @@ -17,6 +17,7 @@ import tempfile import time import zipfile +from collections.abc import Collection from datetime import date from struct import Struct, calcsize, error, pack, unpack from typing import Any, Iterable, Iterator, Optional, Union @@ -148,7 +149,16 @@ def __repr__(self): return str(self.tolist()) -def signed_area(coords, fast=False): +def signed_area( + coords: Collection[ + Union[ + tuple[float, float], + tuple[float, float, float], + tuple[float, float, float, float], + ] + ], + fast: bool = False, +) -> float: """Return the signed area enclosed by a ring using the linear time algorithm. A value >= 0 indicates a counter-clockwise oriented ring. A faster version is possible by setting 'fast' to True, which returns From 0f1e006b518b5180ccace0cd01fda2e615db01c4 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 17:47:03 +0100 Subject: [PATCH 13/22] Add more type hints --- shapefile.py | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/shapefile.py b/shapefile.py index ddf53fcc..3c6ebc9d 100644 --- a/shapefile.py +++ b/shapefile.py @@ -20,7 +20,7 @@ from collections.abc import Collection from datetime import date from struct import Struct, calcsize, error, pack, unpack -from typing import Any, Iterable, Iterator, Optional, Union +from typing import Any, Iterable, Iterator, Optional, Reversible, Union from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -149,14 +149,16 @@ def __repr__(self): return str(self.tolist()) +Point2D = tuple[float, float] +PointZ = tuple[float, float, float] +PointZM = tuple[float, float, float, float] + +Coord = Union[Point2D, PointZ, PointZM] +Coords = Collection[Coord] + + def signed_area( - coords: Collection[ - Union[ - tuple[float, float], - tuple[float, float, float], - tuple[float, float, float, float], - ] - ], + coords: Coords, fast: bool = False, ) -> float: """Return the signed area enclosed by a ring using the linear time @@ -174,7 +176,7 @@ def signed_area( return area2 / 2.0 -def is_cw(coords): +def is_cw(coords: Coords) -> bool: """Returns True if a polygon ring has clockwise orientation, determined by a negatively signed area. """ @@ -182,35 +184,38 @@ def is_cw(coords): return area2 < 0 -def rewind(coords): +def rewind(coords: Reversible[Coord]) -> list[Coord]: """Returns the input coords in reversed order.""" return list(reversed(coords)) -def ring_bbox(coords): +BBox = tuple[float, float, float, float] + + +def ring_bbox(coords: Coords) -> BBox: """Calculates and returns the bounding box of a ring.""" xs, ys = zip(*coords) bbox = min(xs), min(ys), max(xs), max(ys) return bbox -def bbox_overlap(bbox1, bbox2): - """Tests whether two bounding boxes overlap, returning a boolean""" +def bbox_overlap(bbox1: BBox, bbox2: BBox) -> bool: + """Tests whether two bounding boxes overlap.""" xmin1, ymin1, xmax1, ymax1 = bbox1 xmin2, ymin2, xmax2, ymax2 = bbox2 overlap = xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2 return overlap -def bbox_contains(bbox1, bbox2): - """Tests whether bbox1 fully contains bbox2, returning a boolean""" +def bbox_contains(bbox1: BBox, bbox2: BBox) -> bool: + """Tests whether bbox1 fully contains bbox2.""" xmin1, ymin1, xmax1, ymax1 = bbox1 xmin2, ymin2, xmax2, ymax2 = bbox2 contains = xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2 return contains -def ring_contains_point(coords, p): +def ring_contains_point(coords: list[Coord], p: Point2D) -> bool: """Fast point-in-polygon crossings algorithm, MacMartin optimization. Adapted from code by Eric Haynes @@ -255,7 +260,7 @@ def ring_contains_point(coords, p): return inside_flag -def ring_sample(coords, ccw=False): +def ring_sample(coords: list[Coord], ccw: bool = False) -> Coord: """Return a sample point guaranteed to be within a ring, by efficiently finding the first centroid of a coordinate triplet whose orientation matches the orientation of the ring and passes the point-in-ring test. @@ -302,7 +307,7 @@ def itercoords(): raise Exception("Unexpected error: Unable to find a ring sample point.") -def ring_contains_ring(coords1, coords2): +def ring_contains_ring(coords1: list[Coord], coords2: list[Point2D]) -> bool: """Returns True if all vertexes in coords2 are fully inside coords1.""" return all(ring_contains_point(coords1, p2) for p2 in coords2) From 165d99d3242df35364dba66704930b2c7e9a0176 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:15:49 +0100 Subject: [PATCH 14/22] Annotate organize_polygon_rings --- shapefile.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/shapefile.py b/shapefile.py index 3c6ebc9d..8a22c9f5 100644 --- a/shapefile.py +++ b/shapefile.py @@ -260,7 +260,7 @@ def ring_contains_point(coords: list[Coord], p: Point2D) -> bool: return inside_flag -def ring_sample(coords: list[Coord], ccw: bool = False) -> Coord: +def ring_sample(coords: list[Coord], ccw: bool = False) -> Point2D: """Return a sample point guaranteed to be within a ring, by efficiently finding the first centroid of a coordinate triplet whose orientation matches the orientation of the ring and passes the point-in-ring test. @@ -312,7 +312,9 @@ def ring_contains_ring(coords1: list[Coord], coords2: list[Point2D]) -> bool: return all(ring_contains_point(coords1, p2) for p2 in coords2) -def organize_polygon_rings(rings, return_errors=None): +def organize_polygon_rings( + rings: Iterable[list[Coord]], return_errors: Optional[dict[str, int]] = None +) -> list[list[list[Coord]]]: """Organize a list of coordinate rings into one or more polygons with holes. Returns a list of polygons, where each polygon is composed of a single exterior ring, and one or more interior holes. If a return_errors dict is provided (optional), @@ -357,7 +359,9 @@ def organize_polygon_rings(rings, return_errors=None): return polys # first determine each hole's candidate exteriors based on simple bbox contains test - hole_exteriors = {hole_i: [] for hole_i in range(len(holes))} + hole_exteriors: dict[int, list[int]] = { + hole_i: [] for hole_i in range(len(holes)) + } exterior_bboxes = [ring_bbox(ring) for ring in exteriors] for hole_i in hole_exteriors.keys(): hole_bbox = ring_bbox(holes[hole_i]) From c5c07eb953c46bec945601b250f29d2ae046906f Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:30:34 +0100 Subject: [PATCH 15/22] Begin type annotations of Shape --- shapefile.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/shapefile.py b/shapefile.py index 8a22c9f5..d8498e2d 100644 --- a/shapefile.py +++ b/shapefile.py @@ -443,7 +443,12 @@ def organize_polygon_rings( class Shape: def __init__( - self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None + self, + shapeType: int = NULL, + points: Optional[Coords] = None, + parts: Optional[list[int]] = None, + partTypes: Optional[list[int]] = None, + oid: Optional[int] = None, ): """Stores the geometry of the different shape types specified in the Shapefile spec. Shape types are @@ -463,7 +468,7 @@ def __init__( self.partTypes = partTypes # and a dict to silently record any errors encountered - self._errors = {} + self._errors: dict[str, int] = {} # add oid if oid is not None: @@ -666,12 +671,12 @@ def _from_geojson(geoj): return shape @property - def oid(self): + def oid(self) -> int: """The index position of the shape in the original shapefile""" return self.__oid @property - def shapeTypeName(self): + def shapeTypeName(self) -> str: return SHAPETYPE_LOOKUP[self.shapeType] def __repr__(self): From c3fc7f6666cefd9aa58f8a46bdf75d438c4ca073 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 19:15:32 +0100 Subject: [PATCH 16/22] Replace kwargs.pop with actual key word args --- .pre-commit-config.yaml | 1 + shapefile.py | 77 ++++++++++++++++++++++++++--------------- 2 files changed, 51 insertions(+), 27 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 85e04eba..3849c557 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,6 +8,7 @@ repos: hooks: - id: isort name: isort (python) + args: ["--profile", "black"] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.3.0 hooks: diff --git a/shapefile.py b/shapefile.py index d8498e2d..e56b3b27 100644 --- a/shapefile.py +++ b/shapefile.py @@ -20,7 +20,7 @@ from collections.abc import Collection from datetime import date from struct import Struct, calcsize, error, pack, unpack -from typing import Any, Iterable, Iterator, Optional, Reversible, Union +from typing import Any, Iterable, Iterator, Optional, Reversible, TypedDict, Union from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -87,6 +87,25 @@ 5: "RING", } +# Custom type variables + +Point2D = tuple[float, float] +PointZ = tuple[float, float, float] +PointZM = tuple[float, float, float, float] + +Coord = Union[Point2D, PointZ, PointZM] +Coords = list[Coord] + +BBox = tuple[float, float, float, float] + + +class GeoJSONT(TypedDict): + type: str + coordinates: Union[ + tuple[()], Point2D, PointZ, PointZM, Coords, list[Coords], list[list[Coords]] + ] + + # Helpers MISSING = [None, ""] @@ -149,14 +168,6 @@ def __repr__(self): return str(self.tolist()) -Point2D = tuple[float, float] -PointZ = tuple[float, float, float] -PointZM = tuple[float, float, float, float] - -Coord = Union[Point2D, PointZ, PointZM] -Coords = Collection[Coord] - - def signed_area( coords: Coords, fast: bool = False, @@ -189,9 +200,6 @@ def rewind(coords: Reversible[Coord]) -> list[Coord]: return list(reversed(coords)) -BBox = tuple[float, float, float, float] - - def ring_bbox(coords: Coords) -> BBox: """Calculates and returns the bounding box of a ring.""" xs, ys = zip(*coords) @@ -445,7 +453,7 @@ class Shape: def __init__( self, shapeType: int = NULL, - points: Optional[Coords] = None, + points: Optional[list[Coord]] = None, parts: Optional[list[int]] = None, partTypes: Optional[list[int]] = None, oid: Optional[int] = None, @@ -477,16 +485,18 @@ def __init__( self.__oid = -1 @property - def __geo_interface__(self): + def __geo_interface__(self) -> GeoJSONT: if self.shapeType in [POINT, POINTM, POINTZ]: # point if len(self.points) == 0: # the shape has no coordinate information, i.e. is 'empty' # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {"type": "Point", "coordinates": tuple()} + return {"type": "Point", "coordinates": ()} + # return {"type": "Point", "coordinates": tuple()} #type: ignore else: - return {"type": "Point", "coordinates": tuple(self.points[0])} + return {"type": "Point", "coordinates": self.points[0]} + # return {"type": "Point", "coordinates": tuple(self.points[0])} # type: ignore elif self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: if len(self.points) == 0: # the shape has no coordinate information, i.e. is 'empty' @@ -497,7 +507,8 @@ def __geo_interface__(self): # multipoint return { "type": "MultiPoint", - "coordinates": [tuple(p) for p in self.points], + "coordinates": self.points, + # "coordinates": [tuple(p) for p in self.points], #type: ignore } elif self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: if len(self.parts) == 0: @@ -509,7 +520,8 @@ def __geo_interface__(self): # linestring return { "type": "LineString", - "coordinates": [tuple(p) for p in self.points], + "coordinates": self.points, + # "coordinates": [tuple(p) for p in self.points], #type: ignore } else: # multilinestring @@ -520,10 +532,12 @@ def __geo_interface__(self): ps = part continue else: - coordinates.append([tuple(p) for p in self.points[ps:part]]) + # coordinates.append([tuple(p) for p in self.points[ps:part]]) + coordinates.append([p for p in self.points[ps:part]]) ps = part else: - coordinates.append([tuple(p) for p in self.points[part:]]) + # coordinates.append([tuple(p) for p in self.points[part:]]) + coordinates.append([p for p in self.points[part:]]) return {"type": "MultiLineString", "coordinates": coordinates} elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: if len(self.parts) == 0: @@ -543,7 +557,8 @@ def __geo_interface__(self): end = len(self.points) # extract the points that make up the ring - ring = [tuple(p) for p in self.points[start:end]] + # ring = [tuple(p) for p in self.points[start:end]] + ring = [p for p in self.points[start:end]] rings.append(ring) # organize rings into list of polygons, where each polygon is defined as list of rings. @@ -918,7 +933,7 @@ class Reader: but they can be. """ - def __init__(self, *args, **kwargs): + def __init__(self, *args, encoding="utf-8", encodingErrors="strict", **kwargs): self.shp = None self.shx = None self.dbf = None @@ -931,8 +946,8 @@ def __init__(self, *args, **kwargs): self.fields = [] self.__dbfHdrLength = 0 self.__fieldLookup = {} - self.encoding = kwargs.pop("encoding", "utf-8") - self.encodingErrors = kwargs.pop("encodingErrors", "strict") + self.encoding = encoding + self.encodingErrors = encodingErrors # See if a shapefile name was passed as the first argument if len(args) > 0: path = pathlike_obj(args[0]) @@ -1876,7 +1891,15 @@ def iterShapeRecords(self, fields=None, bbox=None): class Writer: """Provides write support for ESRI Shapefiles.""" - def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): + def __init__( + self, + target=None, + shapeType=None, + autoBalance=False, + encoding="utf-8", + encodingErrors="strict", + **kwargs, + ): self.target = target self.autoBalance = autoBalance self.fields = [] @@ -1920,8 +1943,8 @@ def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): # Use deletion flags in dbf? Default is false (0). Note: Currently has no effect, records should NOT contain deletion flags. self.deletionFlag = 0 # Encoding - self.encoding = kwargs.pop("encoding", "utf-8") - self.encodingErrors = kwargs.pop("encodingErrors", "strict") + self.encoding = encoding + self.encodingErrors = encodingErrors def __len__(self): """Returns the current number of features written to the shapefile. From 209421c1846bd406dd164ea37426b2ccf3fd1224 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 20:46:31 +0100 Subject: [PATCH 17/22] Remove code duplication in constituent file loading (more kwargs) --- shapefile.py | 189 +++++++++++++++++++++++++++++---------------------- 1 file changed, 107 insertions(+), 82 deletions(-) diff --git a/shapefile.py b/shapefile.py index e56b3b27..0a99fbe2 100644 --- a/shapefile.py +++ b/shapefile.py @@ -20,7 +20,7 @@ from collections.abc import Collection from datetime import date from struct import Struct, calcsize, error, pack, unpack -from typing import Any, Iterable, Iterator, Optional, Reversible, TypedDict, Union +from typing import IO, Any, Iterable, Iterator, Optional, Reversible, TypedDict, Union from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen @@ -912,6 +912,10 @@ class ShapefileException(Exception): pass +class _NoShpSentinel(object): + pass + + class Reader: """Reads the three files of a shapefile as a unit or separately. If one of the three files (.shp, .shx, @@ -933,10 +937,25 @@ class Reader: but they can be. """ - def __init__(self, *args, encoding="utf-8", encodingErrors="strict", **kwargs): - self.shp = None - self.shx = None - self.dbf = None + CONSTITUENT_FILE_EXTS = ["shp", "shx", "dbf"] + assert all(ext.islower() for ext in CONSTITUENT_FILE_EXTS) + + def _assert_ext_is_supported(self, ext: str): + assert ext in self.CONSTITUENT_FILE_EXTS + + def __init__( + self, + *args, + encoding="utf-8", + encodingErrors="strict", + shp=_NoShpSentinel, + shx=None, + dbf=None, + **kwargs, + ): + # self.shp = None + # self.shx = None + # self.dbf = None self._files_to_close = [] self.shapeName = "Not specified" self._offsets = [] @@ -1014,19 +1033,20 @@ def __init__(self, *args, encoding="utf-8", encodingErrors="strict", **kwargs): shapefile = os.path.splitext(shapefile)[ 0 ] # root shapefile name - for ext in ["SHP", "SHX", "DBF", "shp", "shx", "dbf"]: - try: - member = archive.open(shapefile + "." + ext) - # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() - fileobj = tempfile.NamedTemporaryFile( - mode="w+b", delete=True - ) - fileobj.write(member.read()) - fileobj.seek(0) - setattr(self, ext.lower(), fileobj) - self._files_to_close.append(fileobj) - except: - pass + for lower_ext in self.CONSTITUENT_FILE_EXTS: + for cased_ext in [lower_ext, lower_ext.upper()]: + try: + member = archive.open(f"{shapefile}.{cased_ext}") + # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() + fileobj = tempfile.NamedTemporaryFile( + mode="w+b", delete=True + ) + fileobj.write(member.read()) + fileobj.seek(0) + setattr(self, lower_ext, fileobj) + self._files_to_close.append(fileobj) + except: + pass # Close and delete the temporary zipfile try: zipfileobj.close() @@ -1086,46 +1106,47 @@ def __init__(self, *args, encoding="utf-8", encodingErrors="strict", **kwargs): self.load(path) return - # Otherwise, load from separate shp/shx/dbf args (must be path or file-like) - if "shp" in kwargs: - if hasattr(kwargs["shp"], "read"): - self.shp = kwargs["shp"] - # Copy if required - try: - self.shp.seek(0) - except (NameError, io.UnsupportedOperation): - self.shp = io.BytesIO(self.shp.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["shp"]) - self.load_shp(baseName) - - if "shx" in kwargs: - if hasattr(kwargs["shx"], "read"): - self.shx = kwargs["shx"] - # Copy if required - try: - self.shx.seek(0) - except (NameError, io.UnsupportedOperation): - self.shx = io.BytesIO(self.shx.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["shx"]) - self.load_shx(baseName) + if shp is _NoShpSentinel: + self.shp = None + self.shx = None + else: + self.shp = self._seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) + self.shx = self._seek_0_on_file_obj_wrap_or_open_from_name("shx", shx) - if "dbf" in kwargs: - if hasattr(kwargs["dbf"], "read"): - self.dbf = kwargs["dbf"] - # Copy if required - try: - self.dbf.seek(0) - except (NameError, io.UnsupportedOperation): - self.dbf = io.BytesIO(self.dbf.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["dbf"]) - self.load_dbf(baseName) + self.dbf = self._seek_0_on_file_obj_wrap_or_open_from_name("dbf", dbf) # Load the files if self.shp or self.dbf: - self.load() + self._try_to_set_constituent_file_headers() + + def _seek_0_on_file_obj_wrap_or_open_from_name( + self, + ext: str, + # File name, file object or anything with a read() method that returns bytes. + # TODO: Create simple Protocol with a read() method + file_: Optional[Union[str, IO[bytes]]], + ) -> Union[None, io.BytesIO, IO[bytes]]: + # assert ext in {'shp', 'dbf', 'shx'} + self._assert_ext_is_supported(ext) + + if file_ is None: + return None + + if isinstance(file_, str): + baseName, __ = os.path.splitext(file_) + return self._load_constituent_file(baseName, ext) + + if hasattr(file_, "read"): + # Copy if required + try: + file_.seek(0) # type: ignore + return file_ + except (NameError, io.UnsupportedOperation): + return io.BytesIO(file_.read()) + + raise ShapefileException( + f"Could not load shapefile constituent file from: {file_}" + ) def __str__(self): """ @@ -1232,6 +1253,9 @@ def load(self, shapefile=None): raise ShapefileException( f"Unable to open {shapeName}.dbf or {shapeName}.shp." ) + self._try_to_set_constituent_file_headers() + + def _try_to_set_constituent_file_headers(self): if self.shp: self.__shpHeader() if self.dbf: @@ -1239,50 +1263,51 @@ def load(self, shapefile=None): if self.shx: self.__shxHeader() - def load_shp(self, shapefile_name): + def _try_get_open_constituent_file(self, shapefile_name: str, ext: str): """ - Attempts to load file with .shp extension as both lower and upper case + Attempts to open a .shp, .dbf or .shx file, + with both lower case and upper case file extensions, + and return it. If it was not possible to open the file, None is returned. """ - shp_ext = "shp" + # typing.LiteralString is only available from PYthon 3.11 onwards. + # https://docs.python.org/3/library/typing.html#typing.LiteralString + self._assert_ext_is_supported(ext) try: - self.shp = open(f"{shapefile_name}.{shp_ext}", "rb") - self._files_to_close.append(self.shp) + return open(f"{shapefile_name}.{ext}", "rb") except OSError: try: - self.shp = open(f"{shapefile_name}.{shp_ext.upper()}", "rb") - self._files_to_close.append(self.shp) + return open(f"{shapefile_name}.{ext.upper()}", "rb") except OSError: - pass + return None + + def _load_constituent_file(self, shapefile_name: str, ext: str): + """ + Attempts to open a .shp, .dbf or .shx file, with the extension + as both lower and upper case, and if successful append it to + self._files_to_close. + """ + shp_dbf_or_dhx_file = self._try_get_open_constituent_file(shapefile_name, ext) + if shp_dbf_or_dhx_file is not None: + self._files_to_close.append(shp_dbf_or_dhx_file) + return shp_dbf_or_dhx_file + + def load_shp(self, shapefile_name): + """ + Attempts to load file with .shp extension as both lower and upper case + """ + self.shp = self._load_constituent_file(shapefile_name, "shp") def load_shx(self, shapefile_name): """ Attempts to load file with .shx extension as both lower and upper case """ - shx_ext = "shx" - try: - self.shx = open(f"{shapefile_name}.{shx_ext}", "rb") - self._files_to_close.append(self.shx) - except OSError: - try: - self.shx = open(f"{shapefile_name}.{shx_ext.upper()}", "rb") - self._files_to_close.append(self.shx) - except OSError: - pass + self.shx = self._load_constituent_file(shapefile_name, "shx") def load_dbf(self, shapefile_name): """ Attempts to load file with .dbf extension as both lower and upper case """ - dbf_ext = "dbf" - try: - self.dbf = open(f"{shapefile_name}.{dbf_ext}", "rb") - self._files_to_close.append(self.dbf) - except OSError: - try: - self.dbf = open(f"{shapefile_name}.{dbf_ext.upper()}", "rb") - self._files_to_close.append(self.dbf) - except OSError: - pass + self.dbf = self._load_constituent_file(shapefile_name, "dbf") def __del__(self): self.close() From f4fdf2ccbdfdbaeeb656c92d890366bbe11dc2ae Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 21:36:36 +0100 Subject: [PATCH 18/22] Add docstring to sentinel. --- shapefile.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/shapefile.py b/shapefile.py index 0a99fbe2..12f94d1a 100644 --- a/shapefile.py +++ b/shapefile.py @@ -913,6 +913,12 @@ class ShapefileException(Exception): class _NoShpSentinel(object): + """For use as a default value for shp to preserve the + behaviour (from when all keyword args were gathered + in the **kwargs dict) in case someone explictly + called Reader(shp=None) to load self.shx. + """ + pass @@ -1106,15 +1112,14 @@ def __init__( self.load(path) return + self.shp = self._seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) + self.dbf = self._seek_0_on_file_obj_wrap_or_open_from_name("dbf", dbf) + if shp is _NoShpSentinel: - self.shp = None self.shx = None else: - self.shp = self._seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) self.shx = self._seek_0_on_file_obj_wrap_or_open_from_name("shx", shx) - self.dbf = self._seek_0_on_file_obj_wrap_or_open_from_name("dbf", dbf) - # Load the files if self.shp or self.dbf: self._try_to_set_constituent_file_headers() From 1bb2e380e70f98ddae160baeccbc6aa4f1cfeb41 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 21:59:20 +0100 Subject: [PATCH 19/22] Restore self.shp = None etc. Replace *args with a kwarg: shapefile_path --- shapefile.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/shapefile.py b/shapefile.py index 12f94d1a..866eb316 100644 --- a/shapefile.py +++ b/shapefile.py @@ -951,7 +951,8 @@ def _assert_ext_is_supported(self, ext: str): def __init__( self, - *args, + shapefile_path: str = "", + *, encoding="utf-8", encodingErrors="strict", shp=_NoShpSentinel, @@ -959,23 +960,23 @@ def __init__( dbf=None, **kwargs, ): - # self.shp = None - # self.shx = None - # self.dbf = None + self.shp = None + self.shx = None + self.dbf = None self._files_to_close = [] self.shapeName = "Not specified" - self._offsets = [] + self._offsets: list[int] = [] self.shpLength = None self.numRecords = None self.numShapes = None - self.fields = [] + self.fields: list[list[str]] = [] self.__dbfHdrLength = 0 - self.__fieldLookup = {} + self.__fieldLookup: dict[str, int] = {} self.encoding = encoding self.encodingErrors = encodingErrors # See if a shapefile name was passed as the first argument - if len(args) > 0: - path = pathlike_obj(args[0]) + if shapefile_path: + path = pathlike_obj(shapefile_path) if is_string(path): if ".zip" in path: # Shapefile is inside a zipfile @@ -992,6 +993,8 @@ def __init__( else: zpath = path[: path.find(".zip") + 4] shapefile = path[path.find(".zip") + 4 + 1 :] + + zipfileobj: Union[tempfile._TemporaryFileWrapper, io.BufferedReader] # Create a zip file handle if zpath.startswith("http"): # Zipfile is from a url From 2b1aa2f10b960cf86d25f334a4d3a516e58aa47a Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 22:05:31 +0100 Subject: [PATCH 20/22] Don't load shp from sentinel --- shapefile.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/shapefile.py b/shapefile.py index 866eb316..c1a2d76b 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1115,14 +1115,12 @@ def __init__( self.load(path) return - self.shp = self._seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) - self.dbf = self._seek_0_on_file_obj_wrap_or_open_from_name("dbf", dbf) - - if shp is _NoShpSentinel: - self.shx = None - else: + if shp is not _NoShpSentinel: + self.shp = self._seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) self.shx = self._seek_0_on_file_obj_wrap_or_open_from_name("shx", shx) + self.dbf = self._seek_0_on_file_obj_wrap_or_open_from_name("dbf", dbf) + # Load the files if self.shp or self.dbf: self._try_to_set_constituent_file_headers() From 9e157d6132f0422c3ffd9c0310f762468046ce83 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Mon, 21 Jul 2025 22:41:40 +0100 Subject: [PATCH 21/22] Replace kwargs.get with key word args and defaults. --- shapefile.py | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/shapefile.py b/shapefile.py index c1a2d76b..e5632981 100644 --- a/shapefile.py +++ b/shapefile.py @@ -963,7 +963,7 @@ def __init__( self.shp = None self.shx = None self.dbf = None - self._files_to_close = [] + self._files_to_close: list[IO[bytes]] = [] self.shapeName = "Not specified" self._offsets: list[int] = [] self.shpLength = None @@ -1269,7 +1269,11 @@ def _try_to_set_constituent_file_headers(self): if self.shx: self.__shxHeader() - def _try_get_open_constituent_file(self, shapefile_name: str, ext: str): + def _try_get_open_constituent_file( + self, + shapefile_name: str, + ext: str, + ) -> Union[IO[bytes], None]: """ Attempts to open a .shp, .dbf or .shx file, with both lower case and upper case file extensions, @@ -1277,7 +1281,9 @@ def _try_get_open_constituent_file(self, shapefile_name: str, ext: str): """ # typing.LiteralString is only available from PYthon 3.11 onwards. # https://docs.python.org/3/library/typing.html#typing.LiteralString + # assert ext in {'shp', 'dbf', 'shx'} self._assert_ext_is_supported(ext) + try: return open(f"{shapefile_name}.{ext}", "rb") except OSError: @@ -1286,7 +1292,11 @@ def _try_get_open_constituent_file(self, shapefile_name: str, ext: str): except OSError: return None - def _load_constituent_file(self, shapefile_name: str, ext: str): + def _load_constituent_file( + self, + shapefile_name: str, + ext: str, + ) -> Union[IO[bytes], None]: """ Attempts to open a .shp, .dbf or .shx file, with the extension as both lower and upper case, and if successful append it to @@ -1341,7 +1351,7 @@ def __getFileObj(self, f): self.load() return f - def __restrictIndex(self, i): + def __restrictIndex(self, i: int) -> int: """Provides list-like handling of a record index with a clearer error message if the index is out of bounds.""" if self.numRecords: @@ -1929,6 +1939,10 @@ def __init__( autoBalance=False, encoding="utf-8", encodingErrors="strict", + *, + shp=None, + shx=None, + dbf=None, **kwargs, ): self.target = target @@ -1948,8 +1962,7 @@ def __init__( self.shp = self.__getFileObj(os.path.splitext(target)[0] + ".shp") self.shx = self.__getFileObj(os.path.splitext(target)[0] + ".shx") self.dbf = self.__getFileObj(os.path.splitext(target)[0] + ".dbf") - elif kwargs.get("shp") or kwargs.get("shx") or kwargs.get("dbf"): - shp, shx, dbf = kwargs.get("shp"), kwargs.get("shx"), kwargs.get("dbf") + elif shp or shx or dbf: if shp: self.shp = self.__getFileObj(shp) if shx: @@ -2046,13 +2059,11 @@ def close(self): pass self._files_to_close = [] - def __getFileObj(self, f): + def __getFileObj(self, f: Union[IO[bytes], str]) -> IO[bytes]: """Safety handler to verify file-like objects""" if not f: raise ShapefileException("No file-like object available.") - elif hasattr(f, "write"): - return f - else: + if isinstance(f, str): pth = os.path.split(f)[0] if pth and not os.path.exists(pth): os.makedirs(pth) @@ -2060,6 +2071,10 @@ def __getFileObj(self, f): self._files_to_close.append(fp) return fp + if hasattr(f, "write"): + return f + raise Exception(f"Unsupported file-like: {f}") + def __shpFileLength(self): """Calculates the file length of the shp file.""" # Remember starting position From ec43361f609f316811c45b33508c3d06c7fc9b92 Mon Sep 17 00:00:00 2001 From: Mike Taves Date: Tue, 22 Jul 2025 09:33:17 +1200 Subject: [PATCH 22/22] Set minimum Python 3.9, move project metadata to pyproject.toml --- .github/actions/test/action.yml | 2 +- README.md | 2 -- pyproject.toml | 36 ++++++++++++++++++++++++++++++++- requirements.test.txt | 2 -- setup.cfg | 29 -------------------------- setup.py | 3 --- 6 files changed, 36 insertions(+), 38 deletions(-) delete mode 100644 requirements.test.txt delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index c6ca65a4..0184dfe3 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -87,7 +87,7 @@ runs: working-directory: ${{ inputs.pyshp_repo_directory }} run: | python -m pip install --upgrade pip - pip install -r requirements.test.txt + pip install -e .[test] - name: Pytest shell: bash diff --git a/README.md b/README.md index c55e2043..caf5f339 100644 --- a/README.md +++ b/README.md @@ -74,8 +74,6 @@ Both the Esri and XBase file-formats are very simple in design and memory efficient which is part of the reason the shapefile format remains popular despite the numerous ways to store and exchange GIS data available today. -Pyshp is compatible with Python 2.7-3.x. - This document provides examples for using PyShp to read and write shapefiles. However many more examples are continually added to the blog [http://GeospatialPython.com](http://GeospatialPython.com), and by searching for PyShp on [https://gis.stackexchange.com](https://gis.stackexchange.com). diff --git a/pyproject.toml b/pyproject.toml index 697b6e67..945c86c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,40 @@ requires = ["setuptools"] build-backend = "setuptools.build_meta" +[project] +name = "pyshp" +authors = [ + {name = "Joel Lawhead", email = "jlawhead@geospatialpython.com"}, +] +maintainers = [ + {name = "Karim Bahgat", email = "karim.bahgat.norway@gmail.com"} +] +readme = "README.md" +keywords = ["gis", "geospatial", "geographic", "shapefile", "shapefiles"] +description = "Pure Python read/write support for ESRI Shapefile format" +license = "MIT" +license-files = ["LICENSE.TXT"] +dynamic = ["version"] +requires-python = ">=3.9" +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering :: GIS", + "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules", +] + +[project.optional-dependencies] +test = ["pytest"] + +[project.urls] +Repository = "https://github.com/GeospatialPython/pyshp" + +[tool.setuptools.dynamic] +version = {attr = "shapefile.__version__"} + [tool.ruff] # Exclude a variety of commonly ignored directories. exclude = [ @@ -84,4 +118,4 @@ load-plugins=[ per-file-ignores = """ shapefile.py:W0212 test_shapefile.py:W0212 -""" \ No newline at end of file +""" diff --git a/requirements.test.txt b/requirements.test.txt deleted file mode 100644 index 11141738..00000000 --- a/requirements.test.txt +++ /dev/null @@ -1,2 +0,0 @@ -pytest >= 3.7 -setuptools diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index d13d43bb..00000000 --- a/setup.cfg +++ /dev/null @@ -1,29 +0,0 @@ -[metadata] -name = pyshp -version = attr: shapefile.__version__ -description = Pure Python read/write support for ESRI Shapefile format -long_description = file: README.md -long_description_content_type = text/markdown -author = Joel Lawhead -author_email = jlawhead@geospatialpython.com -maintainer = Karim Bahgat -maintainer_email = karim.bahgat.norway@gmail.com -url = https://github.com/GeospatialPython/pyshp -download_url = https://pypi.org/project/pyshp/ -license = MIT -license_files = LICENSE.TXT -keywords = gis, geospatial, geographic, shapefile, shapefiles -classifiers = - Development Status :: 5 - Production/Stable - Programming Language :: Python - Programming Language :: Python :: 3 - Topic :: Scientific/Engineering :: GIS - Topic :: Software Development :: Libraries - Topic :: Software Development :: Libraries :: Python Modules - -[options] -py_modules = shapefile -python_requires = >=3.9 - -[bdist_wheel] -universal=1 diff --git a/setup.py b/setup.py deleted file mode 100644 index 60684932..00000000 --- a/setup.py +++ /dev/null @@ -1,3 +0,0 @@ -from setuptools import setup - -setup()