diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..20739a9 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +data/** filter=lfs diff=lfs merge=lfs -text diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..64e9078 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,42 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '[BUG] ' +labels: 'bug' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Code Example** +```python +# Minimal code example that reproduces the issue +import pymapgis as pmg +# Your code here +``` + +**Error Message** +``` +Paste the full error message and stack trace here +``` + +**Environment (please complete the following information):** + - OS: [e.g. Windows 11, macOS 14, Ubuntu 22.04] + - Python version: [e.g. 3.10.5] + - PyMapGIS version: [e.g. 0.1.0] + - Other relevant package versions: [e.g. geopandas 1.1.0] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..3c09262 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,31 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '[FEATURE] ' +labels: 'enhancement' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. 
+ +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Proposed API Design** +```python +# Example of how you envision the feature would be used +import pymapgis as pmg + +# Your proposed API here +``` + +**Use Case** +Describe the specific use case this feature would enable. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..548ceb6 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,54 @@ +## Description + +Brief description of the changes in this PR. + +## Type of Change + +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] Documentation update +- [ ] Performance improvement +- [ ] Code refactoring + +## Related Issues + +Closes #(issue number) + +## Changes Made + +- [ ] Change 1 +- [ ] Change 2 +- [ ] Change 3 + +## Testing + +- [ ] Tests pass locally +- [ ] New tests added for new functionality +- [ ] Manual testing completed + +**Test Instructions:** +1. Step 1 +2. Step 2 +3. Step 3 + +## Documentation + +- [ ] Documentation updated (if applicable) +- [ ] README updated (if applicable) +- [ ] CHANGELOG updated (if applicable) + +## Code Quality + +- [ ] Code follows project style guidelines +- [ ] Self-review of code completed +- [ ] Code is commented where necessary +- [ ] No new warnings introduced + +## Screenshots (if applicable) + +Add screenshots to help explain your changes. + +## Additional Notes + +Any additional information that reviewers should know. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 78a3ce4..79419df 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,12 +7,27 @@ permissions: jobs: test: runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: - python-version: "3.12" + python-version: ${{ matrix.python-version }} - run: pip install poetry - + - run: poetry install --with dev --no-interaction - run: poetry run pytest -q || [ $? -eq 5 ] + + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: "3.12" + - run: pip install poetry + - run: poetry install --with dev --no-interaction + - run: poetry run ruff check + - run: poetry run black --check . diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml new file mode 100644 index 0000000..4fca8a0 --- /dev/null +++ b/.github/workflows/examples.yml @@ -0,0 +1,17 @@ +name: Examples smoke-test +on: + push: + paths: ["examples/**"] + pull_request: + paths: ["examples/**"] + +jobs: + run-demo: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + - run: pip install -r labor_force_gap/after/requirements.txt + - run: python labor_force_gap/after/app.py --headless || true diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..8b9ae03 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,104 @@ +name: Publish to PyPI + +on: + release: + types: [published] + +permissions: + id-token: write # IMPORTANT: this permission is mandatory for trusted publishing + +jobs: + build: + name: Build distribution πŸ“¦ + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.x" + - name: Install poetry + 
run: pip install poetry + - name: Build package + run: poetry build + - name: Store the distribution packages + uses: actions/upload-artifact@v3 + with: + name: python-package-distributions + path: dist/ + + publish-to-pypi: + name: Publish Python 🐍 distribution πŸ“¦ to PyPI + if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/pymapgis + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + + steps: + - name: Download all the dists + uses: actions/download-artifact@v3 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution πŸ“¦ to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + + github-release: + name: Sign the Python 🐍 distribution πŸ“¦ with Sigstore and upload them to GitHub Release + needs: + - publish-to-pypi + runs-on: ubuntu-latest + + permissions: + contents: write # IMPORTANT: mandatory for making GitHub Releases + id-token: write # IMPORTANT: mandatory for sigstore + + steps: + - name: Download all the dists + uses: actions/download-artifact@v3 + with: + name: python-package-distributions + path: dist/ + - name: Sign the dists with Sigstore + uses: sigstore/gh-action-sigstore-python@v1.2.3 + with: + inputs: >- + ./dist/*.tar.gz + ./dist/*.whl + - name: Create GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + run: >- + gh release upload + '${{ github.ref_name }}' dist/** + --repo '${{ github.repository }}' + + publish-to-testpypi: + name: Publish Python 🐍 distribution πŸ“¦ to TestPyPI + needs: + - build + runs-on: ubuntu-latest + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + + environment: + name: testpypi + url: https://test.pypi.org/p/pymapgis + + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + + steps: + - name: Download all the dists + uses: actions/download-artifact@v3 + with: + name: 
python-package-distributions + path: dist/ + - name: Publish distribution πŸ“¦ to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ diff --git a/.gitignore b/.gitignore index 27c819c..f74f11e 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,133 @@ -**/pycache/ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# PyMapGIS specific +custom_cache/ +test_simple.py diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..2796ae2 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,71 @@ +# Changelog + +All notable changes to PyMapGIS will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added +- Comprehensive caching system with TTL support +- Census ACS data source integration +- TIGER/Line geographic boundaries support +- Interactive plotting with Leafmap +- Housing cost burden and labor force gap examples +- GitHub Actions CI/CD pipeline +- Pre-commit hooks for code quality + +### Changed +- Updated project structure for PyPI publication +- Improved documentation and README +- Enhanced type hints throughout codebase + +### Fixed +- Code formatting and linting issues +- Import organization in example files + +## [0.1.0] - 2024-01-XX + +### Added +- Initial PyMapGIS core library +- Basic data reading functionality +- Settings management with Pydantic +- MIT license +- Poetry-based dependency management + +### Infrastructure +- GitHub repository setup +- Basic CI/CD with GitHub Actions +- Pre-commit configuration +- Testing framework with pytest + +--- + +## Release Notes + +### Version 0.1.0 +This is the initial release of PyMapGIS, a modern GIS toolkit for Python. 
The library provides: + +- **Simplified Data Access**: Built-in support for Census ACS and TIGER/Line data +- **Smart Caching**: Automatic HTTP caching with configurable TTL +- **Interactive Visualization**: Beautiful maps with Leaflet integration +- **Clean APIs**: Pandas-like interface for geospatial workflows + +### Upcoming Features +- Additional data sources (OpenStreetMap, Natural Earth) +- Raster data processing capabilities +- Advanced spatial analysis tools +- Plugin system for custom data sources +- Jupyter notebook integration +- Performance optimizations + +### Breaking Changes +None in this initial release. + +### Migration Guide +This is the first release, so no migration is needed. + +--- + +For more details, see the [GitHub releases page](https://github.com/pymapgis/core/releases). diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..84fa299 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,213 @@ +# Contributing to PyMapGIS + +Thank you for your interest in contributing to PyMapGIS! This document provides guidelines and information for contributors. + +## πŸš€ Getting Started + +### Prerequisites + +- Python 3.10 or higher +- [Poetry](https://python-poetry.org/) for dependency management +- Git for version control + +### Development Setup + +1. **Fork and clone the repository** + ```bash + git clone https://github.com/YOUR_USERNAME/core.git + cd core + ``` + +2. **Install dependencies** + ```bash + poetry install --with dev + ``` + +3. **Install pre-commit hooks** + ```bash + poetry run pre-commit install + ``` + +4. **Run tests to verify setup** + ```bash + poetry run pytest + ``` + +## πŸ”„ Development Workflow + +### Branch Strategy + +- **`main`**: Production-ready code (protected) +- **`dev`**: Development branch for integration +- **`feature/*`**: Feature branches for new functionality +- **`fix/*`**: Bug fix branches + +### Making Changes + +1. 
**Create a feature branch** + ```bash + git checkout dev + git pull origin dev + git checkout -b feature/your-feature-name + ``` + +2. **Make your changes** + - Write clean, documented code + - Follow existing code style + - Add tests for new functionality + +3. **Run quality checks** + ```bash + poetry run pytest # Run tests + poetry run ruff check # Linting + poetry run black . # Code formatting + poetry run mypy pymapgis # Type checking + ``` + +4. **Commit your changes** + ```bash + git add . + git commit -m "feat: add amazing new feature" + ``` + +5. **Push and create PR** + ```bash + git push origin feature/your-feature-name + ``` + +## πŸ“ Code Style + +### Python Style Guide + +- Follow [PEP 8](https://pep8.org/) +- Use [Black](https://black.readthedocs.io/) for formatting +- Use [Ruff](https://docs.astral.sh/ruff/) for linting +- Use type hints where appropriate + +### Commit Messages + +Follow [Conventional Commits](https://www.conventionalcommits.org/): + +- `feat:` New features +- `fix:` Bug fixes +- `docs:` Documentation changes +- `style:` Code style changes +- `refactor:` Code refactoring +- `test:` Test additions/changes +- `chore:` Maintenance tasks + +### Documentation + +- Use docstrings for all public functions and classes +- Follow [Google style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) docstrings +- Update README.md for user-facing changes + +## πŸ§ͺ Testing + +### Running Tests + +```bash +# Run all tests +poetry run pytest + +# Run with coverage +poetry run pytest --cov=pymapgis + +# Run specific test file +poetry run pytest tests/test_cache.py + +# Run tests matching pattern +poetry run pytest -k "test_cache" +``` + +### Writing Tests + +- Place tests in the `tests/` directory +- Use descriptive test names +- Test both success and failure cases +- Mock external dependencies + +Example: +```python +def test_cache_stores_and_retrieves_data(): + """Test that cache can store and retrieve data correctly.""" + 
cache = Cache() + cache.put("key", "value") + assert cache.get("key") == "value" +``` + +## 📦 Package Structure + +``` +pymapgis/ +├── __init__.py # Package exports +├── cache.py # Caching functionality +├── acs.py # Census ACS data source +├── tiger.py # TIGER/Line data source +├── plotting.py # Visualization utilities +├── settings.py # Configuration +├── io/ # Input/output modules +├── network/ # Network utilities +├── plugins/ # Plugin system +├── raster/ # Raster data handling +├── serve/ # Server components +├── vector/ # Vector data handling +└── viz/ # Visualization components +``` + +## 🐛 Reporting Issues + +### Bug Reports + +Include: +- Python version +- PyMapGIS version +- Operating system +- Minimal code example +- Error messages/stack traces + +### Feature Requests + +Include: +- Use case description +- Proposed API design +- Examples of usage + +## 📋 Pull Request Guidelines + +### Before Submitting + +- [ ] Tests pass locally +- [ ] Code follows style guidelines +- [ ] Documentation is updated +- [ ] CHANGELOG.md is updated (if applicable) + +### PR Description + +Include: +- Summary of changes +- Related issue numbers +- Breaking changes (if any) +- Testing instructions + +## 🏷️ Release Process + +1. Update version in `pyproject.toml` +2. Update `CHANGELOG.md` +3. Create release PR to `main` +4. Tag release after merge +5. Publish to PyPI + +## 💬 Community + +- **GitHub Discussions**: For questions and ideas +- **Issues**: For bug reports and feature requests +- **Email**: nicholaskarlson@gmail.com for maintainer contact + +## 📄 License + +By contributing, you agree that your contributions will be licensed under the MIT License. + +--- + +Thank you for contributing to PyMapGIS! 
πŸ—ΊοΈβœ¨ diff --git a/LICENSE b/LICENSE index d1e1072..fbf2343 100644 --- a/LICENSE +++ b/LICENSE @@ -1 +1,21 @@ MIT License + +Copyright (c) 2024 PyMapGIS + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 40782af..5e586e5 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,120 @@ -# PyMapGIS Core +# PyMapGIS -Core library of the PyMapGIS project. -_⚠️ Pre-alpha β€” APIs will change rapidly._ +[![PyPI version](https://badge.fury.io/py/pymapgis.svg)](https://badge.fury.io/py/pymapgis) +[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![CI](https://github.com/pymapgis/core/workflows/CI/badge.svg)](https://github.com/pymapgis/core/actions) + +**Modern GIS toolkit for Python** - Simplifying geospatial workflows with built-in data sources, intelligent caching, and fluent APIs. 
+ +## πŸš€ Quick Start + +```bash +pip install pymapgis +``` + +```python +import pymapgis as pmg + +# Load Census data with automatic geometry +acs = pmg.read("census://acs/acs5?year=2022&geography=county&variables=B25070_010E,B25070_001E") + +# Calculate housing cost burden (30%+ of income on housing) +acs["cost_burden_rate"] = acs["B25070_010E"] / acs["B25070_001E"] + +# Create interactive map +acs.plot.choropleth( + column="cost_burden_rate", + title="Housing Cost Burden by County (2022)", + cmap="Reds" +).show() +``` + +## ✨ Key Features + +- **πŸ”— Built-in Data Sources**: Census ACS, TIGER/Line, and more +- **⚑ Smart Caching**: Automatic HTTP caching with TTL support +- **πŸ—ΊοΈ Interactive Maps**: Beautiful visualizations with Leaflet +- **🧹 Clean APIs**: Fluent, pandas-like interface +- **πŸ”§ Extensible**: Plugin architecture for custom data sources + +## πŸ“Š Supported Data Sources + +| Source | URL Pattern | Description | +|--------|-------------|-------------| +| **Census ACS** | `census://acs/acs5?year=2022&geography=county` | American Community Survey data | +| **TIGER/Line** | `tiger://county?year=2022&state=06` | Census geographic boundaries | +| **Local Files** | `file://path/to/data.geojson` | Local geospatial files | + +## 🎯 Examples + +### Labor Force Participation Analysis +```python +# Traditional approach: 20+ lines of boilerplate +# PyMapGIS approach: 3 lines + +acs = pmg.read("census://acs/acs5?year=2022&geography=county&variables=B23025_004E,B23025_003E") +acs["lfp_rate"] = acs["B23025_004E"] / acs["B23025_003E"] +acs.plot.choropleth(column="lfp_rate", title="Labor Force Participation").show() +``` + +### Housing Cost Burden Explorer +```python +# Load housing cost data with automatic county boundaries +housing = pmg.read("census://acs/acs5?year=2022&geography=county&variables=B25070_010E,B25070_001E") + +# Calculate and visualize cost burden +housing["burden_30plus"] = housing["B25070_010E"] / housing["B25070_001E"] 
+housing.plot.choropleth( + column="burden_30plus", + title="% Households Spending 30%+ on Housing", + cmap="OrRd", + legend=True +).show() +``` + +## πŸ› οΈ Installation + +### From PyPI (Recommended) +```bash +pip install pymapgis +``` + +### From Source +```bash +git clone https://github.com/pymapgis/core.git +cd core +poetry install +``` + +## πŸ“š Documentation + +- **[API Reference](https://pymapgis.github.io/core/)** +- **[Examples Repository](https://github.com/pymapgis/examples)** +- **[Contributing Guide](CONTRIBUTING.md)** + +## 🀝 Contributing + +We welcome contributions! PyMapGIS is an open-source project under the MIT license. + +1. **Fork** the repository +2. **Create** a feature branch (`git checkout -b feature/amazing-feature`) +3. **Commit** your changes (`git commit -m 'Add amazing feature'`) +4. **Push** to the branch (`git push origin feature/amazing-feature`) +5. **Open** a Pull Request + +See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines. + +## πŸ“„ License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. + +## πŸ™ Acknowledgments + +- Built on top of [GeoPandas](https://geopandas.org/), [Leafmap](https://leafmap.org/), and [Requests-Cache](https://requests-cache.readthedocs.io/) +- Inspired by the need for simpler geospatial workflows in Python +- Thanks to all [contributors](https://github.com/pymapgis/core/graphs/contributors) + +--- + +**Made with ❀️ by the PyMapGIS community** diff --git a/housing_cost_burden/README.md b/housing_cost_burden/README.md new file mode 100644 index 0000000..b011c3c --- /dev/null +++ b/housing_cost_burden/README.md @@ -0,0 +1,20 @@ +# Housing-Cost Burden Explorer + +**before/** – classic GeoPandas + requests + matplotlib +**after/** – 10-line PyMapGIS script + +## Description + +This demo shows how to create a map of housing cost burden (percentage of households spending 30%+ of income on housing) by county using Census ACS data. 
+ +### Before (Traditional Approach) +- Manual API calls to Census Bureau +- Complex data aggregation and calculation +- Merging with shapefile using GeoPandas +- Static plotting with matplotlib + +### After (PyMapGIS Approach) +- Single `pm.get_county_table()` call fetches the ACS variables +- Burden rate computed with plain pandas column arithmetic +- County geometry from `pm.counties()` joined on `geoid` +- Choropleth drawn with `pm.choropleth()` and saved to `housing_burden_map.png` diff --git a/housing_cost_burden/after/app.py b/housing_cost_burden/after/app.py new file mode 100644 index 0000000..5a7533a --- /dev/null +++ b/housing_cost_burden/after/app.py @@ -0,0 +1,35 @@ +""" +AFTER: Housing Cost Burden using PyMapGIS +Run: python app.py +Produces: housing_burden_map.png +""" + +import pymapgis as pm + +# --- 1. Fetch data ---------------------------------------------------------- +VARS = ["B25070_001E", "B25070_007E", "B25070_008E", "B25070_009E", "B25070_010E"] +acs = pm.get_county_table(2022, VARS) + +# --- 2. Calculate burden rate ----------------------------------------------- +# Calculate housing cost burden (30%+ of income on housing) +acs["burden_30plus"] = ( + acs["B25070_007E"] + acs["B25070_008E"] + acs["B25070_009E"] + acs["B25070_010E"] +) +acs["burden_rate"] = acs["burden_30plus"] / acs["B25070_001E"] + +# --- 3. Join geometry ------------------------------------------------------- +gdf = pm.counties(2022, "20m") +# Ensure consistent column names for joining +if "GEOID" in gdf.columns: + gdf = gdf.rename(columns={"GEOID": "geoid"}) +merged = gdf.merge(acs[["geoid", "burden_rate"]], on="geoid", how="left") + +# --- 4. 
Plot ---------------------------------------------------------------- +ax = pm.choropleth( + merged, + "burden_rate", + cmap="Reds", + title="Housing Cost Burden (30%+ of Income), 2022 ACS", +) +ax.figure.savefig("housing_burden_map.png", dpi=150, bbox_inches="tight") +print("✓ Map saved to housing_burden_map.png") diff --git a/housing_cost_burden/after/requirements.txt b/housing_cost_burden/after/requirements.txt new file mode 100644 index 0000000..6ea4d57 --- /dev/null +++ b/housing_cost_burden/after/requirements.txt @@ -0,0 +1 @@ +pymapgis @ git+https://github.com/pymapgis/core.git@main diff --git a/housing_cost_burden/before/app.py b/housing_cost_burden/before/app.py new file mode 100644 index 0000000..4336ab6 --- /dev/null +++ b/housing_cost_burden/before/app.py @@ -0,0 +1,49 @@ +""" +BEFORE: Housing Cost Burden map +Run with: python app.py [CENSUS_API_KEY] +""" + +import sys +import requests +import pandas as pd +import geopandas as gpd +import matplotlib.pyplot as plt + +key = sys.argv[1] if len(sys.argv) > 1 else "DEMO_KEY" +variables = "B25070_001E,B25070_007E,B25070_008E,B25070_009E,B25070_010E" +url = ( + f"https://api.census.gov/data/2022/acs/acs5" + f"?get=NAME,{variables}&for=county:*&key={key}" +) + +df = pd.DataFrame( + requests.get(url, timeout=30).json()[1:], + columns=[ + "name", + "total", + "b30_35", + "b35_40", + "b40_50", + "b50plus", + "state", + "county", + ], +) + +# Calculate housing cost burden (30%+ of income on housing) +df["burden_30plus"] = ( + df.b30_35.astype(int) + + df.b35_40.astype(int) + + df.b40_50.astype(int) + + df.b50plus.astype(int) +) +df["burden_rate"] = df.burden_30plus / df.total.astype(int) + +shp = gpd.read_file("../../data/counties/cb_2022_us_county_500k.shp") +gdf = shp.merge(df, left_on=["STATEFP", "COUNTYFP"], right_on=["state", "county"]) + +ax = gdf.plot("burden_rate", cmap="Reds", figsize=(12, 7), legend=True, edgecolor="0.4") +ax.set_title("Housing Cost Burden (30%+ of Income)") +ax.axis("off") +plt.tight_layout() +plt.show() diff --git 
a/housing_cost_burden/before/requirements.txt b/housing_cost_burden/before/requirements.txt new file mode 100644 index 0000000..160436e --- /dev/null +++ b/housing_cost_burden/before/requirements.txt @@ -0,0 +1,3 @@ +geopandas>=1.1,<2.0 +matplotlib +requests diff --git a/labor_force_gap/README.md b/labor_force_gap/README.md new file mode 100644 index 0000000..ca04258 --- /dev/null +++ b/labor_force_gap/README.md @@ -0,0 +1,20 @@ +# Labor-Force Participation Gap + +**before/** – classic GeoPandas + requests + matplotlib +**after/** – 10-line PyMapGIS script + +## Description + +This demo shows how to create a map of prime-age labor-force participation rates by county using Census ACS data. + +### Before (Traditional Approach) +- Manual API calls to Census Bureau +- Data cleaning and transformation with pandas +- Merging with shapefile using GeoPandas +- Plotting with matplotlib + +### After (PyMapGIS Approach) +- Single `pm.get_county_table()` call fetches the ACS variables +- Ratio and gap computed with plain pandas column arithmetic +- County geometry from `pm.counties()` joined on `geoid` +- Choropleth drawn with `pm.choropleth()` and saved to `gap_map.png` diff --git a/labor_force_gap/after/app.py b/labor_force_gap/after/app.py new file mode 100644 index 0000000..d27fe64 --- /dev/null +++ b/labor_force_gap/after/app.py @@ -0,0 +1,32 @@ +""" +AFTER: Prime-Age Labor-Force Participation map using PyMapGIS +Run: python app.py +Produces: gap_map.png +""" + +import pymapgis as pm + +# --- 1. Fetch data ---------------------------------------------------------- +VARS = ["B23025_004E", "B23025_003E"] # labelled "In labor force, Total population"; NOTE(review): ACS table B23025 appears to list _004E as Employed and _003E as Civilian labor force - confirm the intended variables +acs = pm.get_county_table(2022, VARS) + +# --- 2. 
Calculate ratio ----------------------------------------------------- +acs["lfp"] = acs["B23025_004E"] / acs["B23025_003E"] +acs["gap"] = 1 - acs["lfp"] # Gap from 100% participation + +# --- 3. 
Join geometry ------------------------------------------------------- +gdf = pm.counties(2022, "20m") +# Ensure consistent column names for joining +if "GEOID" in gdf.columns: + gdf = gdf.rename(columns={"GEOID": "geoid"}) +merged = gdf.merge(acs[["geoid", "gap", "lfp"]], on="geoid", how="left") + +# --- 4. Plot ---------------------------------------------------------------- +ax = pm.choropleth( + merged, + "gap", + cmap="YlOrRd", + title="Prime-Age Labor-Force Participation GAP, 2022 ACS", +) +ax.figure.savefig("gap_map.png", dpi=150, bbox_inches="tight") +print("✓ Map saved to gap_map.png") diff --git a/labor_force_gap/after/gap_map.png b/labor_force_gap/after/gap_map.png new file mode 100644 index 0000000..f8de676 Binary files /dev/null and b/labor_force_gap/after/gap_map.png differ diff --git a/labor_force_gap/after/requirements.txt b/labor_force_gap/after/requirements.txt new file mode 100644 index 0000000..6ea4d57 --- /dev/null +++ b/labor_force_gap/after/requirements.txt @@ -0,0 +1 @@ +pymapgis @ git+https://github.com/pymapgis/core.git@main diff --git a/labor_force_gap/before/app.py b/labor_force_gap/before/app.py new file mode 100644 index 0000000..c7e5c25 --- /dev/null +++ b/labor_force_gap/before/app.py @@ -0,0 +1,31 @@ +""" +BEFORE: Prime-Age Labor-Force Participation map +Run with: python app.py [CENSUS_API_KEY] +""" + +import sys +import requests +import pandas as pd +import geopandas as gpd +import matplotlib.pyplot as plt + +key = sys.argv[1] if len(sys.argv) > 1 else "DEMO_KEY" +variables = "B23001_001E,B23001_004E" # response columns follow this order, so the table total (_001E) comes first +url = ( + f"https://api.census.gov/data/2022/acs/acs5" + f"?get=NAME,{variables}&for=county:*&key={key}" +) + +df = pd.DataFrame( + requests.get(url, timeout=30).json()[1:], columns=["name", "pop", "labor", "state", "county"] +) +df["lfp"] = df["labor"].astype(int) / df["pop"].astype(int) + +shp = gpd.read_file("../../data/counties/cb_2022_us_county_500k.shp") +gdf = shp.merge(df, left_on=["STATEFP", "COUNTYFP"], right_on=["state", "county"]) + +ax = gdf.plot("lfp", 
cmap="viridis", figsize=(12, 7), legend=True, edgecolor="0.4") +ax.set_title("Prime-Age Labor-Force Participation") +ax.axis("off") +plt.tight_layout() +plt.show() diff --git a/labor_force_gap/before/requirements.txt b/labor_force_gap/before/requirements.txt new file mode 100644 index 0000000..160436e --- /dev/null +++ b/labor_force_gap/before/requirements.txt @@ -0,0 +1,3 @@ +geopandas>=1.1,<2.0 +matplotlib +requests diff --git a/poetry.lock b/poetry.lock index b6b938a..ff36047 100644 --- a/poetry.lock +++ b/poetry.lock @@ -262,6 +262,33 @@ files = [ {file = "cachetools-6.0.0.tar.gz", hash = "sha256:f225782b84438f828328fc2ad74346522f27e5b1440f4e9fd18b20ebfd1aa2cf"}, ] +[[package]] +name = "cattrs" +version = "25.1.1" +description = "Composable complex class support for attrs and dataclasses." +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "cattrs-25.1.1-py3-none-any.whl", hash = "sha256:1b40b2d3402af7be79a7e7e097a9b4cd16d4c06e6d526644b0b26a063a1cc064"}, + {file = "cattrs-25.1.1.tar.gz", hash = "sha256:c914b734e0f2d59e5b720d145ee010f1fd9a13ee93900922a2f3f9d593b8382c"}, +] + +[package.dependencies] +attrs = ">=24.3.0" +exceptiongroup = {version = ">=1.1.1", markers = "python_version < \"3.11\""} +typing-extensions = ">=4.12.2" + +[package.extras] +bson = ["pymongo (>=4.4.0)"] +cbor2 = ["cbor2 (>=5.4.6)"] +msgpack = ["msgpack (>=1.0.5)"] +msgspec = ["msgspec (>=0.19.0) ; implementation_name == \"cpython\""] +orjson = ["orjson (>=3.10.7) ; implementation_name == \"cpython\""] +pyyaml = ["pyyaml (>=6.0)"] +tomlkit = ["tomlkit (>=0.11.8)"] +ujson = ["ujson (>=5.10.0)"] + [[package]] name = "certifi" version = "2025.4.26" @@ -3297,6 +3324,37 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "requests-cache" +version = "1.2.1" +description = "A persistent cache for python requests" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = 
[ + {file = "requests_cache-1.2.1-py3-none-any.whl", hash = "sha256:1285151cddf5331067baa82598afe2d47c7495a1334bfe7a7d329b43e9fd3603"}, + {file = "requests_cache-1.2.1.tar.gz", hash = "sha256:68abc986fdc5b8d0911318fbb5f7c80eebcd4d01bfacc6685ecf8876052511d1"}, +] + +[package.dependencies] +attrs = ">=21.2" +cattrs = ">=22.2" +platformdirs = ">=2.5" +requests = ">=2.22" +url-normalize = ">=1.4" +urllib3 = ">=1.25.5" + +[package.extras] +all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "pymongo (>=3)", "pyyaml (>=6.0.1)", "redis (>=3)", "ujson (>=5.4)"] +bson = ["bson (>=0.5)"] +docs = ["furo (>=2023.3,<2024.0)", "linkify-it-py (>=2.0,<3.0)", "myst-parser (>=1.0,<2.0)", "sphinx (>=5.0.2,<6.0.0)", "sphinx-autodoc-typehints (>=1.19)", "sphinx-automodapi (>=0.14)", "sphinx-copybutton (>=0.5)", "sphinx-design (>=0.2)", "sphinx-notfound-page (>=0.8)", "sphinxcontrib-apidoc (>=0.3)", "sphinxext-opengraph (>=0.9)"] +dynamodb = ["boto3 (>=1.15)", "botocore (>=1.18)"] +json = ["ujson (>=5.4)"] +mongodb = ["pymongo (>=3)"] +redis = ["redis (>=3)"] +security = ["itsdangerous (>=2.0)"] +yaml = ["pyyaml (>=6.0.1)"] + [[package]] name = "rich" version = "14.0.0" @@ -3890,6 +3948,24 @@ files = [ {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, ] +[[package]] +name = "url-normalize" +version = "2.2.1" +description = "URL normalization for Python" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "url_normalize-2.2.1-py3-none-any.whl", hash = "sha256:3deb687587dc91f7b25c9ae5162ffc0f057ae85d22b1e15cf5698311247f567b"}, + {file = "url_normalize-2.2.1.tar.gz", hash = "sha256:74a540a3b6eba1d95bdc610c24f2c0141639f3ba903501e61a52a8730247ff37"}, +] + +[package.dependencies] +idna = ">=3.3" + +[package.extras] +dev = ["mypy", "pre-commit", "pytest", "pytest-cov", "pytest-socket", "ruff"] + [[package]] name = "urllib3" version = "2.4.0" @@ -4065,4 +4141,4 @@ files = [ 
def set_cache(dir_: str | Path | None = None, *, ttl_days: int = 7) -> None:
    """
    Enable or disable HTTP caching at runtime.

    set_cache(None)        → disable
    set_cache("~/mycache") → enable & use that folder

    Parameters
    ----------
    dir_ : str | Path | None
        Folder for the cache database. ``None`` switches caching off by
        setting ``PYMAPGIS_DISABLE_CACHE=1`` in this process' environment.
    ttl_days : int
        Default expiry, in days, of the session that is created.
    """
    import os
    from datetime import timedelta

    if dir_ is None:
        os.environ["PYMAPGIS_DISABLE_CACHE"] = "1"
        return

    import pymapgis.cache as cache_module

    os.environ.pop("PYMAPGIS_DISABLE_CACHE", None)
    # The cache module snapshots the env var at import time and
    # _init_session() bails out while that snapshot is true, so a process
    # started with caching disabled could never switch it back on.
    cache_module._ENV_DISABLE = False

    # Detach the old session first, then close it so its backend connection
    # is released instead of lingering until garbage collection.
    previous = cache_module._session
    cache_module._session = None
    if previous is not None:
        previous.close()

    _init_session(dir_, expire_after=timedelta(days=ttl_days))
def get_county_table(
    year: int,
    variables: Sequence[str],
    *,
    state: str | None = None,
    ttl: str = "6h",
) -> pd.DataFrame:
    """
    Fetch *variables* for every county (or a single state) for *year*.

    Parameters
    ----------
    year : int
        Substituted into the ACS 5-year endpoint URL.
    variables : Sequence[str]
        e.g. ["B23025_004E", "B23025_003E"] (labour-force vars). A single
        variable name given as a plain string is accepted as well.
    state : "06" for CA, "01" for AL … None = all states
    ttl : str
        Cache lifetime handed to the cached HTTP helper.

    Returns
    -------
    pd.DataFrame
        One row per county: ``NAME``, the requested variables coerced to
        numbers (unparseable values become NaN), the geography columns the
        API appends, and ``geoid`` (the last two API columns concatenated).

    Raises
    ------
    requests.HTTPError
        Propagated from ``raise_for_status()`` on a non-success response.
    """
    # A bare string is itself a Sequence[str]; without this it would be
    # split into single characters. list() also matters for tuples, which
    # pandas would otherwise interpret as ONE (multi-index style) key.
    var_names = [variables] if isinstance(variables, str) else list(variables)

    params = {"get": ",".join(["NAME", *var_names]), "for": "county:*"}
    if state:
        params["in"] = f"state:{state}"

    # Fall back to the live environment so a key exported after import works.
    api_key = _KEY or os.getenv("CENSUS_API_KEY")
    if api_key:
        params["key"] = api_key

    resp = cached_get(_API.format(year=year), params=params, ttl=ttl)
    resp.raise_for_status()

    # The payload is a list of rows whose first row is the header.
    data = resp.json()
    df = pd.DataFrame(data[1:], columns=data[0])
    df[var_names] = df[var_names].apply(pd.to_numeric, errors="coerce")
    # The API returns state and county as the last two columns
    df["geoid"] = df.iloc[:, -2] + df.iloc[:, -1]  # state + county
    return df
def _cache_disabled() -> bool:
    """Return True when PYMAPGIS_DISABLE_CACHE currently asks for no caching."""
    raw = os.getenv("PYMAPGIS_DISABLE_CACHE", "0").strip()
    try:
        return bool(int(raw))
    except ValueError:
        # Tolerate the usual spellings instead of crashing on int("true").
        return raw.lower() in {"true", "yes", "on"}


def _init_session(
    cache_dir: Union[str, Path] = _DEFAULT_DIR,
    expire_after: timedelta = _DEFAULT_EXPIRE,
) -> None:
    """
    Create (or replace) the global CachedSession.

    Parameters
    ----------
    cache_dir : str | Path
        Folder that holds the SQLite cache file ("~" is expanded); it is
        created when missing.
    expire_after : timedelta
        Default lifetime of cached responses.

    Does nothing while caching is disabled through the environment. The
    variable is read at call time, matching the other helpers in this module,
    rather than relying on the value captured when the module was imported.
    """
    global _session
    if _cache_disabled():
        return

    cache_dir = Path(cache_dir).expanduser()
    cache_dir.mkdir(parents=True, exist_ok=True)
    replacement = requests_cache.CachedSession(
        cache_name=str(cache_dir / "http_cache"),
        backend="sqlite",
        expire_after=expire_after,
        allowable_codes=(200,),  # only successful responses are stored
        allowable_methods=("GET", "HEAD"),
    )

    # Swap first, then close the old session so repeated initialisation does
    # not leave a backend connection open per call.
    previous, _session = _session, replacement
    if previous is not None:
        previous.close()


def _ensure_session() -> None:
    """Initialise the default session on first use unless caching is disabled."""
    # Check environment variable each time
    if _session is None and not _cache_disabled():
        _init_session()
def get(
    url: str,
    *,
    ttl: Union[int, float, str, timedelta, None] = None,
    **kwargs,
) -> "requests.Response":
    """
    Fetch *url* with caching.

    Parameters
    ----------
    ttl : int | float | str | timedelta | None
        • None → default expiry (7 days)
        • int/float (seconds); 0 bypasses the cache for this request
        • "24h", "90m" shorthand
        • timedelta
    kwargs : passed straight to requests (headers, params …)

    Raises
    ------
    ValueError
        If *ttl* is a string that is not valid shorthand.
    """
    # SECURITY NOTE(review): certificate verification is switched off by
    # default for every host, not only the government sites with SSL issues
    # this was added for. Kept for backward compatibility; callers can pass
    # verify=True. Consider flipping the default.
    kwargs.setdefault("verify", False)

    # Check environment variable each time
    if bool(int(os.getenv("PYMAPGIS_DISABLE_CACHE", "0"))):
        return requests.get(url, **kwargs)

    _ensure_session()
    expire_after = _parse_ttl(ttl)

    session = _session
    if session is None:
        # Initialisation can decline to build a session; degrade to a plain
        # request instead of failing with AttributeError on None.
        return requests.get(url, **kwargs)

    # _parse_ttl() returns a timedelta, so the comparison must be against
    # timedelta(0); comparing with the int 0 is never true.
    if expire_after == timedelta(0):
        with session.cache_disabled():
            return session.get(url, **kwargs)

    # Deliberately no `with session:` here: leaving a Session context closes
    # it, which would shut the shared session after every single request.
    return session.get(url, expire_after=expire_after, **kwargs)


def put(binary: bytes, dest: Path, *, overwrite: bool = False) -> Path:
    """
    Persist raw bytes (e.g. a ZIP shapefile) onto disk cache.

    An existing *dest* is left untouched unless *overwrite* is true. The
    payload goes to a sibling temp file that is then renamed over *dest*, so
    an interrupted write never leaves a truncated file that later calls would
    mistake for a finished download.

    Returns the written Path.
    """
    dest = Path(dest)
    if dest.exists() and not overwrite:
        return dest

    dest.parent.mkdir(parents=True, exist_ok=True)
    scratch = dest.with_name(f"{dest.name}.{os.getpid()}.tmp")
    try:
        scratch.write_bytes(binary)
        os.replace(scratch, dest)
    finally:
        # Already gone after a successful rename; removes debris on failure.
        scratch.unlink(missing_ok=True)
    return dest


def clear() -> None:
    """
    Remove every response stored by the active session, then close it.

    The module-level session is reset to None so the next request builds a
    fresh one. When no session has been initialised this is a no-op.
    """
    global _session
    session, _session = _session, None
    if session is None:
        return
    try:
        session.cache.clear()
    finally:
        session.close()


# ----------- internals ------------------------------------------------------

# Positional groups: (1) the count, (2) the unit letter.
_RE_SHORTHAND = re.compile(r"(\d+)([smhd])")
_TTL_UNITS = {"s": "seconds", "m": "minutes", "h": "hours", "d": "days"}


def _parse_ttl(val) -> Optional[timedelta]:
    """
    Normalise a TTL value to a timedelta.

    None gives the module default, a timedelta is returned unchanged, a
    number is taken as seconds, and a string must be shorthand such as
    "30s", "5m", "2h" or "3d" (case and surrounding blanks are ignored).

    Raises ValueError for anything else.
    """
    if val is None:
        return _DEFAULT_EXPIRE
    if isinstance(val, timedelta):
        return val
    if isinstance(val, (int, float)):
        return timedelta(seconds=val)

    match = _RE_SHORTHAND.fullmatch(str(val).strip().lower())
    if match is None:
        raise ValueError(f"Un-recognised TTL: {val!r}")
    count, unit = match.groups()
    return timedelta(**{_TTL_UNITS[unit]: int(count)})
def choropleth(
    gdf: gpd.GeoDataFrame,
    column: str,
    *,
    cmap: str = "viridis",
    title: str | None = None,
    figsize: tuple[float, float] = (10, 6),
    legend: bool = False,
    **plot_kwargs,
):
    """
    Draw *column* of *gdf* as a choropleth and return the matplotlib Axes.

    Parameters
    ----------
    gdf : GeoDataFrame
        Geometries plus the column to colour by.
    column : str
        Name of the column that drives the colours.
    cmap : str
        Matplotlib colormap name.
    title : str | None
        Axes title; the column name is used when this is None or empty.
    figsize : tuple[float, float]
        Figure size in inches.
    legend : bool
        Whether to draw a legend / colour bar.
    **plot_kwargs
        Forwarded to ``GeoDataFrame.plot``. ``linewidth`` and ``edgecolor``
        default to 0.1 and "black" unless given here.
    """
    # Outline style is a default, not a fixed choice.
    plot_kwargs.setdefault("linewidth", 0.1)
    plot_kwargs.setdefault("edgecolor", "black")

    ax = gdf.plot(
        column=column,
        cmap=cmap,
        figsize=figsize,
        legend=legend,
        **plot_kwargs,
    )
    ax.axis("off")
    ax.set_title(title or column)
    # Lay out the figure that owns these axes; it need not be pyplot's
    # "current" figure when the caller supplied their own `ax`.
    ax.figure.tight_layout()
    return ax
def counties(year: int = 2022, scale: str = "500k") -> gpd.GeoDataFrame:
    """
    Cached download → GeoDataFrame for all US counties (incl. PR).

    `scale` ∈ {"500k", "5m", "20m"}.

    The archive is kept under ``~/.pymapgis/shapes`` and reused on later
    calls. It is fetched again when the local copy is missing or is not a
    readable ZIP archive.

    Raises
    ------
    requests.HTTPError
        Propagated from ``raise_for_status()`` when the download fails.
    """
    import zipfile

    url = _URL_TMPL.format(year=year, scale=scale)
    cache_dir = Path.home() / ".pymapgis" / "shapes"
    zip_path = cache_dir / url.rsplit("/", 1)[-1]

    # is_zipfile() is False for a missing file and for a truncated archive
    # left behind by an interrupted download, which a bare existence check
    # would keep reusing forever.
    if not zipfile.is_zipfile(zip_path):
        resp = cached_get(url, ttl="90d")
        resp.raise_for_status()
        cache_put(resp.content, zip_path, overwrite=True)

    return gpd.read_file(f"zip://{zip_path}")
def test_caching_roundtrip():
    """A second request inside the TTL must be answered from the cache."""
    with tempfile.TemporaryDirectory() as td:
        cache._init_session(Path(td))
        try:
            # A generous TTL keeps the hit assertion independent of how long
            # the first network round-trip takes.
            # first call -> miss
            first = cache.get(TEST_URL, ttl=60)
            assert not getattr(first, "from_cache", False)

            # second call within TTL -> hit
            second = cache.get(TEST_URL, ttl=60)
            assert getattr(second, "from_cache", False)
        finally:
            # Close the session before the directory is removed; otherwise
            # the cache file is still open during cleanup and the global
            # session keeps pointing at a deleted folder for later tests.
            cache.clear()


def test_clear():
    """clear() must drop the global session."""
    with tempfile.TemporaryDirectory() as td:
        cache._init_session(Path(td))
        try:
            cache.get(TEST_URL, ttl=60)
        finally:
            cache.clear()
        # After clear, session should be None
        assert cache._session is None


def test_ttl_parsing():
    """Test TTL parsing functionality"""
    from datetime import timedelta

    # Test None (default)
    assert cache._parse_ttl(None) == timedelta(days=7)

    # Test timedelta
    td = timedelta(hours=2)
    assert cache._parse_ttl(td) == td

    # Test seconds (int/float)
    assert cache._parse_ttl(3600) == timedelta(seconds=3600)
    assert cache._parse_ttl(3600.5) == timedelta(seconds=3600.5)

    # Test shorthand
    assert cache._parse_ttl("30s") == timedelta(seconds=30)
    assert cache._parse_ttl("5m") == timedelta(minutes=5)
    assert cache._parse_ttl("2h") == timedelta(hours=2)
    assert cache._parse_ttl("3d") == timedelta(days=3)

    # Test invalid
    with pytest.raises(ValueError):
        cache._parse_ttl("invalid")


def test_put_file():
    """Test file caching functionality"""
    with tempfile.TemporaryDirectory() as td:
        dest = Path(td) / "test.bin"
        data = b"test data"

        # First write
        result = cache.put(data, dest)
        assert result == dest
        assert dest.read_bytes() == data

        # Second write without overwrite (should not change)
        new_data = b"new data"
        result = cache.put(new_data, dest, overwrite=False)
        assert result == dest
        assert dest.read_bytes() == data  # unchanged

        # With overwrite
        result = cache.put(new_data, dest, overwrite=True)
        assert result == dest
        assert dest.read_bytes() == new_data
def test_disable_cache():
    """With PYMAPGIS_DISABLE_CACHE=1 responses come from plain requests."""
    import os

    env_name = "PYMAPGIS_DISABLE_CACHE"
    # Remember the caller's setting so it can be put back afterwards
    saved = os.environ.get(env_name)

    try:
        # Start without a session and with caching switched off
        cache._session = None
        os.environ[env_name] = "1"

        # An uncached response carries no `from_cache` attribute
        resp = cache.get(TEST_URL)
        assert not hasattr(resp, "from_cache")

    finally:
        # Put the environment back exactly as it was
        if saved is not None:
            os.environ[env_name] = saved
        else:
            os.environ.pop(env_name, None)

        # Leave no session behind for the tests that follow
        cache._session = None


def test_acs_smoke():
    """Smoke-test the ACS county download."""
    wanted = ["B23025_004E", "B23025_003E"]  # labour-force
    table = get_county_table(2022, wanted, state="06")  # CA only – tiny payload
    assert isinstance(table, pd.DataFrame)
    assert set(wanted).issubset(table.columns)
    assert "geoid" in table.columns
    assert len(table) > 0  # at least one county came back


def test_counties_smoke():
    """Smoke-test the county shapefile download (SSL fix included)."""
    import geopandas as gpd
    from pymapgis import counties

    shapes = counties(2022, "20m")
    assert isinstance(shapes, gpd.GeoDataFrame)
    # the join key has to be there
    assert "GEOID" in shapes.columns
    assert len(shapes) > 3000  # roughly every US county