Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,19 @@ __pycache__
*.pyc
*.swp
*.log
build
build
dist
pyhera.egg-info/
venv
.vscode/
heraenv
.venv
.python-version

# --- local venv & noise ---
heraenv/
heraenv.bak/
**/__pycache__/
**/.ipynb_checkpoints/
.vscode/
.venv
.python-version
File renamed without changes.
64 changes: 64 additions & 0 deletions =0.5
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
Requirement already satisfied: numpy==2.2.2 in ./heraenv/lib/python3.12/site-packages (2.2.2)
Requirement already satisfied: scipy==1.14.1 in ./heraenv/lib/python3.12/site-packages (1.14.1)
Requirement already satisfied: pandas==2.3.2 in ./heraenv/lib/python3.12/site-packages (2.3.2)
Requirement already satisfied: matplotlib==3.9.2 in ./heraenv/lib/python3.12/site-packages (3.9.2)
Requirement already satisfied: dask==2024.8.0 in ./heraenv/lib/python3.12/site-packages (from dask[dataframe]==2024.8.0) (2024.8.0)
Requirement already satisfied: xarray==2024.5.0 in ./heraenv/lib/python3.12/site-packages (2024.5.0)
Requirement already satisfied: geopandas==0.14.4 in ./heraenv/lib/python3.12/site-packages (0.14.4)
Requirement already satisfied: rasterio==1.4.1 in ./heraenv/lib/python3.12/site-packages (1.4.1)
Requirement already satisfied: shapely==2.0.4 in ./heraenv/lib/python3.12/site-packages (2.0.4)
Requirement already satisfied: mongoengine==0.22.1 in ./heraenv/lib/python3.12/site-packages (0.22.1)
Requirement already satisfied: pymongo<4 in ./heraenv/lib/python3.12/site-packages (3.13.0)
Requirement already satisfied: vtk==9.5.2 in ./heraenv/lib/python3.12/site-packages (9.5.2)
Requirement already satisfied: netcdf4==1.7.1 in ./heraenv/lib/python3.12/site-packages (1.7.1)
Requirement already satisfied: geojson==2.5.0 in ./heraenv/lib/python3.12/site-packages (2.5.0)
Requirement already satisfied: fastparquet==2024.5.0 in ./heraenv/lib/python3.12/site-packages (2024.5.0)
Requirement already satisfied: seaborn==0.13.2 in ./heraenv/lib/python3.12/site-packages (0.13.2)
Requirement already satisfied: jinja2==3.1.6 in ./heraenv/lib/python3.12/site-packages (3.1.6)
Requirement already satisfied: testresources==2.0.1 in ./heraenv/lib/python3.12/site-packages (2.0.1)
Requirement already satisfied: unum==4.1.4 in ./heraenv/lib/python3.12/site-packages (4.1.4)
Requirement already satisfied: descartes==1.1.0 in ./heraenv/lib/python3.12/site-packages (1.1.0)
Requirement already satisfied: pyfoam==2020.5 in ./heraenv/lib/python3.12/site-packages (2020.5)
Requirement already satisfied: pint in ./heraenv/lib/python3.12/site-packages (0.25)
Requirement already satisfied: pint-pandas in ./heraenv/lib/python3.12/site-packages (0.7.1)
Requirement already satisfied: deprecated in ./heraenv/lib/python3.12/site-packages (1.2.18)
Requirement already satisfied: jsonpath-ng in ./heraenv/lib/python3.12/site-packages (1.7.0)
Requirement already satisfied: tqdm in ./heraenv/lib/python3.12/site-packages (4.67.1)
Requirement already satisfied: python-dateutil>=2.8.2 in ./heraenv/lib/python3.12/site-packages (from pandas==2.3.2) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in ./heraenv/lib/python3.12/site-packages (from pandas==2.3.2) (2025.2)
Requirement already satisfied: tzdata>=2022.7 in ./heraenv/lib/python3.12/site-packages (from pandas==2.3.2) (2025.2)
Requirement already satisfied: contourpy>=1.0.1 in ./heraenv/lib/python3.12/site-packages (from matplotlib==3.9.2) (1.3.3)
Requirement already satisfied: cycler>=0.10 in ./heraenv/lib/python3.12/site-packages (from matplotlib==3.9.2) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in ./heraenv/lib/python3.12/site-packages (from matplotlib==3.9.2) (4.60.0)
Requirement already satisfied: kiwisolver>=1.3.1 in ./heraenv/lib/python3.12/site-packages (from matplotlib==3.9.2) (1.4.9)
Requirement already satisfied: packaging>=20.0 in ./heraenv/lib/python3.12/site-packages (from matplotlib==3.9.2) (25.0)
Requirement already satisfied: pillow>=8 in ./heraenv/lib/python3.12/site-packages (from matplotlib==3.9.2) (11.3.0)
Requirement already satisfied: pyparsing>=2.3.1 in ./heraenv/lib/python3.12/site-packages (from matplotlib==3.9.2) (3.2.5)
Requirement already satisfied: click>=8.1 in ./heraenv/lib/python3.12/site-packages (from dask==2024.8.0->dask[dataframe]==2024.8.0) (8.3.0)
Requirement already satisfied: cloudpickle>=1.5.0 in ./heraenv/lib/python3.12/site-packages (from dask==2024.8.0->dask[dataframe]==2024.8.0) (3.1.1)
Requirement already satisfied: fsspec>=2021.09.0 in ./heraenv/lib/python3.12/site-packages (from dask==2024.8.0->dask[dataframe]==2024.8.0) (2025.9.0)
Requirement already satisfied: partd>=1.4.0 in ./heraenv/lib/python3.12/site-packages (from dask==2024.8.0->dask[dataframe]==2024.8.0) (1.4.2)
Requirement already satisfied: pyyaml>=5.3.1 in ./heraenv/lib/python3.12/site-packages (from dask==2024.8.0->dask[dataframe]==2024.8.0) (6.0.3)
Requirement already satisfied: toolz>=0.10.0 in ./heraenv/lib/python3.12/site-packages (from dask==2024.8.0->dask[dataframe]==2024.8.0) (1.0.0)
Requirement already satisfied: fiona>=1.8.21 in ./heraenv/lib/python3.12/site-packages (from geopandas==0.14.4) (1.10.1)
Requirement already satisfied: pyproj>=3.3.0 in ./heraenv/lib/python3.12/site-packages (from geopandas==0.14.4) (3.7.2)
Requirement already satisfied: affine in ./heraenv/lib/python3.12/site-packages (from rasterio==1.4.1) (2.4.0)
Requirement already satisfied: attrs in ./heraenv/lib/python3.12/site-packages (from rasterio==1.4.1) (25.3.0)
Requirement already satisfied: certifi in ./heraenv/lib/python3.12/site-packages (from rasterio==1.4.1) (2025.8.3)
Requirement already satisfied: cligj>=0.5 in ./heraenv/lib/python3.12/site-packages (from rasterio==1.4.1) (0.7.2)
Requirement already satisfied: click-plugins in ./heraenv/lib/python3.12/site-packages (from rasterio==1.4.1) (1.1.1.2)
Requirement already satisfied: cftime in ./heraenv/lib/python3.12/site-packages (from netcdf4==1.7.1) (1.6.4.post1)
Requirement already satisfied: cramjam>=2.3 in ./heraenv/lib/python3.12/site-packages (from fastparquet==2024.5.0) (2.11.0)
Requirement already satisfied: MarkupSafe>=2.0 in ./heraenv/lib/python3.12/site-packages (from jinja2==3.1.6) (3.0.3)
Requirement already satisfied: pbr>=1.8 in ./heraenv/lib/python3.12/site-packages (from testresources==2.0.1) (7.0.1)
Requirement already satisfied: dask-expr<1.2,>=1.1 in ./heraenv/lib/python3.12/site-packages (from dask[dataframe]==2024.8.0) (1.1.10)
Requirement already satisfied: pyarrow>=7.0.0 in ./heraenv/lib/python3.12/site-packages (from dask-expr<1.2,>=1.1->dask[dataframe]==2024.8.0) (21.0.0)
Requirement already satisfied: flexcache>=0.3 in ./heraenv/lib/python3.12/site-packages (from pint) (0.3)
Requirement already satisfied: flexparser>=0.4 in ./heraenv/lib/python3.12/site-packages (from pint) (0.4)
Requirement already satisfied: platformdirs>=2.1.0 in ./heraenv/lib/python3.12/site-packages (from pint) (4.4.0)
Requirement already satisfied: typing-extensions>=4.0.0 in ./heraenv/lib/python3.12/site-packages (from pint) (4.15.0)
Requirement already satisfied: wrapt<2,>=1.10 in ./heraenv/lib/python3.12/site-packages (from deprecated) (1.17.3)
Requirement already satisfied: ply in ./heraenv/lib/python3.12/site-packages (from jsonpath-ng) (3.11)
Requirement already satisfied: locket in ./heraenv/lib/python3.12/site-packages (from partd>=1.4.0->dask==2024.8.0->dask[dataframe]==2024.8.0) (1.0.0)
Requirement already satisfied: setuptools in ./heraenv/lib/python3.12/site-packages (from pbr>=1.8->testresources==2.0.1) (80.9.0)
Requirement already satisfied: six>=1.5 in ./heraenv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas==2.3.2) (1.17.0)
File renamed without changes.
File renamed without changes.
206 changes: 140 additions & 66 deletions hera/datalayer/datahandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,13 @@
import dask.dataframe
import xarray
import json

try:
import geopandas
except ImportError:
print("geopandas not installed, no support for gis data format")
try:
from osgeo import gdal
except ImportError:
print("gdal not installed, no support for shapefiles")

import geopandas
from osgeo import gdal
import matplotlib.image as mpimg
import sys
import pickle
import io
try:
import rasterio
except ImportError:
print("rasterio not installed, no support for image data types. ")

import rasterio
from hera.utils import loadJSON
import importlib
import os
Expand Down Expand Up @@ -753,84 +741,170 @@ class DataHandler_Class(object):
DataType: Class

resource:
Filesystem directory that contains the code for the class (will be added to sys.path).
If empty/None, nothing is added to sys.path.
If resource points directly to a Python package directory (contains __init__.py),
the parent directory is also added so that `import top_pkg.module` resolves.
Filesystem directory used to resolve imports:
- If 'resource' points to the *package directory itself* (contains __init__.py),
we add ONLY its parent directory to sys.path (so 'import mypkg.mymod' works).
- If 'resource' points to a *parent directory* that contains the package,
we add that parent directory to sys.path.
- If resource is empty/None, we do not modify sys.path.

desc:
- 'classpath' (str): fully qualified import path of the class,
e.g. 'mypkg.mymodule.MyClass' (REQUIRED).
- 'params' or 'parameters' (dict, optional): keyword-args for the class constructor.
- 'classpath' (str, REQUIRED): fully qualified class path, e.g. 'mypkg.mymod.MyClass'.
- 'params' or 'parameters' (dict, optional): kwargs for the class constructor.
- 'instantiate' (bool, optional; default True):
If True, return an instance (cls(**...)).
If False, return the class object (cls) itself.
If False, return the class object (cls).

Merge rule (Option B):
Merge rule:
When both desc.parameters and **kwargs provide the same key,
desc.parameters should take precedence (override kwargs).
desc.parameters take precedence (they override **kwargs).
"""

@staticmethod
def saveData(resource, fileName, **kwargs):
# Storing a "Class" datatype as a file is not supported by this handler.
# This handler is for dynamically loading Python classes; saving is not supported.
raise NotImplementedError("Saving a Class datatype is not supported")

@staticmethod
def getData(resource, desc=None, **kwargs):
"""
Load a Python class dynamically.

Behavior for 'resource':
- If 'resource' is a *package directory* (contains __init__.py), we must add
the *parent directory* to sys.path so that 'import top_pkg.module' resolves.
- If 'resource' is a directory that *contains* the top-level package, we add
that directory itself to sys.path.

Extra robustness:
- Move the chosen path to the *front* of sys.path (even if it already exists).
- Purge relevant entries from sys.modules before import (avoid stale cache).
- Invalidate import caches between attempts.
"""
import os
import sys
import importlib

# 1) Add search paths to sys.path:
# - If resource points to the package directory itself (contains __init__.py),
# also add its parent so that `import top_pkg...` resolves.
search_paths = []
if resource:
abs_path = os.path.abspath(resource)
if os.path.isdir(abs_path):
pkg_init = os.path.join(abs_path, "__init__.py")
if os.path.isfile(pkg_init):
parent = os.path.dirname(abs_path)
if parent not in sys.path:
search_paths.append(parent)
if abs_path not in sys.path:
search_paths.append(abs_path)
# Prepend for priority (keep user-provided paths before existing ones)
for pth in reversed(search_paths):
sys.path.insert(0, pth)

# 2) Resolve metadata
# ---------- 1) Resolve classpath ----------
desc = desc or {}
classpath = desc.get("classpath") or kwargs.get("classpath")
if not classpath:
raise ValueError('For dataFormat=Class you must provide desc["classpath"]')

params = desc.get("parameters") or desc.get("params") or {}
instantiate = desc.get("instantiate", True)

# 3) Import module and get class by name
module_name, _, class_name = classpath.rpartition(".")
if not module_name or not class_name:
raise ValueError(
f"Invalid classpath '{classpath}'. Expected something like 'pkg.mod.Class'."
)

try:
raise ValueError(f"Invalid classpath '{classpath}'. Expected 'pkg.mod.Class'.")
top_pkg = module_name.split(".", 1)[0]

# ---------- 2) sys.path manipulation helpers ----------
def move_to_front(path: str):
"""Ensure 'path' is at sys.path[0] (remove previous occurrence if any)."""
try:
while path in sys.path:
sys.path.remove(path)
except ValueError:
pass
sys.path.insert(0, path)

def ensure_parent_for_package_dir(pkg_dir: str):
"""resource == package dir: put its *parent* at sys.path[0]."""
parent = os.path.dirname(pkg_dir)
move_to_front(parent)

def ensure_parent_dir_contains_pkg(parent_dir: str):
"""resource == parent dir that contains the package."""
move_to_front(parent_dir)

def purge_modules():
"""Drop relevant modules from sys.modules to avoid stale imports."""
for mod in (top_pkg, module_name):
if mod in sys.modules:
del sys.modules[mod]

# ---------- 3) Decide which path to put on sys.path ----------
if resource:
abs_path = os.path.abspath(resource)
if os.path.isdir(abs_path):
pkg_init = os.path.join(abs_path, "__init__.py")
if os.path.isfile(pkg_init):
# Case A: resource is the *package directory*
ensure_parent_for_package_dir(abs_path)
else:
# Case B: resource is a parent dir that *may* contain the package
cand_pkg = os.path.join(abs_path, top_pkg)
if os.path.isdir(cand_pkg) and os.path.isfile(os.path.join(cand_pkg, "__init__.py")):
ensure_parent_dir_contains_pkg(abs_path)
else:
# Fallback: still add the given dir
ensure_parent_dir_contains_pkg(abs_path)

importlib.invalidate_caches()
purge_modules() # clear caches BEFORE first import attempt

# ---------- 4) Import module & get class (with one retry and debug) ----------
def import_and_get():
module = importlib.import_module(module_name)
except Exception as e:
raise ImportError(
f"Cannot import module '{module_name}' for classpath '{classpath}': {e}"
)
try:
return getattr(module, class_name)
except AttributeError:
raise ImportError(f"Module '{module_name}' has no attribute '{class_name}'")

try:
cls = getattr(module, class_name)
except AttributeError:
raise ImportError(f"Module '{module_name}' has no attribute '{class_name}'")

# 4) Merge constructor kwargs so that desc.parameters override duplicates (Option B)
call_kwargs = dict(kwargs) # baseline from **kwargs
call_kwargs.update(params) # desc.parameters WIN on duplicates
cls = import_and_get()
except Exception as e1:
# Retry once with a stronger path setup: ensure both parent and pkg dir exist at the front
debug_notes = []
try:
if resource:
abs_path = os.path.abspath(resource)
if os.path.isdir(abs_path):
pkg_init = os.path.join(abs_path, "__init__.py")
if os.path.isfile(pkg_init):
# Make sure BOTH parent and pkg dir are at the very front (parent first)
parent = os.path.dirname(abs_path)
move_to_front(parent)
move_to_front(abs_path) # harmless; parent is still first
else:
cand_pkg = os.path.join(abs_path, top_pkg)
if os.path.isdir(cand_pkg) and os.path.isfile(os.path.join(cand_pkg, "__init__.py")):
move_to_front(abs_path)
move_to_front(cand_pkg)
importlib.invalidate_caches()
purge_modules()
cls = import_and_get()
except Exception as e2:
# Build rich diagnostics
try:
head_sys_path = list(sys.path[:8])
except Exception:
head_sys_path = []
try:
parent_ls = []
pkg_ls = []
if resource:
ap = os.path.abspath(resource)
if os.path.isdir(ap):
parent = os.path.dirname(ap)
parent_ls = sorted(os.listdir(parent)) if os.path.isdir(parent) else []
pkg_ls = sorted(os.listdir(ap)) if os.path.isdir(ap) else []
debug_notes.append(f"sys.path(head)={head_sys_path}")
debug_notes.append(f"top_pkg={top_pkg} | module_name={module_name}")
debug_notes.append(f"resource={resource}")
debug_notes.append(f"parent_dir_ls={parent_ls[:20]}")
debug_notes.append(f"pkg_dir_ls={pkg_ls[:20]}")
except Exception:
pass
msg = (
f"Cannot import module '{module_name}' for classpath '{classpath}': {e2}\n"
+ "\n".join(debug_notes)
)
raise ImportError(msg) from e2

# ---------- 5) Build kwargs (desc.parameters override **kwargs) ----------
params = desc.get("parameters") or desc.get("params") or {}
instantiate = desc.get("instantiate", True)
call_kwargs = dict(kwargs)
call_kwargs.update(params)

# 5) Return an instance or the class object
# ---------- 6) Return instance or class ----------
return cls(**call_kwargs) if instantiate else cls
Loading