Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions podpac/core/authentication.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@

import requests
import traitlets as tl
from lazy_import import lazy_module, lazy_function
from lazy_import import lazy_module

from podpac.core.settings import settings
from podpac.core.utils import cached_property

# Optional dependencies
pydap_setup_session = lazy_function("pydap.cas.urs.setup_session")
# see pydap_source.py for import note
# pydap_setup_session = lazy_function("pydap.cas.urs.setup_session")
from pydap.cas.urs import setup_session as pydap_setup_session

_log = logging.getLogger(__name__)
_USERNAME_AT = "username@{}"
Expand Down
1 change: 0 additions & 1 deletion podpac/core/cache/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
make_cache_ctrl,
clear_cache,
cache_cleanup,
DiskCacheMixin,
)
from podpac.core.cache.ram_cache_store import RamCacheStore
from podpac.core.cache.disk_cache_store import DiskCacheStore
Expand Down
56 changes: 25 additions & 31 deletions podpac/core/cache/cache_ctrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@
from podpac.core.cache.disk_cache_store import DiskCacheStore
from podpac.core.cache.s3_cache_store import S3CacheStore
import traitlets as tl
import logging



_CACHE_STORES = {"ram": RamCacheStore, "disk": DiskCacheStore, "s3": S3CacheStore}

_CACHE_NAMES = {RamCacheStore: "ram", DiskCacheStore: "disk", S3CacheStore: "s3"}

_CACHE_MODES = ["ram", "disk", "network", "all"]
_CACHE_MODES = ["ram", "disk", "s3", "all"]

# Error messages used in 3 or more places
_INVALID_NODE = "Invalid node (must be of type Node, not '%s')"
Expand All @@ -24,6 +26,7 @@
_INVALID_MODE = "Invalid mode (must be one of %s, not '%s')"
_INVALID_ITEM_ASTERISK = "Invalid item ('*' is reserved)"

_logger = logging.getLogger(__name__)

def get_default_cache_ctrl():
"""
Expand Down Expand Up @@ -63,7 +66,15 @@ def make_cache_ctrl(names):
if name not in _CACHE_STORES:
raise ValueError("Unknown cache store type '%s', options are %s" % (name, list(_CACHE_STORES)))

return CacheCtrl([_CACHE_STORES[name]() for name in names])
cache_stores = []
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
cache_stores = []
# makes all requested cache stores and fails gracefully if one of the stores is unavailable
cache_stores = []

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I got the approval notice before these suggestions and jumped the gun a bit. I'll get this into the branch that is still merging into main.

for name in names:
try:
cache_store = _CACHE_STORES[name]()
cache_stores.append(cache_store)
except Exception as e:
_logger.warning("Cannot create cache_store of type {} -- error={}".format(name, e))

return CacheCtrl(cache_stores)


def clear_cache(mode="all"):
Expand All @@ -73,10 +84,13 @@ def clear_cache(mode="all"):
Arguments
---------
mode : str
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'.
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 's3', 'all'. Default 'all'.
"""

cache_ctrl = get_default_cache_ctrl()
if mode == "all":
modes = _CACHE_STORES.keys()
else:
modes = [mode]
cache_ctrl = make_cache_ctrl(modes)
cache_ctrl.clear(mode=mode)


Expand Down Expand Up @@ -128,7 +142,7 @@ def _validate_args(node, item, coordinates, mode):
coordinates : :class:`podpac.Coordinates`, optional
Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output
mode : str
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'.
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 's3', 'all'. Default 'all'.
"""
if not isinstance(node, podpac.Node):
raise TypeError(_INVALID_NODE % type(node))
Expand Down Expand Up @@ -159,7 +173,7 @@ def put(self, node, data, item, coordinates=None, expires=None, mode="all", upda
coordinates : :class:`podpac.Coordinates`, optional
Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output
mode : str
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'.
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 's3', 'all'. Default 'all'.
expires : float, datetime, timedelta
Expiration date. If a timedelta is supplied, the expiration date will be calculated from the current time.
update : bool
Expand All @@ -182,7 +196,7 @@ def get(self, node, item, coordinates=None, mode="all"):
coordinates : :class:`podpac.Coordinates`, optional
Coordinates for which cached object should be retrieved, for coordinate-dependent data such as evaluation output
mode : str
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'.
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 's3', 'all'. Default 'all'.

Returns
-------
Expand Down Expand Up @@ -213,7 +227,7 @@ def has(self, node, item, coordinates=None, mode="all"):
coordinates: Coordinate, optional
Coordinates for which cached object should be checked
mode : str
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'.
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 's3', 'all'. Default 'all'.

Returns
-------
Expand All @@ -240,7 +254,7 @@ def rem(self, node, item, coordinates=None, mode="all"):
coordinates : :class:`podpac.Coordinates`, str
Delete only cached objects for these coordinates. Use `'*'` to match all coordinates.
mode : str
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'.
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 's3', 'all'. Default 'all'.
"""

if not isinstance(node, podpac.Node):
Expand Down Expand Up @@ -271,7 +285,7 @@ def clear(self, mode="all"):
Parameters
------------
mode : str
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 'network', 'all'. Default 'all'.
determines what types of the `CacheStore` are affected. Options: 'ram', 'disk', 's3', 'all'. Default 'all'.
"""

if mode not in _CACHE_MODES:
Expand All @@ -289,23 +303,3 @@ def cleanup(self):

for c in self._cache_stores:
c.cleanup()


# --------------------------------------------------------#
# Mixins
# --------------------------------------------------------#


class DiskCacheMixin(tl.HasTraits):
"""Mixin to add disk caching to the Node by default."""

property_cache_ctrl = tl.Instance(CacheCtrl, allow_none=True)

@tl.default("property_cache_ctrl")
def _property_cache_ctrl_default(self):
# get the default cache_ctrl and addd a disk cache store if necessary
default_ctrl = get_default_cache_ctrl()
stores = default_ctrl._cache_stores
if not any(isinstance(store, DiskCacheStore) for store in default_ctrl._cache_stores):
stores.append(DiskCacheStore())
return CacheCtrl(stores)
2 changes: 1 addition & 1 deletion podpac/core/cache/cache_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


class CacheStore(object):
"""Abstract parent class for classes representing actual data stores (e.g. RAM, local disk, network storage).
"""Abstract parent class for classes representing actual data stores (e.g. RAM, local disk, s3 storage).
Includes implementation of common hashing operations and call signature for required abstract methods:
put(), get(), rem(), has()
"""
Expand Down
8 changes: 6 additions & 2 deletions podpac/core/cache/test/test_node_caches.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@ def test_uid_no_definition(self):
assert not hash_cache_node._from_cache
o2 = hash_cache_node2.eval(coords)
assert hash_cache_node2._from_cache
o3 = hash_cache_node3.eval(coords)
assert not hash_cache_node3._from_cache
try:
o3 = hash_cache_node3.eval(coords)
assert not hash_cache_node3._from_cache
finally:
hash_cache_node3.rem_cache("*",coordinates="*")


def test_global_ram_cache(self):
my_node = SinCoords(cache_output=True)
Expand Down
2 changes: 1 addition & 1 deletion podpac/core/coordinates/test/test_coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ def test_alt_units(self):
assert c.alt_units in ["us-ft", "US survey foot"] # pyproj < 3.0 # pyproj >= 3.0

def test_create_from_uniform_coords(self):
Coordinates([[clinspace("2020-01-01T09:36", "2020-01-02T15:35", 8)]], [['time']])
Coordinates([clinspace("2020-01-01T09:36", "2020-01-02T15:35", 8)], [['time']])

class TestCoordinatesSerialization(object):
def test_definition(self):
Expand Down
6 changes: 3 additions & 3 deletions podpac/core/coordinates/test/test_coordinates_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,15 +621,15 @@ def test_add_custom_dimension():
# Unstacked coords, one dimension, nearest neighbor interpolation
c1 = podpac.Coordinates([[1, 2, 3]], ["mydim"])
c1_interp = podpac.Coordinates([[1.9, 2, 3]], ["mydim"])
n1 = podpac.data.Array(source=[1, 2, 3], coordinates=c1)
n1 = podpac.data.Array(source=[1, 2, 3], coordinates=c1).interpolate(interpolation="nearest")
data1 = n1.eval(c1_interp)
assert np.array_equal(data1.data, np.array([2, 2, 3]))
assert "mydim" in data1.dims

# Unstacked coords, one dimension, linear neighbor interpolation
c1 = podpac.Coordinates([[1, 2, 3]], ["mydim"])
c1_interp = podpac.Coordinates([[1.9, 2, 3]], ["mydim"])
n1 = podpac.data.Array(source=[1, 2, 3], coordinates=c1, interpolation="bilinear")
n1 = podpac.data.Array(source=[1, 2, 3], coordinates=c1).interpolate(interpolation="bilinear")
data1 = n1.eval(c1_interp)
assert np.array_equal(data1.data, np.array([1.9, 2, 3]))
assert "mydim" in data1.dims
Expand All @@ -655,7 +655,7 @@ def test_add_custom_dimension():
assert "mydim" in c3.udims
assert "lat" in c3.udims
c3_interp = podpac.Coordinates([[[1.9, 2, 3], [4.9, 5, 6]]], dims=["mydim_lat"])
n3 = podpac.data.Array(source=[1, 2, 3], coordinates=c3, interpolation="nearest")
n3 = podpac.data.Array(source=[1, 2, 3], coordinates=c3).interpolate(interpolation="nearest")
data3 = n3.eval(c3_interp)
assert np.array_equal(data3.data, np.array([2, 2, 3]))
assert "mydim_lat" in data3.dims
Expand Down
2 changes: 1 addition & 1 deletion podpac/core/data/csv_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def _default_outputs(self):
# -------------------------------------------------------------------------

def open_dataset(self, f):
return pd.read_csv(f, parse_dates=True, infer_datetime_format=True, header=self.header)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What was the problem with infer_datetime_format? Is this new pandas behavior? It seems like a potentially important flag...

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the version of pandas that was installed when pip installing podpac, that argument no longer existed.

I suspect that my using Python 3.11.12 for this effort led to some unexpected "newer Python" updating. This change and the "zarr<3" constraint in setup both seem to be consequences of the Python version upgrade.

return pd.read_csv(f, parse_dates=True, header=self.header)

@cached_property
def dims(self):
Expand Down
2 changes: 1 addition & 1 deletion podpac/core/data/datasource.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def _get_data(self, rc, rci):

"""
# get data from data source at requested source coordinates and requested source coordinates index
data = self.get_data(rc, rci)
data = deepcopy(self.get_data(rc, rci))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the deepcopy? That's rarely needed...

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This later line

        udata_array.data[np.isin(udata_array.data, self.nan_vals)] = self.nan_val

would raise "read-only" permission errors in one of the unit tests if the original get_data result was used directly.


# convert data into UnitsDataArray depending on format
# TODO: what other processing needs to happen here?
Expand Down
2 changes: 2 additions & 0 deletions podpac/core/data/h5py_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ class H5PY(FileKeysMixin, BaseFileSource):
file_mode = tl.Unicode(default_value="r").tag(readonly=True)
array_dims = tl.List(trait=tl.Unicode()).tag(readonly=True)

coordinate_index_type = "slice"

@cached_property
def dataset(self):
return h5py.File(self.source, self.file_mode)
Expand Down
18 changes: 12 additions & 6 deletions podpac/core/data/pydap_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,26 @@
import requests
from webob.exc import HTTPError

# Helper utility for optional imports
from lazy_import import lazy_module, lazy_class
# # Helper utility for optional imports
# from lazy_import import lazy_module, lazy_class

# Internal dependencies
from podpac.core import authentication
from podpac.core.utils import common_doc, cached_property
from podpac.core.data.datasource import COMMON_DATA_DOC, DataSource



# Optional dependencies
pydap = lazy_module("pydap")
lazy_module("pydap.client")
lazy_module("pydap.model")
# pydap = lazy_module("pydap")
# lazy_module("pydap.client")
# lazy_module("pydap.model")
# lazy_class("pydap.__spec__")

# Lazy loading was conflicting with xarray's access of pydap.__spec__
import pydap
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realized we can still make pydap an optional dependency using the pattern:

Suggested change
import pydap
try:
import pydap
import pydap.model
import pydap.client
except ImportError:
class Dum:
pass
pydap = Dum()
pydap.model =None
pydap.client = None

import pydap.model
import pydap.client



_logger = logging.getLogger(__name__)
Expand Down
4 changes: 0 additions & 4 deletions podpac/core/data/test/test_array.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,3 @@ def test_coordinates(self):
node = Array(source=self.data).interpolate()
with pytest.raises(tl.TraitError):
node.coordinates

def test_no_cache(self):
node = Array().interpolate()
assert len(node.source.cache_ctrl._cache_stores) == 0
2 changes: 1 addition & 1 deletion podpac/core/data/test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,4 @@ def test_extra_dimension_selection(self):
node = Dataset(source=self.source, data_key="data", selection={"day": 1})
assert np.all([d in ["lat", "lon"] for d in node.dims])
out = node.eval(node.coordinates)
np.testing.assert_array_equal(out, self.data[1].T)
np.testing.assert_array_equal(out, self.data[1])
10 changes: 3 additions & 7 deletions podpac/core/interpolation/interpolation_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,17 +737,13 @@ def _fix_coordinates_for_none_interp(self, eval_coordinates, source_coordinates)
covered_udims = []
for k in interpolator_queue:
# Keep the eval_coordinates for some dimensions
dims = (
source_coordinates.dims
if isinstance(interpolator_queue[k], NoneInterpolator)
else eval_coordinates.dims
)
for d in dims:
coords = source_coordinates if isinstance(interpolator_queue[k], NoneInterpolator) else eval_coordinates
for d in coords.dims:
ud = d.split("_")
for u in ud:
if u in k:
new_dims.append(d)
new_coords.append(eval_coordinates[d])
new_coords.append(coords[d])
covered_udims.extend(ud)
break
new_coordinates = Coordinates(new_coords, new_dims)
Expand Down
Loading
Loading