diff --git a/doc/source/nodes.md b/doc/source/nodes.md index 8d4ed5bc..56c2123c 100644 --- a/doc/source/nodes.md +++ b/doc/source/nodes.md @@ -141,6 +141,9 @@ output = node.execute(coords) You will also be able to set these tagged attrs in node definitions. + +The values of tagged attributes are preserved in node definitions, so it is important to tag all attributes that meaningfully contribute to a node's state as `attr=True`. + ## Serialization Any podpac Node can be saved, shared, and loaded using a JSON definition. This definition describes all of the nodes required to create and evaluate the final Node. diff --git a/podpac/core/data/datasource.py b/podpac/core/data/datasource.py index c714adcf..b28226aa 100644 --- a/podpac/core/data/datasource.py +++ b/podpac/core/data/datasource.py @@ -23,6 +23,8 @@ from podpac.core.node import Node from podpac.core.utils import common_doc, cached_property from podpac.core.node import COMMON_NODE_DOC +from podpac.core.interpolation.selector import Selector + log = logging.getLogger(__name__) @@ -221,7 +223,6 @@ def coordinates(self): else: nc = self.get_coordinates() self.set_trait("_coordinates", nc) - print("get_coordinates", nc) if self.cache_coordinates: self.put_property_cache(nc, "coordinates") return nc @@ -413,11 +414,17 @@ def _eval(self, coordinates, output=None, _selector=None): log.debug("Evaluating {} data source".format(self.__class__.__name__)) # Use the selector + if _selector is not None: (rsc, rsci) = _selector(self.coordinates, coordinates, index_type=self.coordinate_index_type) else: # get source coordinates that are within the requested coordinates bounds (rsc, rsci) = self.coordinates.intersect(coordinates, outer=True, return_index=True) + # make a nearest neighbor source to impose index_type restrictions + # use the original coords if there was no intersection + if rsc.size != 0: + temp_selector = Selector(method="nearest") + (rsc, rsci) = temp_selector.select(self.coordinates, rsc, 
index_type=self.coordinate_index_type) # if requested coordinates and coordinates do not intersect, shortcut with nan UnitsDataArary if rsc.size == 0: @@ -455,7 +462,6 @@ def _eval(self, coordinates, output=None, _selector=None): # get indexed boundary rsb = self._get_boundary(rsci) output.attrs["boundary_data"] = rsb - output.attrs["bounds"] = self.coordinates.bounds # save output to private for debugging if settings["DEBUG"]: @@ -468,6 +474,36 @@ def _eval(self, coordinates, output=None, _selector=None): return output + @common_doc(COMMON_DATA_DOC) + def create_output_array(self, coords, data=np.nan, attrs=None, outputs=None, **kwargs): + """ + Initialize an output data array. This adds `bounds` and `boundary_data` to the output attrs + + The `boundary_data` output.attrs is set to match this node's polygonal (i.e. non-rectangular) boundary. + For uniform grids, this is expected to be an empty dictionary. + + Parameters + ---------- + coords : podpac.Coordinates + {arr_coords} + data : None, number, or array-like (optional) + {arr_init_type} + attrs : dict + Attributes to add to output -- UnitsDataArray.create uses the 'crs' portion contained in here + outputs : list[string], optional + Default is self.outputs. List of strings listing the outputs + **kwargs + {arr_kwargs} + + Returns + ------- + {arr_return} + """ + output = super().create_output_array(coords, data=data, attrs=attrs, outputs=outputs, **kwargs) + output.attrs["bounds"], _ = self.get_bounds(crs=output.attrs["crs"]) # this is the bounds of the full dataset + output.attrs["boundary_data"] = self.boundary # this is the bounding polygon of the nonuniform dataset + return output + def find_coordinates(self): """ Get the available coordinates for the Node. For a DataSource, this is just the coordinates. 
diff --git a/podpac/core/data/rasterio_source.py b/podpac/core/data/rasterio_source.py index 041df158..0a40ddc9 100644 --- a/podpac/core/data/rasterio_source.py +++ b/podpac/core/data/rasterio_source.py @@ -21,6 +21,7 @@ from podpac.core.data.datasource import COMMON_DATA_DOC, DATA_DOC from podpac.core.data.file_source import BaseFileSource from podpac.core.authentication import S3Mixin +from podpac.core.interpolation.selector import Selector _logger = logging.getLogger(__name__) @@ -172,10 +173,10 @@ def get_data(self, coordinates, coordinates_index): data.data.ravel()[:] = raster_data.ravel() return data - def _get_window_coords(self,coordinates,new_coords): - new_coords,slc = new_coords.intersect(coordinates,return_index=True,outer=True) - window = ((slc[0].start,slc[0].stop),(slc[1].start,slc[1].stop)) - return window,new_coords + def _get_window_coords(self, coordinates, new_coords): + new_coords, slc = new_coords.intersect(coordinates, return_index=True, outer=True) + window = ((slc[0].start, slc[0].stop), (slc[1].start, slc[1].stop)) + return window, new_coords def get_data_overviews(self, coordinates): # Figure out how much coarser the request is than the actual data @@ -219,11 +220,13 @@ def get_data_overviews(self, coordinates): try: # read data within coordinates_index window at the resolution of the overview # Rasterio will then automatically pull from the overview + new_coords = Coordinates.from_geotransform( dataset.transform.to_gdal(), dataset.shape, crs=self.coordinates.crs ) - window,new_coords = self._get_window_coords(coordinates,new_coords) + window, new_coords = self._get_window_coords(coordinates, new_coords) missing_coords = self.coordinates.drop(["lat", "lon"]) + new_coords = merge_dims([new_coords, missing_coords]) new_coords = new_coords.transpose(*self.coordinates.dims) coordinates_shape = new_coords.shape[:2] @@ -322,4 +325,3 @@ def get_band_numbers(self, key, value): matches = np.nonzero(match)[0] + 1 return matches - diff --git 
a/podpac/core/interpolation/selector.py b/podpac/core/interpolation/selector.py index 8c3f7f6a..f39e0962 100755 --- a/podpac/core/interpolation/selector.py +++ b/podpac/core/interpolation/selector.py @@ -76,6 +76,7 @@ def __init__(self, method=None): else: self.method = method + def select(self, source_coords, request_coords, index_type="numpy"): """Sub-selects the source_coords based on the request_coords diff --git a/podpac/core/test/test_utils.py b/podpac/core/test/test_utils.py index de462950..46a653a8 100644 --- a/podpac/core/test/test_utils.py +++ b/podpac/core/test/test_utils.py @@ -13,6 +13,8 @@ import pandas as pd import xarray as xr import traitlets as tl +from requests import ConnectionError +from unittest.mock import MagicMock, patch import podpac from podpac.core.utils import common_doc @@ -24,6 +26,8 @@ from podpac.core.utils import ind2slice from podpac.core.utils import probe_node from podpac.core.utils import align_xarray_dict +from podpac.core.utils import _get_param +from podpac.core.utils import _get_from_url class TestCommonDocs(object): @@ -473,6 +477,9 @@ def test_nontuple(self): assert ind2slice([False, True, True, False, True, False]) == slice(1, 5) assert ind2slice([1, 3, 5]) == slice(1, 7, 2) + def test_empty_slice(self): + assert ind2slice([]) == slice(0, 0) + class AnotherOne(podpac.algorithm.Algorithm): def algorithm(self, inputs, coordinates): @@ -797,3 +804,78 @@ def test_align_xarray_dict(): assert(np.all(inputs['B'].data==data_b)) assert(np.all(inputs['C'].data==data_c)) assert(np.all((inputs['A'] + inputs['B'] + inputs['C']).shape == inputs['A'].shape)) + + +class TestGetParam: + def test_key_in_params_not_a_list(self): + params = {"test_key": 0} + ret = _get_param(params, "test_key") + assert ret == 0 + + def test_key_in_params_list(self): + params = {"test_key": [4, 5, 3, 0]} + ret = _get_param(params, "test_key") + assert ret == 4 + + def test_key_not_in_params_upper_in_params(self): + params = {"TEST_KEY": 0} + ret = 
_get_param(params, "test_key") + assert ret == 0 + + def test_key_not_in_params_upper_not_in_params(self): + params = {"test_key": 0} + ret = _get_param(params, "not_test_key") + assert ret is None + + +class TestGetFromUrl: + def test_raise_requests_error(self): + mock_requests = MagicMock() + mock_requests.get.side_effect = ConnectionError("Test Connection Error") + + with patch("podpac.core.utils.requests", mock_requests): + ret = _get_from_url("TEST/URL", None) + assert ret is None + + def test_raise_runtime_error(self): + mock_requests = MagicMock() + mock_requests.get.side_effect = RuntimeError("Test Runtime Error") + + with patch("podpac.core.utils.requests", mock_requests): + ret = _get_from_url("TEST/URL", None) + assert ret is None + + def test_session_is_none(self): + mock_get_return = MagicMock() + mock_get_return.status_code = 200 + mock_get_return.validation_value = "Expected Return" + mock_requests = MagicMock() + mock_requests.get.return_value = mock_get_return + + with patch("podpac.core.utils.requests", mock_requests): + ret = _get_from_url("TEST/URL", None) + + assert ret.validation_value == "Expected Return" + + def test_session_is_not_none(self): + mock_get_return = MagicMock() + mock_get_return.status_code = 200 + mock_get_return.validation_value = "Expected Return" + mock_session = MagicMock() + mock_session.get.return_value = mock_get_return + + ret = _get_from_url("TEST/URL", mock_session) + + assert ret.validation_value == "Expected Return" + + def test_status_code_not_200(self): + mock_get_return = MagicMock() + mock_get_return.status_code = 000 + mock_get_return.validation_value = "Expected Return" + mock_requests = MagicMock() + mock_requests.get.return_value = mock_get_return + + with patch("podpac.core.utils.requests", mock_requests): + ret = _get_from_url("TEST/URL", None) + + assert ret.validation_value == "Expected Return" diff --git a/podpac/core/utils.py b/podpac/core/utils.py index 3a0ecff0..c56d735c 100644 --- 
a/podpac/core/utils.py +++ b/podpac/core/utils.py @@ -347,6 +347,7 @@ def _get_from_url(url, session=None): r = None except RuntimeError as e: _log.warning("Cannot authenticate to {}. Check credentials. Error was as follows:".format(url) + str(e)) + r = None return r @@ -515,28 +516,49 @@ def _get_entry(key, out, definition): entry = OrderedDict() entry["name"] = out[key]["name"] entry["value"] = str(out[key]["value"]) - if out[key]["units"] not in [None, ""]: - entry["value"] = entry["value"] + " " + str(out[key]["units"]) + entry['label'] = out[key]['label'] entry["active"] = out[key]["active"] - entry["node_id"] = out[key]["node_hash"] + entry['node_class'] = out[key]['node_class'] + if 'node_hash' in out[key]: + entry["node_id"] = out[key]["node_hash"] entry["params"] = {} entry["inputs"] = {"inputs": [_get_entry(inp, out, definition) for inp in out[key]["inputs"]]} if len(entry["inputs"]["inputs"]) == 0: entry["inputs"] = {} return entry - -def _format_value(value, style, add_enumeration_labels): - """Helper for probe_node().""" + + +def _get_label(value, style, add_enumeration_labels): + """Helper for probe_node(). Handles both enumerations and units to be given back to the label field + + If no enumeration_legend is detected in style, or the user opts out of enumeration labels + with add_enumeration_labels = False, then units are returned. 
+ Else, an enumeration label is determined, defaulting to "unknown" in error cases + """ if not add_enumeration_labels or style.enumeration_legend is None: - return value - if np.isnan(value): - return str(value) + " (unknown)" - try: - return str(int(value)) + " ({})".format(style.enumeration_legend[int(value)]) - except ValueError: - return str(value) + " (unknown)" - -def probe_node(node, lat=None, lon=None, time=None, alt=None, crs=None, nested=False, add_enumeration_labels=True): + return style.units + if isinstance(value, list): # all list returns should be 2-D + ret = '' + for v in np.unique(value): + try: + new_label = style.enumeration_legend[int(v)] + except ValueError: + _log.warning( + 'Enumeration label lookup failed for node of name {}, returning unknown'.format(style.name) + ) + new_label = 'unknown' + ret += '{}={}, '.format(v, new_label) + return ret[:-2] + else: + if np.isnan(value): + return 'unknown' + try: + return str(style.enumeration_legend[int(value)]) + except ValueError: + _log.warning('Enumeration label lookup failed for node of name {}, returning unknown'.format(style.name)) + return 'unknown' + +def probe_node(node, lat=None, lon=None, time=None, alt=None, crs=None, nested=False, add_enumeration_labels=True, compute_hash=True): """Evaluates every part of a node / pipeline at a point and records which nodes are actively being used. 
@@ -595,12 +617,14 @@ def probe_node(node, lat=None, lon=None, time=None, alt=None, crs=None, nested=F active = True out[item] = { "active": active, - "value": _format_value(value, n.style, add_enumeration_labels), - "units": n.style.units, + "value": value, + "label": _get_label(value, n.style, add_enumeration_labels), "inputs": inputs, "name": n.style.name if n.style.name else item, - "node_hash": n.hash, + "node_class": type(n).__name__ } + if compute_hash: + out[item]['node_hash'] = n.hash raw_values[item] = value # Fix sources for Compositors if isinstance(n, podpac.compositor.OrderedCompositor): diff --git a/setup.py b/setup.py index e8bb9309..78aaf6db 100644 --- a/setup.py +++ b/setup.py @@ -71,7 +71,7 @@ "coveralls>=1.3", "six>=1.0", "attrs>=17.4.0", - "pre_commit>=1", + "pre_commit>=1" ], }