Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions quest/api/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .. import static
from ..plugins import load_providers, load_plugins, list_plugins
from ..database.database import get_db, db_session, select_datasets
import json


@add_async
Expand Down Expand Up @@ -374,16 +375,30 @@ def open_dataset(dataset, fmt=None, **kwargs):
m = get_metadata(dataset).get(dataset)
file_format = m.get('file_format')
path = m.get('file_path')
intake_plugin = m.get('intake_plugin')

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that the class that loads data for intake is now usually called a "driver" - there are many types of plugins. https://intake.readthedocs.io/en/latest/glossary.html

required = []
args = m.get('intake_args')
if len(args.strip()):
# Get args from json if available
required = json.loads(m.get('intake_args'))
else:
raise ValueError('No intake plugin found')

if path is None:
raise ValueError('No dataset file found')

if file_format not in list_plugins(static.PluginType.IO):
raise ValueError('No reader available for: %s' % file_format)

io = load_plugins(static.PluginType.IO, file_format)[file_format]
return io.open(path, fmt=fmt, **kwargs)
# Use intake plugin to open
if intake_plugin:
# New code, with 'intake_plugin' added to the local .db
plugin_name = 'open_' + intake_plugin

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems somewhat fragile.
You could look directly in the registry

import intake
cls = intake.registry[intake_plugin]
source = cls(*args, **kwargs)

or, perhaps better, you could construct either the relevant YAML block or an intake.catalog.local.LocalCatalogEntry instance, and have Intake do the lookup for you.

Note that in the Intake world, the driver here could be something like "parquet", but it can also be the fully-qualified class name like "intake_parquet.ParquetSource". Of course, if you have additional constraints within Quest, that's fine.


module = __import__('intake')
func = getattr(module, plugin_name)
source = func(*required, **kwargs)
return source.read()

@add_async
def visualize_dataset(dataset, update_cache=False, **kwargs):
Expand Down
2 changes: 2 additions & 0 deletions quest/database/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class Dataset(db.Entity):
message = orm.Optional(str)
file_path = orm.Optional(str, nullable=True)
visualization_path = orm.Optional(str)
intake_plugin = orm.Optional(str)
intake_args = orm.Optional(str)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A JSON representation of arguments, correct? If these are stored as strings, would it make sense to use the same YAML spec used by Intake text-file catalogs?


# setup relationships
collection = orm.Required(Collection)
Expand Down
5 changes: 4 additions & 1 deletion quest_provider_plugins/noaa_coastwatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import param
import pandas as pd
from urllib.error import HTTPError
import json
from urllib.parse import quote, urlencode

from quest.util.log import logger
Expand Down Expand Up @@ -38,7 +39,7 @@ def parameters(self):
def search_catalog(self, **kwargs):
raise NotImplementedError()
# TODO drop duplicates?

@property
def catalog_id(self):
return self._catalog_id
Expand Down Expand Up @@ -91,6 +92,8 @@ def download(self, catalog_id, file_path, dataset, **kwargs):
'file_path': file_path,
'file_format': 'timeseries-hdf5',
'datatype': DataType.TIMESERIES,
'intake_plugin': 'quest_timeseries_hdf5',

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

E241 multiple spaces after ':'

'intake_args': json.dumps([file_path]),
'parameter': p.parameter,
'unit': units[self.parameter_code],
'service_id': 'svc://noaa:{}/{}'.format(self.service_name, catalog_id)
Expand Down
6 changes: 4 additions & 2 deletions quest_provider_plugins/noaa_ncdc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import json

import param
import pandas as pd
Expand All @@ -7,7 +8,6 @@
from quest.static import ServiceType, GeomType, DataType
from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins


BASE_PATH = 'ncdc'


Expand All @@ -26,7 +26,7 @@ def metadata(self):
'geographical_areas': self.geographical_areas,
'bounding_boxes': self.bounding_boxes
}

@property
def parameters(self):
return {
Expand Down Expand Up @@ -95,6 +95,8 @@ def download(self, catalog_id, file_path, dataset, **kwargs):
'file_path': file_path,
'file_format': 'timeseries-hdf5',
'datatype': DataType.TIMESERIES,
'intake_plugin': 'quest_timeseries_hdf5',
'intake_args': json.dumps([file_path]),
'parameter': self.parameter,
'unit': self._unit_map[self.parameter],
'service_id': 'svc://ncdc:{}/{}'.format(self.service_name, catalog_id)
Expand Down
14 changes: 14 additions & 0 deletions quest_provider_plugins/usgs_ned.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
import os
import json

from ulmo.usgs import ned

from quest import util
from quest.static import ServiceType, DataType, GeomType
from quest.plugins import ProviderBase, SingleFileServiceBase


from ulmo.usgs import ned

Expand All @@ -22,6 +30,12 @@ class UsgsNedServiceBase(SingleFileServiceBase):
'elevation': 'elevation'
}

def download(self, catalog_id, file_path, dataset, **kwargs):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not know the context here, but you should be aware that Intake also has the ability to download source data files on first use https://intake.readthedocs.io/en/latest/catalog.html#caching-source-files-locally

# Call the base to download, but then update the dictionary with intake info
metadata = super().download(catalog_id, file_path, dataset, **kwargs)
metadata.update({'intake_plugin': 'rasterio', 'intake_args': json.dumps([file_path, {}])})
return metadata

def search_catalog(self, **kwargs):
service = self._description
catalog_entries = util.to_geodataframe(
Expand Down
7 changes: 7 additions & 0 deletions quest_provider_plugins/usgs_nlcd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import requests
import json
import pandas as pd

from quest import util
Expand All @@ -17,6 +18,12 @@ class UsgsNlcdServiceBase(SingleFileServiceBase):
'landcover': 'landcover'
}

def download(self, catalog_id, file_path, dataset, **kwargs):
# Call the base to download, but then update the dictionary with intake info
metadata = super().download(catalog_id, file_path, dataset, **kwargs)
metadata.update({'intake_plugin': 'rasterio', 'intake_args': json.dumps([file_path, {}])})
return metadata

def search_catalog(self, **kwargs):
base_url = 'https://www.sciencebase.gov/catalog/items'
params = [
Expand Down
3 changes: 3 additions & 0 deletions quest_provider_plugins/usgs_nwis.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from quest import util
from quest.static import ServiceType, GeomType, DataType
import json
from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins


Expand Down Expand Up @@ -70,6 +71,8 @@ def download(self, catalog_id, file_path, dataset, **kwargs):
'name': dataset,
'metadata': data,
'file_path': file_path,
'intake_plugin': 'quest_timeseries_hdf5',
'intake_args': json.dumps([file_path]),
'file_format': 'timeseries-hdf5',
'datatype': DataType.TIMESERIES,
'parameter': parameter,
Expand Down
3 changes: 3 additions & 0 deletions quest_provider_plugins/wmts_imagery.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import param
import requests
import rasterio
import json
from imageio import imread
from quest.static import ServiceType
from requests.packages.urllib3.util.retry import Retry
Expand Down Expand Up @@ -92,6 +93,8 @@ def download(self, catalog_id, file_path, dataset, **kwargs):
'metadata': {'bbox': adjusted_bbox},
'file_path': file_path,
'file_format': 'raster-gdal',
'intake_plugin': 'rasterio',
'intake_args': json.dumps([file_path, {}]),
'datatype': 'image',
}

Expand Down
17 changes: 10 additions & 7 deletions quest_tool_plugins/raster/rst_base.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import json
import rasterio

from quest import util
from quest.plugins import ToolBase
from quest.api import get_metadata
from quest.api import get_metadata, update_metadata
from quest.static import DataType, UriType

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

F401 'quest.static.UriType' imported but unused



Expand Down Expand Up @@ -37,18 +38,20 @@ def _run_tool(self):
"width": out_image.shape[1],
"transform": None})

new_dset, file_path, catalog_entry = self._create_new_dataset(
old_dataset=dataset,
ext='.tif'
)

new_metadata = {
'parameter': orig_metadata['parameter'],
'datatype': orig_metadata['datatype'],
'file_format': orig_metadata['file_format'],
'intake_plugin': orig_metadata['intake_plugin'],
'intake_args': json.dumps([file_path, {}]),
'unit': orig_metadata['unit']
}

new_dset, file_path, catalog_entry = self._create_new_dataset(
old_dataset=dataset,
ext='.tif',
dataset_metadata=new_metadata,
)
update_metadata(new_dset, quest_metadata=new_metadata)

with rasterio.open(file_path, "w", **out_meta) as dest:
dest.write(out_image)
Expand Down
15 changes: 9 additions & 6 deletions quest_tool_plugins/raster/rst_merge.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import param
import rasterio
import rasterio.mask
Expand Down Expand Up @@ -42,18 +43,20 @@ def _run_tool(self):
if get_metadata(dataset)[dataset]['unit'] != orig_metadata['unit']:
raise ValueError('Units must match for all datasets')

new_dset, file_path, catalog_entry = self._create_new_dataset(
old_dataset=datasets[0],
ext='.tif'
)

new_metadata = {
'parameter': orig_metadata['parameter'],
'datatype': orig_metadata['datatype'],
'file_format': orig_metadata['file_format'],
'intake_plugin': orig_metadata['intake_plugin'],
'intake_args': json.dumps([file_path, {}]),
'unit': orig_metadata['unit'],
}

new_dset, file_path, catalog_entry = self._create_new_dataset(
old_dataset=datasets[0],
ext='.tif',
dataset_metadata=new_metadata,
)
update_metadata(new_dset, quest_metadata=new_metadata)

open_datasets = [rasterio.open(d) for d in raster_files]
profile = open_datasets[0].profile
Expand Down
15 changes: 9 additions & 6 deletions quest_tool_plugins/raster/rst_reprojection.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import param
import rasterio
import subprocess
Expand Down Expand Up @@ -35,17 +36,19 @@ def _run_tool(self):

dst_crs = self.new_crs

new_dset, file_path, catalog_entry = self._create_new_dataset(
old_dataset=dataset,
ext='.tif'
)

new_metadata = {
'parameter': orig_metadata['parameter'],
'datatype': orig_metadata['datatype'],
'file_format': orig_metadata['file_format'],
'intake_plugin': orig_metadata['intake_plugin'],
'intake_args': json.dumps([file_path, {}]),
}

new_dset, file_path, catalog_entry = self._create_new_dataset(
old_dataset=dataset,
ext='.tif',
dataset_metadata=new_metadata,
)
update_metadata(new_dset, quest_metadata=new_metadata)

# run filter
with rasterio.open(src_path) as src:
Expand Down
17 changes: 10 additions & 7 deletions quest_tool_plugins/timeseries/ts_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
from quest import util
from quest.plugins import ToolBase
from quest.api import get_metadata
from quest.api import get_metadata, update_metadata
from quest.plugins import load_plugins
from quest.static import UriType, DataType

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

F401 'quest.static.UriType' imported but unused


Expand Down Expand Up @@ -35,18 +36,20 @@ def _run_tool(self):


# setup new dataset
new_dset, file_path, catalog_entry = self._create_new_dataset(
old_dataset=dataset,
ext='.h5'
)

new_metadata = {
'parameter': new_df.metadata.get('parameter'),
'unit': new_df.metadata.get('unit'),
'datatype': orig_metadata['datatype'],
'file_format': orig_metadata['file_format'],
'intake_plugin': orig_metadata['intake_plugin'],
'intake_args': json.dumps([file_path]),
}

new_dset, file_path, catalog_entry = self._create_new_dataset(
old_dataset=dataset,
ext='.h5',
dataset_metadata=new_metadata,
)
update_metadata(new_dset, quest_metadata=new_metadata)

# save dataframe
io.write(file_path, new_df, new_metadata)
Expand Down
18 changes: 11 additions & 7 deletions quest_tool_plugins/timeseries/ts_flow_duration.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
from quest import util
from quest.plugins import ToolBase
from quest.api import get_metadata
from quest.api import get_metadata, update_metadata
from quest.static import DataType, UriType

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

F401 'quest.static.UriType' imported but unused

from quest.plugins import load_plugins
from quest.util import setattr_on_dataframe
Expand Down Expand Up @@ -43,19 +44,22 @@ def _run_tool(self):
setattr_on_dataframe(df, 'metadata', metadata)
new_df = df
# setup new dataset

new_dset, file_path, catalog_entry = self._create_new_dataset(
old_dataset=dataset,
ext='.h5',
)

new_metadata = {
'parameter': new_df.metadata.get('parameter'),
'datatype': orig_metadata['datatype'],
'options': self.set_options,
'file_format': orig_metadata['file_format'],
'unit': new_df.metadata.get('unit'),
'intake_plugin': orig_metadata['intake_plugin'],
'intake_args': json.dumps([file_path]),
}

new_dset, file_path, catalog_entry = self._create_new_dataset(
old_dataset=dataset,
ext='.h5',
dataset_metadata=new_metadata,
)
update_metadata(new_dset, quest_metadata=new_metadata)

# save dataframe
output = load_plugins('io', 'xy-hdf5')['xy-hdf5']
Expand Down
Loading