From 2a0b968adf816745826d962a10efc8b888c3842f Mon Sep 17 00:00:00 2001 From: dgallup Date: Wed, 14 Nov 2018 10:26:11 -0700 Subject: [PATCH 01/12] Replaces built in plugins for loading with intake plugins: intake_questgdal, intake_questhdf5 --- quest/api/datasets.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/quest/api/datasets.py b/quest/api/datasets.py index d29e9681..91d06c4f 100644 --- a/quest/api/datasets.py +++ b/quest/api/datasets.py @@ -379,8 +379,12 @@ def open_dataset(dataset, fmt=None, **kwargs): if file_format not in list_plugins('io'): raise ValueError('No reader available for: %s' % file_format) - io = load_plugins('io', file_format)[file_format] - return io.open(path, fmt=fmt, **kwargs) + # Use the quest_xyHdf5, quest_timeseries_hdf5, and quest_raster_gdal intake plugins to open + plugin_name = 'open_quest_' + file_format + module = __import__('intake') + func = getattr(module, plugin_name) + source = func(path, fmt=fmt, **kwargs) + return source.read() @add_async From 058179d880c7b5fa6183144235370353a48a7f4c Mon Sep 17 00:00:00 2001 From: dgallup Date: Wed, 14 Nov 2018 10:37:10 -0700 Subject: [PATCH 02/12] Replaces built in plugins for loading with intake plugins: intake_questgdal, intake_questhdf5 --- quest/api/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quest/api/datasets.py b/quest/api/datasets.py index 91d06c4f..81d568bf 100644 --- a/quest/api/datasets.py +++ b/quest/api/datasets.py @@ -379,7 +379,7 @@ def open_dataset(dataset, fmt=None, **kwargs): if file_format not in list_plugins('io'): raise ValueError('No reader available for: %s' % file_format) - # Use the quest_xyHdf5, quest_timeseries_hdf5, and quest_raster_gdal intake plugins to open + # Use quest_xyHdf5, quest_timeseries_hdf5, and quest_raster_gdal intake plugins to open plugin_name = 'open_quest_' + file_format module = __import__('intake') func = getattr(module, plugin_name) From 0b226c77cbae56d3e552cf8d00a7b58e4001dbe0 Mon Sep 17 00:00:00 2001 From: douggallup Date: Fri, 16 Nov 2018 09:44:29 -0700 Subject: [PATCH 03/12] Replaces built in plugins for loading with intake plugins: intake_questgdal, intake_questhdf5 --- quest/api/datasets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/quest/api/datasets.py b/quest/api/datasets.py index 81d568bf..61b20b4e 100644 --- a/quest/api/datasets.py +++ b/quest/api/datasets.py @@ -380,6 +380,7 @@ def open_dataset(dataset, fmt=None, **kwargs): raise ValueError('No reader available for: %s' % file_format) # Use quest_xyHdf5, quest_timeseries_hdf5, and quest_raster_gdal intake plugins to open + # Temp plugin_name = 'open_quest_' + file_format module = __import__('intake') func = getattr(module, plugin_name) From 239f6bc33f4ff43cdef9de9ec09993f0fc427c53 Mon Sep 17 00:00:00 2001 From: douggallup Date: Mon, 19 Nov 2018 10:31:19 -0700 Subject: [PATCH 04/12] Replaces built in plugins for loading with intake plugins: intake_questgdal, intake_questhdf5 --- quest/api/datasets.py | 27 ++++++++++++++++-- quest/database/database.py | 2 ++ quest_provider_plugins/noaa_coastwatch.py | 5 +++- quest_provider_plugins/noaa_ncdc.py | 5 +++- quest_provider_plugins/usgs_ned.py | 3 ++ quest_provider_plugins/usgs_nlcd.py | 3 ++ quest_provider_plugins/usgs_nwis.py | 3 ++ quest_provider_plugins/wmts_imagery.py | 3 ++ quest_tool_plugins/raster/rst_base.py | 2 ++ quest_tool_plugins/raster/rst_merge.py | 2 ++ quest_tool_plugins/raster/rst_reprojection.py | 2 ++ quest_tool_plugins/timeseries/ts_base.py | 2 ++ 
.../timeseries/ts_flow_duration.py | 2 ++ .../whitebox/whitebox_watershed.py | 8 ++++++ .../projects_template/default/metadata.db | Bin 49152 -> 57344 bytes .../projects_template/project1/metadata.db | Bin 49152 -> 57344 bytes .../projects_template/project2/metadata.db | Bin 49152 -> 57344 bytes .../projects_template/test_data/metadata.db | Bin 49152 -> 57344 bytes 18 files changed, 64 insertions(+), 5 deletions(-) diff --git a/quest/api/datasets.py b/quest/api/datasets.py index 61b20b4e..648b11e4 100644 --- a/quest/api/datasets.py +++ b/quest/api/datasets.py @@ -9,6 +9,7 @@ import pandas as pd import param import os +import json @add_async @@ -372,6 +373,13 @@ def open_dataset(dataset, fmt=None, **kwargs): m = get_metadata(dataset).get(dataset) file_format = m.get('file_format') path = m.get('file_path') + intake_plugin = m.get('intake_plugin') + required = [] + args = m.get('intake_args') + if args is not None: + print('DEBUG: raw args in open_dataset() = {}'.format(args)) + required = json.loads(m.get('intake_args')) + print('DEBUG: raw args, after json.loads in open_dataset() = {}'.format(required)) if path is None: raise ValueError('No dataset file found') @@ -380,11 +388,24 @@ def open_dataset(dataset, fmt=None, **kwargs): raise ValueError('No reader available for: %s' % file_format) # Use quest_xyHdf5, quest_timeseries_hdf5, and quest_raster_gdal intake plugins to open - # Temp - plugin_name = 'open_quest_' + file_format + if intake_plugin: + # New code, with 'intake_plugin' added to the local .db + plugin_name = 'open_' + intake_plugin + print('Opening with dynamic intake using plugin %s' % plugin_name) + else: + # Old code, hard coded + plugin_name = 'open_quest_' + file_format.replace('-', '_') + print('Opening with static intake using plugin %s' % plugin_name) module = __import__('intake') func = getattr(module, plugin_name) - source = func(path, fmt=fmt, **kwargs) + if intake_plugin: + # New code, with 'intake_plugin' added to the local .db + print('Calling function with required args %s' % required) + source = func(*required, **kwargs) + else: + # Old code, hard coded + print('Calling old function with path %s' % path) + source = func(path, fmt=fmt, **kwargs) return source.read() diff --git a/quest/database/database.py b/quest/database/database.py index f5a8df4b..54284dd9 100644 --- a/quest/database/database.py +++ b/quest/database/database.py @@ -45,6 +45,8 @@ class Dataset(db.Entity): message = orm.Optional(str) file_path = orm.Optional(str, nullable=True) visualization_path = orm.Optional(str) + intake_plugin = orm.Optional(str) + intake_args = orm.Optional(str) # setup relationships collection = orm.Required(Collection) diff --git a/quest_provider_plugins/noaa_coastwatch.py b/quest_provider_plugins/noaa_coastwatch.py index 405950a8..f56db770 100644 --- a/quest_provider_plugins/noaa_coastwatch.py +++ b/quest_provider_plugins/noaa_coastwatch.py @@ -5,6 +5,7 @@ import param from urllib.parse import quote, urlencode from urllib.error import HTTPError +import json from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins from quest.util.log import logger @@ -38,7 +39,7 @@ def parameters(self): def search_catalog(self, **kwargs): raise NotImplementedError() # TODO drop duplicates? 
- + @property def catalog_id(self): return self._catalog_id @@ -90,6 +91,8 @@ def download(self, catalog_id, file_path, dataset, **kwargs): metadata = { 'file_path': file_path, 'file_format': 'timeseries-hdf5', + 'intake_plugin': 'quest_timeseries_hdf5', + 'intake_args': json.dumps([file_path]), 'datatype': 'timeseries', 'parameter': p.parameter, 'unit': units[self.parameter_code], diff --git a/quest_provider_plugins/noaa_ncdc.py b/quest_provider_plugins/noaa_ncdc.py index a14b08dc..e8295e13 100644 --- a/quest_provider_plugins/noaa_ncdc.py +++ b/quest_provider_plugins/noaa_ncdc.py @@ -3,6 +3,7 @@ import pandas as pd import param import os +import json # from ulmo.ncdc.ghcn_daily.core import _get_inventory as _get_ghcn_inventory @@ -24,7 +25,7 @@ def metadata(self): 'geographical_areas': self.geographical_areas, 'bounding_boxes': self.bounding_boxes } - + @property def parameters(self): return { @@ -92,6 +93,8 @@ def download(self, catalog_id, file_path, dataset, **kwargs): metadata = { 'file_path': file_path, 'file_format': 'timeseries-hdf5', + 'intake_plugin': 'quest_timeseries_hdf5', + 'intake_args': json.dumps([file_path]), 'datatype': 'timeseries', 'parameter': self.parameter, 'unit': self._unit_map[self.parameter], diff --git a/quest_provider_plugins/usgs_ned.py b/quest_provider_plugins/usgs_ned.py index f2370183..1a1e80e3 100644 --- a/quest_provider_plugins/usgs_ned.py +++ b/quest_provider_plugins/usgs_ned.py @@ -2,6 +2,7 @@ from quest import util from ulmo.usgs import ned import os +import json DEFAULT_FILE_PATH = os.path.join('usgs','ned') CACHE_FILE = 'ned_%s_metadata.json' @@ -32,6 +33,8 @@ def search_catalog(self, **kwargs): lambda x: {'download_url': x['download url'], 'filename': x['filename'], 'file_format': 'raster-gdal', + 'intake_plugin': 'rasterio', + 'intake_args': json.dumps([x['filename'], {}]), 'extract_from_zip': '.img', }, axis=1) diff --git a/quest_provider_plugins/usgs_nlcd.py b/quest_provider_plugins/usgs_nlcd.py index 75b2d8cb..0fbc0cfe 100644 --- a/quest_provider_plugins/usgs_nlcd.py +++ b/quest_provider_plugins/usgs_nlcd.py @@ -2,6 +2,7 @@ import pandas as pd import requests +import json from quest.plugins import ProviderBase, SingleFileServiceBase from quest import util @@ -41,6 +42,8 @@ def search_catalog(self, **kwargs): lambda x: {'download_url': x['download_url'], 'filename': x['filename'], 'file_format': 'raster-gdal', + 'intake_plugin': 'rasterio', + 'intake_args': json.dumps([x['filename'], {}]), 'extract_from_zip': '.tif', }, axis=1) diff --git a/quest_provider_plugins/usgs_nwis.py b/quest_provider_plugins/usgs_nwis.py index ba18a681..fc506d99 100644 --- a/quest_provider_plugins/usgs_nwis.py +++ b/quest_provider_plugins/usgs_nwis.py @@ -8,6 +8,7 @@ from ulmo.usgs import nwis from quest import util import param +import json BASE_PATH = 'usgs-nwis' @@ -68,6 +69,8 @@ def download(self, catalog_id, file_path, dataset, **kwargs): 'name': dataset, 'metadata': data, 'file_path': file_path, + 'intake_args': json.dumps([file_path]), + 'intake_plugin': 'quest_timeseries_hdf5', 'file_format': 'timeseries-hdf5', 'datatype': 'timeseries', 'parameter': parameter, diff --git a/quest_provider_plugins/wmts_imagery.py b/quest_provider_plugins/wmts_imagery.py index a542f760..98c39f53 100644 --- a/quest_provider_plugins/wmts_imagery.py +++ b/quest_provider_plugins/wmts_imagery.py @@ -10,6 +10,7 @@ import param import requests import rasterio +import json from imageio import imread from quest.static import ServiceType from requests.packages.urllib3.util.retry import Retry 
@@ -92,6 +93,8 @@ def download(self, catalog_id, file_path, dataset, **kwargs): 'metadata': {'bbox': adjusted_bbox}, 'file_path': file_path, 'file_format': 'raster-gdal', + 'intake_plugin': 'rasterio', + 'intake_args': json.dumps([file_path, {}]), 'datatype': 'image', } diff --git a/quest_tool_plugins/raster/rst_base.py b/quest_tool_plugins/raster/rst_base.py index b069960b..77672f3b 100644 --- a/quest_tool_plugins/raster/rst_base.py +++ b/quest_tool_plugins/raster/rst_base.py @@ -43,6 +43,8 @@ def _run_tool(self): 'parameter': orig_metadata['parameter'], 'datatype': orig_metadata['datatype'], 'file_format': orig_metadata['file_format'], + 'intake_plugin': orig_metadata['intake_plugin'], + 'intake_args': orig_metadata['intake_args'], 'unit': orig_metadata['unit'] } diff --git a/quest_tool_plugins/raster/rst_merge.py b/quest_tool_plugins/raster/rst_merge.py index 4275a41a..0ace1647 100644 --- a/quest_tool_plugins/raster/rst_merge.py +++ b/quest_tool_plugins/raster/rst_merge.py @@ -44,6 +44,8 @@ def _run_tool(self): 'parameter': orig_metadata['parameter'], 'datatype': orig_metadata['datatype'], 'file_format': orig_metadata['file_format'], + 'intake_plugin': orig_metadata['intake_plugin'], + 'intake_args': orig_metadata['intake_args'], 'unit': orig_metadata['unit'], } diff --git a/quest_tool_plugins/raster/rst_reprojection.py b/quest_tool_plugins/raster/rst_reprojection.py index a1420778..14e85d84 100644 --- a/quest_tool_plugins/raster/rst_reprojection.py +++ b/quest_tool_plugins/raster/rst_reprojection.py @@ -38,6 +38,8 @@ def _run_tool(self): 'parameter': orig_metadata['parameter'], 'datatype': orig_metadata['datatype'], 'file_format': orig_metadata['file_format'], + 'intake_plugin': orig_metadata['intake_plugin'], + 'intake_args': orig_metadata['intake_args'], } new_dset, file_path, catalog_entry = self._create_new_dataset( diff --git a/quest_tool_plugins/timeseries/ts_base.py b/quest_tool_plugins/timeseries/ts_base.py index 945d9395..00081330 100644 --- a/quest_tool_plugins/timeseries/ts_base.py +++ b/quest_tool_plugins/timeseries/ts_base.py @@ -39,6 +39,8 @@ def _run_tool(self): 'unit': new_df.metadata.get('unit'), 'datatype': orig_metadata['datatype'], 'file_format': orig_metadata['file_format'], + 'intake_plugin': orig_metadata['intake_plugin'], + 'intake_args': orig_metadata['intake_args'], } new_dset, file_path, catalog_entry = self._create_new_dataset( diff --git a/quest_tool_plugins/timeseries/ts_flow_duration.py b/quest_tool_plugins/timeseries/ts_flow_duration.py index ccc56101..8231e39d 100644 --- a/quest_tool_plugins/timeseries/ts_flow_duration.py +++ b/quest_tool_plugins/timeseries/ts_flow_duration.py @@ -48,6 +48,8 @@ def _run_tool(self): 'options': self.set_options, 'file_format': orig_metadata['file_format'], 'unit': new_df.metadata.get('unit'), + 'intake_plugin': orig_metadata['intake_plugin'], + 'intake_args': orig_metadata['intake_args'], } new_dset, file_path, catalog_entry = self._create_new_dataset( diff --git a/quest_tool_plugins/whitebox/whitebox_watershed.py b/quest_tool_plugins/whitebox/whitebox_watershed.py index 9ec5b3dc..a8ebbc22 100644 --- a/quest_tool_plugins/whitebox/whitebox_watershed.py +++ b/quest_tool_plugins/whitebox/whitebox_watershed.py @@ -43,6 +43,8 @@ def _run_tool(self): 'parameter': 'streams', 'datatype': orig_metadata['datatype'], 'file_format': orig_metadata['file_format'], + 'intake_plugin': orig_metadata['intake_plugin'], + 'intake_args': orig_metadata['intake_args'], } update_metadata(new_dset, quest_metadata=quest_metadata) @@ -112,6 +114,8 
@@ def _run_tool(self): 'parameter': 'streams', 'datatype': orig_metadata['datatype'], 'file_format': orig_metadata['file_format'], + 'intake_plugin': orig_metadata['intake_plugin'], + 'intake_args': orig_metadata['intake_args'], } ) @@ -192,6 +196,8 @@ def _run_tool(self): 'parameter': 'watershed_boundary', 'datatype': orig_metadata['datatype'], 'file_format': orig_metadata['file_format'], + 'intake_plugin': orig_metadata['intake_plugin'], + 'intake_args': orig_metadata['intake_args'], } ) @@ -237,6 +243,8 @@ def _run_tool(self): 'parameter': 'streams', 'datatype': orig_metadata['datatype'], 'file_format': orig_metadata['file_format'], + 'intake_plugin': orig_metadata['intake_plugin'], + 'intake_args': orig_metadata['intake_args'], 'file_path': file_path, } diff --git a/test/files/projects_template/default/metadata.db b/test/files/projects_template/default/metadata.db index 7b4473204b76298670abc573df2713a483ec4c9e..d7b7f76831eefb8fb831fcbe7b5e917d1fdd244d 100644 GIT binary patch delta 862 zcmZ`&KX21O6t@#6t(_)mrT{_c5E5;qP*Jgg1VqUd85*Z@6SZCHJDh8ZKa>gr$8ouM zC0N`DD)*KIZsqzxHgm#d(py~aDE~E+l0xyP5 z7_M#Nq4vLR;_8lJQ^V>bQE4}zD%T>I?hs5#N5k}A`VwuAP~2fYkoa`7sZ=}$8Q3fl zL+aHQY{?B6QQ6pG)+lk}+NiW|Fy3jejF1lPS#9hYCeijRrxzKT0=k|xaP(O?>$ElZ zT-s$41NJ|~(Ic*lyR$e>wQWpaOi)ozuMBs9P2)9=2B+EBJKMZ8B;7JiqEBW!rKhuI zO{=SsK6QpO{1|<1}`SHMt}+KTk=s#g3U>Tw0p3BY1Kmo3I!QRDX!0 zlaDK0(_}vWn91&(>U_DWC5b6OgOn7!g8ls_XL8Eb7bF%X0)wIMl}nEDq9IQdy9SO)f1nCpA7TzbH2mtJdQD(xPN68tWC5@(Vy-DMqrt z7-&r?Pyo{fxv9m)iRoC(QBVRqpdhg%1ELS?;Iho((!`w1szi_t@d!zTx6(oWMG8<< zI~9Ul-CToQ{hVEc6_lJ2!48asJYc9PX@YG@1_nz`etLXrUP)0Unya}qby?WN9i=C4 d;FJ)A#$mi6l)<|B5}%&pA_s*<3IdA~5&$OUr_KNX diff --git a/test/files/projects_template/project1/metadata.db b/test/files/projects_template/project1/metadata.db index f740db317753f3f5cef10b0157e9eb6c5b6ae415..e4920cff4c7d9c7c081a5389a79c6d12c27ad9f1 100644 GIT binary patch delta 857 zcmZo@U~V|TJV9EJgMop84~m(9w8lgoV|5M&J(&%>+@Be^*&-SEF7Y4cX64$<`IxJL z<0j`Swn)Cke1^RHc=v6b_=Sm+`!j1iH;{W>a!S)PkzxiaoLH0&^#m}81A;t#eI0`$ z6}(*|H6Y1A6DaKFALQ!k?gtS?a*L)ykgJ<(kgK1wYp_CsGs2NDyMJ zpfrRqm`hVviA~&5dh!NN2|=i$cta?IZSy5QJ;g;63>GCMY*sLMz&~+;IwvDA&+q~< M%Vt4?_xy|u0E?9Yj{pDw delta 610 zcmZoTz}(QlJV9EJm4ShQ2Z&*SZK95`GAo0gN&_$d4+d_In+$xH_z&|W@h#^5%+1QR zne#DM1IJCyRhtC`!Z|kISHMt}+KTk=s#g3U>Tw0p3Be*0nDJK=G3QDm+ zO$%{!@^OV5F`17)X0kh{I$v&TNn#4nSS1CoV1K{KnVfR<1&Kw8K;hIPh?owSf`U?M zUMA4m5Z8zh1wVfv4)yT?i-WY5R2HOSlS|9YNsUj-FUn2CsH`gFnKWEoq1tn)hBmko%4;X4nnqXU!fx(iKpB|r@S5j1o=4viYT^2TR eN9oBMI3)z3krZzTWw36(#HXjY$U$LIKmh>gw5^-~ diff --git a/test/files/projects_template/project2/metadata.db b/test/files/projects_template/project2/metadata.db index 1a786ed9b137c69385dc35e0ae02daa3397b128b..16a59d64b12706ead5153969fcd6f21f821d99bf 100644 GIT binary patch delta 860 zcmZo@U~V|TJV9EJm4ShQ4~W@-m$0eOwoLrPy zP?DLS2hj}DlU$UVSdy9&pIDN>>EamT8sh2eiehMPYDr=W&=m=sUcvr;KrQtNr3F9% zG(8}*3KENe4l7A5f~!p^&C3KDjOkLKfhCm%*gTPznUflymS2<$auJ5c;{4M3qGW9P z^1&V}Mp#`8w4xNq$1o!|wYWGj9h(_oy9*LaG7#ZYmRVeyn3GwR2nx1%kX$_$Ba`!U za#E8KA&U}%$w1HMF%fC?HZ|(fMlztLXfMQYmlp-vum(If-}N78j!@I3Dg`ASHMt}+KTk=s#g3U>Tw0p3Be*0nDJK=G3QDm+ zO$%{!@^OV5F`17)X0kh{I$v&TNn#4nSS1CoV1K{KnVfR<1&Kw8K;hIPh?owSf`U?M zUMA4m5Z8zh1wVfv4)yT?i-WY5R2HOSlS|9YNsUj-FUn2CsH`gFnKWEoq1tn)hBmko%4;X4nnqXU!fx(iKpB|r@S5j1o=4viYT^2TR hN9oBMI3)z3krZzTWw36(#HXjYXo12a1%X8g2>|C)uE+oY diff --git a/test/files/projects_template/test_data/metadata.db b/test/files/projects_template/test_data/metadata.db index 7b3c72d29c3b01afbbf7a3bcac5497e171dca101..132524c453b9ec9add10e84ecefbf746d4f20355 100644 GIT binary patch delta 951 
zcmZo@U~V|TJV9EJlYxPO4~W@-m78m3sR>tF!PAyI@$}A|! z%+G^p2I)yIN=+$d^mRosG&i**F$L&~1WvDDe?Op>`h?N~pa7a4 zkXZ$ZML>s@q!z)|CY0u70u9D=DbT=@$^vYjNXyJgjZe!j$_2RyLt}A%X?;;LHhuYE zj};@VE(Tgr3gly$k(*jvoS2Ty46xk=i6t3`@F~kIE=|nItV#q0TRcdv9*dF5`8heM z$%v3e3BhEbXLIt?<5TlWiYl>a&de)G%ubCj$SFqx2 zRPc6<)PN)dO`x!we~_!EyB|ap$t{`+L9TADL9TwzuE7ck&Im`s^m1u#zR4lQ%)B$>7B?O_0;tin;zRj2T^b{8<2y7N?c+Wp^fdnTbFvqY0F$-8^ z^8fj)n^_Y6#7|nl!p6wCmVtBaW7b^Q}t-2$!hkynU!MI7dZ$307IG} ANB{r; delta 647 zcmZoTz}(QlJV9EJoq>UY2Z&*SZK95`GCPBwN&_$d4+d_In+$xH_z&|W@h#^5%+1QR zne#DM1IJCyRhtC`!Z|kISHMt}+KTk=s#g3U>Tw0p3Be*0nDJK=G3QDm+ zO$%{!@^OV5F`17)X0kh{I$v&TNn#4nSS1CoV1K{KnVfR<1&Kw8K;hIPh?owSf`U?M zUMA4m5Z8zh1wVfv4)yT?i-WY5R2HOSlS|9YNsUj-FUn2CsH`gFnKWEoq1tn)hBmko%4;X4nnqXU!fx(iKpB|r@S5j1o=4viYT^2TR zN9oBMI3)z3krZzTWw36(#HXjo#S2VKj9hIDTy4NqQp3g4Rn90jdE-gZ$q();Y<_vt Pk#*4pfkg@eixLt5rr5dP From 057a856b5accb73592c7d9612466fde8807c6c24 Mon Sep 17 00:00:00 2001 From: douggallup Date: Mon, 19 Nov 2018 16:46:48 -0700 Subject: [PATCH 05/12] Cleanup for ensuring intake plugin information is properly populated. --- quest/api/datasets.py | 26 ++++-------- quest_provider_plugins/usgs_ned.py | 8 +++- quest_provider_plugins/usgs_nlcd.py | 9 ++++- quest_provider_plugins/usgs_nwis.py | 2 +- quest_tool_plugins/raster/rst_base.py | 17 ++++---- quest_tool_plugins/raster/rst_merge.py | 15 +++---- quest_tool_plugins/raster/rst_reprojection.py | 14 +++---- quest_tool_plugins/timeseries/ts_base.py | 18 +++++---- .../timeseries/ts_flow_duration.py | 19 +++++---- .../whitebox/whitebox_watershed.py | 40 +++++++++++-------- 10 files changed, 90 insertions(+), 78 deletions(-) diff --git a/quest/api/datasets.py b/quest/api/datasets.py index 648b11e4..6fb72b51 100644 --- a/quest/api/datasets.py +++ b/quest/api/datasets.py @@ -376,10 +376,11 @@ def open_dataset(dataset, fmt=None, **kwargs): intake_plugin = m.get('intake_plugin') required = [] args = m.get('intake_args') - if args is not None: - print('DEBUG: raw args in open_dataset() = {}'.format(args)) + if len(args.strip()): + # Get args from json if available required = json.loads(m.get('intake_args')) - print('DEBUG: raw args, after json.loads in open_dataset() = {}'.format(required)) + else: + raise ValueError('No intake plugin found') if path is None: raise ValueError('No dataset file found') @@ -391,22 +392,11 @@ def open_dataset(dataset, fmt=None, **kwargs): if intake_plugin: # New code, with 'intake_plugin' added to the local .db plugin_name = 'open_' + intake_plugin - print('Opening with dynamic intake using plugin %s' % plugin_name) - else: - # Old code, hard coded - plugin_name = 'open_quest_' + file_format.replace('-', '_') - print('Opening with static intake using plugin %s' % plugin_name) - module = __import__('intake') - func = getattr(module, plugin_name) - if intake_plugin: - # New code, with 'intake_plugin' added to the local .db - print('Calling function with required args %s' % required) + + module = __import__('intake') + func = getattr(module, plugin_name) source = func(*required, **kwargs) - else: - # Old code, hard coded - print('Calling old function with path %s' % path) - source = func(path, fmt=fmt, **kwargs) - return source.read() + return source.read() @add_async diff --git a/quest_provider_plugins/usgs_ned.py b/quest_provider_plugins/usgs_ned.py index 1a1e80e3..a99f7579 100644 --- a/quest_provider_plugins/usgs_ned.py +++ b/quest_provider_plugins/usgs_ned.py @@ -19,6 +19,12 @@ class UsgsNedServiceBase(SingleFileServiceBase): 'elevation': 'elevation' } + def download(self, catalog_id, 
file_path, dataset, **kwargs): + # Call the base to download, but then update the dictionary with intake info + temp = super().download(catalog_id, file_path, dataset, **kwargs) + temp.update({'intake_plugin': 'rasterio', 'intake_args': json.dumps([file_path, {}])}) + return temp + def search_catalog(self, **kwargs): service = self._description catalog_entries = util.to_geodataframe( @@ -33,8 +39,6 @@ def search_catalog(self, **kwargs): lambda x: {'download_url': x['download url'], 'filename': x['filename'], 'file_format': 'raster-gdal', - 'intake_plugin': 'rasterio', - 'intake_args': json.dumps([x['filename'], {}]), 'extract_from_zip': '.img', }, axis=1) diff --git a/quest_provider_plugins/usgs_nlcd.py b/quest_provider_plugins/usgs_nlcd.py index 0fbc0cfe..d3f19950 100644 --- a/quest_provider_plugins/usgs_nlcd.py +++ b/quest_provider_plugins/usgs_nlcd.py @@ -18,6 +18,13 @@ class UsgsNlcdServiceBase(SingleFileServiceBase): 'landcover': 'landcover' } + def download(self, catalog_id, file_path, dataset, **kwargs): + # Call the base to download, but then update the dictionary with intake info + temp = super().download(catalog_id, file_path, dataset, **kwargs) + temp.update({'intake_plugin': 'rasterio', 'intake_args': json.dumps([file_path, {}])}) + return temp + + def search_catalog(self, **kwargs): base_url = 'https://www.sciencebase.gov/catalog/items' params = [ @@ -42,8 +49,6 @@ def search_catalog(self, **kwargs): lambda x: {'download_url': x['download_url'], 'filename': x['filename'], 'file_format': 'raster-gdal', - 'intake_plugin': 'rasterio', - 'intake_args': json.dumps([x['filename'], {}]), 'extract_from_zip': '.tif', }, axis=1) diff --git a/quest_provider_plugins/usgs_nwis.py b/quest_provider_plugins/usgs_nwis.py index fc506d99..c6e6326f 100644 --- a/quest_provider_plugins/usgs_nwis.py +++ b/quest_provider_plugins/usgs_nwis.py @@ -69,8 +69,8 @@ def download(self, catalog_id, file_path, dataset, **kwargs): 'name': dataset, 'metadata': data, 'file_path': file_path, - 'intake_args': json.dumps([file_path]), 'intake_plugin': 'quest_timeseries_hdf5', + 'intake_args': json.dumps([file_path]), 'file_format': 'timeseries-hdf5', 'datatype': 'timeseries', 'parameter': parameter, diff --git a/quest_tool_plugins/raster/rst_base.py b/quest_tool_plugins/raster/rst_base.py index 77672f3b..f2853368 100644 --- a/quest_tool_plugins/raster/rst_base.py +++ b/quest_tool_plugins/raster/rst_base.py @@ -1,10 +1,11 @@ """Functions required run raster filters""" +import json import rasterio from quest.plugins import ToolBase from quest import util -from quest.api import get_metadata +from quest.api import get_metadata, update_metadata @@ -39,20 +40,20 @@ def _run_tool(self): "width": out_image.shape[1], "transform": None}) + new_dset, file_path, catalog_entry = self._create_new_dataset( + old_dataset=dataset, + ext='.tif' + ) + new_metadata = { 'parameter': orig_metadata['parameter'], 'datatype': orig_metadata['datatype'], 'file_format': orig_metadata['file_format'], 'intake_plugin': orig_metadata['intake_plugin'], - 'intake_args': orig_metadata['intake_args'], + 'intake_args': json.dumps([file_path, {}]), 'unit': orig_metadata['unit'] } - - new_dset, file_path, catalog_entry = self._create_new_dataset( - old_dataset=dataset, - ext='.tif', - dataset_metadata=new_metadata, - ) + update_metadata(new_dset, quest_metadata=new_metadata) with rasterio.open(file_path, "w", **out_meta) as dest: dest.write(out_image) diff --git a/quest_tool_plugins/raster/rst_merge.py b/quest_tool_plugins/raster/rst_merge.py index 
0ace1647..18bc9f65 100644 --- a/quest_tool_plugins/raster/rst_merge.py +++ b/quest_tool_plugins/raster/rst_merge.py @@ -1,3 +1,4 @@ +import json from quest.plugins import ToolBase from quest import util from quest.api import get_metadata, update_metadata @@ -40,20 +41,20 @@ def _run_tool(self): if get_metadata(dataset)[dataset]['unit'] != orig_metadata['unit']: raise ValueError('Units must match for all datasets') + new_dset, file_path, catalog_entry = self._create_new_dataset( + old_dataset=datasets[0], + ext='.tif' + ) + new_metadata = { 'parameter': orig_metadata['parameter'], 'datatype': orig_metadata['datatype'], 'file_format': orig_metadata['file_format'], 'intake_plugin': orig_metadata['intake_plugin'], - 'intake_args': orig_metadata['intake_args'], + 'intake_args': json.dumps([file_path, {}]), 'unit': orig_metadata['unit'], } - - new_dset, file_path, catalog_entry = self._create_new_dataset( - old_dataset=datasets[0], - ext='.tif', - dataset_metadata=new_metadata, - ) + update_metadata(new_dset, quest_metadata=new_metadata) open_datasets = [rasterio.open(d) for d in raster_files] profile = open_datasets[0].profile diff --git a/quest_tool_plugins/raster/rst_reprojection.py b/quest_tool_plugins/raster/rst_reprojection.py index 14e85d84..6f5a527c 100644 --- a/quest_tool_plugins/raster/rst_reprojection.py +++ b/quest_tool_plugins/raster/rst_reprojection.py @@ -34,19 +34,19 @@ def _run_tool(self): dst_crs = self.new_crs + new_dset, file_path, catalog_entry = self._create_new_dataset( + old_dataset=dataset, + ext='.tif' + ) + new_metadata = { 'parameter': orig_metadata['parameter'], 'datatype': orig_metadata['datatype'], 'file_format': orig_metadata['file_format'], 'intake_plugin': orig_metadata['intake_plugin'], - 'intake_args': orig_metadata['intake_args'], + 'intake_args': json.dumps([file_path, {}]), } - - new_dset, file_path, catalog_entry = self._create_new_dataset( - old_dataset=dataset, - ext='.tif', - dataset_metadata=new_metadata, - ) + update_metadata(new_dset, quest_metadata=new_metadata) # run filter with rasterio.open(src_path) as src: diff --git a/quest_tool_plugins/timeseries/ts_base.py b/quest_tool_plugins/timeseries/ts_base.py index 00081330..18b8d976 100644 --- a/quest_tool_plugins/timeseries/ts_base.py +++ b/quest_tool_plugins/timeseries/ts_base.py @@ -1,5 +1,7 @@ +import json + from quest.plugins import ToolBase -from quest.api import get_metadata +from quest.api import get_metadata, update_metadata from quest import util from quest.plugins import load_plugins @@ -34,20 +36,20 @@ def _run_tool(self): # setup new dataset + new_dset, file_path, catalog_entry = self._create_new_dataset( + old_dataset=dataset, + ext='.h5' + ) + new_metadata = { 'parameter': new_df.metadata.get('parameter'), 'unit': new_df.metadata.get('unit'), 'datatype': orig_metadata['datatype'], 'file_format': orig_metadata['file_format'], 'intake_plugin': orig_metadata['intake_plugin'], - 'intake_args': orig_metadata['intake_args'], + 'intake_args': json.dumps([file_path]), } - - new_dset, file_path, catalog_entry = self._create_new_dataset( - old_dataset=dataset, - ext='.h5', - dataset_metadata=new_metadata, - ) + update_metadata(new_dset, quest_metadata=new_metadata) # save dataframe io.write(file_path, new_df, new_metadata) diff --git a/quest_tool_plugins/timeseries/ts_flow_duration.py b/quest_tool_plugins/timeseries/ts_flow_duration.py index 8231e39d..7af2769f 100644 --- a/quest_tool_plugins/timeseries/ts_flow_duration.py +++ b/quest_tool_plugins/timeseries/ts_flow_duration.py @@ -1,5 +1,7 @@ 
+import json + from quest.plugins import ToolBase -from quest.api import get_metadata +from quest.api import get_metadata, update_metadata from quest import util from quest.util import setattr_on_dataframe from quest.plugins import load_plugins @@ -42,6 +44,12 @@ def _run_tool(self): setattr_on_dataframe(df, 'metadata', metadata) new_df = df # setup new dataset + + new_dset, file_path, catalog_entry = self._create_new_dataset( + old_dataset=dataset, + ext='.h5', + ) + new_metadata = { 'parameter': new_df.metadata.get('parameter'), 'datatype': orig_metadata['datatype'], @@ -49,14 +57,9 @@ def _run_tool(self): 'file_format': orig_metadata['file_format'], 'unit': new_df.metadata.get('unit'), 'intake_plugin': orig_metadata['intake_plugin'], - 'intake_args': orig_metadata['intake_args'], + 'intake_args': json.dumps([file_path]), } - - new_dset, file_path, catalog_entry = self._create_new_dataset( - old_dataset=dataset, - ext='.h5', - dataset_metadata=new_metadata, - ) + update_metadata(new_dset, quest_metadata=new_metadata) # save dataframe output = load_plugins('io', 'xy-hdf5')['xy-hdf5'] diff --git a/quest_tool_plugins/whitebox/whitebox_watershed.py b/quest_tool_plugins/whitebox/whitebox_watershed.py index a8ebbc22..9346892c 100644 --- a/quest_tool_plugins/whitebox/whitebox_watershed.py +++ b/quest_tool_plugins/whitebox/whitebox_watershed.py @@ -1,5 +1,6 @@ import numpy as np import param +import json from quest.plugins import ToolBase from quest import util @@ -44,7 +45,7 @@ def _run_tool(self): 'datatype': orig_metadata['datatype'], 'file_format': orig_metadata['file_format'], 'intake_plugin': orig_metadata['intake_plugin'], - 'intake_args': orig_metadata['intake_args'], + 'intake_args': json.dumps([file_path, {}]), } update_metadata(new_dset, quest_metadata=quest_metadata) @@ -109,16 +110,18 @@ def _run_tool(self): new_dset, file_path, catalog_entry = self._create_new_dataset( old_dataset=dataset, - ext='.tif', - dataset_metadata={ - 'parameter': 'streams', - 'datatype': orig_metadata['datatype'], - 'file_format': orig_metadata['file_format'], - 'intake_plugin': orig_metadata['intake_plugin'], - 'intake_args': orig_metadata['intake_args'], - } + ext='.tif' ) + quest_metadata = { + 'parameter': 'streams', + 'datatype': orig_metadata['datatype'], + 'file_format': orig_metadata['file_format'], + 'intake_plugin': orig_metadata['intake_plugin'], + 'intake_args': json.dumps([file_path, {}]), + } + update_metadata(new_dset, quest_metadata=quest_metadata) + fa = self.flow_accumulation if self.flow_accumulation is not None else wbt.d_inf_flow_accumulation(elev_file) # fa = wbt.d8_flow_accumulation(fill) wbt.extract_streams( @@ -192,15 +195,18 @@ def _run_tool(self): new_dset, file_path, catalog_entry = self._create_new_dataset( old_dataset=dataset, ext='.tif', - dataset_metadata={ - 'parameter': 'watershed_boundary', - 'datatype': orig_metadata['datatype'], - 'file_format': orig_metadata['file_format'], - 'intake_plugin': orig_metadata['intake_plugin'], - 'intake_args': orig_metadata['intake_args'], - } ) + quest_metadata = { + 'parameter': 'watershed_boundary', + 'datatype': orig_metadata['datatype'], + 'file_format': orig_metadata['file_format'], + 'intake_plugin': orig_metadata['intake_plugin'], + 'intake_args': json.dumps([file_path, {}]), + } + + update_metadata(new_dset, quest_metadata=quest_metadata) + d8 = wbt.d8_pointer(elev_file) point_shp = points_to_shp(original_outlets) @@ -244,7 +250,7 @@ def _run_tool(self): 'datatype': orig_metadata['datatype'], 'file_format': 
orig_metadata['file_format'], 'intake_plugin': orig_metadata['intake_plugin'], - 'intake_args': orig_metadata['intake_args'], + 'intake_args': json.dumps([file_path, {}]), 'file_path': file_path, } From 3cd958628f07627b72807e3d6fce3dd7589cf042 Mon Sep 17 00:00:00 2001 From: douggallup Date: Tue, 20 Nov 2018 09:17:33 -0700 Subject: [PATCH 06/12] Fixed spacing. --- quest_provider_plugins/usgs_nlcd.py | 1 - 1 file changed, 1 deletion(-) diff --git a/quest_provider_plugins/usgs_nlcd.py b/quest_provider_plugins/usgs_nlcd.py index d3f19950..12c2efc7 100644 --- a/quest_provider_plugins/usgs_nlcd.py +++ b/quest_provider_plugins/usgs_nlcd.py @@ -24,7 +24,6 @@ def download(self, catalog_id, file_path, dataset, **kwargs): temp.update({'intake_plugin': 'rasterio', 'intake_args': json.dumps([file_path, {}])}) return temp - def search_catalog(self, **kwargs): base_url = 'https://www.sciencebase.gov/catalog/items' params = [ From 8ebfb06a820c25353864016482bff7d41248aecd Mon Sep 17 00:00:00 2001 From: douggallup Date: Tue, 20 Nov 2018 12:56:23 -0700 Subject: [PATCH 07/12] Renamed variable --- quest_provider_plugins/usgs_ned.py | 6 +++--- quest_provider_plugins/usgs_nlcd.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/quest_provider_plugins/usgs_ned.py b/quest_provider_plugins/usgs_ned.py index a99f7579..88e616df 100644 --- a/quest_provider_plugins/usgs_ned.py +++ b/quest_provider_plugins/usgs_ned.py @@ -21,9 +21,9 @@ class UsgsNedServiceBase(SingleFileServiceBase): def download(self, catalog_id, file_path, dataset, **kwargs): # Call the base to download, but then update the dictionary with intake info - temp = super().download(catalog_id, file_path, dataset, **kwargs) - temp.update({'intake_plugin': 'rasterio', 'intake_args': json.dumps([file_path, {}])}) - return temp + metadata = super().download(catalog_id, file_path, dataset, **kwargs) + metadata.update({'intake_plugin': 'rasterio', 'intake_args': json.dumps([file_path, {}])}) + return metadata def search_catalog(self, **kwargs): service = self._description diff --git a/quest_provider_plugins/usgs_nlcd.py b/quest_provider_plugins/usgs_nlcd.py index 12c2efc7..d90edda8 100644 --- a/quest_provider_plugins/usgs_nlcd.py +++ b/quest_provider_plugins/usgs_nlcd.py @@ -20,9 +20,9 @@ class UsgsNlcdServiceBase(SingleFileServiceBase): def download(self, catalog_id, file_path, dataset, **kwargs): # Call the base to download, but then update the dictionary with intake info - temp = super().download(catalog_id, file_path, dataset, **kwargs) - temp.update({'intake_plugin': 'rasterio', 'intake_args': json.dumps([file_path, {}])}) - return temp + metadata = super().download(catalog_id, file_path, dataset, **kwargs) + metadata.update({'intake_plugin': 'rasterio', 'intake_args': json.dumps([file_path, {}])}) + return metadata def search_catalog(self, **kwargs): base_url = 'https://www.sciencebase.gov/catalog/items' From f96c5331c79e50a9504bdb69992fdc039cb0d3eb Mon Sep 17 00:00:00 2001 From: Aaron Valoroso Date: Mon, 19 Nov 2018 08:51:34 -0600 Subject: [PATCH 08/12] Overall straightening of the source code. 
(#117) * Added use of enums where relevant * Re-ordered import statements --- README.md | 70 +--- appveyor.yml | 2 +- conda_environment.yml | 1 + examples/{ => broken}/workflow_gssha.py | 0 examples/notebooks/Getting_Started.ipynb | 308 ++++++++++++++++++ examples/notebooks/WMTS_Example.ipynb | 4 +- .../notebooks/Watershed Delineation.ipynb | 4 +- quest/api/__init__.py | 2 +- quest/api/catalog.py | 20 +- quest/api/collections.py | 9 +- quest/api/datasets.py | 76 ++--- quest/api/manage.py | 47 +-- quest/api/metadata.py | 48 ++- quest/api/parameters.py | 14 +- quest/api/projects.py | 34 +- quest/api/providers.py | 11 +- quest/api/tasks.py | 22 +- quest/api/tools.py | 17 +- quest/api/version.py | 1 - quest/api/workflows.py | 2 +- quest/database/database.py | 1 + quest/plugins/base/publish_base.py | 3 +- quest/plugins/base/service_base.py | 8 +- quest/plugins/base/tool_base.py | 4 +- quest/plugins/plugins.py | 40 ++- quest/plugins/user_provider.py | 57 ++-- quest/static/__init__.py | 6 + quest/tools/__init__.py | 3 +- quest/util/__init__.py | 1 - quest/util/config.py | 8 +- quest/util/io.py | 1 - quest/util/misc.py | 4 +- quest_io_plugins/raster_gdal.py | 9 +- quest_io_plugins/timeseries_hdf5.py | 3 +- quest_io_plugins/xyHdf5.py | 4 +- quest_provider_plugins/cuahsi_hs.py | 21 +- quest_provider_plugins/kitware_girder.py | 9 +- quest_provider_plugins/nasa.py | 16 +- quest_provider_plugins/noaa_coastwatch.py | 28 +- quest_provider_plugins/noaa_ncdc.py | 26 +- quest_provider_plugins/quest_catalog.py | 9 +- quest_provider_plugins/usgs_ned.py | 16 +- quest_provider_plugins/usgs_nlcd.py | 22 +- quest_provider_plugins/usgs_nwis.py | 30 +- quest_provider_plugins/wmts_imagery.py | 2 + quest_tool_plugins/raster/rst_base.py | 12 +- quest_tool_plugins/raster/rst_merge.py | 18 +- quest_tool_plugins/raster/rst_reprojection.py | 13 +- quest_tool_plugins/timeseries/timeseries.py | 3 - quest_tool_plugins/timeseries/ts_base.py | 9 +- .../timeseries/ts_flow_duration.py | 9 +- quest_tool_plugins/whitebox/whitebox_utils.py | 10 +- .../whitebox/whitebox_watershed.py | 27 +- test/data.py | 2 +- test/test_catalog.py | 7 +- test/test_datasets.py | 1 - test/test_providers.py | 12 +- test/test_util_misc.py | 14 +- 58 files changed, 716 insertions(+), 444 deletions(-) rename examples/{ => broken}/workflow_gssha.py (100%) create mode 100644 examples/notebooks/Getting_Started.ipynb diff --git a/README.md b/README.md index 0c8a3ad8..99c9a422 100644 --- a/README.md +++ b/README.md @@ -1,73 +1,15 @@ # Environmental Simulator Quest -| Gitter | Linux/Mac | Windows | Test Coverage | -| --------- | --------- | --------- | ------------- | -| [![Join the chat at https://gitter.im/Quest-Development/Quest](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/Quest-Development/Quest) | [![Build Status](https://travis-ci.org/erdc/quest.svg?branch=master)](https://travis-ci.org/erdc/quest) | [![Build Status](https://ci.appveyor.com/api/projects/status/e20arxcfrcmb2ylm/branch/master?svg=true)](https://ci.appveyor.com/project/dharhas/quest) | [![Coverage Status](https://coveralls.io/repos/github/erdc/quest/badge.svg)](https://coveralls.io/github/erdc/quest) | +| Gitter | Linux/Mac | Windows | ReadTheDocs | Test Coverage | +| --------- | --------- | --------- | ------------- | ------------- | +| [![Join the chat at https://gitter.im/Quest-Development/Quest](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/Quest-Development/Quest) | [![Build 
Status](https://travis-ci.org/erdc/quest.svg?branch=master)](https://travis-ci.org/erdc/quest) | [![Build Status](https://ci.appveyor.com/api/projects/status/e20arxcfrcmb2ylm/branch/master?svg=true)](https://ci.appveyor.com/project/dharhas/quest) | [![Documentation Status](https://readthedocs.org/projects/quest/badge/?version=latest)](https://quest.readthedocs.io/en/latest/?badge=latest) | [![Coverage Status](https://coveralls.io/repos/github/erdc/quest/badge.svg)](https://coveralls.io/github/erdc/quest) | ### Project Description -Quest is a python library that provides an API the ability to search, publish and download data (both geographical and non-geographical) across multiple data sources including both local repositories and web based services. The library also allows provides tools in order to manipulate and manage the data that the user is working with. +Quest is a Python library that provides the ability to search, publish and download data (both geographical and non-geographical) from multiple data sources, including local repositories and web-based data providers. Quest also provides a suite of tools for manipulating and transforming data once it is downloaded. ### Project Links - Here is a live link for the Quest Documentation: https://quest.readthedocs.io/en/latest/ -## Setup Dev Environment - -- Install miniconda -- Install conda-env - - conda install conda-env - -- Clone master branch -- Create a new conda environment for development - - conda env create -n quest -f py3_conda_requirements.yml - - (you can also create a python 2 env but 3 is preferred) - -- Install quest in develop mode - - python setup.py develop - -## Development Workflow - -- change to master branch - - git checkout master - -- get the latest version of master - - git pull master - -- create a new branch locally - - git checkout -b mybranch - -- Develop the new features on your local machine, add tests for any new features -- push the local branch to gitlab and set up tracking, later `git push` is all that is required. - - git push -u origin mybranch - -- run tests on python 2 and python 3 using py.test -- Once you have finished developing your branch, check if master has changed - - git checkout master - - git pull - -- If `git pull` pulls in new changes from master then you need to rebase - - git checkout mybranch - - git rebase master - - (follow the prompts, you may have to fix conflicts) - -- after a rebase you may have to force push to gitlab on your branch - - git push -f - -- Run tests again. -- If everything looks good, use Gitlab to do a merge request from your branch to master -- Once the merge has been accepted, do not continuing working in that branch. make a new branch starting at step 1 - +Quest was designed to be extensible and has three types of plugins (io, tool, and provider). Provider plugins allow Quest to search for data from remote and local data providers. Tool plugins alloww Quest to perform different data manipulations. I/O plugins allows Quest to read and write different file formats. 
+- Here is a link to an example Quest Plugin: https://github.com/quest-dev/quest_ncep_provider_plugin \ No newline at end of file diff --git a/appveyor.yml b/appveyor.yml index a5a5cf18..f4381616 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -35,4 +35,4 @@ install: - python -c "import quest; quest.api.update_settings(dict(CACHE_DIR='%QUEST_CACHE_DIR%')); quest.api.save_settings()" test_script: - - python -m pytest -vv + - python -m pytest -vv \ No newline at end of file diff --git a/conda_environment.yml b/conda_environment.yml index f27455a3..66cbb8c6 100644 --- a/conda_environment.yml +++ b/conda_environment.yml @@ -41,6 +41,7 @@ dependencies: - werkzeug # test dependencies + - pytest=3.7.4 - pytest-runner - pytest-cov - coveralls diff --git a/examples/workflow_gssha.py b/examples/broken/workflow_gssha.py similarity index 100% rename from examples/workflow_gssha.py rename to examples/broken/workflow_gssha.py diff --git a/examples/notebooks/Getting_Started.ipynb b/examples/notebooks/Getting_Started.ipynb new file mode 100644 index 00000000..475b8f72 --- /dev/null +++ b/examples/notebooks/Getting_Started.ipynb @@ -0,0 +1,308 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Getting Started with using Quest." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pprint import pprint\n", + "import itertools\n", + "from IPython.display import display, Markdown\n", + "\n", + "import param\n", + "import ipywidgets as widgets\n", + "from paramnb import Widgets \n", + "\n", + "import quest\n", + "print('\\nQUEST version %s' % quest.api.get_quest_version())\n", + "print('\\nQUEST API version %s' % quest.api.get_api_version())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Quest Provider Plugins:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "providers = quest.api.get_providers()\n", + "print(\"Providers: \")\n", + "pprint(providers)\n", + "\n", + "services = quest.api.get_services()\n", + "print(\"\\nServices: \")\n", + "pprint(services)\n", + "\n", + "publishers = quest.api.get_publishers()\n", + "print(\"\\nPublishers: \")\n", + "pprint(publishers)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Quest Tool Plugins:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tools = quest.api.get_tools()\n", + "pprint(tools)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Quest Projects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print('\\033[1m' + 'Showing existing projects.' + '\\033[0m')\n", + "projects = quest.api.get_projects()\n", + "pprint(projects)\n", + "\n", + "print('\\n---------------------------\\n')\n", + "print('\\033[1m' + 'Creating a new project.' + '\\033[0m')\n", + "\n", + "if 'quest-demo' in projects:\n", + " quest.api.delete_project('quest-demo')\n", + "\n", + "quest.api.new_project('Quest-Demo', 'Quest-Demo', 'For demostrating how to use Quest.')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Quest Collections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print('\\033[1m' + 'Showing existing collections.' 
+ '\\033[0m')\n", + "collections = quest.api.get_collections()\n", + "pprint(collections)\n", + "\n", + "print('\\n---------------------------\\n')\n", + "print('\\033[1m' + 'Creating a new project.' + '\\033[0m')\n", + "\n", + "collection_name = 'col-test'\n", + "if collection_name in collections:\n", + " quest.api.delete(collection_name)\n", + "\n", + "quest.api.new_collection(collection_name, collection_name, 'For demonstrating how to add collections to Quest.')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Getting Catalog Entries:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filters = {}\n", + "get_parameter = widgets.Dropdown(options=quest.api.get_mapped_parameters(), description='Parameters:')\n", + "display(get_parameter)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if get_parameter.value != '':\n", + " filters['parameter'] = get_parameter.value\n", + " get_service = widgets.Dropdown(options=quest.api.get_services(parameter=get_parameter.value), description='Services:')\n", + "else:\n", + " get_service = widgets.Dropdown(options=quest.api.get_services(), description='Services:')\n", + "display(get_service)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "get_geom = widgets.Dropdown(options=['', 'Point', 'Polygon', 'Line'], description='Gemoetry:')\n", + "display(get_geom)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if get_geom.value != '':\n", + " filters['geom_type'] = get_geom.value\n", + " \n", + "long_min = widgets.Text(description='long min:')\n", + "lat_min = widgets.Text(description='lat min:')\n", + "long_max = widgets.Text(description='long max:')\n", + "lat_max = widgets.Text(description='lat max:')\n", + "display(long_min, lat_min, long_max, lat_max)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "value = len(long_min.value) * len(lat_min.value) * len(long_max.value) * len(lat_max.value)\n", + "if value != 0:\n", + " bbox = [long_min.value, lat_min.value, long_max.value, lat_max.value]\n", + " filters['bbox'] = bbox" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cache = widgets.Checkbox(value=False, description='Update Cache')\n", + "display(cache)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filters = {'parameter': get_parameter.value}\n", + "datasets = quest.api.search_catalog(uris=get_service.value, filters=filters, update_cache=cache.value, as_dataframe=True)\n", + "datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "get_datasets = widgets.Dropdown(options=datasets.index.tolist()[:50], description='Datasets:')\n", + "display(get_datasets)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add the dataset to a collection:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "catalog_entry = quest.api.add_datasets(collection_name, get_datasets.value)\n", + "catalog_entry" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get the download options:" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "download_options = quest.api.get_download_options(catalog_entry, fmt='param')[catalog_entry[0]]\n", + "Widgets(download_options)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download the selected data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "staged_id = quest.api.stage_for_download(catalog_entry, options=download_options)\n", + "print('\\033[1m' + \"Staged ID: \" + '\\033[0m' + staged_id[0])\n", + "\n", + "quest.api.download_datasets(staged_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/notebooks/WMTS_Example.ipynb b/examples/notebooks/WMTS_Example.ipynb index d08373bb..fcdd982a 100644 --- a/examples/notebooks/WMTS_Example.ipynb +++ b/examples/notebooks/WMTS_Example.ipynb @@ -82,9 +82,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "dataset = quest.api.get_data(\n", diff --git a/examples/notebooks/Watershed Delineation.ipynb b/examples/notebooks/Watershed Delineation.ipynb index 56fa18fe..381c4d96 100644 --- a/examples/notebooks/Watershed Delineation.ipynb +++ b/examples/notebooks/Watershed Delineation.ipynb @@ -109,9 +109,7 @@ "metadata": {}, "outputs": [], "source": [ - "fill_dataset = quest.tools.wbt_fill_depressions(\n", - " dataset=elevation_raster,\n", - ")['datasets'][0]\n", + "fill_dataset = quest.tools.wbt_fill_depressions(dataset=elevation_raster)['datasets'][0]\n", "fill = quest.api.open_dataset(fill_dataset, with_nodata=True, isel_band=0)" ] }, diff --git a/quest/api/__init__.py b/quest/api/__init__.py index 40466d68..d0f3f83d 100644 --- a/quest/api/__init__.py +++ b/quest/api/__init__.py @@ -4,7 +4,7 @@ Services Library. """ -_version__ = '3.0' +__version__ = '3.0.a1' # __all__ needed for autodoc to work diff --git a/quest/api/catalog.py b/quest/api/catalog.py index 03baa4a8..7b67f41b 100644 --- a/quest/api/catalog.py +++ b/quest/api/catalog.py @@ -1,18 +1,18 @@ import json import itertools + import pandas as pd import numpy as np import geojson from shapely.geometry import shape -from .. import util -from .. import plugins -from quest.database.database import get_db, db_session -from .datasets import new_dataset -from .metadata import get_metadata -from ..util import construct_service_uri from .tasks import add_async +from .metadata import get_metadata +from .datasets import new_dataset +from .. 
import util +from ..plugins import load_providers from ..static import DatasetSource, UriType +from ..database.database import get_db, db_session @add_async @@ -83,14 +83,14 @@ def search_catalog(uris=None, expand=False, as_dataframe=False, as_geojson=False grouped_uris = util.classify_uris(uris, as_dataframe=False, exclude=[UriType.DATASET, UriType.COLLECTION]) - services = grouped_uris.get('services') or [] + services = grouped_uris.get(UriType.SERVICE) or [] all_datasets = [] filters = filters or dict() for name in services: provider, service, _ = util.parse_service_uri(name) - provider_plugin = plugins.load_providers()[provider] + provider_plugin = load_providers()[provider] tmp_datasets = provider_plugin.search_catalog(service, update_cache=update_cache, **filters) all_datasets.append(tmp_datasets) @@ -202,7 +202,7 @@ def get_tags(service_uris, update_cache=False, filter=None, as_count=False): for service in services: provider, service, _ = util.parse_service_uri(service) - provider_plugin = plugins.load_providers()[provider] + provider_plugin = load_providers()[provider] service_tags = provider_plugin.get_tags(service, update_cache=update_cache) tags.update(service_tags) @@ -261,4 +261,4 @@ def new_catalog_entry(geometry=None, geom_type=None, geom_coords=None, metadata= with db_session: db.QuestCatalog(**data) - return construct_service_uri('quest', 'quest', catalog_id) + return util.construct_service_uri('quest', 'quest', catalog_id) diff --git a/quest/api/collections.py b/quest/api/collections.py index c87bc223..71d9d08e 100644 --- a/quest/api/collections.py +++ b/quest/api/collections.py @@ -1,9 +1,10 @@ -"""API functions related to Collections.""" -from quest.database.database import get_db, db_session -from .projects import _get_project_dir -import pandas as pd import os +import pandas as pd + +from .projects import _get_project_dir +from ..database.database import get_db, db_session + def get_collections(expand=False, as_dataframe=False): """Get available collections. diff --git a/quest/api/datasets.py b/quest/api/datasets.py index 6fb72b51..eb719b34 100644 --- a/quest/api/datasets.py +++ b/quest/api/datasets.py @@ -1,14 +1,16 @@ -from quest.database.database import get_db, db_session, select_datasets -from ..plugins import load_providers, load_plugins, list_plugins -from ..util import logger, parse_service_uri, listify, uuid, is_uuid, classify_uris +import os + +import param +import pandas as pd + +from .tasks import add_async +from .projects import _get_project_dir from .collections import get_collections from .metadata import get_metadata, update_metadata -from .projects import _get_project_dir -from quest.static import DatasetStatus, DatasetSource -from .tasks import add_async -import pandas as pd -import param -import os +from .. import util +from .. 
import static +from ..plugins import load_providers, load_plugins, list_plugins +from ..database.database import get_db, db_session, select_datasets import json @@ -38,7 +40,7 @@ def download(catalog_entry, file_path, dataset=None, **kwargs): if file_path is None: pass - provider, service, catalog_id = parse_service_uri(service_uri) + provider, service, catalog_id = util.parse_service_uri(service_uri) provider_plugin = load_providers()[provider] data = provider_plugin.download(service=service, catalog_id=catalog_id, file_path=file_path, dataset=dataset, **kwargs) @@ -51,7 +53,7 @@ def publish(publisher_uri, options=None, **kwargs): options = dict(options.get_param_values()) options = options or dict() options.update(kwargs) - provider, publisher, _ = parse_service_uri(publisher_uri) + provider, publisher, _ = util.parse_service_uri(publisher_uri) provider_plugin = load_providers()[provider] data = provider_plugin.publish(publisher=publisher, **options) return data @@ -78,7 +80,7 @@ def download_datasets(datasets, raise_on_error=False): return # filter out non download datasets - datasets = datasets[datasets['source'] == DatasetSource.WEB_SERVICE] + datasets = datasets[datasets['source'] == static.DatasetSource.WEB_SERVICE] db = get_db() project_path = _get_project_dir() @@ -87,7 +89,7 @@ def download_datasets(datasets, raise_on_error=False): collection_path = os.path.join(project_path, dataset['collection']) catalog_entry = dataset["catalog_entry"] try: - update_metadata(idx, quest_metadata={'status': DatasetStatus.PENDING}) + update_metadata(idx, quest_metadata={'status': static.DatasetStatus.PENDING}) kwargs = dataset['options'] or dict() all_metadata = download(catalog_entry, file_path=collection_path, @@ -96,7 +98,7 @@ def download_datasets(datasets, raise_on_error=False): metadata = all_metadata.pop('metadata', None) quest_metadata = all_metadata quest_metadata.update({ - 'status': DatasetStatus.DOWNLOADED, + 'status': static.DatasetStatus.DOWNLOADED, 'message': 'success', }) except Exception as e: @@ -104,7 +106,7 @@ def download_datasets(datasets, raise_on_error=False): raise quest_metadata = { - 'status': DatasetStatus.FAILED_DOWNLOAD, + 'status': static.DatasetStatus.FAILED_DOWNLOAD, 'message': str(e), } @@ -136,18 +138,18 @@ def get_download_options(uris, fmt='json'): download options that can be specified when calling quest.api.stage_for_download or quest.api.download """ - uris = listify(uris) - grouped_uris = classify_uris(uris, as_dataframe=False, exclude=['collections']) + uris = util.listify(uris) + grouped_uris = util.classify_uris(uris, as_dataframe=False, exclude=['collections']) - services = grouped_uris.get('services') or [] - datasets = grouped_uris.get('datasets') or [] + services = grouped_uris.get(static.UriType.SERVICE) or [] + datasets = grouped_uris.get(static.UriType.DATASET) or [] service_uris = {s: s for s in services} service_uris.update({dataset: get_metadata(dataset)[dataset]['catalog_entry'] for dataset in datasets}) options = {} for uri, service_uri in service_uris.items(): - provider, service, _ = parse_service_uri(service_uri) + provider, service, _ = util.parse_service_uri(service_uri) provider_plugin = load_providers()[provider] options[uri] = provider_plugin.get_download_options(service, fmt) @@ -155,11 +157,11 @@ def get_download_options(uris, fmt='json'): def get_publish_options(publish_uri, fmt='json'): - uris = listify(publish_uri) + uris = util.listify(publish_uri) options = {} for uri in uris: publish_uri = uri - provider, publisher, _ = 
parse_service_uri(publish_uri) + provider, publisher, _ = util.parse_service_uri(publish_uri) provider_plugin = load_providers()[provider] options[uri] = provider_plugin.publish_options(publisher, fmt) @@ -199,7 +201,7 @@ def get_datasets(expand=None, filters=None, queries=None, as_dataframe=None): if filters is not None: for k, v in filters.items(): if k not in datasets.keys(): - logger.warning('filter field {} not found, continuing'.format(k)) + util.logger.warning('filter field {} not found, continuing'.format(k)) continue datasets = datasets.loc[datasets[k] == v] @@ -254,11 +256,11 @@ def new_dataset(catalog_entry, collection, source=None, display_name=None, except IndexError: raise ValueError('Entry {} dose not exist'.format(catalog_entry)) - name = name or uuid('dataset') - assert name.startswith('d') and is_uuid(name) + name = name or util.uuid('dataset') + assert name.startswith('d') and util.is_uuid(name) if source is None: - source = DatasetSource.USER + source = static.DatasetSource.USER if display_name is None: display_name = name @@ -276,8 +278,8 @@ def new_dataset(catalog_entry, collection, source=None, display_name=None, 'file_path': file_path, 'metadata': metadata, } - if source == DatasetSource.WEB_SERVICE: - quest_metadata.update({'status': DatasetStatus.NOT_STAGED}) + if source == static.DatasetSource.WEB_SERVICE: + quest_metadata.update({'status': static.DatasetStatus.NOT_STAGED}) db = get_db() with db_session: @@ -302,7 +304,7 @@ def stage_for_download(uris, options=None): uris (list): staged dataset uids """ - uris = listify(uris) + uris = util.listify(uris) display_name = None datasets = [] @@ -324,13 +326,13 @@ def stage_for_download(uris, options=None): if dataset_metadata['display_name'] == dataset_uri: catalog_entry = dataset_metadata['catalog_entry'] - provider, service, _ = parse_service_uri(catalog_entry) + provider, service, _ = util.parse_service_uri(catalog_entry) display_name = '{0}-{1}-{2}'.format(provider, parameter_name, dataset_uri[:7]) quest_metadata = { 'display_name': display_name or dataset_metadata['display_name'], 'options': kwargs, - 'status': DatasetStatus.STAGED, + 'status': static.DatasetStatus.STAGED, 'parameter': parameter } @@ -385,10 +387,10 @@ def open_dataset(dataset, fmt=None, **kwargs): if path is None: raise ValueError('No dataset file found') - if file_format not in list_plugins('io'): + if file_format not in list_plugins(static.PluginType.IO): raise ValueError('No reader available for: %s' % file_format) - # Use quest_xyHdf5, quest_timeseries_hdf5, and quest_raster_gdal intake plugins to open + io = load_plugins(static.PluginType.IO, file_format)[file_format] if intake_plugin: # New code, with 'intake_plugin' added to the local .db plugin_name = 'open_' + intake_plugin @@ -431,10 +433,10 @@ def visualize_dataset(dataset, update_cache=False, **kwargs): if path is None: raise ValueError('No dataset file found') - if file_format not in list_plugins('io'): + if file_format not in list_plugins(static.PluginType.IO): raise ValueError('No reader available for: %s' % file_format) - io = load_plugins('io', file_format)[file_format] + io = load_plugins(static.PluginType.IO, file_format)[file_format] title = m.get('display_name') if title is None: @@ -471,9 +473,9 @@ def get_visualization_options(dataset, fmt='json'): if path is None: raise ValueError('No dataset file found') - if file_format not in list_plugins('io'): + if file_format not in list_plugins(static.PluginType.IO): raise ValueError('No reader available for: %s' % file_format) - 
io = load_plugins('io', file_format)[file_format] + io = load_plugins(static.PluginType.IO, file_format)[file_format] return io.visualize_options(path, fmt) diff --git a/quest/api/manage.py b/quest/api/manage.py index b0689e4c..b8554ede 100644 --- a/quest/api/manage.py +++ b/quest/api/manage.py @@ -1,12 +1,13 @@ import os import shutil -from ..util.log import logger -from quest.database.database import get_db, db_session, select_datasets + +from .tasks import add_async from .projects import _get_project_dir from .collections import get_collections from .metadata import get_metadata, update_metadata -from .tasks import add_async -from .. import util +from ..static import UriType, DatasetSource +from ..util import logger, classify_uris, uuid, parse_service_uri +from ..database.database import get_db, db_session, select_datasets @add_async @@ -30,16 +31,16 @@ def delete(uris): return True # group uris by type - grouped_uris = util.classify_uris(uris, - as_dataframe=False, - exclude=['services', 'publishers'], - require_same_type=True) + grouped_uris = classify_uris(uris, + as_dataframe=False, + exclude=[UriType.SERVICE, UriType.PUBLISHER], + require_same_type=True) resource = list(grouped_uris)[0] uris = grouped_uris[resource] db = get_db() for uri in uris: - if resource == 'collections': + if resource == UriType.COLLECTION: if uri not in get_collections(): logger.error('Collection does not exist: %s', uri) raise ValueError('Collection does not exists') @@ -56,15 +57,15 @@ def delete(uris): logger.info('deleting all data under path: %s' % path) shutil.rmtree(path) - if resource == 'datasets': + if resource == UriType.DATASET: with db_session: dataset = db.Dataset[uri] - if dataset.source == 'derived': + if dataset.source == DatasetSource.DERIVED: catalog_entry_datasets = select_datasets(lambda d: d.catalog_entry == dataset.catalog_entry) if len(catalog_entry_datasets) == 1: - _, _, catalog_id = util.parse_service_uri(dataset.catalog_entry) + _, _, catalog_id = parse_service_uri(dataset.catalog_entry) db.QuestCatalog[catalog_id].delete() try: @@ -83,10 +84,10 @@ def move(uris, destination_collection, as_dataframe=None, expand=None): if not uris: return {} - grouped_uris = util.classify_uris(uris, - as_dataframe=False, - exclude=['services', 'publishers', 'collections'], - require_same_type=True) + grouped_uris = classify_uris(uris, + as_dataframe=False, + exclude=[UriType.SERVICE, UriType.PUBLISHER, UriType.COLLECTION], + require_same_type=True) resource = list(grouped_uris)[0] uris = grouped_uris[resource] project_path = _get_project_dir() @@ -95,7 +96,7 @@ def move(uris, destination_collection, as_dataframe=None, expand=None): new_datasets = [] for uri in uris: - if resource == 'datasets': + if resource == UriType.DATASET: dataset_metadata = get_metadata(uri)[uri] collection_path = os.path.join(project_path, dataset_metadata['collection']) _move_dataset(dataset_metadata, collection_path, destination_collection_path) @@ -117,10 +118,10 @@ def copy(uris, destination_collection, as_dataframe=None, expand=None): if not uris: return {} - grouped_uris = util.classify_uris(uris, - as_dataframe=False, - exclude=['services', 'publishers', 'collections'], - require_same_type=True) + grouped_uris = classify_uris(uris, + as_dataframe=False, + exclude=[UriType.SERVICE, UriType.PUBLISHER, UriType.COLLECTION], + require_same_type=True) resource = list(grouped_uris)[0] uris = grouped_uris[resource] project_path = _get_project_dir() @@ -129,7 +130,7 @@ def copy(uris, destination_collection, 
as_dataframe=None, expand=None): new_datasets = [] for uri in uris: - if resource == 'datasets': + if resource == UriType.DATASET: dataset_metadata = get_metadata(uri)[uri] collection_path = os.path.join(project_path, dataset_metadata['collection']) @@ -146,7 +147,7 @@ def copy(uris, destination_collection, as_dataframe=None, expand=None): def _copy_dataset(dataset_metadata, collection_path, destination_collection_path): - new_name = util.uuid('dataset') + new_name = uuid('dataset') db = get_db() with db_session: db_metadata = db.Dataset[dataset_metadata['name']].to_dict() diff --git a/quest/api/metadata.py b/quest/api/metadata.py index be9aa70f..f1b165a6 100644 --- a/quest/api/metadata.py +++ b/quest/api/metadata.py @@ -1,13 +1,8 @@ -"""API functions related to metadata. - -get/update metadata for projects/collections/datasets. -""" - import pandas as pd -from .. import util -from .. import plugins from ..static import UriType +from ..plugins import load_providers +from ..util import classify_uris, construct_service_uri, parse_service_uri from ..database import get_db, db_session, select_collections, select_datasets @@ -25,7 +20,7 @@ def get_metadata(uris, as_dataframe=False): metadata at each uri keyed on uris """ # group uris by type - grouped_uris = util.classify_uris(uris) + grouped_uris = classify_uris(uris) # handle case when no uris are passed in if not any(grouped_uris): metadata = pd.DataFrame() @@ -36,17 +31,17 @@ def get_metadata(uris, as_dataframe=False): metadata = [] # get metadata for service type uris - if 'services' in grouped_uris.groups.keys(): - svc_df = grouped_uris.get_group('services') - svc_df[['provider', 'service', 'catalog_id']] = svc_df['uri'].apply(util.parse_service_uri).apply(pd.Series) + if UriType.SERVICE in grouped_uris.groups.keys(): + svc_df = grouped_uris.get_group(UriType.SERVICE) + svc_df[['provider', 'service', 'catalog_id']] = svc_df['uri'].apply(parse_service_uri).apply(pd.Series) for (provider, service), grp in svc_df.groupby(['provider', 'service']): - provider_plugin = plugins.load_providers()[provider] + provider_plugin = load_providers()[provider] if not grp.query('catalog_id != catalog_id').empty: service_metadata = provider_plugin.get_services()[service] - index = util.construct_service_uri(provider, service) + index = construct_service_uri(provider, service) metadata.append(pd.DataFrame(service_metadata, index=[index])) selected_catalog_entries = grp.query('catalog_id == catalog_id').uri.tolist() @@ -55,27 +50,27 @@ def get_metadata(uris, as_dataframe=False): catalog_entries = catalog_entries.loc[selected_catalog_entries] metadata.append(catalog_entries) - if 'publishers' in grouped_uris.groups.keys(): - svc_df = grouped_uris.get_group('publishers') - svc_df[['provider', 'publish', 'catalog_id']] = svc_df['uri'].apply(util.parse_service_uri).apply(pd.Series) + if UriType.PUBLISHER in grouped_uris.groups.keys(): + svc_df = grouped_uris.get_group(UriType.PUBLISHER) + svc_df[['provider', 'publish', 'catalog_id']] = svc_df['uri'].apply(parse_service_uri).apply(pd.Series) for (provider, publisher), grp in svc_df.groupby(['provider', 'publish']): - provider_plugin = plugins.load_providers()[provider] + provider_plugin = load_providers()[provider] publisher_metadata = provider_plugin.get_publishers()[publisher] - index = util.construct_service_uri(provider, publisher) + index = construct_service_uri(provider, publisher) metadata.append(pd.DataFrame(publisher_metadata, index=[index])) - if 'collections' in grouped_uris.groups.keys(): + if 
UriType.COLLECTION in grouped_uris.groups.keys(): # get metadata for collections - tmp_df = grouped_uris.get_group('collections') + tmp_df = grouped_uris.get_group(UriType.COLLECTION) collections = select_collections(lambda c: c.name in tmp_df['uri'].tolist()) collections = pd.DataFrame(collections) collections.set_index('name', inplace=True, drop=False) metadata.append(collections) - if 'datasets' in grouped_uris.groups.keys(): - tmp_df = grouped_uris.get_group('datasets') + if UriType.DATASET in grouped_uris.groups.keys(): + tmp_df = grouped_uris.get_group(UriType.DATASET) datasets = select_datasets(lambda c: c.name in tmp_df['uri'].tolist()) datasets = pd.DataFrame(datasets) datasets.set_index('name', inplace=True, drop=False) @@ -116,10 +111,7 @@ def update_metadata(uris, display_name=None, description=None, } # group uris by type - grouped_uris = util.classify_uris(uris, - as_dataframe=True, - exclude=[UriType.PUBLISHER], - require_same_type=True) + grouped_uris = classify_uris(uris, as_dataframe=True, exclude=[UriType.PUBLISHER], require_same_type=True) resource = list(grouped_uris.groups.keys())[0] uris = grouped_uris.get_group(resource) get_db_entity = get_db_entity_funcs[resource] @@ -153,9 +145,7 @@ def update_metadata(uris, display_name=None, description=None, metadata = [metadata] quest_metadata = [quest_metadata] - for uri, name, desc, meta, quest_meta in zip(uris, display_name, - description, metadata, - quest_metadata): + for uri, name, desc, meta, quest_meta in zip(uris, display_name, description, metadata, quest_metadata): if quest_meta is None: quest_meta = {} diff --git a/quest/api/parameters.py b/quest/api/parameters.py index d60aaf91..716ae793 100644 --- a/quest/api/parameters.py +++ b/quest/api/parameters.py @@ -1,8 +1,10 @@ -"""API functions related to Parameters.""" +import os + import pandas as pd + from .providers import get_services -import os -from .. import util +from ..util import parse_service_uri, get_cache_dir +from ..plugins import load_providers def get_mapped_parameters(): @@ -35,7 +37,7 @@ def get_parameters(service_uri, update_cache=False): """ - provider, service, catalog_id = util.parse_service_uri(service_uri) + provider, service, catalog_id = parse_service_uri(service_uri) parameters = _read_cached_parameters(provider, service, update_cache=update_cache) if isinstance(parameters, pd.DataFrame) and catalog_id: @@ -62,7 +64,7 @@ def delete_parameter(): def _read_cached_parameters(provider, service, update_cache=False): """read cached parameters.""" - cache_file = os.path.join(util.get_cache_dir(), provider, service + '_parameters.h5') + cache_file = os.path.join(get_cache_dir(), provider, service + '_parameters.h5') if update_cache: return _get_parameters(provider, service, cache_file) @@ -75,7 +77,7 @@ def _read_cached_parameters(provider, service, update_cache=False): def _get_parameters(provider, service, cache_file): - driver = util.load_providers()[provider] + driver = load_providers()[provider] parameters = driver.get_parameters(service) os.makedirs(os.path.split(cache_file)[0], exist_ok=True) if isinstance(parameters, pd.DataFrame): diff --git a/quest/api/projects.py b/quest/api/projects.py index e3ef82d1..5f2493cd 100644 --- a/quest/api/projects.py +++ b/quest/api/projects.py @@ -1,10 +1,10 @@ -"""API functions related to Projects.""" import os -import pandas as pd import shutil -from ..util.log import logger -from .. 
import util -from quest.database.database import db_session, get_db, init_db + +import pandas as pd + +from ..util import logger, get_projects_dir, read_yaml, write_yaml +from ..database.database import db_session, get_db, init_db PROJECT_DB_FILE = 'metadata.db' @@ -115,7 +115,7 @@ def delete_project(name): folder = projects[name]['folder'] if not os.path.isabs(folder): - path = os.path.join(util.get_projects_dir(), folder) + path = os.path.join(get_projects_dir(), folder) else: path = folder if os.path.exists(path): @@ -134,7 +134,7 @@ def get_active_project(): """ path = _get_projects_index_file() - contents = util.read_yaml(path) + contents = read_yaml(path) default_project = contents.get('active_project') if default_project is None: projects = contents.get('projects') or _create_default_project() @@ -143,7 +143,7 @@ def get_active_project(): 'active_project': default_project, 'projects': projects }) - util.write_yaml(path, contents) + write_yaml(path, contents) return default_project @@ -169,7 +169,7 @@ def get_projects(expand=False, as_dataframe=False): for name, project in _load_projects().items(): path = project['folder'] if not os.path.isabs(path): - path = os.path.join(util.get_projects_dir(), path) + path = os.path.join(get_projects_dir(), path) data = _load_project(name) data.update({ @@ -242,11 +242,11 @@ def set_active_project(name): """ path = _get_projects_index_file() - contents = util.read_yaml(path) + contents = read_yaml(path) if name not in contents['projects'].keys(): raise ValueError('Project %s does not exist' % name) contents.update({'active_project': name}) - util.write_yaml(path, contents) + write_yaml(path, contents) get_db(active_db(), reconnect=True) # change active database return name @@ -268,7 +268,7 @@ def _new_project(name, display_name=None, description=None, metadata=None, folde metadata = {} if not os.path.isabs(folder): - path = os.path.join(util.get_projects_dir(), folder) + path = os.path.join(get_projects_dir(), folder) else: path = folder @@ -307,7 +307,7 @@ def _load_project(name): def _load_projects(): """load list of collections.""" path = _get_projects_index_file() - projects = util.read_yaml(path).get('projects') + projects = read_yaml(path).get('projects') if not projects: projects = _create_default_project() @@ -317,9 +317,9 @@ def _load_projects(): def _write_projects(projects): """write list of collections to file.""" path = _get_projects_index_file() - contents = util.read_yaml(path) + contents = read_yaml(path) contents.update({'projects': projects}) - util.write_yaml(path, contents) + write_yaml(path, contents) def _get_project_dir(): @@ -333,10 +333,10 @@ def _get_project_db(name): path = projects[name]['folder'] if not os.path.isabs(path): - path = os.path.join(util.get_projects_dir(), path) + path = os.path.join(get_projects_dir(), path) return os.path.join(path, PROJECT_DB_FILE) def _get_projects_index_file(): - return os.path.join(util.get_projects_dir(), PROJECT_INDEX_FILE) + return os.path.join(get_projects_dir(), PROJECT_INDEX_FILE) diff --git a/quest/api/providers.py b/quest/api/providers.py index 08aac48e..4c8a8a2c 100644 --- a/quest/api/providers.py +++ b/quest/api/providers.py @@ -1,12 +1,9 @@ -"""API functions related to Services. +import os +import requests -Providers are inferred by aggregating information from service plugins. 
-""" -from ..util import save_settings, get_settings, update_settings, parse_service_uri -from quest.database.database import get_db, db_session from ..plugins import load_providers -import requests -import os +from ..database.database import get_db, db_session +from ..util import save_settings, get_settings, update_settings, parse_service_uri def get_providers(expand=None, update_cache=False): diff --git a/quest/api/tasks.py b/quest/api/tasks.py index 9bcbeccc..70215c04 100644 --- a/quest/api/tasks.py +++ b/quest/api/tasks.py @@ -1,12 +1,14 @@ -from concurrent.futures import CancelledError -from functools import wraps -from distributed import Client, LocalCluster +import sys import psutil + import pandas as pd from tornado import gen -import sys -from ..util import listify -from ..util.log import logger +from functools import wraps +from distributed import Client, LocalCluster +from concurrent.futures import CancelledError + +from ..static import DatasetStatus +from ..util import listify, logger _cluster = None tasks = {} @@ -44,7 +46,7 @@ def wrapper(*args, **kwargs): 'fn': f.__name__, 'args': args, 'kwargs': kwargs, - 'status': 'pending', + 'status': DatasetStatus.PENDING, 'result': None, } return future.key @@ -60,7 +62,7 @@ def get_pending_tasks(**kwargs): (filters={}, expand=None, as_dataframe=None, with_future=None) """ - filters = {'status': 'pending'} + filters = {'status': DatasetStatus.PENDING} if 'filters' in kwargs: kwargs['filters'].update(filters) else: @@ -169,9 +171,9 @@ def remove_tasks(task_ids=None, status=None): else: status = ['cancelled', 'finished', 'lost', 'error'] - if 'pending' in status: + if DatasetStatus.PENDING in status: logger.error('cannot remove pending tasks, please cancel them first') - status.remove('pending') + status.remove(DatasetStatus.PENDING) task_list = get_tasks(filters={'status': status, 'task_ids': task_ids}) diff --git a/quest/api/tools.py b/quest/api/tools.py index ebae2bbe..55d0d26d 100644 --- a/quest/api/tools.py +++ b/quest/api/tools.py @@ -1,14 +1,11 @@ -"""API functions related to data Tools. - -This will eventually hold filter related functionality -""" import param -from .. 
import util -from ..plugins.plugins import load_plugins from .datasets import open_dataset from .metadata import get_metadata from .tasks import add_async +from ..util import to_geojson +from ..static import UriType, PluginType +from ..plugins.plugins import load_plugins def get_tools(filters=None, expand=False, **kwargs): @@ -36,7 +33,7 @@ def get_tools(filters=None, expand=False, **kwargs): all available tools """ - avail = [dict(name=k, **v.metadata) for k, v in load_plugins('tool').items()] + avail = [dict(name=k, **v.metadata) for k, v in load_plugins(PluginType.TOOL).items()] if filters is not None: for k, v in filters.items(): @@ -94,7 +91,7 @@ def run_tool(name, options=None, as_dataframe=None, expand=None, as_open_dataset options = options or dict() options.update(kwargs) - plugin = load_plugins('tool', name)[name] + plugin = load_plugins(PluginType.TOOL, name)[name] result = plugin.run_tool(**options) new_datasets = result.get('datasets', []) @@ -106,7 +103,7 @@ def run_tool(name, options=None, as_dataframe=None, expand=None, as_open_dataset if expand: new_datasets = list(new_datasets.to_dict(orient='index').values()) - new_catalog_entries = util.to_geojson(new_catalog_entries)['catalog_entries'] + new_catalog_entries = to_geojson(new_catalog_entries)['catalog_entries'] result.update({'datasets': new_datasets, 'catalog_entries': new_catalog_entries}) @@ -130,5 +127,5 @@ def get_tool_options(name, fmt='json', **kwargs): tool options (json scheme): tool options that can be applied when calling quest.api.run_filter """ - plugin = load_plugins('tool', name)[name] + plugin = load_plugins(PluginType.TOOL, name)[name] return plugin.get_tool_options(fmt, **kwargs) diff --git a/quest/api/version.py b/quest/api/version.py index 1db5cbc0..5c7ef057 100644 --- a/quest/api/version.py +++ b/quest/api/version.py @@ -1,4 +1,3 @@ -"""API functions related to versions.""" import quest diff --git a/quest/api/workflows.py b/quest/api/workflows.py index 1800bc76..d1daea86 100644 --- a/quest/api/workflows.py +++ b/quest/api/workflows.py @@ -5,8 +5,8 @@ from .catalog import search_catalog, add_datasets from .tools import run_tool from ..database import get_db, db_session -from ..static import DatasetStatus from ..util import logger as log +from ..static import DatasetStatus, UriType def get_data( diff --git a/quest/database/database.py b/quest/database/database.py index 54284dd9..9dae6b48 100644 --- a/quest/database/database.py +++ b/quest/database/database.py @@ -1,4 +1,5 @@ from datetime import datetime + from pony import orm from pony.orm import db_session import shapely.wkt diff --git a/quest/plugins/base/publish_base.py b/quest/plugins/base/publish_base.py index a19ca1fb..d3e5b935 100644 --- a/quest/plugins/base/publish_base.py +++ b/quest/plugins/base/publish_base.py @@ -1,6 +1,7 @@ -from quest.util.param_util import format_json_options import param +from ...util.param_util import format_json_options + class PublishBase(param.Parameterized): publisher_name = None diff --git a/quest/plugins/base/service_base.py b/quest/plugins/base/service_base.py index b8bfaf3d..287afe1b 100644 --- a/quest/plugins/base/service_base.py +++ b/quest/plugins/base/service_base.py @@ -2,13 +2,13 @@ import os import pickle -import geopandas as gpd -import pandas as pd -import param import ulmo +import param +import pandas as pd +import geopandas as gpd from shapely.geometry import box, Point -from ... 
import util +from quest import util reserved_catalog_entry_fields = [ diff --git a/quest/plugins/base/tool_base.py b/quest/plugins/base/tool_base.py index c0035149..09080418 100644 --- a/quest/plugins/base/tool_base.py +++ b/quest/plugins/base/tool_base.py @@ -1,10 +1,10 @@ -import abc import os +import abc import param -from ...static import DatasetStatus, DatasetSource from ...util import listify, format_json_options, uuid +from ...static import DatasetStatus, DatasetSource, UriType class ToolBase(param.ParameterizedFunction): diff --git a/quest/plugins/plugins.py b/quest/plugins/plugins.py index 87c7d718..04b067b6 100644 --- a/quest/plugins/plugins.py +++ b/quest/plugins/plugins.py @@ -1,33 +1,37 @@ -from .base import ProviderBase, IoBase, ToolBase -from ..util import listify, get_settings import importlib import inspect import pkgutil import logging + +from .base import ProviderBase, IoBase, ToolBase +from ..static import PluginType +from ..util import listify, get_settings + + logger = logging.getLogger('quest') plugin_instances = { - 'provider': None, - 'io': None, - 'tool': None, + PluginType.PROVIDER: None, + PluginType.IO: None, + PluginType.TOOL: None, } plugin_namespaces = { - 'provider': 'quest_provider_plugins', - 'io': 'quest_io_plugins', - 'tool': 'quest_tool_plugins', + PluginType.PROVIDER: 'quest_provider_plugins', + PluginType.IO: 'quest_io_plugins', + PluginType.TOOL: 'quest_tool_plugins', } plugin_base_classes = { - 'provider': ProviderBase, - 'io': IoBase, - 'tool': ToolBase, + PluginType.PROVIDER: ProviderBase, + PluginType.IO: IoBase, + PluginType.TOOL: ToolBase, } plugin_instantiate_funcs = { - 'provider': lambda x: x(), - 'io': lambda x: x(), - 'tool': lambda x: x.instance(), + PluginType.PROVIDER: lambda x: x(), + PluginType.IO: lambda x: x(), + PluginType.TOOL: lambda x: x.instance(), } @@ -109,8 +113,8 @@ def load_providers(update_cache=False): settings = get_settings() - if update_cache or plugin_instances['provider'] is None: - providers = load_plugins('provider', update_cache=True) + if update_cache or plugin_instances[PluginType.PROVIDER] is None: + providers = load_plugins(PluginType.PROVIDER, update_cache=True) if len(settings.get('USER_SERVICES', [])) > 0: from quest.plugins import user_provider for uri in settings.get('USER_SERVICES', []): @@ -122,8 +126,8 @@ def load_providers(update_cache=False): 'due to the following exception: \n\t{} {}.' 
.format('user', uri, e.__class__.__name__, str(e))) - plugin_instances['provider'] = providers + plugin_instances[PluginType.PROVIDER] = providers else: - providers = plugin_instances['provider'] + providers = plugin_instances[PluginType.PROVIDER] return providers diff --git a/quest/plugins/user_provider.py b/quest/plugins/user_provider.py index 9276faf6..b26a2671 100644 --- a/quest/plugins/user_provider.py +++ b/quest/plugins/user_provider.py @@ -1,15 +1,18 @@ -from geojson import Feature, FeatureCollection, Polygon -from quest.plugins.base import ProviderBase, ServiceBase -from io import StringIO -from quest import util -import pandas as pd -import requests -import warnings -import geojson +import os import shutil -import param + import yaml -import os +import param +import geojson +import requests +import warnings +import pandas as pd +from io import StringIO +from geojson import Feature, FeatureCollection + +from ..static import UriType +from ..plugins.base import ProviderBase, ServiceBase +from ..util import listify, to_geodataframe, bbox2poly, is_remote_uri def get_user_service_base(): @@ -19,7 +22,7 @@ class UserServiceBase(ServiceBase): @classmethod def instance(cls, service_name, service_data, provider, uri, is_remote): - parameters = util.listify(service_data['metadata'].pop('parameters')) + parameters = listify(service_data['metadata'].pop('parameters')) if len(parameters) > 1: cls.params()['parameter'].objects = sorted(parameters) @@ -34,7 +37,7 @@ def instance(cls, service_name, service_data, provider, uri, is_remote): self._parameter_map = {p: p for p in parameters} for k, v in service_data['metadata'].items(): setattr(self, k, v) - self.service_folder = util.listify(service_data['service_folder']) + self.service_folder = listify(service_data['service_folder']) if len(self.service_folder) > 1: raise ValueError() # Now only supporting one service folder else: @@ -52,9 +55,10 @@ def download(self, catalog_id, file_path, dataset, **kwargs): fnames = self.datasets_mapping if isinstance(self.datasets_mapping, dict): fnames = self.dataset_mapping[self.parameter] - fnames = [f.replace('', catalog_id) for f in util.listify(fnames)] + fnames = [f.replace('', catalog_id) for f in listify(fnames)] else: - fnames = self.catalog_entries.loc[catalog_id]['_download_url'] # TODO where does self.catalog_entries get initialized? + fnames = self.catalog_entries.loc[catalog_id]['_download_url'] + # TODO where does self.catalog_entries get initialized? 
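The plugins.py hunks above rework the provider/io/tool registries around the new PluginType constants instead of bare strings. For orientation, below is a minimal, self-contained sketch of the namespace-package discovery pattern those registries are built on; it is only an illustration under stated assumptions, not quest's actual load_plugins (which additionally layers in caching, settings, and user-defined services), and every name other than the standard-library calls is hypothetical.

import inspect
import importlib
import pkgutil


def discover_plugins(namespace, base_class, instantiate=lambda cls: cls()):
    """Walk a plugin namespace package (e.g. 'quest_io_plugins') and
    instantiate every subclass of base_class found in its modules."""
    found = {}
    package = importlib.import_module(namespace)
    for _, module_name, _ in pkgutil.iter_modules(package.__path__, namespace + '.'):
        module = importlib.import_module(module_name)
        for _, obj in inspect.getmembers(module, inspect.isclass):
            # this simplified version also picks up subclasses that a module
            # merely imports, which a real loader might want to filter out
            if issubclass(obj, base_class) and obj is not base_class:
                instance = instantiate(obj)
                found[getattr(instance, 'name', obj.__name__)] = instance
    return found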
final_path = [] for src, file_name in zip(self._get_paths(fnames), fnames): @@ -105,11 +109,11 @@ def search_catalog(self, **kwargs): all_catalog_entries = [] - for p in util.listify(paths): + for p in listify(paths): with uri_open(p, self.is_remote) as f: if fmt.lower() == 'geojson': catalog_entries = geojson.load(f) - catalog_entries = util.to_geodataframe(catalog_entries) + catalog_entries = to_geodataframe(catalog_entries) if fmt.lower() == 'mbr': # TODO creating FeatureCollection not needed anymore @@ -120,9 +124,12 @@ def search_catalog(self, **kwargs): for line in f: catalog_id, x1, y1, x2, y2 = line.split() properties = {} - polys.append(Feature(geometry=util.bbox2poly(x1, y1, x2, y2, as_geojson=True), properties=properties, id=catalog_id)) + polys.append(Feature(geometry=bbox2poly(x1, y1, x2, y2, + as_geojson=True), + properties=properties, + id=catalog_id)) catalog_entries = FeatureCollection(polys) - catalog_entries = util.to_geodataframe(catalog_entries) + catalog_entries = to_geodataframe(catalog_entries) if fmt.lower() == 'mbr-csv': # TODO merge this with the above, @@ -133,16 +140,20 @@ def search_catalog(self, **kwargs): catalog_id, y1, x1, y2, x2 = line.split(',') catalog_id = catalog_id.split('.')[0] properties = {} - polys.append(Feature(geometry=util.bbox2poly(x1, y1, x2, y2, as_geojson=True), properties=properties, id=catalog_id)) + polys.append(Feature(geometry=bbox2poly(x1, y1, x2, y2, + as_geojson=True), + properties=properties, + id=catalog_id)) catalog_entries = FeatureCollection(polys) - catalog_entries = util.to_geodataframe(catalog_entries) + catalog_entries = to_geodataframe(catalog_entries) if fmt.lower() == 'isep-json': # uses exported json file from ISEP DataBase # assuming ISEP if a geotypical service for now. catalog_entries = pd.read_json(p) catalog_entries.rename(columns={'_id': 'service_id'}, inplace=True) - catalog_entries['download_url'] = catalog_entries['files'].apply(lambda x: os.path.join(x[0].get('file location'), x[0].get('file name'))) + catalog_entries['download_url'] = catalog_entries['files'].apply( + lambda x: os.path.join(x[0].get('file location'), x[0].get('file name'))) # remove leading slash from file path catalog_entries['download_url'] = catalog_entries['download_url'].str.lstrip('/') catalog_entries['parameters'] = 'met' @@ -160,7 +171,7 @@ def search_catalog(self, **kwargs): def _get_paths(self, filenames): folder = self.service_folder paths = list() - for filename in util.listify(filenames): + for filename in listify(filenames): if self.uri.startswith('http'): paths.append(self.uri.rstrip('/') + '/{}/{}'.format(folder, filename)) else: @@ -180,7 +191,7 @@ class UserProvider(ProviderBase): def __init__(self, uri, name=None, use_cache=True, update_frequency='M'): super(UserProvider, self).__init__(name=name, use_cache=use_cache, update_frequency=update_frequency) self.uri = uri - self.is_remote = util.is_remote_uri(uri) + self.is_remote = is_remote_uri(uri) self._register() @property diff --git a/quest/static/__init__.py b/quest/static/__init__.py index bf3f5517..8220e3ca 100644 --- a/quest/static/__init__.py +++ b/quest/static/__init__.py @@ -42,3 +42,9 @@ class DatasetSource: DERIVED = 'derived' WEB_SERVICE = 'download' USER = 'user-created' + + +class PluginType: + IO = 'io' + TOOL = 'tool' + PROVIDER = 'provider' diff --git a/quest/tools/__init__.py b/quest/tools/__init__.py index ab6bbadd..c2381c76 100644 --- a/quest/tools/__init__.py +++ b/quest/tools/__init__.py @@ -1,9 +1,10 @@ import sys +from ..static import PluginType 
from ..plugins import load_plugins -tools = load_plugins('tool') +tools = load_plugins(PluginType.TOOL) def codify(name): diff --git a/quest/util/__init__.py b/quest/util/__init__.py index 2ef99c08..ca543a80 100644 --- a/quest/util/__init__.py +++ b/quest/util/__init__.py @@ -1,4 +1,3 @@ -# flake8: noqa from .misc import * from .io import read_yaml, write_yaml from .config import get_settings, save_settings, update_settings, update_settings_from_file diff --git a/quest/util/config.py b/quest/util/config.py index 0fe68560..7614da48 100644 --- a/quest/util/config.py +++ b/quest/util/config.py @@ -1,12 +1,10 @@ -"""Module wide settings. - -""" -import logging -import yaml import os +import yaml +import logging from ..database import get_db + log = logging.getLogger(__name__) settings = {} diff --git a/quest/util/io.py b/quest/util/io.py index 3eb80e92..963e512f 100644 --- a/quest/util/io.py +++ b/quest/util/io.py @@ -1,4 +1,3 @@ -"""io utilities.""" import os import yaml diff --git a/quest/util/misc.py b/quest/util/misc.py index 0a1df3d6..6d657cc1 100644 --- a/quest/util/misc.py +++ b/quest/util/misc.py @@ -26,8 +26,9 @@ def _abs_path(path, mkdir=True): path (string): A string that is a filename. mkdir (bool): A boolean if the user wants to create the directory. Returns: - A string of an absolute path with a file from somwhere with in the Quest directory. + A string of an absolute path with a file from somewhere with in the Quest directory. """ + if not os.path.isabs(path): path = os.path.join(get_quest_dir(), path) @@ -207,6 +208,7 @@ def get_cache_dir(service=None): """ settings = get_settings() path = _abs_path(settings['CACHE_DIR']) + if service is not None: path = os.path.join(path, service) diff --git a/quest_io_plugins/raster_gdal.py b/quest_io_plugins/raster_gdal.py index 73caa8f0..eabb6a15 100644 --- a/quest_io_plugins/raster_gdal.py +++ b/quest_io_plugins/raster_gdal.py @@ -1,13 +1,12 @@ -"""io plugin for timeseries datasets.""" - -from quest.plugins import IoBase -from quest.util import convert_nodata_to_nans import os import subprocess + import rasterio import xarray as xr -import numpy as np +from quest.plugins import IoBase +from quest.static import DataType +from quest.util import convert_nodata_to_nans class RasterGdal(IoBase): diff --git a/quest_io_plugins/timeseries_hdf5.py b/quest_io_plugins/timeseries_hdf5.py index bf3918a8..3e92f0e0 100644 --- a/quest_io_plugins/timeseries_hdf5.py +++ b/quest_io_plugins/timeseries_hdf5.py @@ -1,7 +1,6 @@ -"""io plugin for timeseries datasets.""" - import json +from quest.static import DataType from quest_io_plugins.xyHdf5 import XYHdf5 diff --git a/quest_io_plugins/xyHdf5.py b/quest_io_plugins/xyHdf5.py index b6de770f..6c7a17e1 100644 --- a/quest_io_plugins/xyHdf5.py +++ b/quest_io_plugins/xyHdf5.py @@ -1,12 +1,12 @@ -import json import os +import json import pandas as pd import matplotlib.pyplot as plt from quest.plugins import IoBase -from quest.util import setattr_on_dataframe from quest.util.log import logger +from quest.util import setattr_on_dataframe class XYHdf5(IoBase): diff --git a/quest_provider_plugins/cuahsi_hs.py b/quest_provider_plugins/cuahsi_hs.py index 1251d5e5..da4d911a 100644 --- a/quest_provider_plugins/cuahsi_hs.py +++ b/quest_provider_plugins/cuahsi_hs.py @@ -1,15 +1,16 @@ -from quest.plugins import ProviderBase, SingleFileServiceBase, PublishBase +import os +import param + +import pandas as pd +from getpass import getpass +from shapely.geometry import Point, box from hs_restclient import HydroShare, 
HydroShareAuthBasic -from quest.database.database import get_db, db_session +from quest.plugins import ProviderBase, SingleFileServiceBase, PublishBase + from quest.api.metadata import get_metadata from quest.util import param_util, listify, log -from quest.static import DatasetStatus -from shapely.geometry import Point, box -from quest.static import ServiceType -from getpass import getpass -import pandas as pd -import param -import os +from quest.database.database import get_db, db_session +from quest.static import DatasetStatus, ServiceType, GeomType class HSServiceBase(SingleFileServiceBase): @@ -28,7 +29,7 @@ class HSGeoService(HSServiceBase): description = 'HydroShare geo-discrete resources.' service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' + geom_type = GeomType.POINT datatype = 'zip' geographical_areas = ['Worldwide'] bounding_boxes = [ diff --git a/quest_provider_plugins/kitware_girder.py b/quest_provider_plugins/kitware_girder.py index d1df7bed..a1ad1efc 100644 --- a/quest_provider_plugins/kitware_girder.py +++ b/quest_provider_plugins/kitware_girder.py @@ -1,9 +1,10 @@ +import param +import girder_client + +from quest.util import param_util, log +from quest.api.metadata import get_metadata from quest.plugins import ProviderBase, PublishBase from quest.database.database import get_db, db_session -from quest.api.metadata import get_metadata -from quest.util import param_util, log -import girder_client -import param # There is no service base fore the Live Girder Server due to the general layout of how # the folders and files are layed out. It would be super difficult to look through diff --git a/quest_provider_plugins/nasa.py b/quest_provider_plugins/nasa.py index 87685594..1439b333 100644 --- a/quest_provider_plugins/nasa.py +++ b/quest_provider_plugins/nasa.py @@ -1,8 +1,10 @@ -from quest.database.database import get_db, db_session -from quest.util import log -from getpass import getpass -import pandas as pd import requests +import pandas as pd +from getpass import getpass + +from quest.util import log +from quest.database.database import get_db, db_session +from quest.static import ServiceType, GeomType, DataType collections_url = 'https://cmr.earthdata.nasa.gov/search/collections.json?short_name=%s' @@ -12,10 +14,10 @@ class NasaServiceBase(SingleFileServiceBase): - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = False - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Worldwide'] _parameter_map = { 'elevation': 'elevation' diff --git a/quest_provider_plugins/noaa_coastwatch.py b/quest_provider_plugins/noaa_coastwatch.py index f56db770..4e805685 100644 --- a/quest_provider_plugins/noaa_coastwatch.py +++ b/quest_provider_plugins/noaa_coastwatch.py @@ -1,14 +1,14 @@ -"""QUEST wrapper for NCDC GHCN and GSOD Services.""" import os -import pandas as pd import param -from urllib.parse import quote, urlencode +import pandas as pd from urllib.error import HTTPError import json +from urllib.parse import quote, urlencode -from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins from quest.util.log import logger +from quest.static import ServiceType, GeomType, DataType +from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins class NoaaServiceBase(TimePeriodServiceBase): @@ -91,9 +91,9 @@ def download(self, catalog_id, file_path, dataset, **kwargs): metadata = { 
'file_path': file_path, 'file_format': 'timeseries-hdf5', + 'datatype': DataType.TIMESERIES, 'intake_plugin': 'quest_timeseries_hdf5', 'intake_args': json.dumps([file_path]), - 'datatype': 'timeseries', 'parameter': p.parameter, 'unit': units[self.parameter_code], 'service_id': 'svc://noaa:{}/{}'.format(self.service_name, catalog_id) @@ -140,10 +140,10 @@ class NoaaServiceNDBC(NoaaServiceBase): service_name = 'ndbc' display_name = 'NOAA National Data Buoy Center' description = 'NDBC Standard Meteorological Buoy Data' - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Worldwide'] bounding_boxes = [ [-177.75, -27.705, 179.001, 71.758], @@ -198,10 +198,10 @@ class NoaaServiceCoopsMet(NoaaServiceBase): service_name = 'coops-meteorological' display_name = 'NOAA COOPS Met' description = 'Center for Operational Oceanographic Products and Services' - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Worldwide'] bounding_boxes = [ [-180, -90, 180, 90], @@ -280,10 +280,10 @@ class NoaaServiceCoopsWater(NoaaServiceBase): service_name = 'coops-water' display_name = 'NOAA COOPS Water' description = 'Center for Operational Oceanographic Products and Services' - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Worldwide'] bounding_boxes = [ [-180, -90, 180, 90], diff --git a/quest_provider_plugins/noaa_ncdc.py b/quest_provider_plugins/noaa_ncdc.py index e8295e13..6622a57c 100644 --- a/quest_provider_plugins/noaa_ncdc.py +++ b/quest_provider_plugins/noaa_ncdc.py @@ -1,11 +1,13 @@ -from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins -from ulmo.ncdc import ghcn_daily, gsod -import pandas as pd -import param import os import json -# from ulmo.ncdc.ghcn_daily.core import _get_inventory as _get_ghcn_inventory +import param +import pandas as pd +from ulmo.ncdc import ghcn_daily, gsod + +from quest.static import ServiceType, GeomType, DataType +from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins + BASE_PATH = 'ncdc' @@ -93,9 +95,9 @@ def download(self, catalog_id, file_path, dataset, **kwargs): metadata = { 'file_path': file_path, 'file_format': 'timeseries-hdf5', + 'datatype': DataType.TIMESERIES, 'intake_plugin': 'quest_timeseries_hdf5', 'intake_args': json.dumps([file_path]), - 'datatype': 'timeseries', 'parameter': self.parameter, 'unit': self._unit_map[self.parameter], 'service_id': 'svc://ncdc:{}/{}'.format(self.service_name, catalog_id) @@ -113,10 +115,10 @@ class NcdcServiceGhcnDaily(NcdcServiceBase): service_name = 'ghcn-daily' display_name = 'NCDC GHCN Daily' description = 'Daily Meteorologic Data from the Global Historic Climate Network' - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Worldwide'] bounding_boxes = [ [-180, -90, 180, 90], @@ -165,10 +167,10 @@ class 
NcdcServiceGsod(NcdcServiceBase): service_name = 'gsod' display_name = 'NCDC GSOD' description = 'Daily Meteorologic Data from the Global Summary of the Day' - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Worldwide'] bounding_boxes = [ [-180, -90, 180, 90] diff --git a/quest_provider_plugins/quest_catalog.py b/quest_provider_plugins/quest_catalog.py index 3f2e03ac..db7d9148 100644 --- a/quest_provider_plugins/quest_catalog.py +++ b/quest_provider_plugins/quest_catalog.py @@ -1,8 +1,9 @@ -from quest.plugins import ProviderBase, SingleFileServiceBase -from quest.database.database import select_catalog_entries -from quest.static import ServiceType import pandas as pd +from quest.static import ServiceType, GeomType +from quest.database.database import select_catalog_entries +from quest.plugins import ProviderBase, SingleFileServiceBase + class QuestCatalogService(SingleFileServiceBase): service_name = 'quest' @@ -10,7 +11,7 @@ class QuestCatalogService(SingleFileServiceBase): description = 'Quest Catalog uses a database for derived datasets from Quest.' service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = ['Point', 'Line', 'Polygon'] + geom_type = [GeomType.POINT, GeomType.LINE, GeomType.POLYGON] datatype = None geographical_areas = ['Worldwide'] bounding_boxes = [ diff --git a/quest_provider_plugins/usgs_ned.py b/quest_provider_plugins/usgs_ned.py index 88e616df..2af0c0c5 100644 --- a/quest_provider_plugins/usgs_ned.py +++ b/quest_provider_plugins/usgs_ned.py @@ -1,18 +1,22 @@ -from quest.plugins import ProviderBase, SingleFileServiceBase -from quest import util -from ulmo.usgs import ned import os import json +from ulmo.usgs import ned + +from quest import util +from quest.static import ServiceType, DataType, GeomType +from quest.plugins import ProviderBase, SingleFileServiceBase + + DEFAULT_FILE_PATH = os.path.join('usgs','ned') CACHE_FILE = 'ned_%s_metadata.json' class UsgsNedServiceBase(SingleFileServiceBase): - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = False - geom_type = 'polygon' - datatype = 'raster' + geom_type = GeomType.POLYGON + datatype = DataType.RASTER geographical_areas = ['Alaska', 'USA', 'Hawaii'] bounding_boxes = [[-180, -90, 180, 90]] _parameter_map = { diff --git a/quest_provider_plugins/usgs_nlcd.py b/quest_provider_plugins/usgs_nlcd.py index d90edda8..81a32488 100644 --- a/quest_provider_plugins/usgs_nlcd.py +++ b/quest_provider_plugins/usgs_nlcd.py @@ -1,14 +1,14 @@ -"""providers based on www.sciencebase.gov.""" - -import pandas as pd import requests import json -from quest.plugins import ProviderBase, SingleFileServiceBase +import pandas as pd + from quest import util +from quest.static import ServiceType, DatasetSource +from quest.plugins import ProviderBase, SingleFileServiceBase class UsgsNlcdServiceBase(SingleFileServiceBase): - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = False geom_type = 'polygon' datatype = 'discrete-raster' @@ -92,17 +92,7 @@ class UsgsNlcdProvider(ProviderBase): service_list = [UsgsNlcdService2001, UsgsNlcdService2006, UsgsNlcdService2011] display_name = 'National Land Cover Database' description = 'The National Land Cover Database products are created through a cooperative project conducted 
by ' \ - 'the Multi-Resolution Land Characteristics (MRLC) Consortium. The MRLC Consortium is a partnership ' \ - 'of federal agencies (www.mrlc.gov), consisting of ' \ - 'the U.S. Geological Survey (USGS), ' \ - 'the National Oceanic and Atmospheric Administration (NOAA), ' \ - 'the U.S. Environmental Protection Agency (EPA), ' \ - 'the U.S. Department of Agriculture -Forest Service (USDA-FS), ' \ - 'the National Park Service (NPS), ' \ - 'the U.S. Fish and Wildlife Service (FWS), ' \ - 'the Bureau of Land Management (BLM), and ' \ - 'the USDA Natural Resources Conservation Service (NRCS).' - organization_name = 'United States Geological Survey' + 'the Multi-Resolution Land Characteristics (MRLC) Consortium.' organization_abbr = 'USGS' name = 'usgs-nlcd' diff --git a/quest_provider_plugins/usgs_nwis.py b/quest_provider_plugins/usgs_nwis.py index c6e6326f..3071e2f6 100644 --- a/quest_provider_plugins/usgs_nwis.py +++ b/quest_provider_plugins/usgs_nwis.py @@ -1,14 +1,16 @@ -"""QUEST wrapper for USGS NWIS Services.""" +import os -from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins -import concurrent.futures -from functools import partial +import param import pandas as pd -import os +import concurrent.futures from ulmo.usgs import nwis +from functools import partial + from quest import util -import param +from quest.static import ServiceType, GeomType, DataType import json +from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins + BASE_PATH = 'usgs-nwis' @@ -72,7 +74,7 @@ def download(self, catalog_id, file_path, dataset, **kwargs): 'intake_plugin': 'quest_timeseries_hdf5', 'intake_args': json.dumps([file_path]), 'file_format': 'timeseries-hdf5', - 'datatype': 'timeseries', + 'datatype': DataType.TIMESERIES, 'parameter': parameter, 'unit': data['variable']['units']['code'], 'service_id': 'svc://usgs-nwis:{}/{}'.format(self.service_name, catalog_id) @@ -108,7 +110,7 @@ def get_parameters(self, catalog_ids=None): chunks = list(_chunks(df.index.tolist())) func = partial(_site_info, service=self.service_name) - with concurrent.futures.ProcessPoolExecutor() as executor: + with concurrent.futures.ProcessPoolExecutor(max_workers=None) as executor: data = executor.map(func, chunks) data = pd.concat(data, ignore_index=True) @@ -148,10 +150,10 @@ class NwisServiceIV(NwisServiceBase): service_name = 'iv' display_name = 'NWIS Instantaneous Values Service' description = 'Retrieve current streamflow and other real-time data for USGS water sites since October 1, 2007' - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Alaska', 'USA', 'Hawaii'] bounding_boxes = [ (-178.19453125, 51.6036621094, -130.0140625, 71.4076660156), @@ -171,10 +173,10 @@ class NwisServiceDV(NwisServiceBase): display_name = 'NWIS Daily Values Service' description = 'Retrieve historical summarized daily data about streams, lakes and wells. Daily data available ' \ 'for USGS water sites include mean, median, maximum, minimum, and/or other derived values.' 
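The download() methods in the provider hunks above all record the same pair of fields: 'intake_plugin', naming an intake driver, and 'intake_args', a JSON-encoded list of positional arguments, which open_dataset later decodes to pick a reader dynamically. A minimal sketch of that round trip follows; it assumes the quest intake drivers from this series are installed (e.g. one registered as quest_timeseries_hdf5, so that intake exposes a matching open_quest_timeseries_hdf5 function), and the helper names are illustrative rather than quest API.

import json

import intake


def record_intake_metadata(file_path):
    # what a provider download() stores alongside the data file
    return {
        'intake_plugin': 'quest_timeseries_hdf5',
        'intake_args': json.dumps([file_path]),
    }


def open_from_intake_metadata(metadata, **kwargs):
    # what open_dataset does with those two fields: decode the stored args,
    # look up intake's auto-generated open_* function, and read the source
    args = json.loads(metadata['intake_args'])
    open_func = getattr(intake, 'open_' + metadata['intake_plugin'])
    return open_func(*args, **kwargs).read()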
- service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Alaska', 'USA', 'Hawaii'] bounding_boxes = [ (-178.19453125, 51.6036621094, -130.0140625, 71.4076660156), diff --git a/quest_provider_plugins/wmts_imagery.py b/quest_provider_plugins/wmts_imagery.py index 98c39f53..62664fdf 100644 --- a/quest_provider_plugins/wmts_imagery.py +++ b/quest_provider_plugins/wmts_imagery.py @@ -280,6 +280,8 @@ def _download_and_stitch_tiles(url, tile_indices, crop_bbox, zoom_level, max_til raise ValueError("{} tiles were requested, which exceeds the maximum tile limit of {}. " "Either increase the tile limit (max_tiles) or decrease the zoom level." .format(total_number_of_tiles, max_tiles)) + else: + log.info("There are {} tiles to download.".format(total_number_of_tiles)) # calculate full image height and width (count is calculated on the first time in the loop) height = number_of_y_tiles * TILE_SIZE diff --git a/quest_tool_plugins/raster/rst_base.py b/quest_tool_plugins/raster/rst_base.py index f2853368..d2058d75 100644 --- a/quest_tool_plugins/raster/rst_base.py +++ b/quest_tool_plugins/raster/rst_base.py @@ -1,27 +1,25 @@ -"""Functions required run raster filters""" - import json import rasterio -from quest.plugins import ToolBase from quest import util from quest.api import get_metadata, update_metadata - +from quest.plugins import ToolBase +from quest.static import DataType, UriType class RstBase(ToolBase): # metadata attributes group = 'raster' - operates_on_datatype = ['raster'] + operates_on_datatype = [DataType.RASTER] operates_on_geotype = None operates_on_parameters = None - produces_datatype = ['raster'] + produces_datatype = [DataType.RASTER] produces_geotype = None produces_parameters = None dataset = util.param.DatasetSelector(default=None, doc="""Dataset to apply filter to.""", - filters={'datatype': 'raster'}, + filters={'datatype': DataType.RASTER}, ) def _run_tool(self): diff --git a/quest_tool_plugins/raster/rst_merge.py b/quest_tool_plugins/raster/rst_merge.py index 18bc9f65..2608c1e7 100644 --- a/quest_tool_plugins/raster/rst_merge.py +++ b/quest_tool_plugins/raster/rst_merge.py @@ -1,20 +1,22 @@ import json -from quest.plugins import ToolBase -from quest import util -from quest.api import get_metadata, update_metadata +import param import rasterio -import rasterio.merge import rasterio.mask -from shapely.geometry import box -from fiona.crs import from_epsg +import rasterio.merge import geopandas as gpd -import param +from fiona.crs import from_epsg +from shapely.geometry import box + +from quest import util +from quest.plugins import ToolBase +from quest.static import DataType, UriType +from quest.api import get_metadata, update_metadata class RstMerge(ToolBase): _name = 'raster-merge' group = 'Multi-dataset' - operates_on_datatype = ['raster','discrete-raster'] + operates_on_datatype = [DataType.RASTER, 'discrete-raster'] datasets = util.param.DatasetListSelector(default=None, doc="""Dataset to run tool on.""", diff --git a/quest_tool_plugins/raster/rst_reprojection.py b/quest_tool_plugins/raster/rst_reprojection.py index 6f5a527c..0abf820e 100644 --- a/quest_tool_plugins/raster/rst_reprojection.py +++ b/quest_tool_plugins/raster/rst_reprojection.py @@ -1,19 +1,20 @@ -from quest.plugins import ToolBase -from quest import util -from quest.api import get_metadata, update_metadata +import param 
import rasterio import subprocess -import param +from quest import util +from quest.plugins import ToolBase +from quest.static import DataType, UriType +from quest.api import get_metadata, update_metadata class RstReprojection(ToolBase): _name = 'raster-reprojection' - operates_on_datatype = ['raster', 'discrete-raster'] + operates_on_datatype = [DataType.RASTER, 'discrete-raster'] dataset = util.param.DatasetSelector(default=None, doc="""Dataset to run tool on.""", - filters={'datatype': 'raster'}, + filters={'datatype': DataType.RASTER}, ) new_crs = param.String(default=None, doc="""New coordinate reference system to project to""") diff --git a/quest_tool_plugins/timeseries/timeseries.py b/quest_tool_plugins/timeseries/timeseries.py index 4efdfedb..c2a20770 100644 --- a/quest_tool_plugins/timeseries/timeseries.py +++ b/quest_tool_plugins/timeseries/timeseries.py @@ -1,6 +1,3 @@ -"""Timeseries Tools - -""" import param from quest.util import setattr_on_dataframe, unit_list, unit_registry diff --git a/quest_tool_plugins/timeseries/ts_base.py b/quest_tool_plugins/timeseries/ts_base.py index 18b8d976..0536995b 100644 --- a/quest_tool_plugins/timeseries/ts_base.py +++ b/quest_tool_plugins/timeseries/ts_base.py @@ -1,24 +1,25 @@ import json +from quest import util from quest.plugins import ToolBase from quest.api import get_metadata, update_metadata -from quest import util from quest.plugins import load_plugins +from quest.static import UriType, DataType class TsBase(ToolBase): # metadata attributes group = 'Timeseries' - operates_on_datatype = ['timeseries'] + operates_on_datatype = [DataType.TIMESERIES] operates_on_geotype = None operates_on_parameters = None - produces_datatype = ['timeseries'] + produces_datatype = [DataType.TIMESERIES] produces_geotype = None produces_parameters = None dataset = util.param.DatasetSelector(default=None, doc="""Dataset to apply filter to.""", - filters={'datatype': 'timeseries'}, + filters={'datatype': DataType.TIMESERIES}, ) def _run_tool(self): diff --git a/quest_tool_plugins/timeseries/ts_flow_duration.py b/quest_tool_plugins/timeseries/ts_flow_duration.py index 7af2769f..566026ec 100644 --- a/quest_tool_plugins/timeseries/ts_flow_duration.py +++ b/quest_tool_plugins/timeseries/ts_flow_duration.py @@ -1,10 +1,11 @@ import json +from quest import util from quest.plugins import ToolBase from quest.api import get_metadata, update_metadata -from quest import util -from quest.util import setattr_on_dataframe +from quest.static import DataType, UriType from quest.plugins import load_plugins +from quest.util import setattr_on_dataframe class TsFlowDuration(ToolBase): @@ -13,7 +14,7 @@ class TsFlowDuration(ToolBase): dataset = util.param.DatasetSelector(default=None, doc="""Dataset to apply filter to.""", - filters={'datatype': 'timeseries'}, + filters={'datatype': DataType.TIMESERIES}, ) def _run_tool(self): @@ -65,4 +66,4 @@ def _run_tool(self): output = load_plugins('io', 'xy-hdf5')['xy-hdf5'] output.write(file_path, new_df, new_metadata) - return {'datasets': new_dset, 'catalog_entries': catalog_entry} \ No newline at end of file + return {'datasets': new_dset, 'catalog_entries': catalog_entry} diff --git a/quest_tool_plugins/whitebox/whitebox_utils.py b/quest_tool_plugins/whitebox/whitebox_utils.py index 67a37306..80f8865a 100644 --- a/quest_tool_plugins/whitebox/whitebox_utils.py +++ b/quest_tool_plugins/whitebox/whitebox_utils.py @@ -1,20 +1,22 @@ -import logging import os -import inspect import re import time +import logging +import inspect from 
functools import wraps -import whitebox_tools import rasterio -import xarray as xr import numpy as np +import xarray as xr import pandas as pd +import whitebox_tools import geopandas as gpd from shapely.geometry import Point, shape +from quest.static import DataType from quest.util import listify, convert_nodata_to_nans + whitebox_log = logging.getLogger('whitebox') whitebox_log.addHandler(logging.NullHandler()) whitebox_log.propagate = True diff --git a/quest_tool_plugins/whitebox/whitebox_watershed.py b/quest_tool_plugins/whitebox/whitebox_watershed.py index 9346892c..cb66558e 100644 --- a/quest_tool_plugins/whitebox/whitebox_watershed.py +++ b/quest_tool_plugins/whitebox/whitebox_watershed.py @@ -1,9 +1,10 @@ -import numpy as np import param import json +import numpy as np -from quest.plugins import ToolBase from quest import util +from quest.plugins import ToolBase +from quest.static import DataType, UriType, GeomType from quest.api import get_metadata, update_metadata, open_dataset from .whitebox_utils import wbt, points_to_shp, raster_to_polygons @@ -14,17 +15,17 @@ class WBTFillDepressions(ToolBase): # metadata attributes group = 'raster' - operates_on_datatype = ['raster'] + operates_on_datatype = [DataType.RASTER] operates_on_geotype = None operates_on_parameters = None - produces_datatype = ['raster'] + produces_datatype = [DataType.RASTER] produces_geotype = None produces_parameters = None dataset = util.param.DatasetSelector( default=None, doc="""Dataset to run tool on.""", - filters={'datatype': 'raster'}, + filters={'datatype': DataType.RASTER}, ) def _run_tool(self): @@ -60,17 +61,17 @@ class WBTExtractStreamsWorkflow(ToolBase): # metadata attributes group = 'raster' - operates_on_datatype = ['raster'] + operates_on_datatype = [DataType.RASTER] operates_on_geotype = None operates_on_parameters = None - produces_datatype = ['raster'] + produces_datatype = [DataType.RASTER] produces_geotype = None produces_parameters = None dataset = util.param.DatasetSelector( default=None, doc="""Dataset to run tool on.""", - filters={'datatype': 'raster'}, + filters={'datatype': DataType.RASTER}, ) stream_threshold = param.Number( @@ -138,29 +139,29 @@ class WBTWatershedDelineationWorkflow(ToolBase): # metadata attributes group = 'raster' - operates_on_datatype = ['raster'] + operates_on_datatype = [DataType.RASTER] operates_on_geotype = None operates_on_parameters = None - produces_datatype = ['raster'] + produces_datatype = [DataType.RASTER] produces_geotype = None produces_parameters = None elevation_dataset = util.param.DatasetSelector( default=None, doc="""Dataset to run tool on.""", - filters={'datatype': 'raster'}, + filters={'datatype': DataType.RASTER}, ) streams_dataset = util.param.DatasetSelector( default=None, doc="""Dataset to run tool on.""", - filters={'datatype': 'raster'}, + filters={'datatype': DataType.RASTER}, ) outlets = util.param.CatalogEntrySelector( default=None, doc="""Point geometry to use for the outlet.""", - filters={'geom_type': 'point'}, + filters={'geom_type': GeomType.POINT}, ) snap_distance = param.Number( diff --git a/test/data.py b/test/data.py index 068f3406..e131341f 100644 --- a/test/data.py +++ b/test/data.py @@ -352,6 +352,6 @@ ('svc://usgs-nlcd:2001/5a1c65a5e4b09fc93dd648f1', None), ('svc://usgs-nlcd:2006/5a1c35b6e4b09fc93dd64011', None), ('svc://usgs-nlcd:2011/5a1c31abe4b09fc93dd6381c', None), - ('svc://usgs-nwis:dv/01010000', {'parameter': 'streamflow:mean:daily', 'start': '2016-01-01', 'end': '2016-01-02'}), ('svc://usgs-nwis:iv/01010000', 
diff --git a/test/data.py b/test/data.py
index 068f3406..e131341f 100644
--- a/test/data.py
+++ b/test/data.py
@@ -352,6 +352,6 @@
     ('svc://usgs-nlcd:2001/5a1c65a5e4b09fc93dd648f1', None),
     ('svc://usgs-nlcd:2006/5a1c35b6e4b09fc93dd64011', None),
     ('svc://usgs-nlcd:2011/5a1c31abe4b09fc93dd6381c', None),
-    ('svc://usgs-nwis:dv/01010000', {'parameter': 'streamflow:mean:daily', 'start': '2016-01-01', 'end': '2016-01-02'}),
     ('svc://usgs-nwis:iv/01010000', {'parameter': 'gage_height', 'start': '2016-01-01', 'end': '2016-01-02'}),
+    ('svc://usgs-nwis:dv/01010000', {'parameter': 'streamflow:mean:daily', 'start': '2016-01-01', 'end': '2016-01-02'}),
 ]
diff --git a/test/test_catalog.py b/test/test_catalog.py
index 78a6687c..77b56af5 100644
--- a/test/test_catalog.py
+++ b/test/test_catalog.py
@@ -1,5 +1,6 @@
 import pytest
 
+from quest.static import GeomType
 from data import SERVICES_CATALOG_COUNT, CACHED_SERVICES
 
 ACTIVE_PROJECT = 'project1'
@@ -60,12 +61,12 @@ def test_search_catalog_with_query(api):
 
 
 def test_new_catalog_entry(api):
-    c = api.new_catalog_entry(geom_type='Point', geom_coords=[-94.2, 23.4])
+    c = api.new_catalog_entry(geom_type=GeomType.POINT, geom_coords=[-94.2, 23.4])
     assert c in api.get_metadata(c)
 
 
 def test_delete_catalog_entry(api):
-    c = api.new_catalog_entry(geom_type='Point', geom_coords=[-94.2, 23.4])
+    c = api.new_catalog_entry(geom_type=GeomType.POINT, geom_coords=[-94.2, 23.4])
     d = api.new_dataset(collection='col1', catalog_entry=c, source='derived')
     api.delete(d)
     assert d not in api.get_datasets()
@@ -74,7 +75,7 @@
 
 
 def test_delete_derived_dataset(api):
-    c = api.new_catalog_entry(geom_type='Point', geom_coords=[-94.2, 23.4])
+    c = api.new_catalog_entry(geom_type=GeomType.POINT, geom_coords=[-94.2, 23.4])
     d = api.add_datasets(collection='col1', catalog_entries=[c, c])
     api.delete(d[0])
     assert d[0] not in api.get_datasets()
diff --git a/test/test_datasets.py b/test/test_datasets.py
index 1dbec139..4ad21763 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -80,7 +80,6 @@ def test_new_dataset(api):
     new_dataset = api.new_dataset(CATALOG_ENTRY, 'col1')
     datasets = api.get_datasets()
     try:
-        # test number of datasets
         actual = len(datasets)
         expected = 2
         assert actual == expected
diff --git a/test/test_providers.py b/test/test_providers.py
index 095e5ca9..36669be7 100644
--- a/test/test_providers.py
+++ b/test/test_providers.py
@@ -1,8 +1,12 @@
-from data import ALL_SERVICES, SERVICE_DOWNLOAD_OPTIONS
-from conftest import FILES_DIR
-import pytest
 import os
 
+import pytest
+
+from conftest import FILES_DIR
+from quest.static import DatasetStatus
+from data import ALL_SERVICES, SERVICE_DOWNLOAD_OPTIONS
+
+
 pytestmark = pytest.mark.usefixtures('reset_projects_dir')
 
 
@@ -35,4 +39,4 @@ def test_download(api, catalog_entry, options):
     d = api.add_datasets('test', catalog_entry)[0]
     api.stage_for_download(d, options=options)
     result = api.download_datasets(d, raise_on_error=True)
-    assert result[d] == 'downloaded'
+    assert result[d[0]] == DatasetStatus.DOWNLOADED
diff --git a/test/test_util_misc.py b/test/test_util_misc.py
index 61f4e3fa..71b49bea 100644
--- a/test/test_util_misc.py
+++ b/test/test_util_misc.py
@@ -8,7 +8,9 @@ def test_get_quest_dir(reset_projects_dir):
 
 
 def test_get_cache_data_dir(reset_projects_dir):
-    assert quest.util.get_cache_dir() == os.path.join(reset_projects_dir['BASE_DIR'], os.path.join('.cache', 'test_cache'))
+    actual = quest.util.get_cache_dir()
+    expected = os.path.join(reset_projects_dir['BASE_DIR'], os.path.join('.cache', 'test_cache'))
+    assert actual == expected
 
     folder_obj = tempfile.TemporaryDirectory()
     folder = folder_obj.name
@@ -67,13 +69,13 @@ def test_bbox2poly():
                           [-160.0, 20.0],
                           [-160.0, -20.0],
                           [-180, -20.0]],
-         "type": "Polygon"},
+         "type": quest.static.GeomType.POLYGON},
        {"coordinates": [[160.0, -20.0],
                         [160.0, 20.0],
                         [180, 20.0],
                         [180, -20.0],
                         [160.0, -20.0]],
-        "type": "Polygon"}],
+        "type": quest.static.GeomType.POLYGON}],
     "type": "MultiPolygon"}
 
     poly = quest.util.bbox2poly(*bbox, as_shapely=True)
@@ -91,7 +93,7 @@
                          [10.0, 10.0],
                          [10.0, -10.0],
                          [-10.0, -10.0]],
-        'type': 'Polygon'}
+        'type': quest.static.GeomType.POLYGON}
 
     bbox = 160, -20, 200, 20
 
@@ -103,13 +105,13 @@
                          [180, 20.0],
                          [180, -20.0],
                          [160.0, -20.0]],
-        'type': 'Polygon'},
+        'type': quest.static.GeomType.POLYGON},
        {'coordinates': [[-180, -20.0],
                         [-180, 20.0],
                         [-160.0, 20.0],
                         [-160.0, -20.0],
                         [-180, -20.0]],
-        'type': 'Polygon'}],
+        'type': quest.static.GeomType.POLYGON}],
     'type': 'MultiPolygon'}
 
     poly = quest.util.bbox2poly(*bbox)
From 4954a99749af365b5a0f76abb0cf63841f4d3af5 Mon Sep 17 00:00:00 2001
From: sdc50
Date: Mon, 26 Nov 2018 08:23:58 -0600
Subject: [PATCH 09/12] make search catalog more flexible (#118)

* make search catalog more flexible

* Update test/test_catalog.py

* disable HydroShare tests
---
 quest/api/catalog.py | 87 +++++++++++++++++++++++++-------------------
 test/data.py         |  4 +-
 test/test_catalog.py | 10 +++++
 3 files changed, 62 insertions(+), 39 deletions(-)

diff --git a/quest/api/catalog.py b/quest/api/catalog.py
index 7b67f41b..55545192 100644
--- a/quest/api/catalog.py
+++ b/quest/api/catalog.py
@@ -12,7 +12,7 @@
 from .. import util
 from ..plugins import load_providers
 from ..static import DatasetSource, UriType
-from ..database.database import get_db, db_session
+from ..database.database import get_db, db_session, select_datasets
 
 
 @add_async
@@ -81,80 +81,93 @@ def search_catalog(uris=None, expand=False, as_dataframe=False, as_geojson=False
     """
     uris = list(itertools.chain(util.listify(uris) or []))
-    grouped_uris = util.classify_uris(uris, as_dataframe=False, exclude=[UriType.DATASET, UriType.COLLECTION])
+    grouped_uris = util.classify_uris(uris, as_dataframe=False, exclude=[UriType.DATASET],
+                                      raise_if_empty=False)
     services = grouped_uris.get(UriType.SERVICE) or []
+    collections = grouped_uris.get(UriType.COLLECTION) or []
 
-    all_datasets = []
+    catalog_entries = [d['catalog_entry'] for d in select_datasets(lambda c: c.collection.name in collections)]
+
+    all_catalog_entries = list()
 
     filters = filters or dict()
     for name in services:
-        provider, service, _ = util.parse_service_uri(name)
+        provider, service, catalog_entry = util.parse_service_uri(name)
+        if catalog_entry is not None:
+            catalog_entries.append(name)
+            continue
         provider_plugin = load_providers()[provider]
-        tmp_datasets = provider_plugin.search_catalog(service, update_cache=update_cache, **filters)
-        all_datasets.append(tmp_datasets)
-
-        all_datasets.append(tmp_datasets)
-
-    # drop duplicates fails when some columns have nested list/tuples like
-    # _geom_coords. so drop based on index
-    datasets = pd.concat(all_datasets)
-    datasets['index'] = datasets.index
-    datasets = datasets.drop_duplicates(subset='index')
-    datasets = datasets.set_index('index').sort_index()
+        tmp_catalog_entries = provider_plugin.search_catalog(service, update_cache=update_cache, **filters)
+        all_catalog_entries.append(tmp_catalog_entries)
+
+    if catalog_entries:
+        all_catalog_entries.append(get_metadata(catalog_entries, as_dataframe=True))
+
+    if all_catalog_entries:
+        # drop duplicates fails when some columns have nested list/tuples like
+        # _geom_coords. so drop based on index
+        catalog_entries = pd.concat(all_catalog_entries)
+        catalog_entries['index'] = catalog_entries.index
+        catalog_entries = catalog_entries.drop_duplicates(subset='index')
+        catalog_entries = catalog_entries.set_index('index').sort_index()
+    else:
+        catalog_entries = pd.DataFrame()
 
     # apply any specified filters
     for k, v in filters.items():
-        if datasets.empty:
+        if catalog_entries.empty:
             break  # if dataframe is empty then doesn't try filtering any further
         else:
             if k == 'bbox':
                 bbox = util.bbox2poly(*[float(x) for x in util.listify(v)], as_shapely=True)
-                idx = datasets.intersects(bbox)  # http://geopandas.org/reference.html#GeoSeries.intersects
-                datasets = datasets[idx]
+                idx = catalog_entries.intersects(bbox)  # http://geopandas.org/reference.html#GeoSeries.intersects
+                catalog_entries = catalog_entries[idx]
 
             elif k == 'geom_type':
-                idx = datasets.geom_type.str.contains(v).fillna(value=False)
-                datasets = datasets[idx]
+                idx = catalog_entries.geom_type.str.contains(v).fillna(value=False)
+                catalog_entries = catalog_entries[idx]
 
             elif k == 'parameter':
-                idx = datasets.parameters.str.contains(v)
-                datasets = datasets[idx]
+                idx = catalog_entries.parameters.str.contains(v)
+                catalog_entries = catalog_entries[idx]
 
             elif k == 'display_name':
-                idx = datasets.display_name.str.contains(v)
-                datasets = datasets[idx]
+                idx = catalog_entries.display_name.str.contains(v)
+                catalog_entries = catalog_entries[idx]
 
             elif k == 'description':
-                idx = datasets.display_name.str.contains(v)
-                datasets = datasets[idx]
+                idx = catalog_entries.display_name.str.contains(v)
+                catalog_entries = catalog_entries[idx]
 
             elif k == 'search_terms':
-                idx = np.column_stack([datasets[col].apply(str).str.contains(search_term, na=False)
-                                       for col, search_term in itertools.product(datasets.columns, v)]).any(axis=1)
-                datasets = datasets[idx]
+                idx = np.column_stack([
+                    catalog_entries[col].apply(str).str.contains(search_term, na=False)
+                    for col, search_term in itertools.product(catalog_entries.columns, v)
+                ]).any(axis=1)
+                catalog_entries = catalog_entries[idx]
 
             else:
-                idx = datasets.metadata.map(lambda x: _multi_index(x, k) == v)
-                datasets = datasets[idx]
+                idx = catalog_entries.metadata.map(lambda x: _multi_index(x, k) == v)
+                catalog_entries = catalog_entries[idx]
 
     if queries is not None:
         for query in queries:
-            datasets = datasets.query(query)
+            catalog_entries = catalog_entries.query(query)
 
     if not (expand or as_dataframe or as_geojson):
-        return datasets.index.astype('unicode').tolist()
+        return catalog_entries.index.astype('unicode').tolist()
 
     if as_geojson:
-        if datasets.empty:
+        if catalog_entries.empty:
             return geojson.FeatureCollection([])
         else:
-            return json.loads(datasets.to_json(default=util.to_json_default_handler))
+            return json.loads(catalog_entries.to_json(default=util.to_json_default_handler))
 
     if not as_dataframe:
-        datasets = datasets.to_dict(orient='index')
+        catalog_entries = catalog_entries.to_dict(orient='index')
 
-    return datasets
+    return catalog_entries
 
 
 def _multi_index(d, index):
diff --git a/test/data.py b/test/data.py
index e131341f..e5bf9a5f 100644
--- a/test/data.py
+++ b/test/data.py
@@ -307,8 +307,8 @@
     ('svc://usgs-nwis:dv', 36368, 1000),
     ('svc://usgs-nwis:iv', 20412, 1000),
     ('svc://wmts:seamless_imagery', 1, 1),
-    ('svc://cuahsi-hydroshare:hs_geo', 1090, 1000),
-    ('svc://cuahsi-hydroshare:hs_norm', 2331, 1000),
+    # ('svc://cuahsi-hydroshare:hs_geo', 1090, 1000),
+    # ('svc://cuahsi-hydroshare:hs_norm', 2331, 1000),
 ]
diff --git a/test/test_catalog.py b/test/test_catalog.py
index 77b56af5..a03cae01 100644
--- a/test/test_catalog.py
+++ b/test/test_catalog.py
@@ -27,6 +27,16 @@ def test_add_datasets(api, catalog_entry):
     assert b == c
 
 
+def test_search_catalog_with_no_uris(api):
+    catalog_entries = api.search_catalog()
+    assert catalog_entries == []
+
+
+def test_search_catalog_with_collection(api):
+    api.set_active_project('test_data')
+    catalog_entries = api.search_catalog('col1')
+    assert catalog_entries == ['svc://usgs-nwis:iv/01516350']
+
+
 @pytest.mark.slow
 @pytest.mark.parametrize("service, expected, tolerance", SERVICES_CATALOG_COUNT)
 def test_search_catalog_from_service(api, service, expected, tolerance):
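Note: with PATCH 09 applied, search_catalog() accepts more than provider service URIs: collection names are resolved to the catalog entries of the datasets they contain (via select_datasets), and a fully qualified catalog-entry URI bypasses the provider search entirely. A rough usage sketch is below; it assumes the public quest.api module exposes the same function the test fixture proxies, the URIs reuse ones that appear in the tests, and the filter values are made up. The bbox filter relies on util.bbox2poly, whose antimeridian-splitting behavior is pinned down by the test_util_misc changes above.

    from quest import api

    # plain service URI, narrowed with metadata filters
    entries = api.search_catalog('svc://usgs-nwis:iv',
                                 filters={'bbox': [-91.0, 32.3, -90.8, 32.4],
                                          'parameter': 'streamflow'})

    # a collection name now works too
    entries = api.search_catalog('col1')

    # as does a fully qualified catalog-entry URI
    entries = api.search_catalog('svc://usgs-nwis:iv/01516350')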
From e4743a931a2d2ab8268f6455f504d05b5f905027 Mon Sep 17 00:00:00 2001
From: sdc50
Date: Mon, 26 Nov 2018 21:23:50 -0600
Subject: [PATCH 10/12] Update readthedocs.yml (#119)

---
 readthedocs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readthedocs.yml b/readthedocs.yml
index eb7c2b3c..07b2d0eb 100644
--- a/readthedocs.yml
+++ b/readthedocs.yml
@@ -1,5 +1,5 @@
 conda:
-  file: py3_conda_environment.yml
+  file: conda_environment.yml
 python:
   version: 3
   pip_install: true

From cd37b93ed1b3c5fdbef3b1cbfc584199221e7355 Mon Sep 17 00:00:00 2001
From: douggallup
Date: Mon, 3 Dec 2018 14:07:32 -0700
Subject: [PATCH 11/12] Fix for rebase/merge.

---
 quest/api/datasets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/quest/api/datasets.py b/quest/api/datasets.py
index eb719b34..a61eb370 100644
--- a/quest/api/datasets.py
+++ b/quest/api/datasets.py
@@ -390,7 +390,7 @@ def open_dataset(dataset, fmt=None, **kwargs):
     if file_format not in list_plugins(static.PluginType.IO):
         raise ValueError('No reader available for: %s' % file_format)
 
-    io = load_plugins(static.PluginType.IO, file_format)[file_format]
+    # Use intake plugin to open
     if intake_plugin:
         # New code, with 'intake_plugin' added to the local .db
         plugin_name = 'open_' + intake_plugin

From b407e86423e03355b671060bc78ce1c5a9c597ed Mon Sep 17 00:00:00 2001
From: douggallup
Date: Tue, 4 Dec 2018 15:31:09 -0700
Subject: [PATCH 12/12] json lost in rebase/merge.

---
 quest_tool_plugins/raster/rst_reprojection.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/quest_tool_plugins/raster/rst_reprojection.py b/quest_tool_plugins/raster/rst_reprojection.py
index 0abf820e..a82cd580 100644
--- a/quest_tool_plugins/raster/rst_reprojection.py
+++ b/quest_tool_plugins/raster/rst_reprojection.py
@@ -1,3 +1,4 @@
+import json
 import param
 import rasterio
 import subprocess
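Note: PATCH 11 restores the branch in open_dataset() that resolves the reader dynamically through intake instead of the old load_plugins(PluginType.IO, ...) lookup. The sketch below shows the shape of that dispatch only; the helper function and its arguments are illustrative, the parts taken from the hunk are the intake_plugin flag and the 'open_' + intake_plugin naming, and it assumes the relevant quest intake driver packages are installed so that intake registers matching open_* factories at import time.

    import intake


    def read_with_intake(intake_plugin, *args, **kwargs):
        # intake exposes every installed driver as intake.open_<driver_name>
        plugin_name = 'open_' + intake_plugin
        open_func = getattr(intake, plugin_name)
        source = open_func(*args, **kwargs)   # returns an intake DataSource
        return source.read()                  # materializes the data (e.g. a DataFrame)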