diff --git a/test/cli/test_gen.py b/test/cli/test_gen.py index 7a516ea58..fa8c415e9 100644 --- a/test/cli/test_gen.py +++ b/test/cli/test_gen.py @@ -33,12 +33,18 @@ def test_main_with_illegal_size_option(self): def test_main_with_illegal_region_option(self): with self.assertRaises(ValueError) as cm: self.invoke_cli(['gen', '-R', '50,_2,55,21', 'input.nc']) - self.assertEqual("output_region must have the form ,,,," - " where all four numbers must be floating point numbers in degrees", f'{cm.exception}') + self.assertEqual("output_region must have the form " + ",,,," + " where all four numbers must be floating point " + "numbers in the units of the output CRS", + f'{cm.exception}') with self.assertRaises(ValueError) as cm: self.invoke_cli(['gen', '-R', '50,20,55', 'input.nc']) - self.assertEqual("output_region must have the form ,,,," - " where all four numbers must be floating point numbers in degrees", f'{cm.exception}') + self.assertEqual("output_region must have the form " + ",,,," + " where all four numbers must be floating point " + "numbers in the units of the output CRS", + f'{cm.exception}') def test_main_with_illegal_variable_option(self): with self.assertRaises(ValueError) as cm: diff --git a/test/core/gen/test_config.py b/test/core/gen/test_config.py index 7a344a7c8..747a39b00 100644 --- a/test/core/gen/test_config.py +++ b/test/core/gen/test_config.py @@ -113,15 +113,19 @@ def test_output_region_option(self): config_obj = dict(output_region='50,_2,55,21') with self.assertRaises(ValueError) as cm: get_config_dict(**config_obj) - self.assertEqual("output_region must have the form ,,,," - " where all four numbers must be floating point numbers in degrees", + self.assertEqual("output_region must have the form " + ",,,," + " where all four numbers must be floating point " + "numbers in the units of the output CRS", f'{cm.exception}') config_obj = dict(output_region='50, 20, 55') with self.assertRaises(ValueError) as cm: get_config_dict(**config_obj) - self.assertEqual("output_region must have the form ,,,," - " where all four numbers must be floating point numbers in degrees", + self.assertEqual("output_region must have the form " + ",,,," + " where all four numbers must be floating point " + "numbers in the units of the output CRS", f'{cm.exception}') def test_output_variables_option(self): diff --git a/test/core/gen/test_iproc.py b/test/core/gen/test_iproc.py index 34283203e..9dd9c07ae 100644 --- a/test/core/gen/test_iproc.py +++ b/test/core/gen/test_iproc.py @@ -4,51 +4,10 @@ import pandas as pd import xarray as xr -from xcube.core.gen.iproc import DefaultInputProcessor, ReprojectionInfo +from xcube.core.gen.iproc import DefaultInputProcessor from xcube.core.timecoord import to_time_in_days_since_1970 -class ReprojectionInfoTest(unittest.TestCase): - def test_success(self): - # noinspection PyTypeChecker - ri = ReprojectionInfo(xy_names=['x', 'y']) - self.assertEqual(('x', 'y'), ri.xy_names) - self.assertEqual(None, ri.xy_tp_names) - self.assertEqual(None, ri.xy_crs) - self.assertEqual(None, ri.xy_gcp_step) - self.assertEqual(None, ri.xy_tp_gcp_step) - - # noinspection PyTypeChecker - ri = ReprojectionInfo(xy_names=['x', 'y'], xy_gcp_step=[2, 1]) - self.assertEqual(('x', 'y'), ri.xy_names) - self.assertEqual(None, ri.xy_tp_names) - self.assertEqual(None, ri.xy_crs) - self.assertEqual((2, 1), ri.xy_gcp_step) - self.assertEqual(None, ri.xy_tp_gcp_step) - - # noinspection PyTypeChecker - ri = ReprojectionInfo(xy_gcp_step=4, xy_tp_gcp_step=2) - self.assertEqual(None, ri.xy_names) - self.assertEqual(None, ri.xy_tp_names) - self.assertEqual(None, ri.xy_crs) - self.assertEqual((4, 4), ri.xy_gcp_step) - self.assertEqual((2, 2), ri.xy_tp_gcp_step) - - def test_fail(self): - with self.assertRaises(ValueError): - # noinspection PyTypeChecker - ReprojectionInfo(xy_names=['x', '']) - with self.assertRaises(ValueError): - # noinspection PyTypeChecker - ReprojectionInfo(xy_names=[0, 'y']) - with self.assertRaises(ValueError): - # noinspection PyTypeChecker - ReprojectionInfo(xy_gcp_step=[7, 'y']) - with self.assertRaises(ValueError): - # noinspection PyTypeChecker - ReprojectionInfo(xy_gcp_step=[7, 0]) - - class DefaultInputProcessorTest(unittest.TestCase): def setUp(self): @@ -62,11 +21,6 @@ def test_props(self): processor = DefaultInputProcessor(input_reader="zarr") self.assertEqual('zarr', processor.input_reader) - def test_reprojection_info(self): - # noinspection PyNoneFunctionAssignment - reprojection_info = self.processor.get_reprojection_info(create_default_dataset()) - self.assertIsNotNone(reprojection_info) - def test_get_time_range(self): ds = create_default_dataset(time_mode="time") t1, t2 = self.processor.get_time_range(ds) diff --git a/xcube/cli/gen.py b/xcube/cli/gen.py index 1b5f50ded..bb7c08e3e 100644 --- a/xcube/cli/gen.py +++ b/xcube/cli/gen.py @@ -52,8 +52,15 @@ 'xcube gen --info. If omitted, the format will be guessed from the given output path.') @click.option('--size', '-S', metavar='SIZE', help='Output size in pixels using format ",".') +@click.option('--crs', metavar='CRS', + help='The target coordinate reference system. Must be given as' + 'either (a) a WKT-String decribing a CRS or (b) an EPSG-code' + '(with or without leading "EPSG:". If not provided, the ' + 'output cube will be resampled to the WGS84 CRS.') @click.option('--region', '-R', metavar='REGION', - help='Output region using format ",,,"') + help='Output region using format ' + '",,,". Output region ' + 'must be given in coordinates of the output crs.') @click.option('--variables', '--vars', metavar='VARIABLES', help='Variables to be included in output. ' 'Comma-separated list of names which may contain wildcard characters "*" and "?".') @@ -81,6 +88,7 @@ def gen(input: Sequence[str], output: str, format: str, size: str, + crs: str, region: str, variables: str, resampling: str, @@ -115,6 +123,7 @@ def gen(input: Sequence[str], output_path=output, output_writer_name=format, output_size=size, + output_crs=crs, output_region=region, output_variables=variables, output_resampling=resampling, diff --git a/xcube/core/gen/config.py b/xcube/core/gen/config.py index b9198ff0e..5aa70ebc2 100644 --- a/xcube/core/gen/config.py +++ b/xcube/core/gen/config.py @@ -29,6 +29,7 @@ def get_config_dict(config_files: Sequence[str] = None, output_path: str = None, output_writer_name: str = None, output_size: str = None, + output_crs: str = None, output_region: str = None, output_variables: str = None, output_resampling: str = None, @@ -68,6 +69,9 @@ def get_config_dict(config_files: Sequence[str] = None, if output_writer_name is not None and 'output_writer_name' not in config: config['output_writer_name'] = output_writer_name + if output_crs is not None and 'output_crs' not in config: + config['output_crs'] = output_crs + if output_resampling is not None and 'output_resampling' not in config: config['output_resampling'] = output_resampling @@ -87,8 +91,10 @@ def get_config_dict(config_files: Sequence[str] = None, except ValueError: output_region = None if output_region is None or len(output_region) != 4: - raise ValueError(f'output_region must have the form ,,,,' - f' where all four numbers must be floating point numbers in degrees') + raise ValueError(f'output_region must have the form ' + f',,,, ' + f'where all four numbers must be floating point ' + f'numbers in the units of the output CRS') config['output_region'] = output_region if output_variables is not None: diff --git a/xcube/core/gen/gen.py b/xcube/core/gen/gen.py index c7d857ce2..4e4eb3dee 100644 --- a/xcube/core/gen/gen.py +++ b/xcube/core/gen/gen.py @@ -24,6 +24,7 @@ import io import os import pstats +import pyproj import time import traceback import warnings @@ -36,7 +37,7 @@ from xcube.core.gen.defaults import DEFAULT_OUTPUT_PATH, DEFAULT_OUTPUT_RESAMPLING, DEFAULT_OUTPUT_SIZE from xcube.core.gen.iproc import InputProcessor, find_input_processor_class from xcube.core.gridmapping import GridMapping -from xcube.core.gridmapping import CRS_WGS84 +from xcube.core.gridmapping import DEFAULT_CRS from xcube.core.optimize import optimize_dataset from xcube.core.select import select_spatial_subset, select_variables_subset from xcube.core.timecoord import add_time_coords, from_time_in_days_since_1970 @@ -50,6 +51,7 @@ def gen_cube(input_paths: Sequence[str] = None, input_processor_params: Dict = None, input_reader_name: str = None, input_reader_params: Dict[str, Any] = None, + output_crs: str = DEFAULT_CRS, output_region: Tuple[float, float, float, float] = None, output_size: Tuple[int, int] = DEFAULT_OUTPUT_SIZE, output_resampling: str = DEFAULT_OUTPUT_RESAMPLING, @@ -70,12 +72,18 @@ def gen_cube(input_paths: Sequence[str] = None, :param no_sort_mode: :param input_paths: The input paths. :param input_processor_name: Name of a registered input processor - (xcube.core.gen.inputprocessor.InputProcessor) to be used to transform the inputs. - :param input_processor_params: Parameters to be passed to the input processor. - :param input_reader_name: Name of a registered input reader (xcube.core.util.dsio.DatasetIO). + (xcube.core.gen.inputprocessor.InputProcessor) to be used to transform + the inputs. + :param input_processor_params: Parameters to be passed to the + input processor. + :param input_reader_name: Name of a registered input reader + (xcube.core.util.dsio.DatasetIO). :param input_reader_params: Parameters passed to the input reader. - :param output_region: Output region as tuple of floats: (lon_min, lat_min, lon_max, lat_max). - :param output_size: The spatial dimensions of the output as tuple of ints: (width, height). + :param output_crs: Output crs, given as either WLT string or EPSG code + :param output_region: Output region as tuple of floats: + (x_min, y_min, x_max, y_max). + :param output_size: The spatial dimensions of the output as tuple of ints: + (width, height). :param output_resampling: The resampling method for the output. :param output_path: The output directory. :param output_writer_name: Name of an output writer @@ -85,7 +93,8 @@ def gen_cube(input_paths: Sequence[str] = None, :param output_variables: Output variables. :param processed_variables: Processed variables computed on-the-fly. :param profile_mode: Whether profiling should be enabled. - :param append_mode: Deprecated. The function will always either insert, replace, or append new time slices. + :param append_mode: Deprecated. The function will always either insert, + replace, or append new time slices. :param dry_run: Doesn't write any data. For testing. :param monitor: A progress monitor. :return: True for success. @@ -161,6 +170,7 @@ def monitor(*args): effective_output_writer_params, input_file, output_size, + output_crs, output_region, output_resampling, output_path, @@ -187,6 +197,7 @@ def _process_input(input_processor: InputProcessor, output_writer_params: Dict[str, Any], input_file: str, output_size: Tuple[int, int], + output_crs: str, output_region: Tuple[float, float, float, float], output_resampling: str, output_path: str, @@ -219,6 +230,8 @@ def _process_input(input_processor: InputProcessor, time_index, update_mode = find_time_slice(output_path, from_time_in_days_since_1970((time_range[0] + time_range[1]) / 2)) + crs = pyproj.crs.CRS.from_string(output_crs) + width, height = output_size x_min, y_min, x_max, y_max = output_region xy_res = max((x_max - x_min) / width, (y_max - y_min) / height) @@ -227,7 +240,7 @@ def _process_input(input_processor: InputProcessor, output_geom = GridMapping.regular(size=output_size, xy_min=(x_min, y_min), xy_res=xy_res, - crs=CRS_WGS84, + crs=crs, tile_size=tile_size) steps = [] diff --git a/xcube/core/gen/iproc.py b/xcube/core/gen/iproc.py index 3ff62a007..e8a9e06eb 100644 --- a/xcube/core/gen/iproc.py +++ b/xcube/core/gen/iproc.py @@ -37,97 +37,6 @@ from xcube.util.plugin import get_extension_registry -class ReprojectionInfo: - """ - Characterize input datasets so we can reproject. - - :param xy_names: Names of variables providing the spatial x- and y-coordinates, - e.g. ('longitude', 'latitude') - :param xy_tp_names: Optional names of tie-point variables providing the spatial y- and y-coordinates, - e.g. ('TP_longitude', 'TP_latitude') - :param xy_crs: Optional spatial reference system, e.g. 'EPSG:4326' or WKT or proj4 mapping - :param xy_gcp_step: Optional step size for collecting ground control points from spatial - coordinate arrays denoted by *xy_names*. - :param xy_tp_gcp_step: Optional step size for collecting ground control points from spatial - coordinate arrays denoted by *xy_tp_names*. - """ - - def __init__(self, - xy_names: Tuple[str, str] = None, - xy_tp_names: Tuple[str, str] = None, - xy_crs: Any = None, - xy_gcp_step: Union[int, Tuple[int, int]] = None, - xy_tp_gcp_step: Union[int, Tuple[int, int]] = None): - self._xy_names = self._assert_name_pair('xy_names', xy_names) - self._xy_tp_names = self._assert_name_pair('xy_tp_names', xy_tp_names) - self._xy_crs = xy_crs - self._xy_gcp_step = self._assert_step_pair('xy_gcp_step', xy_gcp_step) - self._xy_tp_gcp_step = self._assert_step_pair('xy_tp_gcp_step', xy_tp_gcp_step) - - def derive(self, - xy_names: Tuple[str, str] = None, - xy_tp_names: Tuple[str, str] = None, - xy_crs: Any = None, - xy_gcp_step: Union[int, Tuple[int, int]] = None, - xy_tp_gcp_step: Union[int, Tuple[int, int]] = None): - return ReprojectionInfo(self.xy_names if xy_names is None else xy_names, - xy_tp_names=self.xy_tp_names if xy_tp_names is None else xy_tp_names, - xy_crs=self.xy_crs if xy_crs is None else xy_crs, - xy_gcp_step=self.xy_gcp_step if xy_gcp_step is None else xy_gcp_step, - xy_tp_gcp_step=self.xy_tp_gcp_step if xy_tp_gcp_step is None else xy_tp_gcp_step) - - @property - def xy_names(self) -> Optional[Tuple[str, str]]: - return self._xy_names - - @property - def xy_tp_names(self) -> Optional[Tuple[str, str]]: - return self._xy_tp_names - - @property - def xy_crs(self) -> Any: - return self._xy_crs - - @property - def xy_gcp_step(self) -> Optional[int]: - return self._xy_gcp_step - - @property - def xy_tp_gcp_step(self) -> Optional[int]: - return self._xy_tp_gcp_step - - def _assert_name_pair(self, keyword: str, value): - if value is not None: - v1, v2 = value - self._assert_name(keyword, v1) - self._assert_name(keyword, v2) - return v1, v2 - return value - - def _assert_step_pair(self, keyword: str, value): - if value is not None: - if isinstance(value, int): - v1, v2 = value, value - else: - v1, v2 = value - self._assert_step(keyword, v1) - self._assert_step(keyword, v2) - return v1, v2 - return value - - def _assert_name(self, keyword: str, value): - if value is None: - raise ValueError(f'invalid {keyword}, missing name') - if not isinstance(value, str) or not value: - raise ValueError(f'invalid {keyword}, name must be a non-empty string') - - def _assert_step(self, keyword: str, value): - if value is None: - raise ValueError(f'invalid {keyword}, missing name') - if not isinstance(value, int) or value <= 0: - raise ValueError(f'invalid {keyword}, step must be an integer number') - - class InputProcessor(ExtensionComponent, metaclass=ABCMeta): """ Read and process inputs for the gen tool. @@ -270,32 +179,17 @@ def default_parameters(self) -> Dict[str, Any]: default_parameters.update(xy_names=('lon', 'lat')) return default_parameters - def get_reprojection_info(self, dataset: xr.Dataset) -> ReprojectionInfo: - """ - Information about special fields in input datasets used for reprojection. - :param dataset: The dataset. - :return: The reprojection information of the dataset or None. - """ - parameters = self.parameters - return ReprojectionInfo(xy_names=parameters.get('xy_names', ('lon', 'lat')), - xy_tp_names=parameters.get('xy_tp_names'), - xy_crs=parameters.get('xy_crs'), - xy_gcp_step=parameters.get('xy_gcp_step'), - xy_tp_gcp_step=parameters.get('xy_tp_gcp_step')) - def get_extra_vars(self, dataset: xr.Dataset) -> Optional[Collection[str]]: """ Return the names of variables containing spatial coordinates. They should not be removed, as they are required for the reprojection. """ - reprojection_info = self.get_reprojection_info(dataset) - if reprojection_info is None: + gridmapping = GridMapping.from_dataset(dataset) + if gridmapping is None: return dataset extra_vars = set() - if reprojection_info.xy_names: - extra_vars.update(set(reprojection_info.xy_names)) - if reprojection_info.xy_tp_names: - extra_vars.update(set(reprojection_info.xy_tp_names)) + if gridmapping.xy_var_names: + extra_vars.update(set(gridmapping.xy_var_names)) return extra_vars def process(self, @@ -305,23 +199,26 @@ def process(self, output_resampling: str, include_non_spatial_vars=False) -> xr.Dataset: """ - Perform reprojection using tie-points / ground control points. + Perform reprojection. """ - reprojection_info = self.get_reprojection_info(dataset) + xy_names = self.parameters.get('xy_names', ('lon', 'lat')) + xy_tp_names = self.parameters.get('xy_tp_names'), + xy_crs = self.parameters.get('xy_crs'), + xy_gcp_step = self.parameters.get('xy_gcp_step'), + xy_tp_gcp_step = self.parameters.get('xy_tp_gcp_step') warn_prefix = 'unsupported argument in np-GCP rectification mode' - if reprojection_info.xy_crs is not None: - warnings.warn(f'{warn_prefix}: ignoring ' - f'reprojection_info.xy_crs = {reprojection_info.xy_crs!r}') - if reprojection_info.xy_tp_names is not None: + if xy_crs is not None: + warnings.warn(f'{warn_prefix}: ignoring xy_crs = {xy_crs!r}') + if xy_tp_names is not None: warnings.warn(f'{warn_prefix}: ignoring ' - f'reprojection_info.xy_tp_names = {reprojection_info.xy_tp_names!r}') - if reprojection_info.xy_gcp_step is not None: + f'xy_tp_names = {xy_tp_names!r}') + if xy_gcp_step is not None: warnings.warn(f'{warn_prefix}: ignoring ' - f'reprojection_info.xy_gcp_step = {reprojection_info.xy_gcp_step!r}') - if reprojection_info.xy_tp_gcp_step is not None: + f'xy_gcp_step = {xy_gcp_step!r}') + if xy_tp_gcp_step is not None: warnings.warn(f'{warn_prefix}: ignoring ' - f'reprojection_info.xy_tp_gcp_step = {reprojection_info.xy_tp_gcp_step!r}') + f'xy_tp_gcp_step = {xy_tp_gcp_step!r}') if output_resampling != 'Nearest': warnings.warn(f'{warn_prefix}: ignoring ' f'dst_resampling = {output_resampling!r}') @@ -329,8 +226,7 @@ def process(self, warnings.warn(f'{warn_prefix}: ignoring ' f'include_non_spatial_vars = {include_non_spatial_vars!r}') - geo_coding = geo_coding.derive(xy_var_names=(reprojection_info.xy_names[0], - reprojection_info.xy_names[1])) + geo_coding = geo_coding.derive(xy_var_names=(xy_names[0], xy_names[1])) dataset = rectify_dataset(dataset, compute_subset=False, diff --git a/xcube/core/gen2/opener.py b/xcube/core/gen2/opener.py new file mode 100644 index 000000000..e69de29bb diff --git a/xcube/core/gridmapping/__init__.py b/xcube/core/gridmapping/__init__.py index 6ba7c1246..66d5e5264 100644 --- a/xcube/core/gridmapping/__init__.py +++ b/xcube/core/gridmapping/__init__.py @@ -21,5 +21,6 @@ from .base import CRS_CRS84 from .base import CRS_WGS84 +from .base import DEFAULT_CRS from .base import DEFAULT_TOLERANCE from .base import GridMapping diff --git a/xcube/core/gridmapping/base.py b/xcube/core/gridmapping/base.py index 44cc87dc3..062dffc13 100644 --- a/xcube/core/gridmapping/base.py +++ b/xcube/core/gridmapping/base.py @@ -46,6 +46,8 @@ from .helpers import _to_affine from .helpers import scale_xy_res_and_size +DEFAULT_CRS = 'EPSG:4326' + # WGS84, axis order: lat, lon CRS_WGS84 = pyproj.crs.CRS(4326) diff --git a/xcube/core/select.py b/xcube/core/select.py index ad9d6314a..7431a698d 100644 --- a/xcube/core/select.py +++ b/xcube/core/select.py @@ -3,6 +3,7 @@ import cftime import pandas as pd +import pyproj import xarray as xr from xcube.core.gridmapping import GridMapping @@ -37,7 +38,10 @@ def select_subset(dataset: xr.Dataset, *, var_names: Collection[str] = None, + crs: str = None, bbox: Bbox = None, + spatial_res: Union[float, Tuple[float]] = None, + tile_size: Union[int, Tuple[int, int]] = None, time_range: TimeRange = None): """ Create a subset from *dataset* given *var_names*, @@ -51,8 +55,13 @@ def select_subset(dataset: xr.Dataset, :param dataset: The dataset. :param var_names: Optional variable names. + :param crs: The dataset's CRS. If not set the dataset's current CRS will + be used. :param bbox: Optional bounding box in the dataset's CRS coordinate units. + :param spatial_res: Otional spatial resolution in the dataset's + CRS coordinate units. + :param tile_size: Optional tile size :param time_range: Optional time range :return: a subset of *dataset*, or unchanged *dataset* if no keyword-arguments are used. @@ -60,7 +69,29 @@ def select_subset(dataset: xr.Dataset, if var_names is not None: dataset = select_variables_subset(dataset, var_names=var_names) if bbox is not None: - dataset = select_spatial_subset(dataset, xy_bbox=bbox) + geo_coding = None + if crs is not None: + if spatial_res is None: + raise ValueError('If CRS is provided, ' + 'spatial_res must be given') + import math + crs = pyproj.crs.CRS.from_string(crs) + x_min, y_min, x_max, y_max = bbox + x_res = spatial_res[0] if isinstance(spatial_res, tuple) \ + else spatial_res + width = math.ceil((x_max - x_min) / x_res) + y_res = spatial_res[1] if isinstance(spatial_res, tuple) \ + else spatial_res + height = math.ceil((y_max - y_min) / y_res) + geo_coding = GridMapping.regular(size=(width, height), + xy_min=(x_min, y_min), + xy_res=spatial_res, + crs=crs, + tile_size=tile_size) + + dataset = select_spatial_subset(dataset, + xy_bbox=bbox, + geo_coding=geo_coding) if time_range is not None: dataset = select_temporal_subset(dataset, time_range=time_range) return dataset