diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7fa70ef..f05dbc7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- migrate datasets from SQL to CephFS storage.
+## [3.5.1] - 2025-12-12
+
+### Added
+- multitime cutout queries.
+
+## [3.4.2] - 2025-11-18
+
+### Changed
+- Default cutout limits.
+
## [3.4.1] - 2025-10-30
### Added
diff --git a/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip b/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip
index 54e60a3..2ed65ea 100644
Binary files a/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip and b/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip differ
diff --git a/DEMO_notebooks/DEMO_local_python_notebooks.zip b/DEMO_notebooks/DEMO_local_python_notebooks.zip
index e8a34ca..b25b480 100644
Binary files a/DEMO_notebooks/DEMO_local_python_notebooks.zip and b/DEMO_notebooks/DEMO_local_python_notebooks.zip differ
diff --git a/DEMO_notebooks/DEMO_wind_SciServer_python_notebooks.zip b/DEMO_notebooks/DEMO_wind_SciServer_python_notebooks.zip
index 6fd89ce..9b41594 100644
Binary files a/DEMO_notebooks/DEMO_wind_SciServer_python_notebooks.zip and b/DEMO_notebooks/DEMO_wind_SciServer_python_notebooks.zip differ
diff --git a/giverny/pyproject.toml b/giverny/pyproject.toml
index 0d2374a..06ab6ee 100644
--- a/giverny/pyproject.toml
+++ b/giverny/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "giverny"
-version = "3.4.1"
+version = "3.5.1"
description = "library to query the Johns Hopkins Turbulence Database (JHTDB)"
authors = [
{name = "Johns Hopkins Turbulence Database Group"}
diff --git a/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py b/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py
index 8bd72b7..7d4471b 100644
--- a/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py
+++ b/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py
@@ -116,20 +116,40 @@ def check_timepoint(metadata, timepoint, dataset_title, query_type, max_num_time
time_upper = np.float64(time_metadata['upper'])
time_steps = np.int64(time_metadata['n'])
- if query_type == 'getcutout' or dataset_title in time_index_datasets:
+ if query_type == 'getcutout':
+ try:
+ list(timepoint)
+ except:
+ raise Exception(f"t_range must be specified as a python list or numpy array, e.g. [{timepoint}, {timepoint}]")
+
+ # check that the time range is specified as minimum and maximum integer values.
+ if timepoint.dtype not in [np.int32, np.int64]:
+ raise Exception('all t_range values, [minimum, maximum], should be specified as integers')
+
+ if len(timepoint) != 2 or timepoint[0] > timepoint[1]:
+ raise Exception(f't_range, {list(timepoint)}, is not correctly specified as [minimum, maximum]')
+
valid_timepoints = range(1, time_steps + 1)
# handles checking datasets with time indices.
- if timepoint not in valid_timepoints:
- raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be an integer and in the inclusive range of " +
+ if timepoint[0] not in valid_timepoints or timepoint[1] not in valid_timepoints:
+ raise Exception(f"'t_range', [{timepoint[0]}, {timepoint[1]}], is not a valid time range for '{dataset_title}': all times must be in the inclusive range of " +
f'[{valid_timepoints[0]}, {valid_timepoints[-1]}]')
elif query_type == 'getdata':
- valid_timepoints = (time_lower, time_upper)
+ if dataset_title in time_index_datasets:
+ valid_timepoints = range(1, time_steps + 1)
- # handles checking datasets with real times.
- if timepoint < valid_timepoints[0] or timepoint > valid_timepoints[1]:
- raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be in the inclusive range of " +
- f'[{valid_timepoints[0]}, {valid_timepoints[1]}]')
+ # handles checking datasets with time indices.
+ if timepoint not in valid_timepoints:
+ raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be an integer and in the inclusive range of " +
+ f'[{valid_timepoints[0]}, {valid_timepoints[-1]}]')
+ else:
+ valid_timepoints = (time_lower, time_upper)
+
+ # handles checking datasets with real times.
+ if timepoint < valid_timepoints[0] or timepoint > valid_timepoints[1]:
+ raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be in the inclusive range of " +
+ f'[{valid_timepoints[0]}, {valid_timepoints[1]}]')
elif query_type == 'getturbinedata':
try:
timepoint = list(timepoint)
@@ -559,7 +579,7 @@ def get_time_index_from_timepoint(metadata, dataset_title, timepoint, tint, quer
time_index = (timepoint / dt) + time_index_shift
# round the time index the nearest time index grid point if 'none' time interpolation was specified.
if tint == 'none':
- time_index = int(math.floor(time_index + 0.5))
+ time_index = np.floor(time_index + 0.5).astype(int)
else:
# do not convert the timepoint to a time index for datasets processed by pyJHTDB.
time_index = timepoint
@@ -862,74 +882,91 @@ def write_cutout_hdf5_and_xmf_files(cube, output_data, output_filename):
# get the dataset name used for the hdf5 file.
h5_var = cube.var
h5_attribute_type = get_cardinality_name(cube.metadata, cube.var)
- h5_dataset_name = cube.dataset_name
+ h5_dataset_names = list(output_data.data_vars.keys())
# the shape of the cutout. ordering of the dimensions in the xarray, output_data, is (z, y, x), so shape is reversed ([::-1]) to keep
# consistent with the expected (x, y, z) ordering.
shape = [*output_data.sizes.values()][:3][::-1]
- # get the output timepoint.
- xmf_timepoint = cube.timepoint_original
-
if cube.dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and cube.var == 'velocity':
# split up "zcoor" into "zcoor_uv" and "zcoor_w" for the "velocity" variable of the "sabl" datasets.
output_str = f"""
-
-
-
-
-
-
- """
+
+
+
+ """
+
+ for h5_dataset_name in h5_dataset_names:
+ # get the output timepoint.
+ xmf_timepoint = int(h5_dataset_name.split('_')[1].strip())
+
+ output_str += f"""
+
+ """
+
+ output_str += f"""
+
+
+"""
else:
# handle other datasets and variables.
output_str = f"""
-
-
-
-
-
-
- """
+
+
+
+ """
+
+ for h5_dataset_name in h5_dataset_names:
+ # get the output timepoint.
+ xmf_timepoint = int(h5_dataset_name.split('_')[1].strip())
+
+ output_str += f"""
+
+ """
+
+ output_str += f"""
+
+
+"""
with open(cube.output_path.joinpath(output_filename + '.xmf'), 'w') as output_file:
output_file.write(output_str)
@@ -938,7 +975,7 @@ def write_cutout_hdf5_and_xmf_files(cube, output_data, output_filename):
print('-----')
sys.stdout.flush()
-def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strides, output_filename,
+def contour_plot(cube, value_index, cutout_data, time, plot_ranges, xyzt_axes_ranges, xyzt_strides, output_filename,
colormap = 'inferno', equal_aspect_ratio = True):
"""
create a contour plot from the getCutout output.
@@ -953,6 +990,13 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid
metadata = cube.metadata
variable = cube.var
dataset_title = cube.dataset_title
+ dataset_name = variable + '_' + str(time).zfill(4)
+
+ # remove the time axis from axes_ranges and strides.
+ axes_ranges = xyzt_axes_ranges[:3]
+ strides = xyzt_strides[:3]
+ # get the time range.
+ time_range = xyzt_axes_ranges[3]
# names for each value, e.g. value index 0 for velocity data corresponds to the x-component of the velocity ("ux").
value_name_map = get_variable_component_names_map(metadata)
@@ -984,6 +1028,10 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid
if not(np.all(axes_min <= plot_ranges_min) and np.all(plot_ranges_max <= axes_max)):
raise Exception(f'the specified plot ranges are not all bounded by the box volume defined by:\n{axes_ranges}')
+ # raise exception if the plot time is not one of the queried timepoints.
+ if time not in range(time_range[0], time_range[1] + 1, 1):
+ raise Exception(f'the specified time ({time}) is not a queried time, t_range:\n{time_range}')
+
# determine how many of the axis minimum values are equal to their corresponding axis maximum value.
num_axes_equal_min_max = plot_ranges_min == plot_ranges_max
# raise exception if the data being plotted is not 2-dimensional.
@@ -1025,15 +1073,15 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid
# specify the subset (or full) axes ranges to use for plotting. cutout_data is of the format [z-range, y-range, x-range, output value index].
if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and variable == 'velocity':
# zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets.
- plot_data = cutout_data[cube.dataset_name].sel(xcoor = xcoor_values,
- ycoor = ycoor_values,
- zcoor_uv = zcoor_values,
- values = value_index)
+ plot_data = cutout_data[dataset_name].sel(xcoor = xcoor_values,
+ ycoor = ycoor_values,
+ zcoor_uv = zcoor_values,
+ values = value_index)
else:
- plot_data = cutout_data[cube.dataset_name].sel(xcoor = xcoor_values,
- ycoor = ycoor_values,
- zcoor = zcoor_values,
- values = value_index)
+ plot_data = cutout_data[dataset_name].sel(xcoor = xcoor_values,
+ ycoor = ycoor_values,
+ zcoor = zcoor_values,
+ values = value_index)
# raise exception if only one point is going to be plotted along more than 1 axis. a contour plot requires more
# than 1 point along 2 axes. this check is required in case the user specifies a stride along an axis that
@@ -1083,7 +1131,7 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid
plane_axis = original_axis_title[0].strip()
# plot_ranges_min = plot_ranges_max for plane_axis.
plane_point = plot_ranges_min[axis_index_map[plane_axis]]
- axis_title = plane_axis + ' = ' + str(plane_point) + ', t = ' + str(cutout_data.attrs['t_start'])
+ axis_title = plane_axis + ' = ' + str(plane_point) + ', t = ' + str(time)
title_str = f'{output_dataset_title}\n{variable} ({value_name}) contour plot ({axis_title})'
# remove '_uv' from the axis variable names for display in the plot.
x_axis_variable = x_axis_variable.replace('_uv', '')
@@ -1118,7 +1166,7 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid
print('contour plot created successfully.')
sys.stdout.flush()
-def cutout_values(cube, x, y, z, output_data, axes_ranges, strides):
+def cutout_values(cube, x, y, z, output_data, time, xyzt_axes_ranges, xyzt_strides):
"""
retrieve data values for all of the specified points.
"""
@@ -1127,6 +1175,12 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides):
metadata = cube.metadata
variable = cube.var
dataset_title = cube.dataset_title
+ dataset_name = variable + '_' + str(time).zfill(4)
+
+ # remove the time axis from axes_ranges and strides.
+ axes_ranges = xyzt_axes_ranges[:3]
+ strides = xyzt_strides[:3]
+ time_range = xyzt_axes_ranges[3]
# minimum and maximum endpoints along each axis for the points the user requested.
endpoints_min = np.array([np.min(x), np.min(y), np.min(z)], dtype = np.int32)
@@ -1137,6 +1191,10 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides):
# raise exception if all of the user requested datapoints are not inside the bounds of the user box volume.
if not(np.all(axes_ranges[:, 0] <= endpoints_min) and np.all(endpoints_max <= axes_ranges[:, 1])):
raise Exception(f'the specified point(s) are not all bounded by the box volume defined by:\n{axes_ranges}')
+
+ # raise exception if the time is not one of the queried timepoints.
+ if time not in range(time_range[0], time_range[1] + 1, 1):
+ raise Exception(f'the specified time ({time}) is not a queried time, t_range:\n{time_range}')
# datasets that have an irregular y-grid.
irregular_ygrid_datasets = get_irregular_mesh_ygrid_datasets(metadata, variable)
@@ -1161,15 +1219,15 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides):
# value(s) corresponding to the specified (x, y, z) datapoint(s).
if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and variable == 'velocity':
# zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets.
- output_values = output_data[cube.dataset_name].sel(xcoor = xcoor_values,
- ycoor = ycoor_values,
- zcoor_uv = zcoor_values)
+ output_values = output_data[dataset_name].sel(xcoor = xcoor_values,
+ ycoor = ycoor_values,
+ zcoor_uv = zcoor_values)
# add the zcoor_w coordinate to the returned xarray dataarray.
output_values = output_values.assign_coords({'zcoor_w': output_values.zcoor_uv + (cube.dz / 2)})
return output_values
else:
- return output_data[cube.dataset_name].sel(xcoor = xcoor_values,
- ycoor = ycoor_values,
- zcoor = zcoor_values)
+ return output_data[dataset_name].sel(xcoor = xcoor_values,
+ ycoor = ycoor_values,
+ zcoor = zcoor_values)
diff --git a/giverny/src/giverny/turbulence_toolkit.py b/giverny/src/giverny/turbulence_toolkit.py
index 49bb452..6cba169 100644
--- a/giverny/src/giverny/turbulence_toolkit.py
+++ b/giverny/src/giverny/turbulence_toolkit.py
@@ -48,7 +48,7 @@
finally:
import pyJHTDB
-def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
+def getCutout(cube, var, xyzt_axes_ranges_original, xyzt_strides,
trace_memory = False, verbose = True):
"""
retrieve a cutout of the isotropic cube.
@@ -71,8 +71,7 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
# data constants.
c = metadata['constants']
- # only time_step and filter_width values of 1 are currently allowed.
- time_step = 1
+ # only filter_width value of 1 is currently allowed.
filter_width = 1
# field (variable) map for legacy datasets.
@@ -88,10 +87,19 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
# retrieve the list of datasets processed by the giverny code.
giverny_datasets = get_giverny_datasets()
+ # xyz original axes ranges.
+ axes_ranges_original = xyzt_axes_ranges_original[:3]
+ # time original range.
+ timepoint_range_original = xyzt_axes_ranges_original[3]
+ # xyz original axes strides.
+ strides = xyzt_strides[:3]
+ # time original stride.
+ timepoint_stride = xyzt_strides[3]
+
# housekeeping procedures.
# -----
- var_offsets, axes_ranges, timepoint = \
- getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, strides, var, timepoint_original)
+ var_offsets, axes_ranges, timepoint_range = \
+ getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, xyzt_strides, var, timepoint_range_original)
# the number of values to read per datapoint. for pressure data this value is 1. for velocity
# data this value is 3, because there is a velocity measurement along each axis.
@@ -99,8 +107,10 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
# number of original datapoints along each axis specified by the user. used for checking that the user did not request
# too much data and that result is filled correctly.
axes_lengths_original = axes_ranges_original[:, 1] - axes_ranges_original[:, 0] + 1
+ # number of original times queried by the user.
+ num_times = ((timepoint_range_original[1] - timepoint_range_original[0]) // timepoint_stride) + 1
# total number of datapoints, used for checking if the user requested too much data..
- num_datapoints = np.prod(axes_lengths_original)
+ num_datapoints = np.prod(axes_lengths_original) * num_times
# total size of data, in GBs, requested by the user's box.
requested_data_size = (num_datapoints * c['bytes_per_datapoint'] * num_values_per_datapoint) / float(1024**3)
# maximum number of datapoints that can be read in. currently set to 16 GBs worth of datapoints.
@@ -122,9 +132,12 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
spatial_method = 'none'
temporal_method = 'none'
option = [-999.9, -999.9]
- # initialize cube constants. this is done so that all of the constants are known for pre-processing of the data.
- cube.init_constants(query_type, var, var_offsets, timepoint, timepoint_original,
- spatial_method, temporal_method, option, num_values_per_datapoint, c)
+
+ if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and var == 'velocity':
+ # zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets.
+ dims_list = ['zcoor_uv', 'ycoor', 'xcoor', 'values']
+ else:
+ dims_list = ['zcoor', 'ycoor', 'xcoor', 'values']
# -----
# starting the tracemalloc library.
@@ -134,54 +147,72 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
tracemem_start = [mem_value / (1024**3) for mem_value in tracemalloc.get_traced_memory()]
tracemem_used_start = tracemalloc.get_tracemalloc_memory() / (1024**3)
- # create a small placeholder array for error checking. a full pre-filled array is created in lJHTDB.getbigCutout (pyJHTDB datasets) and
- # getCutout_process_data (giverny datasets). initially the datatype is set to "f" (float) so that the array is filled with the
- # missing placeholder value (-999.9).
- result = np.array([c['missing_value_placeholder']], dtype = 'f')
-
- # process the data query, retrieve a cutout for the various datasets.
- if dataset_title in giverny_datasets:
- """
- get the results for the datasets processed by giverny.
- """
- # parse the database files, generate the result matrix.
- result = getCutout_process_data(cube, metadata, axes_ranges, var, timepoint,
- axes_ranges_original, strides, var_offsets, timepoint_original, c)
- else:
- """
- get the results for the legacy datasets processed by pyJHTDB.
- """
- # initialize lJHTDB gSOAP resources and add the user's authorization token.
- lJHTDB = pyJHTDB.libJHTDB(auth_token = auth_token)
- lJHTDB.initialize()
-
- # get the field (variable) integer for the legacy datasets.
- field = field_map[var]
-
- # the strides will be applied later after retrieving the data.
- result = lJHTDB.getbigCutout(data_set = dataset_title, fields = field, t_start = timepoint_original, t_end = timepoint_original, t_step = time_step,
- start = np.array([axes_ranges[0, 0], axes_ranges[1, 0], axes_ranges[2, 0]], dtype = int),
- end = np.array([axes_ranges[0, 1], axes_ranges[1, 1], axes_ranges[2, 1]], dtype = int),
- step = np.array([1, 1, 1], dtype = int),
- filter_width = filter_width)
-
- # free up gSOAP resources.
- lJHTDB.finalize()
+ result_map = {}
+ # iterate over the timepoints to retrieve the cutouts.
+ for timepoint, timepoint_original in zip(
+ range(timepoint_range[0], timepoint_range[1] + 1, timepoint_stride),
+ range(timepoint_range_original[0], timepoint_range_original[1] + 1, timepoint_stride)
+ ):
+ # initialize cube constants. this is done so that all of the constants are known for pre-processing of the data.
+ cube.init_constants(query_type, var, var_offsets, timepoint, timepoint_original,
+ spatial_method, temporal_method, option, num_values_per_datapoint, c)
+
+ # create a small placeholder array for error checking. a full pre-filled array is created in lJHTDB.getbigCutout (pyJHTDB datasets) and
+ # getCutout_process_data (giverny datasets). initially the datatype is set to "f" (float) so that the array is filled with the
+ # missing placeholder value (-999.9).
+ result = np.array([c['missing_value_placeholder']], dtype = 'f')
+
+ # process the data query, retrieve a cutout for the various datasets.
+ if dataset_title in giverny_datasets:
+ """
+ get the results for the datasets processed by giverny.
+ """
+ # parse the database files, generate the result matrix.
+ result = getCutout_process_data(cube, metadata, axes_ranges, var, timepoint,
+ axes_ranges_original, strides, var_offsets, timepoint_original, c)
+ else:
+ """
+ get the results for the legacy datasets processed by pyJHTDB.
+ """
+ # initialize lJHTDB gSOAP resources and add the user's authorization token.
+ lJHTDB = pyJHTDB.libJHTDB(auth_token = auth_token)
+ lJHTDB.initialize()
+
+ # get the field (variable) integer for the legacy datasets.
+ field = field_map[var]
+
+ # the strides will be applied later after retrieving the data.
+ result = lJHTDB.getbigCutout(data_set = dataset_title, fields = field, t_start = timepoint_original, t_end = timepoint_original, t_step = 1,
+ start = np.array([axes_ranges[0, 0], axes_ranges[1, 0], axes_ranges[2, 0]], dtype = int),
+ end = np.array([axes_ranges[0, 1], axes_ranges[1, 1], axes_ranges[2, 1]], dtype = int),
+ step = np.array([1, 1, 1], dtype = int),
+ filter_width = filter_width)
+
+ # free up gSOAP resources.
+ lJHTDB.finalize()
+
+ # determines how many copies of data need to be made along each axis when the number of datapoints the user specified
+ # exceeds the cube resolution (cube.N). note: no copies of the data values should be made, hence data_value_multiplier equals 1.
+ axes_multipliers = np.ceil(axes_lengths_original / cube.N).astype(int)
+ data_value_multiplier = 1
+
+ # duplicates the data along the z-, y-, and x-axes of output_data if the user asked for more datapoints than the cube resolution along any axis.
+ if np.any(axes_multipliers > 1):
+ result = np.tile(result, (axes_multipliers[2], axes_multipliers[1], axes_multipliers[0], data_value_multiplier))
+ # truncate any extra datapoints from the duplicate data outside of the original range of the datapoints specified by the user.
+ result = np.copy(result[0 : axes_lengths_original[2], 0 : axes_lengths_original[1], 0 : axes_lengths_original[0], :])
+
+ # checks to make sure that data was read in for all points.
+ if c['missing_value_placeholder'] in result or result.shape != (axes_lengths_original[2], axes_lengths_original[1], axes_lengths_original[0], num_values_per_datapoint):
+ raise Exception(f'result was not filled correctly')
+
+ # apply the strides to output_data.
+ result = xr.DataArray(data = result[::strides[2], ::strides[1], ::strides[0], :],
+ dims = dims_list)
- # determines how many copies of data need to be me made along each axis when the number of datapoints the user specified
- # exceeds the cube resolution (cube.N). note: no copies of the data values should be made, hence data_value_multiplier equals 1.
- axes_multipliers = np.ceil(axes_lengths_original / cube.N).astype(int)
- data_value_multiplier = 1
-
- # duplicates the data along the z-, y-, and x-axes of output_data if the the user asked for more datapoints than the cube resolution along any axis.
- if np.any(axes_multipliers > 1):
- result = np.tile(result, (axes_multipliers[2], axes_multipliers[1], axes_multipliers[0], data_value_multiplier))
- # truncate any extra datapoints from the duplicate data outside of the original range of the datapoints specified by the user.
- result = np.copy(result[0 : axes_lengths_original[2], 0 : axes_lengths_original[1], 0 : axes_lengths_original[0], :])
-
- # checks to make sure that data was read in for all points.
- if c['missing_value_placeholder'] in result or result.shape != (axes_lengths_original[2], axes_lengths_original[1], axes_lengths_original[0], num_values_per_datapoint):
- raise Exception(f'result was not filled correctly')
+ # set the dataset name to be used in the hdf5 file.
+ h5_dataset_name = cube.dataset_name
+ result_map[h5_dataset_name] = result
# datasets that have an irregular y-grid.
irregular_ygrid_datasets = get_irregular_mesh_ygrid_datasets(metadata, var)
@@ -203,24 +234,15 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
x_coords = np.around(np.arange(axes_ranges_original[0][0] - 1, axes_ranges_original[0][1], strides[0], dtype = np.float32) * cube.dx, decimals = c['decimals'])
x_coords += cube.coor_offsets[0]
- # set the dataset name to be used in the hdf5 file.
- h5_dataset_name = cube.dataset_name
-
if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and var == 'velocity':
# zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets.
coords_map = {'zcoor_uv':z_coords, 'zcoor_w':z_coords + (0.1953125 / 2), 'ycoor':y_coords, 'xcoor':x_coords}
- dims_list = ['zcoor_uv', 'ycoor', 'xcoor', 'values']
else:
coords_map = {'zcoor':z_coords, 'ycoor':y_coords, 'xcoor':x_coords}
- dims_list = ['zcoor', 'ycoor', 'xcoor', 'values']
-
- # apply the strides to output_data.
- result = xr.DataArray(data = result[::strides[2], ::strides[1], ::strides[0], :],
- dims = dims_list)
- result = xr.Dataset(data_vars = {h5_dataset_name:result},
+ result = xr.Dataset(data_vars = result_map,
coords = coords_map,
- attrs = {'dataset':dataset_title, 't_start':timepoint_original, 't_end':timepoint_original, 't_step':time_step,
+ attrs = {'dataset':dataset_title, 't_start':timepoint_range_original[0], 't_end':timepoint_range_original[1], 't_step':timepoint_stride,
'x_start':axes_ranges_original[0][0], 'y_start':axes_ranges_original[1][0], 'z_start':axes_ranges_original[2][0],
'x_end':axes_ranges_original[0][1], 'y_end':axes_ranges_original[1][1], 'z_end':axes_ranges_original[2][1],
'x_step':strides[0], 'y_step':strides[1], 'z_step':strides[2],
@@ -256,7 +278,7 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
return result
-def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, strides, var, timepoint_original):
+def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, xyzt_strides, var, timepoint_range_original):
"""
complete all of the getCutout housekeeping procedures before data processing.
"""
@@ -265,11 +287,11 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_
# check that the user-input variable is a valid variable name.
check_variable(metadata, var, dataset_title, query_type)
# check that the user-input timepoint is a valid timepoint for the dataset.
- check_timepoint(metadata, timepoint_original, dataset_title, query_type)
+ check_timepoint(metadata, timepoint_range_original, dataset_title, query_type)
# check that the user-input x-, y-, and z-axis ranges are all specified correctly as [minimum, maximum] integer values.
check_axes_ranges(metadata, axes_ranges_original, dataset_title, var)
# check that the user-input strides are all positive integers.
- check_strides(strides)
+ check_strides(xyzt_strides)
# pre-processing steps.
# -----
@@ -279,7 +301,7 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_
axes_ranges = convert_to_0_based_ranges(metadata, axes_ranges_original, dataset_title, var)
# convert the original input timepoint to the correct time index.
- timepoint = get_time_index_from_timepoint(metadata, dataset_title, timepoint_original, tint = 'none', query_type = query_type)
+ timepoint_range = get_time_index_from_timepoint(metadata, dataset_title, timepoint_range_original, tint = 'none', query_type = query_type)
# set var_offsets to var for getCutout. 'velocity' is handled differently in getData for the 'sabl2048low', 'sabl2048high', 'stsabl2048low', and 'stsabl2048high' datasets.
if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and var == 'velocity':
@@ -288,7 +310,7 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_
else:
var_offsets = var
- return (var_offsets, axes_ranges, timepoint)
+ return (var_offsets, axes_ranges, timepoint_range)
def getData(cube, var, timepoint_original_notebook, temporal_method, spatial_method_original, spatial_operator, points,
option = [-999.9, -999.9],
diff --git a/givernylocal/pyproject.toml b/givernylocal/pyproject.toml
index 9db209e..a33704f 100644
--- a/givernylocal/pyproject.toml
+++ b/givernylocal/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "givernylocal"
-version = "3.4.1"
+version = "3.5.1"
description = "library to query the Johns Hopkins Turbulence Database (JHTDB)"
authors = [
{name = "Johns Hopkins Turbulence Database Group"}
diff --git a/givernylocal/src/givernylocal/turbulence_gizmos/basic_gizmos.py b/givernylocal/src/givernylocal/turbulence_gizmos/basic_gizmos.py
index e20a293..87f4281 100644
--- a/givernylocal/src/givernylocal/turbulence_gizmos/basic_gizmos.py
+++ b/givernylocal/src/givernylocal/turbulence_gizmos/basic_gizmos.py
@@ -116,20 +116,40 @@ def check_timepoint(metadata, timepoint, dataset_title, query_type, max_num_time
time_upper = np.float64(time_metadata['upper'])
time_steps = np.int64(time_metadata['n'])
- if query_type == 'getcutout' or dataset_title in time_index_datasets:
+ if query_type == 'getcutout':
+ try:
+ list(timepoint)
+ except:
+ raise Exception(f"t_range must be specified as a python list or numpy array, e.g. [{timepoint}, {timepoint}]")
+
+ # check that the time range is specified as minimum and maximum integer values.
+ if timepoint.dtype not in [np.int32, np.int64]:
+ raise Exception('all t_range values, [minimum, maximum], should be specified as integers')
+
+ if len(timepoint) != 2 or timepoint[0] > timepoint[1]:
+ raise Exception(f't_range, {list(timepoint)}, is not correctly specified as [minimum, maximum]')
+
valid_timepoints = range(1, time_steps + 1)
# handles checking datasets with time indices.
- if timepoint not in valid_timepoints:
- raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be an integer and in the inclusive range of " +
+ if timepoint[0] not in valid_timepoints or timepoint[1] not in valid_timepoints:
+ raise Exception(f"'t_range', [{timepoint[0]}, {timepoint[1]}], is not a valid time range for '{dataset_title}': all times must be in the inclusive range of " +
f'[{valid_timepoints[0]}, {valid_timepoints[-1]}]')
elif query_type == 'getdata':
- valid_timepoints = (time_lower, time_upper)
+ if dataset_title in time_index_datasets:
+ valid_timepoints = range(1, time_steps + 1)
- # handles checking datasets with real times.
- if timepoint < valid_timepoints[0] or timepoint > valid_timepoints[1]:
- raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be in the inclusive range of " +
- f'[{valid_timepoints[0]}, {valid_timepoints[1]}]')
+ # handles checking datasets with time indices.
+ if timepoint not in valid_timepoints:
+ raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be an integer and in the inclusive range of " +
+ f'[{valid_timepoints[0]}, {valid_timepoints[-1]}]')
+ else:
+ valid_timepoints = (time_lower, time_upper)
+
+ # handles checking datasets with real times.
+ if timepoint < valid_timepoints[0] or timepoint > valid_timepoints[1]:
+ raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be in the inclusive range of " +
+ f'[{valid_timepoints[0]}, {valid_timepoints[1]}]')
elif query_type == 'getturbinedata':
try:
timepoint = list(timepoint)
@@ -559,7 +579,7 @@ def get_time_index_from_timepoint(metadata, dataset_title, timepoint, tint, quer
time_index = (timepoint / dt) + time_index_shift
# round the time index the nearest time index grid point if 'none' time interpolation was specified.
if tint == 'none':
- time_index = int(math.floor(time_index + 0.5))
+ time_index = np.floor(time_index + 0.5).astype(int)
else:
# do not convert the timepoint to a time index for datasets processed by pyJHTDB.
time_index = timepoint
@@ -862,74 +882,91 @@ def write_cutout_hdf5_and_xmf_files(cube, output_data, output_filename):
# get the dataset name used for the hdf5 file.
h5_var = cube.var
h5_attribute_type = get_cardinality_name(cube.metadata, cube.var)
- h5_dataset_name = cube.dataset_name
+ h5_dataset_names = list(output_data.data_vars.keys())
# the shape of the cutout. ordering of the dimensions in the xarray, output_data, is (z, y, x), so shape is reversed ([::-1]) to keep
# consistent with the expected (x, y, z) ordering.
shape = [*output_data.sizes.values()][:3][::-1]
- # get the output timepoint.
- xmf_timepoint = cube.timepoint_original
-
if cube.dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and cube.var == 'velocity':
# split up "zcoor" into "zcoor_uv" and "zcoor_w" for the "velocity" variable of the "sabl" datasets.
output_str = f"""
-
-
-
-
-
-
- """
+
+
+
+ """
+
+ for h5_dataset_name in h5_dataset_names:
+ # get the output timepoint.
+ xmf_timepoint = int(h5_dataset_name.split('_')[1].strip())
+
+ output_str += f"""
+
+ """
+
+ output_str += f"""
+
+
+"""
else:
# handle other datasets and variables.
output_str = f"""
-
-
-
-
-
-
- """
+
+
+
+ """
+
+ for h5_dataset_name in h5_dataset_names:
+ # get the output timepoint.
+ xmf_timepoint = int(h5_dataset_name.split('_')[1].strip())
+
+ output_str += f"""
+
+ """
+
+ output_str += f"""
+
+
+"""
with open(cube.output_path.joinpath(output_filename + '.xmf'), 'w') as output_file:
output_file.write(output_str)
@@ -938,7 +975,7 @@ def write_cutout_hdf5_and_xmf_files(cube, output_data, output_filename):
print('-----')
sys.stdout.flush()
-def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strides, output_filename,
+def contour_plot(cube, value_index, cutout_data, time, plot_ranges, xyzt_axes_ranges, xyzt_strides, output_filename,
colormap = 'inferno', equal_aspect_ratio = True):
"""
create a contour plot from the getCutout output.
@@ -953,6 +990,13 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid
metadata = cube.metadata
variable = cube.var
dataset_title = cube.dataset_title
+ dataset_name = variable + '_' + str(time).zfill(4)
+
+ # remove the time axis from axes_ranges and strides.
+ axes_ranges = xyzt_axes_ranges[:3]
+ strides = xyzt_strides[:3]
+ # get the time range.
+ time_range = xyzt_axes_ranges[3]
# names for each value, e.g. value index 0 for velocity data corresponds to the x-component of the velocity ("ux").
value_name_map = get_variable_component_names_map(metadata)
@@ -984,6 +1028,10 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid
if not(np.all(axes_min <= plot_ranges_min) and np.all(plot_ranges_max <= axes_max)):
raise Exception(f'the specified plot ranges are not all bounded by the box volume defined by:\n{axes_ranges}')
+ # raise exception if the plot time is not one of the queried timepoints.
+ if time not in range(time_range[0], time_range[1] + 1, 1):
+ raise Exception(f'the specified time ({time}) is not a queried time, t_range:\n{time_range}')
+
# determine how many of the axis minimum values are equal to their corresponding axis maximum value.
num_axes_equal_min_max = plot_ranges_min == plot_ranges_max
# raise exception if the data being plotted is not 2-dimensional.
@@ -1025,15 +1073,15 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid
# specify the subset (or full) axes ranges to use for plotting. cutout_data is of the format [z-range, y-range, x-range, output value index].
if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and variable == 'velocity':
# zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets.
- plot_data = cutout_data[cube.dataset_name].sel(xcoor = xcoor_values,
- ycoor = ycoor_values,
- zcoor_uv = zcoor_values,
- values = value_index)
+ plot_data = cutout_data[dataset_name].sel(xcoor = xcoor_values,
+ ycoor = ycoor_values,
+ zcoor_uv = zcoor_values,
+ values = value_index)
else:
- plot_data = cutout_data[cube.dataset_name].sel(xcoor = xcoor_values,
- ycoor = ycoor_values,
- zcoor = zcoor_values,
- values = value_index)
+ plot_data = cutout_data[dataset_name].sel(xcoor = xcoor_values,
+ ycoor = ycoor_values,
+ zcoor = zcoor_values,
+ values = value_index)
# raise exception if only one point is going to be plotted along more than 1 axis. a contour plot requires more
# than 1 point along 2 axes. this check is required in case the user specifies a stride along an axis that
@@ -1083,7 +1131,7 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid
plane_axis = original_axis_title[0].strip()
# plot_ranges_min = plot_ranges_max for plane_axis.
plane_point = plot_ranges_min[axis_index_map[plane_axis]]
- axis_title = plane_axis + ' = ' + str(plane_point) + ', t = ' + str(cutout_data.attrs['t_start'])
+ axis_title = plane_axis + ' = ' + str(plane_point) + ', t = ' + str(time)
title_str = f'{output_dataset_title}\n{variable} ({value_name}) contour plot ({axis_title})'
# remove '_uv' from the axis variable names for display in the plot.
x_axis_variable = x_axis_variable.replace('_uv', '')
@@ -1118,7 +1166,7 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid
print('contour plot created successfully.')
sys.stdout.flush()
-def cutout_values(cube, x, y, z, output_data, axes_ranges, strides):
+def cutout_values(cube, x, y, z, output_data, time, xyzt_axes_ranges, xyzt_strides):
"""
retrieve data values for all of the specified points.
"""
@@ -1127,6 +1175,12 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides):
metadata = cube.metadata
variable = cube.var
dataset_title = cube.dataset_title
+ dataset_name = variable + '_' + str(time).zfill(4)
+
+ # remove the time axis from axes_ranges and strides.
+ axes_ranges = xyzt_axes_ranges[:3]
+ strides = xyzt_strides[:3]
+ time_range = xyzt_axes_ranges[3]
# minimum and maximum endpoints along each axis for the points the user requested.
endpoints_min = np.array([np.min(x), np.min(y), np.min(z)], dtype = np.int32)
@@ -1137,6 +1191,10 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides):
# raise exception if all of the user requested datapoints are not inside the bounds of the user box volume.
if not(np.all(axes_ranges[:, 0] <= endpoints_min) and np.all(endpoints_max <= axes_ranges[:, 1])):
raise Exception(f'the specified point(s) are not all bounded by the box volume defined by:\n{axes_ranges}')
+
+ # raise exception if the time is not one of the queried timepoints.
+ if time not in range(time_range[0], time_range[1] + 1, 1):
+ raise Exception(f'the specified time ({time}) is not a queried time, t_range:\n{time_range}')
# datasets that have an irregular y-grid.
irregular_ygrid_datasets = get_irregular_mesh_ygrid_datasets(metadata, variable)
@@ -1161,15 +1219,15 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides):
# value(s) corresponding to the specified (x, y, z) datapoint(s).
if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and variable == 'velocity':
# zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets.
- output_values = output_data[cube.dataset_name].sel(xcoor = xcoor_values,
- ycoor = ycoor_values,
- zcoor_uv = zcoor_values)
+ output_values = output_data[dataset_name].sel(xcoor = xcoor_values,
+ ycoor = ycoor_values,
+ zcoor_uv = zcoor_values)
# add the zcoor_w coordinate to the returned xarray dataarray.
output_values = output_values.assign_coords({'zcoor_w': output_values.zcoor_uv + (cube.dz / 2)})
return output_values
else:
- return output_data[cube.dataset_name].sel(xcoor = xcoor_values,
- ycoor = ycoor_values,
- zcoor = zcoor_values)
+ return output_data[dataset_name].sel(xcoor = xcoor_values,
+ ycoor = ycoor_values,
+ zcoor = zcoor_values)
diff --git a/givernylocal/src/givernylocal/turbulence_toolkit.py b/givernylocal/src/givernylocal/turbulence_toolkit.py
index 22421be..2f0b762 100644
--- a/givernylocal/src/givernylocal/turbulence_toolkit.py
+++ b/givernylocal/src/givernylocal/turbulence_toolkit.py
@@ -23,6 +23,7 @@
import json
import math
import time
+import logging
import requests
import tracemalloc
import numpy as np
@@ -31,7 +32,7 @@
from givernylocal.turbulence_dataset import *
from givernylocal.turbulence_gizmos.basic_gizmos import *
-def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
+def getCutout(cube, var, xyzt_axes_ranges_original, xyzt_strides,
trace_memory = False, verbose = True):
"""
retrieve a cutout of the isotropic cube.
@@ -54,17 +55,25 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
# data constants.
c = metadata['constants']
- # only time_step and filter_width values of 1 are currently allowed.
- time_step = 1
+ # only filter_width value of 1 is currently allowed.
filter_width = 1
# retrieve the list of datasets processed by the giverny code.
giverny_datasets = get_giverny_datasets()
+ # xyz original axes ranges.
+ axes_ranges_original = xyzt_axes_ranges_original[:3]
+ # time original range.
+ timepoint_range_original = xyzt_axes_ranges_original[3]
+ # xyz original axes strides.
+ strides = xyzt_strides[:3]
+ # time original stride.
+ timepoint_stride = xyzt_strides[3]
+
# housekeeping procedures.
# -----
- var_offsets, timepoint = \
- getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, strides, var, timepoint_original)
+ var_offsets, timepoint_range = \
+ getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, xyzt_strides, var, timepoint_range_original)
# the number of values to read per datapoint. for pressure data this value is 1. for velocity
# data this value is 3, because there is a velocity measurement along each axis.
@@ -72,8 +81,10 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
# number of original datapoints along each axis specified by the user. used for checking that the user did not request
# too much data and that result is filled correctly.
axes_lengths_original = axes_ranges_original[:, 1] - axes_ranges_original[:, 0] + 1
+ # number of original times queried by the user.
+ num_times = ((timepoint_range_original[1] - timepoint_range_original[0]) // timepoint_stride) + 1
     # total number of datapoints, used for checking if the user requested too much data.
- num_datapoints = np.prod(axes_lengths_original)
+ num_datapoints = np.prod(axes_lengths_original) * num_times
     # total size of data, in MBs, requested by the user's box.
requested_data_size = (num_datapoints * c['bytes_per_datapoint'] * num_values_per_datapoint) / float(1024**2)
# maximum number of datapoints that can be read in. currently set to 16 GBs worth of datapoints.
@@ -90,13 +101,18 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
if requested_data_size > max_cutout_size:
raise ValueError(f'max local cutout size, {max_cutout_size} MB, exceeded. please specify a box with fewer than (xe - xs) * (ye - ys) * (ze - zs) = {max_datapoints + 1:,} ' + \
f'data points, regardless of strides.')
+
+ if num_datapoints > 128**3:
+ logging.warning(f'givernylocal will typically work for up to ~200^3 cube cutouts (~100 MB) depending on system load ' + \
+ 'and internet connection speed, otherwise HTTP errors may result. for larger cutouts, use the giverny library getCutout function on SciServer')
# placeholder values for getData settings.
spatial_method = 'none'
temporal_method = 'none'
option = [-999.9, -999.9]
- # initialize cube constants. this is done so that all of the constants are known for pre-processing of the data.
- cube.init_constants(query_type, var, var_offsets, timepoint, timepoint_original,
+ # initialize cube constants. this is done so that all of the constants are known for pre-processing of the data. use the last timepoint
+ # as a placeholder to keep consistent with how giverny on SciServer works, i.e. the last queried timepoint is the timepoint stored as the instance variable.
+ cube.init_constants(query_type, var, var_offsets, timepoint_range[1], timepoint_range_original[1],
spatial_method, temporal_method, option, num_values_per_datapoint, c)
# -----
@@ -106,20 +122,15 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
# checking the memory usage of the program.
tracemem_start = [mem_value / (1024**3) for mem_value in tracemalloc.get_traced_memory()]
tracemem_used_start = tracemalloc.get_tracemalloc_memory() / (1024**3)
-
- # create a small placeholder array for error checking. a full pre-filled array is created in lJHTDB.getbigCutout (pyJHTDB datasets) and
- # getCutout_process_data (giverny datasets). initially the datatype is set to "f" (float) so that the array is filled with the
- # missing placeholder value (-999.9).
- result = np.array([c['missing_value_placeholder']], dtype = 'f')
# request url.
- url = f'https://web.idies.jhu.edu/turbulence-svc-testing/cutout/api/local?token={auth_token}' \
+ url = f'https://web.idies.jhu.edu/turbulence-svc/cutout/api/local?token={auth_token}' \
f'&function={var}&dataset={dataset_title}' \
f'&xs={axes_ranges_original[0, 0]}&xe={axes_ranges_original[0, 1]}' \
f'&ys={axes_ranges_original[1, 0]}&ye={axes_ranges_original[1, 1]}' \
f'&zs={axes_ranges_original[2, 0]}&ze={axes_ranges_original[2, 1]}' \
- f'&ts={timepoint_original}&te={timepoint_original}' \
- f'&stridet=1&stridex={strides[0]}&stridey={strides[1]}&stridez={strides[2]}' \
+ f'&ts={timepoint_range_original[0]}&te={timepoint_range_original[1]}' \
+ f'&stridet={timepoint_stride}&stridex={strides[0]}&stridey={strides[1]}&stridez={strides[2]}' \
f'&filter_width={filter_width}'
try:
@@ -144,25 +155,37 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
# load the xarray dataset returned by giverny.
json_data = json.loads(response.content)
+ # result DataArray map.
+ result_map = {}
+ for dataset_name in json_data['data_vars']:
+ # create a small placeholder array for error checking. a full pre-filled array is created in lJHTDB.getbigCutout (pyJHTDB datasets) and
+ # getCutout_process_data (giverny datasets). initially the datatype is set to "f" (float) so that the array is filled with the
+ # missing placeholder value (-999.9).
+ result = np.array([c['missing_value_placeholder']], dtype = 'f')
+
+ # load the data for the variable-time into a numpy array.
+ result = np.array(json_data['data_vars'][dataset_name]['data'], dtype = 'f')
+
+ # checks to make sure that data was received for all points.
+ strided_lengths = (axes_lengths_original + strides - 1) // strides
+ if c['missing_value_placeholder'] in result or result.shape != (strided_lengths[2], strided_lengths[1], strided_lengths[0], num_values_per_datapoint):
+ raise Exception(f'result was not filled correctly for the "{dataset_name}" dataset')
+
+ # create the xarray DataArray.
+ result = xr.DataArray(data = result,
+ dims = json_data['data_vars'][dataset_name]['dims'])
+
+ # aggregate the DataArrays into a dictionary.
+ result_map[dataset_name] = result
+
# parse the json data into the coords map. store the values as np.float64 for accuracy since the json data does not contain
# the original data type information (mostly np.float32, but sometimes np.float64).
coords_map = {k: np.array(v['data'], dtype = np.float64) for k, v in json_data['coords'].items()}
- # result value array.
- result = np.array(json_data['data'], dtype = 'f')
-
- # checks to make sure that data was received for all points.
- strided_lengths = (axes_lengths_original + strides - 1) // strides
- if c['missing_value_placeholder'] in result or result.shape != (strided_lengths[2], strided_lengths[1], strided_lengths[0], num_values_per_datapoint):
- raise Exception(f'result was not filled correctly')
-
- # create the xarray DataArray.
- result = xr.DataArray(data = result,
- dims = json_data['dims'])
# create the xarray Dataset.
- result = xr.Dataset(data_vars = {json_data['name']:result},
+ result = xr.Dataset(data_vars = result_map,
coords = coords_map,
- attrs = {'dataset':dataset_title, 't_start':timepoint_original, 't_end':timepoint_original, 't_step':time_step,
+ attrs = {'dataset':dataset_title, 't_start':timepoint_range_original[0], 't_end':timepoint_range_original[1], 't_step':timepoint_stride,
'x_start':axes_ranges_original[0][0], 'y_start':axes_ranges_original[1][0], 'z_start':axes_ranges_original[2][0],
'x_end':axes_ranges_original[0][1], 'y_end':axes_ranges_original[1][1], 'z_end':axes_ranges_original[2][1],
'x_step':strides[0], 'y_step':strides[1], 'z_step':strides[2],
@@ -198,7 +221,7 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides,
return result
-def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, strides, var, timepoint_original):
+def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, xyzt_strides, var, timepoint_range_original):
"""
complete all of the getCutout housekeeping procedures before data processing.
"""
@@ -207,16 +230,16 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_
# check that the user-input variable is a valid variable name.
check_variable(metadata, var, dataset_title, query_type)
# check that the user-input timepoint is a valid timepoint for the dataset.
- check_timepoint(metadata, timepoint_original, dataset_title, query_type)
+ check_timepoint(metadata, timepoint_range_original, dataset_title, query_type)
# check that the user-input x-, y-, and z-axis ranges are all specified correctly as [minimum, maximum] integer values.
check_axes_ranges(metadata, axes_ranges_original, dataset_title, var)
# check that the user-input strides are all positive integers.
- check_strides(strides)
+ check_strides(xyzt_strides)
# pre-processing steps.
# -----
# convert the original input timepoint to the correct time index.
- timepoint = get_time_index_from_timepoint(metadata, dataset_title, timepoint_original, tint = 'none', query_type = query_type)
+ timepoint_range = get_time_index_from_timepoint(metadata, dataset_title, timepoint_range_original, tint = 'none', query_type = query_type)
# set var_offsets to var for getCutout. 'velocity' is handled differently in getData for the 'sabl2048low', 'sabl2048high', 'stsabl2048low', and 'stsabl2048high' datasets.
if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and var == 'velocity':
@@ -225,7 +248,7 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_
else:
var_offsets = var
- return (var_offsets, timepoint)
+ return (var_offsets, timepoint_range)
def getData(cube, var, timepoint_original, temporal_method, spatial_method_original, spatial_operator, points,
option = [-999.9, -999.9],