diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fa70ef..f05dbc7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - migrate datasets from SQL to CephFS storage. +## [3.5.1] - 2025-12-12 + +### Added +- multitime cutout queries. + +## [3.4.2] - 2025-11-18 + +### Changed +- Default cutout limits. + ## [3.4.1] - 2025-10-30 ### Added diff --git a/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip b/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip index 54e60a3..2ed65ea 100644 Binary files a/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip and b/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip differ diff --git a/DEMO_notebooks/DEMO_local_python_notebooks.zip b/DEMO_notebooks/DEMO_local_python_notebooks.zip index e8a34ca..b25b480 100644 Binary files a/DEMO_notebooks/DEMO_local_python_notebooks.zip and b/DEMO_notebooks/DEMO_local_python_notebooks.zip differ diff --git a/DEMO_notebooks/DEMO_wind_SciServer_python_notebooks.zip b/DEMO_notebooks/DEMO_wind_SciServer_python_notebooks.zip index 6fd89ce..9b41594 100644 Binary files a/DEMO_notebooks/DEMO_wind_SciServer_python_notebooks.zip and b/DEMO_notebooks/DEMO_wind_SciServer_python_notebooks.zip differ diff --git a/giverny/pyproject.toml b/giverny/pyproject.toml index 0d2374a..06ab6ee 100644 --- a/giverny/pyproject.toml +++ b/giverny/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "giverny" -version = "3.4.1" +version = "3.5.1" description = "library to query the Johns Hopkins Turbulence Database (JHTDB)" authors = [ {name = "Johns Hopkins Turbulence Database Group"} diff --git a/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py b/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py index 8bd72b7..7d4471b 100644 --- a/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py +++ b/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py @@ -116,20 +116,40 @@ def check_timepoint(metadata, timepoint, dataset_title, query_type, max_num_time time_upper = np.float64(time_metadata['upper']) time_steps = np.int64(time_metadata['n']) - if query_type == 'getcutout' or dataset_title in time_index_datasets: + if query_type == 'getcutout': + try: + list(timepoint) + except: + raise Exception(f"t_range must be specified as a python list or numpy array, e.g. [{timepoint}, {timepoint}]") + + # check that the time range is specified as minimum and maximum integer values. + if timepoint.dtype not in [np.int32, np.int64]: + raise Exception('all t_range values, [minimum, maximum], should be specified as integers') + + if len(timepoint) != 2 or timepoint[0] > timepoint[1]: + raise Exception(f't_range, {list(timepoint)}, is not correctly specified as [minimum, maximum]') + valid_timepoints = range(1, time_steps + 1) # handles checking datasets with time indices. - if timepoint not in valid_timepoints: - raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be an integer and in the inclusive range of " + + if timepoint[0] not in valid_timepoints or timepoint[1] not in valid_timepoints: + raise Exception(f"'t_range', [{timepoint[0]}, {timepoint[1]}], is not a valid time range for '{dataset_title}': all times must be in the inclusive range of " + f'[{valid_timepoints[0]}, {valid_timepoints[-1]}]') elif query_type == 'getdata': - valid_timepoints = (time_lower, time_upper) + if dataset_title in time_index_datasets: + valid_timepoints = range(1, time_steps + 1) - # handles checking datasets with real times. - if timepoint < valid_timepoints[0] or timepoint > valid_timepoints[1]: - raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be in the inclusive range of " + - f'[{valid_timepoints[0]}, {valid_timepoints[1]}]') + # handles checking datasets with time indices. + if timepoint not in valid_timepoints: + raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be an integer and in the inclusive range of " + + f'[{valid_timepoints[0]}, {valid_timepoints[-1]}]') + else: + valid_timepoints = (time_lower, time_upper) + + # handles checking datasets with real times. + if timepoint < valid_timepoints[0] or timepoint > valid_timepoints[1]: + raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be in the inclusive range of " + + f'[{valid_timepoints[0]}, {valid_timepoints[1]}]') elif query_type == 'getturbinedata': try: timepoint = list(timepoint) @@ -559,7 +579,7 @@ def get_time_index_from_timepoint(metadata, dataset_title, timepoint, tint, quer time_index = (timepoint / dt) + time_index_shift # round the time index the nearest time index grid point if 'none' time interpolation was specified. if tint == 'none': - time_index = int(math.floor(time_index + 0.5)) + time_index = np.floor(time_index + 0.5).astype(int) else: # do not convert the timepoint to a time index for datasets processed by pyJHTDB. time_index = timepoint @@ -862,74 +882,91 @@ def write_cutout_hdf5_and_xmf_files(cube, output_data, output_filename): # get the dataset name used for the hdf5 file. h5_var = cube.var h5_attribute_type = get_cardinality_name(cube.metadata, cube.var) - h5_dataset_name = cube.dataset_name + h5_dataset_names = list(output_data.data_vars.keys()) # the shape of the cutout. ordering of the dimensions in the xarray, output_data, is (z, y, x), so shape is reversed ([::-1]) to keep # consistent with the expected (x, y, z) ordering. shape = [*output_data.sizes.values()][:3][::-1] - # get the output timepoint. - xmf_timepoint = cube.timepoint_original - if cube.dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and cube.var == 'velocity': # split up "zcoor" into "zcoor_uv" and "zcoor_w" for the "velocity" variable of the "sabl" datasets. output_str = f""" - - - - - - - """ + + + + """ + + for h5_dataset_name in h5_dataset_names: + # get the output timepoint. + xmf_timepoint = int(h5_dataset_name.split('_')[1].strip()) + + output_str += f""" + + """ + + output_str += f""" + + +""" else: # handle other datasets and variables. output_str = f""" - - - - - - - """ + + + + """ + + for h5_dataset_name in h5_dataset_names: + # get the output timepoint. + xmf_timepoint = int(h5_dataset_name.split('_')[1].strip()) + + output_str += f""" + + """ + + output_str += f""" + + +""" with open(cube.output_path.joinpath(output_filename + '.xmf'), 'w') as output_file: output_file.write(output_str) @@ -938,7 +975,7 @@ def write_cutout_hdf5_and_xmf_files(cube, output_data, output_filename): print('-----') sys.stdout.flush() -def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strides, output_filename, +def contour_plot(cube, value_index, cutout_data, time, plot_ranges, xyzt_axes_ranges, xyzt_strides, output_filename, colormap = 'inferno', equal_aspect_ratio = True): """ create a contour plot from the getCutout output. @@ -953,6 +990,13 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid metadata = cube.metadata variable = cube.var dataset_title = cube.dataset_title + dataset_name = variable + '_' + str(time).zfill(4) + + # remove the time axis from axes_ranges and strides. + axes_ranges = xyzt_axes_ranges[:3] + strides = xyzt_strides[:3] + # get the time range. + time_range = xyzt_axes_ranges[3] # names for each value, e.g. value index 0 for velocity data corresponds to the x-component of the velocity ("ux"). value_name_map = get_variable_component_names_map(metadata) @@ -984,6 +1028,10 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid if not(np.all(axes_min <= plot_ranges_min) and np.all(plot_ranges_max <= axes_max)): raise Exception(f'the specified plot ranges are not all bounded by the box volume defined by:\n{axes_ranges}') + # raise exception if the plot time is not one of the queried timepoints. + if time not in range(time_range[0], time_range[1] + 1, 1): + raise Exception(f'the specified time ({time}) is not a queried time, t_range:\n{time_range}') + # determine how many of the axis minimum values are equal to their corresponding axis maximum value. num_axes_equal_min_max = plot_ranges_min == plot_ranges_max # raise exception if the data being plotted is not 2-dimensional. @@ -1025,15 +1073,15 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid # specify the subset (or full) axes ranges to use for plotting. cutout_data is of the format [z-range, y-range, x-range, output value index]. if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and variable == 'velocity': # zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets. - plot_data = cutout_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor_uv = zcoor_values, - values = value_index) + plot_data = cutout_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor_uv = zcoor_values, + values = value_index) else: - plot_data = cutout_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor = zcoor_values, - values = value_index) + plot_data = cutout_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor = zcoor_values, + values = value_index) # raise exception if only one point is going to be plotted along more than 1 axis. a contour plot requires more # than 1 point along 2 axes. this check is required in case the user specifies a stride along an axis that @@ -1083,7 +1131,7 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid plane_axis = original_axis_title[0].strip() # plot_ranges_min = plot_ranges_max for plane_axis. plane_point = plot_ranges_min[axis_index_map[plane_axis]] - axis_title = plane_axis + ' = ' + str(plane_point) + ', t = ' + str(cutout_data.attrs['t_start']) + axis_title = plane_axis + ' = ' + str(plane_point) + ', t = ' + str(time) title_str = f'{output_dataset_title}\n{variable} ({value_name}) contour plot ({axis_title})' # remove '_uv' from the axis variable names for display in the plot. x_axis_variable = x_axis_variable.replace('_uv', '') @@ -1118,7 +1166,7 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid print('contour plot created successfully.') sys.stdout.flush() -def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): +def cutout_values(cube, x, y, z, output_data, time, xyzt_axes_ranges, xyzt_strides): """ retrieve data values for all of the specified points. """ @@ -1127,6 +1175,12 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): metadata = cube.metadata variable = cube.var dataset_title = cube.dataset_title + dataset_name = variable + '_' + str(time).zfill(4) + + # remove the time axis from axes_ranges and strides. + axes_ranges = xyzt_axes_ranges[:3] + strides = xyzt_strides[:3] + time_range = xyzt_axes_ranges[3] # minimum and maximum endpoints along each axis for the points the user requested. endpoints_min = np.array([np.min(x), np.min(y), np.min(z)], dtype = np.int32) @@ -1137,6 +1191,10 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): # raise exception if all of the user requested datapoints are not inside the bounds of the user box volume. if not(np.all(axes_ranges[:, 0] <= endpoints_min) and np.all(endpoints_max <= axes_ranges[:, 1])): raise Exception(f'the specified point(s) are not all bounded by the box volume defined by:\n{axes_ranges}') + + # raise exception if the time is not one of the queried timepoints. + if time not in range(time_range[0], time_range[1] + 1, 1): + raise Exception(f'the specified time ({time}) is not a queried time, t_range:\n{time_range}') # datasets that have an irregular y-grid. irregular_ygrid_datasets = get_irregular_mesh_ygrid_datasets(metadata, variable) @@ -1161,15 +1219,15 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): # value(s) corresponding to the specified (x, y, z) datapoint(s). if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and variable == 'velocity': # zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets. - output_values = output_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor_uv = zcoor_values) + output_values = output_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor_uv = zcoor_values) # add the zcoor_w coordinate to the returned xarray dataarray. output_values = output_values.assign_coords({'zcoor_w': output_values.zcoor_uv + (cube.dz / 2)}) return output_values else: - return output_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor = zcoor_values) + return output_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor = zcoor_values) diff --git a/giverny/src/giverny/turbulence_toolkit.py b/giverny/src/giverny/turbulence_toolkit.py index 49bb452..6cba169 100644 --- a/giverny/src/giverny/turbulence_toolkit.py +++ b/giverny/src/giverny/turbulence_toolkit.py @@ -48,7 +48,7 @@ finally: import pyJHTDB -def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, +def getCutout(cube, var, xyzt_axes_ranges_original, xyzt_strides, trace_memory = False, verbose = True): """ retrieve a cutout of the isotropic cube. @@ -71,8 +71,7 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, # data constants. c = metadata['constants'] - # only time_step and filter_width values of 1 are currently allowed. - time_step = 1 + # only filter_width value of 1 is currently allowed. filter_width = 1 # field (variable) map for legacy datasets. @@ -88,10 +87,19 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, # retrieve the list of datasets processed by the giverny code. giverny_datasets = get_giverny_datasets() + # xyz original axes ranges. + axes_ranges_original = xyzt_axes_ranges_original[:3] + # time original range. + timepoint_range_original = xyzt_axes_ranges_original[3] + # xyz original axes strides. + strides = xyzt_strides[:3] + # time original stride. + timepoint_stride = xyzt_strides[3] + # housekeeping procedures. # ----- - var_offsets, axes_ranges, timepoint = \ - getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, strides, var, timepoint_original) + var_offsets, axes_ranges, timepoint_range = \ + getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, xyzt_strides, var, timepoint_range_original) # the number of values to read per datapoint. for pressure data this value is 1. for velocity # data this value is 3, because there is a velocity measurement along each axis. @@ -99,8 +107,10 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, # number of original datapoints along each axis specified by the user. used for checking that the user did not request # too much data and that result is filled correctly. axes_lengths_original = axes_ranges_original[:, 1] - axes_ranges_original[:, 0] + 1 + # number of original times queried by the user. + num_times = ((timepoint_range_original[1] - timepoint_range_original[0]) // timepoint_stride) + 1 # total number of datapoints, used for checking if the user requested too much data.. - num_datapoints = np.prod(axes_lengths_original) + num_datapoints = np.prod(axes_lengths_original) * num_times # total size of data, in GBs, requested by the user's box. requested_data_size = (num_datapoints * c['bytes_per_datapoint'] * num_values_per_datapoint) / float(1024**3) # maximum number of datapoints that can be read in. currently set to 16 GBs worth of datapoints. @@ -122,9 +132,12 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, spatial_method = 'none' temporal_method = 'none' option = [-999.9, -999.9] - # initialize cube constants. this is done so that all of the constants are known for pre-processing of the data. - cube.init_constants(query_type, var, var_offsets, timepoint, timepoint_original, - spatial_method, temporal_method, option, num_values_per_datapoint, c) + + if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and var == 'velocity': + # zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets. + dims_list = ['zcoor_uv', 'ycoor', 'xcoor', 'values'] + else: + dims_list = ['zcoor', 'ycoor', 'xcoor', 'values'] # ----- # starting the tracemalloc library. @@ -134,54 +147,72 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, tracemem_start = [mem_value / (1024**3) for mem_value in tracemalloc.get_traced_memory()] tracemem_used_start = tracemalloc.get_tracemalloc_memory() / (1024**3) - # create a small placeholder array for error checking. a full pre-filled array is created in lJHTDB.getbigCutout (pyJHTDB datasets) and - # getCutout_process_data (giverny datasets). initially the datatype is set to "f" (float) so that the array is filled with the - # missing placeholder value (-999.9). - result = np.array([c['missing_value_placeholder']], dtype = 'f') - - # process the data query, retrieve a cutout for the various datasets. - if dataset_title in giverny_datasets: - """ - get the results for the datasets processed by giverny. - """ - # parse the database files, generate the result matrix. - result = getCutout_process_data(cube, metadata, axes_ranges, var, timepoint, - axes_ranges_original, strides, var_offsets, timepoint_original, c) - else: - """ - get the results for the legacy datasets processed by pyJHTDB. - """ - # initialize lJHTDB gSOAP resources and add the user's authorization token. - lJHTDB = pyJHTDB.libJHTDB(auth_token = auth_token) - lJHTDB.initialize() - - # get the field (variable) integer for the legacy datasets. - field = field_map[var] - - # the strides will be applied later after retrieving the data. - result = lJHTDB.getbigCutout(data_set = dataset_title, fields = field, t_start = timepoint_original, t_end = timepoint_original, t_step = time_step, - start = np.array([axes_ranges[0, 0], axes_ranges[1, 0], axes_ranges[2, 0]], dtype = int), - end = np.array([axes_ranges[0, 1], axes_ranges[1, 1], axes_ranges[2, 1]], dtype = int), - step = np.array([1, 1, 1], dtype = int), - filter_width = filter_width) - - # free up gSOAP resources. - lJHTDB.finalize() + result_map = {} + # iterate over the timepoints to retrieve the cutouts. + for timepoint, timepoint_original in zip( + range(timepoint_range[0], timepoint_range[1] + 1, timepoint_stride), + range(timepoint_range_original[0], timepoint_range_original[1] + 1, timepoint_stride) + ): + # initialize cube constants. this is done so that all of the constants are known for pre-processing of the data. + cube.init_constants(query_type, var, var_offsets, timepoint, timepoint_original, + spatial_method, temporal_method, option, num_values_per_datapoint, c) + + # create a small placeholder array for error checking. a full pre-filled array is created in lJHTDB.getbigCutout (pyJHTDB datasets) and + # getCutout_process_data (giverny datasets). initially the datatype is set to "f" (float) so that the array is filled with the + # missing placeholder value (-999.9). + result = np.array([c['missing_value_placeholder']], dtype = 'f') + + # process the data query, retrieve a cutout for the various datasets. + if dataset_title in giverny_datasets: + """ + get the results for the datasets processed by giverny. + """ + # parse the database files, generate the result matrix. + result = getCutout_process_data(cube, metadata, axes_ranges, var, timepoint, + axes_ranges_original, strides, var_offsets, timepoint_original, c) + else: + """ + get the results for the legacy datasets processed by pyJHTDB. + """ + # initialize lJHTDB gSOAP resources and add the user's authorization token. + lJHTDB = pyJHTDB.libJHTDB(auth_token = auth_token) + lJHTDB.initialize() + + # get the field (variable) integer for the legacy datasets. + field = field_map[var] + + # the strides will be applied later after retrieving the data. + result = lJHTDB.getbigCutout(data_set = dataset_title, fields = field, t_start = timepoint_original, t_end = timepoint_original, t_step = 1, + start = np.array([axes_ranges[0, 0], axes_ranges[1, 0], axes_ranges[2, 0]], dtype = int), + end = np.array([axes_ranges[0, 1], axes_ranges[1, 1], axes_ranges[2, 1]], dtype = int), + step = np.array([1, 1, 1], dtype = int), + filter_width = filter_width) + + # free up gSOAP resources. + lJHTDB.finalize() + + # determines how many copies of data need to be me made along each axis when the number of datapoints the user specified + # exceeds the cube resolution (cube.N). note: no copies of the data values should be made, hence data_value_multiplier equals 1. + axes_multipliers = np.ceil(axes_lengths_original / cube.N).astype(int) + data_value_multiplier = 1 + + # duplicates the data along the z-, y-, and x-axes of output_data if the the user asked for more datapoints than the cube resolution along any axis. + if np.any(axes_multipliers > 1): + result = np.tile(result, (axes_multipliers[2], axes_multipliers[1], axes_multipliers[0], data_value_multiplier)) + # truncate any extra datapoints from the duplicate data outside of the original range of the datapoints specified by the user. + result = np.copy(result[0 : axes_lengths_original[2], 0 : axes_lengths_original[1], 0 : axes_lengths_original[0], :]) + + # checks to make sure that data was read in for all points. + if c['missing_value_placeholder'] in result or result.shape != (axes_lengths_original[2], axes_lengths_original[1], axes_lengths_original[0], num_values_per_datapoint): + raise Exception(f'result was not filled correctly') + + # apply the strides to output_data. + result = xr.DataArray(data = result[::strides[2], ::strides[1], ::strides[0], :], + dims = dims_list) - # determines how many copies of data need to be me made along each axis when the number of datapoints the user specified - # exceeds the cube resolution (cube.N). note: no copies of the data values should be made, hence data_value_multiplier equals 1. - axes_multipliers = np.ceil(axes_lengths_original / cube.N).astype(int) - data_value_multiplier = 1 - - # duplicates the data along the z-, y-, and x-axes of output_data if the the user asked for more datapoints than the cube resolution along any axis. - if np.any(axes_multipliers > 1): - result = np.tile(result, (axes_multipliers[2], axes_multipliers[1], axes_multipliers[0], data_value_multiplier)) - # truncate any extra datapoints from the duplicate data outside of the original range of the datapoints specified by the user. - result = np.copy(result[0 : axes_lengths_original[2], 0 : axes_lengths_original[1], 0 : axes_lengths_original[0], :]) - - # checks to make sure that data was read in for all points. - if c['missing_value_placeholder'] in result or result.shape != (axes_lengths_original[2], axes_lengths_original[1], axes_lengths_original[0], num_values_per_datapoint): - raise Exception(f'result was not filled correctly') + # set the dataset name to be used in the hdf5 file. + h5_dataset_name = cube.dataset_name + result_map[h5_dataset_name] = result # datasets that have an irregular y-grid. irregular_ygrid_datasets = get_irregular_mesh_ygrid_datasets(metadata, var) @@ -203,24 +234,15 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, x_coords = np.around(np.arange(axes_ranges_original[0][0] - 1, axes_ranges_original[0][1], strides[0], dtype = np.float32) * cube.dx, decimals = c['decimals']) x_coords += cube.coor_offsets[0] - # set the dataset name to be used in the hdf5 file. - h5_dataset_name = cube.dataset_name - if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and var == 'velocity': # zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets. coords_map = {'zcoor_uv':z_coords, 'zcoor_w':z_coords + (0.1953125 / 2), 'ycoor':y_coords, 'xcoor':x_coords} - dims_list = ['zcoor_uv', 'ycoor', 'xcoor', 'values'] else: coords_map = {'zcoor':z_coords, 'ycoor':y_coords, 'xcoor':x_coords} - dims_list = ['zcoor', 'ycoor', 'xcoor', 'values'] - - # apply the strides to output_data. - result = xr.DataArray(data = result[::strides[2], ::strides[1], ::strides[0], :], - dims = dims_list) - result = xr.Dataset(data_vars = {h5_dataset_name:result}, + result = xr.Dataset(data_vars = result_map, coords = coords_map, - attrs = {'dataset':dataset_title, 't_start':timepoint_original, 't_end':timepoint_original, 't_step':time_step, + attrs = {'dataset':dataset_title, 't_start':timepoint_range_original[0], 't_end':timepoint_range_original[1], 't_step':timepoint_stride, 'x_start':axes_ranges_original[0][0], 'y_start':axes_ranges_original[1][0], 'z_start':axes_ranges_original[2][0], 'x_end':axes_ranges_original[0][1], 'y_end':axes_ranges_original[1][1], 'z_end':axes_ranges_original[2][1], 'x_step':strides[0], 'y_step':strides[1], 'z_step':strides[2], @@ -256,7 +278,7 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, return result -def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, strides, var, timepoint_original): +def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, xyzt_strides, var, timepoint_range_original): """ complete all of the getCutout housekeeping procedures before data processing. """ @@ -265,11 +287,11 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ # check that the user-input variable is a valid variable name. check_variable(metadata, var, dataset_title, query_type) # check that the user-input timepoint is a valid timepoint for the dataset. - check_timepoint(metadata, timepoint_original, dataset_title, query_type) + check_timepoint(metadata, timepoint_range_original, dataset_title, query_type) # check that the user-input x-, y-, and z-axis ranges are all specified correctly as [minimum, maximum] integer values. check_axes_ranges(metadata, axes_ranges_original, dataset_title, var) # check that the user-input strides are all positive integers. - check_strides(strides) + check_strides(xyzt_strides) # pre-processing steps. # ----- @@ -279,7 +301,7 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ axes_ranges = convert_to_0_based_ranges(metadata, axes_ranges_original, dataset_title, var) # convert the original input timepoint to the correct time index. - timepoint = get_time_index_from_timepoint(metadata, dataset_title, timepoint_original, tint = 'none', query_type = query_type) + timepoint_range = get_time_index_from_timepoint(metadata, dataset_title, timepoint_range_original, tint = 'none', query_type = query_type) # set var_offsets to var for getCutout. 'velocity' is handled differently in getData for the 'sabl2048low', 'sabl2048high', 'stsabl2048low', and 'stsabl2048high' datasets. if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and var == 'velocity': @@ -288,7 +310,7 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ else: var_offsets = var - return (var_offsets, axes_ranges, timepoint) + return (var_offsets, axes_ranges, timepoint_range) def getData(cube, var, timepoint_original_notebook, temporal_method, spatial_method_original, spatial_operator, points, option = [-999.9, -999.9], diff --git a/givernylocal/pyproject.toml b/givernylocal/pyproject.toml index 9db209e..a33704f 100644 --- a/givernylocal/pyproject.toml +++ b/givernylocal/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "givernylocal" -version = "3.4.1" +version = "3.5.1" description = "library to query the Johns Hopkins Turbulence Database (JHTDB)" authors = [ {name = "Johns Hopkins Turbulence Database Group"} diff --git a/givernylocal/src/givernylocal/turbulence_gizmos/basic_gizmos.py b/givernylocal/src/givernylocal/turbulence_gizmos/basic_gizmos.py index e20a293..87f4281 100644 --- a/givernylocal/src/givernylocal/turbulence_gizmos/basic_gizmos.py +++ b/givernylocal/src/givernylocal/turbulence_gizmos/basic_gizmos.py @@ -116,20 +116,40 @@ def check_timepoint(metadata, timepoint, dataset_title, query_type, max_num_time time_upper = np.float64(time_metadata['upper']) time_steps = np.int64(time_metadata['n']) - if query_type == 'getcutout' or dataset_title in time_index_datasets: + if query_type == 'getcutout': + try: + list(timepoint) + except: + raise Exception(f"t_range must be specified as a python list or numpy array, e.g. [{timepoint}, {timepoint}]") + + # check that the time range is specified as minimum and maximum integer values. + if timepoint.dtype not in [np.int32, np.int64]: + raise Exception('all t_range values, [minimum, maximum], should be specified as integers') + + if len(timepoint) != 2 or timepoint[0] > timepoint[1]: + raise Exception(f't_range, {list(timepoint)}, is not correctly specified as [minimum, maximum]') + valid_timepoints = range(1, time_steps + 1) # handles checking datasets with time indices. - if timepoint not in valid_timepoints: - raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be an integer and in the inclusive range of " + + if timepoint[0] not in valid_timepoints or timepoint[1] not in valid_timepoints: + raise Exception(f"'t_range', [{timepoint[0]}, {timepoint[1]}], is not a valid time range for '{dataset_title}': all times must be in the inclusive range of " + f'[{valid_timepoints[0]}, {valid_timepoints[-1]}]') elif query_type == 'getdata': - valid_timepoints = (time_lower, time_upper) + if dataset_title in time_index_datasets: + valid_timepoints = range(1, time_steps + 1) - # handles checking datasets with real times. - if timepoint < valid_timepoints[0] or timepoint > valid_timepoints[1]: - raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be in the inclusive range of " + - f'[{valid_timepoints[0]}, {valid_timepoints[1]}]') + # handles checking datasets with time indices. + if timepoint not in valid_timepoints: + raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be an integer and in the inclusive range of " + + f'[{valid_timepoints[0]}, {valid_timepoints[-1]}]') + else: + valid_timepoints = (time_lower, time_upper) + + # handles checking datasets with real times. + if timepoint < valid_timepoints[0] or timepoint > valid_timepoints[1]: + raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be in the inclusive range of " + + f'[{valid_timepoints[0]}, {valid_timepoints[1]}]') elif query_type == 'getturbinedata': try: timepoint = list(timepoint) @@ -559,7 +579,7 @@ def get_time_index_from_timepoint(metadata, dataset_title, timepoint, tint, quer time_index = (timepoint / dt) + time_index_shift # round the time index the nearest time index grid point if 'none' time interpolation was specified. if tint == 'none': - time_index = int(math.floor(time_index + 0.5)) + time_index = np.floor(time_index + 0.5).astype(int) else: # do not convert the timepoint to a time index for datasets processed by pyJHTDB. time_index = timepoint @@ -862,74 +882,91 @@ def write_cutout_hdf5_and_xmf_files(cube, output_data, output_filename): # get the dataset name used for the hdf5 file. h5_var = cube.var h5_attribute_type = get_cardinality_name(cube.metadata, cube.var) - h5_dataset_name = cube.dataset_name + h5_dataset_names = list(output_data.data_vars.keys()) # the shape of the cutout. ordering of the dimensions in the xarray, output_data, is (z, y, x), so shape is reversed ([::-1]) to keep # consistent with the expected (x, y, z) ordering. shape = [*output_data.sizes.values()][:3][::-1] - # get the output timepoint. - xmf_timepoint = cube.timepoint_original - if cube.dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and cube.var == 'velocity': # split up "zcoor" into "zcoor_uv" and "zcoor_w" for the "velocity" variable of the "sabl" datasets. output_str = f""" - - - - - - - """ + + + + """ + + for h5_dataset_name in h5_dataset_names: + # get the output timepoint. + xmf_timepoint = int(h5_dataset_name.split('_')[1].strip()) + + output_str += f""" + + """ + + output_str += f""" + + +""" else: # handle other datasets and variables. output_str = f""" - - - - - - - """ + + + + """ + + for h5_dataset_name in h5_dataset_names: + # get the output timepoint. + xmf_timepoint = int(h5_dataset_name.split('_')[1].strip()) + + output_str += f""" + + """ + + output_str += f""" + + +""" with open(cube.output_path.joinpath(output_filename + '.xmf'), 'w') as output_file: output_file.write(output_str) @@ -938,7 +975,7 @@ def write_cutout_hdf5_and_xmf_files(cube, output_data, output_filename): print('-----') sys.stdout.flush() -def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strides, output_filename, +def contour_plot(cube, value_index, cutout_data, time, plot_ranges, xyzt_axes_ranges, xyzt_strides, output_filename, colormap = 'inferno', equal_aspect_ratio = True): """ create a contour plot from the getCutout output. @@ -953,6 +990,13 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid metadata = cube.metadata variable = cube.var dataset_title = cube.dataset_title + dataset_name = variable + '_' + str(time).zfill(4) + + # remove the time axis from axes_ranges and strides. + axes_ranges = xyzt_axes_ranges[:3] + strides = xyzt_strides[:3] + # get the time range. + time_range = xyzt_axes_ranges[3] # names for each value, e.g. value index 0 for velocity data corresponds to the x-component of the velocity ("ux"). value_name_map = get_variable_component_names_map(metadata) @@ -984,6 +1028,10 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid if not(np.all(axes_min <= plot_ranges_min) and np.all(plot_ranges_max <= axes_max)): raise Exception(f'the specified plot ranges are not all bounded by the box volume defined by:\n{axes_ranges}') + # raise exception if the plot time is not one of the queried timepoints. + if time not in range(time_range[0], time_range[1] + 1, 1): + raise Exception(f'the specified time ({time}) is not a queried time, t_range:\n{time_range}') + # determine how many of the axis minimum values are equal to their corresponding axis maximum value. num_axes_equal_min_max = plot_ranges_min == plot_ranges_max # raise exception if the data being plotted is not 2-dimensional. @@ -1025,15 +1073,15 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid # specify the subset (or full) axes ranges to use for plotting. cutout_data is of the format [z-range, y-range, x-range, output value index]. if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and variable == 'velocity': # zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets. - plot_data = cutout_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor_uv = zcoor_values, - values = value_index) + plot_data = cutout_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor_uv = zcoor_values, + values = value_index) else: - plot_data = cutout_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor = zcoor_values, - values = value_index) + plot_data = cutout_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor = zcoor_values, + values = value_index) # raise exception if only one point is going to be plotted along more than 1 axis. a contour plot requires more # than 1 point along 2 axes. this check is required in case the user specifies a stride along an axis that @@ -1083,7 +1131,7 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid plane_axis = original_axis_title[0].strip() # plot_ranges_min = plot_ranges_max for plane_axis. plane_point = plot_ranges_min[axis_index_map[plane_axis]] - axis_title = plane_axis + ' = ' + str(plane_point) + ', t = ' + str(cutout_data.attrs['t_start']) + axis_title = plane_axis + ' = ' + str(plane_point) + ', t = ' + str(time) title_str = f'{output_dataset_title}\n{variable} ({value_name}) contour plot ({axis_title})' # remove '_uv' from the axis variable names for display in the plot. x_axis_variable = x_axis_variable.replace('_uv', '') @@ -1118,7 +1166,7 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid print('contour plot created successfully.') sys.stdout.flush() -def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): +def cutout_values(cube, x, y, z, output_data, time, xyzt_axes_ranges, xyzt_strides): """ retrieve data values for all of the specified points. """ @@ -1127,6 +1175,12 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): metadata = cube.metadata variable = cube.var dataset_title = cube.dataset_title + dataset_name = variable + '_' + str(time).zfill(4) + + # remove the time axis from axes_ranges and strides. + axes_ranges = xyzt_axes_ranges[:3] + strides = xyzt_strides[:3] + time_range = xyzt_axes_ranges[3] # minimum and maximum endpoints along each axis for the points the user requested. endpoints_min = np.array([np.min(x), np.min(y), np.min(z)], dtype = np.int32) @@ -1137,6 +1191,10 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): # raise exception if all of the user requested datapoints are not inside the bounds of the user box volume. if not(np.all(axes_ranges[:, 0] <= endpoints_min) and np.all(endpoints_max <= axes_ranges[:, 1])): raise Exception(f'the specified point(s) are not all bounded by the box volume defined by:\n{axes_ranges}') + + # raise exception if the time is not one of the queried timepoints. + if time not in range(time_range[0], time_range[1] + 1, 1): + raise Exception(f'the specified time ({time}) is not a queried time, t_range:\n{time_range}') # datasets that have an irregular y-grid. irregular_ygrid_datasets = get_irregular_mesh_ygrid_datasets(metadata, variable) @@ -1161,15 +1219,15 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): # value(s) corresponding to the specified (x, y, z) datapoint(s). if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and variable == 'velocity': # zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets. - output_values = output_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor_uv = zcoor_values) + output_values = output_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor_uv = zcoor_values) # add the zcoor_w coordinate to the returned xarray dataarray. output_values = output_values.assign_coords({'zcoor_w': output_values.zcoor_uv + (cube.dz / 2)}) return output_values else: - return output_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor = zcoor_values) + return output_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor = zcoor_values) diff --git a/givernylocal/src/givernylocal/turbulence_toolkit.py b/givernylocal/src/givernylocal/turbulence_toolkit.py index 22421be..2f0b762 100644 --- a/givernylocal/src/givernylocal/turbulence_toolkit.py +++ b/givernylocal/src/givernylocal/turbulence_toolkit.py @@ -23,6 +23,7 @@ import json import math import time +import logging import requests import tracemalloc import numpy as np @@ -31,7 +32,7 @@ from givernylocal.turbulence_dataset import * from givernylocal.turbulence_gizmos.basic_gizmos import * -def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, +def getCutout(cube, var, xyzt_axes_ranges_original, xyzt_strides, trace_memory = False, verbose = True): """ retrieve a cutout of the isotropic cube. @@ -54,17 +55,25 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, # data constants. c = metadata['constants'] - # only time_step and filter_width values of 1 are currently allowed. - time_step = 1 + # only filter_width value of 1 is currently allowed. filter_width = 1 # retrieve the list of datasets processed by the giverny code. giverny_datasets = get_giverny_datasets() + # xyz original axes ranges. + axes_ranges_original = xyzt_axes_ranges_original[:3] + # time original range. + timepoint_range_original = xyzt_axes_ranges_original[3] + # xyz original axes strides. + strides = xyzt_strides[:3] + # time original stride. + timepoint_stride = xyzt_strides[3] + # housekeeping procedures. # ----- - var_offsets, timepoint = \ - getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, strides, var, timepoint_original) + var_offsets, timepoint_range = \ + getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, xyzt_strides, var, timepoint_range_original) # the number of values to read per datapoint. for pressure data this value is 1. for velocity # data this value is 3, because there is a velocity measurement along each axis. @@ -72,8 +81,10 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, # number of original datapoints along each axis specified by the user. used for checking that the user did not request # too much data and that result is filled correctly. axes_lengths_original = axes_ranges_original[:, 1] - axes_ranges_original[:, 0] + 1 + # number of original times queried by the user. + num_times = ((timepoint_range_original[1] - timepoint_range_original[0]) // timepoint_stride) + 1 # total number of datapoints, used for checking if the user requested too much data.. - num_datapoints = np.prod(axes_lengths_original) + num_datapoints = np.prod(axes_lengths_original) * num_times # total size of data, in GBs, requested by the user's box. requested_data_size = (num_datapoints * c['bytes_per_datapoint'] * num_values_per_datapoint) / float(1024**2) # maximum number of datapoints that can be read in. currently set to 16 GBs worth of datapoints. @@ -90,13 +101,18 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, if requested_data_size > max_cutout_size: raise ValueError(f'max local cutout size, {max_cutout_size} MB, exceeded. please specify a box with fewer than (xe - xs) * (ye - ys) * (ze - zs) = {max_datapoints + 1:,} ' + \ f'data points, regardless of strides.') + + if num_datapoints > 128**3: + logging.warning(f'givernylocal will typically work for up to ~200^3 cube cutouts (~100 MB) depending on system load ' + \ + 'and internet connection speed, otherwise HTTP errors may result. for larger cutouts, use the giverny library getCutout function on SciServer') # placeholder values for getData settings. spatial_method = 'none' temporal_method = 'none' option = [-999.9, -999.9] - # initialize cube constants. this is done so that all of the constants are known for pre-processing of the data. - cube.init_constants(query_type, var, var_offsets, timepoint, timepoint_original, + # initialize cube constants. this is done so that all of the constants are known for pre-processing of the data. use the last timepoint + # as a placeholder to keep consistent with how giverny on SciServer works, i.e. the last queried timepoint is the timepoint stored as the instance variable. + cube.init_constants(query_type, var, var_offsets, timepoint_range[1], timepoint_range_original[1], spatial_method, temporal_method, option, num_values_per_datapoint, c) # ----- @@ -106,20 +122,15 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, # checking the memory usage of the program. tracemem_start = [mem_value / (1024**3) for mem_value in tracemalloc.get_traced_memory()] tracemem_used_start = tracemalloc.get_tracemalloc_memory() / (1024**3) - - # create a small placeholder array for error checking. a full pre-filled array is created in lJHTDB.getbigCutout (pyJHTDB datasets) and - # getCutout_process_data (giverny datasets). initially the datatype is set to "f" (float) so that the array is filled with the - # missing placeholder value (-999.9). - result = np.array([c['missing_value_placeholder']], dtype = 'f') # request url. - url = f'https://web.idies.jhu.edu/turbulence-svc-testing/cutout/api/local?token={auth_token}' \ + url = f'https://web.idies.jhu.edu/turbulence-svc/cutout/api/local?token={auth_token}' \ f'&function={var}&dataset={dataset_title}' \ f'&xs={axes_ranges_original[0, 0]}&xe={axes_ranges_original[0, 1]}' \ f'&ys={axes_ranges_original[1, 0]}&ye={axes_ranges_original[1, 1]}' \ f'&zs={axes_ranges_original[2, 0]}&ze={axes_ranges_original[2, 1]}' \ - f'&ts={timepoint_original}&te={timepoint_original}' \ - f'&stridet=1&stridex={strides[0]}&stridey={strides[1]}&stridez={strides[2]}' \ + f'&ts={timepoint_range_original[0]}&te={timepoint_range_original[1]}' \ + f'&stridet={timepoint_stride}&stridex={strides[0]}&stridey={strides[1]}&stridez={strides[2]}' \ f'&filter_width={filter_width}' try: @@ -144,25 +155,37 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, # load the xarray dataset returned by giverny. json_data = json.loads(response.content) + # result DataArray map. + result_map = {} + for dataset_name in json_data['data_vars']: + # create a small placeholder array for error checking. a full pre-filled array is created in lJHTDB.getbigCutout (pyJHTDB datasets) and + # getCutout_process_data (giverny datasets). initially the datatype is set to "f" (float) so that the array is filled with the + # missing placeholder value (-999.9). + result = np.array([c['missing_value_placeholder']], dtype = 'f') + + # load the data for the variable-time into a numpy array. + result = np.array(json_data['data_vars'][dataset_name]['data'], dtype = 'f') + + # checks to make sure that data was received for all points. + strided_lengths = (axes_lengths_original + strides - 1) // strides + if c['missing_value_placeholder'] in result or result.shape != (strided_lengths[2], strided_lengths[1], strided_lengths[0], num_values_per_datapoint): + raise Exception(f'result was not filled correctly for the "{dataset_name}" dataset') + + # create the xarray DataArray. + result = xr.DataArray(data = result, + dims = json_data['data_vars'][dataset_name]['dims']) + + # aggregate the DataArrays into a dictionary. + result_map[dataset_name] = result + # parse the json data into the coords map. store the values as np.float64 for accuracy since the json data does not contain # the original data type information (mostly np.float32, but sometimes np.float64). coords_map = {k: np.array(v['data'], dtype = np.float64) for k, v in json_data['coords'].items()} - # result value array. - result = np.array(json_data['data'], dtype = 'f') - - # checks to make sure that data was received for all points. - strided_lengths = (axes_lengths_original + strides - 1) // strides - if c['missing_value_placeholder'] in result or result.shape != (strided_lengths[2], strided_lengths[1], strided_lengths[0], num_values_per_datapoint): - raise Exception(f'result was not filled correctly') - - # create the xarray DataArray. - result = xr.DataArray(data = result, - dims = json_data['dims']) # create the xarray Dataset. - result = xr.Dataset(data_vars = {json_data['name']:result}, + result = xr.Dataset(data_vars = result_map, coords = coords_map, - attrs = {'dataset':dataset_title, 't_start':timepoint_original, 't_end':timepoint_original, 't_step':time_step, + attrs = {'dataset':dataset_title, 't_start':timepoint_range_original[0], 't_end':timepoint_range_original[1], 't_step':timepoint_stride, 'x_start':axes_ranges_original[0][0], 'y_start':axes_ranges_original[1][0], 'z_start':axes_ranges_original[2][0], 'x_end':axes_ranges_original[0][1], 'y_end':axes_ranges_original[1][1], 'z_end':axes_ranges_original[2][1], 'x_step':strides[0], 'y_step':strides[1], 'z_step':strides[2], @@ -198,7 +221,7 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, return result -def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, strides, var, timepoint_original): +def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, xyzt_strides, var, timepoint_range_original): """ complete all of the getCutout housekeeping procedures before data processing. """ @@ -207,16 +230,16 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ # check that the user-input variable is a valid variable name. check_variable(metadata, var, dataset_title, query_type) # check that the user-input timepoint is a valid timepoint for the dataset. - check_timepoint(metadata, timepoint_original, dataset_title, query_type) + check_timepoint(metadata, timepoint_range_original, dataset_title, query_type) # check that the user-input x-, y-, and z-axis ranges are all specified correctly as [minimum, maximum] integer values. check_axes_ranges(metadata, axes_ranges_original, dataset_title, var) # check that the user-input strides are all positive integers. - check_strides(strides) + check_strides(xyzt_strides) # pre-processing steps. # ----- # convert the original input timepoint to the correct time index. - timepoint = get_time_index_from_timepoint(metadata, dataset_title, timepoint_original, tint = 'none', query_type = query_type) + timepoint_range = get_time_index_from_timepoint(metadata, dataset_title, timepoint_range_original, tint = 'none', query_type = query_type) # set var_offsets to var for getCutout. 'velocity' is handled differently in getData for the 'sabl2048low', 'sabl2048high', 'stsabl2048low', and 'stsabl2048high' datasets. if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and var == 'velocity': @@ -225,7 +248,7 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ else: var_offsets = var - return (var_offsets, timepoint) + return (var_offsets, timepoint_range) def getData(cube, var, timepoint_original, temporal_method, spatial_method_original, spatial_operator, points, option = [-999.9, -999.9],