From e08e31576b158fd059005edbb9a01a0d98ae4156 Mon Sep 17 00:00:00 2001 From: mike schnaubelt <93147480+mschnau@users.noreply.github.com> Date: Tue, 18 Nov 2025 15:38:48 -0500 Subject: [PATCH 1/5] giverny 3.4.2 - updated default cutout limits --- CHANGELOG.md | 5 +++++ .../DEMO_SciServer_python_notebooks.zip | Bin 55583 -> 56151 bytes .../DEMO_local_python_notebooks.zip | Bin 55141 -> 55690 bytes giverny/pyproject.toml | 2 +- givernylocal/pyproject.toml | 2 +- 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fa70ef..113ad71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - migrate datasets from SQL to CephFS storage. +## [3.4.2] - 2025-11-18 + +### Changed +- Default cutout limits. + ## [3.4.1] - 2025-10-30 ### Added diff --git a/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip b/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip index 54e60a30b995c0bd0508c0c95a85c330d139dba5..5d149d6a0330e1b873faf26fbe46a7b6abcabfba 100644 GIT binary patch delta 557 zcmbQgiTV0AX6XQLW)=|!5YVnEihg%QkAvTrfdPc&CeOQ}IN8@bY;u5}&&GQ%RVL4K zVPMQ_|s500N~dBw3)`P4x=r8y<)3NDik zY(?vJ6(Ich?9@sHD+Lf)lCO}MoSa%*4A$iWG|923D6vu@GY>2PQLg|JFHSAdQ%KG) z0tyx6=cNEOgUtmxDL*$K=&w|T#GIVT2mM4Q&s7tML~^~Y zLYg|V-NhwE8X(g(^{Ub`b8<9HG;7rnPJn7m1bfXE8Vb<}<0c2L6XixyGv#N+vILHL3RTRV3YvEeSw=CBPeRbH(zsGIgt-F-Y56oh+*>foviOIH<|OM L0Nd(YptJ!1(fz$C delta 182 zcmcbExrgVwi-zC#U<$i3NDGvVok%1ccLobny)k4*-0iI2ix{ diff --git a/DEMO_notebooks/DEMO_local_python_notebooks.zip b/DEMO_notebooks/DEMO_local_python_notebooks.zip index e8a34ca2fedbd5e5ecd414bf63ff8488fa9aa945..d2fc2d926bb8eef8abab4c15e36927595d6bb88e 100644 GIT binary patch delta 541 zcmY*X&r1S96qbyFw4xsv7{$w!%}8@8YYPm7gdh(cql}t#%%#<-*{M*II(I6jzrzSS z)Ujiix@FM0ymags(aaX94`vwNyzhJ8n{VFA_Opt8aAn!v+HaHLJ*MlEr1;*YbA6G#&ZEQ8@FLktWG#pZ#*6j($InV1FZERre!zxSKFI1lq6 zcSZmulh`yZ772?|it?c6ti*5qKOH0q6{$UvO9=o0lJFD{5!KPQflHf3re|>};Spjm z#m%k3qLT%nn^JWaF@{rcq_Yed7^U>w4iqpY7O#**vLuBV24ol@4sseZ$VZTStzm`; zq?JYiCZl7rhd)?I>xMBl?Jp^H9e+S%4H)@*i}gC)21nif&c-Sp?X6Yr=V}hiu$x#< odq@+82{RsI! Date: Fri, 12 Dec 2025 15:09:51 -0500 Subject: [PATCH 2/5] giverny 3.5.1 --- CHANGELOG.md | 5 + .../DEMO_SciServer_python_notebooks.zip | Bin 56151 -> 53067 bytes .../DEMO_local_python_notebooks.zip | Bin 55690 -> 52601 bytes giverny/pyproject.toml | 2 +- .../giverny/turbulence_gizmos/basic_gizmos.py | 202 +++++++++++------- giverny/src/giverny/turbulence_toolkit.py | 166 +++++++------- givernylocal/pyproject.toml | 2 +- .../turbulence_gizmos/basic_gizmos.py | 46 +++- 8 files changed, 266 insertions(+), 157 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 113ad71..f05dbc7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - migrate datasets from SQL to CephFS storage. +## [3.5.1] - 2025-12-12 + +### Added +- multitime cutout queries. + ## [3.4.2] - 2025-11-18 ### Changed diff --git a/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip b/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip index 5d149d6a0330e1b873faf26fbe46a7b6abcabfba..f5b3e7493f499d70cf369c7021f7ed8c33aa0ae9 100644 GIT binary patch delta 305 zcmcbnM~=mQ{BG(-G=vlaHv2PM%>Y5~r+CoSLdol98%Vlv-SxQ=*WTUj*i+Wfm2eD3oO8 zrs~DyDd})20D%%jO>~-iby;FjW@1uKYHfTq&{7Sko1zV3HT9~}GIMe?Of+lNVTxaa=PIluF}znRU|`cE(F z+pbR6H?-Es)xT&4e!TTJHqX@5G|xP5JK1tRZ5e4RG|v)gF2K!okvcQxyangr97GKh zM2d&Cs1}Blq3M<)yrAh|#W^mv`0!i9rNWh#)kFAoyaRvrUdQcq7tZxf;+N%i+}hlW zD^Eu7m^+2{-Q%^hSS%dD-|oxpmX_oyuq+oLlj$)oR1lV_MYzB>1K$fzmAXck&Cnr& z8-Yh*>Io5Mv7MMCBIz+ZW^)52t3pnNyjKPL#I6lSv;?6>+#mBp7AC+xRR&cUCj9@I zy71?tS=I{~Es+3|6H`84&PCWvYz_i~J(LjaA&E>Q9gBlP0hXnJ z4+gzhe79X_8+u>cT_t+f$FiCz5mf{h8+cEy3YOa)mal^4cY}4|+VOV0r4}!S%Wl6a zvr=G^PO5J5J7g1!N2>VAihr6)Da5(2-7U2pJRQ_~)O|pUW}y9v(NZ8!fl>Up+)EB= ztVs6{bPrdMn}G7IP-HK`Eft)bR*9QPCoCNC^t;NTc<<#}Zy7WDhU6@nxRK!C*-7)j z1)Ll@eAnlc3C-~wjDdVw2C#6L3t=m8f%;CFCd)ZSaquAH#K#X|!=#caLyu5`K_*z3 zXQsw(C-_omLE3$Z9FU1~lb3Xm>VPbN4kQBT{>jlvW=W+pniYoxXAw=0L7tn2U6&{s zNrUF%z{rboKx&U0nU0$~6WJ3?K-5#-pS~?ccJTW1QL?ad%Ddz#(Qp!(Jsy1)wxEGh zxmkQz*Xk-q6^a9$`|-i+ro!LxXSI*l&#oM({Kep_55cIm8 zLP=(Bs$NW+?er(M@mk-r@lWs;Z1vpHu^^uxfa#Miq>KRZ7006QWUAF)L delta 1168 zcmZuxOH30{6h*4o@KI1;Q8BqKrJaUpp#}^f_#wE01xAbpXzHVVZ6{1SWoAl0NQ_$+ z#`LZvG0`Uel%s?2~$zF7MmvH@LJC?n( zxR+nX57Vdd=1>G*4^5-P7sZ2DZMf#^L-(K?pZ2`1eDt?EJ1j)i!r1+07azL1R^rTL zRgLCWYLoS~nGkktEGZ z-y(f?gSCv&bzX!i5N}fh7K?Giu!xu=2v>BS6-gj9W&{Edc7hgyn5I+8td@yWfJRc- z+GM)I)>OV7-4ZjfO*+V@xX$Zz2$Dc#y9Cyh(BYGk)65iv=M^&rDra#elY|o2H7hSc z($X6B#etC`MT1lx-8%!z)sCzVW+36W*EdXE5)}fWQi6mmjRm&-br`ikX0=D-U~#)r zPZtrR+iCauJD>Rcp0o0G#QEq%k! Bk?H^d diff --git a/giverny/pyproject.toml b/giverny/pyproject.toml index d227262..78b65b7 100644 --- a/giverny/pyproject.toml +++ b/giverny/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "giverny" -version = "3.4.2" +version = "3.4.2.1" description = "library to query the Johns Hopkins Turbulence Database (JHTDB)" authors = [ {name = "Johns Hopkins Turbulence Database Group"} diff --git a/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py b/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py index 8bd72b7..bc36a4e 100644 --- a/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py +++ b/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py @@ -116,20 +116,40 @@ def check_timepoint(metadata, timepoint, dataset_title, query_type, max_num_time time_upper = np.float64(time_metadata['upper']) time_steps = np.int64(time_metadata['n']) - if query_type == 'getcutout' or dataset_title in time_index_datasets: + if query_type == 'getcutout': + try: + list(timepoint) + except: + raise Exception(f"t_range must be specified as a python list or numpy array, e.g. [{timepoint}, {timepoint}]") + + # check that the time range is specified as minimum and maximum integer values. + if timepoint.dtype not in [np.int32, np.int64]: + raise Exception('all t_range values, [minimum, maximum], should be specified as integers') + + if len(timepoint) != 2 or timepoint[0] > timepoint[1]: + raise Exception(f't_range, {list(timepoint)}, is not correctly specified as [minimum, maximum]') + valid_timepoints = range(1, time_steps + 1) # handles checking datasets with time indices. - if timepoint not in valid_timepoints: - raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be an integer and in the inclusive range of " + + if timepoint[0] not in valid_timepoints or timepoint[1] not in valid_timepoints: + raise Exception(f"'t_range', [{timepoint[0]}, {timepoint[1]}], is not a valid time range for '{dataset_title}': all times must be in the inclusive range of " + f'[{valid_timepoints[0]}, {valid_timepoints[-1]}]') elif query_type == 'getdata': - valid_timepoints = (time_lower, time_upper) + if dataset_title in time_index_datasets: + valid_timepoints = range(1, time_steps + 1) - # handles checking datasets with real times. - if timepoint < valid_timepoints[0] or timepoint > valid_timepoints[1]: - raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be in the inclusive range of " + - f'[{valid_timepoints[0]}, {valid_timepoints[1]}]') + # handles checking datasets with time indices. + if timepoint not in valid_timepoints: + raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be an integer and in the inclusive range of " + + f'[{valid_timepoints[0]}, {valid_timepoints[-1]}]') + else: + valid_timepoints = (time_lower, time_upper) + + # handles checking datasets with real times. + if timepoint < valid_timepoints[0] or timepoint > valid_timepoints[1]: + raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be in the inclusive range of " + + f'[{valid_timepoints[0]}, {valid_timepoints[1]}]') elif query_type == 'getturbinedata': try: timepoint = list(timepoint) @@ -559,7 +579,7 @@ def get_time_index_from_timepoint(metadata, dataset_title, timepoint, tint, quer time_index = (timepoint / dt) + time_index_shift # round the time index the nearest time index grid point if 'none' time interpolation was specified. if tint == 'none': - time_index = int(math.floor(time_index + 0.5)) + time_index = np.floor(time_index + 0.5).astype(int) else: # do not convert the timepoint to a time index for datasets processed by pyJHTDB. time_index = timepoint @@ -862,44 +882,49 @@ def write_cutout_hdf5_and_xmf_files(cube, output_data, output_filename): # get the dataset name used for the hdf5 file. h5_var = cube.var h5_attribute_type = get_cardinality_name(cube.metadata, cube.var) - h5_dataset_name = cube.dataset_name + h5_dataset_names = list(output_data.data_vars.keys()) # the shape of the cutout. ordering of the dimensions in the xarray, output_data, is (z, y, x), so shape is reversed ([::-1]) to keep # consistent with the expected (x, y, z) ordering. shape = [*output_data.sizes.values()][:3][::-1] - # get the output timepoint. - xmf_timepoint = cube.timepoint_original - if cube.dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and cube.var == 'velocity': # split up "zcoor" into "zcoor_uv" and "zcoor_w" for the "velocity" variable of the "sabl" datasets. output_str = f""" - - - + """ + + for h5_dataset_name in h5_dataset_names: + # get the output timepoint. + xmf_timepoint = int(h5_dataset_name.split('_')[1].strip()) + + output_str += f""" + + """ + + output_str += f""" """ else: @@ -907,27 +932,35 @@ def write_cutout_hdf5_and_xmf_files(cube, output_data, output_filename): output_str = f""" - - - + """ + + for h5_dataset_name in h5_dataset_names: + # get the output timepoint. + xmf_timepoint = int(h5_dataset_name.split('_')[1].strip()) + + output_str += f""" + + """ + + output_str += f""" """ @@ -938,7 +971,7 @@ def write_cutout_hdf5_and_xmf_files(cube, output_data, output_filename): print('-----') sys.stdout.flush() -def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strides, output_filename, +def contour_plot(cube, value_index, cutout_data, time, plot_ranges, xyzt_axes_ranges, xyzt_strides, output_filename, colormap = 'inferno', equal_aspect_ratio = True): """ create a contour plot from the getCutout output. @@ -953,6 +986,13 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid metadata = cube.metadata variable = cube.var dataset_title = cube.dataset_title + dataset_name = variable + '_' + str(time).zfill(4) + + # remove the time axis from axes_ranges and strides. + axes_ranges = xyzt_axes_ranges[:3] + strides = xyzt_strides[:3] + # get the time range. + time_range = xyzt_axes_ranges[3] # names for each value, e.g. value index 0 for velocity data corresponds to the x-component of the velocity ("ux"). value_name_map = get_variable_component_names_map(metadata) @@ -984,6 +1024,10 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid if not(np.all(axes_min <= plot_ranges_min) and np.all(plot_ranges_max <= axes_max)): raise Exception(f'the specified plot ranges are not all bounded by the box volume defined by:\n{axes_ranges}') + # raise exception if the plot time is not one of the queried timepoints. + if time not in range(time_range[0], time_range[1] + 1, 1): + raise Exception(f'the specified time ({time}) is not a queried time, t_range:\n{time_range}') + # determine how many of the axis minimum values are equal to their corresponding axis maximum value. num_axes_equal_min_max = plot_ranges_min == plot_ranges_max # raise exception if the data being plotted is not 2-dimensional. @@ -1025,15 +1069,15 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid # specify the subset (or full) axes ranges to use for plotting. cutout_data is of the format [z-range, y-range, x-range, output value index]. if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and variable == 'velocity': # zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets. - plot_data = cutout_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor_uv = zcoor_values, - values = value_index) + plot_data = cutout_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor_uv = zcoor_values, + values = value_index) else: - plot_data = cutout_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor = zcoor_values, - values = value_index) + plot_data = cutout_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor = zcoor_values, + values = value_index) # raise exception if only one point is going to be plotted along more than 1 axis. a contour plot requires more # than 1 point along 2 axes. this check is required in case the user specifies a stride along an axis that @@ -1118,7 +1162,7 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid print('contour plot created successfully.') sys.stdout.flush() -def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): +def cutout_values(cube, x, y, z, output_data, time, xyzt_axes_ranges, xyzt_strides): """ retrieve data values for all of the specified points. """ @@ -1127,6 +1171,12 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): metadata = cube.metadata variable = cube.var dataset_title = cube.dataset_title + dataset_name = variable + '_' + str(time).zfill(4) + + # remove the time axis from axes_ranges and strides. + axes_ranges = xyzt_axes_ranges[:3] + strides = xyzt_strides[:3] + time_range = xyzt_axes_ranges[3] # minimum and maximum endpoints along each axis for the points the user requested. endpoints_min = np.array([np.min(x), np.min(y), np.min(z)], dtype = np.int32) @@ -1137,6 +1187,10 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): # raise exception if all of the user requested datapoints are not inside the bounds of the user box volume. if not(np.all(axes_ranges[:, 0] <= endpoints_min) and np.all(endpoints_max <= axes_ranges[:, 1])): raise Exception(f'the specified point(s) are not all bounded by the box volume defined by:\n{axes_ranges}') + + # raise exception if the time is not one of the queried timepoints. + if time not in range(time_range[0], time_range[1] + 1, 1): + raise Exception(f'the specified time ({time}) is not a queried time, t_range:\n{time_range}') # datasets that have an irregular y-grid. irregular_ygrid_datasets = get_irregular_mesh_ygrid_datasets(metadata, variable) @@ -1161,15 +1215,15 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): # value(s) corresponding to the specified (x, y, z) datapoint(s). if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and variable == 'velocity': # zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets. - output_values = output_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor_uv = zcoor_values) + output_values = output_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor_uv = zcoor_values) # add the zcoor_w coordinate to the returned xarray dataarray. output_values = output_values.assign_coords({'zcoor_w': output_values.zcoor_uv + (cube.dz / 2)}) return output_values else: - return output_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor = zcoor_values) + return output_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor = zcoor_values) diff --git a/giverny/src/giverny/turbulence_toolkit.py b/giverny/src/giverny/turbulence_toolkit.py index 49bb452..1ab6d50 100644 --- a/giverny/src/giverny/turbulence_toolkit.py +++ b/giverny/src/giverny/turbulence_toolkit.py @@ -48,7 +48,7 @@ finally: import pyJHTDB -def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, +def getCutout(cube, var, xyzt_axes_ranges_original, xyzt_strides, trace_memory = False, verbose = True): """ retrieve a cutout of the isotropic cube. @@ -71,8 +71,7 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, # data constants. c = metadata['constants'] - # only time_step and filter_width values of 1 are currently allowed. - time_step = 1 + # only filter_width values of 1 are currently allowed. filter_width = 1 # field (variable) map for legacy datasets. @@ -88,10 +87,19 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, # retrieve the list of datasets processed by the giverny code. giverny_datasets = get_giverny_datasets() + # xyz original axes ranges. + axes_ranges_original = xyzt_axes_ranges_original[:3] + # time original range. + timepoint_range_original = xyzt_axes_ranges_original[3] + # xyz original axes strides. + strides = xyzt_strides[:3] + # time original stride. + timepoint_stride = xyzt_strides[3] + # housekeeping procedures. # ----- - var_offsets, axes_ranges, timepoint = \ - getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, strides, var, timepoint_original) + var_offsets, axes_ranges, timepoint_range = \ + getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, xyzt_strides, var, timepoint_range_original) # the number of values to read per datapoint. for pressure data this value is 1. for velocity # data this value is 3, because there is a velocity measurement along each axis. @@ -99,8 +107,10 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, # number of original datapoints along each axis specified by the user. used for checking that the user did not request # too much data and that result is filled correctly. axes_lengths_original = axes_ranges_original[:, 1] - axes_ranges_original[:, 0] + 1 + # number of original times queried by the user. + num_times = ((timepoint_range_original[1] - timepoint_range_original[0]) // timepoint_stride) + 1 # total number of datapoints, used for checking if the user requested too much data.. - num_datapoints = np.prod(axes_lengths_original) + num_datapoints = np.prod(axes_lengths_original) * num_times # total size of data, in GBs, requested by the user's box. requested_data_size = (num_datapoints * c['bytes_per_datapoint'] * num_values_per_datapoint) / float(1024**3) # maximum number of datapoints that can be read in. currently set to 16 GBs worth of datapoints. @@ -122,9 +132,12 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, spatial_method = 'none' temporal_method = 'none' option = [-999.9, -999.9] - # initialize cube constants. this is done so that all of the constants are known for pre-processing of the data. - cube.init_constants(query_type, var, var_offsets, timepoint, timepoint_original, - spatial_method, temporal_method, option, num_values_per_datapoint, c) + + if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and var == 'velocity': + # zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets. + dims_list = ['zcoor_uv', 'ycoor', 'xcoor', 'values'] + else: + dims_list = ['zcoor', 'ycoor', 'xcoor', 'values'] # ----- # starting the tracemalloc library. @@ -134,54 +147,72 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, tracemem_start = [mem_value / (1024**3) for mem_value in tracemalloc.get_traced_memory()] tracemem_used_start = tracemalloc.get_tracemalloc_memory() / (1024**3) - # create a small placeholder array for error checking. a full pre-filled array is created in lJHTDB.getbigCutout (pyJHTDB datasets) and - # getCutout_process_data (giverny datasets). initially the datatype is set to "f" (float) so that the array is filled with the - # missing placeholder value (-999.9). - result = np.array([c['missing_value_placeholder']], dtype = 'f') - - # process the data query, retrieve a cutout for the various datasets. - if dataset_title in giverny_datasets: - """ - get the results for the datasets processed by giverny. - """ - # parse the database files, generate the result matrix. - result = getCutout_process_data(cube, metadata, axes_ranges, var, timepoint, - axes_ranges_original, strides, var_offsets, timepoint_original, c) - else: - """ - get the results for the legacy datasets processed by pyJHTDB. - """ - # initialize lJHTDB gSOAP resources and add the user's authorization token. - lJHTDB = pyJHTDB.libJHTDB(auth_token = auth_token) - lJHTDB.initialize() - - # get the field (variable) integer for the legacy datasets. - field = field_map[var] - - # the strides will be applied later after retrieving the data. - result = lJHTDB.getbigCutout(data_set = dataset_title, fields = field, t_start = timepoint_original, t_end = timepoint_original, t_step = time_step, - start = np.array([axes_ranges[0, 0], axes_ranges[1, 0], axes_ranges[2, 0]], dtype = int), - end = np.array([axes_ranges[0, 1], axes_ranges[1, 1], axes_ranges[2, 1]], dtype = int), - step = np.array([1, 1, 1], dtype = int), - filter_width = filter_width) - - # free up gSOAP resources. - lJHTDB.finalize() + result_map = {} + # iterate over the timepoints to retrieve the cutouts. + for timepoint, timepoint_original in zip( + range(timepoint_range[0], timepoint_range[1] + 1, timepoint_stride), + range(timepoint_range_original[0], timepoint_range_original[1] + 1, timepoint_stride) + ): + # initialize cube constants. this is done so that all of the constants are known for pre-processing of the data. + cube.init_constants(query_type, var, var_offsets, timepoint, timepoint_original, + spatial_method, temporal_method, option, num_values_per_datapoint, c) + + # create a small placeholder array for error checking. a full pre-filled array is created in lJHTDB.getbigCutout (pyJHTDB datasets) and + # getCutout_process_data (giverny datasets). initially the datatype is set to "f" (float) so that the array is filled with the + # missing placeholder value (-999.9). + result = np.array([c['missing_value_placeholder']], dtype = 'f') + + # process the data query, retrieve a cutout for the various datasets. + if dataset_title in giverny_datasets: + """ + get the results for the datasets processed by giverny. + """ + # parse the database files, generate the result matrix. + result = getCutout_process_data(cube, metadata, axes_ranges, var, timepoint, + axes_ranges_original, strides, var_offsets, timepoint_original, c) + else: + """ + get the results for the legacy datasets processed by pyJHTDB. + """ + # initialize lJHTDB gSOAP resources and add the user's authorization token. + lJHTDB = pyJHTDB.libJHTDB(auth_token = auth_token) + lJHTDB.initialize() + + # get the field (variable) integer for the legacy datasets. + field = field_map[var] + + # the strides will be applied later after retrieving the data. + result = lJHTDB.getbigCutout(data_set = dataset_title, fields = field, t_start = timepoint_original, t_end = timepoint_original, t_step = 1, + start = np.array([axes_ranges[0, 0], axes_ranges[1, 0], axes_ranges[2, 0]], dtype = int), + end = np.array([axes_ranges[0, 1], axes_ranges[1, 1], axes_ranges[2, 1]], dtype = int), + step = np.array([1, 1, 1], dtype = int), + filter_width = filter_width) + + # free up gSOAP resources. + lJHTDB.finalize() + + # determines how many copies of data need to be me made along each axis when the number of datapoints the user specified + # exceeds the cube resolution (cube.N). note: no copies of the data values should be made, hence data_value_multiplier equals 1. + axes_multipliers = np.ceil(axes_lengths_original / cube.N).astype(int) + data_value_multiplier = 1 + + # duplicates the data along the z-, y-, and x-axes of output_data if the the user asked for more datapoints than the cube resolution along any axis. + if np.any(axes_multipliers > 1): + result = np.tile(result, (axes_multipliers[2], axes_multipliers[1], axes_multipliers[0], data_value_multiplier)) + # truncate any extra datapoints from the duplicate data outside of the original range of the datapoints specified by the user. + result = np.copy(result[0 : axes_lengths_original[2], 0 : axes_lengths_original[1], 0 : axes_lengths_original[0], :]) + + # checks to make sure that data was read in for all points. + if c['missing_value_placeholder'] in result or result.shape != (axes_lengths_original[2], axes_lengths_original[1], axes_lengths_original[0], num_values_per_datapoint): + raise Exception(f'result was not filled correctly') + + # apply the strides to output_data. + result = xr.DataArray(data = result[::strides[2], ::strides[1], ::strides[0], :], + dims = dims_list) - # determines how many copies of data need to be me made along each axis when the number of datapoints the user specified - # exceeds the cube resolution (cube.N). note: no copies of the data values should be made, hence data_value_multiplier equals 1. - axes_multipliers = np.ceil(axes_lengths_original / cube.N).astype(int) - data_value_multiplier = 1 - - # duplicates the data along the z-, y-, and x-axes of output_data if the the user asked for more datapoints than the cube resolution along any axis. - if np.any(axes_multipliers > 1): - result = np.tile(result, (axes_multipliers[2], axes_multipliers[1], axes_multipliers[0], data_value_multiplier)) - # truncate any extra datapoints from the duplicate data outside of the original range of the datapoints specified by the user. - result = np.copy(result[0 : axes_lengths_original[2], 0 : axes_lengths_original[1], 0 : axes_lengths_original[0], :]) - - # checks to make sure that data was read in for all points. - if c['missing_value_placeholder'] in result or result.shape != (axes_lengths_original[2], axes_lengths_original[1], axes_lengths_original[0], num_values_per_datapoint): - raise Exception(f'result was not filled correctly') + # set the dataset name to be used in the hdf5 file. + h5_dataset_name = cube.dataset_name + result_map[h5_dataset_name] = result # datasets that have an irregular y-grid. irregular_ygrid_datasets = get_irregular_mesh_ygrid_datasets(metadata, var) @@ -203,24 +234,15 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, x_coords = np.around(np.arange(axes_ranges_original[0][0] - 1, axes_ranges_original[0][1], strides[0], dtype = np.float32) * cube.dx, decimals = c['decimals']) x_coords += cube.coor_offsets[0] - # set the dataset name to be used in the hdf5 file. - h5_dataset_name = cube.dataset_name - if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and var == 'velocity': # zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets. coords_map = {'zcoor_uv':z_coords, 'zcoor_w':z_coords + (0.1953125 / 2), 'ycoor':y_coords, 'xcoor':x_coords} - dims_list = ['zcoor_uv', 'ycoor', 'xcoor', 'values'] else: coords_map = {'zcoor':z_coords, 'ycoor':y_coords, 'xcoor':x_coords} - dims_list = ['zcoor', 'ycoor', 'xcoor', 'values'] - - # apply the strides to output_data. - result = xr.DataArray(data = result[::strides[2], ::strides[1], ::strides[0], :], - dims = dims_list) - result = xr.Dataset(data_vars = {h5_dataset_name:result}, + result = xr.Dataset(data_vars = result_map, coords = coords_map, - attrs = {'dataset':dataset_title, 't_start':timepoint_original, 't_end':timepoint_original, 't_step':time_step, + attrs = {'dataset':dataset_title, 't_start':timepoint_range_original[0], 't_end':timepoint_range_original[1], 't_step':timepoint_stride, 'x_start':axes_ranges_original[0][0], 'y_start':axes_ranges_original[1][0], 'z_start':axes_ranges_original[2][0], 'x_end':axes_ranges_original[0][1], 'y_end':axes_ranges_original[1][1], 'z_end':axes_ranges_original[2][1], 'x_step':strides[0], 'y_step':strides[1], 'z_step':strides[2], @@ -256,7 +278,7 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, return result -def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, strides, var, timepoint_original): +def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, xyzt_strides, var, timepoint_range_original): """ complete all of the getCutout housekeeping procedures before data processing. """ @@ -265,11 +287,11 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ # check that the user-input variable is a valid variable name. check_variable(metadata, var, dataset_title, query_type) # check that the user-input timepoint is a valid timepoint for the dataset. - check_timepoint(metadata, timepoint_original, dataset_title, query_type) + check_timepoint(metadata, timepoint_range_original, dataset_title, query_type) # check that the user-input x-, y-, and z-axis ranges are all specified correctly as [minimum, maximum] integer values. check_axes_ranges(metadata, axes_ranges_original, dataset_title, var) # check that the user-input strides are all positive integers. - check_strides(strides) + check_strides(xyzt_strides) # pre-processing steps. # ----- @@ -279,7 +301,7 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ axes_ranges = convert_to_0_based_ranges(metadata, axes_ranges_original, dataset_title, var) # convert the original input timepoint to the correct time index. - timepoint = get_time_index_from_timepoint(metadata, dataset_title, timepoint_original, tint = 'none', query_type = query_type) + timepoint_range = get_time_index_from_timepoint(metadata, dataset_title, timepoint_range_original, tint = 'none', query_type = query_type) # set var_offsets to var for getCutout. 'velocity' is handled differently in getData for the 'sabl2048low', 'sabl2048high', 'stsabl2048low', and 'stsabl2048high' datasets. if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and var == 'velocity': @@ -288,7 +310,7 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ else: var_offsets = var - return (var_offsets, axes_ranges, timepoint) + return (var_offsets, axes_ranges, timepoint_range) def getData(cube, var, timepoint_original_notebook, temporal_method, spatial_method_original, spatial_operator, points, option = [-999.9, -999.9], diff --git a/givernylocal/pyproject.toml b/givernylocal/pyproject.toml index 98a39e9..90fdeb4 100644 --- a/givernylocal/pyproject.toml +++ b/givernylocal/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "givernylocal" -version = "3.4.2" +version = "3.4.2.1" description = "library to query the Johns Hopkins Turbulence Database (JHTDB)" authors = [ {name = "Johns Hopkins Turbulence Database Group"} diff --git a/givernylocal/src/givernylocal/turbulence_gizmos/basic_gizmos.py b/givernylocal/src/givernylocal/turbulence_gizmos/basic_gizmos.py index e20a293..ec416d0 100644 --- a/givernylocal/src/givernylocal/turbulence_gizmos/basic_gizmos.py +++ b/givernylocal/src/givernylocal/turbulence_gizmos/basic_gizmos.py @@ -116,20 +116,40 @@ def check_timepoint(metadata, timepoint, dataset_title, query_type, max_num_time time_upper = np.float64(time_metadata['upper']) time_steps = np.int64(time_metadata['n']) - if query_type == 'getcutout' or dataset_title in time_index_datasets: + if query_type == 'getcutout': + try: + list(timepoint) + except: + raise Exception(f"t_range must be specified as a python list or numpy array, e.g. [{timepoint}, {timepoint}]") + + # check that the time range is specified as minimum and maximum integer values. + if timepoint.dtype not in [np.int32, np.int64]: + raise Exception('all t_range values, [minimum, maximum], should be specified as integers') + + if len(timepoint) != 2 or timepoint[0] > timepoint[1]: + raise Exception(f't_range, {list(timepoint)}, is not correctly specified as [minimum, maximum]') + valid_timepoints = range(1, time_steps + 1) # handles checking datasets with time indices. - if timepoint not in valid_timepoints: - raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be an integer and in the inclusive range of " + + if timepoint[0] not in valid_timepoints or timepoint[1] not in valid_timepoints: + raise Exception(f"'t_range', [{timepoint[0]}, {timepoint[1]}], is not a valid time range for '{dataset_title}': all times must be in the inclusive range of " + f'[{valid_timepoints[0]}, {valid_timepoints[-1]}]') elif query_type == 'getdata': - valid_timepoints = (time_lower, time_upper) + if dataset_title in time_index_datasets: + valid_timepoints = range(1, time_steps + 1) - # handles checking datasets with real times. - if timepoint < valid_timepoints[0] or timepoint > valid_timepoints[1]: - raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be in the inclusive range of " + - f'[{valid_timepoints[0]}, {valid_timepoints[1]}]') + # handles checking datasets with time indices. + if timepoint not in valid_timepoints: + raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be an integer and in the inclusive range of " + + f'[{valid_timepoints[0]}, {valid_timepoints[-1]}]') + else: + valid_timepoints = (time_lower, time_upper) + + # handles checking datasets with real times. + if timepoint < valid_timepoints[0] or timepoint > valid_timepoints[1]: + raise Exception(f"{timepoint} is not a valid time for '{dataset_title}': must be in the inclusive range of " + + f'[{valid_timepoints[0]}, {valid_timepoints[1]}]') elif query_type == 'getturbinedata': try: timepoint = list(timepoint) @@ -559,7 +579,7 @@ def get_time_index_from_timepoint(metadata, dataset_title, timepoint, tint, quer time_index = (timepoint / dt) + time_index_shift # round the time index the nearest time index grid point if 'none' time interpolation was specified. if tint == 'none': - time_index = int(math.floor(time_index + 0.5)) + time_index = np.floor(time_index + 0.5).astype(int) else: # do not convert the timepoint to a time index for datasets processed by pyJHTDB. time_index = timepoint @@ -954,6 +974,10 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid variable = cube.var dataset_title = cube.dataset_title + # remove the time axis from axes_ranges and strides. + axes_ranges = axes_ranges[:3] + strides = strides[:3] + # names for each value, e.g. value index 0 for velocity data corresponds to the x-component of the velocity ("ux"). value_name_map = get_variable_component_names_map(metadata) @@ -1128,6 +1152,10 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): variable = cube.var dataset_title = cube.dataset_title + # remove the time axis from axes_ranges and strides. + axes_ranges = axes_ranges[:3] + strides = strides[:3] + # minimum and maximum endpoints along each axis for the points the user requested. endpoints_min = np.array([np.min(x), np.min(y), np.min(z)], dtype = np.int32) endpoints_max = np.array([np.max(x), np.max(y), np.max(z)], dtype = np.int32) From 081475eec8adbbd2673a21cc806df3789893cbe0 Mon Sep 17 00:00:00 2001 From: mike schnaubelt <93147480+mschnau@users.noreply.github.com> Date: Sat, 13 Dec 2025 09:17:15 -0500 Subject: [PATCH 3/5] giverny 3.5.1 --- .../DEMO_SciServer_python_notebooks.zip | Bin 53067 -> 53067 bytes giverny/pyproject.toml | 2 +- .../giverny/turbulence_gizmos/basic_gizmos.py | 110 +++++++++--------- givernylocal/pyproject.toml | 2 +- 4 files changed, 59 insertions(+), 55 deletions(-) diff --git a/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip b/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip index f5b3e7493f499d70cf369c7021f7ed8c33aa0ae9..9b8842afd4623c9a0b04c89f6271b43e93bd88b7 100644 GIT binary patch delta 32 kcmX>-kNNaGX5IjAW)=|!5ZLY6yODR^Ic6X|dEL3W0Hm7>mjD0& delta 32 kcmX>-kNNaGX5IjAW)=|!5I9=Vvype+Ic6X|dEL3W0I3lS2LJ#7 diff --git a/giverny/pyproject.toml b/giverny/pyproject.toml index 78b65b7..fe40c35 100644 --- a/giverny/pyproject.toml +++ b/giverny/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "giverny" -version = "3.4.2.1" +version = "3.4.2.2" description = "library to query the Johns Hopkins Turbulence Database (JHTDB)" authors = [ {name = "Johns Hopkins Turbulence Database Group"} diff --git a/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py b/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py index bc36a4e..d1106c3 100644 --- a/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py +++ b/giverny/src/giverny/turbulence_gizmos/basic_gizmos.py @@ -891,78 +891,82 @@ def write_cutout_hdf5_and_xmf_files(cube, output_data, output_filename): if cube.dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and cube.var == 'velocity': # split up "zcoor" into "zcoor_uv" and "zcoor_w" for the "velocity" variable of the "sabl" datasets. output_str = f""" - - - """ + + + + """ for h5_dataset_name in h5_dataset_names: # get the output timepoint. xmf_timepoint = int(h5_dataset_name.split('_')[1].strip()) output_str += f""" - - """ + + """ output_str += f""" - - """ + + +""" else: # handle other datasets and variables. output_str = f""" - - - """ + + + + """ for h5_dataset_name in h5_dataset_names: # get the output timepoint. xmf_timepoint = int(h5_dataset_name.split('_')[1].strip()) output_str += f""" - - """ + + """ output_str += f""" - - """ + + +""" with open(cube.output_path.joinpath(output_filename + '.xmf'), 'w') as output_file: output_file.write(output_str) diff --git a/givernylocal/pyproject.toml b/givernylocal/pyproject.toml index 90fdeb4..c76f3b1 100644 --- a/givernylocal/pyproject.toml +++ b/givernylocal/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "givernylocal" -version = "3.4.2.1" +version = "3.4.2.2" description = "library to query the Johns Hopkins Turbulence Database (JHTDB)" authors = [ {name = "Johns Hopkins Turbulence Database Group"} From aedd181bb6f42fb10691abde93eb58cf8614103d Mon Sep 17 00:00:00 2001 From: mike schnaubelt <93147480+mschnau@users.noreply.github.com> Date: Fri, 19 Dec 2025 15:16:10 -0500 Subject: [PATCH 4/5] giverny 3.5.1 --- .../DEMO_SciServer_python_notebooks.zip | Bin 53067 -> 53501 bytes .../DEMO_local_python_notebooks.zip | Bin 52601 -> 53059 bytes .../DEMO_wind_SciServer_python_notebooks.zip | Bin 81754 -> 81754 bytes giverny/pyproject.toml | 2 +- .../giverny/turbulence_gizmos/basic_gizmos.py | 2 +- giverny/src/giverny/turbulence_toolkit.py | 2 +- givernylocal/pyproject.toml | 2 +- .../turbulence_gizmos/basic_gizmos.py | 186 ++++++++++-------- .../src/givernylocal/turbulence_toolkit.py | 89 +++++---- 9 files changed, 168 insertions(+), 115 deletions(-) diff --git a/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip b/DEMO_notebooks/DEMO_SciServer_python_notebooks.zip index 9b8842afd4623c9a0b04c89f6271b43e93bd88b7..2ed65ea2f098079ebfba262140158ab4b7c389e0 100644 GIT binary patch delta 407 zcmX>-kNNLHW{CiAW)=|!5a=kK9IdfT`ukQV28OLp8zn;ICQBGkn!G+?=4Mt!PR7YO za>q0j((;QG5*3Ow^U`xt6$)|^^HLRxQ%e+bGxIWYOLG-$6><|RfUL=Ze5s5jlk4R~ z1&i|%3yL%HOBC`-bCXhwG>RuL$S`s(E=WzzOsiB#ELMnyYSw}3&5hMVwWcI97q=~w zr^;XFD2XpFDauSqoqQo#%ml>HQOMLw)l(?RNQGLfke{XiF$e0LVuhsClJeBlJcZQ6 z5EPkwKbe)OB!6;Yy~tz(uF%af z<}Iw8@dY_RZ`mpsPM)}1bn_pZRgA1)q0KAp7BI2Q00!q~U&pv!KGcAnyz`tLSaS03 Xb5cwnohC;+%1t&sFTfUf0Tc@W;PaIe delta 278 zcmeynkoojHX6XQLW)=|!5ZLY68$FMsN0!-cs)DV8;pD)HqMO&)tzu*b3T%#cSir=xL*Q7{=KoG{y?m(tnw)*X c4lFr&?FA_&d&kMV&T^BVUl3rMe-0D~0Bm$&p8x;= diff --git a/DEMO_notebooks/DEMO_local_python_notebooks.zip b/DEMO_notebooks/DEMO_local_python_notebooks.zip index 2dc3cda8448a0cd42178b71469d5b6e87b761a5e..b25b4809349e5fd08682ef205411d2453a831014 100644 GIT binary patch delta 452 zcmex4i}~<8X6XQLW)=|!5SUsyIr?ei?aW7x3=AMFHR-JEMvGY4$va~wO@0|SbF-cT zC*$O)vd1(P((;QG5*3Ow^U`xt6$)|^^HLRxQ%e+bGxIWYOLG-$6><|RK&;7(ys515 zMTvRoldTlxnKX(gFUU5UtSiT1msqS24OOlKQyr^^YH&$rE^c#EChwEG9#IlsTvC*o zlB%GVnpa$4Z3U$vI>9nJ3YmJTdI}{OsS0_exk;%-3i)XY5c#~sg5r$)l46CV)ROYl z)I5dM#N-T!+~f`HN|O^#NKL*VW54-=;$J>yy(GQO7Y&}W$dtqv+kNM{;W{CiAW)=|!5afdT zh;ed!k}z*^USdIUMt+GxUTJR9bLhl4`a2q|s9rU4??2{F3;h z#Ju#>Vg - - - - - - - """ + + + + """ + + for h5_dataset_name in h5_dataset_names: + # get the output timepoint. + xmf_timepoint = int(h5_dataset_name.split('_')[1].strip()) + + output_str += f""" + + """ + + output_str += f""" + + +""" else: # handle other datasets and variables. output_str = f""" - - - - - - - """ + + + + """ + + for h5_dataset_name in h5_dataset_names: + # get the output timepoint. + xmf_timepoint = int(h5_dataset_name.split('_')[1].strip()) + + output_str += f""" + + """ + + output_str += f""" + + +""" with open(cube.output_path.joinpath(output_filename + '.xmf'), 'w') as output_file: output_file.write(output_str) @@ -958,7 +975,7 @@ def write_cutout_hdf5_and_xmf_files(cube, output_data, output_filename): print('-----') sys.stdout.flush() -def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strides, output_filename, +def contour_plot(cube, value_index, cutout_data, time, plot_ranges, xyzt_axes_ranges, xyzt_strides, output_filename, colormap = 'inferno', equal_aspect_ratio = True): """ create a contour plot from the getCutout output. @@ -973,10 +990,13 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid metadata = cube.metadata variable = cube.var dataset_title = cube.dataset_title + dataset_name = variable + '_' + str(time).zfill(4) # remove the time axis from axes_ranges and strides. - axes_ranges = axes_ranges[:3] - strides = strides[:3] + axes_ranges = xyzt_axes_ranges[:3] + strides = xyzt_strides[:3] + # get the time range. + time_range = xyzt_axes_ranges[3] # names for each value, e.g. value index 0 for velocity data corresponds to the x-component of the velocity ("ux"). value_name_map = get_variable_component_names_map(metadata) @@ -1008,6 +1028,10 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid if not(np.all(axes_min <= plot_ranges_min) and np.all(plot_ranges_max <= axes_max)): raise Exception(f'the specified plot ranges are not all bounded by the box volume defined by:\n{axes_ranges}') + # raise exception if the plot time is not one of the queried timepoints. + if time not in range(time_range[0], time_range[1] + 1, 1): + raise Exception(f'the specified time ({time}) is not a queried time, t_range:\n{time_range}') + # determine how many of the axis minimum values are equal to their corresponding axis maximum value. num_axes_equal_min_max = plot_ranges_min == plot_ranges_max # raise exception if the data being plotted is not 2-dimensional. @@ -1049,15 +1073,15 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid # specify the subset (or full) axes ranges to use for plotting. cutout_data is of the format [z-range, y-range, x-range, output value index]. if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and variable == 'velocity': # zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets. - plot_data = cutout_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor_uv = zcoor_values, - values = value_index) + plot_data = cutout_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor_uv = zcoor_values, + values = value_index) else: - plot_data = cutout_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor = zcoor_values, - values = value_index) + plot_data = cutout_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor = zcoor_values, + values = value_index) # raise exception if only one point is going to be plotted along more than 1 axis. a contour plot requires more # than 1 point along 2 axes. this check is required in case the user specifies a stride along an axis that @@ -1107,7 +1131,7 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid plane_axis = original_axis_title[0].strip() # plot_ranges_min = plot_ranges_max for plane_axis. plane_point = plot_ranges_min[axis_index_map[plane_axis]] - axis_title = plane_axis + ' = ' + str(plane_point) + ', t = ' + str(cutout_data.attrs['t_start']) + axis_title = plane_axis + ' = ' + str(plane_point) + ', t = ' + str(time) title_str = f'{output_dataset_title}\n{variable} ({value_name}) contour plot ({axis_title})' # remove '_uv' from the axis variable names for display in the plot. x_axis_variable = x_axis_variable.replace('_uv', '') @@ -1142,7 +1166,7 @@ def contour_plot(cube, value_index, cutout_data, plot_ranges, axes_ranges, strid print('contour plot created successfully.') sys.stdout.flush() -def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): +def cutout_values(cube, x, y, z, output_data, time, xyzt_axes_ranges, xyzt_strides): """ retrieve data values for all of the specified points. """ @@ -1151,10 +1175,12 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): metadata = cube.metadata variable = cube.var dataset_title = cube.dataset_title + dataset_name = variable + '_' + str(time).zfill(4) # remove the time axis from axes_ranges and strides. - axes_ranges = axes_ranges[:3] - strides = strides[:3] + axes_ranges = xyzt_axes_ranges[:3] + strides = xyzt_strides[:3] + time_range = xyzt_axes_ranges[3] # minimum and maximum endpoints along each axis for the points the user requested. endpoints_min = np.array([np.min(x), np.min(y), np.min(z)], dtype = np.int32) @@ -1165,6 +1191,10 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): # raise exception if all of the user requested datapoints are not inside the bounds of the user box volume. if not(np.all(axes_ranges[:, 0] <= endpoints_min) and np.all(endpoints_max <= axes_ranges[:, 1])): raise Exception(f'the specified point(s) are not all bounded by the box volume defined by:\n{axes_ranges}') + + # raise exception if the time is not one of the queried timepoints. + if time not in range(time_range[0], time_range[1] + 1, 1): + raise Exception(f'the specified time ({time}) is not a queried time, t_range:\n{time_range}') # datasets that have an irregular y-grid. irregular_ygrid_datasets = get_irregular_mesh_ygrid_datasets(metadata, variable) @@ -1189,15 +1219,15 @@ def cutout_values(cube, x, y, z, output_data, axes_ranges, strides): # value(s) corresponding to the specified (x, y, z) datapoint(s). if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and variable == 'velocity': # zcoor_uv are the default z-axis coordinates for the 'velocity' variable of the 'sabl' datasets. - output_values = output_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor_uv = zcoor_values) + output_values = output_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor_uv = zcoor_values) # add the zcoor_w coordinate to the returned xarray dataarray. output_values = output_values.assign_coords({'zcoor_w': output_values.zcoor_uv + (cube.dz / 2)}) return output_values else: - return output_data[cube.dataset_name].sel(xcoor = xcoor_values, - ycoor = ycoor_values, - zcoor = zcoor_values) + return output_data[dataset_name].sel(xcoor = xcoor_values, + ycoor = ycoor_values, + zcoor = zcoor_values) diff --git a/givernylocal/src/givernylocal/turbulence_toolkit.py b/givernylocal/src/givernylocal/turbulence_toolkit.py index 22421be..ceba936 100644 --- a/givernylocal/src/givernylocal/turbulence_toolkit.py +++ b/givernylocal/src/givernylocal/turbulence_toolkit.py @@ -23,6 +23,7 @@ import json import math import time +import logging import requests import tracemalloc import numpy as np @@ -31,7 +32,7 @@ from givernylocal.turbulence_dataset import * from givernylocal.turbulence_gizmos.basic_gizmos import * -def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, +def getCutout(cube, var, xyzt_axes_ranges_original, xyzt_strides, trace_memory = False, verbose = True): """ retrieve a cutout of the isotropic cube. @@ -54,17 +55,25 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, # data constants. c = metadata['constants'] - # only time_step and filter_width values of 1 are currently allowed. - time_step = 1 + # only filter_width value of 1 is currently allowed. filter_width = 1 # retrieve the list of datasets processed by the giverny code. giverny_datasets = get_giverny_datasets() + # xyz original axes ranges. + axes_ranges_original = xyzt_axes_ranges_original[:3] + # time original range. + timepoint_range_original = xyzt_axes_ranges_original[3] + # xyz original axes strides. + strides = xyzt_strides[:3] + # time original stride. + timepoint_stride = xyzt_strides[3] + # housekeeping procedures. # ----- - var_offsets, timepoint = \ - getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, strides, var, timepoint_original) + var_offsets, timepoint_range = \ + getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, xyzt_strides, var, timepoint_range_original) # the number of values to read per datapoint. for pressure data this value is 1. for velocity # data this value is 3, because there is a velocity measurement along each axis. @@ -72,8 +81,10 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, # number of original datapoints along each axis specified by the user. used for checking that the user did not request # too much data and that result is filled correctly. axes_lengths_original = axes_ranges_original[:, 1] - axes_ranges_original[:, 0] + 1 + # number of original times queried by the user. + num_times = ((timepoint_range_original[1] - timepoint_range_original[0]) // timepoint_stride) + 1 # total number of datapoints, used for checking if the user requested too much data.. - num_datapoints = np.prod(axes_lengths_original) + num_datapoints = np.prod(axes_lengths_original) * num_times # total size of data, in GBs, requested by the user's box. requested_data_size = (num_datapoints * c['bytes_per_datapoint'] * num_values_per_datapoint) / float(1024**2) # maximum number of datapoints that can be read in. currently set to 16 GBs worth of datapoints. @@ -90,13 +101,18 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, if requested_data_size > max_cutout_size: raise ValueError(f'max local cutout size, {max_cutout_size} MB, exceeded. please specify a box with fewer than (xe - xs) * (ye - ys) * (ze - zs) = {max_datapoints + 1:,} ' + \ f'data points, regardless of strides.') + + if num_datapoints > 128**3: + logging.warning(f'givernylocal will typically work for up to ~200^3 cube cutouts (~100 MB) depending on system load ' + \ + 'and internet connection speed, otherwise HTTP errors may result. for larger cutouts, use the giverny library getCutout function on SciServer') # placeholder values for getData settings. spatial_method = 'none' temporal_method = 'none' option = [-999.9, -999.9] - # initialize cube constants. this is done so that all of the constants are known for pre-processing of the data. - cube.init_constants(query_type, var, var_offsets, timepoint, timepoint_original, + # initialize cube constants. this is done so that all of the constants are known for pre-processing of the data. use the last timepoint + # as a placeholder to keep consistent with how giverny on SciServer works, i.e. the last queried timepoint is the timepoint stored as the instance variable. + cube.init_constants(query_type, var, var_offsets, timepoint_range[1], timepoint_range_original[1], spatial_method, temporal_method, option, num_values_per_datapoint, c) # ----- @@ -106,11 +122,6 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, # checking the memory usage of the program. tracemem_start = [mem_value / (1024**3) for mem_value in tracemalloc.get_traced_memory()] tracemem_used_start = tracemalloc.get_tracemalloc_memory() / (1024**3) - - # create a small placeholder array for error checking. a full pre-filled array is created in lJHTDB.getbigCutout (pyJHTDB datasets) and - # getCutout_process_data (giverny datasets). initially the datatype is set to "f" (float) so that the array is filled with the - # missing placeholder value (-999.9). - result = np.array([c['missing_value_placeholder']], dtype = 'f') # request url. url = f'https://web.idies.jhu.edu/turbulence-svc-testing/cutout/api/local?token={auth_token}' \ @@ -118,8 +129,8 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, f'&xs={axes_ranges_original[0, 0]}&xe={axes_ranges_original[0, 1]}' \ f'&ys={axes_ranges_original[1, 0]}&ye={axes_ranges_original[1, 1]}' \ f'&zs={axes_ranges_original[2, 0]}&ze={axes_ranges_original[2, 1]}' \ - f'&ts={timepoint_original}&te={timepoint_original}' \ - f'&stridet=1&stridex={strides[0]}&stridey={strides[1]}&stridez={strides[2]}' \ + f'&ts={timepoint_range_original[0]}&te={timepoint_range_original[1]}' \ + f'&stridet={timepoint_stride}&stridex={strides[0]}&stridey={strides[1]}&stridez={strides[2]}' \ f'&filter_width={filter_width}' try: @@ -144,25 +155,37 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, # load the xarray dataset returned by giverny. json_data = json.loads(response.content) + # result DataArray map. + result_map = {} + for dataset_name in json_data['data_vars']: + # create a small placeholder array for error checking. a full pre-filled array is created in lJHTDB.getbigCutout (pyJHTDB datasets) and + # getCutout_process_data (giverny datasets). initially the datatype is set to "f" (float) so that the array is filled with the + # missing placeholder value (-999.9). + result = np.array([c['missing_value_placeholder']], dtype = 'f') + + # load the data for the variable-time into a numpy array. + result = np.array(json_data['data_vars'][dataset_name]['data'], dtype = 'f') + + # checks to make sure that data was received for all points. + strided_lengths = (axes_lengths_original + strides - 1) // strides + if c['missing_value_placeholder'] in result or result.shape != (strided_lengths[2], strided_lengths[1], strided_lengths[0], num_values_per_datapoint): + raise Exception(f'result was not filled correctly for the "{dataset_name}" dataset') + + # create the xarray DataArray. + result = xr.DataArray(data = result, + dims = json_data['data_vars'][dataset_name]['dims']) + + # aggregate the DataArrays into a dictionary. + result_map[dataset_name] = result + # parse the json data into the coords map. store the values as np.float64 for accuracy since the json data does not contain # the original data type information (mostly np.float32, but sometimes np.float64). coords_map = {k: np.array(v['data'], dtype = np.float64) for k, v in json_data['coords'].items()} - # result value array. - result = np.array(json_data['data'], dtype = 'f') - - # checks to make sure that data was received for all points. - strided_lengths = (axes_lengths_original + strides - 1) // strides - if c['missing_value_placeholder'] in result or result.shape != (strided_lengths[2], strided_lengths[1], strided_lengths[0], num_values_per_datapoint): - raise Exception(f'result was not filled correctly') - - # create the xarray DataArray. - result = xr.DataArray(data = result, - dims = json_data['dims']) # create the xarray Dataset. - result = xr.Dataset(data_vars = {json_data['name']:result}, + result = xr.Dataset(data_vars = result_map, coords = coords_map, - attrs = {'dataset':dataset_title, 't_start':timepoint_original, 't_end':timepoint_original, 't_step':time_step, + attrs = {'dataset':dataset_title, 't_start':timepoint_range_original[0], 't_end':timepoint_range_original[1], 't_step':timepoint_stride, 'x_start':axes_ranges_original[0][0], 'y_start':axes_ranges_original[1][0], 'z_start':axes_ranges_original[2][0], 'x_end':axes_ranges_original[0][1], 'y_end':axes_ranges_original[1][1], 'z_end':axes_ranges_original[2][1], 'x_step':strides[0], 'y_step':strides[1], 'z_step':strides[2], @@ -198,7 +221,7 @@ def getCutout(cube, var, timepoint_original, axes_ranges_original, strides, return result -def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, strides, var, timepoint_original): +def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ranges_original, xyzt_strides, var, timepoint_range_original): """ complete all of the getCutout housekeeping procedures before data processing. """ @@ -207,16 +230,16 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ # check that the user-input variable is a valid variable name. check_variable(metadata, var, dataset_title, query_type) # check that the user-input timepoint is a valid timepoint for the dataset. - check_timepoint(metadata, timepoint_original, dataset_title, query_type) + check_timepoint(metadata, timepoint_range_original, dataset_title, query_type) # check that the user-input x-, y-, and z-axis ranges are all specified correctly as [minimum, maximum] integer values. check_axes_ranges(metadata, axes_ranges_original, dataset_title, var) # check that the user-input strides are all positive integers. - check_strides(strides) + check_strides(xyzt_strides) # pre-processing steps. # ----- # convert the original input timepoint to the correct time index. - timepoint = get_time_index_from_timepoint(metadata, dataset_title, timepoint_original, tint = 'none', query_type = query_type) + timepoint_range = get_time_index_from_timepoint(metadata, dataset_title, timepoint_range_original, tint = 'none', query_type = query_type) # set var_offsets to var for getCutout. 'velocity' is handled differently in getData for the 'sabl2048low', 'sabl2048high', 'stsabl2048low', and 'stsabl2048high' datasets. if dataset_title in ['sabl2048low', 'sabl2048high', 'stsabl2048low', 'stsabl2048high'] and var == 'velocity': @@ -225,7 +248,7 @@ def getCutout_housekeeping_procedures(query_type, metadata, dataset_title, axes_ else: var_offsets = var - return (var_offsets, timepoint) + return (var_offsets, timepoint_range) def getData(cube, var, timepoint_original, temporal_method, spatial_method_original, spatial_operator, points, option = [-999.9, -999.9], From 6c0481790fc8fb981f7142d403ec33787f696f5b Mon Sep 17 00:00:00 2001 From: mike schnaubelt <93147480+mschnau@users.noreply.github.com> Date: Mon, 22 Dec 2025 15:28:45 -0500 Subject: [PATCH 5/5] giverny 3.5.1 - add multi-time cutouts --- giverny/pyproject.toml | 2 +- givernylocal/pyproject.toml | 2 +- givernylocal/src/givernylocal/turbulence_toolkit.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/giverny/pyproject.toml b/giverny/pyproject.toml index 6ed8ef7..06ab6ee 100644 --- a/giverny/pyproject.toml +++ b/giverny/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "giverny" -version = "3.4.2.3" +version = "3.5.1" description = "library to query the Johns Hopkins Turbulence Database (JHTDB)" authors = [ {name = "Johns Hopkins Turbulence Database Group"} diff --git a/givernylocal/pyproject.toml b/givernylocal/pyproject.toml index 72cfa21..a33704f 100644 --- a/givernylocal/pyproject.toml +++ b/givernylocal/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "givernylocal" -version = "3.4.2.3" +version = "3.5.1" description = "library to query the Johns Hopkins Turbulence Database (JHTDB)" authors = [ {name = "Johns Hopkins Turbulence Database Group"} diff --git a/givernylocal/src/givernylocal/turbulence_toolkit.py b/givernylocal/src/givernylocal/turbulence_toolkit.py index ceba936..2f0b762 100644 --- a/givernylocal/src/givernylocal/turbulence_toolkit.py +++ b/givernylocal/src/givernylocal/turbulence_toolkit.py @@ -124,7 +124,7 @@ def getCutout(cube, var, xyzt_axes_ranges_original, xyzt_strides, tracemem_used_start = tracemalloc.get_tracemalloc_memory() / (1024**3) # request url. - url = f'https://web.idies.jhu.edu/turbulence-svc-testing/cutout/api/local?token={auth_token}' \ + url = f'https://web.idies.jhu.edu/turbulence-svc/cutout/api/local?token={auth_token}' \ f'&function={var}&dataset={dataset_title}' \ f'&xs={axes_ranges_original[0, 0]}&xe={axes_ranges_original[0, 1]}' \ f'&ys={axes_ranges_original[1, 0]}&ye={axes_ranges_original[1, 1]}' \