From 978e672c5ecc4385336216ef0fd485b4566af224 Mon Sep 17 00:00:00 2001 From: John Readey Date: Mon, 14 Apr 2025 16:58:17 +0200 Subject: [PATCH 01/49] add h5json package --- hsds/async_lib.py | 5 +- hsds/attr_dn.py | 7 +- hsds/attr_sn.py | 9 +- hsds/chunk_crawl.py | 7 +- hsds/chunk_dn.py | 5 +- hsds/chunk_sn.py | 8 +- hsds/chunklocator.py | 4 +- hsds/ctype_sn.py | 4 +- hsds/datanode_lib.py | 6 +- hsds/dset_lib.py | 6 +- hsds/dset_sn.py | 6 +- hsds/link_dn.py | 1 - hsds/servicenode_lib.py | 3 +- hsds/util/arrayUtil.py | 731 ---------------------------- hsds/util/hdf5dtype.py | 876 ---------------------------------- pyproject.toml | 1 + testall.py | 2 +- tests/integ/attr_test.py | 1 + tests/integ/vlen_test.py | 10 +- tests/unit/array_util_test.py | 12 +- tests/unit/hdf5_dtype_test.py | 717 ---------------------------- 21 files changed, 61 insertions(+), 2360 deletions(-) delete mode 100644 hsds/util/arrayUtil.py delete mode 100644 hsds/util/hdf5dtype.py delete mode 100755 tests/unit/hdf5_dtype_test.py diff --git a/hsds/async_lib.py b/hsds/async_lib.py index e749e8a2..15d67f5f 100755 --- a/hsds/async_lib.py +++ b/hsds/async_lib.py @@ -15,11 +15,12 @@ from aiohttp.client_exceptions import ClientError from aiohttp.web_exceptions import HTTPNotFound, HTTPInternalServerError from aiohttp.web_exceptions import HTTPForbidden +from h5json.hdf5dtype import getItemSize +from h5json.hdf5dtype import createDataType +from h5json.array_util import getNumElements, bytesToArray from .util.idUtil import isValidUuid, isSchema2Id, getS3Key, isS3ObjKey from .util.idUtil import getObjId, isValidChunkId, getCollectionForId from .util.chunkUtil import getDatasetId, getNumChunks, ChunkIterator -from .util.hdf5dtype import getItemSize, createDataType -from .util.arrayUtil import getNumElements, bytesToArray from .util.dsetUtil import getHyperslabSelection, getFilterOps, getChunkDims, getFilters from .util.dsetUtil import getDatasetLayoutClass, getDatasetLayout, getShapeDims from .util.storUtil import getStorKeys, putStorJSONObj, getStorJSONObj diff --git a/hsds/attr_dn.py b/hsds/attr_dn.py index 456e9854..cb002623 100755 --- a/hsds/attr_dn.py +++ b/hsds/attr_dn.py @@ -19,12 +19,13 @@ from aiohttp.web_exceptions import HTTPInternalServerError from aiohttp.web import json_response +from h5json.hdf5dtype import getItemSize, createDataType +from h5json.array_util import arrayToBytes, jsonToArray, decodeData +from h5json.array_util import bytesToArray, bytesArrayToList, getNumElements + from .util.attrUtil import validateAttributeName, isEqualAttr -from .util.hdf5dtype import getItemSize, createDataType from .util.globparser import globmatch from .util.dsetUtil import getShapeDims -from .util.arrayUtil import arrayToBytes, jsonToArray, decodeData -from .util.arrayUtil import bytesToArray, bytesArrayToList, getNumElements from .util.domainUtil import isValidBucketName from .datanode_lib import get_obj_id, get_metadata_obj, save_metadata_obj from . 
import hsds_logger as log diff --git a/hsds/attr_sn.py b/hsds/attr_sn.py index b7ecdce4..a735c5c6 100755 --- a/hsds/attr_sn.py +++ b/hsds/attr_sn.py @@ -18,6 +18,11 @@ from aiohttp.web import StreamResponse from json import JSONDecodeError +from h5json.hdf5dtype import validateTypeItem, getBaseTypeJson +from h5json.hdf5dtype import createDataType, getItemSize +from h5json.array_util import jsonToArray, getNumElements, bytesArrayToList +from h5json.array_util import bytesToArray, arrayToBytes, decodeData, encodeData + from .util.httpUtil import getAcceptType, jsonResponse, getHref, getBooleanParam from .util.globparser import globmatch from .util.idUtil import isValidUuid, getRootObjId @@ -25,10 +30,6 @@ from .util.domainUtil import getDomainFromRequest, isValidDomain from .util.domainUtil import getBucketForDomain, verifyRoot from .util.attrUtil import validateAttributeName, getRequestCollectionName -from .util.hdf5dtype import validateTypeItem, getBaseTypeJson -from .util.hdf5dtype import createDataType, getItemSize -from .util.arrayUtil import jsonToArray, getNumElements, bytesArrayToList -from .util.arrayUtil import bytesToArray, arrayToBytes, decodeData, encodeData from .util.dsetUtil import getShapeDims from .servicenode_lib import getDomainJson, getObjectJson, validateAction diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py index 847f0933..a153bfe8 100755 --- a/hsds/chunk_crawl.py +++ b/hsds/chunk_crawl.py @@ -24,16 +24,17 @@ from aiohttp.web_exceptions import HTTPInternalServerError from aiohttp.client_exceptions import ClientError +from h5json.hdf5dtype import createDataType +from h5json.array_util import jsonToArray, getNumpyValue +from h5json.array_util import getNumElements, arrayToBytes, bytesToArray + from .util.httpUtil import http_get, http_put, http_post, get_http_client from .util.httpUtil import isUnixDomainUrl from .util.idUtil import getDataNodeUrl, getNodeCount -from .util.hdf5dtype import createDataType from .util.dsetUtil import getSliceQueryParam, getShapeDims from .util.dsetUtil import getSelectionShape, getChunkLayout from .util.chunkUtil import getChunkCoverage, getDataCoverage from .util.chunkUtil import getChunkIdForPartition, getQueryDtype -from .util.arrayUtil import jsonToArray, getNumpyValue -from .util.arrayUtil import getNumElements, arrayToBytes, bytesToArray from . import config from . 
import hsds_logger as log diff --git a/hsds/chunk_dn.py b/hsds/chunk_dn.py index e2671b61..eeeed88d 100644 --- a/hsds/chunk_dn.py +++ b/hsds/chunk_dn.py @@ -20,11 +20,12 @@ from aiohttp.web_exceptions import HTTPNotFound, HTTPServiceUnavailable from aiohttp.web import json_response, StreamResponse +from h5json.hdf5dtype import createDataType, getSubType +from h5json.array_util import bytesToArray, arrayToBytes, getBroadcastShape + from .util.httpUtil import request_read, getContentType -from .util.arrayUtil import bytesToArray, arrayToBytes, getBroadcastShape from .util.idUtil import getS3Key, validateInPartition, isValidUuid from .util.storUtil import isStorObj, deleteStorObj -from .util.hdf5dtype import createDataType, getSubType from .util.dsetUtil import getSelectionList, getChunkLayout, getShapeDims from .util.dsetUtil import getSelectionShape, getChunkInitializer from .util.chunkUtil import getChunkIndex, getDatasetId, chunkQuery diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 68575007..921feaf0 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -25,19 +25,19 @@ from aiohttp.web_exceptions import HTTPConflict, HTTPInternalServerError from aiohttp.web import StreamResponse +from h5json.hdf5dtype import getItemSize, getDtypeItemSize, getSubType, createDataType +from h5json.array_util import bytesArrayToList, jsonToArray, getNumElements, arrayToBytes +from h5json.array_util import bytesToArray, squeezeArray, getBroadcastShape + from .util.httpUtil import getHref, getAcceptType, getContentType from .util.httpUtil import request_read, jsonResponse, isAWSLambda from .util.idUtil import isValidUuid from .util.domainUtil import getDomainFromRequest, isValidDomain from .util.domainUtil import getBucketForDomain -from .util.hdf5dtype import getItemSize, getDtypeItemSize, getSubType, createDataType from .util.dsetUtil import isNullSpace, isScalarSpace, get_slices, getShapeDims from .util.dsetUtil import isExtensible, getSelectionPagination from .util.dsetUtil import getSelectionShape, getDsetMaxDims, getChunkLayout from .util.chunkUtil import getNumChunks, getChunkIds, getChunkId -from .util.arrayUtil import bytesArrayToList, jsonToArray -from .util.arrayUtil import getNumElements, arrayToBytes, bytesToArray -from .util.arrayUtil import squeezeArray, getBroadcastShape from .util.authUtil import getUserPasswordFromRequest, validateUserPassword from .servicenode_lib import getDsetJson, validateAction from .dset_lib import getSelectionData, getParser, extendShape diff --git a/hsds/chunklocator.py b/hsds/chunklocator.py index 6727de9e..2f8bfbaf 100644 --- a/hsds/chunklocator.py +++ b/hsds/chunklocator.py @@ -5,7 +5,9 @@ import numpy as np from . import config from . 
import hsds_logger as log -from .util.arrayUtil import bytesArrayToList, getNumElements + +from h5json.array_util import bytesArrayToList, getNumElements + from .util.dsetUtil import getSelectionList, getSelectionShape diff --git a/hsds/ctype_sn.py b/hsds/ctype_sn.py index 84cdd17f..59faccd1 100755 --- a/hsds/ctype_sn.py +++ b/hsds/ctype_sn.py @@ -16,6 +16,9 @@ from aiohttp.web_exceptions import HTTPBadRequest, HTTPGone from json import JSONDecodeError + +from h5json.hdf5dtype import validateTypeItem, getBaseTypeJson + from .util.httpUtil import getHref, respJsonAssemble, getBooleanParam from .util.httpUtil import jsonResponse from .util.idUtil import isValidUuid @@ -24,7 +27,6 @@ from .util.authUtil import validateUserPassword from .util.domainUtil import getDomainFromRequest, getPathForDomain, isValidDomain from .util.domainUtil import getBucketForDomain, verifyRoot -from .util.hdf5dtype import validateTypeItem, getBaseTypeJson from .servicenode_lib import getDomainJson, getObjectJson, validateAction from .servicenode_lib import getObjectIdByPath, getPathForObjectId from .servicenode_lib import createObject, createObjectByPath, deleteObject diff --git a/hsds/datanode_lib.py b/hsds/datanode_lib.py index 08ecc52a..1c6c3b6c 100644 --- a/hsds/datanode_lib.py +++ b/hsds/datanode_lib.py @@ -19,6 +19,10 @@ from aiohttp.web_exceptions import HTTPGone, HTTPInternalServerError from aiohttp.web_exceptions import HTTPNotFound, HTTPForbidden from aiohttp.web_exceptions import HTTPServiceUnavailable, HTTPBadRequest + +from h5json.hdf5dtype import createDataType +from h5json.array_util import arrayToBytes, bytesToArray, jsonToArray + from .util.idUtil import validateInPartition, getS3Key, isValidUuid from .util.idUtil import isValidChunkId, getDataNodeUrl, isSchema2Id from .util.idUtil import getRootObjId, isRootObjId @@ -31,8 +35,6 @@ from .util.dsetUtil import getChunkLayout, getFilterOps, getShapeDims from .util.dsetUtil import getChunkInitializer, getSliceQueryParam, getFilters from .util.chunkUtil import getDatasetId, getChunkSelection, getChunkIndex -from .util.arrayUtil import arrayToBytes, bytesToArray, jsonToArray -from .util.hdf5dtype import createDataType from .util.rangegetUtil import ChunkLocation, chunkMunge, getHyperChunkIndex, getHyperChunkFactors from .util.timeUtil import getNow from . 
import config diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py index 1fe89b3e..5b729afb 100755 --- a/hsds/dset_lib.py +++ b/hsds/dset_lib.py @@ -16,7 +16,10 @@ from aiohttp.client_exceptions import ClientError from aiohttp.web_exceptions import HTTPBadRequest, HTTPConflict, HTTPInternalServerError -from .util.arrayUtil import getNumpyValue + +from h5json.hdf5dtype import createDataType, getItemSize +from h5json.array_util import getNumpyValue + from .util.boolparser import BooleanParser from .util.dsetUtil import isNullSpace, getDatasetLayout, getDatasetLayoutClass, get_slices from .util.dsetUtil import getChunkLayout, getSelectionShape, getShapeDims @@ -24,7 +27,6 @@ from .util.chunkUtil import getNumChunks, getChunkIds, getChunkId from .util.chunkUtil import getChunkCoverage, getDataCoverage from .util.chunkUtil import getQueryDtype, get_chunktable_dims -from .util.hdf5dtype import createDataType, getItemSize from .util.httpUtil import http_delete, http_put from .util.idUtil import getDataNodeUrl, isSchema2Id, getS3Key, getObjId from .util.rangegetUtil import getHyperChunkFactors diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 38e1156a..721970fb 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -18,11 +18,13 @@ from json import JSONDecodeError from aiohttp.web_exceptions import HTTPBadRequest, HTTPNotFound +from h5json.hdf5dtype import validateTypeItem, createDataType, getBaseTypeJson, getItemSize +from h5json.array_util import getNumElements, getNumpyValue + from .util.httpUtil import getHref, respJsonAssemble from .util.httpUtil import jsonResponse, getBooleanParam from .util.idUtil import isValidUuid, isSchema2Id from .util.dsetUtil import getPreviewQuery, getFilterItem, getShapeDims -from .util.arrayUtil import getNumElements, getNumpyValue from .util.chunkUtil import getChunkSize, guessChunk, expandChunk, shrinkChunk from .util.chunkUtil import getContiguousLayout from .util.authUtil import getUserPasswordFromRequest, aclCheck @@ -30,8 +32,6 @@ from .util.domainUtil import getDomainFromRequest, getPathForDomain, isValidDomain from .util.domainUtil import getBucketForDomain, verifyRoot from .util.storUtil import getSupportedFilters -from .util.hdf5dtype import validateTypeItem, createDataType, getBaseTypeJson -from .util.hdf5dtype import getItemSize from .util.linkUtil import validateLinkName from .servicenode_lib import getDomainJson, getObjectJson, getDsetJson, getPathForObjectId from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo diff --git a/hsds/link_dn.py b/hsds/link_dn.py index f7ec5956..e53984ed 100755 --- a/hsds/link_dn.py +++ b/hsds/link_dn.py @@ -378,7 +378,6 @@ async def PUT_Links(request): if new_links: # update the group lastModified group_json["lastModified"] = create_time - log.debug(f"tbd: group_json: {group_json}") # write back to S3, save to metadata cache await save_metadata_obj(app, group_id, group_json, bucket=bucket) diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index c8c84f75..3d65e619 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -21,8 +21,9 @@ from aiohttp.client_exceptions import ClientOSError, ClientError from aiohttp import ClientResponseError +from h5json.array_util import encodeData + from .util.authUtil import getAclKeys -from .util.arrayUtil import encodeData from .util.idUtil import getDataNodeUrl, getCollectionForId, createObjId, getRootObjId from .util.idUtil import isSchema2Id, getS3Key, isValidUuid from .util.linkUtil import h5Join, validateLinkName, getLinkClass diff --git 
a/hsds/util/arrayUtil.py b/hsds/util/arrayUtil.py deleted file mode 100644 index 67c847c3..00000000 --- a/hsds/util/arrayUtil.py +++ /dev/null @@ -1,731 +0,0 @@ -############################################################################## -# Copyright by The HDF Group. # -# All rights reserved. # -# # -# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # -# Utilities. The full HSDS copyright notice, including # -# terms governing use, modification, and redistribution, is contained in # -# the file COPYING, which can be found at the root of the source code # -# distribution tree. If you do not have access to this file, you may # -# request a copy from help@hdfgroup.org. # -############################################################################## - -import math -import base64 -import binascii -import numpy as np - -MAX_VLEN_ELEMENT = 1_000_000 # restrict largest vlen element to one million - - -def bytesArrayToList(data): - """ - Convert list that may contain bytes type elements to list of string elements - - TBD: Need to deal with non-string byte data (hexencode?) - """ - if type(data) in (bytes, str): - is_list = False - elif isinstance(data, (np.ndarray, np.generic)): - if len(data.shape) == 0: - is_list = False - data = data.tolist() # tolist will return a scalar in this case - if type(data) in (list, tuple): - is_list = True - else: - is_list = False - else: - is_list = True - elif type(data) in (list, tuple): - is_list = True - else: - is_list = False - - if is_list: - out = [] - for item in data: - try: - rec_item = bytesArrayToList(item) # recursive call - out.append(rec_item) - except ValueError as err: - raise err - elif type(data) is bytes: - try: - out = data.decode("utf-8") - except UnicodeDecodeError as err: - raise ValueError(err) - else: - out = data - - return out - - -def toTuple(rank, data): - """ - Convert a list to a tuple, recursively. - Example. [[1,2],[3,4]] -> ((1,2),(3,4)) - """ - if type(data) in (list, tuple): - if rank > 0: - return list(toTuple(rank - 1, x) for x in data) - else: - return tuple(toTuple(rank - 1, x) for x in data) - else: - if isinstance(data, str): - data = data.encode("utf8") - return data - - -def getArraySize(arr): - """ - Get size in bytes of a numpy array. - """ - nbytes = arr.dtype.itemsize - for n in arr.shape: - nbytes *= n - return nbytes - - -def getNumElements(dims): - """ - Get num elements defined by a shape - """ - num_elements = 0 - if isinstance(dims, int): - num_elements = dims - elif isinstance(dims, (list, tuple)): - num_elements = 1 - for dim in dims: - num_elements *= dim - else: - raise ValueError("Unexpected argument") - return num_elements - - -def isVlen(dt): - """ - Return True if the type contains variable length elements - """ - is_vlen = False - if len(dt) > 1: - names = dt.names - for name in names: - if isVlen(dt[name]): - is_vlen = True - break - else: - if dt.metadata and "vlen" in dt.metadata: - is_vlen = True - return is_vlen - - -def jsonToArray(data_shape, data_dtype, data_json): - """ - Return numpy array from the given json array. 
- """ - def fillVlenArray(rank, data, arr, index): - for i in range(len(data)): - if rank > 1: - index = fillVlenArray(rank - 1, data[i], arr, index) - else: - arr[index] = data[i] - index += 1 - return index - - if data_json is None: - return np.array([]).astype(data_dtype) - - if isinstance(data_json, (list, tuple)): - if None in data_json: - return np.array([]).astype(data_dtype) - - # need some special conversion for compound types -- - # each element must be a tuple, but the JSON decoder - # gives us a list instead. - if len(data_dtype) > 1 and not isinstance(data_json, (list, tuple)): - raise TypeError("expected list data for compound data type") - npoints = getNumElements(data_shape) - np_shape_rank = len(data_shape) - - if type(data_json) in (list, tuple): - converted_data = [] - if npoints == 1 and len(data_json) == len(data_dtype): - converted_data.append(toTuple(0, data_json)) - else: - converted_data = toTuple(np_shape_rank, data_json) - data_json = converted_data - else: - if isinstance(data_json, str): - data_json = data_json.encode("utf8") - data_json = [data_json,] # listify - - if isVlen(data_dtype): - arr = np.zeros((npoints,), dtype=data_dtype) - fillVlenArray(np_shape_rank, data_json, arr, 0) - else: - try: - arr = np.array(data_json, dtype=data_dtype) - except UnicodeEncodeError as ude: - msg = "Unable to encode data" - raise ValueError(msg) from ude - # raise an exception of the array shape doesn't match the selection shape - # allow if the array is a scalar and the selection shape is one element, - # numpy is ok with this - if arr.size != npoints: - msg = "Input data doesn't match selection number of elements" - msg += f" Expected {npoints}, but received: {arr.size}" - raise ValueError(msg) - if arr.shape != data_shape: - arr = arr.reshape(data_shape) # reshape to match selection - - return arr - - -def getElementSize(e, dt): - """ - Get number of byte needed to given element as a bytestream - """ - # print(f"getElementSize - e: {e} dt: {dt} metadata: {dt.metadata}") - if len(dt) > 1: - count = 0 - for name in dt.names: - field_dt = dt[name] - field_val = e[name] - count += getElementSize(field_val, field_dt) - elif not dt.metadata or "vlen" not in dt.metadata: - count = dt.itemsize # fixed size element - else: - # variable length element - vlen = dt.metadata["vlen"] - if isinstance(e, int): - if e == 0: - count = 4 # non-initialized element - else: - raise ValueError("Unexpected value: {}".format(e)) - elif isinstance(e, bytes): - count = len(e) + 4 - elif isinstance(e, str): - count = len(e.encode("utf-8")) + 4 - elif isinstance(e, np.ndarray): - nElements = math.prod(e.shape) - if e.dtype.kind != "O": - count = e.dtype.itemsize * nElements - else: - arr1d = e.reshape((nElements,)) - count = 0 - for item in arr1d: - count += getElementSize(item, dt) - count += 4 # byte count - elif isinstance(e, list) or isinstance(e, tuple): - if not e: - # empty list, just add byte count - count = 4 - else: - # not sure how to deal with this - count = len(e) * vlen.itemsize + 4 # +4 for byte count - else: - raise TypeError("unexpected type: {}".format(type(e))) - return count - - -def getByteArraySize(arr): - """ - Get number of bytes needed to store given numpy array as a bytestream - """ - if not isVlen(arr.dtype): - return arr.itemsize * math.prod(arr.shape) - nElements = math.prod(arr.shape) - # reshape to 1d for easier iteration - arr1d = arr.reshape((nElements,)) - dt = arr1d.dtype - count = 0 - for e in arr1d: - count += getElementSize(e, dt) - return count - - -def 
copyBuffer(src, des, offset): - """ - Copy to buffer at given offset - """ - # print(f"copyBuffer - src: {src} offset: {offset}") - # TBD: just do: des[offset:] = src[:] ? - for i in range(len(src)): - des[i + offset] = src[i] - - # print("returning:", offset + len(src)) - return offset + len(src) - - -def copyElement(e, dt, buffer, offset): - """ - Copy element to bytearray - """ - # print(f"copyElement - dt: {dt} offset: {offset}") - if len(dt) > 1: - for name in dt.names: - field_dt = dt[name] - field_val = e[name] - offset = copyElement(field_val, field_dt, buffer, offset) - elif not dt.metadata or "vlen" not in dt.metadata: - # print(f"e vlen: {e} type: {type(e)} itemsize: {dt.itemsize}") - e_buf = e.tobytes() - # print("tobytes:", e_buf) - if len(e_buf) < dt.itemsize: - # extend the buffer for fixed size strings - # print("extending buffer") - e_buf_ex = bytearray(dt.itemsize) - for i in range(len(e_buf)): - e_buf_ex[i] = e_buf[i] - e_buf = bytes(e_buf_ex) - - # print("length:", len(e_buf)) - offset = copyBuffer(e_buf, buffer, offset) - else: - # variable length element - vlen = dt.metadata["vlen"] - # print("copyBuffer vlen:", vlen) - if isinstance(e, int): - # print("copyBuffer int") - if e == 0: - # write 4-byte integer 0 to buffer - offset = copyBuffer(b"\x00\x00\x00\x00", buffer, offset) - else: - raise ValueError("Unexpected value: {}".format(e)) - elif isinstance(e, bytes): - # print("copyBuffer bytes") - count = np.int32(len(e)) - if count > MAX_VLEN_ELEMENT: - raise ValueError("vlen element too large") - offset = copyBuffer(count.tobytes(), buffer, offset) - offset = copyBuffer(e, buffer, offset) - elif isinstance(e, str): - # print("copyBuffer, str") - text = e.encode("utf-8") - count = np.int32(len(text)) - if count > MAX_VLEN_ELEMENT: - raise ValueError("vlen element too large") - offset = copyBuffer(count.tobytes(), buffer, offset) - offset = copyBuffer(text, buffer, offset) - - elif isinstance(e, np.ndarray): - nElements = math.prod(e.shape) - # print("copyBuffer ndarray, nElements:", nElements) - - if e.dtype.kind != "O": - count = np.int32(e.dtype.itemsize * nElements) - # print("copyBuffeer got vlen count:", count) - # print("copyBuffer e:", e) - if count > MAX_VLEN_ELEMENT: - raise ValueError("vlen element too large") - offset = copyBuffer(count.tobytes(), buffer, offset) - # print("copyBuffer write new count, offset:", offset) - offset = copyBuffer(e.tobytes(), buffer, offset) - # print("copyBuffer write data, offset:", offset) - else: - arr1d = e.reshape((nElements,)) - for item in arr1d: - offset = copyElement(item, dt, buffer, offset) - - elif isinstance(e, list) or isinstance(e, tuple): - # print("cooyBuffer list/tuple vlen:", vlen, "e:", e) - count = np.int32(len(e) * vlen.itemsize) - offset = copyBuffer(count.tobytes(), buffer, offset) - if isinstance(e, np.ndarray): - arr = e - else: - arr = np.asarray(e, dtype=vlen) - offset = copyBuffer(arr.tobytes(), buffer, offset) - - else: - raise TypeError("unexpected type: {}".format(type(e))) - # print("buffer: {}".format(buffer)) - return offset - - -def getElementCount(buffer, offset=0): - """ - Get the count value from persisted vlen array - """ - - n = offset - m = offset + 4 - count_bytes = bytes(buffer[n:m]) - - try: - count = int(np.frombuffer(count_bytes, dtype=" MAX_VLEN_ELEMENT: - # expect variable length element to be between 0 and 1mb - raise ValueError("varlen element size expected to be less than 1MB") - return count - - -def readElement(buffer, offset, arr, index, dt): - """ - Read a single element 
from buffer into array. - - Parameters: - buffer (bytearray): Byte array to read an element from. - offset (int): Starting offset in the buffer. - arr (numpy.ndarray): Array to store the element. - index (int): Index in 'arr' at which to store the element. - dt (numpy.dtype): Numpy datatype of the element. - - Note: If the provided datatype is a variable-length sequence, - this function will read the byte count from the first 4 bytes - of the buffer, and then read the entire sequence. - - Returns: - int: The updated offset value after reading the element. - """ - if len(dt) > 1: - e = arr[index] - for name in dt.names: - field_dt = dt[name] - offset = readElement(buffer, offset, e, name, field_dt) - elif not dt.metadata or "vlen" not in dt.metadata: - count = dt.itemsize - n = offset - m = offset + count - e_buffer = buffer[n:m] - offset += count - try: - e = np.frombuffer(bytes(e_buffer), dtype=dt) - arr[index] = e[0] - except ValueError: - print(f"ERROR: ValueError setting {e_buffer} and dtype: {dt}") - raise - else: - # variable length element - vlenBaseType = dt.metadata["vlen"] - e = arr[index] - - if isinstance(e, np.ndarray): - nelements = math.prod(dt.shape) - e.reshape((nelements,)) - for i in range(nelements): - offset = readElement(buffer, offset, e, i, dt) - e.reshape(dt.shape) - else: - # total number of bytes in the vlen sequence/variable-length string - count = getElementCount(buffer, offset=offset) - offset += 4 - n = offset - m = offset + count - if count > 0: - e_buffer = buffer[n:m] - offset += count - - if vlenBaseType is bytes: - arr[index] = bytes(e_buffer) - elif vlenBaseType is str: - s = e_buffer.decode("utf-8") - arr[index] = s - else: - try: - e = np.frombuffer(bytes(e_buffer), dtype=vlenBaseType) - except ValueError: - msg = f"Failed to parse vlen data: {e_buffer} with dtype: {vlenBaseType}" - raise ValueError(msg) - arr[index] = e - return offset - - -def encodeData(data, encoding="base64"): - """ Encode given data """ - if encoding != "base64": - raise ValueError("only base64 encoding is supported") - try: - if isinstance(data, str): - data = data.encode("utf8") - except UnicodeEncodeError: - raise ValueError("can not encode string value") - if not isinstance(data, bytes): - msg = "Expected str or bytes type to encodeData, " - msg += f"but got: {type(data)}" - raise TypeError(msg) - try: - encoded_data = base64.b64encode(data) - except Exception as e: - # TBD: what exceptions can be raised? 
- raise ValueError(f"Unable to encode: {e}") - return encoded_data - - -def decodeData(data, encoding="base64"): - if encoding != "base64": - raise ValueError("only base64 decoding is supported") - try: - decoded_data = base64.b64decode(data) - except Exception as e: - # TBD: catch actual exception - raise ValueError(f"Unable to decode: {e}") - return decoded_data - - -def arrayToBytes(arr, encoding=None): - """ - Return byte representation of numpy array - """ - if isVlen(arr.dtype): - nSize = getByteArraySize(arr) - buffer = bytearray(nSize) - offset = 0 - nElements = math.prod(arr.shape) - arr1d = arr.reshape((nElements,)) - for e in arr1d: - # print("arrayToBytes:", e) - offset = copyElement(e, arr1d.dtype, buffer, offset) - data = bytes(buffer) - else: - # fixed length type - data = arr.tobytes() - - if encoding: - data = encodeData(data) - return data - - -def bytesToArray(data, dt, shape, encoding=None): - """ - Create numpy array based on byte representation - """ - if encoding: - # decode the data - # will raise ValueError if non-decodeable - data = decodeData(data) - if not isVlen(dt): - # regular numpy from string - arr = np.frombuffer(data, dtype=dt) - else: - nelements = getNumElements(shape) - - arr = np.zeros((nelements,), dtype=dt) - offset = 0 - for index in range(nelements): - offset = readElement(data, offset, arr, index, dt) - if shape is not None: - arr = arr.reshape(shape) - # check that we can update the array if needed - # Note: this seems to have been required starting with numpuy v 1.17 - # Setting the flag directly is not recommended. - # cf: https://github.com/numpy/numpy/issues/9440 - - if not arr.flags["WRITEABLE"]: - arr_copy = arr.copy() - arr = arr_copy - - return arr - - -def getNumpyValue(value, dt=None, encoding=None): - """ - Return value as numpy type for given dtype and encoding - Encoding is expected to be one of None or "base64" - """ - # create a scalar numpy array - arr = np.zeros((), dtype=dt) - - if encoding and not isinstance(value, str): - msg = "Expected value to be string to use encoding" - raise ValueError(msg) - - if encoding == "base64": - try: - data = base64.decodebytes(value.encode("utf-8")) - except binascii.Error: - msg = "Unable to decode base64 string: {value}" - # log.warn(msg) - raise ValueError(msg) - arr = bytesToArray(data, dt, dt.shape) - else: - if isinstance(value, list): - # convert to tuple - value = tuple(value) - elif dt.kind == "f" and isinstance(value, str) and value == "nan": - value = np.nan - else: - # use as is - pass - arr = np.asarray(value, dtype=dt.base) - return arr[()] - - -def squeezeArray(data): - """ - Reduce dimensions by removing any 1-extent dimensions. 
- Just return input if no 1-extent dimensions - - Note: only works with ndarrays (for now at least) - """ - if not isinstance(data, np.ndarray): - raise TypeError("expected ndarray") - if len(data.shape) <= 1: - return data - can_reduce = True - for extent in data.shape: - if extent == 1: - can_reduce = True - break - if can_reduce: - data = data.squeeze() - return data - - -class IndexIterator(object): - """ - Class to iterate through list of chunks of a given dataset - """ - - def __init__(self, shape, sel=None): - self._shape = shape - self._rank = len(self._shape) - self._stop = False - - if self._rank < 1: - raise ValueError("IndexIterator can not be used on arrays of zero rank") - - if sel is None: - # select over entire dataset - slices = [] - for dim in range(self._rank): - slices.append(slice(0, self._shape[dim])) - self._sel = tuple(slices) - else: - if isinstance(sel, slice): - self._sel = (sel,) - else: - self._sel = sel - if len(self._sel) != self._rank: - raise ValueError("Invalid selection - selection region must have same rank as shape") - self._index = [] - for dim in range(self._rank): - s = self._sel[dim] - if s.start < 0 or s.stop > self._shape[dim] or s.stop <= s.start: - raise ValueError( - "Invalid selection - selection region must be within dataset space" - ) - self._index.append(s.start) - - def __iter__(self): - return self - - def __next__(self): - if self._stop: - raise StopIteration() - # bump up the last index and carry forward if we run outside the selection - dim = self._rank - 1 - ret_index = self._index.copy() - while True: - s = self._sel[dim] - if s.step: - step = s.step - else: - step = 1 - self._index[dim] += step - - if self._index[dim] < s.stop: - # we still have room to extend along this dimensions - break - - # reset to the start and continue iterating with higher dimension - self._index[dim] = s.start - dim -= 1 - if dim < 0: - # ran past last index, stop iteration on next run - self._stop = True - - return tuple(ret_index) - - -def ndarray_compare(arr1, arr2): - # compare two numpy arrays. - # return true if the same (exclusive of null vs. empty array) - # false otherwise - # TBD: this is slow for multi-megabyte vlen arrays, needs to be optimized - if not isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray): - if not isinstance(arr1, np.void) and not isinstance(arr2, np.void): - return arr1 == arr2 - if isinstance(arr1, np.void) and not isinstance(arr2, np.void): - if arr1.size == 0 and not arr2: - return True - else: - return False - if not isinstance(arr1, np.void) and isinstance(arr2, np.void): - if not arr1 and arr2.size == 0: - return True - else: - return False - # both np.voids - if arr1.size != arr2.size: - return False - - if len(arr1) != len(arr2): - return False - - for i in range(len(arr1)): - if not ndarray_compare(arr1[i], arr2[i]): - return False - return True - - if isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray): - # same only if arr1 is empty and arr2 is 0 - if arr1.size == 0 and not arr2: - return True - else: - return False - if not isinstance(arr1, np.ndarray) and isinstance(arr2, np.ndarray): - # same only if arr1 is empty and arr2 size is 0 - if not arr1 and arr2.size == 0: - return True - else: - return False - - # two ndarrays... 
- if arr1.shape != arr2.shape: - return False - if arr2.dtype != arr2.dtype: - return False - - if isVlen(arr1.dtype): - # need to compare element by element - - nElements = np.prod(arr1.shape) - arr1 = arr1.reshape((nElements,)) - arr2 = arr2.reshape((nElements,)) - for i in range(nElements): - if not ndarray_compare(arr1[i], arr2[i]): - return False - return True - else: - # can just us np array_compare - return np.array_equal(arr1, arr2) - - -def getBroadcastShape(mshape, element_count): - # if element_count is less than the number of elements - # defined by mshape, return a numpy compatible broadcast - # shape that contains element_count elements. - # If non exists return None - - if np.prod(mshape) == element_count: - return None - - if element_count == 1: - # this always works - return [1,] - - bcshape = [] - rank = len(mshape) - for n in range(rank - 1): - bcshape.insert(0, mshape[rank - n - 1]) - if element_count == np.prod(bcshape): - return bcshape # have a match - - return None # no broadcast found diff --git a/hsds/util/hdf5dtype.py b/hsds/util/hdf5dtype.py deleted file mode 100644 index 3d7d1d2f..00000000 --- a/hsds/util/hdf5dtype.py +++ /dev/null @@ -1,876 +0,0 @@ -############################################################################## -# Copyright by The HDF Group. # -# All rights reserved. # -# # -# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # -# Utilities. The full HSDS copyright notice, including # -# terms governing use, modification, and redistribution, is contained in # -# the file COPYING, which can be found at the root of the source code # -# distribution tree. If you do not have access to this file, you may # -# request a copy from help@hdfgroup.org. # -############################################################################## - -import weakref -import numpy as np - - -class Reference: - """ - Represents an HDF5 object reference - """ - - @property - def id(self): - """Low-level identifier appropriate for this object""" - return self._id - - @property - def objref(self): - """Weak reference to object""" - return self._objref # return weak ref to ref'd object - - def __init__(self, bind): - """Create a new reference by binding to - a group/dataset/committed type - """ - self._id = bind._id - self._objref = weakref.ref(bind) - - def __repr__(self): - # TBD: this is not consistent with hsds or h5py... 
- if not isinstance(self._id.id, str): - raise TypeError("Expected string id") - item = None - - collection_type = self._id.collection_type - item = f"{collection_type}/{self._id.id}" - return item - - def tolist(self): - if type(self._id.id) is not str: - raise TypeError("Expected string id") - if self._id.objtype_code == "d": - return [ - ("datasets/" + self._id.id), - ] - elif self._id.objtype_code == "g": - return [ - ("groups/" + self._id.id), - ] - elif self._id.objtype_code == "t": - return [ - ("datatypes/" + self._id.id), - ] - else: - raise TypeError("Unexpected id type") - - -class RegionReference: - """ - Represents an HDF5 region reference - """ - - @property - def id(self): - """Low-level identifier appropriate for this object""" - return self._id - - @property - def objref(self): - """Weak reference to object""" - return self._objref # return weak ref to ref'd object - - def __init__(self, bind): - """Create a new reference by binding to - a group/dataset/committed type - """ - self._id = bind._id - self._objref = weakref.ref(bind) - - def __repr__(self): - return "" - - -def special_dtype(**kwds): - """Create a new h5py "special" type. Only one keyword may be given. - - Legal keywords are: - - vlen = basetype - Base type for HDF5 variable-length datatype. This can be Python - str type or instance of np.dtype. - Example: special_dtype( vlen=str ) - - enum = (basetype, values_dict) - Create a NumPy representation of an HDF5 enumerated type. Provide - a 2-tuple containing an (integer) base dtype and a dict mapping - string names to integer values. - - ref = Reference | RegionReference - Create a NumPy representation of an HDF5 object or region reference - type.""" - - if len(kwds) != 1: - raise TypeError("Exactly one keyword may be provided") - - name, val = kwds.popitem() - - if name == "vlen": - - return np.dtype("O", metadata={"vlen": val}) - - if name == "enum": - - try: - dt, enum_vals = val - except TypeError: - msg = "Enums must be created from a 2-tuple " - msg += "(basetype, values_dict)" - raise TypeError(msg) - - dt = np.dtype(dt) - if dt.kind not in "iu": - raise TypeError("Only integer types can be used as enums") - - return np.dtype(dt, metadata={"enum": enum_vals}) - - if name == "ref": - dt = None - if val is Reference: - dt = np.dtype("S48", metadata={"ref": Reference}) - elif val is RegionReference: - dt = np.dtype("S48", metadata={"ref": RegionReference}) - else: - raise ValueError("Ref class must be Reference or RegionReference") - - return dt - - raise TypeError(f'Unknown special type "{name}"') - - -def check_dtype(**kwds): - """Check a dtype for h5py special type "hint" information. Only one - keyword may be given. - - vlen = dtype - If the dtype represents an HDF5 vlen, returns the Python base class. - Currently only builting string vlens (str) are supported. Returns - None if the dtype does not represent an HDF5 vlen. - - enum = dtype - If the dtype represents an HDF5 enumerated type, returns the dictionary - mapping string names to integer values. Returns None if the dtype does - not represent an HDF5 enumerated type. - - ref = dtype - If the dtype represents an HDF5 reference type, returns the reference - class (either Reference or RegionReference). Returns None if the dtype - does not represent an HDF5 reference type. 
- """ - - if len(kwds) != 1: - raise TypeError("Exactly one keyword may be provided") - - name, dt = kwds.popitem() - - if name not in ("vlen", "enum", "ref"): - raise TypeError('Unknown special type "%s"' % name) - - try: - return dt.metadata[name] - except TypeError: - return None - except KeyError: - return None - - -def getTypeResponse(typeItem): - """ - Convert the given type item to a predefined type string for - predefined integer and floating point types ("H5T_STD_I64LE", et. al). - For compound types, recursively iterate through the typeItem and do - same conversion for fields of the compound type.""" - response = None - if "uuid" in typeItem: - # committed type, just return uuid - response = "datatypes/" + typeItem["uuid"] - elif typeItem["class"] in ("H5T_INTEGER", "H5T_FLOAT"): - # just return the class and base for pre-defined types - response = {} - response["class"] = typeItem["class"] - response["base"] = typeItem["base"] - elif typeItem["class"] == "H5T_OPAQUE": - response = {} - response["class"] = "H5T_OPAQUE" - response["size"] = typeItem["size"] - elif typeItem["class"] == "H5T_REFERENCE": - response = {} - response["class"] = "H5T_REFERENCE" - response["base"] = typeItem["base"] - elif typeItem["class"] == "H5T_COMPOUND": - response = {} - response["class"] = "H5T_COMPOUND" - fieldList = [] - for field in typeItem["fields"]: - fieldItem = {} - fieldItem["name"] = field["name"] - fieldItem["type"] = getTypeResponse(field["type"]) # recurse call - fieldList.append(fieldItem) - response["fields"] = fieldList - else: - response = {} # otherwise, return full type - for k in typeItem.keys(): - if k == "base": - if isinstance(typeItem[k], dict): - response[k] = getTypeResponse(typeItem[k]) # recurse call - else: - response[k] = typeItem[k] # predefined type - elif k not in ("size", "base_size"): - response[k] = typeItem[k] - return response - - -def getTypeItem(dt, metadata=None): - """ - Return type info. 
- For primitive types, return string with typename - For compound types return array of dictionary items - """ - predefined_int_types = { - "int8": "H5T_STD_I8", - "uint8": "H5T_STD_U8", - "int16": "H5T_STD_I16", - "uint16": "H5T_STD_U16", - "int32": "H5T_STD_I32", - "uint32": "H5T_STD_U32", - "int64": "H5T_STD_I64", - "uint64": "H5T_STD_U64", - } - predefined_float_types = { - "float16": "H5T_IEEE_F16", - "float32": "H5T_IEEE_F32", - "float64": "H5T_IEEE_F64", - } - # print(">getTypeItem:", dt.str) - if not metadata and dt.metadata: - metadata = dt.metadata - # if metadata: - # print("> metadata:", metadata) - # if dt.shape: - # print("> shape:", dt.shape) - # if len(dt) > 1: - # print("> len:", len(dt)) - - type_info = {} - if len(dt) > 1: - # compound type - names = dt.names - type_info["class"] = "H5T_COMPOUND" - fields = [] - for name in names: - field = {"name": name} - field["type"] = getTypeItem(dt[name]) - fields.append(field) - type_info["fields"] = fields - elif dt.shape: - # array type - if dt.base == dt: - raise TypeError("Expected base type to be different than parent") - # array type - type_info["dims"] = dt.shape - type_info["class"] = "H5T_ARRAY" - # print("> array type, metadata:", metadata) - type_info["base"] = getTypeItem(dt.base, metadata=metadata) - elif dt.kind == "O": - # vlen string or data - # - # check for h5py variable length extension - - if metadata and "vlen" in metadata: - vlen_check = metadata["vlen"] - if vlen_check is not None and not isinstance(vlen_check, np.dtype): - vlen_check = np.dtype(vlen_check) - - if metadata and "ref" in metadata: - ref_check = metadata["ref"] - else: - ref_check = check_dtype(ref=dt.base) - if vlen_check == bytes: - type_info["class"] = "H5T_STRING" - type_info["length"] = "H5T_VARIABLE" - type_info["charSet"] = "H5T_CSET_ASCII" - type_info["strPad"] = "H5T_STR_NULLTERM" - elif vlen_check == str: - type_info["class"] = "H5T_STRING" - type_info["length"] = "H5T_VARIABLE" - type_info["charSet"] = "H5T_CSET_UTF8" - type_info["strPad"] = "H5T_STR_NULLTERM" - elif isinstance(vlen_check, np.dtype): - # vlen data - type_info["class"] = "H5T_VLEN" - type_info["size"] = "H5T_VARIABLE" - type_info["base"] = getTypeItem(vlen_check) - elif vlen_check is not None: - # unknown vlen type - raise TypeError("Unknown h5py vlen type: " + str(vlen_check)) - elif ref_check is not None: - # a reference type - type_info["class"] = "H5T_REFERENCE" - - if ref_check is Reference: - type_info["base"] = "H5T_STD_REF_OBJ" # objref - elif ref_check is RegionReference: - type_info["base"] = "H5T_STD_REF_DSETREG" # region ref - else: - raise TypeError("unexpected reference type") - else: - raise TypeError("unknown object type") - elif dt.kind == "V": - # void type - type_info["class"] = "H5T_OPAQUE" - type_info["size"] = dt.itemsize - type_info["tag"] = "" # todo - determine tag - elif dt.base.kind == "S": - # check for object reference - ref_check = check_dtype(ref=dt.base) - if ref_check is not None: - # a reference type - type_info["class"] = "H5T_REFERENCE" - - if ref_check is Reference: - type_info["base"] = "H5T_STD_REF_OBJ" # objref - elif ref_check is RegionReference: - type_info["base"] = "H5T_STD_REF_DSETREG" # region ref - else: - raise TypeError("unexpected reference type") - else: - # Fixed length string type - type_info["class"] = "H5T_STRING" - type_info["length"] = dt.itemsize - type_info["charSet"] = "H5T_CSET_ASCII" - type_info["strPad"] = "H5T_STR_NULLPAD" - elif dt.base.kind == "U": - # Fixed length unicode type - ref_check = 
check_dtype(ref=dt.base) - if ref_check is not None: - raise TypeError("unexpected reference type") - - # Fixed length string type with unicode support - type_info["class"] = "H5T_STRING" - - # this can be problematic if the encoding of the string is not valid, - # or reqires too many bytes. Use variable length strings to handle all - # UTF8 strings correctly - type_info["charSet"] = "H5T_CSET_UTF8" - # convert from UTF32 length to a fixed length - type_info["length"] = dt.itemsize - type_info["strPad"] = "H5T_STR_NULLPAD" - - elif dt.kind == "b": - # boolean type - h5py stores as enum - # assume LE unless the numpy byteorder is '>' - byteorder = "LE" - if dt.base.byteorder == ">": - byteorder = "BE" - # this mapping is an h5py convention for boolean support - mapping = {"FALSE": 0, "TRUE": 1} - type_info["class"] = "H5T_ENUM" - type_info["mapping"] = mapping - base_info = {"class": "H5T_INTEGER"} - base_info["base"] = "H5T_STD_I8" + byteorder - type_info["base"] = base_info - elif dt.kind == "f": - # floating point type - type_info["class"] = "H5T_FLOAT" - byteorder = "LE" - if dt.byteorder == ">": - byteorder = "BE" - if dt.name in predefined_float_types: - # maps to one of the HDF5 predefined types - float_type = predefined_float_types[dt.base.name] - type_info["base"] = float_type + byteorder - else: - raise TypeError("Unexpected floating point type: " + dt.name) - elif dt.kind == "i" or dt.kind == "u": - # integer type - - # assume LE unless the numpy byteorder is '>' - byteorder = "LE" - if dt.base.byteorder == ">": - byteorder = "BE" - - # numpy integer type - but check to see if this is the hypy - # enum extension - if metadata and "enum" in metadata: - # yes, this is an enum! - mapping = metadata["enum"] - type_info["class"] = "H5T_ENUM" - type_info["mapping"] = mapping - if dt.name not in predefined_int_types: - raise TypeError("Unexpected integer type: " + dt.name) - # maps to one of the HDF5 predefined types - base_info = {"class": "H5T_INTEGER"} - base_info["base"] = predefined_int_types[dt.name] + byteorder - type_info["base"] = base_info - else: - type_info["class"] = "H5T_INTEGER" - base_name = dt.name - - if dt.name not in predefined_int_types: - raise TypeError("Unexpected integer type: " + dt.name) - - type_info["base"] = predefined_int_types[base_name] + byteorder - - else: - # unexpected kind - raise TypeError(f"unexpected dtype kind: {dt.kind}") - - return type_info - - -def getItemSize(typeItem): - """ - Get size of an item in bytes. - For variable length types (e.g. 
variable length strings), - return the string "H5T_VARIABLE" - """ - # handle the case where we are passed a primitive type first - if isinstance(typeItem, str) or isinstance(typeItem, bytes): - for type_prefix in ("H5T_STD_I", "H5T_STD_U", "H5T_IEEE_F"): - if typeItem.startswith(type_prefix): - nlen = len(type_prefix) - num_bits = typeItem[nlen:] - if num_bits[-2:] in ("LE", "BE"): - num_bits = num_bits[:-2] - try: - return int(num_bits) // 8 - except ValueError: - raise TypeError("Invalid Type") - # none of the expect primative types mathched - raise TypeError("Invalid Type") - if not isinstance(typeItem, dict): - raise TypeError("invalid type") - - item_size = 0 - if "class" not in typeItem: - raise KeyError("'class' not provided") - typeClass = typeItem["class"] - - if typeClass == "H5T_INTEGER": - if "base" not in typeItem: - raise KeyError("'base' not provided") - item_size = getItemSize(typeItem["base"]) - - elif typeClass == "H5T_FLOAT": - if "base" not in typeItem: - raise KeyError("'base' not provided") - item_size = getItemSize(typeItem["base"]) - - elif typeClass == "H5T_STRING": - if "length" not in typeItem: - raise KeyError("'length' not provided") - item_size = typeItem["length"] - - elif typeClass == "H5T_VLEN": - item_size = "H5T_VARIABLE" - elif typeClass == "H5T_OPAQUE": - if "size" not in typeItem: - raise KeyError("'size' not provided") - item_size = int(typeItem["size"]) - - elif typeClass == "H5T_ARRAY": - if "dims" not in typeItem: - raise KeyError("'dims' must be provided for array types") - if "base" not in typeItem: - raise KeyError("'base' not provided") - item_size = getItemSize(typeItem["base"]) - - elif typeClass == "H5T_ENUM": - if "base" not in typeItem: - raise KeyError("'base' must be provided for enum types") - item_size = getItemSize(typeItem["base"]) - - elif typeClass == "H5T_REFERENCE": - if "length" in typeItem: - item_size = typeItem["length"] - elif "base" in typeItem and typeItem["base"] == "H5T_STD_REF_OBJ": - # obj ref values are in the form: "groups/" or - # "datasets/" or "datatypes/" - item_size = 48 - else: - raise KeyError("Unable to determine item size for reference type") - elif typeClass == "H5T_COMPOUND": - if "fields" not in typeItem: - raise KeyError("'fields' not provided for compound type") - fields = typeItem["fields"] - if not isinstance(fields, list): - raise TypeError("Type Error: expected list type for 'fields'") - if not fields: - raise KeyError("no 'field' elements provided") - # add up the size of each sub-field - for field in fields: - if not isinstance(field, dict): - raise TypeError("Expected dictionary type for field") - if "type" not in field: - raise KeyError("'type' missing from field") - subtype_size = getItemSize(field["type"]) # recursive call - if subtype_size == "H5T_VARIABLE": - item_size = "H5T_VARIABLE" - break # don't need to look at the rest - - item_size += subtype_size - else: - raise TypeError("Invalid type class") - - # calculate array type - if "dims" in typeItem and isinstance(item_size, int): - dims = typeItem["dims"] - for dim in dims: - item_size *= dim - - return item_size - - -def getDtypeItemSize(dtype): - """ Return size of dtype in bytes - For variable length types (e.g. 
variable length strings), - return the string "H5T_VARIABLE - """ - item_size = 0 - if len(dtype) > 0: - # compound dtype - for i in range(len(dtype)): - sub_dt = dtype[i] - sub_dt_size = getDtypeItemSize(sub_dt) - if sub_dt_size == "H5T_VARIABLE": - item_size = "H5T_VARIABLE" # return variable if any component is variable - break - item_size += sub_dt_size - else: - # primitive type - if dtype.metadata and "vlen" in dtype.metadata: - item_size = "H5T_VARIABLE" - else: - item_size = dtype.itemsize - return item_size - - -def getNumpyTypename(hdf5TypeName, typeClass=None): - predefined_int_types = { - "H5T_STD_I8": "i1", - "H5T_STD_U8": "u1", - "H5T_STD_I16": "i2", - "H5T_STD_U16": "u2", - "H5T_STD_I32": "i4", - "H5T_STD_U32": "u4", - "H5T_STD_I64": "i8", - "H5T_STD_U64": "u8", - } - predefined_float_types = { - "H5T_IEEE_F16": "f2", - "H5T_IEEE_F32": "f4", - "H5T_IEEE_F64": "f8", - } - - if len(hdf5TypeName) < 3: - raise Exception("Type Error: invalid typename: ") - endian = "<" # default endian - key = hdf5TypeName - if hdf5TypeName.endswith("LE"): - key = hdf5TypeName[:-2] - elif hdf5TypeName.endswith("BE"): - key = hdf5TypeName[:-2] - endian = ">" - - if key in predefined_int_types and ( - typeClass is None or typeClass == "H5T_INTEGER" - ): - return endian + predefined_int_types[key] - if key in predefined_float_types and ( - typeClass is None or typeClass == "H5T_FLOAT" - ): - return endian + predefined_float_types[key] - raise TypeError("Type Error: invalid type") - - -def createBaseDataType(typeItem): - dtRet = None - if isinstance(typeItem, str): - # should be one of the predefined types - dtName = getNumpyTypename(typeItem) - dtRet = np.dtype(dtName) - return dtRet # return predefined type - - if not isinstance(typeItem, dict): - raise TypeError("Type Error: invalid type") - - if "class" not in typeItem: - raise KeyError("'class' not provided") - typeClass = typeItem["class"] - - dims = "" - if "dims" in typeItem: - if typeClass != "H5T_ARRAY": - raise TypeError("'dims' only supported for integer types") - - dims = None - if isinstance(typeItem["dims"], int): - dims = typeItem["dims"] # make into a tuple - elif not isinstance(typeItem["dims"], list) and not isinstance( - typeItem["dims"], tuple - ): - raise TypeError("expected list or integer for dims") - else: - dims = typeItem["dims"] - dims = str(tuple(dims)) - - if typeClass == "H5T_INTEGER": - if "base" not in typeItem: - raise KeyError("'base' not provided") - baseType = getNumpyTypename(typeItem["base"], typeClass="H5T_INTEGER") - dtRet = np.dtype(dims + baseType) - elif typeClass == "H5T_FLOAT": - if "base" not in typeItem: - raise KeyError("'base' not provided") - baseType = getNumpyTypename(typeItem["base"], typeClass="H5T_FLOAT") - dtRet = np.dtype(dims + baseType) - elif typeClass == "H5T_STRING": - if "length" not in typeItem: - raise KeyError("'length' not provided") - if "charSet" not in typeItem: - raise KeyError("'charSet' not provided") - - if typeItem["length"] == "H5T_VARIABLE": - if dims: - msg = "ArrayType is not supported for variable len types" - raise TypeError(msg) - if typeItem["charSet"] == "H5T_CSET_ASCII": - dtRet = special_dtype(vlen=bytes) - elif typeItem["charSet"] == "H5T_CSET_UTF8": - dtRet = special_dtype(vlen=str) - else: - raise TypeError("unexpected 'charSet' value") - else: - nStrSize = typeItem["length"] - if not isinstance(nStrSize, int): - raise TypeError("expecting integer value for 'length'") - type_code = None - if typeItem["charSet"] == "H5T_CSET_ASCII": - type_code = "S" - elif 
typeItem["charSet"] == "H5T_CSET_UTF8": - # use the same type_code as ascii strings - # (othewise, numpy will reserve bytes for UTF32 representation) - type_code = "S" - else: - raise TypeError("unexpected 'charSet' value") - # a fixed size string - dtRet = np.dtype(dims + type_code + str(nStrSize)) - elif typeClass == "H5T_VLEN": - if dims: - msg = "ArrayType is not supported for variable len types" - raise TypeError(msg) - if "base" not in typeItem: - raise KeyError("'base' not provided") - baseType = createBaseDataType(typeItem["base"]) - dtRet = special_dtype(vlen=np.dtype(baseType)) - elif typeClass == "H5T_OPAQUE": - if dims: - msg = "Opaque Type is not supported for variable len types" - raise TypeError(msg) - if "size" not in typeItem: - raise KeyError("'size' not provided") - nSize = int(typeItem["size"]) - if nSize <= 0: - raise TypeError("'size' must be non-negative") - dtRet = np.dtype("V" + str(nSize)) - elif typeClass == "H5T_ARRAY": - if not dims: - raise KeyError("'dims' must be provided for array types") - if "base" not in typeItem: - raise KeyError("'base' not provided") - arrayBaseType = typeItem["base"] - if isinstance(arrayBaseType, dict): - if "class" not in arrayBaseType: - raise KeyError("'class' not provided for array base type") - type_classes = ("H5T_INTEGER", "H5T_FLOAT", "H5T_STRING", "H5T_ARRAY") - if arrayBaseType["class"] not in type_classes: - msg = "Array Type base type must be integer, float, string, or array" - raise TypeError(msg) - baseType = createDataType(arrayBaseType) - metadata = None - if baseType.metadata: - metadata = dict(baseType.metadata) - dtRet = np.dtype(dims + baseType.str, metadata=metadata) - else: - dtRet = np.dtype(dims + baseType.str) - return dtRet # return predefined type - elif typeClass == "H5T_REFERENCE": - if "base" not in typeItem: - raise KeyError("'base' not provided") - if typeItem["base"] == "H5T_STD_REF_OBJ": - dtRet = special_dtype(ref=Reference) - elif typeItem["base"] == "H5T_STD_REF_DSETREG": - dtRet = special_dtype(ref=RegionReference) - else: - raise TypeError("Invalid base type for reference type") - - elif typeClass == "H5T_ENUM": - if "base" not in typeItem: - raise KeyError("Expected 'base' to be provided for enum type") - base_json = typeItem["base"] - if "class" not in base_json: - raise KeyError("Expected class field in base type") - if base_json["class"] != "H5T_INTEGER": - msg = "Only integer base types can be used with enum type" - raise TypeError(msg) - if "mapping" not in typeItem: - raise KeyError("'mapping' not provided for enum type") - mapping = typeItem["mapping"] - if len(mapping) == 0: - raise KeyError("empty enum map") - - dt = createBaseDataType(base_json) - if all( - ( - dt.kind == "i", - dt.name == "int8", - len(mapping) == 2, - "TRUE" in mapping, - "FALSE" in mapping, - ) - ): - # convert to numpy boolean type - dtRet = np.dtype("bool") - else: - # not a boolean enum, use h5py special dtype - dtRet = special_dtype(enum=(dt, mapping)) - - else: - raise TypeError("Invalid type class") - - return dtRet - - -def createDataType(typeItem): - """ - Create a numpy datatype given a json type - """ - dtRet = None - if type(typeItem) in (str, bytes): - # should be one of the predefined types - dtName = getNumpyTypename(typeItem) - dtRet = np.dtype(dtName) - return dtRet # return predefined type - - if not isinstance(typeItem, dict): - raise TypeError("invalid type") - - if "class" not in typeItem: - raise KeyError("'class' not provided") - typeClass = typeItem["class"] - - if typeClass == 
"H5T_COMPOUND": - if "fields" not in typeItem: - raise KeyError("'fields' not provided for compound type") - fields = typeItem["fields"] - if type(fields) is not list: - raise TypeError("Type Error: expected list type for 'fields'") - if not fields: - raise KeyError("no 'field' elements provided") - subtypes = [] - for field in fields: - - if not isinstance(field, dict): - raise TypeError("Expected dictionary type for field") - if "name" not in field: - raise KeyError("'name' missing from field") - if "type" not in field: - raise KeyError("'type' missing from field") - field_name = field["name"] - if not isinstance(field_name, str): - raise TypeError("field names must be strings") - # verify the field name is ascii - try: - field_name.encode("ascii") - except UnicodeEncodeError: - raise TypeError("non-ascii field name not allowed") - - dt = createDataType(field["type"]) # recursive call - if dt is None: - raise Exception("unexpected error") - subtypes.append((field["name"], dt)) # append tuple - - dtRet = np.dtype(subtypes) - else: - dtRet = createBaseDataType(typeItem) # create non-compound dt - return dtRet - - -def validateTypeItem(typeItem): - """ - Validate a json type - call createDataType and if no exception, - it's valid - """ - createDataType(typeItem) - # throws KeyError, TypeError, or ValueError - - -def getBaseTypeJson(type_name): - """ - Return JSON representation of a predefined type string - """ - predefined_int_types = ( - "H5T_STD_I8", - "H5T_STD_U8", - "H5T_STD_I16", - "H5T_STD_U16", - "H5T_STD_I32", - "H5T_STD_U32", - "H5T_STD_I64", - "H5T_STD_U64", - ) - predefined_float_types = ("H5T_IEEE_F16", "H5T_IEEE_F32", "H5T_IEEE_F64") - type_json = {} - # predefined typenames start with 'H5T' and end with "LE" or "BE" - if all( - ( - type_name.startswith("H5T_"), - type_name[-1] == "E", - type_name[-2] in ("L", "B"), - ) - ): - # trime of the "BE/"LE" - type_prefix = type_name[:-2] - if type_prefix in predefined_int_types: - type_json["class"] = "H5T_INTEGER" - type_json["base"] = type_name - elif type_prefix in predefined_float_types: - type_json["class"] = "H5T_FLOAT" - type_json["base"] = type_name - else: - raise TypeError("Invalid type name") - else: - raise TypeError("Invalid type name") - return type_json - - -def getSubType(dt_parent, fields): - """ Return a dtype that is a compound type composed of - the fields given in the field_names list - """ - if len(dt_parent) == 0: - raise TypeError("getSubType - parent must be compound type") - if not fields: - raise TypeError("null field specification") - if isinstance(fields, str): - fields = [fields,] # convert to a list - - field_names = set(dt_parent.names) - dt_items = [] - for field in fields: - if field not in field_names: - raise TypeError(f"field: {field} is not defined in parent type") - dt_items.append((field, dt_parent[field])) - dt = np.dtype(dt_items) - - return dt diff --git a/pyproject.toml b/pyproject.toml index af575c13..9b733a85 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ dependencies = [ "bitshuffle >=0.5.2", "cryptography", "h5py >= 3.6.0", + "h5json", "importlib_resources", "numcodecs", "numpy >=2.0.0rc1; python_version>='3.9'", diff --git a/testall.py b/testall.py index 1e8ea348..5955553a 100755 --- a/testall.py +++ b/testall.py @@ -16,7 +16,7 @@ PYTHON_CMD = "python" # change to "python3" if "python" invokes python version 2.x unit_tests = ('array_util_test', 'chunk_util_test', 'compression_test', 'domain_util_test', - 'dset_util_test', 'hdf5_dtype_test', 'id_util_test', 
diff --git a/testall.py b/testall.py
index 1e8ea348..5955553a 100755
--- a/testall.py
+++ b/testall.py
@@ -16,7 +16,7 @@
 PYTHON_CMD = "python"  # change to "python3" if "python" invokes python version 2.x
 
 unit_tests = ('array_util_test', 'chunk_util_test', 'compression_test', 'domain_util_test',
-              'dset_util_test', 'hdf5_dtype_test', 'id_util_test', 'lru_cache_test',
+              'dset_util_test', 'id_util_test', 'lru_cache_test',
               'shuffle_test', 'rangeget_util_test')
 
 integ_tests = ('uptest', 'setup_test', 'domain_test', 'group_test',
diff --git a/tests/integ/attr_test.py b/tests/integ/attr_test.py
index de54c5ea..b9f4dd7e 100644
--- a/tests/integ/attr_test.py
+++ b/tests/integ/attr_test.py
@@ -915,6 +915,7 @@ def testPutCommittedType(self):
             value.append(i * 0.5)
         payload = {"type": dtype_uuid, "shape": 10, "value": value}
         req = self.endpoint + "/groups/" + root_id + "/attributes/" + attr_name
+        print("req:", req)
         rsp = self.session.put(req, data=json.dumps(payload), headers=headers)
         self.assertEqual(rsp.status_code, 201)  # create attribute
diff --git a/tests/integ/vlen_test.py b/tests/integ/vlen_test.py
index e45504e6..38555c5d 100755
--- a/tests/integ/vlen_test.py
+++ b/tests/integ/vlen_test.py
@@ -15,9 +15,8 @@
 import numpy as np
 import sys
 
-sys.path.append("../..")
-from hsds.util.arrayUtil import arrayToBytes, bytesToArray
-from hsds.util.hdf5dtype import createDataType
+from h5json.hdf5dtype import createDataType
+from h5json.array_util import arrayToBytes, bytesToArray
 
 
 class VlenTest(unittest.TestCase):
@@ -646,7 +645,12 @@ def testPutVLenCompoundBinary(self):
 
         # write as binary data
         data = arrayToBytes(arr)
+        print("data:", data)
+        for i in range(len(data)):
+            print(f"{i:04d}: {data[i]}")
         self.assertEqual(len(data), 192)  # will vary based on count
+        arr_copy = bytesToArray(data, dt_compound, (count,))
+        print("arr_copy:", arr_copy)
         req = self.endpoint + "/datasets/" + dset_uuid + "/value"
         rsp = self.session.put(req, data=data, headers=headers_bin_req)
         self.assertEqual(rsp.status_code, 200)
diff --git a/tests/unit/array_util_test.py b/tests/unit/array_util_test.py
index 1a4f40e5..854e1314 100644
--- a/tests/unit/array_util_test.py
+++ b/tests/unit/array_util_test.py
@@ -16,6 +16,10 @@
 import sys
 import base64
 
+from h5json.hdf5dtype import special_dtype
+from h5json.hdf5dtype import check_dtype
+from h5json.hdf5dtype import createDataType
+
 sys.path.append("../..")
 from hsds.util.arrayUtil import (
     bytesArrayToList,
@@ -30,9 +34,6 @@
     getNumpyValue,
     getBroadcastShape
 )
-from hsds.util.hdf5dtype import special_dtype
-from hsds.util.hdf5dtype import check_dtype
-from hsds.util.hdf5dtype import createDataType
 
 
 class ArrayUtilTest(unittest.TestCase):
@@ -401,6 +402,11 @@ def testToBytes(self):
 
         # convert back to array
         arr_copy = bytesToArray(buffer, dt, (5,))
+        print("arr_copy bytes:", arrayToBytes(arr_copy))
+        print("arr_copy:", arr_copy)
+        print("arr_copy dt:", arr_copy.dtype)
+        print("arr_copy metadata:", arr_copy.dtype.metadata)
+        print("arr_copy kind:", arr_copy.dtype.kind)
         self.assertTrue(ndarray_compare(arr, arr_copy))
         # VLEN of bytes
         dt = np.dtype("O", metadata={"vlen": bytes})
diff --git a/tests/unit/hdf5_dtype_test.py b/tests/unit/hdf5_dtype_test.py
deleted file mode 100755
index e51913a6..00000000
--- a/tests/unit/hdf5_dtype_test.py
+++ /dev/null
@@ -1,717 +0,0 @@
-##############################################################################
-# Copyright by The HDF Group.                                                #
-# All rights reserved.                                                       #
-#                                                                            #
-# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and     #
-# Utilities.  The full HSDS copyright notice, including                     #
-# terms governing use, modification, and redistribution, is contained in    #
-# the file COPYING, which can be found at the root of the source code       #
-# distribution tree.  If you do not have access to this file, you may       #
-# request a copy from help@hdfgroup.org.
# -############################################################################## -import unittest -import logging -import numpy as np -import sys - -sys.path.append("../..") -from hsds.util import hdf5dtype -from hsds.util.hdf5dtype import special_dtype -from hsds.util.hdf5dtype import check_dtype -from hsds.util.hdf5dtype import Reference -from hsds.util.hdf5dtype import RegionReference - - -class Hdf5dtypeTest(unittest.TestCase): - def __init__(self, *args, **kwargs): - super(Hdf5dtypeTest, self).__init__(*args, **kwargs) - # main - self.logger = logging.getLogger() - self.logger.setLevel(logging.INFO) - - def testGetBaseTypeJson(self): - type_json = hdf5dtype.getBaseTypeJson("H5T_IEEE_F64LE") - self.assertTrue("class" in type_json) - self.assertEqual(type_json["class"], "H5T_FLOAT") - self.assertTrue("base" in type_json) - self.assertEqual(type_json["base"], "H5T_IEEE_F64LE") - - type_json = hdf5dtype.getBaseTypeJson("H5T_IEEE_F16LE") - self.assertTrue("class" in type_json) - self.assertEqual(type_json["class"], "H5T_FLOAT") - self.assertTrue("base" in type_json) - self.assertEqual(type_json["base"], "H5T_IEEE_F16LE") - - type_json = hdf5dtype.getBaseTypeJson("H5T_STD_I32LE") - self.assertTrue("class" in type_json) - self.assertEqual(type_json["class"], "H5T_INTEGER") - self.assertTrue("base" in type_json) - self.assertEqual(type_json["base"], "H5T_STD_I32LE") - - try: - hdf5dtype.getBaseTypeJson("foobar") - self.assertTrue(False) - except TypeError: - pass # expected - - def testBaseIntegerTypeItem(self): - dt = np.dtype("") - self.assertEqual(dt.kind, "u") - - dt = hdf5dtype.createDataType("H5T_STD_I16LE") - self.assertEqual(dt.name, "int16") - self.assertEqual(dt.kind, "i") - - dt = hdf5dtype.createDataType("H5T_IEEE_F64LE") - self.assertEqual(dt.name, "float64") - self.assertEqual(dt.kind, "f") - - dt = hdf5dtype.createDataType("H5T_IEEE_F32LE") - self.assertEqual(dt.name, "float32") - self.assertEqual(dt.kind, "f") - - typeItem = {"class": "H5T_INTEGER", "base": "H5T_STD_I32BE"} - typeSize = hdf5dtype.getItemSize(typeItem) - dt = hdf5dtype.createDataType(typeItem) - self.assertEqual(dt.name, "int32") - self.assertEqual(dt.kind, "i") - self.assertEqual(typeSize, 4) - - def testCreateBaseStringType(self): - typeItem = {"class": "H5T_STRING", "charSet": "H5T_CSET_ASCII", "length": 6} - typeSize = hdf5dtype.getItemSize(typeItem) - dt = hdf5dtype.createDataType(typeItem) - self.assertEqual(dt.name, "bytes48") - self.assertEqual(dt.kind, "S") - self.assertEqual(typeSize, 6) - - def testCreateBaseUnicodeType(self): - typeItem = {"class": "H5T_STRING", "charSet": "H5T_CSET_UTF8", "length": 6} - - dt = hdf5dtype.createDataType(typeItem) - typeSize = hdf5dtype.getItemSize(typeItem) - self.assertTrue(dt is not None) - self.assertEqual(dt.name, "bytes48") - self.assertEqual(dt.kind, "S") # uses byte - self.assertEqual(typeSize, 6) - - def testCreateNullTermStringType(self): - typeItem = { - "class": "H5T_STRING", - "charSet": "H5T_CSET_ASCII", - "length": 6, - "strPad": "H5T_STR_NULLTERM", - } - typeSize = hdf5dtype.getItemSize(typeItem) - dt = hdf5dtype.createDataType(typeItem) - - self.assertEqual(dt.name, "bytes48") - self.assertEqual(dt.kind, "S") - self.assertEqual(typeSize, 6) - - def testCreateVLenStringType(self): - typeItem = { - "class": "H5T_STRING", - "charSet": "H5T_CSET_ASCII", - "length": "H5T_VARIABLE", - } - typeSize = hdf5dtype.getItemSize(typeItem) - dt = hdf5dtype.createDataType(typeItem) - self.assertEqual(dt.name, "object") - self.assertEqual(dt.kind, "O") - 
self.assertEqual(check_dtype(vlen=dt), bytes) - self.assertEqual(typeSize, "H5T_VARIABLE") - - def testCreateVLenUTF8Type(self): - typeItem = { - "class": "H5T_STRING", - "charSet": "H5T_CSET_UTF8", - "length": "H5T_VARIABLE", - } - typeSize = hdf5dtype.getItemSize(typeItem) - dt = hdf5dtype.createDataType(typeItem) - self.assertEqual(dt.name, "object") - self.assertEqual(dt.kind, "O") - self.assertEqual(check_dtype(vlen=dt), str) - self.assertEqual(typeSize, "H5T_VARIABLE") - - def testCreateVLenDataType(self): - typeItem = {"class": "H5T_VLEN", "base": "H5T_STD_I32BE"} - typeSize = hdf5dtype.getItemSize(typeItem) - self.assertEqual(typeSize, "H5T_VARIABLE") - dt = hdf5dtype.createDataType(typeItem) - self.assertEqual(dt.name, "object") - self.assertEqual(dt.kind, "O") - - def testCreateOpaqueType(self): - typeItem = {"class": "H5T_OPAQUE", "size": 200} - typeSize = hdf5dtype.getItemSize(typeItem) - dt = hdf5dtype.createDataType(typeItem) - self.assertEqual(dt.name, "void1600") - self.assertEqual(dt.kind, "V") - self.assertEqual(typeSize, 200) - - def testCreateEnumType(self): - typeItem = { - "class": "H5T_ENUM", - "base": {"base": "H5T_STD_I16LE", "class": "H5T_INTEGER"}, - "mapping": {"GAS": 2, "LIQUID": 1, "PLASMA": 3, "SOLID": 0}, - } - - typeSize = hdf5dtype.getItemSize(typeItem) - self.assertEqual(typeSize, 2) - dt = hdf5dtype.createDataType(typeItem) - self.assertEqual(dt.name, "int16") - self.assertEqual(dt.kind, "i") - mapping = check_dtype(enum=dt) - self.assertTrue(isinstance(mapping, dict)) - self.assertEqual(mapping["SOLID"], 0) - self.assertEqual(mapping["LIQUID"], 1) - self.assertEqual(mapping["GAS"], 2) - self.assertEqual(mapping["PLASMA"], 3) - - def testCreateBoolType(self): - typeItem = { - "class": "H5T_ENUM", - "base": {"base": "H5T_STD_I8LE", "class": "H5T_INTEGER"}, - "mapping": {"TRUE": 1, "FALSE": 0}, - } - - typeSize = hdf5dtype.getItemSize(typeItem) - self.assertEqual(typeSize, 1) - dt = hdf5dtype.createDataType(typeItem) - self.assertEqual(dt.name, "bool") - self.assertEqual(dt.kind, "b") - self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt)) - - def testCreateCompoundType(self): - typeItem = { - "class": "H5T_COMPOUND", - "fields": [ - {"name": "temp", "type": "H5T_IEEE_F32LE"}, - {"name": "pressure", "type": "H5T_IEEE_F32LE"}, - { - "name": "location", - "type": { - "length": "H5T_VARIABLE", - "charSet": "H5T_CSET_ASCII", - "class": "H5T_STRING", - "strPad": "H5T_STR_NULLTERM", - }, - }, - {"name": "wind", "type": "H5T_STD_I16LE"}, - ], - } - typeSize = hdf5dtype.getItemSize(typeItem) - dt = hdf5dtype.createDataType(typeItem) - self.assertEqual(dt.name, "void144") - self.assertEqual(dt.kind, "V") - self.assertEqual(len(dt.fields), 4) - self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt)) - - dtLocation = dt[2] - self.assertEqual(dtLocation.name, "object") - self.assertEqual(dtLocation.kind, "O") - self.assertEqual(check_dtype(vlen=dtLocation), bytes) - self.assertEqual(typeSize, "H5T_VARIABLE") - self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dtLocation)) - - def testCreateCompoundInvalidFieldName(self): - typeItem = { - "class": "H5T_COMPOUND", - "fields": [ - { - "name": "\u03b1", - "type": {"base": "H5T_STD_I32LE", "class": "H5T_INTEGER"}, - }, - { - "name": "\u03c9", - "type": {"base": "H5T_STD_I32LE", "class": "H5T_INTEGER"}, - }, - ], - } - try: - hdf5dtype.createDataType(typeItem) - self.assertTrue(False) - except TypeError: - pass # expected - - def testCreateCompoundOfCompoundType(self): - typeItem = { - "class": "H5T_COMPOUND", - 
"fields": [ - { - "name": "field1", - "type": { - "class": "H5T_COMPOUND", - "fields": [ - { - "name": "x", - "type": { - "class": "H5T_FLOAT", - "base": "H5T_IEEE_F32LE", - }, - }, - { - "name": "y", - "type": { - "class": "H5T_FLOAT", - "base": "H5T_IEEE_F32LE", - }, - }, - ], - }, - }, - { - "name": "field2", - "type": { - "class": "H5T_COMPOUND", - "fields": [ - { - "name": "a", - "type": { - "class": "H5T_FLOAT", - "base": "H5T_IEEE_F32LE", - }, - }, - { - "name": "b", - "type": { - "class": "H5T_FLOAT", - "base": "H5T_IEEE_F32LE", - }, - }, - { - "name": "c", - "type": { - "class": "H5T_FLOAT", - "base": "H5T_IEEE_F32LE", - }, - }, - ], - }, - }, - ], - } - dt = hdf5dtype.createDataType(typeItem) - self.assertEqual(dt.name, "void160") - self.assertEqual(dt.kind, "V") - self.assertEqual(len(dt.fields), 2) - dt_field1 = dt[0] - self.assertEqual(dt_field1.name, "void64") - self.assertEqual(dt_field1.kind, "V") - self.assertEqual(len(dt_field1.fields), 2) - dt_field2 = dt[1] - self.assertEqual(dt_field2.name, "void96") - self.assertEqual(dt_field2.kind, "V") - self.assertEqual(len(dt_field2.fields), 3) - - def testCreateCompoundTypeUnicodeFields(self): - typeItem = { - "class": "H5T_COMPOUND", - "fields": [ - {"name": u"temp", "type": "H5T_IEEE_F32LE"}, - {"name": u"pressure", "type": "H5T_IEEE_F32LE"}, - {"name": u"wind", "type": "H5T_STD_I16LE"}, - ], - } - typeSize = hdf5dtype.getItemSize(typeItem) - dt = hdf5dtype.createDataType(typeItem) - self.assertEqual(dt.name, "void80") - self.assertEqual(dt.kind, "V") - self.assertEqual(len(dt.fields), 3) - self.assertEqual(typeSize, 10) - self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt)) - - def testCreateArrayType(self): - typeItem = {"class": "H5T_ARRAY", "base": "H5T_STD_I64LE", "dims": (3, 5)} - typeSize = hdf5dtype.getItemSize(typeItem) - dt = hdf5dtype.createDataType(typeItem) - self.assertEqual(dt.name, "void960") - self.assertEqual(dt.kind, "V") - self.assertEqual(typeSize, 120) - self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt)) - - def testCreateArrayIntegerType(self): - typeItem = {"class": "H5T_INTEGER", "base": "H5T_STD_I64LE", "dims": (3, 5)} - - try: - hdf5dtype.createDataType(typeItem) - self.assertTrue(False) # expected exception - dims used with non-array type - except TypeError: - pass # should get exception - - def testCreateCompoundArrayType(self): - typeItem = { - "class": "H5T_COMPOUND", - "fields": [ - {"type": {"base": "H5T_STD_I8LE", "class": "H5T_INTEGER"}, "name": "a"}, - { - "type": { - "dims": [10], - "base": { - "length": 1, - "charSet": "H5T_CSET_ASCII", - "class": "H5T_STRING", - "strPad": "H5T_STR_NULLPAD", - }, - "class": "H5T_ARRAY", - }, - "name": "b", - }, - ], - } - typeSize = hdf5dtype.getItemSize(typeItem) - dt = hdf5dtype.createDataType(typeItem) - self.assertEqual(len(dt.fields), 2) - self.assertTrue("a" in dt.fields.keys()) - self.assertTrue("b" in dt.fields.keys()) - self.assertEqual(typeSize, 11) - self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt)) - - def testCompoundArrayType(self): - typeItem = { - "class": "H5T_COMPOUND", - "fields": [ - { - "type": {"class": "H5T_INTEGER", "base": "H5T_STD_U64BE"}, - "name": "VALUE1", - }, - { - "type": {"class": "H5T_FLOAT", "base": "H5T_IEEE_F64BE"}, - "name": "VALUE2", - }, - { - "type": { - "class": "H5T_ARRAY", - "dims": [2], - "base": { - "class": "H5T_STRING", - "charSet": "H5T_CSET_ASCII", - "strPad": "H5T_STR_NULLTERM", - "length": "H5T_VARIABLE", - }, - }, - "name": "VALUE3", - }, - ], - } - dt = 
hdf5dtype.createDataType(typeItem) - typeSize = hdf5dtype.getItemSize(typeItem) - self.assertEqual(typeSize, "H5T_VARIABLE") - self.assertEqual(len(dt), 3) - self.assertTrue("VALUE1" in dt.fields.keys()) - self.assertTrue("VALUE2" in dt.fields.keys()) - self.assertTrue("VALUE3" in dt.fields.keys()) - self.assertEqual(typeSize, hdf5dtype.getDtypeItemSize(dt)) - - dt3 = dt["VALUE3"] - self.assertEqual(check_dtype(vlen=dt3), bytes) - - -if __name__ == "__main__": - # setup test files - - unittest.main() From abb5d0c2fd3aab428b0418a18f26982fd4cb0c8d Mon Sep 17 00:00:00 2001 From: John Readey Date: Mon, 14 Apr 2025 17:59:27 +0200 Subject: [PATCH 02/49] temp use of github branch for h5json ref --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9b733a85..33ab5dd5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ dependencies = [ "bitshuffle >=0.5.2", "cryptography", "h5py >= 3.6.0", - "h5json", + "h5json@git+https://github.com/HDFGroup/hdf5-json@abstract", "importlib_resources", "numcodecs", "numpy >=2.0.0rc1; python_version>='3.9'", From ed44afabfbb8aee373f144cafd9dfedbf3ac5c32 Mon Sep 17 00:00:00 2001 From: John Readey Date: Mon, 14 Apr 2025 18:03:13 +0200 Subject: [PATCH 03/49] remove array_util test --- testall.py | 2 +- tests/unit/array_util_test.py | 1031 --------------------------------- 2 files changed, 1 insertion(+), 1032 deletions(-) delete mode 100644 tests/unit/array_util_test.py diff --git a/testall.py b/testall.py index 5955553a..247d4a91 100755 --- a/testall.py +++ b/testall.py @@ -15,7 +15,7 @@ PYTHON_CMD = "python" # change to "python3" if "python" invokes python version 2.x -unit_tests = ('array_util_test', 'chunk_util_test', 'compression_test', 'domain_util_test', +unit_tests = ('chunk_util_test', 'compression_test', 'domain_util_test', 'dset_util_test', 'id_util_test', 'lru_cache_test', 'shuffle_test', 'rangeget_util_test') diff --git a/tests/unit/array_util_test.py b/tests/unit/array_util_test.py deleted file mode 100644 index 854e1314..00000000 --- a/tests/unit/array_util_test.py +++ /dev/null @@ -1,1031 +0,0 @@ -############################################################################## -# Copyright by The HDF Group. # -# All rights reserved. # -# # -# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # -# Utilities. The full HSDS copyright notice, including # -# terms governing use, modification, and redistribution, is contained in # -# the file COPYING, which can be found at the root of the source code # -# distribution tree. If you do not have access to this file, you may # -# request a copy from help@hdfgroup.org. 
# -############################################################################## -import unittest -import json -import numpy as np - -import sys -import base64 - -from h5json.hdf5dtype import special_dtype -from h5json.hdf5dtype import check_dtype -from h5json.hdf5dtype import createDataType - -sys.path.append("../..") -from hsds.util.arrayUtil import ( - bytesArrayToList, - toTuple, - getNumElements, - jsonToArray, - arrayToBytes, - bytesToArray, - getByteArraySize, - IndexIterator, - ndarray_compare, - getNumpyValue, - getBroadcastShape -) - - -class ArrayUtilTest(unittest.TestCase): - def __init__(self, *args, **kwargs): - super(ArrayUtilTest, self).__init__(*args, **kwargs) - # main - - def testByteArrayToList(self): - data_items = ( - 42, - "foo", - b"foo", - [1, 2, 3], - (1, 2, 3), - ["A", "B", "C"], - [b"A", b"B", b"C"], - [["A", "B"], [b"a", b"b", b"c"]], - ) - for data in data_items: - json_data = bytesArrayToList(data) - # will throw TypeError if not able to convert - json.dumps(json_data) - - def testToTuple(self): - data0d = 42 # scalar - data1d1 = [1] # one dimensional, one element list - data1d = [1, 2, 3, 4, 5] # list - data2d1 = [ - [1, 2], - ] # two dimensional, one element - data2d = [[1, 0.1], [2, 0.2], [3, 0.3], [4, 0.4]] # list of two-element lists - data3d = [[[0, 0.0], [1, 0.1]], [[2, 0.2], [3, 0.3]]] # list of list of lists - out = toTuple(0, data0d) - self.assertEqual(data0d, out) - out = toTuple(1, data1d1) - self.assertEqual(data1d1, out) - out = toTuple(1, data1d) - self.assertEqual(data1d, out) - out = toTuple(2, data2d) - self.assertEqual(data2d, out) - out = toTuple(1, data2d1) - self.assertEqual([(1, 2)], out) - out = toTuple(3, data3d) - self.assertEqual(data3d, out) - out = toTuple(1, data2d) # treat input as 1d array of two-field compound types - self.assertEqual([(1, 0.1), (2, 0.2), (3, 0.3), (4, 0.4)], out) - out = toTuple(2, data3d) # treat input as 2d array of two-field compound types - self.assertEqual([[(0, 0.0), (1, 0.1)], [(2, 0.2), (3, 0.3)]], out) - out = toTuple(1, data3d) # treat input a 1d array of compound type of compound types - self.assertEqual([((0, 0.0), (1, 0.1)), ((2, 0.2), (3, 0.3))], out) - - def testGetNumElements(self): - shape = (4,) - nelements = getNumElements(shape) - self.assertEqual(nelements, 4) - - shape = [10,] - nelements = getNumElements(shape) - self.assertEqual(nelements, 10) - - shape = (10, 8) - nelements = getNumElements(shape) - self.assertEqual(nelements, 80) - - def testJsonToArray(self): - dt = np.dtype("i4") - shape = [4, ] - data = [0, 2, 4, 6] - out = jsonToArray(shape, dt, data) - - self.assertTrue(isinstance(out, np.ndarray)) - self.assertEqual(out.shape, (4,)) - for i in range(4): - self.assertEqual(out[i], i * 2) - - # compound type - dt = np.dtype([("a", "i4"), ("b", "S5")]) - shape = [2, ] - data = [[4, "four"], [5, "five"]] - out = jsonToArray(shape, dt, data) - self.assertTrue(isinstance(out, np.ndarray)) - - self.assertEqual(out.shape, (2,)) - self.assertTrue(isinstance(out[0], np.void)) - e0 = out[0].tolist() - self.assertEqual(e0, (4, b"four")) - self.assertTrue(isinstance(out[1], np.void)) - e1 = out[1].tolist() - self.assertEqual(e1, (5, b"five")) - - shape = [1, ] - data = [ - [6, "six"], - ] - out = jsonToArray(shape, dt, data) - e0 = out[0].tolist() - self.assertEqual(e0, (6, b"six")) - - data = [6, "six"] - out = jsonToArray(shape, dt, data) - e0 = out[0].tolist() - self.assertEqual(e0, (6, b"six")) - - # test ascii chars >127 - dt = np.dtype("S26") - data = "extended ascii char 241: " + 
chr(241) - out = jsonToArray(shape, dt, data) - self.assertEqual(out[0], b'extended ascii char 241: \xc3') - - dt = np.dtype("S12") - data = "eight: \u516b" - out = jsonToArray(shape, dt, data) - self.assertEqual(out[0], b'eight: \xe5\x85\xab') - - # VLEN ascii - dt = special_dtype(vlen=bytes) - data = [b"one", b"two", b"three", b"four", b"five"] - shape = [5, ] - out = jsonToArray(shape, dt, data) - self.assertTrue("vlen" in out.dtype.metadata) - self.assertEqual(out.dtype.metadata["vlen"], bytes) - self.assertEqual(out.dtype.kind, "O") - self.assertEqual(out.shape, (5,)) - # TBD: code does not actually enforce use of bytes vs. str, - # probably not worth the effort to fix - self.assertEqual(out[2], b"three") - self.assertEqual(out[3], b"four") - - # VLEN str - dt = special_dtype(vlen=str) - data = [ - [b"part 1 - section A", b"part 1 - section B"], - [b"part 2 - section A", b"part 2 - section B"], - ] - shape = [2,] - out = jsonToArray(shape, dt, data) - self.assertTrue("vlen" in out.dtype.metadata) - self.assertEqual(out.dtype.metadata["vlen"], str) - self.assertEqual(out.dtype.kind, "O") - self.assertEqual(out.shape, (2,)) - self.assertEqual(out[0], tuple(data[0])) - self.assertEqual(out[1], tuple(data[1])) - - # VLEN Scalar str - dt = special_dtype(vlen=str) - data = "I'm a string!" - shape = [1, ] - out = jsonToArray(shape, dt, data) - - # VLEN unicode - dt = special_dtype(vlen=bytes) - data = ["one", "two", "three", "four", "five"] - shape = [5, ] - out = jsonToArray(shape, dt, data) - self.assertTrue("vlen" in out.dtype.metadata) - self.assertEqual(out.dtype.metadata["vlen"], bytes) - self.assertEqual(out.dtype.kind, "O") - self.assertEqual(out[2], b"three") - - # VLEN data - dt = special_dtype(vlen=np.dtype("int32")) - shape = [4, ] - data = [ - [1,], - [1, 2], - [1, 2, 3], - [1, 2, 3, 4], - ] - out = jsonToArray(shape, dt, data) - self.assertTrue(isinstance(out, np.ndarray)) - self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("int32")) - - self.assertEqual(out.shape, (4,)) - self.assertEqual(out.dtype.kind, "O") - self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("int32")) - for i in range(4): - e = out[i] # .tolist() - self.assertTrue(isinstance(e, tuple)) - self.assertEqual(e, tuple(range(1, i + 2))) - - # VLEN 2D data - dt = special_dtype(vlen=np.dtype("int32")) - shape = [2, 2] - data = [ - [ - [0,], - [1, 2], - ], - [ - [1,], - [2, 3], - ], - ] - out = jsonToArray(shape, dt, data) - self.assertTrue(isinstance(out, np.ndarray)) - self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("int32")) - - self.assertEqual(out.shape, (2, 2)) - self.assertEqual(out.dtype.kind, "O") - self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("int32")) - for i in range(2): - for j in range(2): - e = out[i, j] # .tolist() - self.assertTrue(isinstance(e, tuple)) - - # create VLEN of obj ref's - ref_type = {"class": "H5T_REFERENCE", "base": "H5T_STD_REF_OBJ"} - vlen_type = {"class": "H5T_VLEN", "base": ref_type} - dt = createDataType(vlen_type) # np datatype - - id0 = b"g-a4f455b2-c8cf-11e7-8b73-0242ac110009" - id1 = b"g-a50af844-c8cf-11e7-8b73-0242ac110009" - id2 = b"g-a5236276-c8cf-11e7-8b73-0242ac110009" - - data = [ - [id0, ], - [id0, id1], - [id0, id1, id2], - ] - shape = [3, ] - out = jsonToArray(shape, dt, data) - self.assertTrue(isinstance(out, np.ndarray)) - base_type = check_dtype(vlen=out.dtype) - self.assertEqual(base_type.kind, "S") - self.assertEqual(base_type.itemsize, 48) - - self.assertEqual(out.shape, (3,)) - self.assertEqual(out.dtype.kind, "O") - 
self.assertEqual(check_dtype(vlen=out.dtype), np.dtype("S48")) - - e = out[0] - self.assertTrue(isinstance(e, tuple)) - self.assertEqual(e, (id0,)) - e = out[1] - self.assertTrue(isinstance(e, tuple)) - self.assertEqual(e, (id0, id1)) - e = out[2] - self.assertTrue(isinstance(e, tuple)) - self.assertEqual(e, (id0, id1, id2)) - - # compound type with array field - dt = np.dtype([("a", ("i4", 3)), ("b", "S5")]) - shape = [2, ] - data = [[[4, 8, 12], "four"], [[5, 10, 15], "five"]] - out = jsonToArray(shape, dt, data) - self.assertTrue(isinstance(out, np.ndarray)) - - self.assertEqual(out.shape, (2,)) - self.assertTrue(isinstance(out[0], np.void)) - e0 = out[0] - self.assertEqual(len(e0), 2) - e0a = e0[0] - self.assertTrue(isinstance(e0a, np.ndarray)) - self.assertEqual(e0a[0], 4) - self.assertEqual(e0a[1], 8) - self.assertEqual(e0a[2], 12) - e0b = e0[1] - self.assertEqual(e0b, b"four") - self.assertTrue(isinstance(out[1], np.void)) - e1 = out[1] - self.assertEqual(len(e1), 2) - e1a = e1[0] - self.assertTrue(isinstance(e1a, np.ndarray)) - self.assertEqual(e1a[0], 5) - self.assertEqual(e1a[1], 10) - self.assertEqual(e1a[2], 15) - e1b = e1[1] - self.assertEqual(e1b, b"five") - - def testToBytes(self): - # Simple array - dt = np.dtype(" expected_num_bytes) - - # convert buffer back to arr - arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64") - self.assertTrue(np.array_equal(arr, arr_copy)) - - # fixed length string - dt = np.dtype("S8") - arr = np.asarray(("abcdefgh", "ABCDEFGH", "12345678"), dtype=dt) - buffer = arrayToBytes(arr, encoding="base64") - - # convert back to array - arr_copy = bytesToArray(buffer, dt, (3,), encoding="base64") - self.assertTrue(ndarray_compare(arr, arr_copy)) - - # Compound non-vlen - dt = np.dtype([("x", "f8"), ("y", "i4")]) - arr = np.zeros((4,), dtype=dt) - arr[0] = (3.12, 42) - arr[3] = (1.28, 69) - buffer = arrayToBytes(arr, encoding="base64") - - # convert back to array - arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64") - self.assertTrue(ndarray_compare(arr, arr_copy)) - - # VLEN of int32's - dt = np.dtype("O", metadata={"vlen": np.dtype("int32")}) - arr = np.zeros((4,), dtype=dt) - arr[0] = np.int32([1, ]) - arr[1] = np.int32([1, 2]) - arr[2] = 0 # test un-intialized value - arr[3] = np.int32([1, 2, 3]) - buffer = arrayToBytes(arr, encoding="base64") - - # convert back to array - arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64") - self.assertTrue(ndarray_compare(arr, arr_copy)) - - # VLEN of strings - dt = np.dtype("O", metadata={"vlen": str}) - arr = np.zeros((5,), dtype=dt) - arr[0] = "one: \u4e00" - arr[1] = "two: \u4e8c" - arr[2] = "three: \u4e09" - arr[3] = "four: \u56db" - arr[4] = 0 - buffer = arrayToBytes(arr, encoding="base64") - - # convert back to array - arr_copy = bytesToArray(buffer, dt, (5,), encoding="base64") - self.assertTrue(ndarray_compare(arr, arr_copy)) - # VLEN of bytes - dt = np.dtype("O", metadata={"vlen": bytes}) - arr = np.zeros((5,), dtype=dt) - arr[0] = b"Parting" - arr[1] = b"is such" - arr[2] = b"sweet" - arr[3] = b"sorrow" - arr[4] = 0 - - buffer = arrayToBytes(arr, encoding="base64") - - # convert back to array - arr_copy = bytesToArray(buffer, dt, (5,), encoding="base64") - self.assertTrue(ndarray_compare(arr, arr_copy)) - - # - # Compound str vlen - # - dt_vstr = np.dtype("O", metadata={"vlen": str}) - dt = np.dtype([("x", "i4"), ("tag", dt_vstr), ("code", "S4")]) - arr = np.zeros((4,), dtype=dt) - arr[0] = (42, "Hello", "X1") - arr[3] = (84, "Bye", "XYZ") - count = getByteArraySize(arr) - 
buffer = arrayToBytes(arr, encoding="base64") - - # convert back to array - arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64") - self.assertTrue(ndarray_compare(arr, arr_copy)) - - # - # Compound int vlen - # - dt_vint = np.dtype("O", metadata={"vlen": "int32"}) - dt = np.dtype([("x", "int32"), ("tag", dt_vint)]) - arr = np.zeros((4,), dtype=dt) - arr[0] = (42, np.array((), dtype="int32")) - arr[3] = (84, np.array((1, 2, 3), dtype="int32")) - count = getByteArraySize(arr) - self.assertEqual(count, 44) - buffer = arrayToBytes(arr, encoding="base64") - - # convert back to array - arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64") - self.assertTrue(ndarray_compare(arr, arr_copy)) - - # - # VLEN utf string with array type - # - dt_arr_str = np.dtype("(2,)O", metadata={"vlen": str}) - dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)]) - arr = np.zeros((4,), dtype=dt) - dt_str = np.dtype("O", metadata={"vlen": str}) - arr[0] = (42, np.asarray(["hi", "bye"], dtype=dt_str)) - arr[3] = (84, np.asarray(["hi-hi", "bye-bye"], dtype=dt_str)) - buffer = arrayToBytes(arr, encoding="base64") - - # convert back to array - arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64") - - self.assertEqual(arr.dtype, arr_copy.dtype) - self.assertEqual(arr.shape, arr_copy.shape) - for i in range(4): - e = arr[i] - e_copy = arr_copy[i] - self.assertTrue(np.array_equal(e, e_copy)) - # - # VLEN ascii with array type - # - dt_arr_str = np.dtype("(2,)O", metadata={"vlen": bytes}) - dt = np.dtype([("x", "i4"), ("tag", dt_arr_str)]) - arr = np.zeros((4,), dtype=dt) - dt_str = np.dtype("O", metadata={"vlen": bytes}) - arr[0] = (42, np.asarray([b"hi", b"bye"], dtype=dt_str)) - arr[3] = (84, np.asarray([b"hi-hi", b"bye-bye"], dtype=dt_str)) - buffer = arrayToBytes(arr, encoding="base64") - - # convert back to array - arr_copy = bytesToArray(buffer, dt, (4,), encoding="base64") - self.assertTrue(ndarray_compare(arr, arr_copy)) - - def testArrayCompareInt(self): - # Simple array - dt = np.dtype(" Date: Mon, 14 Apr 2025 18:07:58 +0200 Subject: [PATCH 04/49] use h5json for ndarray_compare function --- hsds/util/chunkUtil.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hsds/util/chunkUtil.py b/hsds/util/chunkUtil.py index dc03cc89..9c984de6 100644 --- a/hsds/util/chunkUtil.py +++ b/hsds/util/chunkUtil.py @@ -1,6 +1,8 @@ import numpy as np + +from h5json.array_util import ndarray_compare + from .. 
import hsds_logger as log -from .arrayUtil import ndarray_compare CHUNK_BASE = 16 * 1024 # Multiplier by which chunks are adjusted CHUNK_MIN = 512 * 1024 # Soft lower limit (512k) From 3904cf9eae97108f69d1ea7cb28d49c76d3ac957 Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 23 Apr 2025 16:22:06 +0200 Subject: [PATCH 05/49] use h5json objid funcs --- hsds/async_lib.py | 5 +- hsds/attr_sn.py | 2 +- hsds/basenode.py | 7 +- hsds/chunk_crawl.py | 2 +- hsds/chunk_dn.py | 3 +- hsds/chunk_sn.py | 2 +- hsds/ctype_dn.py | 3 +- hsds/ctype_sn.py | 2 +- hsds/datanode.py | 5 +- hsds/datanode_lib.py | 8 +- hsds/domain_crawl.py | 4 +- hsds/domain_dn.py | 2 +- hsds/domain_sn.py | 6 +- hsds/dset_dn.py | 2 +- hsds/dset_lib.py | 3 +- hsds/dset_sn.py | 2 +- hsds/folder_crawl.py | 3 +- hsds/group_dn.py | 3 +- hsds/group_sn.py | 3 +- hsds/headnode.py | 2 +- hsds/link_dn.py | 3 +- hsds/link_sn.py | 4 +- hsds/servicenode_lib.py | 5 +- hsds/util/httpUtil.py | 3 +- hsds/util/idUtil.py | 540 ------------------------------------- testall.py | 2 +- tests/integ/vlen_test.py | 5 - tests/unit/id_util_test.py | 212 --------------- 28 files changed, 54 insertions(+), 789 deletions(-) delete mode 100644 hsds/util/idUtil.py delete mode 100755 tests/unit/id_util_test.py diff --git a/hsds/async_lib.py b/hsds/async_lib.py index 15d67f5f..715e7985 100755 --- a/hsds/async_lib.py +++ b/hsds/async_lib.py @@ -18,8 +18,9 @@ from h5json.hdf5dtype import getItemSize from h5json.hdf5dtype import createDataType from h5json.array_util import getNumElements, bytesToArray -from .util.idUtil import isValidUuid, isSchema2Id, getS3Key, isS3ObjKey -from .util.idUtil import getObjId, isValidChunkId, getCollectionForId +from h5json.objid import isValidUuid, isSchema2Id, getS3Key, isS3ObjKey +from h5json.objid import getObjId, isValidChunkId, getCollectionForId + from .util.chunkUtil import getDatasetId, getNumChunks, ChunkIterator from .util.dsetUtil import getHyperslabSelection, getFilterOps, getChunkDims, getFilters from .util.dsetUtil import getDatasetLayoutClass, getDatasetLayout, getShapeDims diff --git a/hsds/attr_sn.py b/hsds/attr_sn.py index a735c5c6..c5d76227 100755 --- a/hsds/attr_sn.py +++ b/hsds/attr_sn.py @@ -22,10 +22,10 @@ from h5json.hdf5dtype import createDataType, getItemSize from h5json.array_util import jsonToArray, getNumElements, bytesArrayToList from h5json.array_util import bytesToArray, arrayToBytes, decodeData, encodeData +from h5json.objid import isValidUuid, getRootObjId from .util.httpUtil import getAcceptType, jsonResponse, getHref, getBooleanParam from .util.globparser import globmatch -from .util.idUtil import isValidUuid, getRootObjId from .util.authUtil import getUserPasswordFromRequest, validateUserPassword from .util.domainUtil import getDomainFromRequest, isValidDomain from .util.domainUtil import getBucketForDomain, verifyRoot diff --git a/hsds/basenode.py b/hsds/basenode.py index f3356f34..6dd83b64 100644 --- a/hsds/basenode.py +++ b/hsds/basenode.py @@ -25,15 +25,18 @@ from aiohttp.web_exceptions import HTTPInternalServerError from aiohttp.web_exceptions import HTTPServiceUnavailable + + from . import config from .util.httpUtil import http_get, http_post, jsonResponse -from .util.idUtil import createNodeId, getNodeNumber, getNodeCount from .util.authUtil import getUserPasswordFromRequest, validateUserPassword from .util.authUtil import isAdminUser from .util.k8sClient import getDnLabelSelector, getPodIps +from .util.nodeUtil import createNodeId, getNodeNumber, getNodeCount + from . 
import hsds_logger as log -HSDS_VERSION = "0.9.2" +HSDS_VERSION = "1.0.0" def getVersion(): diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py index a153bfe8..960cdadf 100755 --- a/hsds/chunk_crawl.py +++ b/hsds/chunk_crawl.py @@ -28,9 +28,9 @@ from h5json.array_util import jsonToArray, getNumpyValue from h5json.array_util import getNumElements, arrayToBytes, bytesToArray +from .util.nodeUtil import getDataNodeUrl, getNodeCount from .util.httpUtil import http_get, http_put, http_post, get_http_client from .util.httpUtil import isUnixDomainUrl -from .util.idUtil import getDataNodeUrl, getNodeCount from .util.dsetUtil import getSliceQueryParam, getShapeDims from .util.dsetUtil import getSelectionShape, getChunkLayout from .util.chunkUtil import getChunkCoverage, getDataCoverage diff --git a/hsds/chunk_dn.py b/hsds/chunk_dn.py index eeeed88d..97e86f01 100644 --- a/hsds/chunk_dn.py +++ b/hsds/chunk_dn.py @@ -22,9 +22,9 @@ from h5json.hdf5dtype import createDataType, getSubType from h5json.array_util import bytesToArray, arrayToBytes, getBroadcastShape +from h5json.objid import getS3Key, isValidUuid from .util.httpUtil import request_read, getContentType -from .util.idUtil import getS3Key, validateInPartition, isValidUuid from .util.storUtil import isStorObj, deleteStorObj from .util.dsetUtil import getSelectionList, getChunkLayout, getShapeDims from .util.dsetUtil import getSelectionShape, getChunkInitializer @@ -33,6 +33,7 @@ from .util.chunkUtil import chunkWritePoints, chunkReadPoints from .util.domainUtil import isValidBucketName from .util.boolparser import BooleanParser +from .util.nodeUtil import validateInPartition from .datanode_lib import get_metadata_obj, get_chunk, save_chunk from . import hsds_logger as log diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 921feaf0..4bb084b3 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -28,10 +28,10 @@ from h5json.hdf5dtype import getItemSize, getDtypeItemSize, getSubType, createDataType from h5json.array_util import bytesArrayToList, jsonToArray, getNumElements, arrayToBytes from h5json.array_util import bytesToArray, squeezeArray, getBroadcastShape +from h5json.objid import isValidUuid from .util.httpUtil import getHref, getAcceptType, getContentType from .util.httpUtil import request_read, jsonResponse, isAWSLambda -from .util.idUtil import isValidUuid from .util.domainUtil import getDomainFromRequest, isValidDomain from .util.domainUtil import getBucketForDomain from .util.dsetUtil import isNullSpace, isScalarSpace, get_slices, getShapeDims diff --git a/hsds/ctype_dn.py b/hsds/ctype_dn.py index f06b98b3..fe8a67a7 100755 --- a/hsds/ctype_dn.py +++ b/hsds/ctype_dn.py @@ -18,7 +18,8 @@ from aiohttp.web_exceptions import HTTPInternalServerError from aiohttp.web import json_response -from .util.idUtil import isValidUuid, validateUuid +from h5json.objid import isValidUuid, validateUuid + from .datanode_lib import get_obj_id, get_metadata_obj, save_metadata_obj from .datanode_lib import delete_metadata_obj, check_metadata_obj from .util.domainUtil import isValidBucketName diff --git a/hsds/ctype_sn.py b/hsds/ctype_sn.py index 59faccd1..d85ffc07 100755 --- a/hsds/ctype_sn.py +++ b/hsds/ctype_sn.py @@ -18,10 +18,10 @@ from json import JSONDecodeError from h5json.hdf5dtype import validateTypeItem, getBaseTypeJson +from h5json.objid import isValidUuid from .util.httpUtil import getHref, respJsonAssemble, getBooleanParam from .util.httpUtil import jsonResponse -from .util.idUtil import isValidUuid from .util.linkUtil import 
validateLinkName from .util.authUtil import getUserPasswordFromRequest, aclCheck from .util.authUtil import validateUserPassword diff --git a/hsds/datanode.py b/hsds/datanode.py index b7c00b9d..cef44bd0 100644 --- a/hsds/datanode.py +++ b/hsds/datanode.py @@ -17,10 +17,11 @@ import traceback from aiohttp.web import run_app +from h5json.objid import isValidUuid, isSchema2Id, getCollectionForId +from h5json.objid import isRootObjId + from . import config from .util.lruCache import LruCache -from .util.idUtil import isValidUuid, isSchema2Id, getCollectionForId -from .util.idUtil import isRootObjId from .util.httpUtil import isUnixDomainUrl, bindToSocket, getPortFromUrl from .util.httpUtil import jsonResponse, release_http_client from .util.storUtil import setBloscThreads, getBloscThreads diff --git a/hsds/datanode_lib.py b/hsds/datanode_lib.py index 1c6c3b6c..48843a25 100644 --- a/hsds/datanode_lib.py +++ b/hsds/datanode_lib.py @@ -22,10 +22,11 @@ from h5json.hdf5dtype import createDataType from h5json.array_util import arrayToBytes, bytesToArray, jsonToArray +from h5json.objid import getS3Key, isValidUuid +from h5json.objid import isValidChunkId, isSchema2Id +from h5json.objid import getRootObjId, isRootObjId -from .util.idUtil import validateInPartition, getS3Key, isValidUuid -from .util.idUtil import isValidChunkId, getDataNodeUrl, isSchema2Id -from .util.idUtil import getRootObjId, isRootObjId +from .util.nodeUtil import getDataNodeUrl from .util.storUtil import getStorJSONObj, putStorJSONObj, putStorBytes from .util.storUtil import getStorBytes, isStorObj, deleteStorObj, getHyperChunks from .util.storUtil import getBucketFromStorURI, getKeyFromStorURI, getURIFromKey @@ -35,6 +36,7 @@ from .util.dsetUtil import getChunkLayout, getFilterOps, getShapeDims from .util.dsetUtil import getChunkInitializer, getSliceQueryParam, getFilters from .util.chunkUtil import getDatasetId, getChunkSelection, getChunkIndex +from .util.nodeUtil import validateInPartition from .util.rangegetUtil import ChunkLocation, chunkMunge, getHyperChunkIndex, getHyperChunkFactors from .util.timeUtil import getNow from . import config diff --git a/hsds/domain_crawl.py b/hsds/domain_crawl.py index b8e0ba39..656b04e6 100644 --- a/hsds/domain_crawl.py +++ b/hsds/domain_crawl.py @@ -18,8 +18,10 @@ from aiohttp.web_exceptions import HTTPServiceUnavailable, HTTPConflict, HTTPBadRequest from aiohttp.web_exceptions import HTTPInternalServerError, HTTPNotFound, HTTPGone +from h5json.objid import getCollectionForId + +from .util.nodeUtil import getDataNodeUrl from .util.httpUtil import isOK -from .util.idUtil import getCollectionForId, getDataNodeUrl from .util.globparser import globmatch from .servicenode_lib import getObjectJson, getAttributes, putAttributes, getLinks, putLinks from . 
import hsds_logger as log diff --git a/hsds/domain_dn.py b/hsds/domain_dn.py index 83932e5d..0fe0d01c 100755 --- a/hsds/domain_dn.py +++ b/hsds/domain_dn.py @@ -18,7 +18,7 @@ from .util.authUtil import getAclKeys from .util.domainUtil import isValidDomain, getBucketForDomain -from .util.idUtil import validateInPartition +from .util.nodeUtil import validateInPartition from .util.timeUtil import getNow from .datanode_lib import get_metadata_obj, save_metadata_obj from .datanode_lib import delete_metadata_obj, check_metadata_obj diff --git a/hsds/domain_sn.py b/hsds/domain_sn.py index 56d3611a..efbc31ab 100755 --- a/hsds/domain_sn.py +++ b/hsds/domain_sn.py @@ -22,11 +22,13 @@ from aiohttp.web_exceptions import HTTPConflict, HTTPServiceUnavailable from aiohttp.web import json_response +from h5json.objid import createObjId, getCollectionForId +from h5json.objid import isValidUuid, isSchema2Id + +from .util.nodeUtil import getNodeCount, getDataNodeUrl from .util.httpUtil import getObjectClass, http_post, http_put, http_delete from .util.httpUtil import getHref, respJsonAssemble from .util.httpUtil import jsonResponse -from .util.idUtil import getDataNodeUrl, createObjId, getCollectionForId -from .util.idUtil import isValidUuid, isSchema2Id, getNodeCount from .util.authUtil import getUserPasswordFromRequest, aclCheck, isAdminUser from .util.authUtil import validateUserPassword, getAclKeys from .util.domainUtil import getParentDomain, getDomainFromRequest diff --git a/hsds/dset_dn.py b/hsds/dset_dn.py index 34a8ff6f..60d1037b 100755 --- a/hsds/dset_dn.py +++ b/hsds/dset_dn.py @@ -17,8 +17,8 @@ from aiohttp.web_exceptions import HTTPInternalServerError from aiohttp.web import json_response +from h5json.objid import isValidUuid, validateUuid -from .util.idUtil import isValidUuid, validateUuid from .util.domainUtil import isValidBucketName from .util.timeUtil import getNow from .datanode_lib import get_obj_id, check_metadata_obj, get_metadata_obj diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py index 5b729afb..689c2c7e 100755 --- a/hsds/dset_lib.py +++ b/hsds/dset_lib.py @@ -19,7 +19,9 @@ from h5json.hdf5dtype import createDataType, getItemSize from h5json.array_util import getNumpyValue +from h5json.objid import isSchema2Id, getS3Key, getObjId +from .util.nodeUtil import getDataNodeUrl from .util.boolparser import BooleanParser from .util.dsetUtil import isNullSpace, getDatasetLayout, getDatasetLayoutClass, get_slices from .util.dsetUtil import getChunkLayout, getSelectionShape, getShapeDims @@ -28,7 +30,6 @@ from .util.chunkUtil import getChunkCoverage, getDataCoverage from .util.chunkUtil import getQueryDtype, get_chunktable_dims from .util.httpUtil import http_delete, http_put -from .util.idUtil import getDataNodeUrl, isSchema2Id, getS3Key, getObjId from .util.rangegetUtil import getHyperChunkFactors from .util.storUtil import getStorKeys diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 721970fb..77e85db0 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -20,10 +20,10 @@ from h5json.hdf5dtype import validateTypeItem, createDataType, getBaseTypeJson, getItemSize from h5json.array_util import getNumElements, getNumpyValue +from h5json.objid import isValidUuid, isSchema2Id from .util.httpUtil import getHref, respJsonAssemble from .util.httpUtil import jsonResponse, getBooleanParam -from .util.idUtil import isValidUuid, isSchema2Id from .util.dsetUtil import getPreviewQuery, getFilterItem, getShapeDims from .util.chunkUtil import getChunkSize, guessChunk, expandChunk, shrinkChunk from 
.util.chunkUtil import getContiguousLayout diff --git a/hsds/folder_crawl.py b/hsds/folder_crawl.py index 48f37ce6..05048758 100644 --- a/hsds/folder_crawl.py +++ b/hsds/folder_crawl.py @@ -19,8 +19,9 @@ from aiohttp.web_exceptions import HTTPGone, HTTPInternalServerError from aiohttp.web_exceptions import HTTPServiceUnavailable -from .util.idUtil import getNodeCount from .servicenode_lib import getObjectJson, getDomainResponse, getDomainJson +from .util.nodeUtil import getNodeCount + from . import hsds_logger as log diff --git a/hsds/group_dn.py b/hsds/group_dn.py index 0a6bb937..d67f672e 100755 --- a/hsds/group_dn.py +++ b/hsds/group_dn.py @@ -19,7 +19,8 @@ from aiohttp.web_exceptions import HTTPNotFound, HTTPServiceUnavailable from aiohttp.web import json_response -from .util.idUtil import isValidUuid, isSchema2Id, isRootObjId, getRootObjId +from h5json.objid import isValidUuid, isSchema2Id, isRootObjId, getRootObjId + from .util.domainUtil import isValidBucketName from .util.timeUtil import getNow from .datanode_lib import get_obj_id, check_metadata_obj, get_metadata_obj diff --git a/hsds/group_sn.py b/hsds/group_sn.py index 2b573985..c857683e 100755 --- a/hsds/group_sn.py +++ b/hsds/group_sn.py @@ -16,8 +16,9 @@ from aiohttp.web_exceptions import HTTPBadRequest, HTTPForbidden, HTTPNotFound from json import JSONDecodeError +from h5json.objid import isValidUuid + from .util.httpUtil import getHref, jsonResponse, getBooleanParam -from .util.idUtil import isValidUuid from .util.authUtil import getUserPasswordFromRequest, aclCheck from .util.authUtil import validateUserPassword from .util.domainUtil import getDomainFromRequest, isValidDomain diff --git a/hsds/headnode.py b/hsds/headnode.py index 9b49517d..354a17bc 100755 --- a/hsds/headnode.py +++ b/hsds/headnode.py @@ -22,7 +22,7 @@ from . import config from .util.timeUtil import unixTimeToUTC, elapsedTime -from .util.idUtil import createNodeId +from .util.nodeUtil import createNodeId from . 
import hsds_logger as log from .util import query_marathon as marathonClient diff --git a/hsds/link_dn.py b/hsds/link_dn.py index e53984ed..09b3ac20 100755 --- a/hsds/link_dn.py +++ b/hsds/link_dn.py @@ -20,7 +20,8 @@ from aiohttp.web_exceptions import HTTPInternalServerError from aiohttp.web import json_response -from .util.idUtil import isValidUuid +from h5json.objid import isValidUuid + from .util.globparser import globmatch from .util.linkUtil import validateLinkName, getLinkClass, isEqualLink from .util.domainUtil import isValidBucketName diff --git a/hsds/link_sn.py b/hsds/link_sn.py index 71e39246..b7b36ef7 100755 --- a/hsds/link_sn.py +++ b/hsds/link_sn.py @@ -16,10 +16,12 @@ from aiohttp.web_exceptions import HTTPBadRequest from json import JSONDecodeError +from h5json.objid import isValidUuid, getCollectionForId + +from .util.nodeUtil import getDataNodeUrl from .util.httpUtil import getHref, getBooleanParam from .util.httpUtil import jsonResponse from .util.globparser import globmatch -from .util.idUtil import isValidUuid, getDataNodeUrl, getCollectionForId from .util.authUtil import getUserPasswordFromRequest, validateUserPassword from .util.domainUtil import getDomainFromRequest, isValidDomain, verifyRoot from .util.domainUtil import getBucketForDomain diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 3d65e619..d2db9d4d 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -22,10 +22,11 @@ from aiohttp import ClientResponseError from h5json.array_util import encodeData +from h5json.objid import getCollectionForId, createObjId, getRootObjId +from h5json.objid import isSchema2Id, getS3Key, isValidUuid +from .util.nodeUtil import getDataNodeUrl from .util.authUtil import getAclKeys -from .util.idUtil import getDataNodeUrl, getCollectionForId, createObjId, getRootObjId -from .util.idUtil import isSchema2Id, getS3Key, isValidUuid from .util.linkUtil import h5Join, validateLinkName, getLinkClass from .util.storUtil import getStorJSONObj, isStorObj from .util.authUtil import aclCheck diff --git a/hsds/util/httpUtil.py b/hsds/util/httpUtil.py index 0d43ae4a..3ca19f19 100644 --- a/hsds/util/httpUtil.py +++ b/hsds/util/httpUtil.py @@ -25,7 +25,8 @@ from aiohttp.web_exceptions import HTTPRequestEntityTooLarge from aiohttp.web_exceptions import HTTPServiceUnavailable, HTTPBadRequest from aiohttp.client_exceptions import ClientError -from hsds.util.idUtil import isValidUuid + +from h5json.objid import isValidUuid from .. import hsds_logger as log from .. import config diff --git a/hsds/util/idUtil.py b/hsds/util/idUtil.py deleted file mode 100644 index fe21bbb0..00000000 --- a/hsds/util/idUtil.py +++ /dev/null @@ -1,540 +0,0 @@ -############################################################################## -# Copyright by The HDF Group. # -# All rights reserved. # -# # -# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # -# Utilities. The full HSDS copyright notice, including # -# terms governing use, modification, and redistribution, is contained in # -# the file COPYING, which can be found at the root of the source code # -# distribution tree. If you do not have access to this file, you may # -# request a copy from help@hdfgroup.org. # -############################################################################## -# -# idUtil: -# id (uuid) related functions -# - -import os.path -import hashlib -import uuid -from aiohttp.web_exceptions import HTTPServiceUnavailable -from .. 
import hsds_logger as log
-
-
-S3_URI = "s3://"
-FILE_URI = "file://"
-AZURE_URI = "blob.core.windows.net/"  # preceded with "https://"
-
-
-def _getStorageProtocol(uri):
-    """ returns 's3://', 'file://', or 'https://...net/' prefix if present.
-        If the prefix is in the form: https://myaccount.blob.core.windows.net/mycontainer
-        (references Azure blob storage), return: https://myaccount.blob.core.windows.net/
-        otherwise None """
-
-    if not uri:
-        protocol = None
-    elif uri.startswith(S3_URI):
-        protocol = S3_URI
-    elif uri.startswith(FILE_URI):
-        protocol = FILE_URI
-    elif uri.startswith("https://") and uri.find(AZURE_URI) > 0:
-        n = uri.find(AZURE_URI) + len(AZURE_URI)
-        protocol = uri[:n]
-    elif uri.find("://") >= 0:
-        raise ValueError(f"storage uri: {uri} not supported")
-    else:
-        protocol = None
-    return protocol
-
-
-def _getBaseName(uri):
-    """ Return the part of the URI after the storage protocol (if any) """
-
-    protocol = _getStorageProtocol(uri)
-    if not protocol:
-        return uri
-    else:
-        return uri[len(protocol):]
-
-
-def getIdHash(id):
-    """Return md5 prefix based on id value"""
-    m = hashlib.new("md5")
-    m.update(id.encode("utf8"))
-    hexdigest = m.hexdigest()
-    return hexdigest[:5]
-
-
-def isSchema2Id(id):
-    """return true if this is a v2 id"""
-    # v1 ids are in the standard UUID format: 8-4-4-4-12
-    # v2 ids are in the non-standard: 8-8-4-6-6
-    parts = id.split("-")
-    if len(parts) != 6:
-        raise ValueError(f"Unexpected id format for uuid: {id}")
-    if len(parts[2]) == 8:
-        return True
-    else:
-        return False
-
-
-def getIdHexChars(id):
-    """get the hex chars of the given id"""
-    if id[0] == "c":
-        # don't include chunk index
-        index = id.index("_")
-        parts = id[0:index].split("-")
-    else:
-        parts = id.split("-")
-    if len(parts) != 6:
-        raise ValueError(f"Unexpected id format for uuid: {id}")
-    return "".join(parts[1:])
-
-
-def hexRot(ch):
-    """rotate hex character by 8"""
-    return format((int(ch, base=16) + 8) % 16, "x")
-
-
-def isRootObjId(id):
-    """returns true if this is a root id (only for v2 schema)"""
-    if not isSchema2Id(id):
-        raise ValueError("isRootObjId can only be used with v2 ids")
-    validateUuid(id)  # will throw ValueError exception if not a objid
-    if id[0] != "g":
-        return False  # not a group
-    token = getIdHexChars(id)
-    # root ids will have last 16 chars rotated version of the first 16
-    is_root = True
-    for i in range(16):
-        if token[i] != hexRot(token[i + 16]):
-            is_root = False
-            break
-    return is_root
-
-
-def getRootObjId(id):
-    """returns root id for this objid if this is a root id
-    (only for v2 schema)
-    """
-    if isRootObjId(id):
-        return id  # this is the root id
-    token = list(getIdHexChars(id))
-    # root ids will have last 16 chars rotated version of the first 16
-    for i in range(16):
-        token[i + 16] = hexRot(token[i])
-    token = "".join(token)
-    root_id = "g-" + token[0:8] + "-" + token[8:16] + "-" + token[16:20]
-    root_id += "-" + token[20:26] + "-" + token[26:32]
-
-    return root_id
-
-
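The hex-rotation trick above is what ties every v2 id to its root. A small sketch of the invariants, using the functions that patch 05 re-imports from h5json.objid (assuming the package preserves the behavior of this deleted code):

    from h5json.objid import createObjId, getRootObjId, getS3Key, isRootObjId

    root_id = createObjId("roots")
    assert isRootObjId(root_id)

    # a dataset id embeds its root: the first 16 hex chars are shared,
    # so the root id can always be recovered from the object id
    dset_id = createObjId("datasets", rootid=root_id)
    assert getRootObjId(dset_id) == root_id

    # storage keys group objects under their root, e.g.
    # db/xxxxxxxx-xxxxxxxx/d/xxxx-xxxxxx-xxxxxx/.dataset.json
    print(getS3Key(dset_id))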
-def createObjId(obj_type, rootid=None):
-    if obj_type not in ("groups", "datasets", "datatypes", "chunks", "roots"):
-        raise ValueError("unexpected obj_type")
-
-    prefix = None
-    if obj_type == "datatypes":
-        prefix = "t"  # don't collide with datasets
-    elif obj_type == "roots":
-        prefix = "g"  # root obj is a group
-    else:
-        prefix = obj_type[0]
-    if not rootid and obj_type != "roots":
-        # v1 schema - folder
-        objid = prefix + "-" + str(uuid.uuid1())
-    elif rootid and not isSchema2Id(rootid):
-        # v1 schema - domain
-        objid = prefix + "-" + str(uuid.uuid1())
-    else:
-        # schema v2
-        salt = uuid.uuid4().hex
-        # take a hash to randomize the uuid
-        token = list(hashlib.sha256(salt.encode()).hexdigest())
-
-        if rootid:
-            # replace first 16 chars of token with first 16 chars of root id
-            root_hex = getIdHexChars(rootid)
-            token[0:16] = root_hex[0:16]
-        else:
-            # obj_type == "roots"
-            # use only 16 chars, but make it look a 32 char id
-            for i in range(16):
-                token[16 + i] = hexRot(token[i])
-        # format as a string
-        token = "".join(token)
-        objid = prefix + "-" + token[0:8] + "-" + token[8:16] + "-"
-        objid += token[16:20] + "-" + token[20:26] + "-" + token[26:32]
-
-    return objid
-
-
-def getS3Key(id):
-    """Return s3 key for given id.
-
-    For schema v1:
-        A md5 prefix is added to the front of the returned key to better
-        distribute S3 objects.
-    For schema v2:
-        The id is converted to the pattern: "db/{rootid[0:16]}" for rootids and
-        "db/id[0:16]/{prefix}/id[16-32]" for other ids
-        Chunk ids have the chunk index added after the slash:
-        "db/id[0:16]/d/id[16:32]/x_y_z"
-
-    For domain id's:
-        Return a key with the .domain suffix and no preceding slash.
-        For non-default buckets, use the format: <bucket_name>/s3_key
-        If the id has a storage specifier ("s3://", "file://", etc.)
-        include that along with the bucket name.  e.g.: "s3://mybucket/a_folder/a_file.h5"
-    """
-
-    base_id = _getBaseName(id)  # strip any s3://, etc.
-    if base_id.find("/") > 0:
-        # a domain id
-        domain_suffix = ".domain.json"
-        index = base_id.find("/") + 1
-        key = base_id[index:]
-        if not key.endswith(domain_suffix):
-            if key[-1] != "/":
-                key += "/"
-            key += domain_suffix
-    else:
-        if isSchema2Id(id):
-            # schema v2 id
-            hexid = getIdHexChars(id)
-            prefix = id[0]  # one of g, d, t, c
-            if prefix not in ("g", "d", "t", "c"):
-                raise ValueError(f"Unexpected id: {id}")
-
-            if isRootObjId(id):
-                key = f"db/{hexid[0:8]}-{hexid[8:16]}"
-            else:
-                partition = ""
-                if prefix == "c":
-                    # use 'd' so that chunks will show up under their dataset
-                    s3col = "d"
-                    n = id.find("-")
-                    if n > 1:
-                        # extract the partition index if present
-                        partition = "p" + id[1:n]
-                else:
-                    s3col = prefix
-                key = f"db/{hexid[0:8]}-{hexid[8:16]}/{s3col}/{hexid[16:20]}"
-                key += f"-{hexid[20:26]}-{hexid[26:32]}"
-            if prefix == "c":
-                if partition:
-                    key += "/"
-                    key += partition
-                # add the chunk coordinate
-                index = id.index("_")  # will raise ValueError if not found
-                n = index + 1
-                coord = id[n:]
-                key += "/"
-                key += coord
-            elif prefix == "g":
-                # add key suffix for group
-                key += "/.group.json"
-            elif prefix == "d":
-                # add key suffix for dataset
-                key += "/.dataset.json"
-            else:
-                # add key suffix for datatype
-                key += "/.datatype.json"
-        else:
-            # schema v1 id
-            idhash = getIdHash(id)
-            key = f"{idhash}-{id}"
-
-    return key
-
-
-def getObjId(s3key):
-    """Return object id given valid s3key"""
-    if all(
-        (
-            len(s3key) >= 44 and s3key[0:5].isalnum(),
-            len(s3key) >= 44 and s3key[5] == "-",
-            len(s3key) >= 44 and s3key[6] in ("g", "d", "c", "t"),
-        )
-    ):
-        # v1 obj keys
-        objid = s3key[6:]
-    elif s3key.endswith("/.domain.json"):
-        objid = "/" + s3key[: -(len("/.domain.json"))]
-    elif s3key.startswith("db/"):
-        # schema v2 object key
-        parts = s3key.split("/")
-        chunk_coord = ""  # used only for chunk ids
-        partition = ""  # likewise
-        token = []
-        for ch in parts[1]:
-            if ch != "-":
-                token.append(ch)
-
-        if len(parts) == 3:
-            # root id
-            # last part should be ".group.json"
-            if parts[2] != ".group.json":
-                raise ValueError(f"unexpected S3Key: {s3key}")
-            # add 16 more chars using rotated version of first 16
-            for i in 
range(16): - token.append(hexRot(token[i])) - prefix = "g" - elif len(parts) == 5: - # group, dataset, or datatype or chunk - for ch in parts[3]: - if ch != "-": - token.append(ch) - - if parts[2] == "g" and parts[4] == ".group.json": - prefix = "g" # group json - elif parts[2] == "t" and parts[4] == ".datatype.json": - prefix = "t" # datatype json - elif parts[2] == "d": - if parts[4] == ".dataset.json": - prefix = "d" # dataset json - else: - # chunk object - prefix = "c" - chunk_coord = "_" + parts[4] - else: - raise ValueError(f"unexpected S3Key: {s3key}") - elif len(parts) == 6: - # chunk key with partitioning - for ch in parts[3]: - if ch != "-": - token.append(ch) - if parts[2][0] != "d": - raise ValueError(f"unexpected S3Key: {s3key}") - prefix = "c" - partition = parts[4] - if partition[0] != "p": - raise ValueError(f"unexpected S3Key: {s3key}") - partition = partition[1:] # strip off the p - chunk_coord = "_" + parts[5] - else: - raise ValueError(f"unexpected S3Key: {s3key}") - - token = "".join(token) - objid = prefix + partition + "-" + token[0:8] + "-" + token[8:16] - objid += "-" + token[16:20] + "-" + token[20:26] + "-" - objid += token[26:32] + chunk_coord - else: - msg = f"unexpected S3Key: {s3key}" - log.warn(msg) - raise ValueError(msg) - return objid - - -def isS3ObjKey(s3key): - valid = False - try: - objid = getObjId(s3key) - if objid: - valid = True - except KeyError: - pass # ignore - except ValueError: - pass # ignore - return valid - - -def createNodeId(prefix, node_number=None): - """Create a random id used to identify nodes""" - node_id = "" # nothing too bad happens if this doesn't get set - if node_number is not None: - # just make an id based on the node_number - hash_key = f"{node_number + 1:03d}" - else: - # use the container id if we are running inside docker - hash_key = getIdHash(str(uuid.uuid1())) - proc_file = "/proc/self/cgroup" - if os.path.isfile(proc_file): - with open(proc_file) as f: - first_line = f.readline() - if first_line: - fields = first_line.split(":") - if len(fields) >= 3: - field = fields[2] - if field.startswith("/docker/"): - docker_len = len("/docker/") - - if len(field) > docker_len + 12: - n = docker_len - m = n + 12 - node_id = field[n:m] - - if node_id: - key = f"{prefix}-{node_id}-{hash_key}" - else: - key = f"{prefix}-{hash_key}" - return key - - -def getCollectionForId(obj_id): - """return groups/datasets/datatypes based on id""" - if not isinstance(obj_id, str): - raise ValueError("invalid object id") - collection = None - if obj_id.startswith("g-"): - collection = "groups" - elif obj_id.startswith("d-"): - collection = "datasets" - elif obj_id.startswith("t-"): - collection = "datatypes" - else: - raise ValueError("not a collection id") - return collection - - -def validateUuid(id, obj_class=None): - if not isinstance(id, str): - raise ValueError("Expected string type") - if len(id) < 38: - # id should be prefix (e.g. 
"g-") and uuid value - raise ValueError("Unexpected id length") - if id[0] not in ("g", "d", "t", "c"): - raise ValueError("Unexpected prefix") - if id[0] != "c" and id[1] != "-": - # chunk ids may have a partition index following the c - raise ValueError("Unexpected prefix") - if obj_class is not None: - obj_class = obj_class.lower() - prefix = obj_class[0] - if obj_class.startswith("datatype"): - prefix = "t" - if id[0] != prefix: - raise ValueError(f"Unexpected prefix for class: {obj_class}") - if id[0] == "c": - # trim the type char and any partition id - n = id.find("-") - if n == -1: - raise ValueError("Invalid chunk id") - - # trim the chunk index for chunk ids - m = id.find("_") - if m == -1: - raise ValueError("Invalid chunk id") - n += 1 - id = "c-" + id[n:m] - if len(id) != 38: - # id should be 36 now - raise ValueError("Unexpected id length") - - for ch in id: - if ch.isalnum(): - continue - if ch == "-": - continue - raise ValueError(f"Unexpected character in uuid: {ch}") - - -def isValidUuid(id, obj_class=None): - try: - validateUuid(id, obj_class) - return True - except ValueError: - return False - - -def isValidChunkId(id): - if not isValidUuid(id): - return False - if id[0] != "c": - return False - return True - - -def getClassForObjId(id): - """return domains/chunks/groups/datasets/datatypes based on id""" - if not isinstance(id, str): - raise ValueError("Expected string type") - if len(id) == 0: - raise ValueError("Empty string") - if id[0] == "/": - return "domains" - if isValidChunkId(id): - return "chunks" - else: - return getCollectionForId(id) - - -def isObjId(id): - """return true if uuid or domain""" - if not isinstance(id, str) or len(id) == 0: - return False - if id.find("/") > 0: - # domain id is any string in the form / - return True - return isValidUuid(id) - - -def getUuidFromId(id): - """strip off the type prefix ('g-' or 'd-', or 't-') - and return the uuid part""" - return id[2:] - - -def getObjPartition(id, count): - """Get the id of the dn node that should be handling the given obj id""" - hash_code = getIdHash(id) - hash_value = int(hash_code, 16) - number = hash_value % count - return number - - -def getNodeNumber(app): - if app["node_type"] == "sn": - log.error("node number if only for DN nodes") - raise ValueError() - - dn_ids = app["dn_ids"] - log.debug(f"getNodeNumber(from dn_ids: {dn_ids})") - for i in range(len(dn_ids)): - dn_id = dn_ids[i] - if dn_id == app["id"]: - log.debug(f"returning nodeNumber: {i}") - return i - log.error("getNodeNumber, no matching id") - return -1 - - -def getNodeCount(app): - dn_urls = app["dn_urls"] - log.debug(f"getNodeCount for dn_urls: {dn_urls}") - dn_node_count = len(dn_urls) - return dn_node_count - - -def validateInPartition(app, obj_id): - node_number = getNodeNumber(app) - node_count = getNodeCount(app) - msg = f"obj_id: {obj_id}, node_count: {node_count}, " - msg += f"node_number: {node_number}" - log.debug(msg) - partition_number = getObjPartition(obj_id, node_count) - if partition_number != node_number: - # The request shouldn't have come to this node' - msg = f"wrong node for 'id':{obj_id}, expected node {node_number} " - msg += f"got {partition_number}" - log.error(msg) - raise KeyError(msg) - - -def getDataNodeUrl(app, obj_id): - """Return host/port for datanode for given obj_id. 
- Throw exception if service is not ready""" - dn_urls = app["dn_urls"] - dn_node_count = getNodeCount(app) - node_state = app["node_state"] - if node_state != "READY" or dn_node_count <= 0: - msg = "Service not ready" - log.warn(msg) - raise HTTPServiceUnavailable() - dn_number = getObjPartition(obj_id, dn_node_count) - url = dn_urls[dn_number] - log.debug(f"got dn_url: {url} for obj_id: {obj_id}") - return url diff --git a/testall.py b/testall.py index 247d4a91..4123d87f 100755 --- a/testall.py +++ b/testall.py @@ -16,7 +16,7 @@ PYTHON_CMD = "python" # change to "python3" if "python" invokes python version 2.x unit_tests = ('chunk_util_test', 'compression_test', 'domain_util_test', - 'dset_util_test', 'id_util_test', 'lru_cache_test', + 'dset_util_test', 'lru_cache_test', 'shuffle_test', 'rangeget_util_test') integ_tests = ('uptest', 'setup_test', 'domain_test', 'group_test', diff --git a/tests/integ/vlen_test.py b/tests/integ/vlen_test.py index 38555c5d..d318c1e7 100755 --- a/tests/integ/vlen_test.py +++ b/tests/integ/vlen_test.py @@ -23,7 +23,6 @@ class VlenTest(unittest.TestCase): def __init__(self, *args, **kwargs): super(VlenTest, self).__init__(*args, **kwargs) self.base_domain = helper.getTestDomainName(self.__class__.__name__) - print(self.base_domain) helper.setupDomain(self.base_domain) self.endpoint = helper.getEndpoint() @@ -645,12 +644,8 @@ def testPutVLenCompoundBinary(self): # write as binary data data = arrayToBytes(arr) - print("data:", data) - for i in range(len(data)): - print(f"{i:04d}: {data[i]}") self.assertEqual(len(data), 192) # will vary based on count arr_copy = bytesToArray(data, dt_compound, (count,)) - print("arr_copy:", arr_copy) req = self.endpoint + "/datasets/" + dset_uuid + "/value" rsp = self.session.put(req, data=data, headers=headers_bin_req) self.assertEqual(rsp.status_code, 200) diff --git a/tests/unit/id_util_test.py b/tests/unit/id_util_test.py deleted file mode 100755 index 06f974c4..00000000 --- a/tests/unit/id_util_test.py +++ /dev/null @@ -1,212 +0,0 @@ -############################################################################## -# Copyright by The HDF Group. # -# All rights reserved. # -# # -# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # -# Utilities. The full HSDS copyright notice, including # -# terms governing use, modification, and redistribution, is contained in # -# the file COPYING, which can be found at the root of the source code # -# distribution tree. If you do not have access to this file, you may # -# request a copy from help@hdfgroup.org. 
# -############################################################################## -import unittest -import sys - -sys.path.append("../..") -from hsds.util.idUtil import getObjPartition, isValidUuid, validateUuid -from hsds.util.idUtil import createObjId, getCollectionForId -from hsds.util.idUtil import isObjId, isS3ObjKey, getS3Key, getObjId, isSchema2Id -from hsds.util.idUtil import isRootObjId, getRootObjId - - -class IdUtilTest(unittest.TestCase): - def __init__(self, *args, **kwargs): - super(IdUtilTest, self).__init__(*args, **kwargs) - # main - - def testCreateObjId(self): - id_len = 38 # 36 for uuid plus two for prefix ("g-", "d-") - ids = set() - for obj_class in ("groups", "datasets", "datatypes", "chunks"): - for i in range(100): - id = createObjId(obj_class) - self.assertEqual(len(id), id_len) - self.assertTrue(id[0] in ("g", "d", "t", "c")) - self.assertEqual(id[1], "-") - ids.add(id) - - self.assertEqual(len(ids), 400) - try: - createObjId("bad_class") - self.assertTrue(False) # should throw exception - except ValueError: - pass # expected - - def testIsValidUuid(self): - group1_id = "g-314d61b8-9954-11e6-a733-3c15c2da029e" # orig schema - group2_id = "g-314d61b8-995411e6-a733-3c15c2-da029e" - root_id = "g-f9aaa28e-d42e10e5-7122-2a065c-a6986d" - dataset1_id = "d-4c48f3ae-9954-11e6-a3cd-3c15c2da029e" # orig schema - dataset2_id = "d-4c48f3ae-995411e6-a3cd-3c15c2-da029e" - ctype1_id = "t-8c785f1c-9953-11e6-9bc2-0242ac110005" # orig schema - ctype2_id = "t-8c785f1c-995311e6-9bc2-0242ac-110005" - chunk1_id = "c-8c785f1c-9953-11e6-9bc2-0242ac110005_7_2" # orig schema - chunk2_id = "c-8c785f1c-995311e6-9bc2-0242ac-110005_7_2" - domain_id = "mybucket/bob/mydata.h5" - s3_domain_id = "s3://mybucket/bob/mydata.h5" - file_domain_id = "file://mybucket/bob/mydata.h5" - azure_domain_id = "https://myaccount.blob.core.windows.net/mybucket/bob/mydata.h5" - valid_id_map = { - group1_id: "a49be-g-314d61b8-9954-11e6-a733-3c15c2da029e", - group2_id: "db/314d61b8-995411e6/g/a733-3c15c2-da029e/.group.json", - dataset1_id: "26928-d-4c48f3ae-9954-11e6-a3cd-3c15c2da029e", - dataset2_id: "db/4c48f3ae-995411e6/d/a3cd-3c15c2-da029e/.dataset.json", - ctype1_id: "5a9cf-t-8c785f1c-9953-11e6-9bc2-0242ac110005", - ctype2_id: "db/8c785f1c-995311e6/t/9bc2-0242ac-110005/.datatype.json", - chunk1_id: "dc4ce-c-8c785f1c-9953-11e6-9bc2-0242ac110005_7_2", - chunk2_id: "db/8c785f1c-995311e6/d/9bc2-0242ac-110005/7_2", - domain_id: "bob/mydata.h5/.domain.json", - s3_domain_id: "bob/mydata.h5/.domain.json", - file_domain_id: "bob/mydata.h5/.domain.json", - azure_domain_id: "bob/mydata.h5/.domain.json", } - - bad_ids = ("g-1e76d862", "/bob/mydata.h5") - - self.assertTrue(isValidUuid(group1_id)) - self.assertFalse(isSchema2Id(group1_id)) - self.assertTrue(isValidUuid(group1_id, obj_class="Group")) - self.assertTrue(isValidUuid(group1_id, obj_class="group")) - self.assertTrue(isValidUuid(group1_id, obj_class="groups")) - self.assertTrue(isSchema2Id(root_id)) - self.assertTrue(isValidUuid(root_id, obj_class="Group")) - self.assertTrue(isValidUuid(root_id, obj_class="group")) - self.assertTrue(isValidUuid(root_id, obj_class="groups")) - self.assertTrue(isRootObjId(root_id)) - self.assertTrue(isValidUuid(dataset1_id, obj_class="datasets")) - self.assertFalse(isSchema2Id(dataset1_id)) - self.assertTrue(isValidUuid(ctype1_id, obj_class="datatypes")) - self.assertFalse(isSchema2Id(ctype1_id)) - self.assertTrue(isValidUuid(chunk1_id, obj_class="chunks")) - self.assertFalse(isSchema2Id(chunk1_id)) - 
self.assertTrue(isValidUuid(group2_id)) - self.assertTrue(isSchema2Id(group2_id)) - self.assertTrue(isValidUuid(group2_id, obj_class="Group")) - self.assertTrue(isValidUuid(group2_id, obj_class="group")) - self.assertTrue(isValidUuid(group2_id, obj_class="groups")) - self.assertFalse(isRootObjId(group2_id)) - self.assertTrue(isValidUuid(dataset2_id, obj_class="datasets")) - self.assertTrue(isSchema2Id(dataset2_id)) - self.assertTrue(isValidUuid(ctype2_id, obj_class="datatypes")) - self.assertTrue(isSchema2Id(ctype2_id)) - self.assertTrue(isValidUuid(chunk2_id, obj_class="chunks")) - self.assertTrue(isSchema2Id(chunk2_id)) - validateUuid(group1_id) - try: - isRootObjId(group1_id) - self.assertTrue(False) - except ValueError: - # only works for v2 schema - pass # expected - - for item in valid_id_map: - self.assertTrue(isObjId(item)) - s3key = getS3Key(item) - self.assertTrue(s3key[0] != "/") - self.assertTrue(isS3ObjKey(s3key)) - expected = valid_id_map[item] - self.assertEqual(s3key, expected) - if item.find("/") > 0: - continue # bucket name gets lost when domain ids get converted to s3keys - objid = getObjId(s3key) - self.assertEqual(objid, item) - for item in bad_ids: - self.assertFalse(isValidUuid(item)) - self.assertFalse(isObjId(item)) - - def testGetObjPartition(self): - node_count = 12 - for obj_class in ("groups", "datasets", "datatypes", "chunks"): - for i in range(100): - id = createObjId(obj_class) - node_number = getObjPartition(id, node_count) - self.assertTrue(node_number >= 0) - self.assertTrue(node_number < node_count) - # try a domain partition - node_number = getObjPartition("/home/test_user1", node_count) - self.assertTrue(node_number >= 0) - self.assertTrue(node_number < node_count) - - def testGetCollection(self): - group_id = "g-314d61b8-9954-11e6-a733-3c15c2da029e" - dataset_id = "d-4c48f3ae-9954-11e6-a3cd-3c15c2da029e" - ctype_id = "t-8c785f1c-9953-11e6-9bc2-0242ac110005" - bad_id = "x-59647858-9954-11e6-95d2-3c15c2da029e" - self.assertEqual(getCollectionForId(group_id), "groups") - self.assertEqual(getCollectionForId(dataset_id), "datasets") - self.assertEqual(getCollectionForId(ctype_id), "datatypes") - try: - getCollectionForId(bad_id) - self.assertTrue(False) - except ValueError: - pass # expected - try: - getCollectionForId(None) - self.assertTrue(False) - except ValueError: - pass # expected - - def testSchema2Id(self): - root_id = createObjId("roots") - group_id = createObjId("groups", rootid=root_id) - dataset_id = createObjId("datasets", rootid=root_id) - ctype_id = createObjId("datatypes", rootid=root_id) - - self.assertEqual(getCollectionForId(root_id), "groups") - self.assertEqual(getCollectionForId(group_id), "groups") - self.assertEqual(getCollectionForId(dataset_id), "datasets") - self.assertEqual(getCollectionForId(ctype_id), "datatypes") - chunk_id = "c" + dataset_id[1:] + "_1_2" - print(chunk_id) - chunk_partition_id = "c42-" + dataset_id[2:] + "_1_2" - - for id in (chunk_id, chunk_partition_id): - try: - getCollectionForId(id) - self.assertTrue(False) - except ValueError: - pass # expected - valid_ids = ( - group_id, - dataset_id, - ctype_id, - chunk_id, - chunk_partition_id, - root_id, - ) - s3prefix = getS3Key(root_id) - self.assertTrue(s3prefix.endswith("/.group.json")) - s3prefix = s3prefix[: -(len(".group.json"))] - for oid in valid_ids: - print("oid:", oid) - self.assertTrue(len(oid) >= 38) - parts = oid.split("-") - self.assertEqual(len(parts), 6) - self.assertTrue(oid[0] in ("g", "d", "t", "c")) - self.assertTrue(isSchema2Id(oid)) - if 
oid == root_id: - self.assertTrue(isRootObjId(oid)) - else: - self.assertFalse(isRootObjId(oid)) - self.assertEqual(getRootObjId(oid), root_id) - - s3key = getS3Key(oid) - print(s3key) - self.assertTrue(s3key.startswith(s3prefix)) - self.assertEqual(getObjId(s3key), oid) - self.assertTrue(isS3ObjKey(s3key)) - - -if __name__ == "__main__": - # setup test files - - unittest.main() From e1926c06c536b5f76d3af45dfb58526268a5850f Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 23 Apr 2025 18:30:16 +0200 Subject: [PATCH 06/49] add nodeUtil.py --- hsds/util/nodeUtil.py | 122 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 hsds/util/nodeUtil.py diff --git a/hsds/util/nodeUtil.py b/hsds/util/nodeUtil.py new file mode 100644 index 00000000..d39f158c --- /dev/null +++ b/hsds/util/nodeUtil.py @@ -0,0 +1,122 @@ +############################################################################## +# Copyright by The HDF Group. # +# All rights reserved. # +# # +# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # +# Utilities. The full HSDS copyright notice, including # +# terms governing use, modification, and redistribution, is contained in # +# the file COPYING, which can be found at the root of the source code # +# distribution tree. If you do not have access to this file, you may # +# request a copy from help@hdfgroup.org. # +############################################################################## +# +# nodeUtil: +# node (SN/DN mapping) related functions +# +import hashlib +import os.path +import uuid + +from aiohttp.web_exceptions import HTTPServiceUnavailable + +from .. import hsds_logger as log + + +def _getIdHash(id): + """Return md5 prefix based on id value""" + m = hashlib.new("md5") + m.update(id.encode("utf8")) + hexdigest = m.hexdigest() + return hexdigest[:5] + +def createNodeId(prefix, node_number=None): + """Create a random id used to identify nodes""" + node_id = "" # nothing too bad happens if this doesn't get set + if node_number is not None: + # just make an id based on the node_number + hash_key = f"{node_number + 1:03d}" + else: + # use the container id if we are running inside docker + hash_key = _getIdHash(str(uuid.uuid1())) + proc_file = "/proc/self/cgroup" + if os.path.isfile(proc_file): + with open(proc_file) as f: + first_line = f.readline() + if first_line: + fields = first_line.split(":") + if len(fields) >= 3: + field = fields[2] + if field.startswith("/docker/"): + docker_len = len("/docker/") + + if len(field) > docker_len + 12: + n = docker_len + m = n + 12 + node_id = field[n:m] + + if node_id: + key = f"{prefix}-{node_id}-{hash_key}" + else: + key = f"{prefix}-{hash_key}" + return key + + +def getObjPartition(id, count): + """Get the number of the dn node that should be handling the given obj id""" + hash_code = _getIdHash(id) + hash_value = int(hash_code, 16) + number = hash_value % count + return number + + +def getNodeNumber(app): + if app["node_type"] == "sn": + log.error("node number is only for DN nodes") + raise ValueError() + + dn_ids = app["dn_ids"] + log.debug(f"getNodeNumber(from dn_ids: {dn_ids})") + for i in range(len(dn_ids)): + dn_id = dn_ids[i] + if dn_id == app["id"]: + log.debug(f"returning nodeNumber: {i}") + return i + log.error("getNodeNumber, no matching id") + return -1 + +def getNodeCount(app): + dn_urls = app["dn_urls"] + log.debug(f"getNodeCount for dn_urls: {dn_urls}") + dn_node_count = len(dn_urls) + return dn_node_count + + +def validateInPartition(app, obj_id): + 
node_number = getNodeNumber(app) + node_count = getNodeCount(app) + msg = f"obj_id: {obj_id}, node_count: {node_count}, " + msg += f"node_number: {node_number}" + log.debug(msg) + partition_number = getObjPartition(obj_id, node_count) + if partition_number != node_number: + # The request shouldn't have come to this node + msg = f"wrong node for 'id':{obj_id}, expected node {node_number} " + msg += f"got {partition_number}" + log.error(msg) + raise KeyError(msg) + + +def getDataNodeUrl(app, obj_id): + """Return host/port for datanode for given obj_id. + Throw exception if service is not ready""" + dn_urls = app["dn_urls"] + dn_node_count = getNodeCount(app) + node_state = app["node_state"] + if node_state != "READY" or dn_node_count <= 0: + msg = "Service not ready" + log.warn(msg) + raise HTTPServiceUnavailable() + dn_number = getObjPartition(obj_id, dn_node_count) + url = dn_urls[dn_number] + log.debug(f"got dn_url: {url} for obj_id: {obj_id}") + return url From ae4579ff98d9169acf5557144618212de5f32ca1 Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 23 Apr 2025 18:54:06 +0200 Subject: [PATCH 07/49] fix parameter for createObjId call --- hsds/servicenode_lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index d2db9d4d..80835bcb 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -1076,7 +1076,7 @@ async def createObject(app, if creation_props: log.debug(f" cprops: {creation_props}") - obj_id = createObjId(collection, rootid=root_id) + obj_id = createObjId(collection, root_id=root_id) log.info(f"new obj id: {obj_id}") obj_json = {"id": obj_id, "root": root_id} if obj_type: From d6cad74320b18d491ec2424d8c267849026eb31d Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 23 Apr 2025 19:00:30 +0200 Subject: [PATCH 08/49] fix collection name for use with h5json --- hsds/domain_sn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hsds/domain_sn.py b/hsds/domain_sn.py index efbc31ab..4436db37 100755 --- a/hsds/domain_sn.py +++ b/hsds/domain_sn.py @@ -985,7 +985,7 @@ async def PUT_Domain(request): if not is_folder and not linked_json: # create a root group for the new domain - root_id = createObjId("roots") + root_id = createObjId("groups") log.debug(f"new root group id: {root_id}") group_json = {"id": root_id, "root": root_id, "domain": domain} log.debug(f"create group for domain, body: {group_json}") From 6add48a9185cb4eb1bc0d2d9d20458fad8c4bd5f Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 23 Apr 2025 19:38:21 +0200 Subject: [PATCH 09/49] use consistent collection name for isValidUuid --- hsds/ctype_dn.py | 4 ++-- hsds/ctype_sn.py | 8 ++++---- hsds/dset_dn.py | 8 ++++---- hsds/group_dn.py | 10 +++++----- hsds/link_dn.py | 8 ++++---- hsds/link_sn.py | 10 +++++----- hsds/servicenode_lib.py | 2 +- 7 files changed, 25 insertions(+), 25 deletions(-) diff --git a/hsds/ctype_dn.py b/hsds/ctype_dn.py index fe8a67a7..0d0f83e3 100755 --- a/hsds/ctype_dn.py +++ b/hsds/ctype_dn.py @@ -34,7 +34,7 @@ async def GET_Datatype(request): params = request.rel_url.query ctype_id = get_obj_id(request) - if not isValidUuid(ctype_id, obj_class="type"): + if not isValidUuid(ctype_id, obj_class="datatypes"): log.error(f"Unexpected type_id: {ctype_id}") raise HTTPInternalServerError() @@ -91,7 +91,7 @@ async def POST_Datatype(request): raise HTTPBadRequest(reason=msg) ctype_id = get_obj_id(request, body=body) - if not isValidUuid(ctype_id, obj_class="datatype"): + if not isValidUuid(ctype_id, 
obj_class="datatypes"): log.error("Unexpected type_id: {ctype_id}") raise HTTPInternalServerError() diff --git a/hsds/ctype_sn.py b/hsds/ctype_sn.py index d85ffc07..2030b63b 100755 --- a/hsds/ctype_sn.py +++ b/hsds/ctype_sn.py @@ -51,7 +51,7 @@ async def GET_Datatype(request): include_attrs = True if ctype_id: - if not isValidUuid(ctype_id, "Type"): + if not isValidUuid(ctype_id, "datatypes"): msg = f"Invalid type id: {ctype_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -62,7 +62,7 @@ async def GET_Datatype(request): group_id = None if "grpid" in params: group_id = params["grpid"] - if not isValidUuid(group_id, "Group"): + if not isValidUuid(group_id, "groups"): msg = f"Invalid parent group id: {group_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -103,7 +103,7 @@ async def GET_Datatype(request): # throws 404 if not found kwargs = {"bucket": bucket, "domain": domain} ctype_id, domain, _ = await getObjectIdByPath(app, group_id, h5path, **kwargs) - if not isValidUuid(ctype_id, "Datatype"): + if not isValidUuid(ctype_id, "datatypes"): msg = f"No datatype exist with the path: {h5path}" log.warn(msg) raise HTTPGone() @@ -273,7 +273,7 @@ async def DELETE_Datatype(request): msg = "Missing committed type id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(ctype_id, "Type"): + if not isValidUuid(ctype_id, "datatypes"): msg = f"Invalid committed type id: {ctype_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) diff --git a/hsds/dset_dn.py b/hsds/dset_dn.py index 60d1037b..dd761365 100755 --- a/hsds/dset_dn.py +++ b/hsds/dset_dn.py @@ -33,7 +33,7 @@ async def GET_Dataset(request): params = request.rel_url.query dset_id = get_obj_id(request) - if not isValidUuid(dset_id, obj_class="dataset"): + if not isValidUuid(dset_id, obj_class="datasets"): log.error(f"Unexpected dataset_id: {dset_id}") raise HTTPInternalServerError() if "bucket" in params: @@ -94,7 +94,7 @@ async def POST_Dataset(request): raise HTTPBadRequest(reason=msg) dset_id = get_obj_id(request, body=body) - if not isValidUuid(dset_id, obj_class="dataset"): + if not isValidUuid(dset_id, obj_class="datasets"): log.error(f"Unexpected dataset_id: {dset_id}") raise HTTPInternalServerError() @@ -176,7 +176,7 @@ async def DELETE_Dataset(request): dset_id = request.match_info.get("id") log.info(f"DELETE dataset: {dset_id}") - if not isValidUuid(dset_id, obj_class="dataset"): + if not isValidUuid(dset_id, obj_class="datasets"): log.error(f"Unexpected dataset id: {dset_id}") raise HTTPInternalServerError() @@ -220,7 +220,7 @@ async def PUT_DatasetShape(request): params = request.rel_url.query dset_id = request.match_info.get("id") - if not isValidUuid(dset_id, obj_class="dataset"): + if not isValidUuid(dset_id, obj_class="datasets"): log.error(f"Unexpected dset_id: {dset_id}") raise HTTPInternalServerError() diff --git a/hsds/group_dn.py b/hsds/group_dn.py index d67f672e..8a4f10f8 100755 --- a/hsds/group_dn.py +++ b/hsds/group_dn.py @@ -47,7 +47,7 @@ async def GET_Group(request): log.info(f"GET group: {group_id} bucket: {bucket}") - if not isValidUuid(group_id, obj_class="group"): + if not isValidUuid(group_id, obj_class="groups"): log.error(f"Unexpected group_id: {group_id}") raise HTTPInternalServerError() @@ -100,7 +100,7 @@ async def POST_Group(request): group_id = get_obj_id(request, body=body) log.info(f"POST group: {group_id} bucket: {bucket}") - if not isValidUuid(group_id, obj_class="group"): + if not isValidUuid(group_id, obj_class="groups"): log.error(f"Unexpected group_id: {group_id}") raise 
HTTPInternalServerError() if "root" not in body: @@ -116,7 +116,7 @@ async def POST_Group(request): root_id = body["root"] - if not isValidUuid(root_id, obj_class="group"): + if not isValidUuid(root_id, obj_class="groups"): msg = "Invalid root_id: " + root_id log.error(msg) raise HTTPInternalServerError() @@ -179,7 +179,7 @@ async def PUT_Group(request): # don't really need bucket param since the dirty ids know which bucket # they should write too - if not isValidUuid(root_id, obj_class="group"): + if not isValidUuid(root_id, obj_class="groups"): log.error(f"Unexpected group_id: {root_id}") raise HTTPInternalServerError() @@ -248,7 +248,7 @@ async def DELETE_Group(request): params = request.rel_url.query group_id = get_obj_id(request) - if not isValidUuid(group_id, obj_class="group"): + if not isValidUuid(group_id, obj_class="groups"): log.error(f"Unexpected group_id: {group_id}") raise HTTPInternalServerError() diff --git a/hsds/link_dn.py b/hsds/link_dn.py index 09b3ac20..1ad6133e 100755 --- a/hsds/link_dn.py +++ b/hsds/link_dn.py @@ -75,7 +75,7 @@ async def GET_Links(request): log.debug(f"GET_Links params: {params}") group_id = get_obj_id(request) log.info(f"GET links: {group_id}") - if not isValidUuid(group_id, obj_class="group"): + if not isValidUuid(group_id, obj_class="groups"): log.error(f"Unexpected group_id: {group_id}") raise HTTPInternalServerError() @@ -171,7 +171,7 @@ async def POST_Links(request): group_id = get_obj_id(request) log.info(f"POST_Links: {group_id}") - if not isValidUuid(group_id, obj_class="group"): + if not isValidUuid(group_id, obj_class="groups"): log.error(f"Unexpected group_id: {group_id}") raise HTTPInternalServerError() @@ -286,7 +286,7 @@ async def PUT_Links(request): group_id = get_obj_id(request) log.info(f"PUT links: {group_id}") - if not isValidUuid(group_id, obj_class="group"): + if not isValidUuid(group_id, obj_class="groups"): log.error(f"Unexpected group_id: {group_id}") raise HTTPInternalServerError() @@ -405,7 +405,7 @@ async def DELETE_Links(request): group_id = get_obj_id(request) log.info(f"DELETE links: {group_id}") - if not isValidUuid(group_id, obj_class="group"): + if not isValidUuid(group_id, obj_class="groups"): msg = f"Unexpected group_id: {group_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) diff --git a/hsds/link_sn.py b/hsds/link_sn.py index b7b36ef7..dc80d9e4 100755 --- a/hsds/link_sn.py +++ b/hsds/link_sn.py @@ -45,7 +45,7 @@ async def GET_Links(request): msg = "Missing group id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(group_id, obj_class="Group"): + if not isValidUuid(group_id, obj_class="groups"): msg = f"Invalid group id: {group_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -177,7 +177,7 @@ async def GET_Link(request): msg = "Missing group id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(group_id, obj_class="Group"): + if not isValidUuid(group_id, obj_class="groups"): msg = f"Invalid group id: {group_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -495,7 +495,7 @@ async def DELETE_Links(request): msg = "Missing group id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(group_id, obj_class="Group"): + if not isValidUuid(group_id, obj_class="groups"): msg = f"Invalid group id: {group_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -642,7 +642,7 @@ async def POST_Links(request): # do a check that everything is as it should with the item list for group_id in items: - if not isValidUuid(group_id, obj_class="Group"): + if not 
isValidUuid(group_id, obj_class="groups"): msg = f"Invalid group id: {group_id}" log.warn(msg) @@ -749,7 +749,7 @@ async def DELETE_Link(request): msg = "Missing group id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(group_id, obj_class="Group"): + if not isValidUuid(group_id, obj_class="groups"): msg = f"Invalid group id: {group_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 80835bcb..bb40620a 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -527,7 +527,7 @@ async def putLinks(app, group_id, items, bucket=None): """ create a new links. Return 201 if any item is a new link, or 200 if it's a duplicate of an existing link. """ - isValidUuid(group_id, obj_class="group") + isValidUuid(group_id, obj_class="groups") group_json = None # validate input From b13321cd97bb5e81efb63958dd5cbd2b8e4d826b Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 23 Apr 2025 19:48:00 +0200 Subject: [PATCH 10/49] fix flake8 format errors --- hsds/basenode.py | 1 - hsds/util/nodeUtil.py | 2 ++ tests/integ/vlen_test.py | 2 -- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/hsds/basenode.py b/hsds/basenode.py index 6dd83b64..1b1bbbd0 100644 --- a/hsds/basenode.py +++ b/hsds/basenode.py @@ -26,7 +26,6 @@ from aiohttp.web_exceptions import HTTPServiceUnavailable - from . import config from .util.httpUtil import http_get, http_post, jsonResponse from .util.authUtil import getUserPasswordFromRequest, validateUserPassword diff --git a/hsds/util/nodeUtil.py b/hsds/util/nodeUtil.py index d39f158c..8f67f400 100644 --- a/hsds/util/nodeUtil.py +++ b/hsds/util/nodeUtil.py @@ -29,6 +29,7 @@ def _getIdHash(id): hexdigest = m.hexdigest() return hexdigest[:5] + def createNodeId(prefix, node_number=None): """Create a random id used to identify nodes""" node_id = "" # nothing too bad happens if this doesn't get set @@ -84,6 +85,7 @@ def getNodeNumber(app): log.error("getNodeNumber, no matching id") return -1 + def getNodeCount(app): dn_urls = app["dn_urls"] log.debug(f"getNodeCount for dn_urls: {dn_urls}") diff --git a/tests/integ/vlen_test.py b/tests/integ/vlen_test.py index d318c1e7..d3d44ab5 100755 --- a/tests/integ/vlen_test.py +++ b/tests/integ/vlen_test.py @@ -13,7 +13,6 @@ import json import helper import numpy as np -import sys from h5json.hdf5dtype import createDataType from h5json.array_util import arrayToBytes, bytesToArray @@ -645,7 +644,6 @@ def testPutVLenCompoundBinary(self): # write as binary data data = arrayToBytes(arr) self.assertEqual(len(data), 192) # will vary based on count - arr_copy = bytesToArray(data, dt_compound, (count,)) req = self.endpoint + "/datasets/" + dset_uuid + "/value" rsp = self.session.put(req, data=data, headers=headers_bin_req) self.assertEqual(rsp.status_code, 200) From fee9390ee6fd0bc60275deac8c69ec5c31847ddc Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 23 Apr 2025 19:59:40 +0200 Subject: [PATCH 11/49] fix flake8 error in testall --- testall.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testall.py b/testall.py index 4123d87f..480ab693 100755 --- a/testall.py +++ b/testall.py @@ -16,7 +16,7 @@ PYTHON_CMD = "python" # change to "python3" if "python" invokes python version 2.x unit_tests = ('chunk_util_test', 'compression_test', 'domain_util_test', - 'dset_util_test', 'lru_cache_test', + 'dset_util_test', 'lru_cache_test', 'shuffle_test', 'rangeget_util_test') integ_tests = ('uptest', 'setup_test', 'domain_test', 'group_test', 
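The patches that follow switch object id generation over to the h5json package and, starting with PATCH 14/49, also let clients mint object ids themselves and pass them in on POST. A minimal sketch of the client-side workflow this enables; the endpoint URL and empty headers dict are illustrative placeholders (the integ tests build real auth/domain headers via helper.getRequestHeaders), but the id and payload pattern follows the new tests added in the later patches:

    import json

    import requests
    from h5json.objid import createObjId

    endpoint = "http://localhost:5101"  # illustrative HSDS endpoint
    headers = {}  # domain/auth headers, e.g. as built by tests/integ/helper.py

    # get the root group id of the target domain
    rsp = requests.get(endpoint + "/", headers=headers)
    root_uuid = rsp.json()["root"]

    # mint a group id client-side, tied to the domain's root id
    grp_id = createObjId("groups", root_id=root_uuid)

    # create the group under the client-generated id, linked from the root group
    payload = {"id": grp_id, "link": {"id": root_uuid, "name": "linked_group"}}
    rsp = requests.post(endpoint + "/groups", data=json.dumps(payload), headers=headers)
    assert rsp.status_code == 201
    assert rsp.json()["id"] == grp_id

    # re-posting the same id is rejected as a 400 (bad request) rather than a 500
    rsp = requests.post(endpoint + "/groups", data=json.dumps(payload), headers=headers)
    assert rsp.status_code == 400

An id minted for one domain can't be reused in another: createObject checks that the id's embedded root hash matches the domain's root id and rejects the request otherwise.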
From f1b1cabddad9a19e93f75c597e17bbc49eb824c8 Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 23 Apr 2025 20:17:52 +0200 Subject: [PATCH 12/49] use h5json for unit test id --- tests/unit/lru_cache_test.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/tests/unit/lru_cache_test.py b/tests/unit/lru_cache_test.py index 5e747c92..002ca822 100755 --- a/tests/unit/lru_cache_test.py +++ b/tests/unit/lru_cache_test.py @@ -14,9 +14,16 @@ import sys import numpy as np +from h5json.objid import createObjId + sys.path.append("../..") from hsds.util.lruCache import LruCache -from hsds.util.idUtil import createObjId + + +def _createId(): + objid = createObjId("groups") + objid = 'c' + objid[1:] # fake a chunk id + return objid class LruCacheTest(unittest.TestCase): @@ -34,7 +41,7 @@ def testSimple(self): self.assertFalse("xyz" in cc) - id = createObjId("chunks") + id = _createId() try: # only dict objects can be added cc[id] = list(range(20)) @@ -42,7 +49,7 @@ def testSimple(self): except TypeError: pass # expected - rand_id = createObjId("chunks") + rand_id = _createId() np_arr = np.random.random((500, 500)) # smaller than our chunk cache size cc[rand_id] = np_arr # add to cache cc.consistencyCheck() @@ -104,7 +111,7 @@ def testLRU(self): ids = [] # add chunks to the cache for i in range(10): - id = createObjId("chunks") + id = _createId() ids.append(id) arr = np.empty((16, 16), dtype="i4") # 1024 bytes arr[...] = i @@ -165,7 +172,7 @@ def testClearCache(self): ids = [] # add chunks to the cache for i in range(10): - id = createObjId("chunks") + id = _createId() ids.append(id) arr = np.empty((16, 16), dtype="i4") # 1024 bytes arr[...] = i @@ -190,7 +197,7 @@ def testMemUtil(self): self.assertEqual(len(cc), 0) ids = set() for i in range(10): - id = createObjId("chunks") + id = _createId() ids.add(id) arr = np.empty((16, 16), dtype="i4") # 1024 bytes arr[...] = i @@ -208,7 +215,7 @@ def testMemUtil(self): # add 10 more chunks, but set dirty to true each time for i in range(10): - id = createObjId("chunks") + id = _createId() ids.add(id) arr = np.empty((16, 16), dtype="i4") # 1024 bytes arr[...] 
= i @@ -255,7 +262,7 @@ def testMetaDataCache(self): data = {"x": 123, "y": 456} - rand_id = createObjId("groups") + rand_id = _createId() data = {"foo": "bar"} cc[rand_id] = data # add to cache cc.consistencyCheck() From 5dc3f761adbf528913a9eec5499fc937f597cc1e Mon Sep 17 00:00:00 2001 From: John Readey Date: Thu, 24 Apr 2025 21:16:55 +0200 Subject: [PATCH 13/49] restrict version on numcodecs --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 33ab5dd5..3f1dc4de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ dependencies = [ "h5py >= 3.6.0", "h5json@git+https://github.com/HDFGroup/hdf5-json@abstract", "importlib_resources", - "numcodecs", + "numcodecs <= 0.15.1", "numpy >=2.0.0rc1; python_version>='3.9'", "psutil", "pyjwt", From fb17e1052dcd6c7bae9b032851df56985984b223 Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 30 Apr 2025 19:57:32 +0200 Subject: [PATCH 14/49] allow client to generate obj ids --- hsds/ctype_dn.py | 5 +- hsds/ctype_sn.py | 9 +++ hsds/dset_dn.py | 5 +- hsds/dset_sn.py | 10 ++- hsds/group_dn.py | 5 +- hsds/group_sn.py | 8 ++ hsds/servicenode_lib.py | 25 +++++- tests/integ/dataset_test.py | 107 ++++++++++++++++++++++++++ tests/integ/datatype_test.py | 51 ++++++++++++ tests/integ/group_test.py | 145 ++++++++++++++++++++++++++++++++++- 10 files changed, 358 insertions(+), 12 deletions(-) diff --git a/hsds/ctype_dn.py b/hsds/ctype_dn.py index 0d0f83e3..b63d0a4d 100755 --- a/hsds/ctype_dn.py +++ b/hsds/ctype_dn.py @@ -98,8 +98,9 @@ async def POST_Datatype(request): # verify the id doesn't already exist obj_found = await check_metadata_obj(app, ctype_id, bucket=bucket) if obj_found: - log.error(f"Post with existing type_id: {ctype_id}") - raise HTTPInternalServerError() + msg = f"Post with existing type_id: {ctype_id}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) root_id = None diff --git a/hsds/ctype_sn.py b/hsds/ctype_sn.py index 2030b63b..dfd026f1 100755 --- a/hsds/ctype_sn.py +++ b/hsds/ctype_sn.py @@ -211,7 +211,12 @@ async def POST_Datatype(request): parent_id = None link_title = None + obj_id = None h5path = None + if "id" in body: + obj_id = body["id"] + log.debug(f"POST datatype using client id: {obj_id}") + if "link" in body: if "h5path" in body: msg = "link can't be used with h5path" @@ -220,6 +225,7 @@ async def POST_Datatype(request): link_body = body["link"] if "id" in link_body: parent_id = link_body["id"] + if "name" in link_body: link_title = link_body["name"] try: @@ -243,6 +249,9 @@ async def POST_Datatype(request): # setup args to createObject kwargs = {"bucket": bucket, "obj_type": datatype} + if obj_id: + kwargs["obj_id"] = obj_id + # TBD: creation props for datatype obj? 
if parent_id: kwargs["parent_id"] = parent_id diff --git a/hsds/dset_dn.py b/hsds/dset_dn.py index dd761365..159d6b63 100755 --- a/hsds/dset_dn.py +++ b/hsds/dset_dn.py @@ -101,8 +101,9 @@ async def POST_Dataset(request): # verify the id doesn't already exist obj_found = await check_metadata_obj(app, dset_id, bucket=bucket) if obj_found: - log.error("Post with existing dset_id: {}".format(dset_id)) - raise HTTPInternalServerError() + msg = f"Post with existing dset_id: {dset_id}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) if "root" not in body: msg = "POST_Dataset with no root" diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 77e85db0..0c0f4619 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -1098,11 +1098,16 @@ async def POST_Dataset(request): log.debug(f"setting filters to: {f_out}") creationProperties["filters"] = f_out - log.debug(f"set dataset json creationPropries: {creationProperties}") + log.debug(f"set dataset json creationProperties: {creationProperties}") parent_id = None + obj_id = None link_title = None h5path = None + if "id" in body: + obj_id = body["id"] + log.debug(f"POST dataset using client id: {obj_id}") + if "link" in body: if "h5path" in body: msg = "link can't be used with h5path" @@ -1111,6 +1116,7 @@ async def POST_Dataset(request): link_body = body["link"] if "id" in link_body: parent_id = link_body["id"] + if "name" in link_body: link_title = link_body["name"] try: @@ -1134,6 +1140,8 @@ async def POST_Dataset(request): # setup args to createObject kwargs = {"bucket": bucket, "obj_type": datatype, "obj_shape": shape_json} + if obj_id: + kwargs["obj_id"] = obj_id if creationProperties: kwargs["creation_props"] = creationProperties if layout: diff --git a/hsds/group_dn.py b/hsds/group_dn.py index 8a4f10f8..db146a62 100755 --- a/hsds/group_dn.py +++ b/hsds/group_dn.py @@ -111,8 +111,9 @@ async def POST_Group(request): # verify the id doesn't already exist obj_found = await check_metadata_obj(app, group_id, bucket=bucket) if obj_found: - log.error(f"Post with existing group_id: {group_id}") - raise HTTPInternalServerError() + msg = f"Post with existing group_id: {group_id}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) root_id = body["root"] diff --git a/hsds/group_sn.py b/hsds/group_sn.py index c857683e..68f5fab3 100755 --- a/hsds/group_sn.py +++ b/hsds/group_sn.py @@ -185,6 +185,7 @@ async def POST_Group(request): implicit = getBooleanParam(params, "implicit") parent_id = None + obj_id = None h5path = None creation_props = None @@ -227,11 +228,16 @@ async def POST_Group(request): parent_id = root_id else: parent_id = body["parent_id"] + if "id" in body: + obj_id = body["id"] + log.debug(f"POST group using client id: {obj_id}") if "creationProperties" in body: creation_props = body["creationProperties"] if parent_id: kwargs = {"bucket": bucket, "parent_id": parent_id, "h5path": h5path} + if obj_id: + kwargs["obj_id"] = obj_id if creation_props: kwargs["creation_props"] = creation_props if implicit: @@ -240,6 +246,8 @@ async def POST_Group(request): else: # create an anonymous group kwargs = {"bucket": bucket, "root_id": root_id} + if obj_id: + kwargs["obj_id"] = obj_id if creation_props: kwargs["creation_props"] = creation_props group_json = await createObject(app, **kwargs) diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index bb40620a..2bb3919e 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -1048,6 +1048,7 @@ async def deleteObject(app, obj_id, bucket=None): async def createObject(app, root_id=None, + 
obj_id=None, obj_type=None, obj_shape=None, layout=None, @@ -1076,7 +1077,18 @@ async def createObject(app, if creation_props: log.debug(f" cprops: {creation_props}") - obj_id = createObjId(collection, root_id=root_id) + if obj_id: + log.debug(f"using client supplied id: {obj_id}") + if not isValidUuid(obj_id, obj_class=collection): + msg = f"invalid id: {obj_id}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + if getRootObjId(obj_id) != root_id: + msg = f"id: {obj_id} is not valid for root: {root_id}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + obj_id = createObjId(collection, root_id=root_id) log.info(f"new obj id: {obj_id}") obj_json = {"id": obj_id, "root": root_id} if obj_type: @@ -1098,6 +1110,7 @@ async def createObject(app, async def createObjectByPath(app, parent_id=None, + obj_id=None, h5path=None, implicit=False, obj_type=None, @@ -1118,6 +1131,8 @@ async def createObjectByPath(app, log.warn(msg) raise HTTPBadRequest(reason=msg) log.debug(f"createObjectByPath - parent_id: {parent_id}, h5path: {h5path}") + if obj_id: + log.debug(f"createObjectByPath using client id: {obj_id}") root_id = getRootObjId(parent_id) @@ -1196,11 +1211,13 @@ async def createObjectByPath(app, kwargs["layout"] = layout if creation_props: kwargs["creation_props"] = creation_props + if obj_id: + kwargs["obj_id"] = obj_id obj_json = await createObject(app, **kwargs) - obj_id = obj_json["id"] + tgt_id = obj_json["id"] # create a link to the new object - await putHardLink(app, parent_id, link_title, tgt_id=obj_id, bucket=bucket) - parent_id = obj_id # new parent + await putHardLink(app, parent_id, link_title, tgt_id=tgt_id, bucket=bucket) + parent_id = tgt_id # new parent log.info(f"createObjectByPath {h5path} done, returning obj_json") return obj_json diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py index 280877cf..958b6552 100755 --- a/tests/integ/dataset_test.py +++ b/tests/integ/dataset_test.py @@ -13,6 +13,9 @@ import json import time import numpy as np + +from h5json.objid import createObjId + import helper import config @@ -190,6 +193,110 @@ def testScalarDataset(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 410) + def testPostDatasetWithId(self): + # Test creation of a dataset obj with a client-supplied obj id + domain = self.base_domain + "/testPostDatasetWithId.h5" + helper.setupDomain(domain) + print("testPostDatasetWithId", domain) + headers = helper.getRequestHeaders(domain=domain) + req = self.endpoint + "/" + + # Get root uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # make a new dataset id + dset_id = createObjId("datasets", root_id=root_uuid) + + # create a dataset obj + data = {"id": dset_id, "type": "H5T_IEEE_F32LE", "shape": "H5S_SCALAR"} + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["attributeCount"], 0) + self.assertEqual(rspJson["id"], dset_id) + + # read back the obj + req = self.endpoint + "/datasets/" + dset_id + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + + expected_keys = [ + "id", + "shape", + "hrefs", + "layout", + "creationProperties", + "attributeCount", + "created", + "lastModified", + "root", + "domain", + ] + + for name 
in expected_keys: + self.assertTrue(name in rspJson) + self.assertEqual(rspJson["id"], dset_id) + self.assertEqual(rspJson["root"], root_uuid) + self.assertEqual(rspJson["domain"], domain) + self.assertEqual(rspJson["attributeCount"], 0) + shape_json = rspJson["shape"] + self.assertTrue(shape_json["class"], "H5S_SCALAR") + self.assertTrue(rspJson["type"], "H5T_IEEE_F32LE") + + # Get the type + rsp = self.session.get(req + "/type", headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("type" in rspJson) + self.assertTrue(rspJson["type"], "H5T_IEEE_F32LE") + self.assertTrue("hrefs" in rspJson) + hrefs = rspJson["hrefs"] + self.assertEqual(len(hrefs), 3) + + # Get the shape + rsp = self.session.get(req + "/shape", headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("created" in rspJson) + self.assertTrue("lastModified" in rspJson) + self.assertTrue("hrefs" in rspJson) + self.assertTrue("shape" in rspJson) + shape_json = rspJson["shape"] + self.assertTrue(shape_json["class"], "H5S_SCALAR") + + # try getting verbose info + params = {"verbose": 1} + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + + for name in expected_keys: + self.assertTrue(name in rspJson) + + # flush to storage and force an immediate rescan + domain_req = self.endpoint + "/" + domain_params = {"flush": 1, "rescan": 1} + rsp = self.session.put(domain_req, params=domain_params, headers=headers) + # should get a NO_CONTENT code, + self.assertEqual(rsp.status_code, 204) + + # do a get and verify the additional keys are present + expected_keys.append("num_chunks") + expected_keys.append("allocated_size") + + rsp = self.session.get(req, params=params, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + + for name in expected_keys: + self.assertTrue(name in rspJson) + def testScalarEmptyDimsDataset(self): # Test creation/deletion of scalar dataset obj domain = self.base_domain + "/testScalarEmptyDimsDataset.h5" diff --git a/tests/integ/datatype_test.py b/tests/integ/datatype_test.py index f3f2d1a9..93c0b3d5 100755 --- a/tests/integ/datatype_test.py +++ b/tests/integ/datatype_test.py @@ -11,6 +11,9 @@ ############################################################################## import unittest import json + +from h5json.objid import createObjId + import helper import config @@ -120,6 +123,54 @@ def testCommittedType(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 410) + def testPostTypeWithId(self): + # Test creation of a datatype obj with a client-supplied id + + print("testPostTypeWithId", self.base_domain) + headers = helper.getRequestHeaders(domain=self.base_domain) + req = self.endpoint + "/" + + # Get root uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # create a datatype id + ctype_id = createObjId("datatypes", root_id=root_uuid) + + # create a committed type obj + data = {"id": ctype_id, "type": "H5T_IEEE_F32LE"} + req = self.endpoint + "/datatypes" + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["attributeCount"], 0) + self.assertEqual(rspJson["id"], ctype_id) + self.assertTrue("type" in rspJson) + 
type_json = rspJson["type"] + self.assertEqual(type_json["class"], "H5T_FLOAT") + self.assertEqual(type_json["base"], "H5T_IEEE_F32LE") + + # read back the obj + req = self.endpoint + "/datatypes/" + ctype_id + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("id" in rspJson) + self.assertEqual(rspJson["id"], ctype_id) + self.assertTrue("root" in rspJson) + self.assertEqual(rspJson["root"], root_uuid) + self.assertTrue("created" in rspJson) + self.assertTrue("lastModified" in rspJson) + self.assertTrue("attributeCount" in rspJson) + self.assertEqual(rspJson["attributeCount"], 0) + self.assertTrue("type" in rspJson) + type_json = rspJson["type"] + self.assertEqual(type_json["class"], "H5T_FLOAT") + self.assertEqual(type_json["base"], "H5T_IEEE_F32LE") + def testPostTypes(self): # Test creation with all primitive types diff --git a/tests/integ/group_test.py b/tests/integ/group_test.py index 7a832271..6c154836 100755 --- a/tests/integ/group_test.py +++ b/tests/integ/group_test.py @@ -13,6 +13,9 @@ import time import json import uuid + +from h5json.objid import createObjId + import helper import config @@ -233,6 +236,39 @@ def testPost(self): rsp = self.session.post(req, headers=headers) self.assertEqual(rsp.status_code, 403) # forbidden + def testPostId(self): + # test POST group + print("testPostId", self.base_domain) + endpoint = helper.getEndpoint() + headers = helper.getRequestHeaders(domain=self.base_domain) + req = endpoint + "/groups" + + # get root id + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # create a group id + grp_id = createObjId("groups", root_id=root_uuid) + + # create a new group using the grp_id + payload = {"id": grp_id} + req = helper.getEndpoint() + "/groups" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], 0) + self.assertEqual(rspJson["attributeCount"], 0) + self.assertEqual(grp_id, rspJson["id"]) + + # try sending the same request again + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 400) # bad request + def testPostWithLink(self): # test POST with link creation print("testPostWithLink", self.base_domain) @@ -310,6 +346,32 @@ def testPostWithLink(self): self.assertTrue("alias" in rspJson) self.assertEqual(rspJson["alias"], ["/linked_group",]) + def testPostIdWithLink(self): + # test POST with link creation + print("testPostIdWithLink", self.base_domain) + headers = helper.getRequestHeaders(domain=self.base_domain) + + # get root id + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # create a group id + grp_id = createObjId("groups", root_id=root_uuid) + + # create new group + payload = {"id": grp_id, "link": {"id": root_uuid, "name": "linked_group"}} + req = helper.getEndpoint() + "/groups" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], 0) + self.assertEqual(rspJson["attributeCount"], 0) + 
self.assertEqual(grp_id, rspJson["id"]) + def testPostWithPath(self): # test POST with implicit parent group creation print("testPostWithPath", self.base_domain) @@ -427,9 +489,90 @@ def testPostWithPath(self): rsp = self.session.get(req, headers=headers, params=params) self.assertEqual(rsp.status_code, 200) + def testPostIdWithPath(self): + # test POST with implicit parent group creation + print("testPostIdWithPath", self.base_domain) + headers = helper.getRequestHeaders(domain=self.base_domain) + + # get root id + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # get root group and verify link count is 0 + req = helper.getEndpoint() + "/groups/" + root_uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], 0) + + # create new group with link path: /g1 + g1_id = createObjId("groups", root_id=root_uuid) + payload = {"id": g1_id, "h5path": "g1"} + req = helper.getEndpoint() + "/groups" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], 0) + self.assertEqual(rspJson["attributeCount"], 0) + self.assertEqual(rspJson["id"], g1_id) + + # get root group and verify link count is 1 + req = helper.getEndpoint() + "/groups/" + root_uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], 1) + + # get the group at "g1" + req = helper.getEndpoint() + "/groups/" + params = {"h5path": "/g1"} + rsp = self.session.get(req, headers=headers, params=params) + self.assertEqual(rsp.status_code, 200) + + # try creating new group with link path: /g2/g2.1 + g21_id = createObjId("groups", root_id=root_uuid) + payload = {"id": g21_id, "h5path": "g2/g2.1"} + req = helper.getEndpoint() + "/groups" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 404) # g2 not found + + # try again with implicit creation set + params = {"implicit": 1} + rsp = self.session.post(req, data=json.dumps(payload), params=params, headers=headers) + self.assertEqual(rsp.status_code, 201) # g2 and g2.1 created + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], 0) + self.assertEqual(rspJson["attributeCount"], 0) + self.assertEqual(rspJson["id"], g21_id) + + # get root group and verify link count is 2 + req = helper.getEndpoint() + "/groups/" + root_uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], 2) + + # get the group at "/g2" + req = helper.getEndpoint() + "/groups/" + params = {"h5path": "/g2"} + rsp = self.session.get(req, headers=headers, params=params) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], 1) # group g2.1 + + # get the group at "/g2/g2.1" + req = helper.getEndpoint() + "/groups/" + params = {"h5path": "/g2/g2.1"} + rsp = self.session.get(req, headers=headers, params=params) + self.assertEqual(rsp.status_code, 200) + def testPostWithCreationProps(self): # test POST group with creation properties - print("testPost", self.base_domain) + 
print("testPostWithCreationProps", self.base_domain) endpoint = helper.getEndpoint() headers = helper.getRequestHeaders(domain=self.base_domain) req = endpoint + "/groups" From 3be18a08d691e614df06a3e3b2a0404f0029ef9a Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 7 May 2025 13:24:22 +0200 Subject: [PATCH 15/49] enable attributes to be included with POST req --- hsds/attr_sn.py | 262 ++--------------------------------- hsds/ctype_dn.py | 13 +- hsds/ctype_sn.py | 8 ++ hsds/dset_dn.py | 13 +- hsds/dset_sn.py | 8 ++ hsds/group_dn.py | 13 +- hsds/group_sn.py | 8 ++ hsds/servicenode_lib.py | 251 ++++++++++++++++++++++++++++++++- tests/integ/dataset_test.py | 38 +++++ tests/integ/datatype_test.py | 39 +++++- tests/integ/group_test.py | 40 +++++- tests/integ/link_test.py | 1 - 12 files changed, 429 insertions(+), 265 deletions(-) diff --git a/hsds/attr_sn.py b/hsds/attr_sn.py index c5d76227..d3b05ca0 100755 --- a/hsds/attr_sn.py +++ b/hsds/attr_sn.py @@ -18,11 +18,10 @@ from aiohttp.web import StreamResponse from json import JSONDecodeError -from h5json.hdf5dtype import validateTypeItem, getBaseTypeJson from h5json.hdf5dtype import createDataType, getItemSize -from h5json.array_util import jsonToArray, getNumElements, bytesArrayToList +from h5json.array_util import jsonToArray, getNumElements from h5json.array_util import bytesToArray, arrayToBytes, decodeData, encodeData -from h5json.objid import isValidUuid, getRootObjId +from h5json.objid import isValidUuid from .util.httpUtil import getAcceptType, jsonResponse, getHref, getBooleanParam from .util.globparser import globmatch @@ -32,8 +31,8 @@ from .util.attrUtil import validateAttributeName, getRequestCollectionName from .util.dsetUtil import getShapeDims -from .servicenode_lib import getDomainJson, getObjectJson, validateAction -from .servicenode_lib import getAttributes, putAttributes, deleteAttributes +from .servicenode_lib import getDomainJson, getAttributeFromRequest, getAttributesFromRequest +from .servicenode_lib import getAttributes, putAttributes, deleteAttributes, validateAction from .domain_crawl import DomainCrawler from . import hsds_logger as log from . import config @@ -296,244 +295,6 @@ async def GET_Attribute(request): return resp -async def _getTypeFromRequest(app, body, obj_id=None, bucket=None): - """ return a type json from the request body """ - if "type" not in body: - msg = "PUT attribute with no type in body" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - datatype = body["type"] - - if isinstance(datatype, str) and datatype.startswith("t-"): - # Committed type - fetch type json from DN - ctype_id = datatype - log.debug(f"got ctypeid: {ctype_id}") - ctype_json = await getObjectJson(app, ctype_id, bucket=bucket) - log.debug(f"ctype {ctype_id}: {ctype_json}") - root_id = getRootObjId(obj_id) - if ctype_json["root"] != root_id: - msg = "Referenced committed datatype must belong in same domain" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - datatype = ctype_json["type"] - # add the ctype_id to the type - datatype["id"] = ctype_id - elif isinstance(datatype, str): - try: - # convert predefined type string (e.g. 
"H5T_STD_I32LE") to - # corresponding json representation - datatype = getBaseTypeJson(datatype) - except TypeError: - msg = "PUT attribute with invalid predefined type" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - try: - validateTypeItem(datatype) - except KeyError as ke: - msg = f"KeyError creating type: {ke}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - except TypeError as te: - msg = f"TypeError creating type: {te}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - except ValueError as ve: - msg = f"ValueError creating type: {ve}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - return datatype - - -def _getShapeFromRequest(body): - """ get shape json from request body """ - shape_json = {} - if "shape" in body: - shape_body = body["shape"] - shape_class = None - if isinstance(shape_body, dict) and "class" in shape_body: - shape_class = shape_body["class"] - elif isinstance(shape_body, str): - shape_class = shape_body - if shape_class: - if shape_class == "H5S_NULL": - shape_json["class"] = "H5S_NULL" - if isinstance(shape_body, dict) and "dims" in shape_body: - msg = "can't include dims with null shape" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if isinstance(shape_body, dict) and "value" in body: - msg = "can't have H5S_NULL shape with value" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - elif shape_class == "H5S_SCALAR": - shape_json["class"] = "H5S_SCALAR" - dims = getShapeDims(shape_body) - if len(dims) != 1 or dims[0] != 1: - msg = "dimensions aren't valid for scalar attribute" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - elif shape_class == "H5S_SIMPLE": - shape_json["class"] = "H5S_SIMPLE" - dims = getShapeDims(shape_body) - shape_json["dims"] = dims - else: - msg = f"Unknown shape class: {shape_class}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - else: - # no class, interpet shape value as dimensions and - # use H5S_SIMPLE as class - if isinstance(shape_body, list) and len(shape_body) == 0: - shape_json["class"] = "H5S_SCALAR" - else: - shape_json["class"] = "H5S_SIMPLE" - dims = getShapeDims(shape_body) - shape_json["dims"] = dims - else: - shape_json["class"] = "H5S_SCALAR" - - return shape_json - - -def _getValueFromRequest(body, data_type, data_shape): - """ Get attribute value from request json """ - dims = getShapeDims(data_shape) - if "value" in body: - if dims is None: - msg = "Bad Request: data can not be included with H5S_NULL space" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - value = body["value"] - # validate that the value agrees with type/shape - arr_dtype = createDataType(data_type) # np datatype - if len(dims) == 0: - np_dims = [1, ] - else: - np_dims = dims - - if body.get("encoding"): - item_size = getItemSize(data_type) - if item_size == "H5T_VARIABLE": - msg = "base64 encoding is not support for variable length attributes" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - try: - data = decodeData(value) - except ValueError: - msg = "unable to decode data" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - expected_numbytes = arr_dtype.itemsize * np.prod(dims) - if len(data) != expected_numbytes: - msg = f"expected: {expected_numbytes} but got: {len(data)}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - # check to see if this works with our shape and type - try: - arr = bytesToArray(data, arr_dtype, np_dims) - except ValueError as e: - log.debug(f"data: {data}") - log.debug(f"type: {arr_dtype}") - log.debug(f"np_dims: {np_dims}") - msg = f"Bad Request: encoded input data 
doesn't match shape and type: {e}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - value_json = None - # now try converting to JSON - list_data = arr.tolist() - try: - value_json = bytesArrayToList(list_data) - except ValueError as err: - msg = f"Cannot decode bytes to list: {err}, will store as encoded bytes" - log.warn(msg) - if value_json: - log.debug("will store base64 input as json") - if data_shape["class"] == "H5S_SCALAR": - # just use the scalar value - value = value_json[0] - else: - value = value_json # return this - else: - value = data # return bytes to signal that this needs to be encoded - else: - # verify that the input data matches the array shape and type - try: - jsonToArray(np_dims, arr_dtype, value) - except ValueError as e: - msg = f"Bad Request: input data doesn't match selection: {e}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - else: - value = None - - return value - - -async def _getAttributeFromRequest(app, req_json, obj_id=None, bucket=None): - """ return attribute from given request json """ - attr_item = {} - attr_type = await _getTypeFromRequest(app, req_json, obj_id=obj_id, bucket=bucket) - attr_shape = _getShapeFromRequest(req_json) - attr_item = {"type": attr_type, "shape": attr_shape} - attr_value = _getValueFromRequest(req_json, attr_type, attr_shape) - if attr_value is not None: - if isinstance(attr_value, bytes): - attr_value = encodeData(attr_value) # store as base64 - attr_item["encoding"] = "base64" - else: - # just store the JSON dict or primitive value - attr_item["value"] = attr_value - else: - attr_item["value"] = None - - return attr_item - - -async def _getAttributesFromRequest(request, req_json, obj_id=None, bucket=None): - """ read the given JSON dictinary and return dict of attribute json """ - - app = request.app - attr_items = {} - kwargs = {"obj_id": obj_id} - if bucket: - kwargs["bucket"] = bucket - if "attributes" in req_json: - attributes = req_json["attributes"] - if not isinstance(attributes, dict): - msg = f"expected list for attributes but got: {type(attributes)}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - # read each attr_item and canonicalize the shape, type, verify value - for attr_name in attributes: - attr_json = attributes[attr_name] - attr_item = await _getAttributeFromRequest(app, attr_json, **kwargs) - attr_items[attr_name] = attr_item - - elif "type" in req_json: - # single attribute create - fake an item list - attr_item = await _getAttributeFromRequest(app, req_json, **kwargs) - if "name" in req_json: - attr_name = req_json["name"] - else: - attr_name = request.match_info.get("name") - validateAttributeName(attr_name) - if not attr_name: - msg = "Missing attribute name" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - attr_items[attr_name] = attr_item - else: - log.debug(f"_getAttributes from request - no attribute defined in {req_json}") - - return attr_items - - async def PUT_Attribute(request): """HTTP method to create a new attribute""" log.request(request) @@ -556,7 +317,7 @@ async def PUT_Attribute(request): log.debug(f"Attribute name: [{attr_name}]") validateAttributeName(attr_name) - log.info(f"PUT Attributes id: {req_obj_id} name: {attr_name}") + log.info(f"PUT Attribute id: {req_obj_id} name: {attr_name}") username, pswd = getUserPasswordFromRequest(request) # write actions need auth await validateUserPassword(app, username, pswd) @@ -588,7 +349,7 @@ async def PUT_Attribute(request): # get attribute from request body kwargs = {"bucket": bucket, "obj_id": req_obj_id} - attr_body = 
await _getAttributeFromRequest(app, body, **kwargs) + attr_body = await getAttributeFromRequest(app, body, **kwargs) # write attribute to DN attr_json = {attr_name: attr_body} @@ -625,7 +386,7 @@ async def PUT_Attributes(request): await validateUserPassword(app, username, pswd) if not request.has_body: - msg = "PUT Attribute with no body" + msg = "PUT Attributes with no body" log.warn(msg) raise HTTPBadRequest(reason=msg) try: @@ -655,10 +416,10 @@ async def PUT_Attributes(request): if not req_obj_id: req_obj_id = domain_json["root"] kwargs = {"obj_id": req_obj_id, "bucket": bucket} - attr_items = await _getAttributesFromRequest(request, body, **kwargs) + attr_items = await getAttributesFromRequest(app, body, **kwargs) if attr_items: - log.debug(f"PUT Attribute {len(attr_items)} attibutes to add") + log.debug(f"PUT Attribute {len(attr_items)} attributes to add") else: log.debug("no attributes defined yet") @@ -667,6 +428,7 @@ async def PUT_Attributes(request): obj_ids = {} if "obj_ids" in body: body_ids = body["obj_ids"] + if isinstance(body_ids, list): # multi cast the attributes - each attribute in attr-items # will be written to each of the objects identified by obj_id @@ -686,7 +448,7 @@ async def PUT_Attributes(request): msg += f"{len(obj_ids)} objects" log.info(msg) elif isinstance(body_ids, dict): - # each value is body_ids is a set of attriutes to write to the object + # each value is body_ids is a set of attributes to write to the object # unlike the above case, different attributes can be written to # different objects if attr_items: @@ -702,7 +464,7 @@ async def PUT_Attributes(request): id_json = body_ids[obj_id] kwargs = {"obj_id": obj_id, "bucket": bucket} - obj_items = await _getAttributesFromRequest(request, id_json, **kwargs) + obj_items = await getAttributesFromRequest(app, id_json, **kwargs) if obj_items: obj_ids[obj_id] = obj_items diff --git a/hsds/ctype_dn.py b/hsds/ctype_dn.py index b63d0a4d..0b14ab41 100755 --- a/hsds/ctype_dn.py +++ b/hsds/ctype_dn.py @@ -122,10 +122,17 @@ async def POST_Datatype(request): raise HTTPInternalServerError() type_json = body["type"] + if "attributes" in body: + # initialize attributes + attrs = body["attributes"] + log.debug(f"POST datatype with attributes: {attrs}") + else: + attrs = {} + # ok - all set, create committed type obj now = getNow(app) - log.info(f"POST_datatype, typejson: {type_json}") + log.info(f"POST_datatype, type_json: {type_json}") ctype_json = { "id": ctype_id, @@ -133,7 +140,7 @@ async def POST_Datatype(request): "created": now, "lastModified": now, "type": type_json, - "attributes": {}, + "attributes": attrs, } kwargs = {"bucket": bucket, "notify": True, "flush": True} @@ -145,7 +152,7 @@ async def POST_Datatype(request): resp_json["created"] = ctype_json["created"] resp_json["lastModified"] = ctype_json["lastModified"] resp_json["type"] = type_json - resp_json["attributeCount"] = 0 + resp_json["attributeCount"] = len(attrs) resp = json_response(resp_json, status=201) log.response(request, resp=resp) diff --git a/hsds/ctype_sn.py b/hsds/ctype_sn.py index dfd026f1..ccf033ac 100755 --- a/hsds/ctype_sn.py +++ b/hsds/ctype_sn.py @@ -213,10 +213,16 @@ async def POST_Datatype(request): link_title = None obj_id = None h5path = None + attrs = None + if "id" in body: obj_id = body["id"] log.debug(f"POST datatype using client id: {obj_id}") + if "attributes" in body: + attrs = body["attributes"] + log.debug(f"POST datatype attributes: {attrs}") + if "link" in body: if "h5path" in body: msg = "link can't be used with h5path" 
@@ -251,6 +257,8 @@ async def POST_Datatype(request): kwargs = {"bucket": bucket, "obj_type": datatype} if obj_id: kwargs["obj_id"] = obj_id + if attrs: + kwargs["attrs"] = attrs # TBD: creation props for datatype obj? if parent_id: diff --git a/hsds/dset_dn.py b/hsds/dset_dn.py index 159d6b63..bca36457 100755 --- a/hsds/dset_dn.py +++ b/hsds/dset_dn.py @@ -135,7 +135,14 @@ async def POST_Dataset(request): # ok - all set, create committed type obj now = getNow(app) - log.debug(f"POST_dataset typejson: {type_json}, shapejson: {shape_json}") + if "attributes" in body: + # initialize attributes + attrs = body["attributes"] + log.debug(f"POST Dataset with attributes: {attrs}") + else: + attrs = {} + + log.debug(f"POST_dataset type_json: {type_json}, shape_json: {shape_json}") dset_json = { "id": dset_id, @@ -144,7 +151,7 @@ async def POST_Dataset(request): "lastModified": now, "type": type_json, "shape": shape_json, - "attributes": {}, + "attributes": attrs, } if "creationProperties" in body: @@ -162,7 +169,7 @@ async def POST_Dataset(request): resp_json["type"] = type_json resp_json["shape"] = shape_json resp_json["lastModified"] = dset_json["lastModified"] - resp_json["attributeCount"] = 0 + resp_json["attributeCount"] = len(attrs) resp = json_response(resp_json, status=201) log.response(request, resp=resp) diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 0c0f4619..c6c5e502 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -859,6 +859,12 @@ async def POST_Dataset(request): else: creationProperties = {} + if "attributes" in body: + attrs = body["attributes"] + log.debug(f"POST Dataset attributes: {attrs}") + else: + attrs = None + # TBD: check for invalid layout class... if layout_props: if layout_props["class"] == "H5D_CONTIGUOUS": @@ -1144,6 +1150,8 @@ async def POST_Dataset(request): kwargs["obj_id"] = obj_id if creationProperties: kwargs["creation_props"] = creationProperties + if attrs: + kwargs["attrs"] = attrs if layout: kwargs["layout"] = layout diff --git a/hsds/group_dn.py b/hsds/group_dn.py index db146a62..dfce8f66 100755 --- a/hsds/group_dn.py +++ b/hsds/group_dn.py @@ -99,7 +99,7 @@ async def POST_Group(request): group_id = get_obj_id(request, body=body) - log.info(f"POST group: {group_id} bucket: {bucket}") + log.info(f"POST group: {group_id} bucket: {bucket} body: {body}") if not isValidUuid(group_id, obj_class="groups"): log.error(f"Unexpected group_id: {group_id}") raise HTTPInternalServerError() @@ -125,13 +125,20 @@ async def POST_Group(request): # ok - all set, create group obj now = getNow(app) + if "attributes" in body: + # initialize attributes + attrs = body["attributes"] + log.debug(f"POST Group with attributes: {attrs}") + else: + attrs = {} + group_json = { "id": group_id, "root": root_id, "created": now, "lastModified": now, "links": {}, - "attributes": {}, + "attributes": attrs, } if "creationProperties" in body: @@ -147,7 +154,7 @@ async def POST_Group(request): resp_json["created"] = group_json["created"] resp_json["lastModified"] = group_json["lastModified"] resp_json["linkCount"] = 0 - resp_json["attributeCount"] = 0 + resp_json["attributeCount"] = len(attrs) resp = json_response(resp_json, status=201) log.response(request, resp=resp) diff --git a/hsds/group_sn.py b/hsds/group_sn.py index 68f5fab3..e2395826 100755 --- a/hsds/group_sn.py +++ b/hsds/group_sn.py @@ -188,6 +188,7 @@ async def POST_Group(request): obj_id = None h5path = None creation_props = None + attrs = None if request.has_body: try: @@ -233,6 +234,9 @@ async def POST_Group(request): 
log.debug(f"POST group using client id: {obj_id}") if "creationProperties" in body: creation_props = body["creationProperties"] + if "attributes" in body: + attrs = body["attributes"] + log.debug(f"POST Group attributes: {attrs}") if parent_id: kwargs = {"bucket": bucket, "parent_id": parent_id, "h5path": h5path} @@ -240,6 +244,8 @@ async def POST_Group(request): kwargs["obj_id"] = obj_id if creation_props: kwargs["creation_props"] = creation_props + if attrs: + kwargs["attrs"] = attrs if implicit: kwargs["implicit"] = True group_json = await createObjectByPath(app, **kwargs) @@ -250,6 +256,8 @@ async def POST_Group(request): kwargs["obj_id"] = obj_id if creation_props: kwargs["creation_props"] = creation_props + if attrs: + kwargs["attrs"] = attrs group_json = await createObject(app, **kwargs) log.debug(f"returning resp: {group_json}") diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 2bb3919e..69b909dc 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -15,15 +15,17 @@ import asyncio import json +import numpy as np from aiohttp.web_exceptions import HTTPBadRequest, HTTPForbidden, HTTPGone, HTTPConflict from aiohttp.web_exceptions import HTTPNotFound, HTTPInternalServerError from aiohttp.client_exceptions import ClientOSError, ClientError from aiohttp import ClientResponseError -from h5json.array_util import encodeData +from h5json.array_util import encodeData, decodeData, bytesToArray, bytesArrayToList, jsonToArray from h5json.objid import getCollectionForId, createObjId, getRootObjId from h5json.objid import isSchema2Id, getS3Key, isValidUuid +from h5json.hdf5dtype import getBaseTypeJson, validateTypeItem, createDataType, getItemSize from .util.nodeUtil import getDataNodeUrl from .util.authUtil import getAclKeys @@ -33,6 +35,7 @@ from .util.httpUtil import http_get, http_put, http_post, http_delete from .util.domainUtil import getBucketForDomain, verifyRoot, getLimits from .util.storUtil import getCompressors +from .util.dsetUtil import getShapeDims from .basenode import getVersion from . import hsds_logger as log @@ -888,6 +891,229 @@ async def doFlush(app, root_id, bucket=None): return dn_ids +async def getTypeFromRequest(app, body, obj_id=None, bucket=None): + """ return a type json from the request body """ + if "type" not in body: + msg = "expected type in body" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + datatype = body["type"] + + if isinstance(datatype, str) and datatype.startswith("t-"): + # Committed type - fetch type json from DN + ctype_id = datatype + log.debug(f"got ctypeid: {ctype_id}") + ctype_json = await getObjectJson(app, ctype_id, bucket=bucket) + log.debug(f"ctype {ctype_id}: {ctype_json}") + root_id = getRootObjId(obj_id) + if ctype_json["root"] != root_id: + msg = "Referenced committed datatype must belong in same domain" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + datatype = ctype_json["type"] + # add the ctype_id to the type + datatype["id"] = ctype_id + elif isinstance(datatype, str): + try: + # convert predefined type string (e.g. 
"H5T_STD_I32LE") to + # corresponding json representation + datatype = getBaseTypeJson(datatype) + except TypeError: + msg = "PUT attribute with invalid predefined type" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + try: + validateTypeItem(datatype) + except KeyError as ke: + msg = f"KeyError creating type: {ke}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + except TypeError as te: + msg = f"TypeError creating type: {te}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + except ValueError as ve: + msg = f"ValueError creating type: {ve}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + return datatype + + +def getShapeFromRequest(body): + """ get shape json from request body """ + shape_json = {} + if "shape" in body: + shape_body = body["shape"] + shape_class = None + if isinstance(shape_body, dict) and "class" in shape_body: + shape_class = shape_body["class"] + elif isinstance(shape_body, str): + shape_class = shape_body + if shape_class: + if shape_class == "H5S_NULL": + shape_json["class"] = "H5S_NULL" + if isinstance(shape_body, dict) and "dims" in shape_body: + msg = "can't include dims with null shape" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + if isinstance(shape_body, dict) and "value" in body: + msg = "can't have H5S_NULL shape with value" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + elif shape_class == "H5S_SCALAR": + shape_json["class"] = "H5S_SCALAR" + dims = getShapeDims(shape_body) + if len(dims) != 1 or dims[0] != 1: + msg = "dimensions aren't valid for scalar attribute" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + elif shape_class == "H5S_SIMPLE": + shape_json["class"] = "H5S_SIMPLE" + dims = getShapeDims(shape_body) + shape_json["dims"] = dims + else: + msg = f"Unknown shape class: {shape_class}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + # no class, interpret shape value as dimensions and + # use H5S_SIMPLE as class + if isinstance(shape_body, list) and len(shape_body) == 0: + shape_json["class"] = "H5S_SCALAR" + else: + shape_json["class"] = "H5S_SIMPLE" + dims = getShapeDims(shape_body) + shape_json["dims"] = dims + else: + shape_json["class"] = "H5S_SCALAR" + + return shape_json + + +async def getAttributeFromRequest(app, req_json, obj_id=None, bucket=None): + """ return attribute from given request json """ + attr_item = {} + log.debug(f"getAttributeFromRequest req_json: {req_json} obj_id: {obj_id}") + attr_type = await getTypeFromRequest(app, req_json, obj_id=obj_id, bucket=bucket) + attr_shape = getShapeFromRequest(req_json) + attr_item = {"type": attr_type, "shape": attr_shape} + attr_value = getValueFromRequest(req_json, attr_type, attr_shape) + if attr_value is not None: + if isinstance(attr_value, bytes): + attr_value = encodeData(attr_value) # store as base64 + attr_item["encoding"] = "base64" + else: + # just store the JSON dict or primitive value + attr_item["value"] = attr_value + else: + attr_item["value"] = None + + return attr_item + + +async def getAttributesFromRequest(app, req_json, obj_id=None, bucket=None): + """ read the given JSON dictionary and return dict of attribute json """ + + attr_items = {} + kwargs = {"obj_id": obj_id} + if bucket: + kwargs["bucket"] = bucket + if "attributes" in req_json: + attributes = req_json["attributes"] + if not isinstance(attributes, dict): + msg = f"expected list for attributes but got: {type(attributes)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + # read each attr_item and canonicalize the shape, type, verify value + for attr_name in 
attributes: + attr_json = attributes[attr_name] + attr_item = await getAttributeFromRequest(app, attr_json, **kwargs) + attr_items[attr_name] = attr_item + else: + log.debug(f"getAttributesFromRequest - no attribute defined in {req_json}") + + return attr_items + + +def getValueFromRequest(body, data_type, data_shape): + """ Get attribute value from request json """ + dims = getShapeDims(data_shape) + if "value" in body: + if dims is None: + msg = "Bad Request: data can not be included with H5S_NULL space" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + value = body["value"] + # validate that the value agrees with type/shape + arr_dtype = createDataType(data_type) # np datatype + if len(dims) == 0: + np_dims = [1, ] + else: + np_dims = dims + + if body.get("encoding"): + item_size = getItemSize(data_type) + if item_size == "H5T_VARIABLE": + msg = "base64 encoding is not support for variable length attributes" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + try: + data = decodeData(value) + except ValueError: + msg = "unable to decode data" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + expected_byte_count = arr_dtype.itemsize * np.prod(dims) + if len(data) != expected_byte_count: + msg = f"expected: {expected_byte_count} but got: {len(data)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + # check to see if this works with our shape and type + try: + arr = bytesToArray(data, arr_dtype, np_dims) + except ValueError as e: + log.debug(f"data: {data}") + log.debug(f"type: {arr_dtype}") + log.debug(f"np_dims: {np_dims}") + msg = f"Bad Request: encoded input data doesn't match shape and type: {e}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + value_json = None + # now try converting to JSON + list_data = arr.tolist() + try: + value_json = bytesArrayToList(list_data) + except ValueError as err: + msg = f"Cannot decode bytes to list: {err}, will store as encoded bytes" + log.warn(msg) + if value_json: + log.debug("will store base64 input as json") + if data_shape["class"] == "H5S_SCALAR": + # just use the scalar value + value = value_json[0] + else: + value = value_json # return this + else: + value = data # return bytes to signal that this needs to be encoded + else: + # verify that the input data matches the array shape and type + try: + jsonToArray(np_dims, arr_dtype, value) + except ValueError as e: + msg = f"Bad Request: input data doesn't match selection: {e}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + value = None + + return value + + async def getAttributes(app, obj_id, attr_names=None, include_data=False, @@ -1053,6 +1279,7 @@ async def createObject(app, obj_shape=None, layout=None, creation_props=None, + attrs=None, bucket=None): """ create a group, ctype, or dataset object and return object json Determination on whether a group, ctype, or dataset is created is based on: @@ -1076,6 +1303,8 @@ async def createObject(app, log.debug(f" layout: {layout}") if creation_props: log.debug(f" cprops: {creation_props}") + if attrs: + log.debug(f" attrs: {attrs}") if obj_id: log.debug(f"using client supplied id: {obj_id}") @@ -1099,6 +1328,13 @@ async def createObject(app, obj_json["layout"] = layout if creation_props: obj_json["creationProperties"] = creation_props + if attrs: + kwargs = {"obj_id": obj_id, "bucket": bucket} + attrs_json = {"attributes": attrs} + attr_items = await getAttributesFromRequest(app, attrs_json, **kwargs) + log.debug(f"got attr_items: {attr_items}") + + obj_json["attributes"] = attr_items log.debug(f"create {collection} 
obj, body: {obj_json}") dn_url = getDataNodeUrl(app, obj_id) req = f"{dn_url}/{collection}" @@ -1117,6 +1353,7 @@ async def createObjectByPath(app, obj_shape=None, layout=None, creation_props=None, + attrs=None, bucket=None): """ create an object at the designated path relative to the parent. @@ -1133,6 +1370,16 @@ async def createObjectByPath(app, log.debug(f"createObjectByPath - parent_id: {parent_id}, h5path: {h5path}") if obj_id: log.debug(f"createObjectByPath using client id: {obj_id}") + if obj_type: + log.debug(f" obj_type: {obj_type}") + if obj_shape: + log.debug(f" obj_shape: {obj_shape}") + if layout: + log.debug(f" layout: {layout}") + if creation_props: + log.debug(f" cprops: {creation_props}") + if attrs: + log.debug(f" attrs: {attrs}") root_id = getRootObjId(parent_id) @@ -1211,6 +1458,8 @@ async def createObjectByPath(app, kwargs["layout"] = layout if creation_props: kwargs["creation_props"] = creation_props + if attrs: + kwargs["attrs"] = attrs if obj_id: kwargs["obj_id"] = obj_id obj_json = await createObject(app, **kwargs) diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py index 958b6552..d0b60f1a 100755 --- a/tests/integ/dataset_test.py +++ b/tests/integ/dataset_test.py @@ -297,6 +297,44 @@ def testPostDatasetWithId(self): for name in expected_keys: self.assertTrue(name in rspJson) + def testPostDatasetWithAttributes(self): + # test POST with attribute initialization + domain = self.base_domain + "/testPostDatasetWithAttributes.h5" + helper.setupDomain(domain) + print("testPostDatasetWithAttributes", domain) + headers = helper.getRequestHeaders(domain=domain) + + # get root id + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + print("rspJson:", rspJson) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # setup some attributes to include + attr_count = 4 + attributes = {} + extent = 10 + for i in range(attr_count): + value = [i * 10 + j for j in range(extent)] + data = {"type": "H5T_STD_I32LE", "shape": extent, "value": value} + attr_name = f"attr{i + 1:04d}" + attributes[attr_name] = data + + # create new dataset + payload = {"type": "H5T_IEEE_F32LE", "shape": "H5S_SCALAR"} + payload["attributes"] = attributes + payload["link"] = {"id": root_uuid, "name": "linked_datatype"} + + req = helper.getEndpoint() + "/datasets" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["attributeCount"], 4) + self.assertTrue(helper.validateId(rspJson["id"])) + def testScalarEmptyDimsDataset(self): # Test creation/deletion of scalar dataset obj domain = self.base_domain + "/testScalarEmptyDimsDataset.h5" diff --git a/tests/integ/datatype_test.py b/tests/integ/datatype_test.py index 93c0b3d5..ce2a0e1a 100755 --- a/tests/integ/datatype_test.py +++ b/tests/integ/datatype_test.py @@ -123,7 +123,7 @@ def testCommittedType(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 410) - def testPostdTypeWithId(self): + def testPostTypeWithId(self): # Test creation/deletion of datatype obj print("testPostTypeWithId", self.base_domain) @@ -171,6 +171,43 @@ def testPostdTypeWithId(self): self.assertEqual(type_json["class"], "H5T_FLOAT") self.assertEqual(type_json["base"], "H5T_IEEE_F32LE") + def testPostWithAttributes(self): + # test POST with attribute initialization + print("testPostWithAttributes", 
self.base_domain) + headers = helper.getRequestHeaders(domain=self.base_domain) + + # get root id + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # setup some attributes to include + attr_count = 4 + attributes = {} + extent = 10 + for i in range(attr_count): + value = [i * 10 + j for j in range(extent)] + data = {"type": "H5T_STD_I32LE", "shape": extent, "value": value} + attr_name = f"attr{i + 1:04d}" + attributes[attr_name] = data + + # create new datatype + link = {"id": root_uuid, "name": "linked_datatype"} + payload = {"type": "H5T_IEEE_F32LE", "attributes": attributes, "link": link} + req = helper.getEndpoint() + "/datatypes" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertTrue(helper.validateId(rspJson["id"])) + self.assertTrue("type" in rspJson) + type_json = rspJson["type"] + self.assertEqual(type_json["class"], "H5T_FLOAT") + self.assertEqual(type_json["base"], "H5T_IEEE_F32LE") + self.assertEqual(rspJson["attributeCount"], 4) + def testPostTypes(self): # Test creation with all primitive types diff --git a/tests/integ/group_test.py b/tests/integ/group_test.py index 6c154836..32234245 100755 --- a/tests/integ/group_test.py +++ b/tests/integ/group_test.py @@ -236,9 +236,9 @@ def testPost(self): rsp = self.session.post(req, headers=headers) self.assertEqual(rsp.status_code, 403) # forbidden - def testPostId(self): - # test POST group - print("testPostId", self.base_domain) + def testPostWithId(self): + # test POST group with a client-generated id + print("testPostWithId", self.base_domain) endpoint = helper.getEndpoint() headers = helper.getRequestHeaders(domain=self.base_domain) req = endpoint + "/groups" @@ -261,6 +261,7 @@ def testPostId(self): self.assertEqual(rsp.status_code, 201) rspJson = json.loads(rsp.text) + print("rspJson:", rspJson) self.assertEqual(rspJson["linkCount"], 0) self.assertEqual(rspJson["attributeCount"], 0) self.assertEqual(grp_id, rspJson["id"]) @@ -372,6 +373,39 @@ def testPostIdWithLink(self): self.assertEqual(rspJson["attributeCount"], 0) self.assertEqual(grp_id, rspJson["id"]) + def testPostWithAttributes(self): + # test POST with attribute initialization + print("testPostWithAttributes", self.base_domain) + headers = helper.getRequestHeaders(domain=self.base_domain) + + # get root id + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # setup some attributes to include + attr_count = 4 + attributes = {} + extent = 10 + for i in range(attr_count): + value = [i * 10 + j for j in range(extent)] + data = {"type": "H5T_STD_I32LE", "shape": extent, "value": value} + attr_name = f"attr{i + 1:04d}" + attributes[attr_name] = data + + # create new group + payload = {"attributes": attributes, "link": {"id": root_uuid, "name": "linked_group"}} + req = helper.getEndpoint() + "/groups" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], 0) + self.assertEqual(rspJson["attributeCount"], 4) + self.assertTrue(helper.validateId(rspJson["id"])) + def testPostWithPath(self): # test POST with 
implicit parent group creation print("testPostWithPath", self.base_domain) diff --git a/tests/integ/link_test.py b/tests/integ/link_test.py index a6f72aeb..7c909435 100755 --- a/tests/integ/link_test.py +++ b/tests/integ/link_test.py @@ -1767,7 +1767,6 @@ def testLinkCreationOrder(self): self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) links_json = rspJson["links"] - print("params:", params) # verify the links are in order for i in range(link_count - 1): From 00d7c962c2f0195fb82ceeb5cd1b69b529441225 Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 7 May 2025 15:28:34 +0200 Subject: [PATCH 16/49] add create timestamps for attributes in obj create --- admin/config/config.yml | 1 + hsds/servicenode_lib.py | 14 ++++++++++++++ tests/integ/dataset_test.py | 14 +++++++++++++- tests/integ/datatype_test.py | 16 ++++++++++++++-- tests/integ/group_test.py | 16 ++++++++++++++-- 5 files changed, 56 insertions(+), 5 deletions(-) diff --git a/admin/config/config.yml b/admin/config/config.yml index 756be465..6e92d65b 100755 --- a/admin/config/config.yml +++ b/admin/config/config.yml @@ -88,6 +88,7 @@ allow_any_bucket_read: true # enable reads to buckets other than default bucket allow_any_bucket_write: true # enable writes to buckets other than default bucket bit_shuffle_default_blocksize: 2048 # default blocksize for bitshuffle filter max_rangeget_gap: 1024 # max gap in byte for intelligent range get requests +predate_maxtime: 10.0 # max delta between object created timestamp in request and actual time # DEPRECATED - the remaining config values are not used in currently but kept for backward compatibility with older container images aws_lambda_chunkread_function: null # name of aws lambda function for chunk reading aws_lambda_threshold: 4 # number of chunks per node per request to reach before using lambda diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 69b909dc..80fe7af1 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -15,6 +15,7 @@ import asyncio import json +import time import numpy as np from aiohttp.web_exceptions import HTTPBadRequest, HTTPForbidden, HTTPGone, HTTPConflict @@ -39,6 +40,7 @@ from .basenode import getVersion from . import hsds_logger as log +from . 
import config
 
 
 async def getDomainJson(app, domain, reload=False):
@@ -1010,6 +1012,18 @@ async def getAttributeFromRequest(app, req_json, obj_id=None, bucket=None):
     else:
         attr_item["value"] = None
 
+    now = time.time()
+    if "created" in req_json:
+        created = req_json["created"]
+        # allow "pre-dated" attributes if the timestamp is within the last 10 seconds
+        predate_max_time = config.get("predate_maxtime", default=10.0)
+        if now - created <= predate_max_time:
+            attr_item["created"] = created
+        else:
+            log.warn("stale created timestamp for attribute, ignoring")
+    if "created" not in attr_item:
+        attr_item["created"] = now
+
     return attr_item
 
 
diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py
index d0b60f1a..46726e60 100755
--- a/tests/integ/dataset_test.py
+++ b/tests/integ/dataset_test.py
@@ -333,7 +333,19 @@ def testPostDatasetWithAttributes(self):
         self.assertEqual(rsp.status_code, 201)
         rspJson = json.loads(rsp.text)
         self.assertEqual(rspJson["attributeCount"], 4)
-        self.assertTrue(helper.validateId(rspJson["id"]))
+        dset_id = rspJson["id"]
+        self.assertTrue(helper.validateId(dset_id))
+
+        # fetch the attributes
+        req = f"{helper.getEndpoint()}/datasets/{dset_id}/attributes"
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        self.assertTrue("hrefs" in rspJson)
+        self.assertFalse("type" in rspJson)
+        self.assertFalse("shape" in rspJson)
+        self.assertTrue("attributes" in rspJson)
+        self.assertEqual(len(rspJson["attributes"]), attr_count)
 
     def testScalarEmptyDimsDataset(self):
         # Test creation/deletion of scalar dataset obj
diff --git a/tests/integ/datatype_test.py b/tests/integ/datatype_test.py
index ce2a0e1a..f8f01bea 100755
--- a/tests/integ/datatype_test.py
+++ b/tests/integ/datatype_test.py
@@ -201,12 +201,24 @@ def testPostWithAttributes(self):
         rsp = self.session.post(req, data=json.dumps(payload), headers=headers)
         self.assertEqual(rsp.status_code, 201)
         rspJson = json.loads(rsp.text)
-        self.assertTrue(helper.validateId(rspJson["id"]))
+        ctype_id = rspJson["id"]
+        self.assertTrue(helper.validateId(ctype_id))
         self.assertTrue("type" in rspJson)
         type_json = rspJson["type"]
         self.assertEqual(type_json["class"], "H5T_FLOAT")
         self.assertEqual(type_json["base"], "H5T_IEEE_F32LE")
-        self.assertEqual(rspJson["attributeCount"], 4)
+        self.assertEqual(rspJson["attributeCount"], attr_count)
+
+        # fetch the attributes, check count
+        req = f"{helper.getEndpoint()}/datatypes/{ctype_id}/attributes"
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        self.assertTrue("hrefs" in rspJson)
+        self.assertFalse("type" in rspJson)
+        self.assertFalse("shape" in rspJson)
+        self.assertTrue("attributes" in rspJson)
+        self.assertEqual(len(rspJson["attributes"]), attr_count)
 
     def testPostTypes(self):
         # Test creation with all primitive types
diff --git a/tests/integ/group_test.py b/tests/integ/group_test.py
index 32234245..b78b8e58 100755
--- a/tests/integ/group_test.py
+++ b/tests/integ/group_test.py
@@ -403,8 +403,20 @@ def testPostWithAttributes(self):
         self.assertEqual(rsp.status_code, 201)
         rspJson = json.loads(rsp.text)
         self.assertEqual(rspJson["linkCount"], 0)
-        self.assertEqual(rspJson["attributeCount"], 4)
-        self.assertTrue(helper.validateId(rspJson["id"]))
+        self.assertEqual(rspJson["attributeCount"], attr_count)
+        grp_id = rspJson["id"]
+        self.assertTrue(helper.validateId(grp_id))
+
+        # fetch the attributes, check count
+        req = f"{helper.getEndpoint()}/groups/{grp_id}/attributes"
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        self.assertTrue("hrefs" in rspJson)
+        self.assertFalse("type" in rspJson)
+        self.assertFalse("shape" in rspJson)
+        self.assertTrue("attributes" in rspJson)
+        self.assertEqual(len(rspJson["attributes"]), attr_count)
 
     def testPostWithPath(self):
         # test POST with implicit parent group creation

From 47b9a6e55050231fd3f4f64e755fc3e6ac4de3fc Mon Sep 17 00:00:00 2001
From: John Readey
Date: Wed, 7 May 2025 18:46:32 +0200
Subject: [PATCH 17/49] enable links to be initialized in post groups

---
 hsds/group_dn.py          | 11 ++++--
 hsds/group_sn.py          | 72 +++++++++++++++++++++++++++++++--------
 hsds/link_sn.py           |  6 +++-
 hsds/servicenode_lib.py   | 19 ++++++++++-
 tests/integ/group_test.py | 60 +++++++++++++++++++++++++++++++-
 5 files changed, 148 insertions(+), 20 deletions(-)

diff --git a/hsds/group_dn.py b/hsds/group_dn.py
index dfce8f66..0a93bed4 100755
--- a/hsds/group_dn.py
+++ b/hsds/group_dn.py
@@ -132,12 +132,19 @@ async def POST_Group(request):
     else:
         attrs = {}
 
+    if "links" in body:
+        # initialize links
+        links = body["links"]
+        log.debug(f"POST Group with links: {links}")
+    else:
+        links = {}
+
     group_json = {
         "id": group_id,
         "root": root_id,
         "created": now,
         "lastModified": now,
-        "links": {},
+        "links": links,
         "attributes": attrs,
     }
 
@@ -153,7 +160,7 @@ async def POST_Group(request):
     resp_json["root"] = root_id
     resp_json["created"] = group_json["created"]
     resp_json["lastModified"] = group_json["lastModified"]
-    resp_json["linkCount"] = 0
+    resp_json["linkCount"] = len(links)
     resp_json["attributeCount"] = len(attrs)
 
     resp = json_response(resp_json, status=201)
diff --git a/hsds/group_sn.py b/hsds/group_sn.py
index e2395826..06874f5c 100755
--- a/hsds/group_sn.py
+++ b/hsds/group_sn.py
@@ -13,6 +13,8 @@
 # group handler for service node of hsds cluster
 #
 
+import time
+
 from aiohttp.web_exceptions import HTTPBadRequest, HTTPForbidden, HTTPNotFound
 from json import JSONDecodeError
 
@@ -23,11 +25,12 @@
 from .util.authUtil import validateUserPassword
 from .util.domainUtil import getDomainFromRequest, isValidDomain
 from .util.domainUtil import getBucketForDomain, getPathForDomain, verifyRoot
-from .util.linkUtil import validateLinkName
+from .util.linkUtil import validateLinkName, getLinkClass
 from .servicenode_lib import getDomainJson, getObjectJson, validateAction
 from .servicenode_lib import getObjectIdByPath, getPathForObjectId
 from .servicenode_lib import createObject, createObjectByPath, deleteObject
 from . import hsds_logger as log
+from . import config
 
 
 async def GET_Group(request):
@@ -189,6 +192,7 @@ async def POST_Group(request):
     h5path = None
     creation_props = None
     attrs = None
+    links = None
 
     if request.has_body:
         try:
@@ -236,28 +240,66 @@ async def POST_Group(request):
             creation_props = body["creationProperties"]
         if "attributes" in body:
             attrs = body["attributes"]
+            if not isinstance(attrs, dict):
+                msg = f"POST_Group expected dict for attributes, but got: {type(attrs)}"
+                log.warn(msg)
+                raise HTTPBadRequest(reason=msg)
             log.debug(f"POST Group attributes: {attrs}")
+        if "links" in body:
+            links = body["links"]
+            if not isinstance(links, dict):
+                msg = f"POST_Group expected dict for links, but got: {type(links)}"
+                log.warn(msg)
+                raise HTTPBadRequest(reason=msg)
+            # validate the links
+            now = time.time()
+
+            for title in links:
+                try:
+                    validateLinkName(title)
+                    link_item = links[title]
+                    link_class = getLinkClass(link_item)
+                    if "class" in link_item:
+                        if link_class != link_item["class"]:
+                            msg = f"expected link class of: {link_class} but got {link_item}"
+                            log.warn(msg)
+                            raise HTTPBadRequest(reason=msg)
+                    else:
+                        link_item["class"] = link_class
+                        getLinkClass(link_item)
+                    if "created" in link_item:
+                        created = link_item["created"]
+                        # allow "pre-dated" links if recent enough
+                        predate_max_time = config.get("predate_maxtime", default=10.0)
+                        if now - created <= predate_max_time:
+                            link_item["created"] = created
+                        else:
+                            log.warn("stale created timestamp for link, ignoring")
+                    if "created" not in link_item:
+                        link_item["created"] = now
+
+                except ValueError:
+                    raise HTTPBadRequest(reason="invalid link item")
+
+    kwargs = {"bucket": bucket}
+    if obj_id:
+        kwargs["obj_id"] = obj_id
+    if creation_props:
+        kwargs["creation_props"] = creation_props
+    if attrs:
+        kwargs["attrs"] = attrs
+    if links:
+        kwargs["links"] = links
     if parent_id:
-        kwargs = {"bucket": bucket, "parent_id": parent_id, "h5path": h5path}
-        if obj_id:
-            kwargs["obj_id"] = obj_id
-        if creation_props:
-            kwargs["creation_props"] = creation_props
-        if attrs:
-            kwargs["attrs"] = attrs
+        kwargs["parent_id"] = parent_id
+        kwargs["h5path"] = h5path
         if implicit:
             kwargs["implicit"] = True
         group_json = await createObjectByPath(app, **kwargs)
     else:
         # create an anonymous group
-        kwargs = {"bucket": bucket, "root_id": root_id}
-        if obj_id:
-            kwargs["obj_id"] = obj_id
-        if creation_props:
-            kwargs["creation_props"] = creation_props
-        if attrs:
-            kwargs["attrs"] = attrs
+        kwargs["root_id"] = root_id
         group_json = await createObject(app, **kwargs)
 
     log.debug(f"returning resp: {group_json}")
diff --git a/hsds/link_sn.py b/hsds/link_sn.py
index dc80d9e4..bc7f79ee 100755
--- a/hsds/link_sn.py
+++ b/hsds/link_sn.py
@@ -13,7 +13,7 @@
 # service node of hsds cluster
 #
 
-from aiohttp.web_exceptions import HTTPBadRequest
+from aiohttp.web_exceptions import HTTPBadRequest, HTTPInternalServerError
 from json import JSONDecodeError
 
 from h5json.objid import isValidUuid, getCollectionForId
@@ -142,6 +142,10 @@ async def GET_Links(request):
 
     # mix in collection key, target and hrefs
     for link in links:
+        if "class" not in link:
+            log.error("expected to find class key in link")
+            raise HTTPInternalServerError()
+
         if link["class"] == "H5L_TYPE_HARD":
             collection_name = getCollectionForId(link["id"])
             link["collection"] = collection_name
diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py
index 80fe7af1..4247ab7a 100644
--- a/hsds/servicenode_lib.py
+++ b/hsds/servicenode_lib.py
@@ -1294,6 +1294,7 @@ async def createObject(app,
                        layout=None,
                        creation_props=None,
                        attrs=None,
+
links=None, bucket=None): """ create a group, ctype, or dataset object and return object json Determination on whether a group, ctype, or dataset is created is based on: @@ -1319,6 +1320,8 @@ async def createObject(app, log.debug(f" cprops: {creation_props}") if attrs: log.debug(f" attrs: {attrs}") + if links: + log.debug(f" links: {links}") if obj_id: log.debug(f"using client supplied id: {obj_id}") @@ -1347,8 +1350,13 @@ async def createObject(app, attrs_json = {"attributes": attrs} attr_items = await getAttributesFromRequest(app, attrs_json, **kwargs) log.debug(f"got attr_items: {attr_items}") - obj_json["attributes"] = attr_items + if links: + if collection != "groups": + msg = "links can only be used with groups" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + obj_json["links"] = links log.debug(f"create {collection} obj, body: {obj_json}") dn_url = getDataNodeUrl(app, obj_id) req = f"{dn_url}/{collection}" @@ -1368,6 +1376,7 @@ async def createObjectByPath(app, layout=None, creation_props=None, attrs=None, + links=None, bucket=None): """ create an object at the designated path relative to the parent. @@ -1394,6 +1403,12 @@ async def createObjectByPath(app, log.debug(f" cprops: {creation_props}") if attrs: log.debug(f" attrs: {attrs}") + if links: + log.debug(f" links: {links}") + if obj_type: + msg = "only group objects can have links" + log.warn(msg) + raise HTTPBadRequest(reason=msg) root_id = getRootObjId(parent_id) @@ -1474,6 +1489,8 @@ async def createObjectByPath(app, kwargs["creation_props"] = creation_props if attrs: kwargs["attrs"] = attrs + if links: + kwargs["links"] = links if obj_id: kwargs["obj_id"] = obj_id obj_json = await createObject(app, **kwargs) diff --git a/tests/integ/group_test.py b/tests/integ/group_test.py index b78b8e58..d2ac33b5 100755 --- a/tests/integ/group_test.py +++ b/tests/integ/group_test.py @@ -316,7 +316,7 @@ def testPostWithLink(self): self.assertEqual(rspJson["linkCount"], 0) self.assertEqual(rspJson["attributeCount"], 0) new_group_id = rspJson["id"] - self.assertTrue(helper.validateId(rspJson["id"])) + self.assertTrue(helper.validateId(new_group_id)) self.assertTrue(new_group_id != root_uuid) # get root group and verify link count is 1 @@ -418,6 +418,64 @@ def testPostWithAttributes(self): self.assertTrue("attributes") in rspJson self.assertEqual(len(rspJson["attributes"]), attr_count) + def testPostWithLinks(self): + # test POST with attribute initialization + print("testPostWithLinks", self.base_domain) + headers = helper.getRequestHeaders(domain=self.base_domain) + + # get root id + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # some objects to link + link_count = 4 + links = {} + req = helper.getEndpoint() + "/groups" + + for i in range(link_count): + rsp = self.session.post(req, headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + group_id = rspJson["id"] + self.assertTrue(helper.validateId(group_id)) + links[f"obj_{i}"] = {"id": group_id} + + # create new group + payload = {"links": links, "link": {"id": root_uuid, "name": "g1"}} + req = helper.getEndpoint() + "/groups" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], link_count) + self.assertEqual(rspJson["attributeCount"], 0) + grp_id 
= rspJson["id"] + helper.validateId(grp_id) + + # fetch all the links + req = helper.getEndpoint() + "/groups/" + grp_id + "/links" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + + self.assertTrue("links" in rspJson) + links_rsp = rspJson["links"] + self.assertEqual(len(links_rsp), link_count) + for i in range(link_count): + link_rsp = links_rsp[i] + self.assertTrue("class" in link_rsp) + self.assertEqual(link_rsp["class"], "H5L_TYPE_HARD") + self.assertTrue("id" in link_rsp) + self.assertTrue("title" in link_rsp) + self.assertEqual(link_rsp["title"], f"obj_{i}") + self.assertTrue("collection" in link_rsp) + self.assertEqual(link_rsp["collection"], "groups") + self.assertTrue("target" in link_rsp) + self.assertTrue("href" in link_rsp) + def testPostWithPath(self): # test POST with implicit parent group creation print("testPostWithPath", self.base_domain) From d9c3e875d87a53c1c60ddff15c732db74a87099c Mon Sep 17 00:00:00 2001 From: John Readey Date: Thu, 8 May 2025 20:46:51 +0200 Subject: [PATCH 18/49] support dataset value init in post request --- hsds/chunk_crawl.py | 1 + hsds/chunk_sn.py | 192 +---------------------------------- hsds/dset_lib.py | 195 +++++++++++++++++++++++++++++++++++- hsds/dset_sn.py | 51 +++++++++- hsds/servicenode_lib.py | 5 +- tests/integ/dataset_test.py | 1 - tests/integ/value_test.py | 61 +++++++++++ 7 files changed, 307 insertions(+), 199 deletions(-) diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py index 960cdadf..47b4b114 100755 --- a/hsds/chunk_crawl.py +++ b/hsds/chunk_crawl.py @@ -84,6 +84,7 @@ async def write_chunk_hyperslab( msg = f"write_chunk_hyperslab, chunk_id: {chunk_id}, slices: {slices}, " msg += f"bucket: {bucket}" + msg += f" dset_json: {dset_json}" log.info(msg) if "layout" not in dset_json: log.error(f"No layout found in dset_json: {dset_json}") diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 4bb084b3..87f2fdb4 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -19,7 +19,6 @@ import numpy as np from json import JSONDecodeError -from asyncio import IncompleteReadError from aiohttp.web_exceptions import HTTPException, HTTPBadRequest from aiohttp.web_exceptions import HTTPRequestEntityTooLarge from aiohttp.web_exceptions import HTTPConflict, HTTPInternalServerError @@ -37,11 +36,9 @@ from .util.dsetUtil import isNullSpace, isScalarSpace, get_slices, getShapeDims from .util.dsetUtil import isExtensible, getSelectionPagination from .util.dsetUtil import getSelectionShape, getDsetMaxDims, getChunkLayout -from .util.chunkUtil import getNumChunks, getChunkIds, getChunkId from .util.authUtil import getUserPasswordFromRequest, validateUserPassword from .servicenode_lib import getDsetJson, validateAction -from .dset_lib import getSelectionData, getParser, extendShape -from .chunk_crawl import ChunkCrawler +from .dset_lib import getSelectionData, getParser, extendShape, doPointWrite, doHyperslabWrite from . import config from . 
import hsds_logger as log @@ -464,188 +461,6 @@ async def arrayResponse(arr, request, dset_json): return resp -async def _doPointWrite(app, - request, - points=None, - data=None, - dset_json=None, - bucket=None - ): - """ write the given points to the dataset """ - - num_points = len(points) - log.debug(f"doPointWrite - num_points: {num_points}") - dset_id = dset_json["id"] - layout = getChunkLayout(dset_json) - datashape = dset_json["shape"] - dims = getShapeDims(datashape) - rank = len(dims) - - chunk_dict = {} # chunk ids to list of points in chunk - - for pt_indx in range(num_points): - if rank == 1: - point = int(points[pt_indx]) - else: - point_tuple = points[pt_indx] - point = [] - for i in range(len(point_tuple)): - point.append(int(point_tuple[i])) - if rank == 1: - if point < 0 or point >= dims[0]: - msg = f"PUT Value point: {point} is not within the " - msg += "bounds of the dataset" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - else: - if len(point) != rank: - msg = "PUT Value point value did not match dataset rank" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - for i in range(rank): - if point[i] < 0 or point[i] >= dims[i]: - msg = f"PUT Value point: {point} is not within the " - msg += "bounds of the dataset" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - chunk_id = getChunkId(dset_id, point, layout) - # get the pt_indx element from the input data - value = data[pt_indx] - if chunk_id not in chunk_dict: - point_list = [point, ] - point_data = [value, ] - chunk_dict[chunk_id] = {"indices": point_list, "points": point_data} - else: - item = chunk_dict[chunk_id] - point_list = item["indices"] - point_list.append(point) - point_data = item["points"] - point_data.append(value) - - num_chunks = len(chunk_dict) - log.debug(f"num_chunks: {num_chunks}") - max_chunks = int(config.get("max_chunks_per_request", default=1000)) - if num_chunks > max_chunks: - msg = f"PUT value request with more than {max_chunks} chunks" - log.warn(msg) - - chunk_ids = list(chunk_dict.keys()) - chunk_ids.sort() - - crawler = ChunkCrawler( - app, - chunk_ids, - dset_json=dset_json, - bucket=bucket, - points=chunk_dict, - action="write_point_sel", - ) - await crawler.crawl() - - crawler_status = crawler.get_status() - - if crawler_status not in (200, 201): - msg = f"doPointWritte raising HTTPInternalServerError for status: {crawler_status}" - log.error(msg) - raise HTTPInternalServerError() - else: - log.info("doPointWrite success") - - -async def _doHyperslabWrite(app, - request, - page_number=0, - page=None, - data=None, - dset_json=None, - select_dtype=None, - bucket=None - ): - """ write the given page selection to the dataset """ - dset_id = dset_json["id"] - log.info(f"_doHyperslabWrite on {dset_id} - page: {page_number}") - type_json = dset_json["type"] - - if select_dtype is not None: - item_size = getDtypeItemSize(select_dtype) - else: - item_size = getItemSize(type_json) - if item_size == "H5T_VARIABLE" and data is None: - msg = "unexpected call to _doHyperslabWrite for variable length data" - log.error(msg) - raise HTTPInternalServerError() - - layout = getChunkLayout(dset_json) - - num_chunks = getNumChunks(page, layout) - log.debug(f"num_chunks: {num_chunks}") - max_chunks = int(config.get("max_chunks_per_request", default=1000)) - if num_chunks > max_chunks: - msg = f"PUT value chunk count: {num_chunks} exceeds max_chunks: {max_chunks}" - log.warn(msg) - select_shape = getSelectionShape(page) - log.debug(f"got select_shape: {select_shape} for page: {page_number}") - - if data 
is None: - num_bytes = math.prod(select_shape) * item_size - log.debug(f"reading {num_bytes} from request stream") - # read page of data from input stream - try: - page_bytes = await request_read(request, count=num_bytes) - except HTTPRequestEntityTooLarge as tle: - msg = "Got HTTPRequestEntityTooLarge exception during " - msg += f"binary read: {tle}) for page: {page_number}" - log.warn(msg) - raise # re-throw - except IncompleteReadError as ire: - msg = "Got asyncio.IncompleteReadError during binary " - msg += f"read: {ire} for page: {page_number}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - log.debug(f"read {len(page_bytes)} for page: {page_number}") - try: - arr = bytesToArray(page_bytes, select_dtype, select_shape) - except ValueError as ve: - msg = f"bytesToArray value error for page: {page_number}: {ve}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - else: - arr = data # use array provided to function - - try: - chunk_ids = getChunkIds(dset_id, page, layout) - except ValueError: - log.warn("getChunkIds failed") - raise HTTPInternalServerError() - if len(chunk_ids) < 10: - log.debug(f"chunk_ids: {chunk_ids}") - else: - log.debug(f"chunk_ids: {chunk_ids[:10]} ...") - if len(chunk_ids) > max_chunks: - msg = f"got {len(chunk_ids)} for page: {page_number}. max_chunks: {max_chunks}" - log.warn(msg) - - crawler = ChunkCrawler( - app, - chunk_ids, - dset_json=dset_json, - bucket=bucket, - slices=page, - arr=arr, - action="write_chunk_hyperslab", - ) - await crawler.crawl() - - crawler_status = crawler.get_status() - - if crawler_status not in (200, 201): - msg = f"crawler failed for page: {page_number} with status: {crawler_status}" - log.error(msg) - raise HTTPInternalServerError() - else: - log.info("crawler write_chunk_hyperslab successful") - - async def PUT_Value(request): """ Handler for PUT //value request @@ -940,13 +755,13 @@ async def PUT_Value(request): else: kwargs["data"] = None # do write for one page selection - await _doHyperslabWrite(app, request, **kwargs) + await doHyperslabWrite(app, request, **kwargs) else: # # Do point put # kwargs = {"points": points, "data": arr, "dset_json": dset_json, "bucket": bucket} - await _doPointWrite(app, request, **kwargs) + await doPointWrite(app, request, **kwargs) # write successful @@ -1089,7 +904,6 @@ async def GET_Value(request): arr = None # will be set based on returned data if stream_pagination: - # example # get binary data a page at a time and write back to response if item_size == "H5T_VARIABLE": page_item_size = VARIABLE_AVG_ITEM_SIZE # random guess of avg item_size diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py index 689c2c7e..c40e2c39 100755 --- a/hsds/dset_lib.py +++ b/hsds/dset_lib.py @@ -11,25 +11,28 @@ ############################################################################## import asyncio +from asyncio import IncompleteReadError + import math import numpy as np from aiohttp.client_exceptions import ClientError -from aiohttp.web_exceptions import HTTPBadRequest, HTTPConflict, HTTPInternalServerError +from aiohttp.web_exceptions import HTTPBadRequest, HTTPConflict +from aiohttp.web_exceptions import HTTPInternalServerError, HTTPRequestEntityTooLarge -from h5json.hdf5dtype import createDataType, getItemSize -from h5json.array_util import getNumpyValue +from h5json.hdf5dtype import createDataType, getItemSize, getDtypeItemSize +from h5json.array_util import getNumpyValue, bytesToArray from h5json.objid import isSchema2Id, getS3Key, getObjId from .util.nodeUtil import getDataNodeUrl from .util.boolparser 
import BooleanParser from .util.dsetUtil import isNullSpace, getDatasetLayout, getDatasetLayoutClass, get_slices -from .util.dsetUtil import getChunkLayout, getSelectionShape, getShapeDims +from .util.dsetUtil import getShapeDims, getSelectionShape, getChunkLayout from .util.chunkUtil import getChunkCoordinate, getChunkIndex, getChunkSuffix from .util.chunkUtil import getNumChunks, getChunkIds, getChunkId from .util.chunkUtil import getChunkCoverage, getDataCoverage from .util.chunkUtil import getQueryDtype, get_chunktable_dims -from .util.httpUtil import http_delete, http_put +from .util.httpUtil import http_delete, http_put, request_read from .util.rangegetUtil import getHyperChunkFactors from .util.storUtil import getStorKeys @@ -1056,3 +1059,185 @@ async def deleteAllChunks(app, dset_id, bucket=None): await removeChunks(app, chunk_ids, bucket=bucket) else: log.info(f"deleteAllChunks for {dset_id} - no chunks need deletion") + + +async def doPointWrite(app, + request, + points=None, + data=None, + dset_json=None, + bucket=None + ): + """ write the given points to the dataset """ + + num_points = len(points) + log.debug(f"doPointWrite - num_points: {num_points}") + dset_id = dset_json["id"] + layout = getChunkLayout(dset_json) + datashape = dset_json["shape"] + dims = getShapeDims(datashape) + rank = len(dims) + + chunk_dict = {} # chunk ids to list of points in chunk + + for pt_indx in range(num_points): + if rank == 1: + point = int(points[pt_indx]) + else: + point_tuple = points[pt_indx] + point = [] + for i in range(len(point_tuple)): + point.append(int(point_tuple[i])) + if rank == 1: + if point < 0 or point >= dims[0]: + msg = f"PUT Value point: {point} is not within the " + msg += "bounds of the dataset" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + if len(point) != rank: + msg = "PUT Value point value did not match dataset rank" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + for i in range(rank): + if point[i] < 0 or point[i] >= dims[i]: + msg = f"PUT Value point: {point} is not within the " + msg += "bounds of the dataset" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + chunk_id = getChunkId(dset_id, point, layout) + # get the pt_indx element from the input data + value = data[pt_indx] + if chunk_id not in chunk_dict: + point_list = [point, ] + point_data = [value, ] + chunk_dict[chunk_id] = {"indices": point_list, "points": point_data} + else: + item = chunk_dict[chunk_id] + point_list = item["indices"] + point_list.append(point) + point_data = item["points"] + point_data.append(value) + + num_chunks = len(chunk_dict) + log.debug(f"num_chunks: {num_chunks}") + max_chunks = int(config.get("max_chunks_per_request", default=1000)) + if num_chunks > max_chunks: + msg = f"PUT value request with more than {max_chunks} chunks" + log.warn(msg) + + chunk_ids = list(chunk_dict.keys()) + chunk_ids.sort() + + crawler = ChunkCrawler( + app, + chunk_ids, + dset_json=dset_json, + bucket=bucket, + points=chunk_dict, + action="write_point_sel", + ) + await crawler.crawl() + + crawler_status = crawler.get_status() + + if crawler_status not in (200, 201): + msg = f"doPointWritte raising HTTPInternalServerError for status: {crawler_status}" + log.error(msg) + raise HTTPInternalServerError() + else: + log.info("doPointWrite success") + + +async def doHyperslabWrite(app, + request, + page_number=0, + page=None, + data=None, + dset_json=None, + select_dtype=None, + bucket=None + ): + """ write the given page selection to the dataset """ + dset_id = dset_json["id"] + 
log.info(f"doHyperslabWrite on {dset_id} - page: {page_number} dset_json: {dset_json}") + type_json = dset_json["type"] + + if select_dtype is not None: + item_size = getDtypeItemSize(select_dtype) + else: + item_size = getItemSize(type_json) + if item_size == "H5T_VARIABLE" and data is None: + msg = "unexpected call to doHyperslabWrite for variable length data" + log.error(msg) + raise HTTPInternalServerError() + + layout = getChunkLayout(dset_json) + + num_chunks = getNumChunks(page, layout) + log.debug(f"num_chunks: {num_chunks}") + max_chunks = int(config.get("max_chunks_per_request", default=1000)) + if num_chunks > max_chunks: + msg = f"PUT value chunk count: {num_chunks} exceeds max_chunks: {max_chunks}" + log.warn(msg) + select_shape = getSelectionShape(page) + log.debug(f"got select_shape: {select_shape} for page: {page_number}") + + if data is None: + num_bytes = math.prod(select_shape) * item_size + log.debug(f"reading {num_bytes} from request stream") + # read page of data from input stream + try: + page_bytes = await request_read(request, count=num_bytes) + except HTTPRequestEntityTooLarge as tle: + msg = "Got HTTPRequestEntityTooLarge exception during " + msg += f"binary read: {tle}) for page: {page_number}" + log.warn(msg) + raise # re-throw + except IncompleteReadError as ire: + msg = "Got asyncio.IncompleteReadError during binary " + msg += f"read: {ire} for page: {page_number}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + log.debug(f"read {len(page_bytes)} for page: {page_number}") + try: + arr = bytesToArray(page_bytes, select_dtype, select_shape) + except ValueError as ve: + msg = f"bytesToArray value error for page: {page_number}: {ve}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + arr = data # use array provided to function + + try: + chunk_ids = getChunkIds(dset_id, page, layout) + except ValueError: + log.warn("getChunkIds failed") + raise HTTPInternalServerError() + if len(chunk_ids) < 10: + log.debug(f"chunk_ids: {chunk_ids}") + else: + log.debug(f"chunk_ids: {chunk_ids[:10]} ...") + if len(chunk_ids) > max_chunks: + msg = f"got {len(chunk_ids)} for page: {page_number}. 
max_chunks: {max_chunks}" + log.warn(msg) + + crawler = ChunkCrawler( + app, + chunk_ids, + dset_json=dset_json, + bucket=bucket, + slices=page, + arr=arr, + action="write_chunk_hyperslab", + ) + await crawler.crawl() + + crawler_status = crawler.get_status() + + if crawler_status not in (200, 201): + msg = f"crawler failed for page: {page_number} with status: {crawler_status}" + log.error(msg) + raise HTTPInternalServerError() + else: + log.info("crawler write_chunk_hyperslab successful") diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index c6c5e502..7d50dbef 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -19,7 +19,7 @@ from aiohttp.web_exceptions import HTTPBadRequest, HTTPNotFound from h5json.hdf5dtype import validateTypeItem, createDataType, getBaseTypeJson, getItemSize -from h5json.array_util import getNumElements, getNumpyValue +from h5json.array_util import getNumElements, getNumpyValue, jsonToArray from h5json.objid import isValidUuid, isSchema2Id from .util.httpUtil import getHref, respJsonAssemble @@ -36,7 +36,7 @@ from .servicenode_lib import getDomainJson, getObjectJson, getDsetJson, getPathForObjectId from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo from .servicenode_lib import createObject, createObjectByPath, deleteObject -from .dset_lib import updateShape, deleteAllChunks +from .dset_lib import updateShape, deleteAllChunks, doHyperslabWrite from . import config from . import hsds_logger as log @@ -764,7 +764,7 @@ async def POST_Dataset(request): elif shape == "H5S_SCALAR": shape_json["class"] = "H5S_SCALAR" else: - msg = "POST Datset with invalid shape value" + msg = "POST Dataset with invalid shape value" log.warn(msg) raise HTTPBadRequest(reason=msg) elif isinstance(shape, list): @@ -847,6 +847,30 @@ async def POST_Dataset(request): else: shape_json["maxdims"].append(maxextent) + if "value" in body and body["value"]: + # data to initialize dataset included in request + input_data = body["value"] + msg = "input data doesn't match request type and shape" + dims = getShapeDims(shape_json) + if not dims: + log.warn(msg) + raise HTTPBadRequest(reason=msg) + arr_dtype = createDataType(datatype) + try: + input_arr = jsonToArray(dims, arr_dtype, input_data) + except ValueError: + log.warn(f"ValueError: {msg}") + raise HTTPBadRequest(reason=msg) + except TypeError: + log.warn(f"TypeError: {msg}") + raise HTTPBadRequest(reason=msg) + except IndexError: + log.warn(f"IndexError: {msg}") + raise HTTPBadRequest(reason=msg) + log.debug(f"got json arr: {input_arr.shape}") + else: + input_arr = None + layout_props = None min_chunk_size = int(config.get("min_chunk_size")) max_chunk_size = int(config.get("max_chunk_size")) @@ -1168,6 +1192,27 @@ async def POST_Dataset(request): kwargs["root_id"] = root_id dset_json = await createObject(app, **kwargs) + # write data if provided + if input_arr: + log.debug(f"write input_arr: {input_arr}") + # mixin the layout + dset_json["layout"] = layout + # make selection for entire dataspace + dims = getShapeDims(shape_json) + slices = [] + for dim in dims: + s = slice(0, dim, 1) + slices.append(s) + # make a one page list to handle the write in one chunk crawler run + # (larger write request should user binary streaming) + kwargs = {"page_number": 0, "page": slices} + kwargs["dset_json"] = dset_json + kwargs["bucket"] = bucket + kwargs["select_dtype"] = input_arr.dtype + kwargs["data"] = input_arr + # do write + await doHyperslabWrite(app, request, **kwargs) + # dataset creation successful resp = await 
jsonResponse(request, dset_json, status=201) log.response(request, resp=resp) diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 4247ab7a..baba0f27 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -20,13 +20,15 @@ from aiohttp.web_exceptions import HTTPBadRequest, HTTPForbidden, HTTPGone, HTTPConflict from aiohttp.web_exceptions import HTTPNotFound, HTTPInternalServerError + from aiohttp.client_exceptions import ClientOSError, ClientError from aiohttp import ClientResponseError from h5json.array_util import encodeData, decodeData, bytesToArray, bytesArrayToList, jsonToArray from h5json.objid import getCollectionForId, createObjId, getRootObjId from h5json.objid import isSchema2Id, getS3Key, isValidUuid -from h5json.hdf5dtype import getBaseTypeJson, validateTypeItem, createDataType, getItemSize +from h5json.hdf5dtype import getBaseTypeJson, validateTypeItem, createDataType +from h5json.hdf5dtype import getItemSize from .util.nodeUtil import getDataNodeUrl from .util.authUtil import getAclKeys @@ -37,6 +39,7 @@ from .util.domainUtil import getBucketForDomain, verifyRoot, getLimits from .util.storUtil import getCompressors from .util.dsetUtil import getShapeDims + from .basenode import getVersion from . import hsds_logger as log diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py index 46726e60..3d4610a4 100755 --- a/tests/integ/dataset_test.py +++ b/tests/integ/dataset_test.py @@ -309,7 +309,6 @@ def testPostDatasetWithAttributes(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - print("rspJson:", rspJson) root_uuid = rspJson["root"] helper.validateId(root_uuid) diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index 66287caf..2eca94bb 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -951,6 +951,67 @@ def testPutScalarDataset(self): self.assertTrue("value" in rspJson) self.assertEqual(rspJson["value"], "Hello, world") + def testScalarDatasetInitData(self): + # Test creation/deletion of scalar dataset obj along with initial data + print("testScalarDatasetInitData", self.base_domain) + headers = helper.getRequestHeaders(domain=self.base_domain) + req = self.endpoint + "/" + + # Get root uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # create a dataset obj + data = {"type": "H5T_STD_I32LE", "shape": "H5S_SCALAR", "value": 42} + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["attributeCount"], 0) + dset_id = rspJson["id"] + self.assertTrue(helper.validateId(dset_id)) + + # read back the obj + req = self.endpoint + "/datasets/" + dset_id + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + + expected_keys = [ + "id", + "shape", + "hrefs", + "layout", + "creationProperties", + "attributeCount", + "created", + "lastModified", + "root", + "domain", + ] + + for name in expected_keys: + self.assertTrue(name in rspJson) + self.assertEqual(rspJson["id"], dset_id) + self.assertEqual(rspJson["root"], root_uuid) + self.assertEqual(rspJson["domain"], self.base_domain) + self.assertEqual(rspJson["attributeCount"], 0) + shape_json = rspJson["shape"] + 
self.assertTrue(shape_json["class"], "H5S_SCALAR") + self.assertTrue(rspJson["type"], "H5T_STD_I32LE") + + # read the data back + req += "/value" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("hrefs" in rspJson) + self.assertTrue("value" in rspJson) + self.assertEqual(rspJson["value"], 42) + def testNullSpaceDataset(self): # Test attempted read/write to null space dataset print("testNullSpaceDataset", self.base_domain) From 4ab24fc6837158ca9eecfb14c3f5c63e391f6190 Mon Sep 17 00:00:00 2001 From: John Readey Date: Fri, 9 May 2025 12:05:00 +0200 Subject: [PATCH 19/49] add compound init value test --- tests/integ/attr_test.py | 1 - tests/integ/value_test.py | 90 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/tests/integ/attr_test.py b/tests/integ/attr_test.py index b9f4dd7e..de54c5ea 100644 --- a/tests/integ/attr_test.py +++ b/tests/integ/attr_test.py @@ -915,7 +915,6 @@ def testPutCommittedType(self): value.append(i * 0.5) payload = {"type": dtype_uuid, "shape": 10, "value": value} req = self.endpoint + "/groups/" + root_id + "/attributes/" + attr_name - print("req:", req) rsp = self.session.put(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) # create attribute diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index 2eca94bb..dd4ef4f7 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -1273,6 +1273,96 @@ def testPutCompound(self): self.assertEqual(len(item), 1) self.assertEqual(item[0], i * 10) + def testPutCompoundInitData(self): + headers = helper.getRequestHeaders(domain=self.base_domain) + req = self.endpoint + "/" + + # Get root uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + str_type = { + "charSet": "H5T_CSET_ASCII", + "class": "H5T_STRING", + "strPad": "H5T_STR_NULLPAD", + "length": 1, + } + + fields = ( + {"name": "temp", "type": "H5T_STD_I32LE"}, + {"name": "unit", "type": str_type}, + ) + datatype = {"class": "H5T_COMPOUND", "fields": fields} + + # + # create compound scalar dataset + # + value = (42, 'F') + payload = {"type": datatype} # , "value": value} + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) # create dataset + + rspJson = json.loads(rsp.text) + dset0d_uuid = rspJson["id"] + self.assertTrue(helper.validateId(dset0d_uuid)) + + # verify the shape of the dataset + req = self.endpoint + "/datasets/" + dset0d_uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) # get dataset + rspJson = json.loads(rsp.text) + shape = rspJson["shape"] + self.assertEqual(shape["class"], "H5S_SCALAR") + + # write entire array + payload = {"value": value} + req = self.endpoint + "/datasets/" + dset0d_uuid + "/value" + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 200) # write value + + # read back the value + req = self.endpoint + "/datasets/" + dset0d_uuid + "/value" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("hrefs" in rspJson) + self.assertTrue("value" in rspJson) + + # + # create 1d dataset + # + num_elements = 10 + payload = {"type": 
datatype, "shape": num_elements} + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) # create dataset + + rspJson = json.loads(rsp.text) + dset1d_uuid = rspJson["id"] + self.assertTrue(helper.validateId(dset1d_uuid)) + + # link new dataset as 'dset1' + name = "dset1d" + helper.getRandomName() + req = self.endpoint + "/groups/" + root_uuid + "/links/" + name + payload = {"id": dset1d_uuid} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # write entire array + value = [] + for i in range(num_elements): + item = (i * 10, 'F') + value.append(item) + payload = {"value": value} + + req = self.endpoint + "/datasets/" + dset1d_uuid + "/value" + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 200) # write value + def testSimpleTypeFillValue(self): # test Dataset with simple type and fill value print("testSimpleTypeFillValue", self.base_domain) From fc3ad689abb54ea19787247d2be256aa516f1a2c Mon Sep 17 00:00:00 2001 From: John Readey Date: Fri, 9 May 2025 14:52:11 +0200 Subject: [PATCH 20/49] added post data with compound data initializer --- hsds/dset_sn.py | 3 ++- hsds/servicenode_lib.py | 4 +++- tests/integ/value_test.py | 45 +++++++++++++++++---------------------- 3 files changed, 24 insertions(+), 28 deletions(-) diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 7d50dbef..9f0593e6 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -856,6 +856,7 @@ async def POST_Dataset(request): log.warn(msg) raise HTTPBadRequest(reason=msg) arr_dtype = createDataType(datatype) + try: input_arr = jsonToArray(dims, arr_dtype, input_data) except ValueError: @@ -1193,7 +1194,7 @@ async def POST_Dataset(request): dset_json = await createObject(app, **kwargs) # write data if provided - if input_arr: + if input_arr is not None: log.debug(f"write input_arr: {input_arr}") # mixin the layout dset_json["layout"] = layout diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index baba0f27..de1253ee 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -1070,7 +1070,9 @@ def getValueFromRequest(body, data_type, data_shape): else: np_dims = dims - if body.get("encoding"): + if "encoding" in body: + encoding = body["encoding"] + log.debug(f"using encoding: {encoding}") item_size = getItemSize(data_type) if item_size == "H5T_VARIABLE": msg = "base64 encoding is not support for variable length attributes" diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index dd4ef4f7..c9e88afb 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -1288,7 +1288,7 @@ def testPutCompoundInitData(self): "charSet": "H5T_CSET_ASCII", "class": "H5T_STRING", "strPad": "H5T_STR_NULLPAD", - "length": 1, + "length": 5, } fields = ( @@ -1300,8 +1300,8 @@ def testPutCompoundInitData(self): # # create compound scalar dataset # - value = (42, 'F') - payload = {"type": datatype} # , "value": value} + value = (42, 'C') + payload = {"type": datatype, "value": value} req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) # create dataset @@ -1318,12 +1318,6 @@ def testPutCompoundInitData(self): shape = rspJson["shape"] self.assertEqual(shape["class"], "H5S_SCALAR") - # write entire array - payload = {"value": value} - req = self.endpoint + "/datasets/" + dset0d_uuid + "/value" - 
rsp = self.session.put(req, data=json.dumps(payload), headers=headers) - self.assertEqual(rsp.status_code, 200) # write value - # read back the value req = self.endpoint + "/datasets/" + dset0d_uuid + "/value" rsp = self.session.get(req, headers=headers) @@ -1331,12 +1325,19 @@ def testPutCompoundInitData(self): rspJson = json.loads(rsp.text) self.assertTrue("hrefs" in rspJson) self.assertTrue("value" in rspJson) + self.assertEqual(rspJson["value"], [42, 'C']) # # create 1d dataset # + + # make up some data num_elements = 10 - payload = {"type": datatype, "shape": num_elements} + value = [] + for i in range(num_elements): + item = (i * 10, chr(ord('A') + i)) + value.append(item) + payload = {"type": datatype, "shape": num_elements, "value": value} req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) # create dataset @@ -1345,23 +1346,15 @@ def testPutCompoundInitData(self): dset1d_uuid = rspJson["id"] self.assertTrue(helper.validateId(dset1d_uuid)) - # link new dataset as 'dset1' - name = "dset1d" + helper.getRandomName() - req = self.endpoint + "/groups/" + root_uuid + "/links/" + name - payload = {"id": dset1d_uuid} - rsp = self.session.put(req, data=json.dumps(payload), headers=headers) - self.assertEqual(rsp.status_code, 201) - - # write entire array - value = [] - for i in range(num_elements): - item = (i * 10, 'F') - value.append(item) - payload = {"value": value} - + # read back the value req = self.endpoint + "/datasets/" + dset1d_uuid + "/value" - rsp = self.session.put(req, data=json.dumps(payload), headers=headers) - self.assertEqual(rsp.status_code, 200) # write value + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("hrefs" in rspJson) + self.assertTrue("value" in rspJson) + self.assertEqual(len(rspJson["value"]), num_elements) + self.assertEqual(rspJson["value"][2], [20, 'C']) def testSimpleTypeFillValue(self): # test Dataset with simple type and fill value From 8a1894558d969017dda50e719f3d3424964e58c5 Mon Sep 17 00:00:00 2001 From: John Readey Date: Thu, 15 May 2025 15:01:30 +0200 Subject: [PATCH 21/49] add post_crawler class --- hsds/domain_crawl.py | 2 +- hsds/dset_lib.py | 2 +- hsds/group_sn.py | 259 ++++++++++++++++++++++---------------- hsds/link_sn.py | 10 +- hsds/post_crawl.py | 213 +++++++++++++++++++++++++++++++ hsds/servicenode_lib.py | 1 - hsds/util/linkUtil.py | 78 ++++++++++++ tests/integ/group_test.py | 94 ++++++++++++-- 8 files changed, 534 insertions(+), 125 deletions(-) create mode 100644 hsds/post_crawl.py diff --git a/hsds/domain_crawl.py b/hsds/domain_crawl.py index 656b04e6..0b707329 100644 --- a/hsds/domain_crawl.py +++ b/hsds/domain_crawl.py @@ -466,7 +466,7 @@ async def crawl(self): pass # ok elif status == 400: log.warn("DomainCrawler - BadRequest") - raise HTTPBadRequest(reason="unkown") + raise HTTPBadRequest(reason="unknown") elif status == 404: log.warn("DomainCrawler - not found") raise HTTPNotFound() diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py index c40e2c39..fc1d3626 100755 --- a/hsds/dset_lib.py +++ b/hsds/dset_lib.py @@ -1142,7 +1142,7 @@ async def doPointWrite(app, crawler_status = crawler.get_status() if crawler_status not in (200, 201): - msg = f"doPointWritte raising HTTPInternalServerError for status: {crawler_status}" + msg = f"doPointWrite raising HTTPInternalServerError for status: {crawler_status}" log.error(msg) raise HTTPInternalServerError() else: 
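
Note: with this change POST /groups accepts either a single JSON object or a
list of per-group items (see the reworked handler below). A rough client-side
sketch of the list form; the endpoint, headers, and root_uuid values here are
placeholders, not part of this patch:

    import json
    import requests

    endpoint = "http://localhost:5101"  # placeholder HSDS endpoint
    headers = {}  # domain/auth headers omitted for brevity
    root_uuid = "g-00000000-0000-0000-0000-000000000000"  # placeholder root id

    payload = [
        {},  # anonymous group
        {"link": {"id": root_uuid, "name": "g1"}},  # hard-linked under root
    ]
    rsp = requests.post(endpoint + "/groups", data=json.dumps(payload), headers=headers)
    assert rsp.status_code == 201
    objects = rsp.json()["objects"]  # one response item per request item, in order

Each entry in "objects" mirrors the single-group response body (id, linkCount,
attributeCount, ...); testPostMulti below exercises exactly this flow.
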
diff --git a/hsds/group_sn.py b/hsds/group_sn.py
index 06874f5c..e8dd9325 100755
--- a/hsds/group_sn.py
+++ b/hsds/group_sn.py
@@ -13,8 +13,6 @@
 # group handler for service node of hsds cluster
 #
 
-import time
-
 from aiohttp.web_exceptions import HTTPBadRequest, HTTPForbidden, HTTPNotFound
 from json import JSONDecodeError
 
@@ -25,11 +23,12 @@
 from .util.authUtil import validateUserPassword
 from .util.domainUtil import getDomainFromRequest, isValidDomain
 from .util.domainUtil import getBucketForDomain, getPathForDomain, verifyRoot
-from .util.linkUtil import validateLinkName, getLinkClass
+from .util.linkUtil import validateLinkName, getRequestLinks
 from .servicenode_lib import getDomainJson, getObjectJson, validateAction
 from .servicenode_lib import getObjectIdByPath, getPathForObjectId
 from .servicenode_lib import createObject, createObjectByPath, deleteObject
 from . import hsds_logger as log
+from .post_crawl import createObjects
 from . import config
 
 
@@ -159,6 +158,114 @@ async def GET_Group(request):
     return resp
 
 
+async def _create_group(app, **kwargs):
+    """ helper method for group creation """
+
+    if kwargs.get("parent_id") and kwargs.get("h5path"):
+        group_json = await createObjectByPath(app, **kwargs)
+    else:
+        # create an anonymous group
+        log.debug(f"_create_group - kwargs: {kwargs}")
+        group_json = await createObject(app, **kwargs)
+
+    return group_json
+
+
+def _get_create_args(body, root_id=None, bucket=None, implicit=False):
+    """ get query args for _create_group from request body """
+    kwargs = {"bucket": bucket}
+    predate_max_time = config.get("predate_max_time", default=10.0)
+
+    parent_id = None
+    link_title = None
+    obj_id = None
+    h5path = None
+
+    if "link" in body:
+        if "h5path" in body:
+            msg = "link can't be used with h5path"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+        link_body = body["link"]
+        log.debug(f"link_body: {link_body}")
+        if "id" in link_body:
+            parent_id = link_body["id"]
+        if "name" in link_body:
+            link_title = link_body["name"]
+            try:
+                # will throw exception if there's a slash in the name
+                validateLinkName(link_title)
+            except ValueError:
+                msg = f"invalid link title: {link_title}"
+                log.warn(msg)
+                raise HTTPBadRequest(reason=msg)
+
+        if parent_id and link_title:
+            log.debug(f"parent id: {parent_id}, link_title: {link_title}")
+            h5path = link_title  # just use the link name as the h5path
+
+    if "parent_id" not in body:
+        parent_id = root_id
+    else:
+        parent_id = body["parent_id"]
+
+    if "h5path" in body:
+        h5path = body["h5path"]
+        # normalize the h5path
+        if h5path.startswith("/"):
+            if parent_id == root_id:
+                # just adjust the path to be relative
+                h5path = h5path[1:]
+            else:
+                msg = f"PostCrawler expecting relative h5path, but got: {h5path}"
+                log.warn(msg)
+                raise HTTPBadRequest(reason=msg)
+
+        if h5path.endswith("/"):
+            h5path = h5path[:-1]  # makes iterating through the links a bit easier
+
+    if parent_id and h5path:
+        # these are used by createObjectByPath
+        kwargs["parent_id"] = parent_id
+        kwargs["implicit"] = implicit
+        kwargs["h5path"] = h5path
+    else:
+        kwargs["root_id"] = root_id
+
+    if "id" in body:
+        obj_id = body["id"]
+        # tbd: validate this is a group id
+        kwargs["obj_id"] = obj_id
+        log.debug(f"POST group using client id: {obj_id}")
+
+    if "creationProperties" in body:
+        creation_props = body["creationProperties"]
+        # tbd: validate creation_props
+        kwargs["creation_props"] = creation_props
+
+    if "attributes" in body:
+        attrs = body["attributes"]
+        if not isinstance(attrs, dict):
+            msg = f"POST_Groups expected dict for attributes, but got: 
{type(attrs)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + log.debug(f"POST Group attributes: {attrs}") + + # tbd: validate attributes + kwargs["attrs"] = attrs + if "links" in body: + body_links = body["links"] + log.debug(f"got links for new group: {body_links}") + try: + links = getRequestLinks(body["links"], predate_max_time=predate_max_time) + except ValueError: + msg = "invalid link item sent in request" + raise HTTPBadRequest(reason=msg) + log.debug(f"adding links to group POST request: {links}") + kwargs["links"] = links + + return kwargs + + async def POST_Group(request): """HTTP method to create new Group object""" log.request(request) @@ -177,6 +284,7 @@ async def POST_Group(request): bucket = getBucketForDomain(domain) domain_json = await getDomainJson(app, domain, reload=True) + log.debug(f"got domain_json: {domain_json}") # throws exception if not allowed aclCheck(app, domain_json, "create", username) @@ -186,14 +294,8 @@ async def POST_Group(request): # allow parent group creation or not implicit = getBooleanParam(params, "implicit") - - parent_id = None - obj_id = None - h5path = None - creation_props = None - attrs = None - links = None - + kwargs = {} + post_group_rsp = None if request.has_body: try: body = await request.json() @@ -204,107 +306,48 @@ async def POST_Group(request): log.info(f"POST Group body: {body}") if body: - if "link" in body: - if "h5path" in body: - msg = "link can't be used with h5path" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - link_body = body["link"] - log.debug(f"link_body: {link_body}") - if "id" in link_body: - parent_id = link_body["id"] - if "name" in link_body: - link_title = link_body["name"] - try: - # will throw exception if there's a slash in the name - validateLinkName(link_title) - except ValueError: - msg = f"invalid link title: {link_title}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - if parent_id and link_title: - log.debug(f"parent id: {parent_id}, link_title: {link_title}") - h5path = link_title # just use the link name as the h5path - - if "h5path" in body: - h5path = body["h5path"] - if "parent_id" not in body: - parent_id = root_id + if isinstance(body, list): + count = len(body) + log.debug(f"multiple group create: {count} items") + if count == 0: + # equivalent to no body, anonymous group case + kwargs = {"root_id": root_id, "bucket": bucket} + elif count == 1: + # just create one object in typical way + kwargs = _get_create_args(body[0], + root_id=root_id, + bucket=bucket, + implicit=implicit) else: - parent_id = body["parent_id"] - if "id" in body: - obj_id = body["id"] - log.debug(f"POST group using client id: {obj_id}") - if "creationProperties" in body: - creation_props = body["creationProperties"] - if "attributes" in body: - attrs = body["attributes"] - if not isinstance(attrs, dict): - msg = f"POST_Groups expected dict for for links, but got: {type(links)}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - log.debug(f"POST Group attributes: {attrs}") - if "links" in body: - links = body["links"] - if not isinstance(links, dict): - msg = f"POST_Groups expected dict for for links, but got: {type(links)}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - # validate the links - now = time.time() - - for title in links: - try: - validateLinkName(title) - link_item = links[title] - link_class = getLinkClass(link_item) - if "class" in link_item: - if link_class != link_item["class"]: - msg = f"expected link class of: {link_class} but got {link_item}" - log.warn(msg) - raise 
HTTPBadRequest(reason=msg) - else: - link_item["class"] = link_class - getLinkClass(link_item) - if "created" in link_item: - created = link_item["created"] - # allow "pre-dated" attributes if recent enough - predate_max_time = config.get("predate_max_time", default=10.0) - if now - created > predate_max_time: - link_item["created"] = created - else: - log.warn("stale created timestamp for link, ignoring") - if "created" not in link_item: - link_item["created"] = now - - except ValueError: - raise HTTPBadRequest(reason="invalid link item") - - kwargs = {"bucket": bucket} - if obj_id: - kwargs["obj_id"] = obj_id - if creation_props: - kwargs["creation_props"] = creation_props - if attrs: - kwargs["attrs"] = attrs - if links: - kwargs["links"] = links - - if parent_id: - kwargs["parent_id"] = parent_id - kwargs["h5path"] = h5path - if implicit: - kwargs["implicit"] = True - group_json = await createObjectByPath(app, **kwargs) + # create multiple group objects + kwarg_list = [] # list of kwargs for each object + + for item in body: + log.debug(f"item: {item}") + if not isinstance(item, dict): + msg = f"PostGroup - invalid item type: {type(item)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + kwargs = _get_create_args(item, root_id=root_id, bucket=bucket) + kwarg_list.append(kwargs) + kwargs = {"bucket": bucket, "root_id": root_id} + post_group_rsp = await createObjects(app, kwarg_list, **kwargs) + else: + kwargs = _get_create_args(body, root_id=root_id, bucket=bucket, implicit=implicit) + else: + kwargs["root_id"] = root_id + kwargs["bucket"] = bucket else: - # create an anonymous group - kwargs["root_id"] = root_id - group_json = await createObject(app, **kwargs) + kwargs = {"root_id": root_id, "bucket": bucket} + + if post_group_rsp is None: + # Handle cases other than multi-group create here + log.debug(f"_create_group - kwargs: {kwargs}") + post_group_rsp = await _create_group(app, **kwargs) - log.debug(f"returning resp: {group_json}") + log.debug(f"returning resp: {post_group_rsp}") # group creation successful - resp = await jsonResponse(request, group_json, status=201) + resp = await jsonResponse(request, post_group_rsp, status=201) log.response(request, resp=resp) return resp diff --git a/hsds/link_sn.py b/hsds/link_sn.py index bc7f79ee..66e3a698 100755 --- a/hsds/link_sn.py +++ b/hsds/link_sn.py @@ -142,11 +142,15 @@ async def GET_Links(request): # mix in collection key, target and hrefs for link in links: - if "class" not in link: - log.error("expected to find class key in link") - raise HTTPInternalServerError() + for key in ("class", "title"): + if key not in link: + log.error(f"expected to find {key} key in link") + raise HTTPInternalServerError() if link["class"] == "H5L_TYPE_HARD": + if "id" not in link: + log.error("expected to id key in hard link") + raise HTTPInternalServerError() collection_name = getCollectionForId(link["id"]) link["collection"] = collection_name target_uri = "/" + collection_name + "/" + link["id"] diff --git a/hsds/post_crawl.py b/hsds/post_crawl.py new file mode 100644 index 00000000..057cd96d --- /dev/null +++ b/hsds/post_crawl.py @@ -0,0 +1,213 @@ +############################################################################## +# Copyright by The HDF Group. # +# All rights reserved. # +# # +# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # +# Utilities. 
The full HSDS copyright notice, including # +# terms governing use, modification, and redistribution, is contained in # +# the file COPYING, which can be found at the root of the source code # +# distribution tree. If you do not have access to this file, you may # +# request a copy from help@hdfgroup.org. # +############################################################################## +# +# post crawler +# + +import asyncio + +from aiohttp.web_exceptions import HTTPServiceUnavailable, HTTPConflict, HTTPBadRequest +from aiohttp.web_exceptions import HTTPInternalServerError, HTTPNotFound, HTTPGone + +from .util.httpUtil import isOK +from .servicenode_lib import createObject, createObjectByPath +from . import hsds_logger as log + + +class PostCrawler: + def __init__( + self, + app, + items=None, + root_id=None, + bucket=None, + max_tasks=40, + ignore_error=False + ): + log.info("PostCrawler.__init__") + self._app = app + self._root_id = root_id + self._bucket = bucket + self._max_tasks = max_tasks + self._ignore_error = ignore_error + + if not items: + log.error("no post requests for crawler to crawl!") + raise ValueError() + if not bucket: + log.error("bucket not set for PostCrawler") + raise ValueError() + self._count = len(items) + self._items = items + self._rsp_objs = [None,] * self._count + self._q = asyncio.Queue() + log.debug(f"PostCrawler adding index 0 - {self._count} to queue") + for i in range(self._count): + self._q.put_nowait(i) + + def get_rsp_objs(self): + """ return list of object responses """ + + return self._rsp_objs + + def get_status(self): + """ return the highest status of any of the returned objects """ + status = None + for i in range(self._count): + item = self._rsp_objs[i] + if not item: + continue # resp not filled in yet + if "status" in item: + item_status = item["status"] + if status is None or item_status > status: + # return the more severe error + log.debug(f"setting status to {item_status}") + status = item_status + elif "id" in item: + # post request succeeded + if status is None: + status = 201 + else: + log.error(f"PostCrawler unexpected response for item {i}: {item}") + status = 500 + + return status + + async def crawl(self): + max_tasks = min(self._max_tasks, self._count) + workers = [asyncio.Task(self.work()) for _ in range(max_tasks)] + # When all work is done, exit. 
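+        # each worker loops on q.get() / create(index) / q.task_done();
+        # join() returns once every queued index has been marked done,
+        # after which the idle workers (blocked in get()) are cancelled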
+ msg = "PostCrawler - await queue.join - " + msg += f"count: {self._count} with {max_tasks} workers" + log.info(msg) + await self._q.join() + msg = "PostCrawler - join complete - " + msg += f"count: {self._count}" + log.info(msg) + + for w in workers: + w.cancel() + log.debug("PostCrawler - workers canceled") + + status = self.get_status() + if status: + log.debug(f"PostCrawler -- status: {status}") + log.debug(f"ignore_error: {self._ignore_error}") + if not self._ignore_error: + # throw the appropriate exception if other than 200, 201 + if isOK(status): + pass # ok + elif status == 400: + log.warn("PostCrawler - BadRequest") + raise HTTPBadRequest(reason="unknown") + elif status == 404: + log.warn("PostCrawler - not found") + raise HTTPNotFound() + elif status == 409: + log.warn("PostCrawler - conflict") + raise HTTPConflict() + elif status == 410: + log.warn("PostCrawler - gone") + raise HTTPGone() + elif status == 500: + log.error("PostCrawler - internal server error") + raise HTTPInternalServerError() + elif status == 503: + log.error("PostCrawler - server busy") + raise HTTPServiceUnavailable() + else: + log.error(f"PostCrawler - unexpected status: {status}") + raise HTTPInternalServerError() + else: + # no tasks returned anything + log.error("PostCrawler - no results returned") + if not self._ignore_error: + raise HTTPInternalServerError() + + async def work(self): + while True: + index = await self._q.get() + await self.create(index) + self._q.task_done() + + async def create(self, index): + log.debug(f"PostCrawler fetch for index: {index}") + item = self._items[index] + log.debug(f"got item: {item}") + kwargs = {"bucket": self._bucket} + + if "obj_id" in item: + kwargs["obj_id"] = item["obj_id"] + if "type" in item: + kwargs["obj_type"] = item["type"] + if "layout" in item: + kwargs["layout"] = item["layout"] + if "creation_props" in item: + kwargs["creation_props"] = item["creation_props"] + if "attrs" in item: + kwargs["attrs"] = item["attrs"] + if "parent_id" in item: + kwargs["parent_id"] = item["parent_id"] + elif "root_id" in item: + kwargs["root_id"] = item["root_id"] + if "h5path" in item: + kwargs["h5path"] = item["h5path"] + if "links" in item: + kwargs["links"] = item["links"] + + log.debug(f"PostCrawler index {index} kwargs: {kwargs}") + rsp_json = None + try: + if kwargs.get("parent_id") and kwargs.get("h5path"): + rsp_json = await createObjectByPath(self._app, **kwargs) + else: + # create an anonymous group + rsp_json = await createObject(self._app, **kwargs) + except HTTPConflict: + log.warn("PostCrawler - got HTTPConflict from http_post") + rsp_json = {"status_code": 409} + except HTTPServiceUnavailable: + rsp_json = {"status_code": 503} + except HTTPInternalServerError: + rsp_json = {"status_code": 500} + except Exception as e: + log.error(f"unexpected exception {e}") + rsp_json = {"status_code": 500} + + log.info(f"PostCrawler - index: {index} post rsp: {rsp_json}") + + self._rsp_objs[index] = rsp_json + + +async def createObjects(app, items, root_id=None, bucket=None): + """ create an objects based on parameters in items list """ + + if not root_id: + msg = "no root_id given for createObjects" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + log.info(f"createObjects with {len(items)} items, root_id: {root_id}") + + post_crawler = PostCrawler(app, root_id=root_id, bucket=bucket, items=items) + await post_crawler.crawl() + if post_crawler.get_status() > 201: + msg = f"createObjects returning status from crawler: {post_crawler.get_status()}" + log.error(msg) 
+ raise HTTPInternalServerError() + + obj_list = post_crawler.get_rsp_objs() + if not isinstance(obj_list, list): + msg = f"createObjects expected list but got: {type(obj_list)}" + log.error(msg) + raise HTTPInternalServerError() + return {"objects": obj_list} diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index de1253ee..dd32bd6e 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -41,7 +41,6 @@ from .util.dsetUtil import getShapeDims from .basenode import getVersion - from . import hsds_logger as log from . import config diff --git a/hsds/util/linkUtil.py b/hsds/util/linkUtil.py index 3469a8a1..f872f848 100644 --- a/hsds/util/linkUtil.py +++ b/hsds/util/linkUtil.py @@ -13,6 +13,7 @@ # linkdUtil: # link related functions # +import time from .. import hsds_logger as log @@ -132,3 +133,80 @@ def h5Join(path, paths): h5path += "/" h5path += s return h5path + + +def getRequestLink(title, link_json, predate_max_time=0.0): + """ return normalized link from request json + Throw value error if badly formatted """ + + if not isinstance(link_json, dict): + msg = f"expected dict for for links, but got: {type(link_json)}" + log.warn(msg) + raise ValueError(msg) + + log.debug(f"getRequestLink title: {title} link_json: {link_json}") + link_item = {} # normalized link item to return + + now = time.time() + + validateLinkName(title) # will raise ValueError is invalid + + link_class = getLinkClass(link_json) + if "class" in link_item: + if link_class != link_json["class"]: + msg = f"expected link class of: {link_class} but got {link_json}" + log.warn(msg) + raise ValueError(msg) + + link_item = {"class": link_class} + + if link_class == "H5L_TYPE_HARD": + if "id" not in link_json: + msg = "expected id key for hard link" + log.warn(msg) + raise ValueError + link_item["id"] = link_json["id"] + else: + if link_class in ("H5L_TYPE_SOFT", "H5L_TYPE_EXTERNAL"): + if "h5path" not in link_json: + msg = "expected h5path key for soft link" + log.warn(msg) + raise ValueError(msg) + link_item["h5path"] = link_json["h5path"] + + if link_class == "H5L_TYPE_EXTERNAL": + if "h5domain" not in link_json: + msg = "expected h5domain key for external link" + log.warn(msg) + raise ValueError(msg) + + if "created" in link_json: + created = link_json["created"] + # allow "pre-dated" attributes if recent enough + if now - created > predate_max_time: + link_item["created"] = created + else: + log.warn("stale created timestamp for link, ignoring") + if "created" not in link_item: + link_item["created"] = now + + return link_item + + +def getRequestLinks(links_json, predate_max_time=0.0): + """ return list of normalized links from request json + Throw value error if any is badly formatted """ + + if not isinstance(links_json, dict): + msg = f"POST_Groups expected dict for for links, but got: {type(links_json)}" + log.warn(msg) + raise ValueError(msg) + + links = {} # normalized link items to return + kwargs = {"predate_max_time": predate_max_time} + + for title in links_json: + links[title] = getRequestLink(title, links_json[title], **kwargs) + + return links + diff --git a/tests/integ/group_test.py b/tests/integ/group_test.py index d2ac33b5..fbbda066 100755 --- a/tests/integ/group_test.py +++ b/tests/integ/group_test.py @@ -261,7 +261,6 @@ def testPostWithId(self): self.assertEqual(rsp.status_code, 201) rspJson = json.loads(rsp.text) - print("rspJson:", rspJson) self.assertEqual(rspJson["linkCount"], 0) self.assertEqual(rspJson["attributeCount"], 0) self.assertEqual(grp_id, rspJson["id"]) @@ 
-360,18 +359,28 @@ def testPostIdWithLink(self): root_uuid = rspJson["root"] helper.validateId(root_uuid) - # create a group id - grp_id = createObjId("groups", root_id=root_uuid) - - # create new group - payload = {"id": grp_id, "link": {"id": root_uuid, "name": "linked_group"}} + grp_count = 3 req = helper.getEndpoint() + "/groups" - rsp = self.session.post(req, data=json.dumps(payload), headers=headers) - self.assertEqual(rsp.status_code, 201) + + for i in range(grp_count): + # create a group id + grp_id = createObjId("groups", root_id=root_uuid) + + # create new group + payload = {"id": grp_id, "link": {"id": root_uuid, "name": f"g{i:04d}"}} + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], 0) + self.assertEqual(rspJson["attributeCount"], 0) + self.assertEqual(grp_id, rspJson["id"]) + + # get root group and verify number of links + req = helper.getEndpoint() + "/groups/" + root_uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertEqual(rspJson["linkCount"], 0) - self.assertEqual(rspJson["attributeCount"], 0) - self.assertEqual(grp_id, rspJson["id"]) + self.assertEqual(rspJson["linkCount"], grp_count) def testPostWithAttributes(self): # test POST with attribute initialization @@ -717,6 +726,69 @@ def testPostWithCreationProps(self): self.assertTrue("alias" in rspJson) self.assertEqual(rspJson["alias"], []) + def testPostMulti(self): + # test POST with multi-object creation + print("testPostMulti", self.base_domain) + headers = helper.getRequestHeaders(domain=self.base_domain) + + # get root id + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # get root group and verify link count is 0 + req = helper.getEndpoint() + "/groups/" + root_uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], 0) + + # create a set of anonymous groups + grp_count = 3 + req = helper.getEndpoint() + "/groups" + + payload = [{},] * grp_count + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertTrue("objects" in rspJson) + rsp_objs = rspJson["objects"] + self.assertEqual(len(rsp_objs), grp_count) + for i in range(grp_count): + grp_rsp = rsp_objs[i] + self.assertEqual(grp_rsp["linkCount"], 0) + self.assertEqual(grp_rsp["attributeCount"], 0) + group_id = grp_rsp["id"] + self.assertTrue(helper.validateId(group_id)) + + # create a set of linked groups + grp_count = 3 + payload = [] + for i in range(grp_count): + payload.append({"link": {"id": root_uuid, "name": f"g{i}"}}) + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertTrue("objects" in rspJson) + rsp_objs = rspJson["objects"] + self.assertEqual(len(rsp_objs), grp_count) + for i in range(grp_count): + grp_rsp = rsp_objs[i] + self.assertEqual(grp_rsp["linkCount"], 0) + self.assertEqual(grp_rsp["attributeCount"], 0) + group_id = grp_rsp["id"] + self.assertTrue(helper.validateId(group_id)) + + # get root group and verify link count is grp_count + req = 
helper.getEndpoint() + "/groups/" + root_uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], grp_count) + def testDelete(self): # test Delete print("testDelete", self.base_domain) From a8ec66d9d3325ec62eb7582306c7b2c58e025d5a Mon Sep 17 00:00:00 2001 From: John Readey Date: Thu, 15 May 2025 19:38:23 +0200 Subject: [PATCH 22/49] avoid exception for mkdir race condition --- hsds/util/fileClient.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/hsds/util/fileClient.py b/hsds/util/fileClient.py index 1bc5e786..feebe2c1 100644 --- a/hsds/util/fileClient.py +++ b/hsds/util/fileClient.py @@ -88,7 +88,7 @@ def _getFileStats(self, filepath, data=None): return key_stats def _file_stats_increment(self, counter, inc=1): - """Incremenet the indicated connter""" + """Increment the indicated counter""" if "file_stats" not in self._app: # setup stats file_stats = {} @@ -175,6 +175,26 @@ async def get_object(self, key, bucket=None, offset=0, length=-1): raise HTTPInternalServerError() return data + def _mkdir(self, dirpath): + """ create the given directory if it doesn't already exist """ + try: + dirpath = pp.normpath(dirpath) + log.debug(f"normpath: {dirpath}") + + if not pp.isdir(dirpath): + log.debug(f"mkdir({dirpath})") + mkdir(dirpath) + else: + log.debug(f"isdir {dirpath} found") + except IOError as ioe: + if ioe.errno == 17: + # likely directory was created by another process since we checked + log.warn(f"mkdir failed, {dirpath} created outside this process") + else: + msg = f"fileClient: IOError on mkdir {dirpath}: {ioe}" + log.warn(msg) + raise HTTPInternalServerError() + async def put_object(self, key, data, bucket=None): """Write data to given key. Returns client specific dict on success @@ -202,15 +222,7 @@ async def put_object(self, key, data, bucket=None): for key_dir in key_dirs: dirpath = pp.join(dirpath, key_dir) log.debug(f"pp.join({key_dir}) => {dirpath}") - - dirpath = pp.normpath(dirpath) - log.debug(f"normpath: {dirpath}") - - if not pp.isdir(dirpath): - log.debug(f"mkdir({dirpath})") - mkdir(dirpath) - else: - log.debug(f"isdir {dirpath} found") + self._mkdir(dirpath) log.debug(f"open({filepath}, 'wb')") async with aiofiles.open(filepath, loop=loop, mode="wb") as f: await f.write(data) From 41e23e91b057eb4cff98a3c1593c79b5824a0fd2 Mon Sep 17 00:00:00 2001 From: John Readey Date: Thu, 15 May 2025 19:39:14 +0200 Subject: [PATCH 23/49] use domain crawler to create links for post group multi --- hsds/domain_crawl.py | 4 ++-- hsds/group_sn.py | 48 ++++++++++++++++++++++++++++++++++++++--- hsds/link_sn.py | 2 +- hsds/post_crawl.py | 4 ++-- hsds/servicenode_lib.py | 21 ++++++++---------- hsds/util/linkUtil.py | 37 ++++++++++++++++--------------- 6 files changed, 77 insertions(+), 39 deletions(-) diff --git a/hsds/domain_crawl.py b/hsds/domain_crawl.py index 0b707329..35b20bf9 100644 --- a/hsds/domain_crawl.py +++ b/hsds/domain_crawl.py @@ -248,7 +248,7 @@ async def put_attributes(self, obj_id, attr_items): async def get_obj_json(self, obj_id): """ get the given obj_json for the obj_id. 
for each group found, search the links if follow_links is set """ - log.debug(f"get_obj_json: {obj_id}") + log.debug(f"DomainCrawler get_obj_json: {obj_id}") collection = getCollectionForId(obj_id) kwargs = {"bucket": self._bucket, "include_attrs": self._include_attrs} @@ -408,7 +408,7 @@ async def get_links(self, grp_id, titles=None): async def put_links(self, grp_id, link_items): # write the given links for the obj_id - log.debug(f"put_links for {grp_id}, {len(link_items)} links") + log.debug(f"DomainCrawler put_links for {grp_id}, {len(link_items)} links") req = getDataNodeUrl(self._app, grp_id) req += f"/groups/{grp_id}/links" kwargs = {"bucket": self._bucket} diff --git a/hsds/group_sn.py b/hsds/group_sn.py index e8dd9325..bfad5b28 100755 --- a/hsds/group_sn.py +++ b/hsds/group_sn.py @@ -29,6 +29,7 @@ from .servicenode_lib import createObject, createObjectByPath, deleteObject from . import hsds_logger as log from .post_crawl import createObjects +from .domain_crawl import DomainCrawler from . import config @@ -171,7 +172,7 @@ async def _create_group(app, **kwargs): return group_json -def _get_create_args(body, root_id=None, bucket=None, implicit=False): +def _get_create_args(body, root_id=None, bucket=None, implicit=False, ignore_link=False): """ get query args for _create_group from request body """ kwargs = {"bucket": bucket} predate_max_time = config.get("predate_max_time", default=10.0) @@ -187,7 +188,7 @@ def _get_create_args(body, root_id=None, bucket=None, implicit=False): raise HTTPBadRequest(reason=msg) link_body = body["link"] log.debug(f"link_body: {link_body}") - if "id" in link_body: + if "id" in link_body and not ignore_link: parent_id = link_body["id"] if "name" in link_body: link_title = link_body["name"] @@ -201,7 +202,8 @@ def _get_create_args(body, root_id=None, bucket=None, implicit=False): if parent_id and link_title: log.debug(f"parent id: {parent_id}, link_title: {link_title}") - h5path = link_title # just use the link name as the h5path + if not ignore_link: + h5path = link_title # just use the link name as the h5path if "parent_id" not in body: parent_id = root_id @@ -329,6 +331,7 @@ async def POST_Group(request): log.warn(msg) raise HTTPBadRequest(reason=msg) kwargs = _get_create_args(item, root_id=root_id, bucket=bucket) + kwargs["ignore_link"] = True kwarg_list.append(kwargs) kwargs = {"bucket": bucket, "root_id": root_id} post_group_rsp = await createObjects(app, kwarg_list, **kwargs) @@ -346,6 +349,45 @@ async def POST_Group(request): post_group_rsp = await _create_group(app, **kwargs) log.debug(f"returning resp: {post_group_rsp}") + + if "objects" in post_group_rsp: + # add any links in multi request + objects = post_group_rsp["objects"] + obj_count = len(objects) + log.debug(f"PostGroup multi create: {obj_count} objects") + if len(body) != obj_count: + msg = f"Expected {obj_count} objects but got {len(body)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + parent_ids = {} + for index in range(obj_count): + item = body[index] + if "link" in item: + link_item = item["link"] + parent_id = link_item.get("id") + title = link_item.get("name") + if parent_id and title: + # add a hard link + object = objects[index] + obj_id = object["id"] + if parent_id not in parent_ids: + parent_ids[parent_id] = {} + links = parent_ids[parent_id] + links[title] = {"id": obj_id} + if parent_ids: + log.debug(f"POSTGroup multi - adding links: {parent_ids}") + kwargs = {"action": "put_link", "bucket": bucket} + kwargs["replace"] = True + + crawler = DomainCrawler(app, 
parent_ids, **kwargs) + + # will raise exception on not found, server busy, etc. + await crawler.crawl() + + status = crawler.get_status() + + log.info(f"DomainCrawler done for put_links action, status: {status}") + # group creation successful resp = await jsonResponse(request, post_group_rsp, status=201) log.response(request, resp=resp) diff --git a/hsds/link_sn.py b/hsds/link_sn.py index 66e3a698..938f78c2 100755 --- a/hsds/link_sn.py +++ b/hsds/link_sn.py @@ -457,7 +457,7 @@ async def PUT_Links(request): count = len(grp_ids) if count == 0: msg = "no grp_ids defined" - log.warn(f"PUT_Attributes: {msg}") + log.warn(f"PUT_Links: {msg}") raise HTTPBadRequest(reason=msg) elif count == 1: # just send one PUT Attributes request to the dn diff --git a/hsds/post_crawl.py b/hsds/post_crawl.py index 057cd96d..88ffab92 100644 --- a/hsds/post_crawl.py +++ b/hsds/post_crawl.py @@ -66,8 +66,8 @@ def get_status(self): item = self._rsp_objs[i] if not item: continue # resp not filled in yet - if "status" in item: - item_status = item["status"] + if "status_code" in item: + item_status = item["status_code"] if status is None or item_status > status: # return the more severe error log.debug(f"setting status to {item_status}") diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index dd32bd6e..8230502e 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -535,7 +535,6 @@ async def putLinks(app, group_id, items, bucket=None): or 200 if it's a duplicate of an existing link. """ isValidUuid(group_id, obj_class="groups") - group_json = None # validate input for title in items: @@ -548,25 +547,23 @@ async def putLinks(app, group_id, items, bucket=None): raise HTTPBadRequest(reason="invalid link") if link_class == "H5L_TYPE_HARD": + if "id" not in item: + msg = "expected id key for hard link class" + log.warn(msg) + raise HTTPBadRequest(reason=msg) tgt_id = item["id"] - isValidUuid(tgt_id) - # for hard links, verify that the referenced id exists and is in - # this domain - ref_json = await getObjectJson(app, tgt_id, bucket=bucket) - if not group_json: - # just need to fetch this once - group_json = await getObjectJson(app, group_id, bucket=bucket) - if ref_json["root"] != group_json["root"]: - msg = "Hard link must reference an object in the same domain" + try: + isValidUuid(tgt_id) + except ValueError: + msg = f"invalid object id: {tgt_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) # ready to add links now req = getDataNodeUrl(app, group_id) req += "/groups/" + group_id + "/links" - log.debug(f"PUT links - PUT request: {req}") + log.debug(f"PUT links {len(items)} items - PUT request: {req}") params = {"bucket": bucket} - data = {"links": items} put_rsp = await http_put(app, req, data=data, params=params) diff --git a/hsds/util/linkUtil.py b/hsds/util/linkUtil.py index f872f848..0090b045 100644 --- a/hsds/util/linkUtil.py +++ b/hsds/util/linkUtil.py @@ -136,17 +136,17 @@ def h5Join(path, paths): def getRequestLink(title, link_json, predate_max_time=0.0): - """ return normalized link from request json + """ return normalized link from request json Throw value error if badly formatted """ - + if not isinstance(link_json, dict): - msg = f"expected dict for for links, but got: {type(link_json)}" - log.warn(msg) - raise ValueError(msg) - - log.debug(f"getRequestLink title: {title} link_json: {link_json}") + msg = f"expected dict for for links, but got: {type(link_json)}" + log.warn(msg) + raise ValueError(msg) + + log.debug(f"getRequestLink title: {title} link_json: {link_json}") 
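+    # the normalized item is rebuilt from scratch below; only recognized keys
+    # (class, id or h5path, created) are carried over from the request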
link_item = {} # normalized link item to return - + now = time.time() validateLinkName(title) # will raise ValueError is invalid @@ -157,7 +157,7 @@ def getRequestLink(title, link_json, predate_max_time=0.0): msg = f"expected link class of: {link_class} but got {link_json}" log.warn(msg) raise ValueError(msg) - + link_item = {"class": link_class} if link_class == "H5L_TYPE_HARD": @@ -173,13 +173,13 @@ def getRequestLink(title, link_json, predate_max_time=0.0): log.warn(msg) raise ValueError(msg) link_item["h5path"] = link_json["h5path"] - + if link_class == "H5L_TYPE_EXTERNAL": if "h5domain" not in link_json: msg = "expected h5domain key for external link" log.warn(msg) raise ValueError(msg) - + if "created" in link_json: created = link_json["created"] # allow "pre-dated" attributes if recent enough @@ -192,16 +192,16 @@ def getRequestLink(title, link_json, predate_max_time=0.0): return link_item - + def getRequestLinks(links_json, predate_max_time=0.0): - """ return list of normalized links from request json + """ return list of normalized links from request json Throw value error if any is badly formatted """ - + if not isinstance(links_json, dict): - msg = f"POST_Groups expected dict for for links, but got: {type(links_json)}" - log.warn(msg) - raise ValueError(msg) - + msg = f"POST_Groups expected dict for for links, but got: {type(links_json)}" + log.warn(msg) + raise ValueError(msg) + links = {} # normalized link items to return kwargs = {"predate_max_time": predate_max_time} @@ -209,4 +209,3 @@ def getRequestLinks(links_json, predate_max_time=0.0): links[title] = getRequestLink(title, links_json[title], **kwargs) return links - From 7cfa3d67ebcbab48a5680e9a2cf7a6d76a93a231 Mon Sep 17 00:00:00 2001 From: John Readey Date: Fri, 16 May 2025 18:21:09 +0200 Subject: [PATCH 24/49] added multi create for datatype objs --- hsds/ctype_sn.py | 174 ++++++------- hsds/dset_sn.py | 71 +---- hsds/group_sn.py | 154 ++--------- hsds/post_crawl.py | 81 ++++-- hsds/servicenode_lib.py | 485 +++++++++++++++++++++++++---------- tests/integ/datatype_test.py | 74 ++++++ tests/integ/group_test.py | 18 ++ 7 files changed, 615 insertions(+), 442 deletions(-) diff --git a/hsds/ctype_sn.py b/hsds/ctype_sn.py index ccf033ac..d9cfb71d 100755 --- a/hsds/ctype_sn.py +++ b/hsds/ctype_sn.py @@ -17,19 +17,19 @@ from aiohttp.web_exceptions import HTTPBadRequest, HTTPGone from json import JSONDecodeError -from h5json.hdf5dtype import validateTypeItem, getBaseTypeJson from h5json.objid import isValidUuid from .util.httpUtil import getHref, respJsonAssemble, getBooleanParam from .util.httpUtil import jsonResponse -from .util.linkUtil import validateLinkName from .util.authUtil import getUserPasswordFromRequest, aclCheck from .util.authUtil import validateUserPassword from .util.domainUtil import getDomainFromRequest, getPathForDomain, isValidDomain from .util.domainUtil import getBucketForDomain, verifyRoot from .servicenode_lib import getDomainJson, getObjectJson, validateAction -from .servicenode_lib import getObjectIdByPath, getPathForObjectId -from .servicenode_lib import createObject, createObjectByPath, deleteObject +from .servicenode_lib import getObjectIdByPath, getPathForObjectId, deleteObject +from .servicenode_lib import getCreateArgs, createDatatypeObj +from .post_crawl import createDatatypeObjs +from .domain_crawl import DomainCrawler from . 
import hsds_logger as log @@ -165,35 +165,6 @@ async def POST_Datatype(request): msg = "Unable to load JSON body" log.warn(msg) raise HTTPBadRequest(reason=msg) - if "type" not in body: - msg = "POST Datatype has no type key in body" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - datatype = body["type"] - if isinstance(datatype, str): - try: - # convert predefined type string (e.g. "H5T_STD_I32LE") to - # corresponding json representation - datatype = getBaseTypeJson(datatype) - log.debug(f"got datatype: {datatype}") - except TypeError: - msg = "POST Dataset with invalid predefined type" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - try: - validateTypeItem(datatype) - except KeyError as ke: - msg = f"KeyError creating type: {ke}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - except TypeError as te: - msg = f"TypeError creating type: {te}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - except ValueError as ve: - msg = f"ValueError creating type: {ve}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) domain = getDomainFromRequest(request) if not isValidDomain(domain): @@ -209,73 +180,92 @@ async def POST_Datatype(request): verifyRoot(domain_json) root_id = domain_json["root"] - parent_id = None - link_title = None - obj_id = None - h5path = None - attrs = None - - if "id" in body: - obj_id = body["id"] - log.debug(f"POST datatype using client id: {obj_id}") + # allow parent group creation or not + implicit = getBooleanParam(params, "implicit") - if "attributes" in body: - attrs = body["attributes"] - log.debug(f"POST datatype attributes: {attrs}") + post_rsp = None - if "link" in body: - if "h5path" in body: - msg = "link can't be used with h5path" + if isinstance(body, list): + count = len(body) + log.debug(f"multiple ctype create: {count} items") + if count == 0: + # equivalent to no body + msg = "POST Datatype with no body" log.warn(msg) raise HTTPBadRequest(reason=msg) - link_body = body["link"] - if "id" in link_body: - parent_id = link_body["id"] - - if "name" in link_body: - link_title = link_body["name"] - try: - # will throw exception if there's a slash in the name - validateLinkName(link_title) - except ValueError: - msg = f"invalid link title: {link_title}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - if parent_id and link_title: - log.debug(f"parent id: {parent_id}, link_title: {link_title}") - h5path = link_title # just use the link name as the h5path - - if "h5path" in body: - h5path = body["h5path"] - if "parent_id" not in body: - parent_id = root_id + elif count == 1: + # just create one object in typical way + kwargs = getCreateArgs(body[0], + root_id=root_id, + bucket=bucket, + implicit=implicit) else: - parent_id = body["parent_id"] - - # setup args to createObject - kwargs = {"bucket": bucket, "obj_type": datatype} - if obj_id: - kwargs["obj_id"] = obj_id - if attrs: - kwargs["attrs"] = attrs - - # TBD: creation props for datatype obj? 
- if parent_id: - kwargs["parent_id"] = parent_id - kwargs["h5path"] = h5path - # allow parent group creation or not - implicit = getBooleanParam(params, "implicit") - if implicit: - kwargs["implicit"] = True - ctype_json = await createObjectByPath(app, **kwargs) + # create multiple ctype objects + kwarg_list = [] # list of kwargs for each object + + for item in body: + log.debug(f"item: {item}") + if not isinstance(item, dict): + msg = f"Post_Datatype - invalid item type: {type(item)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + kwargs = getCreateArgs(item, root_id=root_id, bucket=bucket) + kwargs["ignore_link"] = True + kwarg_list.append(kwargs) + kwargs = {"bucket": bucket, "root_id": root_id} + log.debug(f"createDatatypeObjcs, items: {kwarg_list}") + post_rsp = await createDatatypeObjs(app, kwarg_list, **kwargs) else: - # create an anonymous datatype - kwargs["root_id"] = root_id - ctype_json = await createObject(app, **kwargs) + # single object create + kwargs = getCreateArgs(body, root_id=root_id, bucket=bucket, implicit=implicit) + log.debug(f"kwargs for datatype create: {kwargs}") + + if post_rsp is None: + # Handle cases other than multi ctype create here + post_rsp = await createDatatypeObj(app, **kwargs) + + log.debug(f"returning resp: {post_rsp}") + + if "objects" in post_rsp: + # add any links in multi request + objects = post_rsp["objects"] + obj_count = len(objects) + log.debug(f"Post datatype multi create: {obj_count} objects") + if len(body) != obj_count: + msg = f"Expected {obj_count} objects but got {len(body)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + parent_ids = {} + for index in range(obj_count): + item = body[index] + if "link" in item: + link_item = item["link"] + parent_id = link_item.get("id") + title = link_item.get("name") + if parent_id and title: + # add a hard link + object = objects[index] + obj_id = object["id"] + if parent_id not in parent_ids: + parent_ids[parent_id] = {} + links = parent_ids[parent_id] + links[title] = {"id": obj_id} + if parent_ids: + log.debug(f"POST ctype multi - adding links: {parent_ids}") + kwargs = {"action": "put_link", "bucket": bucket} + kwargs["replace"] = True + + crawler = DomainCrawler(app, parent_ids, **kwargs) + + # will raise exception on not found, server busy, etc. + await crawler.crawl() + + status = crawler.get_status() + + log.info(f"DomainCrawler done for put_links action, status: {status}") # datatype creation successful - resp = await jsonResponse(request, ctype_json, status=201) + resp = await jsonResponse(request, post_rsp, status=201) log.response(request, resp=resp) return resp diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 9f0593e6..3650db7b 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -32,10 +32,9 @@ from .util.domainUtil import getDomainFromRequest, getPathForDomain, isValidDomain from .util.domainUtil import getBucketForDomain, verifyRoot from .util.storUtil import getSupportedFilters -from .util.linkUtil import validateLinkName from .servicenode_lib import getDomainJson, getObjectJson, getDsetJson, getPathForObjectId from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo -from .servicenode_lib import createObject, createObjectByPath, deleteObject +from .servicenode_lib import getCreateArgs, createDataset, deleteObject from .dset_lib import updateShape, deleteAllChunks, doHyperslabWrite from . import config from . 
import hsds_logger as log @@ -1131,67 +1130,17 @@ async def POST_Dataset(request): log.debug(f"set dataset json creationProperties: {creationProperties}") - parent_id = None - obj_id = None - link_title = None - h5path = None - if "id" in body: - obj_id = body["id"] - log.debug(f"POST dataset using client id: {obj_id}") + # setup args to createDataset + implicit = getBooleanParam(params, "implicit") + kwargs = getCreateArgs(body, root_id=root_id, type=datatype, bucket=bucket, implicit=implicit) + # fill in dataset-specific keys + kwargs["creation_props"] = creationProperties + kwargs["shape"] = shape_json + kwargs["layout"] = layout - if "link" in body: - if "h5path" in body: - msg = "link can't be used with h5path" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - link_body = body["link"] - if "id" in link_body: - parent_id = link_body["id"] + log.debug(f"kwargs for dataset create: {kwargs}") - if "name" in link_body: - link_title = link_body["name"] - try: - # will throw exception if there's a slash in the name - validateLinkName(link_title) - except ValueError: - msg = f"invalid link title: {link_title}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - if parent_id and link_title: - log.debug(f"parent id: {parent_id}, link_title: {link_title}") - h5path = link_title # just use the link name as the h5path - - if "h5path" in body: - h5path = body["h5path"] - if "parent_id" not in body: - parent_id = root_id - else: - parent_id = body["parent_id"] - - # setup args to createObject - kwargs = {"bucket": bucket, "obj_type": datatype, "obj_shape": shape_json} - if obj_id: - kwargs["obj_id"] = obj_id - if creationProperties: - kwargs["creation_props"] = creationProperties - if attrs: - kwargs["attrs"] = attrs - if layout: - kwargs["layout"] = layout - - if parent_id: - kwargs["parent_id"] = parent_id - kwargs["h5path"] = h5path - # allow parent group creation or not - implicit = getBooleanParam(params, "implicit") - if implicit: - kwargs["implicit"] = True - dset_json = await createObjectByPath(app, **kwargs) - else: - # create an anonymous datatype - kwargs["root_id"] = root_id - dset_json = await createObject(app, **kwargs) + dset_json = await createDataset(app, **kwargs) # write data if provided if input_arr is not None: diff --git a/hsds/group_sn.py b/hsds/group_sn.py index bfad5b28..4d83e5c7 100755 --- a/hsds/group_sn.py +++ b/hsds/group_sn.py @@ -23,14 +23,12 @@ from .util.authUtil import validateUserPassword from .util.domainUtil import getDomainFromRequest, isValidDomain from .util.domainUtil import getBucketForDomain, getPathForDomain, verifyRoot -from .util.linkUtil import validateLinkName, getRequestLinks from .servicenode_lib import getDomainJson, getObjectJson, validateAction -from .servicenode_lib import getObjectIdByPath, getPathForObjectId -from .servicenode_lib import createObject, createObjectByPath, deleteObject +from .servicenode_lib import getObjectIdByPath, getPathForObjectId, deleteObject +from .servicenode_lib import getCreateArgs, createGroup from . import hsds_logger as log -from .post_crawl import createObjects +from .post_crawl import createGroups from .domain_crawl import DomainCrawler -from . 
import config async def GET_Group(request): @@ -159,115 +157,6 @@ async def GET_Group(request): return resp -async def _create_group(app, **kwargs): - """ helper method for group creation """ - - if kwargs.get("parent_id") and kwargs.get("h5path"): - group_json = await createObjectByPath(app, **kwargs) - else: - # create an anonymous group - log.debug(f"_create_group - kwargs: {kwargs}") - group_json = await createObject(app, **kwargs) - - return group_json - - -def _get_create_args(body, root_id=None, bucket=None, implicit=False, ignore_link=False): - """ get query args for _create_group from request body """ - kwargs = {"bucket": bucket} - predate_max_time = config.get("predate_max_time", default=10.0) - - parent_id = None - obj_id = None - h5path = None - - if "link" in body: - if "h5path" in body: - msg = "link can't be used with h5path" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - link_body = body["link"] - log.debug(f"link_body: {link_body}") - if "id" in link_body and not ignore_link: - parent_id = link_body["id"] - if "name" in link_body: - link_title = link_body["name"] - try: - # will throw exception if there's a slash in the name - validateLinkName(link_title) - except ValueError: - msg = f"invalid link title: {link_title}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - if parent_id and link_title: - log.debug(f"parent id: {parent_id}, link_title: {link_title}") - if not ignore_link: - h5path = link_title # just use the link name as the h5path - - if "parent_id" not in body: - parent_id = root_id - else: - parent_id = body["parent_id"] - - if "h5path" in body: - h5path = body["h5path"] - # normalize the h5path - if h5path.startswith("/"): - if parent_id == root_id: - # just adjust the path to be relative - h5path = h5path[1:] - else: - msg = f"PostCrawler expecting relative h5path, but got: {h5path}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - if h5path.endswith("/"): - h5path = h5path[:-1] # makes iterating through the links a bit easier - - if parent_id and h5path: - # these are used by createObjectByPath - kwargs["parent_id"] = parent_id - kwargs["implicit"] = implicit - kwargs["h5path"] = h5path - else: - kwargs["root_id"] = root_id - - if "id" in body: - obj_id = body["id"] - # tbd: validate this is a group id - kwargs["obj_id"] = obj_id - log.debug(f"POST group using client id: {obj_id}") - - if "creationProperties" in body: - creation_props = body["creationProperties"] - # tbd: validate creation_props - kwargs["creation_props"] = creation_props - - if "attributes" in body: - attrs = body["attributes"] - if not isinstance(attrs, dict): - msg = f"POST_Groups expected dict for for attributes, but got: {type(attrs)}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - log.debug(f"POST Group attributes: {attrs}") - - # tbd: validate attributes - kwargs["attrs"] = attrs - if "links" in body: - body_links = body["links"] - log.debug(f"got links for new group: {body_links}") - try: - links = getRequestLinks(body["links"], predate_max_time=predate_max_time) - except ValueError: - msg = "invalid link item sent in request" - raise HTTPBadRequest(reason=msg) - log.debug(f"adding links to group POST request: {links}") - kwargs["links"] = links - - return kwargs - - async def POST_Group(request): """HTTP method to create new Group object""" log.request(request) @@ -297,7 +186,7 @@ async def POST_Group(request): # allow parent group creation or not implicit = getBooleanParam(params, "implicit") kwargs = {} - post_group_rsp = None + post_rsp = None if 
request.has_body: try: body = await request.json() @@ -316,10 +205,10 @@ async def POST_Group(request): kwargs = {"root_id": root_id, "bucket": bucket} elif count == 1: # just create one object in typical way - kwargs = _get_create_args(body[0], - root_id=root_id, - bucket=bucket, - implicit=implicit) + kwargs = getCreateArgs(body[0], + root_id=root_id, + bucket=bucket, + implicit=implicit) else: # create multiple group objects kwarg_list = [] # list of kwargs for each object @@ -330,31 +219,34 @@ async def POST_Group(request): msg = f"PostGroup - invalid item type: {type(item)}" log.warn(msg) raise HTTPBadRequest(reason=msg) - kwargs = _get_create_args(item, root_id=root_id, bucket=bucket) + kwargs = getCreateArgs(item, root_id=root_id, bucket=bucket) kwargs["ignore_link"] = True kwarg_list.append(kwargs) kwargs = {"bucket": bucket, "root_id": root_id} - post_group_rsp = await createObjects(app, kwarg_list, **kwargs) + post_rsp = await createGroups(app, kwarg_list, **kwargs) else: - kwargs = _get_create_args(body, root_id=root_id, bucket=bucket, implicit=implicit) + kwargs = getCreateArgs(body, root_id=root_id, bucket=bucket, implicit=implicit) else: kwargs["root_id"] = root_id kwargs["bucket"] = bucket else: kwargs = {"root_id": root_id, "bucket": bucket} - if post_group_rsp is None: + if post_rsp is None: # Handle cases other than multi-group create here - log.debug(f"_create_group - kwargs: {kwargs}") - post_group_rsp = await _create_group(app, **kwargs) + if "type" in kwargs: + msg = "type key is not allowed for Group creation" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + post_rsp = await createGroup(app, **kwargs) - log.debug(f"returning resp: {post_group_rsp}") + log.debug(f"returning resp: {post_rsp}") - if "objects" in post_group_rsp: + if "objects" in post_rsp: # add any links in multi request - objects = post_group_rsp["objects"] + objects = post_rsp["objects"] obj_count = len(objects) - log.debug(f"PostGroup multi create: {obj_count} objects") + log.debug(f"Post group multi create: {obj_count} objects") if len(body) != obj_count: msg = f"Expected {obj_count} objects but got {len(body)}" log.warn(msg) @@ -375,7 +267,7 @@ async def POST_Group(request): links = parent_ids[parent_id] links[title] = {"id": obj_id} if parent_ids: - log.debug(f"POSTGroup multi - adding links: {parent_ids}") + log.debug(f"POST group multi - adding links: {parent_ids}") kwargs = {"action": "put_link", "bucket": bucket} kwargs["replace"] = True @@ -389,7 +281,7 @@ async def POST_Group(request): log.info(f"DomainCrawler done for put_links action, status: {status}") # group creation successful - resp = await jsonResponse(request, post_group_rsp, status=201) + resp = await jsonResponse(request, post_rsp, status=201) log.response(request, resp=resp) return resp diff --git a/hsds/post_crawl.py b/hsds/post_crawl.py index 88ffab92..225376aa 100644 --- a/hsds/post_crawl.py +++ b/hsds/post_crawl.py @@ -19,7 +19,7 @@ from aiohttp.web_exceptions import HTTPInternalServerError, HTTPNotFound, HTTPGone from .util.httpUtil import isOK -from .servicenode_lib import createObject, createObjectByPath +from .servicenode_lib import createObject from . 
import hsds_logger as log
@@ -148,7 +148,7 @@ async def create(self, index):
         if "obj_id" in item:
             kwargs["obj_id"] = item["obj_id"]
         if "type" in item:
-            kwargs["obj_type"] = item["type"]
+            kwargs["type"] = item["type"]
         if "layout" in item:
             kwargs["layout"] = item["layout"]
         if "creation_props" in item:
@@ -167,11 +167,7 @@ async def create(self, index):
         log.debug(f"PostCrawler index {index} kwargs: {kwargs}")
         rsp_json = None
         try:
-            if kwargs.get("parent_id") and kwargs.get("h5path"):
-                rsp_json = await createObjectByPath(self._app, **kwargs)
-            else:
-                # create an anonymous group
-                rsp_json = await createObject(self._app, **kwargs)
+            rsp_json = await createObject(self._app, **kwargs)
         except HTTPConflict:
             log.warn("PostCrawler - got HTTPConflict from http_post")
             rsp_json = {"status_code": 409}
@@ -188,26 +184,75 @@ async def create(self, index):
         self._rsp_objs[index] = rsp_json


-async def createObjects(app, items, root_id=None, bucket=None):
-    """ create an objects based on parameters in items list """
-
-    if not root_id:
-        msg = "no root_id given for createObjects"
-        log.warn(msg)
-        raise HTTPBadRequest(reason=msg)
-
-    log.info(f"createObjects with {len(items)} items, root_id: {root_id}")
+async def _createObjects(app, items: list, root_id=None, bucket=None):
+    """ generic create function """

     post_crawler = PostCrawler(app, root_id=root_id, bucket=bucket, items=items)
     await post_crawler.crawl()
     if post_crawler.get_status() > 201:
-        msg = f"createObjects returning status from crawler: {post_crawler.get_status()}"
+        msg = f"_createObjects returning status from crawler: {post_crawler.get_status()}"
         log.error(msg)
         raise HTTPInternalServerError()
     obj_list = post_crawler.get_rsp_objs()
     if not isinstance(obj_list, list):
-        msg = f"createObjects expected list but got: {type(obj_list)}"
+        msg = f"_createObjects expected list but got: {type(obj_list)}"
         log.error(msg)
         raise HTTPInternalServerError()

     return {"objects": obj_list}
+
+
+async def createGroups(app, items: list, root_id=None, bucket=None):
+    """ create group objects based on parameters in items list """
+
+    if not root_id:
+        msg = "no root_id given for createGroups"
+        log.warn(msg)
+        raise HTTPBadRequest(reason=msg)
+
+    for item in items:
+        if not isinstance(item, dict):
+            msg = "expected list of dictionary objects for multi-object create"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+        if "type" in item:
+            msg = "type key not allowed for multi-group create"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+        if "shape" in item:
+            msg = "shape key not allowed for multi-group create"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+
+    log.info(f"createGroups with {len(items)} items, root_id: {root_id}")
+
+    rsp_json = await _createObjects(app, items=items, root_id=root_id, bucket=bucket)
+    return rsp_json
+
+
+async def createDatatypeObjs(app, items: list, root_id=None, bucket=None):
+    """ create datatype objects based on parameters in items list """
+
+    if not root_id:
+        msg = "no root_id given for createDatatypeObjs"
+        log.warn(msg)
+        raise HTTPBadRequest(reason=msg)
+
+    for item in items:
+        if not isinstance(item, dict):
+            msg = "expected list of dictionary objects for multi-object create"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+        if "type" not in item:
+            msg = "type key not provided for multi-datatype create"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+        if "shape" in item:
+            msg = "shape key not allowed for multi-datatype create"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+
+    log.info(f"createDatatypeObjs with {len(items)} items, root_id: {root_id}")
log.info(f"createDatatypes with {len(items)} items, root_id: {root_id}") + + rsp_json = await _createObjects(app, items=items, root_id=root_id, bucket=bucket) + return rsp_json diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 8230502e..fdbfe6c4 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -32,7 +32,7 @@ from .util.nodeUtil import getDataNodeUrl from .util.authUtil import getAclKeys -from .util.linkUtil import h5Join, validateLinkName, getLinkClass +from .util.linkUtil import h5Join, validateLinkName, getLinkClass, getRequestLinks from .util.storUtil import getStorJSONObj, isStorObj from .util.authUtil import aclCheck from .util.httpUtil import http_get, http_put, http_post, http_delete @@ -1287,34 +1287,254 @@ async def deleteObject(app, obj_id, bucket=None): del meta_cache[obj_id] # remove from cache +def getCreateArgs(body, + root_id=None, + bucket=None, + type=None, + shape=None, + implicit=False, + ignore_link=False): + """ get args for createObject from request body """ + + kwargs = {"bucket": bucket} + predate_max_time = config.get("predate_max_time", default=10.0) + + parent_id = None + obj_id = None + h5path = None + + if "link" in body: + if "h5path" in body: + msg = "link can't be used with h5path" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + # if ingore_link is set, parent_links will be created post object creation + link_body = body["link"] + log.debug(f"link_body: {link_body}") + if "id" in link_body and not ignore_link: + parent_id = link_body["id"] + if "name" in link_body: + link_title = link_body["name"] + try: + # will throw exception if there's a slash in the name + validateLinkName(link_title) + except ValueError: + msg = f"invalid link title: {link_title}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + if parent_id and link_title: + log.debug(f"parent id: {parent_id}, link_title: {link_title}") + if not ignore_link: + h5path = link_title # just use the link name as the h5path + + if "parent_id" not in body: + parent_id = root_id + else: + parent_id = body["parent_id"] + + if "h5path" in body: + h5path = body["h5path"] + # normalize the h5path + if h5path.startswith("/"): + if parent_id == root_id: + # just adjust the path to be relative + h5path = h5path[1:] + else: + msg = f"PostCrawler expecting relative h5path, but got: {h5path}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + if h5path.endswith("/"): + h5path = h5path[:-1] # makes iterating through the links a bit easier + + if parent_id and h5path: + # these are used by createObjectByPath + kwargs["parent_id"] = parent_id + kwargs["implicit"] = implicit + kwargs["h5path"] = h5path + else: + kwargs["root_id"] = root_id + + if "id" in body: + obj_id = body["id"] + # tbd: validate this is a group id + kwargs["obj_id"] = obj_id + log.debug(f"createObject will use client id: {obj_id}") + + if "creationProperties" in body: + creation_props = body["creationProperties"] + # tbd: validate creation_props + kwargs["creation_props"] = creation_props + + if "attributes" in body: + attrs = body["attributes"] + if not isinstance(attrs, dict): + msg = f"expected dict for for attributes, but got: {type(attrs)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + log.debug(f"createObject attributes: {attrs}") + + # tbd: validate attributes + kwargs["attrs"] = attrs + + if "links" in body: + body_links = body["links"] + log.debug(f"got links for new group: {body_links}") + try: + links = getRequestLinks(body["links"], predate_max_time=predate_max_time) + except 
ValueError: + msg = "invalid link item sent in request" + raise HTTPBadRequest(reason=msg) + log.debug(f"adding links to createObject request: {links}") + kwargs["links"] = links + + if type: + kwargs["type"] = type + elif "type" in body: + datatype = body["type"] + if isinstance(datatype, str): + try: + # convert predefined type string (e.g. "H5T_STD_I32LE") to + # corresponding json representation + datatype = getBaseTypeJson(datatype) + log.debug(f"got datatype: {datatype}") + except TypeError: + msg = f"POST with invalid predefined type: {datatype}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + try: + validateTypeItem(datatype) + except KeyError as ke: + msg = f"KeyError creating type: {ke}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + except TypeError as te: + msg = f"TypeError creating type: {te}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + except ValueError as ve: + msg = f"ValueError creating type: {ve}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + kwargs["type"] = datatype + else: + pass # no type + + return kwargs + + +async def createLinkFromParent(app, parent_id, h5path, tgt_id=None, bucket=None, implicit=False): + """ create link or links from parentId to tgt_id. + If implicit is True, create any intermediate group objects needed """ + + if not h5path: + log.warn("createLinkFromParent with null h5path") + return + log.info(f"createLinkFromParent, parent_id: {parent_id} h5path: {h5path} tgt_id={tgt_id}") + if implicit: + log.debug("createLinkFromParent - using implicit creation") + link_titles = h5path.split("/") + log.debug(f"link_titles: {link_titles}") + for i in range(len(link_titles)): + if i == len(link_titles) - 1: + last_link = True + else: + last_link = False + link_title = link_titles[i] + log.debug(f"createLinkFromParent - processing link: {link_title}") + link_json = None + try: + link_json = await getLink(app, parent_id, link_title, bucket=bucket) + except (HTTPNotFound, HTTPGone): + pass # link doesn't exist + + if link_json: + log.debug(f"link for link_title {link_title} found: {link_json}") + # if this is the last link, that's a problem + if last_link: + msg = f"object at {h5path} already exists" + log.warn(msg) + raise HTTPConflict() + # otherwise, verify that this is a hardlink + if link_json.get("class") != "H5L_TYPE_HARD": + msg = "createLinkFromParent - h5path must contain only hard links" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + parent_id = link_json["id"] + if getCollectionForId(parent_id) != "groups": + # parent objects must be groups! 
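+                # (only group objects can contain links)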
+ msg = f"{link_title} is not a group" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + log.debug(f"link: {link_title} to sub-group found") + else: + log.debug(f"link for link_title {link_title} not found") + if last_link: + # create a link to the new object + await putHardLink(app, parent_id, link_title, tgt_id=tgt_id, bucket=bucket) + parent_id = tgt_id # new parent + elif implicit: + # create a new group object + log.info(f"creating intermediate group object for: {link_title}") + kwargs = {"parent_id": parent_id, "bucket": bucket} + grp_id = createObjId("groups", root_id=getRootObjId(parent_id)) + kwargs["obj_id"] = grp_id + # createObject won't call back to this function since we haven't set the h5path + await createObject(app, **kwargs) + # create a link to the subgroup + await putHardLink(app, parent_id, link_title, tgt_id=grp_id, bucket=bucket) + parent_id = grp_id # new parent + else: + if len(link_titles) > 1: + msg = f"createLinkFromParent failed: not all groups in {h5path} exist" + else: + msg = f"createLinkFromParent failed: {h5path} does not exist" + log.warn(msg) + raise HTTPNotFound(reason=msg) + + async def createObject(app, + parent_id=None, root_id=None, + h5path=None, obj_id=None, - obj_type=None, - obj_shape=None, + type=None, + shape=None, layout=None, creation_props=None, attrs=None, links=None, + implicit=None, bucket=None): """ create a group, ctype, or dataset object and return object json Determination on whether a group, ctype, or dataset is created is based on: - 1) if obj_type and obj_shape are set, a dataset object will be created - 2) if obj_type is set but not obj_shape, a datatype object will be created + 1) if type and shape are set, a dataset object will be created + 2) if type is set but not shape, a datatype object will be created 3) otherwise (type and shape are both None), a group object will be created The layout parameter only applies to dataset creation """ - if obj_type and obj_shape: + if type and shape: collection = "datasets" - elif obj_type: + elif type: collection = "datatypes" else: collection = "groups" - log.info(f"createObject for {collection} collection, root: {root_id}, bucket: {bucket}") - if obj_type: - log.debug(f" obj_type: {obj_type}") - if obj_shape: - log.debug(f" obj_shape: {obj_shape}") + + if not root_id: + root_id = getRootObjId(parent_id) + log.info(f"createObject for {collection} collection, root_id: {root_id}, bucket: {bucket}") + if root_id != parent_id: + log.debug(f" parent_id: {parent_id}") + if obj_id: + log.debug(f" obj_id: {obj_id}") + if h5path: + log.debug(f" h5path: {h5path}") + if type: + log.debug(f" type: {type}") + if shape: + log.debug(f" shape: {shape}") if layout: log.debug(f" layout: {layout}") if creation_props: @@ -1324,6 +1544,19 @@ async def createObject(app, if links: log.debug(f" links: {links}") + if h5path: + if h5path.startswith("/"): + if parent_id == root_id: + # just adjust the path to be relative + h5path = h5path[1:] + else: + msg = f"createObject expecting relative h5path, but got: {h5path}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + if h5path.endswith("/"): + h5path = h5path[:-1] # makes iterating through the links a bit easier + if obj_id: log.debug(f"using client supplied id: {obj_id}") if not isValidUuid(obj_id, obj_class=collection): @@ -1336,12 +1569,12 @@ async def createObject(app, raise HTTPBadRequest(reason=msg) else: obj_id = createObjId(collection, root_id=root_id) - log.info(f"new obj id: {obj_id}") + log.info(f"new obj id: {obj_id}") obj_json = {"id": 
obj_id, "root": root_id} - if obj_type: - obj_json["type"] = obj_type - if obj_shape: - obj_json["shape"] = obj_shape + if type: + obj_json["type"] = type + if shape: + obj_json["shape"] = shape if layout: obj_json["layout"] = layout if creation_props: @@ -1364,141 +1597,113 @@ async def createObject(app, params = {"bucket": bucket} rsp_json = await http_post(app, req, data=obj_json, params=params) + # object creation successful, create link from parent if requested + if h5path: + kwargs = {"tgt_id": obj_id, "bucket": bucket, "implicit": implicit} + await createLinkFromParent(app, parent_id, h5path, **kwargs) + return rsp_json -async def createObjectByPath(app, - parent_id=None, - obj_id=None, - h5path=None, - implicit=False, - obj_type=None, - obj_shape=None, - layout=None, - creation_props=None, - attrs=None, - links=None, - bucket=None): +async def createGroup(app, + parent_id=None, + root_id=None, + h5path=None, + obj_id=None, + creation_props=None, + attrs=None, + links=None, + implicit=None, + bucket=None): - """ create an object at the designated path relative to the parent. - If implicit is True, make any intermediate groups needed in the h5path. """ + """ create a new group object """ - if not parent_id: - msg = "no parent_id given for createObjectByPath" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if not h5path: - msg = "no h5path given for createObjectByPath" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - log.debug(f"createObjectByPath - parent_id: {parent_id}, h5path: {h5path}") - if obj_id: - log.debug(f"createObjectByPath using client id: {obj_id}") - if obj_type: - log.debug(f" obj_type: {obj_type}") - if obj_shape: - log.debug(f" obj_shape: {obj_shape}") - if layout: - log.debug(f" layout: {layout}") - if creation_props: - log.debug(f" cprops: {creation_props}") - if attrs: - log.debug(f" attrs: {attrs}") - if links: - log.debug(f" links: {links}") - if obj_type: - msg = "only group objects can have links" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + kwargs = {} + kwargs["parent_id"] = parent_id + kwargs["root_id"] = root_id + kwargs["h5path"] = h5path + kwargs["obj_id"] = obj_id + kwargs["creation_props"] = creation_props + kwargs["attrs"] = attrs + kwargs["links"] = links + kwargs["implicit"] = implicit + kwargs["bucket"] = bucket + rsp_json = await createObject(app, **kwargs) + return rsp_json - root_id = getRootObjId(parent_id) - if h5path.startswith("/"): - if parent_id == root_id: - # just adjust the path to be relative - h5path = h5path[1:] - else: - msg = f"createObjectByPath expecting relative h5path, but got: {h5path}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) +async def createDatatypeObj(app, + parent_id=None, + root_id=None, + type=None, + h5path=None, + obj_id=None, + creation_props=None, + attrs=None, + links=None, + implicit=None, + bucket=None): - if h5path.endswith("/"): - h5path = h5path[:-1] # makes iterating through the links a bit easier + """ create a new committed type object""" - if not h5path: - msg = "h5path for createObjectByPath invalid" + if not type: + msg = "type not set for committed type creation" log.warn(msg) raise HTTPBadRequest(reason=msg) - obj_json = None - link_titles = h5path.split("/") - log.debug(f"link_titles: {link_titles}") - for i in range(len(link_titles)): - if i == len(link_titles) - 1: - last_link = True - else: - last_link = False - link_title = link_titles[i] - log.debug(f"createObjectByPath - processing link: {link_title}") - link_json = None - try: - link_json = await getLink(app, 
parent_id, link_title, bucket=bucket) - except (HTTPNotFound, HTTPGone): - pass # link doesn't exist + kwargs = {} + kwargs["parent_id"] = parent_id + kwargs["root_id"] = root_id + kwargs["type"] = type + kwargs["h5path"] = h5path + kwargs["obj_id"] = obj_id + kwargs["creation_props"] = creation_props + kwargs["attrs"] = attrs + kwargs["links"] = links + kwargs["implicit"] = implicit + kwargs["bucket"] = bucket + rsp_json = await createObject(app, **kwargs) + return rsp_json - if link_json: - log.debug(f"link for link_title {link_title} found: {link_json}") - # if this is the last link, that's a problem - if last_link: - msg = f"object at {h5path} already exists" - log.warn(msg) - raise HTTPConflict() - # otherwise, verify that this is a hardlink - if link_json.get("class") != "H5L_TYPE_HARD": - msg = "createObjectByPath - h5path must contain only hardlinks" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - parent_id = link_json["id"] - if getCollectionForId(parent_id) != "groups": - # parent objects must be groups! - msg = f"{link_title} is not a group" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - else: - log.debug(f"link: {link_title} to sub-group found") - else: - log.debug(f"link for link_title {link_title} not found") - if not last_link and not implicit: - if len(link_titles) > 1: - msg = f"createObjectByPath failed: not all groups in {h5path} exist" - else: - msg = f"createObjectByPath failed: {h5path} does not exist" - log.warn(msg) - raise HTTPNotFound(reason=msg) - # create the group or group/datatype/dataset for the last - # item in the path (based on parameters passed in) - kwargs = {"bucket": bucket, "root_id": root_id} - if last_link: - if obj_type: - kwargs["obj_type"] = obj_type - if obj_shape: - kwargs["obj_shape"] = obj_shape - if layout: - kwargs["layout"] = layout - if creation_props: - kwargs["creation_props"] = creation_props - if attrs: - kwargs["attrs"] = attrs - if links: - kwargs["links"] = links - if obj_id: - kwargs["obj_id"] = obj_id - obj_json = await createObject(app, **kwargs) - tgt_id = obj_json["id"] - # create a link to the new object - await putHardLink(app, parent_id, link_title, tgt_id=tgt_id, bucket=bucket) - parent_id = tgt_id # new parent - log.info(f"createObjectByPath {h5path} done, returning obj_json") +async def createDataset(app, + parent_id=None, + root_id=None, + type=None, + shape=None, + h5path=None, + obj_id=None, + creation_props=None, + layout=None, + attrs=None, + links=None, + implicit=None, + bucket=None): + + """ create a new dataset object""" + + if not type: + msg = "type not set for dataset creation" + log.warn(msg) + raise HTTPBadRequest(reason=msg) - return obj_json + if not shape: + msg = "shape not set for dataset creation" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + kwargs = {} + kwargs["parent_id"] = parent_id + kwargs["root_id"] = root_id + kwargs["type"] = type + kwargs["shape"] = shape + kwargs["h5path"] = h5path + kwargs["obj_id"] = obj_id + kwargs["layout"] = layout + kwargs["creation_props"] = creation_props + kwargs["attrs"] = attrs + kwargs["links"] = links + kwargs["implicit"] = implicit + kwargs["bucket"] = bucket + rsp_json = await createObject(app, **kwargs) + return rsp_json diff --git a/tests/integ/datatype_test.py b/tests/integ/datatype_test.py index f8f01bea..9a73d9a5 100755 --- a/tests/integ/datatype_test.py +++ b/tests/integ/datatype_test.py @@ -609,6 +609,80 @@ def testPostWithPath(self): rspJson = json.loads(rsp.text) self.assertEqual(rspJson["id"], new_datatype_id) + def 
testPostMulti(self): + # test POST with multi-object creation + print("testPostMulti", self.base_domain) + headers = helper.getRequestHeaders(domain=self.base_domain) + + # get root id + req = helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + # get root group and verify link count is 0 + req = helper.getEndpoint() + "/groups/" + root_uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], 0) + + str_type = { + "charSet": "H5T_CSET_ASCII", + "class": "H5T_STRING", + "length": 12, + "strPad": "H5T_STR_NULLPAD", + } + + float_type = "H5T_IEEE_F32LE" + + # create a set of anonymous ctypes + fields = ( + {"name": "temp", "type": "H5T_STD_I32LE"}, + {"name": "pressure", "type": "H5T_IEEE_F32LE"}, + ) + compound_type = {"class": "H5T_COMPOUND", "fields": fields} + + payload = [{"type": str_type}, {"type": float_type}, {"type": compound_type}] + req = helper.getEndpoint() + "/datatypes" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertTrue("objects" in rspJson) + rsp_objs = rspJson["objects"] + self.assertEqual(len(rsp_objs), 3) + + for i in range(3): + obj_json = rsp_objs[i] + self.assertEqual(obj_json["attributeCount"], 0) + ctype_id = obj_json["id"] + self.assertTrue(helper.validateId(ctype_id)) + + # create a set of linked ctypes + for i in range(3): + item = payload[i] + item["link"] = {"id": root_uuid, "name": f"ctype_{i + 1}"} + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertTrue("objects" in rspJson) + rsp_objs = rspJson["objects"] + self.assertEqual(len(rsp_objs), 3) + for i in range(3): + json_rsp = rsp_objs[i] + self.assertEqual(json_rsp["attributeCount"], 0) + ctype_id = json_rsp["id"] + self.assertTrue(helper.validateId(ctype_id)) + + # get root group and verify link count is 3 + req = helper.getEndpoint() + "/groups/" + root_uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], 3) + if __name__ == "__main__": # setup test files diff --git a/tests/integ/group_test.py b/tests/integ/group_test.py index fbbda066..ce617e4e 100755 --- a/tests/integ/group_test.py +++ b/tests/integ/group_test.py @@ -223,6 +223,22 @@ def testPost(self): self.assertTrue("alias" in rspJson) self.assertEqual(rspJson["alias"], []) + # try with an empty body + payload = {} + req = endpoint + "/groups" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertEqual(rspJson["linkCount"], 0) + self.assertEqual(rspJson["attributeCount"], 0) + group_id = rspJson["id"] + self.assertTrue(helper.validateId(group_id)) + + # try with a type in body (as if we were trying to create a committed type) + payload["type"] = "H5T_IEEE_F32LE" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 400) + # try POST with user who doesn't have create permission on this domain test_user2 = config.get("user2_name") # some tests will be skipped if not set if not test_user2: @@ -655,6 
+671,8 @@ def testPostIdWithPath(self):

         # try again with implicit creation set
         params = {"implicit": 1}
+        g21_id = createObjId("groups", root_id=root_uuid)
+        payload = {"id": g21_id, "h5path": "g2/g2.1"}
         rsp = self.session.post(req, data=json.dumps(payload), params=params, headers=headers)
         self.assertEqual(rsp.status_code, 201)  # g2 and g2.1 created
         rspJson = json.loads(rsp.text)
From ef746d0f595c7ffed48d38b35be192213822a38b Mon Sep 17 00:00:00 2001
From: John Readey
Date: Sun, 18 May 2025 12:24:22 +0200
Subject: [PATCH 25/49] added datatype test with no type in body

---
 hsds/ctype_sn.py             | 6 +++---
 tests/integ/datatype_test.py | 7 ++++++-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/hsds/ctype_sn.py b/hsds/ctype_sn.py
index d9cfb71d..b2a3d260 100755
--- a/hsds/ctype_sn.py
+++ b/hsds/ctype_sn.py
@@ -155,7 +155,7 @@ async def POST_Datatype(request):
         await validateUserPassword(app, username, pswd)

     if not request.has_body:
-        msg = "POST Datatype with no body"
+        msg = "POST datatype with no body"
         log.warn(msg)
         raise HTTPBadRequest(reason=msg)

@@ -213,7 +213,7 @@ async def POST_Datatype(request):
                 kwargs["ignore_link"] = True
                 kwarg_list.append(kwargs)
             kwargs = {"bucket": bucket, "root_id": root_id}
-            log.debug(f"createDatatypeObjcs, items: {kwarg_list}")
+            log.debug(f"createDatatypeObjects, items: {kwarg_list}")
             post_rsp = await createDatatypeObjs(app, kwarg_list, **kwargs)
     else:
         # single object create
@@ -251,7 +251,7 @@ async def POST_Datatype(request):
                     links = parent_ids[parent_id]
                     links[title] = {"id": obj_id}
         if parent_ids:
-            log.debug(f"POST ctype multi - adding links: {parent_ids}")
+            log.debug(f"POST datatype multi - adding links: {parent_ids}")
             kwargs = {"action": "put_link", "bucket": bucket}
             kwargs["replace"] = True

diff --git a/tests/integ/datatype_test.py b/tests/integ/datatype_test.py
index 9a73d9a5..a3f20fd3 100755
--- a/tests/integ/datatype_test.py
+++ b/tests/integ/datatype_test.py
@@ -140,9 +140,14 @@ def testPostTypeWithId(self):
         # create a datatype id
         ctype_id = createObjId("datatypes", root_id=root_uuid)

+        # try creating a committed type without a type in the body
+        req = self.endpoint + "/datatypes"
+        data = {"id": ctype_id}
+        rsp = self.session.post(req, data=json.dumps(data), headers=headers)
+        self.assertEqual(rsp.status_code, 400)  # bad request
+
         # create a committed type obj
         data = {"id": ctype_id, "type": "H5T_IEEE_F32LE"}
-        req = self.endpoint + "/datatypes"
         rsp = self.session.post(req, data=json.dumps(data), headers=headers)
         self.assertEqual(rsp.status_code, 201)
         rspJson = json.loads(rsp.text)
From b1af9bc7347f51f4447df59fb25e35789224c871 Mon Sep 17 00:00:00 2001
From: John Readey
Date: Tue, 20 May 2025 20:41:57 +0200
Subject: [PATCH 26/49] modularize dataset creation args processing

---
 hsds/dset_sn.py               | 736 +++++-----------------------------
 hsds/post_crawl.py            |  31 +-
 hsds/servicenode_lib.py       | 381 ++++++++++++++++++-
 hsds/util/chunkUtil.py        | 224 ---------
 hsds/util/dsetUtil.py         | 842 +++++++++++++++++++++++++++++++---
 tests/integ/datatype_test.py  |   2 +-
 tests/integ/value_test.py     |   2 +-
 tests/unit/chunk_util_test.py | 287 +-----------
 tests/unit/dset_util_test.py  | 274 ++++++++++++-
 9 files changed, 1569 insertions(+), 1210 deletions(-)

diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py
index 3650db7b..75110bb7 100755
--- a/hsds/dset_sn.py
+++ b/hsds/dset_sn.py
@@ -14,225 +14,29 @@
 # handles dataset requests
 #

-import math
 from json import JSONDecodeError

 from aiohttp.web_exceptions import HTTPBadRequest, HTTPNotFound

-from h5json.hdf5dtype import validateTypeItem, 
createDataType, getBaseTypeJson, getItemSize -from h5json.array_util import getNumElements, getNumpyValue, jsonToArray +#from h5json.hdf5dtype import createDataType +from h5json.array_util import getNumElements #, jsonToArray from h5json.objid import isValidUuid, isSchema2Id from .util.httpUtil import getHref, respJsonAssemble from .util.httpUtil import jsonResponse, getBooleanParam -from .util.dsetUtil import getPreviewQuery, getFilterItem, getShapeDims -from .util.chunkUtil import getChunkSize, guessChunk, expandChunk, shrinkChunk -from .util.chunkUtil import getContiguousLayout +from .util.dsetUtil import getPreviewQuery# , getShapeDims, validateChunkLayout from .util.authUtil import getUserPasswordFromRequest, aclCheck from .util.authUtil import validateUserPassword from .util.domainUtil import getDomainFromRequest, getPathForDomain, isValidDomain from .util.domainUtil import getBucketForDomain, verifyRoot -from .util.storUtil import getSupportedFilters from .servicenode_lib import getDomainJson, getObjectJson, getDsetJson, getPathForObjectId from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo -from .servicenode_lib import getCreateArgs, createDataset, deleteObject -from .dset_lib import updateShape, deleteAllChunks, doHyperslabWrite -from . import config +from .servicenode_lib import getDatasetCreateArgs, createDataset, deleteObject +from .dset_lib import updateShape, deleteAllChunks #, doHyperslabWrite +from .post_crawl import createDatasets +from .domain_crawl import DomainCrawler from . import hsds_logger as log -async def validateChunkLayout(app, shape_json, item_size, layout, bucket=None): - """ - Use chunk layout given in the creationPropertiesList (if defined and - layout is valid). - Return chunk_layout_json - """ - - rank = 0 - space_dims = None - chunk_dims = None - max_dims = None - - if "dims" in shape_json: - space_dims = shape_json["dims"] - rank = len(space_dims) - - if "maxdims" in shape_json: - max_dims = shape_json["maxdims"] - if "dims" in layout: - chunk_dims = layout["dims"] - - if chunk_dims: - # validate that the chunk_dims are valid and correlates with the - # dataset shape - if isinstance(chunk_dims, int): - chunk_dims = [ - chunk_dims, - ] # promote to array - if len(chunk_dims) != rank: - msg = "Layout rank does not match shape rank" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - for i in range(rank): - dim_extent = space_dims[i] - chunk_extent = chunk_dims[i] - if not isinstance(chunk_extent, int): - msg = "Layout dims must be integer or integer array" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if chunk_extent <= 0: - msg = "Invalid layout value" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if max_dims is None: - if chunk_extent > dim_extent: - msg = "Invalid layout value" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - elif max_dims[i] != 0: - if chunk_extent > max_dims[i]: - msg = "Invalid layout value for extensible dimension" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - else: - pass # allow any positive value for unlimited dimensions - - if "class" not in layout: - msg = "class key not found in layout for creation property list" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - layout_class = layout["class"] - - if layout_class == "H5D_CONTIGUOUS_REF": - # reference to a dataset in a traditional HDF5 files with - # contigious storage - if item_size == "H5T_VARIABLE": - # can't be used with variable types... 
- msg = "Datsets with variable types cannot be used with " - msg += "reference layouts" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if "file_uri" not in layout: - # needed for H5D_CONTIGUOUS_REF - msg = "'file_uri' key must be provided for " - msg += "H5D_CONTIGUOUS_REF layout" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if "offset" not in layout: - # needed for H5D_CONTIGUOUS_REF - msg = "'offset' key must be provided for " - msg += "H5D_CONTIGUOUS_REF layout" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if "size" not in layout: - # needed for H5D_CONTIGUOUS_REF - msg = "'size' key must be provided for " - msg += "H5D_CONTIGUOUS_REF layout" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if "dims" in layout: - # used defined chunk layout not allowed for H5D_CONTIGUOUS_REF - msg = "'dims' key can not be provided for " - msg += "H5D_CONTIGUOUS_REF layout" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - elif layout_class == "H5D_CHUNKED_REF": - # reference to a dataset in a traditional HDF5 files with - # chunked storage - if item_size == "H5T_VARIABLE": - # can't be used with variable types.. - msg = "Datsets with variable types cannot be used with " - msg += "reference layouts" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if "file_uri" not in layout: - # needed for H5D_CHUNKED_REF - msg = "'file_uri' key must be provided for " - msg += "H5D_CHUNKED_REF layout" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if "dims" not in layout: - # needed for H5D_CHUNKED_REF - msg = "'dimns' key must be provided for " - msg += "H5D_CHUNKED_REF layout" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if "chunks" not in layout: - msg = "'chunks' key must be provided for " - msg += "H5D_CHUNKED_REF layout" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - elif layout_class == "H5D_CHUNKED_REF_INDIRECT": - # reference to a dataset in a traditional HDF5 files with chunked - # storage using an auxillary dataset - if item_size == "H5T_VARIABLE": - # can't be used with variable types.. 
- msg = "Datsets with variable types cannot be used with " - msg += "reference layouts" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if "dims" not in layout: - # needed for H5D_CHUNKED_REF_INDIRECT - msg = "'dimns' key must be provided for " - msg += "H5D_CHUNKED_REF_INDIRECT layout" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if "chunk_table" not in layout: - msg = "'chunk_table' key must be provided for " - msg += "H5D_CHUNKED_REF_INDIRECT layout" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - chunktable_id = layout["chunk_table"] - if not isValidUuid(chunktable_id, "Dataset"): - msg = f"Invalid chunk table id: {chunktable_id}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - # verify the chunk table exists and is of reasonable shape - try: - chunktable_json = await getDsetJson(app, chunktable_id, bucket=bucket) - except HTTPNotFound: - msg = f"chunk table id: {chunktable_id} not found" - log.warn(msg) - raise - chunktable_shape = chunktable_json["shape"] - if chunktable_shape["class"] == "H5S_NULL": - msg = "Null space datasets can not be used as chunk tables" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - chunktable_dims = getShapeDims(chunktable_shape) - if len(chunktable_dims) != len(space_dims): - msg = "Chunk table rank must be same as dataspace rank" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - elif layout_class == "H5D_CHUNKED": - if "dims" not in layout: - msg = "dims key not found in layout for creation property list" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if shape_json["class"] != "H5S_SIMPLE": - msg = "Bad Request: chunked layout not valid with shape class: " - msg += f"{shape_json['class']}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - elif layout_class == "H5D_CONTIGUOUS": - if "dims" in layout: - msg = "dims key found in layout for creation property list " - msg += "for H5D_CONTIGUOUS storage class" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - elif layout_class == "H5D_COMPACT": - if "dims" in layout: - msg = "dims key found in layout for creation property list " - msg += "for H5D_COMPACT storage class" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - else: - msg = f"Unexpected layout: {layout_class}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - async def getDatasetDetails(app, dset_id, root_id, bucket=None): """Get extra information about the given dataset""" # Gather additional info on the domain @@ -687,165 +491,118 @@ async def POST_Dataset(request): verifyRoot(domain_json) - # - # validate type input - # - if "type" not in body: - msg = "POST Dataset has no type key in body" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - datatype = body["type"] - log.debug(f"got datatype: {datatype}") - if isinstance(datatype, str) and datatype.startswith("t-"): - # Committed type - fetch type json from DN - ctype_id = datatype - log.debug(f"got ctypeid: {ctype_id}") - ctype_json = await getObjectJson(app, ctype_id, bucket=bucket) - log.debug(f"ctype: {ctype_json}") - if ctype_json["root"] != root_id: - msg = "Referenced committed datatype must belong in same domain" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - datatype = ctype_json["type"] - # add the ctype_id to type type - datatype["id"] = ctype_id - elif isinstance(datatype, str): - try: - # convert predefined type string (e.g. 
"H5T_STD_I32LE") to - # corresponding json representation - datatype = getBaseTypeJson(datatype) - log.debug(f"got datatype: {datatype}") - except TypeError: - msg = "POST Dataset with invalid predefined type" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + # allow parent group creation or not + implicit = getBooleanParam(params, "implicit") - try: - validateTypeItem(datatype) - except KeyError as ke: - msg = f"KeyError creating type: {ke}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - except TypeError as te: - msg = f"TypeError creating type: {te}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - except ValueError as ve: - msg = f"ValueError creating type: {ve}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + post_rsp = None - item_size = getItemSize(datatype) + datatype_json = None # - # Validate shape input + # handle case of committed type input # - dims = None - shape_json = {} - rank = 0 - chunk_size = None - - if "shape" not in body: - shape_json["class"] = "H5S_SCALAR" - else: - shape = body["shape"] - log.debug(f"got shape: {shape}") - if isinstance(shape, int): - shape_json["class"] = "H5S_SIMPLE" - dims = [shape, ] - shape_json["dims"] = dims - rank = 1 - elif isinstance(shape, str): - # only valid string value is H5S_NULL or H5S_SCALAR - if shape == "H5S_NULL": - shape_json["class"] = "H5S_NULL" - elif shape == "H5S_SCALAR": - shape_json["class"] = "H5S_SCALAR" - else: - msg = "POST Dataset with invalid shape value" + if isinstance(body, dict) and "type" in body: + + body_type = body["type"] + log.debug(f"got datatype: {body_type}") + if isinstance(body_type, str) and body_type.startswith("t-"): + ctype_id = body_type + # Committed type - fetch type json from DN + log.debug(f"got ctype_id: {ctype_id}") + ctype_json = await getObjectJson(app, ctype_id, bucket=bucket) + log.debug(f"ctype: {ctype_json}") + if ctype_json["root"] != root_id: + msg = "Referenced committed datatype must belong in same domain" log.warn(msg) raise HTTPBadRequest(reason=msg) - elif isinstance(shape, list): - if len(shape) == 0: - shape_json["class"] = "H5S_SCALAR" - else: - shape_json["class"] = "H5S_SIMPLE" - shape_json["dims"] = shape - dims = shape - rank = len(dims) + datatype_json = ctype_json["type"] + # add the ctype_id to type type + datatype_json["id"] = ctype_id else: - msg = "Bad Request: shape is invalid" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - if dims is not None: - for i in range(rank): - extent = dims[i] - if not isinstance(extent, int): - msg = "Invalid shape type" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if extent < 0: - msg = "shape dimension is negative" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - maxdims = None - if "maxdims" in body: - if dims is None: - msg = "Maxdims cannot be supplied if space is NULL" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - maxdims = body["maxdims"] - if isinstance(maxdims, int): - dim1 = maxdims - maxdims = [dim1] - elif isinstance(maxdims, list): - pass # can use as is + pass # we'll fetch type in getDatasetCreateArgs + + if isinstance(body, list): + count = len(body) + log.debug(f"multiple dataset create: {count} items") + if count == 0: + # equivalent to no body + msg = "POST Dataset with no body" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + elif count == 1: + # just create one object in typical way + kwargs = getDatasetCreateArgs(body[0], + root_id=root_id, + type=datatype_json, + bucket=bucket, + implicit=implicit) else: - msg = "Bad Request: maxdims is invalid" - 
log.warn(msg) - raise HTTPBadRequest(reason=msg) - if len(dims) != len(maxdims): - msg = "Maxdims rank doesn't match Shape" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - if maxdims is not None: - for extent in maxdims: - if not isinstance(extent, int): - msg = "Invalid maxdims type" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if extent < 0: - msg = "maxdims dimension is negative" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if len(maxdims) != len(dims): - msg = "Bad Request: maxdims array length must equal " - msg += "shape array length" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - shape_json["maxdims"] = [] - for i in range(rank): - maxextent = maxdims[i] - if not isinstance(maxextent, int): - msg = "Bad Request: maxdims must be integer type" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - elif maxextent == 0: - # unlimited dimension - shape_json["maxdims"].append(0) - elif maxextent < dims[i]: - msg = "Bad Request: maxdims extent can't be smaller " - msg += "than shape extent" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - else: - shape_json["maxdims"].append(maxextent) + # create multiple dataset objects + kwarg_list = [] # list of kwargs for each object + for item in body: + log.debug(f"item: {item}") + if not isinstance(item, dict): + msg = f"Post_Dataset - invalid item type: {type(item)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + kwargs = getDatasetCreateArgs(item, root_id=root_id, type=datatype_json, bucket=bucket) + kwargs["ignore_link"] = True + kwarg_list.append(kwargs) + kwargs = {"bucket": bucket, "root_id": root_id} + if datatype_json: + kwargs["type"] = datatype_json + log.debug(f"createDatasetObjects, items: {kwarg_list}") + post_rsp = await createDatasets(app, kwarg_list, **kwargs) + else: + # single object create + kwargs = getDatasetCreateArgs(body, root_id=root_id, type=datatype_json, bucket=bucket, implicit=implicit) + log.debug(f"kwargs for dataset create: {kwargs}") + + if post_rsp is None: + # Handle cases other than multi ctype create here + post_rsp = await createDataset(app, **kwargs) + + log.debug(f"returning resp: {post_rsp}") + + if "objects" in post_rsp: + # add any links in multi request + objects = post_rsp["objects"] + obj_count = len(objects) + log.debug(f"Post datatype multi create: {obj_count} objects") + if len(body) != obj_count: + msg = f"Expected {obj_count} objects but got {len(body)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + parent_ids = {} + for index in range(obj_count): + item = body[index] + if "link" in item: + link_item = item["link"] + parent_id = link_item.get("id") + title = link_item.get("name") + if parent_id and title: + # add a hard link + object = objects[index] + obj_id = object["id"] + if parent_id not in parent_ids: + parent_ids[parent_id] = {} + links = parent_ids[parent_id] + links[title] = {"id": obj_id} + if parent_ids: + log.debug(f"POST dataset multi - adding links: {parent_ids}") + kwargs = {"action": "put_link", "bucket": bucket} + kwargs["replace"] = True + + crawler = DomainCrawler(app, parent_ids, **kwargs) + + # will raise exception on not found, server busy, etc. 
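+        # (one put_link request is issued for each parent group in parent_ids)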
+ await crawler.crawl() + + status = crawler.get_status() + + log.info(f"DomainCrawler done for put_links action, status: {status}") + """ if "value" in body and body["value"]: # data to initialize dataset included in request input_data = body["value"] @@ -871,277 +628,6 @@ async def POST_Dataset(request): else: input_arr = None - layout_props = None - min_chunk_size = int(config.get("min_chunk_size")) - max_chunk_size = int(config.get("max_chunk_size")) - if "creationProperties" in body: - creationProperties = body["creationProperties"] - log.debug(f"got creationProperties: {creationProperties}") - if "layout" in creationProperties: - layout_props = creationProperties["layout"] - await validateChunkLayout(app, shape_json, item_size, layout_props, bucket=bucket) - else: - creationProperties = {} - - if "attributes" in body: - attrs = body["attributes"] - log.debug(f"POST Dataset attributes: {attrs}") - else: - attrs = None - - # TBD: check for invalid layout class... - if layout_props: - if layout_props["class"] == "H5D_CONTIGUOUS": - # treat contiguous as chunked - layout_class = "H5D_CHUNKED" - else: - layout_class = layout_props["class"] - elif shape_json["class"] != "H5S_NULL": - layout_class = "H5D_CHUNKED" - else: - layout_class = None - - if layout_class == "H5D_COMPACT": - layout = {"class": "H5D_COMPACT"} - elif layout_class: - # initialize to H5D_CHUNKED - layout = {"class": "H5D_CHUNKED"} - else: - # null space - no layout - layout = None - - if layout_props and "dims" in layout_props: - chunk_dims = layout_props["dims"] - else: - chunk_dims = None - - if layout_class == "H5D_CONTIGUOUS_REF": - kwargs = {"chunk_min": min_chunk_size, "chunk_max": max_chunk_size} - chunk_dims = getContiguousLayout(shape_json, item_size, **kwargs) - layout["dims"] = chunk_dims - log.debug(f"autoContiguous layout: {layout}") - - if layout_class == "H5D_CHUNKED" and chunk_dims is None: - # do autochunking - chunk_dims = guessChunk(shape_json, item_size) - log.debug(f"initial autochunk layout: {chunk_dims}") - - if layout_class == "H5D_CHUNKED": - chunk_size = getChunkSize(chunk_dims, item_size) - - msg = f"chunk_size: {chunk_size}, min: {min_chunk_size}, " - msg += f"max: {max_chunk_size}" - log.debug(msg) - # adjust the chunk shape if chunk size is too small or too big - adjusted_chunk_dims = None - if chunk_size < min_chunk_size: - msg = f"chunk size: {chunk_size} less than min size: " - msg += f"{min_chunk_size}, expanding" - log.debug(msg) - kwargs = {"chunk_min": min_chunk_size, "layout_class": layout_class} - adjusted_chunk_dims = expandChunk(chunk_dims, item_size, shape_json, **kwargs) - elif chunk_size > max_chunk_size: - msg = f"chunk size: {chunk_size} greater than max size: " - msg += f"{max_chunk_size}, shrinking" - log.debug(msg) - kwargs = {"chunk_max": max_chunk_size} - adjusted_chunk_dims = shrinkChunk(chunk_dims, item_size, **kwargs) - if adjusted_chunk_dims: - msg = f"requested chunk_dimensions: {chunk_dims} modified " - msg += f"dimensions: {adjusted_chunk_dims}" - log.debug(msg) - layout["dims"] = adjusted_chunk_dims - else: - layout["dims"] = chunk_dims # don't need to adjust chunk size - - # set partition_count if needed: - max_chunks_per_folder = int(config.get("max_chunks_per_folder")) - set_partition = False - if max_chunks_per_folder > 0: - if "dims" in shape_json and "dims" in layout: - set_partition = True - - if set_partition: - chunk_dims = layout["dims"] - shape_dims = shape_json["dims"] - if "maxdims" in shape_json: - max_dims = shape_json["maxdims"] - else: - max_dims 
= None - num_chunks = 1 - rank = len(shape_dims) - unlimited_count = 0 - if max_dims: - for i in range(rank): - if max_dims[i] == 0: - unlimited_count += 1 - msg = f"number of unlimited dimensions: {unlimited_count}" - log.debug(msg) - - for i in range(rank): - max_dim = 1 - if max_dims: - max_dim = max_dims[i] - if max_dim == 0: - # don't really know what the ultimate extent - # could be, but assume 10^6 for total number of - # elements and square-shaped array... - MAX_ELEMENT_GUESS = 10.0 ** 6 - exp = 1 / unlimited_count - max_dim = int(math.pow(MAX_ELEMENT_GUESS, exp)) - else: - max_dim = shape_dims[i] - num_chunks *= math.ceil(max_dim / chunk_dims[i]) - - if num_chunks > max_chunks_per_folder: - partition_count = math.ceil(num_chunks / max_chunks_per_folder) - msg = f"set partition count to: {partition_count}, " - msg += f"num_chunks: {num_chunks}" - log.info(msg) - layout["partition_count"] = partition_count - else: - msg = "do not need chunk partitions, num_chunks: " - msg += f"{num_chunks} max_chunks_per_folder: " - msg += f"{max_chunks_per_folder}" - log.info(msg) - - if layout_class in ("H5D_CHUNKED_REF", "H5D_CHUNKED_REF_INDIRECT"): - chunk_size = getChunkSize(chunk_dims, item_size) - - msg = f"chunk_size: {chunk_size}, min: {min_chunk_size}, " - msg += f"max: {max_chunk_size}" - log.debug(msg) - # nothing to do about inefficiently small chunks, but large chunks - # can be subdivided - if chunk_size < min_chunk_size: - msg = f"chunk size: {chunk_size} less than min size: " - msg += f"{min_chunk_size} for {layout_class} dataset" - log.warn(msg) - elif chunk_size > max_chunk_size: - msg = f"chunk size: {chunk_size} greater than max size: " - msg += f"{max_chunk_size}, for {layout_class} dataset" - log.warn(msg) - layout["dims"] = chunk_dims - - if creationProperties: - # TBD - validate all creationProperties - if "fillValue" in creationProperties: - # validate fill value compatible with type - dt = createDataType(datatype) - fill_value = creationProperties["fillValue"] - if "fillValue_encoding" in creationProperties: - fill_value_encoding = creationProperties["fillValue_encoding"] - - if fill_value_encoding not in ("None", "base64"): - msg = f"unexpected value for fill_value_encoding: {fill_value_encoding}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - else: - # should see a string in this case - if not isinstance(fill_value, str): - msg = f"unexpected fill value: {fill_value} " - msg += f"for encoding: {fill_value_encoding}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - else: - fill_value_encoding = None - - try: - getNumpyValue(fill_value, dt=dt, encoding=fill_value_encoding) - except ValueError: - msg = f"invalid fill value: {fill_value}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - if "filters" in creationProperties: - # convert to standard representation - # refer to https://hdf5-json.readthedocs.io/en/latest/bnf/\ - # filters.html#grammar-token-filter_list - f_in = creationProperties["filters"] - supported_filters = getSupportedFilters(include_compressors=True) - log.debug(f"supported_compressors: {supported_filters}") - - log.debug(f"filters provided in creationProperties: {f_in}") - - if not isinstance(f_in, list): - msg = "Expected filters in creationProperties to be a list" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - if f_in and chunk_size is None: - # filters can only be used with chunked datasets - msg = "Filters can only be used with chunked datasets" - log.warning(msg) - raise HTTPBadRequest(reason=msg) - - f_out = [] - for filter in 
f_in: - if isinstance(filter, int) or isinstance(filter, str): - item = getFilterItem(filter) - if not item: - msg = f"filter {filter} not recognized" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - if item["name"] not in supported_filters: - msg = f"filter {filter} is not supported" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - f_out.append(item) - elif isinstance(filter, dict): - if "class" not in filter: - msg = "expected 'class' key for filter property" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if filter["class"] != "H5Z_FILTER_USER": - item = getFilterItem(filter["class"]) - elif "id" in filter: - item = getFilterItem(filter["id"]) - elif "name" in filter: - item = getFilterItem(filter["name"]) - else: - item = None - if not item: - msg = f"filter {filter['class']} not recognized" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if "id" not in filter: - filter["id"] = item["id"] - elif item["id"] != filter["id"]: - msg = f"Expected {filter['class']} to have id: " - msg += f"{item['id']} but got {filter['id']}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if "name" not in filter: - filter["name"] = item["name"] - if filter["name"] not in supported_filters: - msg = f"filter {filter} is not supported" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - f_out.append(filter) - else: - msg = f"Unexpected type for filter: {filter}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - # replace filters with our starndardized list - log.debug(f"setting filters to: {f_out}") - creationProperties["filters"] = f_out - - log.debug(f"set dataset json creationProperties: {creationProperties}") - - # setup args to createDataset - implicit = getBooleanParam(params, "implicit") - kwargs = getCreateArgs(body, root_id=root_id, type=datatype, bucket=bucket, implicit=implicit) - # fill in dataset-specific keys - kwargs["creation_props"] = creationProperties - kwargs["shape"] = shape_json - kwargs["layout"] = layout - - log.debug(f"kwargs for dataset create: {kwargs}") - - dset_json = await createDataset(app, **kwargs) - # write data if provided if input_arr is not None: log.debug(f"write input_arr: {input_arr}") @@ -1162,9 +648,9 @@ async def POST_Dataset(request): kwargs["data"] = input_arr # do write await doHyperslabWrite(app, request, **kwargs) - + """ # dataset creation successful - resp = await jsonResponse(request, dset_json, status=201) + resp = await jsonResponse(request, post_rsp, status=201) log.response(request, resp=resp) return resp diff --git a/hsds/post_crawl.py b/hsds/post_crawl.py index 225376aa..198b1492 100644 --- a/hsds/post_crawl.py +++ b/hsds/post_crawl.py @@ -149,6 +149,8 @@ async def create(self, index): kwargs["obj_id"] = item["obj_id"] if "type" in item: kwargs["type"] = item["type"] + if "shape" in item: + kwargs["shape"] = item["shape"] if "layout" in item: kwargs["layout"] = item["layout"] if "creation_props" in item: @@ -231,7 +233,7 @@ async def createGroups(app, items: list, root_id=None, bucket=None): async def createDatatypeObjs(app, items: list, root_id=None, bucket=None): - """ create an datatype objects based on parameters in items list """ + """ create datatype objects based on parameters in items list """ if not root_id: msg = "no root_id given for createDatatypeObjs" @@ -256,3 +258,30 @@ async def createDatatypeObjs(app, items: list, root_id=None, bucket=None): rsp_json = await _createObjects(app, items=items, root_id=root_id, bucket=bucket) return rsp_json + +async def createDatasets(app, items: list, root_id=None, 
bucket=None):
+    """ create dataset objects based on parameters in items list """
+
+    if not root_id:
+        msg = "no root_id given for createDatasets"
+        log.warn(msg)
+        raise HTTPBadRequest(reason=msg)
+
+    for item in items:
+        if not isinstance(item, dict):
+            msg = "expected list of dictionary objects for multi-object create"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+        if "type" not in item:
+            msg = "type key not provided for multi-dataset create"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+        if "shape" not in item:
+            msg = "shape key not provided for multi-dataset create"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+
+    log.info(f"createDatasets with {len(items)} items, root_id: {root_id}")
+
+    rsp_json = await _createObjects(app, items=items, root_id=root_id, bucket=bucket)
+    return rsp_json
diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py
index fdbfe6c4..bc089420 100644
--- a/hsds/servicenode_lib.py
+++ b/hsds/servicenode_lib.py
@@ -15,6 +15,7 @@
 import asyncio
 import json
+import math
 import time
 import numpy as np
@@ -24,7 +25,8 @@
 from aiohttp.client_exceptions import ClientOSError, ClientError
 from aiohttp import ClientResponseError
-from h5json.array_util import encodeData, decodeData, bytesToArray, bytesArrayToList, jsonToArray
+from h5json.array_util import encodeData, decodeData, bytesToArray, bytesArrayToList
+from h5json.array_util import jsonToArray, getNumpyValue
 from h5json.objid import getCollectionForId, createObjId, getRootObjId
 from h5json.objid import isSchema2Id, getS3Key, isValidUuid
 from h5json.hdf5dtype import getBaseTypeJson, validateTypeItem, createDataType
@@ -33,12 +35,14 @@
 from .util.nodeUtil import getDataNodeUrl
 from .util.authUtil import getAclKeys
 from .util.linkUtil import h5Join, validateLinkName, getLinkClass, getRequestLinks
-from .util.storUtil import getStorJSONObj, isStorObj
+from .util.storUtil import getStorJSONObj, isStorObj, getSupportedFilters
 from .util.authUtil import aclCheck
 from .util.httpUtil import http_get, http_put, http_post, http_delete
 from .util.domainUtil import getBucketForDomain, verifyRoot, getLimits
 from .util.storUtil import getCompressors
-from .util.dsetUtil import getShapeDims
+from .util.dsetUtil import getShapeDims, getShapeJson, getFiltersJson, validateChunkLayout
+from .util.dsetUtil import getContiguousLayout, guessChunk, getChunkSize
+from .util.dsetUtil import expandChunk, shrinkChunk
 from .basenode import getVersion
 from . import hsds_logger as log
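# A minimal usage sketch for the new createDatasets helper (the type and
# shape JSON values below are illustrative hdf5-json, not taken from this
# patch):
#
#     items = []
#     for _ in range(2):
#         item = {"type": {"class": "H5T_FLOAT", "base": "H5T_IEEE_F32LE"},
#                 "shape": {"class": "H5S_SIMPLE", "dims": [100]},
#                 "ignore_link": True}
#         items.append(item)
#     rsp_json = await createDatasets(app, items, root_id=root_id, bucket=bucket)
#     # rsp_json["objects"] holds one entry per created dataset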
@@ -1287,15 +1291,69 @@ async def deleteObject(app, obj_id, bucket=None):
         del meta_cache[obj_id]  # remove from cache
 
+def validateDatasetCreationProps(creation_props, type_json=None, shape=None):
+    """ validate creation props """
+
+    log.debug(f"validateDatasetCreationProps: {creation_props}")
+    if "fillValue" in creation_props:
+        if not type_json or not shape:
+            msg = "shape and type must be set to use fillValue"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+
+        # validate fill value compatible with type
+        dt = createDataType(type_json)
+        fill_value = creation_props["fillValue"]
+        log.debug(f"got fill_value: {fill_value}")
+        if "fillValue_encoding" in creation_props:
+            fill_value_encoding = creation_props["fillValue_encoding"]
+            if fill_value_encoding not in ("None", "base64"):
+                msg = f"unexpected value for fill_value_encoding: {fill_value_encoding}"
+                log.warn(msg)
+                raise HTTPBadRequest(reason=msg)
+            else:
+                # should see a string in this case
+                if not isinstance(fill_value, str):
+                    msg = f"unexpected fill value: {fill_value} "
+                    msg += f"for encoding: {fill_value_encoding}"
+                    log.warn(msg)
+                    raise HTTPBadRequest(reason=msg)
+        else:
+            fill_value_encoding = None
+
+        try:
+            getNumpyValue(fill_value, dt=dt, encoding=fill_value_encoding)
+        except ValueError:
+            msg = f"invalid fill value: {fill_value}"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+
+    if "filters" in creation_props:
+        if not type_json or not shape:
+            msg = "shape and type must be set to use filters"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+
+        # will raise bad request exception if not valid
+        supported_filters = getSupportedFilters(include_compressors=True)
+        log.debug(f"supported_filters: {supported_filters}")
+        filters_out = getFiltersJson(creation_props, supported_filters=supported_filters)
+        # replace filters with our standardized list
+        log.debug(f"setting filters to: {filters_out}")
+        creation_props["filters"] = filters_out
+
 
 def getCreateArgs(body,
                   root_id=None,
                   bucket=None,
                   type=None,
-                  shape=None, implicit=False,
+                  implicit=False,
+                  chunk_table=None,
                   ignore_link=False):
 
     """ get args for createObject from request body """
 
+    log.debug(f"getCreateArgs with body keys: {list(body.keys())}")
+
     kwargs = {"bucket": bucket}
 
     predate_max_time = config.get("predate_max_time", default=10.0)
@@ -1364,8 +1422,10 @@ def getCreateArgs(body,
 
     if "creationProperties" in body:
         creation_props = body["creationProperties"]
-        # tbd: validate creation_props
-        kwargs["creation_props"] = creation_props
+        # validate after we've checked for shape and type
+    else:
+        creation_props = {}
+    kwargs["creation_props"] = creation_props
 
     if "attributes" in body:
         attrs = body["attributes"]
@@ -1391,20 +1451,25 @@ def getCreateArgs(body,
 
     if type:
         kwargs["type"] = type
+        type_json = type
     elif "type" in body:
-        datatype = body["type"]
-        if isinstance(datatype, str):
+        type_json = body["type"]
+        if isinstance(type_json, str):
             try:
                 # convert predefined type string (e.g. 
"H5T_STD_I32LE") to # corresponding json representation - datatype = getBaseTypeJson(datatype) - log.debug(f"got datatype: {datatype}") + type_json = getBaseTypeJson(type_json) + log.debug(f"got type: {type_json}") except TypeError: - msg = f"POST with invalid predefined type: {datatype}" + msg = f"POST with invalid predefined type: {type_json}" log.warn(msg) raise HTTPBadRequest(reason=msg) + else: + type_json = None + + if type_json: try: - validateTypeItem(datatype) + validateTypeItem(type_json) except KeyError as ke: msg = f"KeyError creating type: {ke}" log.warn(msg) @@ -1417,10 +1482,237 @@ def getCreateArgs(body, msg = f"ValueError creating type: {ve}" log.warn(msg) raise HTTPBadRequest(reason=msg) - kwargs["type"] = datatype + kwargs["type"] = type_json else: pass # no type + return kwargs + +def getDatasetCreateArgs(body, + root_id=None, + bucket=None, + type=None, + implicit=False, + chunk_table=None, + ignore_link=False): + + """ get args for createDataset from request body """ + + # call getCreateArgs for group, datatype objects, then fill in for dataset specific options + kwargs = getCreateArgs(body, + root_id=root_id, + bucket=bucket, + type=type, + implicit=implicit, + ignore_link=ignore_link) + + if not "type" in kwargs: + msg = "no type specified for create dataset" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + type_json = kwargs["type"] + # + # Validate shape if present + # + + # will return scalar shape if no shape key in body + shape_json = getShapeJson(body) + kwargs["shape"] = shape_json + + # get layout for dataset creation + log.debug("getting dataset creation settings") + layout_props = None + min_chunk_size = int(config.get("min_chunk_size")) + max_chunk_size = int(config.get("max_chunk_size")) + type_json = kwargs["type"] + item_size = getItemSize(type_json) + creation_props = kwargs["creation_props"] + layout_props = None + + if creation_props: + validateDatasetCreationProps(creation_props, type_json=type_json, shape=shape_json) + if "layout" in creation_props: + layout_props = creation_props["layout"] + try: + validateChunkLayout(shape_json, item_size, layout_props, chunk_table=chunk_table) + except ValueError: + msg = f"invalid chunk layout: {layout_props}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + # TBD: check for invalid layout class... 
+ if layout_props: + if layout_props["class"] == "H5D_CONTIGUOUS": + # treat contiguous as chunked + layout_class = "H5D_CHUNKED" + else: + layout_class = layout_props["class"] + elif shape_json["class"] != "H5S_NULL": + layout_class = "H5D_CHUNKED" + else: + layout_class = None + log.debug(f"using layout_class: {layout_class}") + + if layout_class == "H5D_COMPACT": + layout = {"class": "H5D_COMPACT"} + elif layout_class: + # initialize to H5D_CHUNKED + layout = {"class": "H5D_CHUNKED"} + else: + # null space - no layout + layout = None + + if layout_props and "dims" in layout_props: + chunk_dims = layout_props["dims"] + else: + chunk_dims = None + + if layout_class == "H5D_CONTIGUOUS_REF": + opts = {"chunk_min": min_chunk_size, "chunk_max": max_chunk_size} + chunk_dims = getContiguousLayout(shape_json, item_size, **opts) + layout["dims"] = chunk_dims + log.debug(f"autoContiguous layout: {layout}") + + if layout_class == "H5D_CHUNKED" and chunk_dims is None: + # do auto-chunking + chunk_dims = guessChunk(shape_json, item_size) + log.debug(f"initial autochunk layout: {chunk_dims}") + if layout_class == "H5D_CHUNKED": + chunk_size = getChunkSize(chunk_dims, item_size) + + msg = f"chunk_size: {chunk_size}, min: {min_chunk_size}, " + msg += f"max: {max_chunk_size}" + log.debug(msg) + + # adjust the chunk shape if chunk size is too small or too big + adjusted_chunk_dims = None + if chunk_size < min_chunk_size: + msg = f"chunk size: {chunk_size} less than min size: " + msg += f"{min_chunk_size}, expanding" + log.debug(msg) + opts = {"chunk_min": min_chunk_size, "layout_class": layout_class} + adjusted_chunk_dims = expandChunk(chunk_dims, item_size, shape_json, **opts) + elif chunk_size > max_chunk_size: + msg = f"chunk size: {chunk_size} greater than max size: " + msg += f"{max_chunk_size}, shrinking" + log.debug(msg) + opts = {"chunk_max": max_chunk_size} + adjusted_chunk_dims = shrinkChunk(chunk_dims, item_size, **opts) + + if adjusted_chunk_dims: + msg = f"requested chunk_dimensions: {chunk_dims} modified " + msg += f"dimensions: {adjusted_chunk_dims}" + log.debug(msg) + layout["dims"] = adjusted_chunk_dims + else: + layout["dims"] = chunk_dims # don't need to adjust chunk size + + # set partition_count if needed: + max_chunks_per_folder = int(config.get("max_chunks_per_folder")) + set_partition = False + if max_chunks_per_folder > 0: + if "dims" in shape_json and "dims" in layout: + set_partition = True + + if set_partition: + chunk_dims = layout["dims"] + shape_dims = shape_json["dims"] + if "maxdims" in shape_json: + max_dims = shape_json["maxdims"] + else: + max_dims = None + num_chunks = 1 + rank = len(shape_dims) + unlimited_count = 0 + if max_dims: + for i in range(rank): + if max_dims[i] == 0: + unlimited_count += 1 + msg = f"number of unlimited dimensions: {unlimited_count}" + log.debug(msg) + + for i in range(rank): + max_dim = 1 + if max_dims: + max_dim = max_dims[i] + if max_dim == 0: + # don't really know what the ultimate extent + # could be, but assume 10^6 for total number of + # elements and square-shaped array... 
+ MAX_ELEMENT_GUESS = 10.0 ** 6 + exp = 1 / unlimited_count + max_dim = int(math.pow(MAX_ELEMENT_GUESS, exp)) + else: + max_dim = shape_dims[i] + num_chunks *= math.ceil(max_dim / chunk_dims[i]) + + if num_chunks > max_chunks_per_folder: + partition_count = math.ceil(num_chunks / max_chunks_per_folder) + msg = f"set partition count to: {partition_count}, " + msg += f"num_chunks: {num_chunks}" + log.info(msg) + layout["partition_count"] = partition_count + else: + msg = "do not need chunk partitions, num_chunks: " + msg += f"{num_chunks} max_chunks_per_folder: " + msg += f"{max_chunks_per_folder}" + log.info(msg) + + if layout_class in ("H5D_CHUNKED_REF", "H5D_CHUNKED_REF_INDIRECT"): + chunk_size = getChunkSize(chunk_dims, item_size) + + msg = f"chunk_size: {chunk_size}, min: {min_chunk_size}, " + msg += f"max: {max_chunk_size}" + log.debug(msg) + # nothing to do about inefficiently small chunks, but large chunks + # can be subdivided + if chunk_size < min_chunk_size: + msg = f"chunk size: {chunk_size} less than min size: " + msg += f"{min_chunk_size} for {layout_class} dataset" + log.warn(msg) + elif chunk_size > max_chunk_size: + msg = f"chunk size: {chunk_size} greater than max size: " + msg += f"{max_chunk_size}, for {layout_class} dataset" + log.warn(msg) + layout["dims"] = chunk_dims + + if layout: + log.debug(f"setting layout to: {layout}") + kwargs["layout"] = layout + + # + # get input data if present + # + if "value" in body and body["value"]: + # data to initialize dataset included in request + if shape_json["class"] == "H5S_NULL": + msg = "null shape datasets can not have initial values" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + input_data = body["value"] + msg = "input data doesn't match request type and shape" + dims = getShapeDims(shape_json) + if not dims: + log.warn(msg) + raise HTTPBadRequest(reason=msg) + arr_dtype = createDataType(type_json) + + try: + input_arr = jsonToArray(dims, arr_dtype, input_data) + except ValueError: + log.warn(f"ValueError: {msg}") + raise HTTPBadRequest(reason=msg) + except TypeError: + log.warn(f"TypeError: {msg}") + raise HTTPBadRequest(reason=msg) + except IndexError: + log.warn(f"IndexError: {msg}") + raise HTTPBadRequest(reason=msg) + log.debug(f"got json arr: {input_arr.shape}") + kwargs["value"] = input_data + return kwargs @@ -1597,6 +1889,8 @@ async def createObject(app, params = {"bucket": bucket} rsp_json = await http_post(app, req, data=obj_json, params=params) + log.debug(f"createObject: {req} got rsp_json: {rsp_json}") + # object creation successful, create link from parent if requested if h5path: kwargs = {"tgt_id": obj_id, "bucket": bucket, "implicit": implicit} @@ -1675,6 +1969,7 @@ async def createDataset(app, obj_id=None, creation_props=None, layout=None, + value=None, attrs=None, links=None, implicit=None, @@ -1688,9 +1983,8 @@ async def createDataset(app, raise HTTPBadRequest(reason=msg) if not shape: - msg = "shape not set for dataset creation" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + # default to a scalar dataset + shape = {"class": "H5S_SCALAR"} kwargs = {} kwargs["parent_id"] = parent_id @@ -1705,5 +1999,56 @@ async def createDataset(app, kwargs["links"] = links kwargs["implicit"] = implicit kwargs["bucket"] = bucket - rsp_json = await createObject(app, **kwargs) - return rsp_json + dset_json = await createObject(app, **kwargs) + + if value: + log.debug(f"tbd - set dataset value to: {value}") + shape_json = kwargs["shape"] + type_json = kwargs["type"] + # data to initialize dataset included in 
request + msg = "input data doesn't match request type and shape" + dims = getShapeDims(shape_json) + if not dims: + log.warn(msg) + raise HTTPBadRequest(reason=msg) + arr_dtype = createDataType(type_json) + + try: + input_arr = jsonToArray(dims, arr_dtype, value) + except ValueError: + log.warn(f"ValueError: {msg}") + raise HTTPBadRequest(reason=msg) + except TypeError: + log.warn(f"TypeError: {msg}") + raise HTTPBadRequest(reason=msg) + except IndexError: + log.warn(f"IndexError: {msg}") + raise HTTPBadRequest(reason=msg) + log.debug(f"got json arr: {input_arr.shape}") + else: + input_arr = None + + # write data if provided + if input_arr is not None: + log.debug(f"write input_arr: {input_arr}") + # mixin the layout + dset_json["layout"] = layout + # make selection for entire dataspace + dims = getShapeDims(shape_json) + slices = [] + for dim in dims: + s = slice(0, dim, 1) + slices.append(s) + # make a one page list to handle the write in one chunk crawler run + # (larger write request should user binary streaming) + kwargs = {"page_number": 0, "page": slices} + kwargs["dset_json"] = dset_json + kwargs["bucket"] = bucket + kwargs["select_dtype"] = input_arr.dtype + kwargs["data"] = input_arr + log.debug(f"kwargs for hyperslab write: {kwargs}") + # do write + #request = None # don't need in this case since not reading from input stream + #await doHyperslabWrite(app, request, **kwargs) + + return dset_json diff --git a/hsds/util/chunkUtil.py b/hsds/util/chunkUtil.py index 9c984de6..9dd51bf9 100644 --- a/hsds/util/chunkUtil.py +++ b/hsds/util/chunkUtil.py @@ -11,230 +11,6 @@ PRIMES = [29, 31, 37, 41, 43, 47, 53, 59, 61, 67] # for chunk partitioning -def getChunkSize(layout, type_size): - """Return chunk size given layout. - i.e. just the product of the values in the list. - """ - if type_size == "H5T_VARIABLE": - type_size = DEFAULT_TYPE_SIZE - - chunk_size = type_size - for n in layout: - if n <= 0: - raise ValueError("Invalid chunk layout") - chunk_size *= n - return chunk_size - - -def get_dset_size(shape_json, typesize): - """Return the size of the dataspace. For - any unlimited dimensions, assume a value of 1. 
- (so the return size will be the absolute minimum) - """ - if shape_json is None or shape_json["class"] == "H5S_NULL": - return None - if shape_json["class"] == "H5S_SCALAR": - return typesize # just return size for one item - if typesize == "H5T_VARIABLE": - typesize = DEFAULT_TYPE_SIZE # just take a guess at the item size - dset_size = typesize - shape = shape_json["dims"] - rank = len(shape) - - for n in range(rank): - if shape[n] == 0: - # extendable extent with value of 0 - continue # assume this is one - dset_size *= shape[n] - return dset_size - - -def expandChunk( - layout, typesize, shape_json, chunk_min=CHUNK_MIN, layout_class="H5D_CHUNKED" -): - """Compute an increased chunk shape with a size in bytes greater than chunk_min.""" - if shape_json is None or shape_json["class"] == "H5S_NULL": - return None - if shape_json["class"] == "H5S_SCALAR": - return (1,) # just enough to store one item - - layout = list(layout) - log.debug(f"expandChunk layout: {layout} typesize: {typesize}") - dims = shape_json["dims"] - rank = len(dims) - extendable_dims = 0 # number of dimensions that are extenable - maxdims = None - if "maxdims" in shape_json: - maxdims = shape_json["maxdims"] - for n in range(rank): - if maxdims[n] == 0 or maxdims[n] > dims[n]: - extendable_dims += 1 - - dset_size = get_dset_size(shape_json, typesize) - if dset_size <= chunk_min and extendable_dims == 0: - # just use the entire dataspace shape as one big chunk - return tuple(dims) - - chunk_size = getChunkSize(layout, typesize) - if chunk_size >= chunk_min: - return tuple(layout) # good already - while chunk_size < chunk_min: - # just adjust along extendable dimensions first - old_chunk_size = chunk_size - for n in range(rank): - dim = rank - n - 1 # start from last dim - - if extendable_dims > 0: - if maxdims[dim] == 0: - # infinitely extendable dimensions - layout[dim] *= 2 - chunk_size = getChunkSize(layout, typesize) - if chunk_size > chunk_min: - break - elif maxdims[dim] > layout[dim]: - # can only be extended so much - layout[dim] *= 2 - if layout[dim] >= dims[dim]: - layout[dim] = maxdims[dim] # trim back - extendable_dims -= 1 # one less extenable dimension - - chunk_size = getChunkSize(layout, typesize) - if chunk_size > chunk_min: - break - else: - pass # ignore non-extensible for now - else: - # no extendable dimensions - if dims[dim] > layout[dim]: - # can expand chunk along this dimension - layout[dim] *= 2 - if layout[dim] > dims[dim]: - layout[dim] = dims[dim] # trim back - chunk_size = getChunkSize(layout, typesize) - if chunk_size > chunk_min: - break - else: - pass # can't extend chunk along this dimension - if chunk_size <= old_chunk_size: - # stop iteration if we haven't increased the chunk size - log.debug("stopping expandChunk iteration") - break - elif chunk_size > chunk_min: - break # we're good - else: - pass # do another round - return tuple(layout) - - -def shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX, layout_class="H5D_CHUNKED"): - """Compute a reduced chunk shape with a size in bytes less than chunk_max.""" - layout = list(layout) - chunk_size = getChunkSize(layout, typesize) - if chunk_size <= chunk_max: - return tuple(layout) # good already - log.debug(f"shrinkChunk layout: {layout} typesize: {typesize}") - rank = len(layout) - - while chunk_size > chunk_max: - # just adjust along extendable dimensions first - old_chunk_size = chunk_size - for dim in range(rank): - if layout[dim] > 1: - # tricky way to do x // 2 with ceil - layout[dim] = -(-layout[dim] // 2) - chunk_size = 
getChunkSize(layout, typesize) - if chunk_size <= chunk_max: - break - else: - pass # can't shrink chunk along this dimension - if chunk_size >= old_chunk_size: - # reality check to see if we'll ever break out of the while loop - log.warning("Unexpected error in shrink_chunk") - break - elif chunk_size <= chunk_max: - break # we're good - else: - pass # do another round - return tuple(layout) - - -def guessChunk(shape_json, typesize): - """Guess an appropriate chunk layout for a dataset, given its shape and - the size of each element in bytes. Will allocate chunks only as large - as MAX_SIZE. Chunks are generally close to some power-of-2 fraction of - each axis, slightly favoring bigger values for the last index. - - Undocumented and subject to change without warning. - """ - if shape_json is None or shape_json["class"] == "H5S_NULL": - return None - if shape_json["class"] == "H5S_SCALAR": - return (1,) # just enough to store one item - - if "maxdims" in shape_json: - shape = shape_json["maxdims"] - else: - shape = shape_json["dims"] - - if typesize == "H5T_VARIABLE": - typesize = 128 # just take a guess at the item size - - # For unlimited dimensions we have to guess. use 1024 - shape = tuple((x if x != 0 else 1024) for i, x in enumerate(shape)) - - return shape - - -def getContiguousLayout( - shape_json, item_size, chunk_min=1000 * 1000, chunk_max=4 * 1000 * 1000 -): - """ - create a chunklayout for datasets use continguous storage. - """ - if not isinstance(item_size, int): - msg = "ContiguousLayout can only be used with fixed-length types" - raise ValueError(msg) - if chunk_max < chunk_min: - raise ValueError("chunk_max cannot be less than chunk_min") - if shape_json is None or shape_json["class"] == "H5S_NULL": - return None - if shape_json["class"] == "H5S_SCALAR": - return (1,) # just enough to store one item - dims = shape_json["dims"] - rank = len(dims) - if rank == 0: - raise ValueError("rank must be positive for Contiguous Layout") - for dim in dims: - if dim < 0: - raise ValueError("extents must be positive for Contiguous Layout") - if dim == 0: - # datashape with no elements, just return dims as layout - return dims - - nsize = item_size - layout = [ - 1, - ] * rank - - for i in range(rank): - dim = rank - i - 1 - extent = dims[dim] - if extent * nsize < chunk_max: - # just use the full extent as layout - layout[dim] = extent - nsize *= extent - else: - n = extent - while n > 1: - n = -(-n // 2) # use negatives so we round up on odds - if n * nsize < chunk_max: - break - layout[dim] = n - break # just use 1's for the rest of the layout - - return layout - - def frac(x, d): """ Utility func -- Works like fractional div, but returns ceiling diff --git a/hsds/util/dsetUtil.py b/hsds/util/dsetUtil.py index 044127f0..b259aae9 100644 --- a/hsds/util/dsetUtil.py +++ b/hsds/util/dsetUtil.py @@ -13,7 +13,17 @@ from aiohttp.web_exceptions import HTTPBadRequest, HTTPInternalServerError import math +from h5json.hdf5dtype import getItemSize, isVlen +from h5json.objid import isValidUuid + from .. import hsds_logger as log +from .. import config + +#from .chunkUtil import getChunkSize, guessChunk, expandChunk, shrinkChunk + +CHUNK_MIN = 512 * 1024 # Soft lower limit (512k) +CHUNK_MAX = 2048 * 1024 # Hard upper limit (2M) +DEFAULT_TYPE_SIZE = 128 # Type size case when it is variable """ Filters that are known to HSDS. @@ -74,24 +84,27 @@ "H5D_CONTIGUOUS_REF", ) - -# copied from arrayUtil.py -def isVlen(dt): +def get_dset_size(shape_json, typesize): + """Return the size of the dataspace. 
For
+    any unlimited dimensions, assume a value of 1.
+    (so the return size will be the absolute minimum)
+    """
+    if shape_json is None or shape_json["class"] == "H5S_NULL":
+        return None
+    if shape_json["class"] == "H5S_SCALAR":
+        return typesize  # just return size for one item
+    if typesize == "H5T_VARIABLE":
+        typesize = DEFAULT_TYPE_SIZE  # just take a guess at the item size
+    dset_size = typesize
+    shape = shape_json["dims"]
+    rank = len(shape)
+    for n in range(rank):
+        if shape[n] == 0:
+            # extendable extent with value of 0
+            continue  # assume this is one
+        dset_size *= shape[n]
+    return dset_size
 
 def getFilterItem(key):
     """
@@ -107,6 +120,81 @@ def getFilterItem(key):
     return None  # not found
 
+def getFiltersJson(create_props, supported_filters=None):
+    """ return standardized filter representation from creation properties
+        raise bad request if invalid """
+
+    # refer to https://hdf5-json.readthedocs.io/en/latest/bnf/\
+    # filters.html#grammar-token-filter_list
+
+    if "filters" not in create_props:
+        return []  # no filters specified
+
+    f_in = create_props["filters"]
+
+    log.debug(f"filters provided in creation_prop: {f_in}")
+
+    if not isinstance(f_in, list):
+        msg = "Expected filters in creation_props to be a list"
+        log.warn(msg)
+        raise HTTPBadRequest(reason=msg)
+
+    f_out = []
+    for filter in f_in:
+        if isinstance(filter, int) or isinstance(filter, str):
+            item = getFilterItem(filter)
+            if not item:
+                msg = f"filter {filter} not recognized"
+                log.warn(msg)
+                raise HTTPBadRequest(reason=msg)
+
+            if item["name"] not in supported_filters:
+                msg = f"filter {filter} is not supported"
+                log.warn(msg)
+                raise HTTPBadRequest(reason=msg)
+            f_out.append(item)
+        elif isinstance(filter, dict):
+            if "class" not in filter:
+                msg = "expected 'class' key for filter property"
+                log.warn(msg)
+                raise HTTPBadRequest(reason=msg)
+            if filter["class"] != "H5Z_FILTER_USER":
+                item = getFilterItem(filter["class"])
+            elif "id" in filter:
+                item = getFilterItem(filter["id"])
+            elif "name" in filter:
+                item = getFilterItem(filter["name"])
+            else:
+                item = None
+            if not item:
+                msg = f"filter {filter['class']} not recognized"
+                log.warn(msg)
+                raise HTTPBadRequest(reason=msg)
+            if "id" not in filter:
+                filter["id"] = item["id"]
+            elif item["id"] != filter["id"]:
+                msg = f"Expected {filter['class']} to have id: "
+                msg += f"{item['id']} but got {filter['id']}"
+                log.warn(msg)
+                raise HTTPBadRequest(reason=msg)
+            if "name" not in filter:
+                filter["name"] = item["name"]
+            if filter["name"] not in supported_filters:
+                msg = f"filter {filter} is not supported"
+                log.warn(msg)
+                raise HTTPBadRequest(reason=msg)
+
+            f_out.append(filter)
+        else:
+            msg = f"Unexpected type for filter: {filter}"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+
+    # return standardized filter representation
+    log.debug(f"using filters: {f_out}")
+    return f_out
+
+
 def getFilters(dset_json):
     """Return list of filters, or empty list"""
     if "creationProperties" not in dset_json:
@@ -210,27 +298,141 @@ def getFilterOps(app, dset_id, filters, dtype=None, chunk_shape=None):
         return filter_ops
     else:
         return None
+
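+# A sketch of the normalization getFiltersJson performs (values are
+# illustrative; the exact "id" and "name" fields come from the server's
+# filter table via getFilterItem):
+#
+#     creation_props = {"filters": ["gzip"]}
+#     getFiltersJson(creation_props, supported_filters=["gzip"])
+#         -> [{"class": "H5Z_FILTER_DEFLATE", "id": 1, "name": "gzip"}]
+#
+# A dict entry such as {"class": "H5Z_FILTER_DEFLATE"} is passed through
+# with its "id" and "name" keys filled in from the same table.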
+def getShapeJson(body):
+    """ Return normalized json description of data space """
+
+    dims = None
+    maxdims = None
+    shape_class = None
+
+    if "shape" not in body:
+        shape_class = "H5S_SCALAR"
+        log.debug("no shape given - using H5S_SCALAR")
+        return {"class": shape_class}
+
+    body_shape = body["shape"]
+    log.debug(f"got shape: {body_shape}")
+
+    if isinstance(body_shape, int):
+        shape_class = "H5S_SIMPLE"
+        dims = [body_shape, ]
+    elif isinstance(body_shape, str):
+        # only valid string value is H5S_NULL or H5S_SCALAR
+        shape_class = body_shape
+    elif isinstance(body_shape, (tuple, list)):
+        if len(body_shape) == 0:
+            shape_class = "H5S_SCALAR"
+        else:
+            shape_class = "H5S_SIMPLE"
+            dims = body_shape
+    else:
+        msg = f"invalid shape: {body_shape}"
+        log.warn(msg)
+        raise ValueError(msg)
+
+    if shape_class not in ("H5S_NULL", "H5S_SCALAR", "H5S_SIMPLE"):
+        msg = f"invalid shape class: {shape_class}"
+        log.warn(msg)
+        raise ValueError(msg)
+    if shape_class in ("H5S_NULL", "H5S_SCALAR") and dims:
+        msg = f"dims not valid for shape class: {shape_class}"
+        log.warn(msg)
+        raise ValueError(msg)
 
-def getDsetRank(dset_json):
-    """Get rank returning 0 for sclar or NULL datashapes"""
-    datashape = dset_json["shape"]
-    if datashape["class"] == "H5S_NULL":
-        return 0
-    if datashape["class"] == "H5S_SCALAR":
+    if dims is None and shape_class == "H5S_SIMPLE":
+        msg = "dims not specified for H5S_SIMPLE shape"
+        log.warn(msg)
+        raise ValueError(msg)
+
+    if dims is not None:
+        rank = len(dims)
+        for i in range(rank):
+            extent = dims[i]
+            if not isinstance(extent, int):
+                msg = f"Invalid shape dims: {dims}"
+                log.warn(msg)
+                raise ValueError(msg)
+            if extent < 0:
+                msg = f"shape dimension is negative for dims: {dims}"
+                log.warn(msg)
+                raise ValueError(msg)
+
+    if "maxdims" in body:
+        maxdims = body["maxdims"]
+    elif isinstance(body_shape, dict) and "maxdims" in body_shape:
+        maxdims = body_shape["maxdims"]
+    else:
+        maxdims = None
+
+    # validate maxdims
+    if maxdims:
+        if dims is None:
+            msg = f"maxdims cannot be supplied for space class: {shape_class}"
+            log.warn(msg)
+            raise ValueError(msg)
+
+        if isinstance(maxdims, int):
+            dim1 = maxdims
+            maxdims = [dim1]
+        elif isinstance(maxdims, list):
+            pass  # can use as is
+        else:
+            msg = f"Bad Request: maxdims is invalid: {maxdims}"
+            log.warn(msg)
+            raise ValueError(msg)
+        if len(dims) != len(maxdims):
+            msg = "maxdims rank doesn't match dims"
+            log.warn(msg)
+            raise ValueError(msg)
+
+    # return json description of shape
+    shape_json = {"class": shape_class}
+    if shape_class == "H5S_SIMPLE":
+        shape_json["dims"] = dims
+        if maxdims:
+            shape_json["maxdims"] = maxdims
+    log.debug(f"returning shape_json: {shape_json}")
+    return shape_json
+
+def getShapeClass(data_shape):
+    """ Return shape class of the given data shape """
+
+    if not isinstance(data_shape, dict):
+        raise TypeError("expected dict object")
+
+    if "class" not in data_shape:
+        raise KeyError("expected 'class' key for data shape")
+
+    return data_shape["class"]
+
+def getRank(data_shape):
+    """ Return rank of given data shape_json """
+
+    shape_class = getShapeClass(data_shape)
+
+    if shape_class == "H5S_NULL":
         return 0
-    if "dims" not in datashape:
-        log.warn(f"expected to find dims key in shape_json: {datashape}")
+    elif shape_class == "H5S_SCALAR":
         return 0
-    dims = datashape["dims"]
-    rank = len(dims)
-    return rank
+    elif shape_class == "H5S_SIMPLE":
+        if "dims" not in data_shape:
+            raise KeyError("expected dims key for H5S_SIMPLE data shape")
+        return len(data_shape["dims"])
+    else:
+        raise ValueError(f"unexpected data shape class: {shape_class}")
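+
+# A few illustrative getShapeJson cases (the request-body fragments are
+# hypothetical; the results follow the code above):
+#
+#     getShapeJson({})                    -> {"class": "H5S_SCALAR"}
+#     getShapeJson({"shape": 10})         -> {"class": "H5S_SIMPLE", "dims": [10]}
+#     getShapeJson({"shape": "H5S_NULL"}) -> {"class": "H5S_NULL"}
+#     getShapeJson({"shape": [4, 5], "maxdims": [4, 0]})
+#         -> {"class": "H5S_SIMPLE", "dims": [4, 5], "maxdims": [4, 0]}
+
+def getDsetRank(dset_json):
+    """Get rank returning 0 for scalar or NULL data shapes"""
+    data_shape = 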
dset_json["shape"] + return getRank(data_shape) def isNullSpace(dset_json): - """Return true if this dataset is a null dataspace""" - datashape = dset_json["shape"] - if datashape["class"] == "H5S_NULL": + """Return true if this dataset is a null data space""" + shape_class = getShapeClass(dset_json["shape"]) + if shape_class == "H5S_NULL": return True else: return False @@ -238,33 +440,567 @@ def isNullSpace(dset_json): def isScalarSpace(dset_json): """ return true if this is a scalar dataset """ - datashape = dset_json["shape"] - is_scalar = False - if datashape["class"] == "H5S_NULL": - is_scalar = False - elif datashape["class"] == "H5S_SCALAR": - is_scalar = True + + data_shape = dset_json["shape"] + shape_class = getShapeClass(data_shape) + if shape_class == "H5S_NULL": + return False + + rank = getRank(data_shape) + return True if rank == 0 else False + + +def getContiguousLayout(shape_json, item_size, chunk_min=None, chunk_max=None): + """ + create a chunk layout for datasets use contiguous storage. + """ + if not isinstance(item_size, int): + msg = "ContiguousLayout can only be used with fixed-length types" + log.warn(msg) + raise ValueError(msg) + + if chunk_min is None: + msg = "chunk_min not set" + log.warn(msg) + raise ValueError(msg) + if chunk_max is None: + msg = "chunk_max not set" + log.warn(msg) + raise ValueError(msg) + + if chunk_max < chunk_min: + raise ValueError("chunk_max cannot be less than chunk_min") + + if shape_json is None or shape_json["class"] == "H5S_NULL": + return None + if shape_json["class"] == "H5S_SCALAR": + return (1,) # just enough to store one item + dims = shape_json["dims"] + rank = len(dims) + if rank == 0: + raise ValueError("rank must be positive for Contiguous Layout") + for dim in dims: + if dim < 0: + raise ValueError("extents must be positive for Contiguous Layout") + if dim == 0: + # data shape with no elements, just return dims as layout + return dims + + nsize = item_size + layout = [1,] * rank + + for i in range(rank): + dim = rank - i - 1 + extent = dims[dim] + if extent * nsize < chunk_max: + # just use the full extent as layout + layout[dim] = extent + nsize *= extent + else: + n = extent + while n > 1: + n = -(-n // 2) # use negatives so we round up on odds + if n * nsize < chunk_max: + break + layout[dim] = n + break # just use 1's for the rest of the layout + + return layout + +def getChunkSize(layout, type_size): + """Return chunk size given layout. + i.e. just the product of the values in the list. + """ + if type_size == "H5T_VARIABLE": + type_size = DEFAULT_TYPE_SIZE + + chunk_size = type_size + for n in layout: + if n <= 0: + raise ValueError("Invalid chunk layout") + chunk_size *= n + return chunk_size + +def validateChunkLayout(shape_json, item_size, layout, chunk_table=None): + """ + Use chunk layout given in the creationPropertiesList (if defined and + layout is valid). 
+ Return chunk_layout_json + """ + + rank = 0 + space_dims = None + chunk_dims = None + max_dims = None + + if "dims" in shape_json: + space_dims = shape_json["dims"] + rank = len(space_dims) + + if "maxdims" in shape_json: + max_dims = shape_json["maxdims"] + if "dims" in layout: + chunk_dims = layout["dims"] + + if chunk_dims: + # validate that the chunk_dims are valid and correlates with the + # dataset shape + if isinstance(chunk_dims, int): + chunk_dims = [ + chunk_dims, + ] # promote to array + if len(chunk_dims) != rank: + msg = "Layout rank does not match shape rank" + log.warn(msg) + raise ValueError(msg) + for i in range(rank): + dim_extent = space_dims[i] + chunk_extent = chunk_dims[i] + if not isinstance(chunk_extent, int): + msg = "Layout dims must be integer or integer array" + log.warn(msg) + raise ValueError(msg) + if chunk_extent <= 0: + msg = "Invalid layout value" + log.warn(msg) + raise ValueError(msg) + if max_dims is None: + if chunk_extent > dim_extent: + msg = "Invalid layout value" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + elif max_dims[i] != 0: + if chunk_extent > max_dims[i]: + msg = "Invalid layout value for extensible dimension" + log.warn(msg) + raise ValueError(msg) + else: + pass # allow any positive value for unlimited dimensions + + if "class" not in layout: + msg = "class key not found in layout for creation property list" + log.warn(msg) + raise ValueError(msg) + + layout_class = layout["class"] + + if layout_class == "H5D_CONTIGUOUS_REF": + # reference to a dataset in a traditional HDF5 files with + # contiguous storage + if item_size == "H5T_VARIABLE": + # can't be used with variable types... + msg = "Datasets with variable types cannot be used with " + msg += "reference layouts" + log.warn(msg) + raise ValueError(msg) + if "file_uri" not in layout: + # needed for H5D_CONTIGUOUS_REF + msg = "'file_uri' key must be provided for " + msg += "H5D_CONTIGUOUS_REF layout" + log.warn(msg) + raise ValueError(msg) + if "offset" not in layout: + # needed for H5D_CONTIGUOUS_REF + msg = "'offset' key must be provided for " + msg += "H5D_CONTIGUOUS_REF layout" + log.warn(msg) + raise ValueError(msg) + if "size" not in layout: + # needed for H5D_CONTIGUOUS_REF + msg = "'size' key must be provided for " + msg += "H5D_CONTIGUOUS_REF layout" + log.warn(msg) + raise ValueError(msg) + if "dims" in layout: + # used defined chunk layout not allowed for H5D_CONTIGUOUS_REF + msg = "'dims' key can not be provided for " + msg += "H5D_CONTIGUOUS_REF layout" + log.warn(msg) + raise ValueError(msg) + elif layout_class == "H5D_CHUNKED_REF": + # reference to a dataset in a traditional HDF5 files with + # chunked storage + if item_size == "H5T_VARIABLE": + # can't be used with variable types.. 
+            msg = "Datasets with variable types cannot be used with "
+            msg += "reference layouts"
+            log.warn(msg)
+            raise ValueError(msg)
+        if "file_uri" not in layout:
+            # needed for H5D_CHUNKED_REF
+            msg = "'file_uri' key must be provided for "
+            msg += "H5D_CHUNKED_REF layout"
+            log.warn(msg)
+            raise ValueError(msg)
+        if "dims" not in layout:
+            # needed for H5D_CHUNKED_REF
+            msg = "'dims' key must be provided for "
+            msg += "H5D_CHUNKED_REF layout"
+            log.warn(msg)
+            raise ValueError(msg)
+        if "chunks" not in layout:
+            msg = "'chunks' key must be provided for "
+            msg += "H5D_CHUNKED_REF layout"
+            log.warn(msg)
+            raise ValueError(msg)
+    elif layout_class == "H5D_CHUNKED_REF_INDIRECT":
+        # reference to a dataset in a traditional HDF5 files with chunked
+        # storage using an auxiliary dataset
+        if item_size == "H5T_VARIABLE":
+            # can't be used with variable types...
+            msg = "Datasets with variable types cannot be used with "
+            msg += "reference layouts"
+            log.warn(msg)
+            raise ValueError(msg)
+        if "dims" not in layout:
+            # needed for H5D_CHUNKED_REF_INDIRECT
+            msg = "'dims' key must be provided for "
+            msg += "H5D_CHUNKED_REF_INDIRECT layout"
+            log.warn(msg)
+            raise ValueError(msg)
+        if "chunk_table" not in layout:
+            msg = "'chunk_table' key must be provided for "
+            msg += "H5D_CHUNKED_REF_INDIRECT layout"
+            log.warn(msg)
+            raise ValueError(msg)
+        chunk_table_id = layout["chunk_table"]
+        if not isValidUuid(chunk_table_id, "Dataset"):
+            msg = f"Invalid chunk table id: {chunk_table_id}"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+
+    elif layout_class == "H5D_CHUNKED":
+        if "dims" not in layout:
+            msg = "dims key not found in layout for creation property list"
+            log.warn(msg)
+            raise ValueError(msg)
+        if shape_json["class"] != "H5S_SIMPLE":
+            msg = "Bad Request: chunked layout not valid with shape class: "
+            msg += f"{shape_json['class']}"
+            log.warn(msg)
+            raise ValueError(msg)
+    elif layout_class == "H5D_CONTIGUOUS":
+        if "dims" in layout:
+            msg = "dims key found in layout for creation property list "
+            msg += "for H5D_CONTIGUOUS storage class"
+            log.warn(msg)
+            raise ValueError(msg)
+    elif layout_class == "H5D_COMPACT":
+        if "dims" in layout:
+            msg = "dims key found in layout for creation property list "
+            msg += "for H5D_COMPACT storage class"
+            log.warn(msg)
+            raise ValueError(msg)
     else:
-        if "dims" not in datashape:
-            log.warn(f"expected to find dims key in shape_json: {datashape}")
-        is_scalar = False
+        msg = f"Unexpected layout: {layout_class}"
+        log.warn(msg)
+        raise ValueError(msg)
+
+def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN, layout_class="H5D_CHUNKED"):
+    """Compute an increased chunk shape with a size in bytes greater than chunk_min."""
+    if shape_json is None or shape_json["class"] == "H5S_NULL":
+        return None
+    if shape_json["class"] == "H5S_SCALAR":
+        return (1,)  # just enough to store one item
+
+    layout = list(layout)
+    log.debug(f"expandChunk layout: {layout} typesize: {typesize}")
+    dims = shape_json["dims"]
+    rank = len(dims)
+    extendable_dims = 0  # number of dimensions that are extendable
+    maxdims = None
+    if "maxdims" in shape_json:
+        maxdims = shape_json["maxdims"]
+        for n in range(rank):
+            if maxdims[n] == 0 or maxdims[n] > dims[n]:
+                extendable_dims += 1
+
+    dset_size = get_dset_size(shape_json, typesize)
+    if dset_size <= chunk_min and extendable_dims == 0:
+        # just use the entire dataspace shape as one big chunk
+        return tuple(dims)
+
+    chunk_size = getChunkSize(layout, typesize)
+    if chunk_size >= chunk_min:
+        return tuple(layout)  # good already
+    while chunk_size < chunk_min:
+        # just adjust along extendable dimensions first
+        old_chunk_size = chunk_size
+        for n in range(rank):
+            dim = rank - n - 1  # start from last dim
+
+            if extendable_dims > 0:
+                if maxdims[dim] == 0:
+                    # infinitely extendable dimensions
+                    layout[dim] *= 2
+                    chunk_size = getChunkSize(layout, typesize)
+                    if chunk_size > chunk_min:
+                        break
+                elif maxdims[dim] > layout[dim]:
+                    # can only be extended so much
+                    layout[dim] *= 2
+                    if layout[dim] >= dims[dim]:
+                        layout[dim] = maxdims[dim]  # trim back
+                        extendable_dims -= 1  # one less extendable dimension
+
+                    chunk_size = getChunkSize(layout, typesize)
+                    if chunk_size > chunk_min:
+                        break
+                else:
+                    pass  # ignore non-extensible for now
+            else:
+                # no extendable dimensions
+                if dims[dim] > layout[dim]:
+                    # can expand chunk along this dimension
+                    layout[dim] *= 2
+                    if layout[dim] > dims[dim]:
+                        layout[dim] = dims[dim]  # trim back
+                    chunk_size = getChunkSize(layout, typesize)
+                    if chunk_size > chunk_min:
+                        break
+                else:
+                    pass  # can't extend chunk along this dimension
+        if chunk_size <= old_chunk_size:
+            # stop iteration if we haven't increased the chunk size
+            log.debug("stopping expandChunk iteration")
+            break
+        elif chunk_size > chunk_min:
+            break  # we're good
+        else:
+            pass  # do another round
+    return tuple(layout)
+
+
+def shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX, layout_class="H5D_CHUNKED"):
+    """Compute a reduced chunk shape with a size in bytes less than chunk_max."""
+    layout = list(layout)
+    chunk_size = getChunkSize(layout, typesize)
+    if chunk_size <= chunk_max:
+        return tuple(layout)  # good already
+    log.debug(f"shrinkChunk layout: {layout} typesize: {typesize}")
+    rank = len(layout)
+
+    while chunk_size > chunk_max:
+        # just adjust along extendable dimensions first
+        old_chunk_size = chunk_size
+        for dim in range(rank):
+            if layout[dim] > 1:
+                # tricky way to do x // 2 with ceil
+                layout[dim] = -(-layout[dim] // 2)
+                chunk_size = getChunkSize(layout, typesize)
+                if chunk_size <= chunk_max:
+                    break
+            else:
+                pass  # can't shrink chunk along this dimension
+        if chunk_size >= old_chunk_size:
+            # reality check to see if we'll ever break out of the while loop
+            log.warning("Unexpected error in shrinkChunk")
+            break
+        elif chunk_size <= chunk_max:
+            break  # we're good
+        else:
+            pass  # do another round
+    return tuple(layout)
+
+
+def guessChunk(shape_json, typesize):
+    """Guess an appropriate chunk layout for a dataset, given its shape and
+    the size of each element in bytes.  Will allocate chunks only as large
+    as MAX_SIZE.  Chunks are generally close to some power-of-2 fraction of
+    each axis, slightly favoring bigger values for the last index.
+
+    Undocumented and subject to change without warning.
+    """
+    if shape_json is None or shape_json["class"] == "H5S_NULL":
+        return None
+    if shape_json["class"] == "H5S_SCALAR":
+        return (1,)  # just enough to store one item
+
+    if "maxdims" in shape_json:
+        shape = shape_json["maxdims"]
+    else:
+        shape = shape_json["dims"]
+
+    if typesize == "H5T_VARIABLE":
+        typesize = 128  # just take a guess at the item size
+
+    # For unlimited dimensions we have to guess. 
use 1024 + shape = tuple((x if x != 0 else 1024) for i, x in enumerate(shape)) + + return shape + + +def getLayoutJson(creation_props, shape=None, type_json=None, chunk_min=None, chunk_max=None): + """ Get the layout json given by creation_props. + Raise bad request error if invalid """ + + min_chunk_size = int(config.get("min_chunk_size")) + max_chunk_size = int(config.get("max_chunk_size")) + + item_size = getItemSize(type_json) + if chunk_min is None: + chunk_min = 1000 * 1000 + if chunk_max is None: + chunk_max = 4 * 1000 * 1000 + + if chunk_min > chunk_max: + msg = "chunk_max must be larger than chunk_min" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + layout = None + if "layout" in creation_props: + layout_props = creation_props["layout"] + else: + layout_props = None + + if layout_props: + if "class" not in layout_props: + msg = "expected class key in layout props" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + layout_class = layout_props["class"] + if layout_class == "H5D_CONTIGUOUS": + # treat contiguous as chunked + layout_class = "H5D_CHUNKED" + else: + layout_class = layout_props["class"] + elif shape["class"] != "H5S_NULL": + layout_class = "H5D_CHUNKED" + else: + layout_class = None + + if layout_class == "H5D_COMPACT": + layout = {"class": "H5D_COMPACT"} + elif layout_class: + # initialize to H5D_CHUNKED + layout = {"class": "H5D_CHUNKED"} + else: + # null space - no layout + layout = None + + if layout_props and "dims" in layout_props: + chunk_dims = layout_props["dims"] + else: + chunk_dims = None + + if layout_class == "H5D_CONTIGUOUS_REF": + kwargs = {"chunk_min": min_chunk_size, "chunk_max": max_chunk_size} + chunk_dims = getContiguousLayout(shape, item_size, **kwargs) + layout["dims"] = chunk_dims + log.debug(f"autoContiguous layout: {layout}") + + if layout_class == "H5D_CHUNKED" and chunk_dims is None: + # do auto-chunking + chunk_dims = guessChunk(shape, item_size) + log.debug(f"initial autochunk layout: {chunk_dims}") + + if layout_class == "H5D_CHUNKED": + chunk_size = getChunkSize(chunk_dims, item_size) + + msg = f"chunk_size: {chunk_size}, min: {min_chunk_size}, " + msg += f"max: {max_chunk_size}" + log.debug(msg) + # adjust the chunk shape if chunk size is too small or too big + adjusted_chunk_dims = None + if chunk_size < min_chunk_size: + msg = f"chunk size: {chunk_size} less than min size: " + msg += f"{min_chunk_size}, expanding" + log.debug(msg) + kwargs = {"chunk_min": min_chunk_size, "layout_class": layout_class} + adjusted_chunk_dims = expandChunk(chunk_dims, item_size, shape, **kwargs) + elif chunk_size > max_chunk_size: + msg = f"chunk size: {chunk_size} greater than max size: " + msg += f"{max_chunk_size}, shrinking" + log.debug(msg) + kwargs = {"chunk_max": max_chunk_size} + adjusted_chunk_dims = shrinkChunk(chunk_dims, item_size, **kwargs) + if adjusted_chunk_dims: + msg = f"requested chunk_dimensions: {chunk_dims} modified " + msg += f"dimensions: {adjusted_chunk_dims}" + log.debug(msg) + layout["dims"] = adjusted_chunk_dims + else: + layout["dims"] = chunk_dims # don't need to adjust chunk size + + # set partition_count if needed: + max_chunks_per_folder = int(config.get("max_chunks_per_folder")) + set_partition = False + if max_chunks_per_folder > 0: + if "dims" in shape and "dims" in layout: + set_partition = True + + if set_partition: + chunk_dims = layout["dims"] + shape_dims = shape["dims"] + if "maxdims" in shape: + max_dims = shape["maxdims"] + else: + max_dims = None + num_chunks = 1 + rank = len(shape_dims) + 
unlimited_count = 0 + if max_dims: + for i in range(rank): + if max_dims[i] == 0: + unlimited_count += 1 + msg = f"number of unlimited dimensions: {unlimited_count}" + log.debug(msg) + + for i in range(rank): + max_dim = 1 + if max_dims: + max_dim = max_dims[i] + if max_dim == 0: + # don't really know what the ultimate extent + # could be, but assume 10^6 for total number of + # elements and square-shaped array... + MAX_ELEMENT_GUESS = 10.0 ** 6 + exp = 1 / unlimited_count + max_dim = int(math.pow(MAX_ELEMENT_GUESS, exp)) + else: + max_dim = shape_dims[i] + num_chunks *= math.ceil(max_dim / chunk_dims[i]) + + if num_chunks > max_chunks_per_folder: + partition_count = math.ceil(num_chunks / max_chunks_per_folder) + msg = f"set partition count to: {partition_count}, " + msg += f"num_chunks: {num_chunks}" + log.info(msg) + layout["partition_count"] = partition_count + else: + msg = "do not need chunk partitions, num_chunks: " + msg += f"{num_chunks} max_chunks_per_folder: " + msg += f"{max_chunks_per_folder}" + log.info(msg) + + if layout_class in ("H5D_CHUNKED_REF", "H5D_CHUNKED_REF_INDIRECT"): + chunk_size = getChunkSize(chunk_dims, item_size) + + msg = f"chunk_size: {chunk_size}, min: {min_chunk_size}, " + msg += f"max: {max_chunk_size}" + log.debug(msg) + # nothing to do about inefficiently small chunks, but large chunks + # can be subdivided + if chunk_size < min_chunk_size: + msg = f"chunk size: {chunk_size} less than min size: " + msg += f"{min_chunk_size} for {layout_class} dataset" + log.warn(msg) + elif chunk_size > max_chunk_size: + msg = f"chunk size: {chunk_size} greater than max size: " + msg += f"{max_chunk_size}, for {layout_class} dataset" + log.warn(msg) + layout["dims"] = chunk_dims + + +def getHyperslabSelection(dims, start=None, stop=None, step=None): """ Get slices given lists of start, stop, step values TBD: for step>1, adjust the slice to not extend beyond last data point returned """ - rank = len(dsetshape) + rank = len(dims) if start: if not isinstance(start, (list, tuple)): start = [start] @@ -273,7 +1009,7 @@ def getHyperslabSelection(dsetshape, start=None, stop=None, step=None): log.warn(msg) raise HTTPBadRequest(reason=msg) for dim in range(rank): - if start[dim] < 0 or start[dim] >= dsetshape[dim]: + if start[dim] < 0 or start[dim] >= dims[dim]: msg = "Bad Request: start index invalid for dim: " + str(dim) log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -290,14 +1026,14 @@ def getHyperslabSelection(dsetshape, start=None, stop=None, step=None): log.warn(msg) raise HTTPBadRequest(reason=msg) for dim in range(rank): - if stop[dim] <= start[dim] or stop[dim] > dsetshape[dim]: + if stop[dim] <= start[dim] or stop[dim] > dims[dim]: msg = "Bad Request: stop index invalid for dim: " + str(dim) log.warn(msg) raise HTTPBadRequest(reason=msg) else: stop = [] for dim in range(rank): - stop.append(dsetshape[dim]) + stop.append(dims[dim]) if step: if not isinstance(step, (list, tuple)): @@ -307,7 +1043,7 @@ def getHyperslabSelection(dsetshape, start=None, stop=None, step=None): log.warn(msg) raise HTTPBadRequest(reason=msg) for dim in range(rank): - if step[dim] <= 0 or step[dim] > dsetshape[dim]: + if step[dim] <= 0 or step[dim] > dims[dim]: msg = "Bad Request: step index invalid for dim: " + str(dim) log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -427,11 +1163,11 @@ def isSelectAll(slices, dims): def getQueryParameter(request, query_name, body=None, default=None): """ - Herlper function, get query parameter value from request. 
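A quick worked example of the partition-count math in getLayoutJson above; the
values below are illustrative only, and max_chunks_per_folder is normally read
from config:

    import math

    # shape_dims = [0], max_dims = [0], chunk_dims = (1024,): one unlimited
    # dim, so the total extent is guessed at 10**6 elements
    max_dim = int(math.pow(10.0 ** 6, 1 / 1))       # -> 1000000
    num_chunks = math.ceil(max_dim / 1024)          # -> 977
    partition_count = math.ceil(num_chunks / 200)   # max_chunks_per_folder=200 -> 5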
+ Helper function, get query parameter value from request. If body is provided (as a JSON object) look in JSON and if not found look for query param. Return default value (or None) if not found """ - # as a convience, look up different capitilizations of query name + # as a convenience, look up different capitalizations of query name params = request.rel_url.query query_names = [] query_names.append(query_name.lower()) diff --git a/tests/integ/datatype_test.py b/tests/integ/datatype_test.py index a3f20fd3..ce7418cb 100755 --- a/tests/integ/datatype_test.py +++ b/tests/integ/datatype_test.py @@ -144,7 +144,7 @@ def testPostTypeWithId(self): req = self.endpoint + "/datatypes" data = {"id": ctype_id} rsp = self.session.post(req, data=json.dumps(data), headers=headers) - self.assertEqual(rsp.status_code, 401) # bad request + self.assertEqual(rsp.status_code, 400) # bad request # create a committed type obj data = {"id": ctype_id, "type": "H5T_IEEE_F32LE"} diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index c9e88afb..9ad5c5e6 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -928,7 +928,7 @@ def testPutScalarDataset(self): rsp = self.session.put(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) - # read unintialized value from dataset + # read uninitialized value from dataset req = self.endpoint + "/datasets/" + dset_id + "/value" rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) diff --git a/tests/unit/chunk_util_test.py b/tests/unit/chunk_util_test.py index 37d1e512..7ae16bd5 100755 --- a/tests/unit/chunk_util_test.py +++ b/tests/unit/chunk_util_test.py @@ -23,7 +23,6 @@ chunkReadPoints, chunkWritePoints, chunkQuery, - guessChunk, getNumChunks, getChunkIds, getChunkId, @@ -33,11 +32,7 @@ getChunkSelection, getChunkCoverage, getDataCoverage, - getChunkSize, - shrinkChunk, - expandChunk, getDatasetId, - getContiguousLayout, _getEvalStr, _getWhereFieldName, _getWhereElements, @@ -50,288 +45,8 @@ def __init__(self, *args, **kwargs): # main logging.getLogger().setLevel(logging.ERROR) - def testGuessChunk(self): - - typesize = "H5T_VARIABLE" - logging.debug("hello") - - shape = {"class": "H5S_NULL"} - layout = guessChunk(shape, typesize) - self.assertTrue(layout is None) - - shape = {"class": "H5S_SCALAR"} - layout = guessChunk(shape, typesize) - self.assertEqual(layout, (1,)) - - shape = {"class": "H5S_SIMPLE", "dims": [100, 100]} - layout = guessChunk(shape, typesize) - self.assertTrue(len(layout), 2) - for i in range(2): - self.assertTrue(layout[i] >= 1) - self.assertTrue(layout[i] <= 100) - - typesize = 8 - layout = guessChunk(shape, typesize) - self.assertTrue(len(layout), 2) - for i in range(2): - self.assertTrue(layout[i] >= 1) - self.assertTrue(layout[i] <= 100) - - shape = {"class": "H5S_SIMPLE", "dims": [5]} - layout = guessChunk(shape, typesize) - self.assertEqual(layout, (5,)) - - shape = {"class": "H5S_SIMPLE", "dims": [100, 100, 100]} - layout = guessChunk(shape, typesize) - print("layout:", layout) - self.assertTrue(len(layout), 3) - for i in range(3): - self.assertTrue(layout[i] >= 1) - self.assertTrue(layout[i] <= 100) - - shape = {"class": "H5S_SIMPLE", "dims": [100, 0], "maxdims": [100, 0]} - layout = guessChunk(shape, typesize) - self.assertTrue(len(layout), 2) - for i in range(2): - self.assertTrue(layout[i] >= 1) - self.assertTrue(layout[i] <= 1024) - - shape = {"class": "H5S_SCALAR"} - layout = guessChunk(shape, typesize) - self.assertEqual(layout, (1,)) - - 
shape = {"class": "H5S_NULL"} - layout = guessChunk(shape, typesize) - self.assertEqual(layout, None) - - def testShrinkChunk(self): - CHUNK_MIN = 500 - CHUNK_MAX = 5000 - typesize = 1 - layout = (1, 2, 3) - shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX) - self.assertEqual(shrunk, layout) - - layout = (100, 200, 300) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes > CHUNK_MAX) - shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX) - rank = len(layout) - for i in range(rank): - self.assertTrue(shrunk[i] >= 1) - self.assertTrue(shrunk[i] <= 1000 * (i + 1)) - num_bytes = getChunkSize(shrunk, typesize) - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - layout = (300, 200, 100) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes > CHUNK_MAX) - shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX) - rank = len(layout) - for i in range(rank): - self.assertTrue(shrunk[i] >= 1) - self.assertTrue(shrunk[i] <= 1000 * (3 - i)) - num_bytes = getChunkSize(shrunk, typesize) - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - CHUNK_MIN = 1 * 1024 * 1024 - CHUNK_MAX = 4 * 1024 * 1024 - typesize = 4 - layout = (117, 201, 189, 1) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes > CHUNK_MAX) - shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX) - self.assertEqual(shrunk, (59, 101, 95, 1)) - num_bytes = getChunkSize(shrunk, typesize) - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - def testExpandChunk(self): - CHUNK_MIN = 5000 - CHUNK_MAX = 50000 - - typesize = 20 - shape = {"class": "H5S_SIMPLE", "dims": [12, ], "maxdims": [20, ]} - layout = (20,) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes < CHUNK_MIN) - expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) - num_bytes = getChunkSize(expanded, typesize) - # chunk layout can't be larger than dataspace - self.assertTrue(num_bytes < CHUNK_MIN) - self.assertEqual(expanded, (20,)) - - typesize = 1 - shape = {"class": "H5S_SIMPLE", "dims": [10, 10, 10]} - layout = (10, 10, 10) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes < CHUNK_MIN) - expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) - num_bytes = getChunkSize(expanded, typesize) - # chunk layout can't be larger than dataspace - self.assertTrue(num_bytes < CHUNK_MIN) - self.assertEqual(expanded, (10, 10, 10)) - - shape = {"class": "H5S_SIMPLE", "dims": [1000, 2000, 3000]} - layout = (10, 10, 10) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes < CHUNK_MIN) - expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) - num_bytes = getChunkSize(expanded, typesize) - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - shape = {"class": "H5S_SIMPLE", "dims": [1000,]} - layout = (10,) - num_bytes = getChunkSize(layout, "H5T_VARIABLE") - self.assertTrue(num_bytes < CHUNK_MIN) - expanded = expandChunk(layout, "H5T_VARIABLE", shape, chunk_min=CHUNK_MIN) - num_bytes = getChunkSize(expanded, "H5T_VARIABLE") - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - shape = { - "class": "H5S_SIMPLE", - "dims": [1000, 10, 1000], - "maxdims": [1000, 100, 1000], - } - layout = (10, 10, 10) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes < CHUNK_MIN) - expanded = expandChunk(layout, typesize, shape, 
chunk_min=CHUNK_MIN) - num_bytes = getChunkSize(expanded, typesize) - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - shape = { - "class": "H5S_SIMPLE", - "dims": [1000, 0, 1000], - "maxdims": [1000, 100, 1000], - } - layout = (10, 10, 10) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes < CHUNK_MIN) - expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) - num_bytes = getChunkSize(expanded, typesize) - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - shape = { - "class": "H5S_SIMPLE", - "dims": [1000, 10, 1000], - "maxdims": [1000, 0, 1000], - } - layout = (10, 10, 10) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes < CHUNK_MIN) - expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) - num_bytes = getChunkSize(expanded, typesize) - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - def testGetContiguiousLayout(self): - - typesize = 4 - chunk_min = 400 - chunk_max = 800 - - def get_num_bytes(dims): - num_bytes = typesize - for n in dims: - num_bytes *= n - return num_bytes - - try: - shape = {"class": "H5S_SIMPLE", "dims": [100, 100]} - layout = getContiguousLayout(shape, "H5T_VARIABLE") - self.assertTrue(False) - except ValueError: - pass # expected - - shape = {"class": "H5S_NULL"} - layout = getContiguousLayout(shape, typesize) - self.assertTrue(layout is None) - - shape = {"class": "H5S_SCALAR"} - layout = getContiguousLayout(shape, typesize) - self.assertEqual(layout, (1,)) - - for extent in (1, 100, 10000): - dims = [ - extent, - ] - shape = {"class": "H5S_SIMPLE", "dims": dims} - layout = getContiguousLayout( - shape, typesize, chunk_min=chunk_min, chunk_max=chunk_max - ) - self.assertTrue(len(layout), 1) - chunk_bytes = get_num_bytes(layout) - space_bytes = get_num_bytes(dims) - if space_bytes > chunk_min: - self.assertTrue(chunk_bytes >= chunk_min) - - self.assertTrue(chunk_bytes <= chunk_max) - - for extent in (1, 9, 90): - dims = [extent, extent] - shape = {"class": "H5S_SIMPLE", "dims": dims} - layout = getContiguousLayout( - shape, typesize, chunk_min=chunk_min, chunk_max=chunk_max - ) - self.assertTrue(len(layout), 2) - for i in range(2): - self.assertTrue(layout[i] >= 1) - self.assertTrue(layout[i] <= extent) - self.assertEqual(layout[1], extent) - chunk_bytes = get_num_bytes(layout) - space_bytes = get_num_bytes(dims) - - if space_bytes > chunk_min: - self.assertTrue(chunk_bytes >= chunk_min) - self.assertTrue(chunk_bytes <= chunk_max) - - for extent in (1, 10, 100): - dims = [extent, extent, 50] - shape = {"class": "H5S_SIMPLE", "dims": dims} - layout = getContiguousLayout( - shape, typesize, chunk_min=chunk_min, chunk_max=chunk_max - ) - self.assertTrue(len(layout), 3) - for i in range(3): - self.assertTrue(layout[i] >= 1) - self.assertTrue(layout[i] <= dims[i]) - - chunk_bytes = get_num_bytes(layout) - space_bytes = get_num_bytes(dims) - - if space_bytes > chunk_min: - self.assertTrue(chunk_bytes >= chunk_min) - self.assertTrue(chunk_bytes <= chunk_max) - - for extent in (1, 100, 1000): - dims = [extent, 4] - shape = {"class": "H5S_SIMPLE", "dims": dims} - layout = getContiguousLayout( - shape, typesize, chunk_min=chunk_min, chunk_max=chunk_max - ) - self.assertTrue(len(layout), 2) - for i in range(2): - self.assertTrue(layout[i] >= 1) - self.assertTrue(layout[i] <= dims[i]) - - chunk_bytes = get_num_bytes(layout) - space_bytes = get_num_bytes(dims) - - if space_bytes > chunk_min: - 
self.assertTrue(chunk_bytes >= chunk_min) - self.assertTrue(chunk_bytes <= chunk_max) - def testGetNumChunks(self): - datashape = [ - 100, - ] + datashape = [100,] layout = (10,) selection = getHyperslabSelection(datashape) count = getNumChunks(selection, layout) diff --git a/tests/unit/dset_util_test.py b/tests/unit/dset_util_test.py index 0e77ab1b..7c2028b9 100755 --- a/tests/unit/dset_util_test.py +++ b/tests/unit/dset_util_test.py @@ -15,7 +15,8 @@ sys.path.append("../..") from hsds.util.dsetUtil import getHyperslabSelection, getSelectionShape -from hsds.util.dsetUtil import getSelectionList, ItemIterator, getSelectionPagination +from hsds.util.dsetUtil import getSelectionList, ItemIterator, getSelectionPagination, expandChunk +from hsds.util.dsetUtil import guessChunk, shrinkChunk, getChunkSize, getContiguousLayout class DsetUtilTest(unittest.TestCase): @@ -25,6 +26,277 @@ def __init__(self, *args, **kwargs): self.logger = logging.getLogger() self.logger.setLevel(logging.WARNING) + def testGuessChunk(self): + + typesize = "H5T_VARIABLE" + logging.debug("hello") + + shape = {"class": "H5S_NULL"} + layout = guessChunk(shape, typesize) + self.assertTrue(layout is None) + + shape = {"class": "H5S_SCALAR"} + layout = guessChunk(shape, typesize) + self.assertEqual(layout, (1,)) + + shape = {"class": "H5S_SIMPLE", "dims": [100, 100]} + layout = guessChunk(shape, typesize) + self.assertTrue(len(layout), 2) + for i in range(2): + self.assertTrue(layout[i] >= 1) + self.assertTrue(layout[i] <= 100) + + typesize = 8 + layout = guessChunk(shape, typesize) + self.assertTrue(len(layout), 2) + for i in range(2): + self.assertTrue(layout[i] >= 1) + self.assertTrue(layout[i] <= 100) + + shape = {"class": "H5S_SIMPLE", "dims": [5]} + layout = guessChunk(shape, typesize) + self.assertEqual(layout, (5,)) + + shape = {"class": "H5S_SIMPLE", "dims": [100, 100, 100]} + layout = guessChunk(shape, typesize) + print("layout:", layout) + self.assertTrue(len(layout), 3) + for i in range(3): + self.assertTrue(layout[i] >= 1) + self.assertTrue(layout[i] <= 100) + + shape = {"class": "H5S_SIMPLE", "dims": [100, 0], "maxdims": [100, 0]} + layout = guessChunk(shape, typesize) + self.assertTrue(len(layout), 2) + for i in range(2): + self.assertTrue(layout[i] >= 1) + self.assertTrue(layout[i] <= 1024) + + shape = {"class": "H5S_SCALAR"} + layout = guessChunk(shape, typesize) + self.assertEqual(layout, (1,)) + + shape = {"class": "H5S_NULL"} + layout = guessChunk(shape, typesize) + self.assertEqual(layout, None) + + def testShrinkChunk(self): + CHUNK_MIN = 500 + CHUNK_MAX = 5000 + typesize = 1 + layout = (1, 2, 3) + shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX) + self.assertEqual(shrunk, layout) + + layout = (100, 200, 300) + num_bytes = getChunkSize(layout, typesize) + self.assertTrue(num_bytes > CHUNK_MAX) + shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX) + rank = len(layout) + for i in range(rank): + self.assertTrue(shrunk[i] >= 1) + self.assertTrue(shrunk[i] <= 1000 * (i + 1)) + num_bytes = getChunkSize(shrunk, typesize) + self.assertTrue(num_bytes > CHUNK_MIN) + self.assertTrue(num_bytes < CHUNK_MAX) + + layout = (300, 200, 100) + num_bytes = getChunkSize(layout, typesize) + self.assertTrue(num_bytes > CHUNK_MAX) + shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX) + rank = len(layout) + for i in range(rank): + self.assertTrue(shrunk[i] >= 1) + self.assertTrue(shrunk[i] <= 1000 * (3 - i)) + num_bytes = getChunkSize(shrunk, typesize) + self.assertTrue(num_bytes > CHUNK_MIN) + 
self.assertTrue(num_bytes < CHUNK_MAX) + + CHUNK_MIN = 1 * 1024 * 1024 + CHUNK_MAX = 4 * 1024 * 1024 + typesize = 4 + layout = (117, 201, 189, 1) + num_bytes = getChunkSize(layout, typesize) + self.assertTrue(num_bytes > CHUNK_MAX) + shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX) + self.assertEqual(shrunk, (59, 101, 95, 1)) + num_bytes = getChunkSize(shrunk, typesize) + self.assertTrue(num_bytes > CHUNK_MIN) + self.assertTrue(num_bytes < CHUNK_MAX) + + def testExpandChunk(self): + CHUNK_MIN = 5000 + CHUNK_MAX = 50000 + + typesize = 20 + shape = {"class": "H5S_SIMPLE", "dims": [12, ], "maxdims": [20, ]} + layout = (20,) + num_bytes = getChunkSize(layout, typesize) + self.assertTrue(num_bytes < CHUNK_MIN) + expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) + num_bytes = getChunkSize(expanded, typesize) + # chunk layout can't be larger than dataspace + self.assertTrue(num_bytes < CHUNK_MIN) + self.assertEqual(expanded, (20,)) + + typesize = 1 + shape = {"class": "H5S_SIMPLE", "dims": [10, 10, 10]} + layout = (10, 10, 10) + num_bytes = getChunkSize(layout, typesize) + self.assertTrue(num_bytes < CHUNK_MIN) + expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) + num_bytes = getChunkSize(expanded, typesize) + # chunk layout can't be larger than dataspace + self.assertTrue(num_bytes < CHUNK_MIN) + self.assertEqual(expanded, (10, 10, 10)) + + shape = {"class": "H5S_SIMPLE", "dims": [1000, 2000, 3000]} + layout = (10, 10, 10) + num_bytes = getChunkSize(layout, typesize) + self.assertTrue(num_bytes < CHUNK_MIN) + expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) + num_bytes = getChunkSize(expanded, typesize) + self.assertTrue(num_bytes > CHUNK_MIN) + self.assertTrue(num_bytes < CHUNK_MAX) + + shape = {"class": "H5S_SIMPLE", "dims": [1000,]} + layout = (10,) + num_bytes = getChunkSize(layout, "H5T_VARIABLE") + self.assertTrue(num_bytes < CHUNK_MIN) + expanded = expandChunk(layout, "H5T_VARIABLE", shape, chunk_min=CHUNK_MIN) + num_bytes = getChunkSize(expanded, "H5T_VARIABLE") + self.assertTrue(num_bytes > CHUNK_MIN) + self.assertTrue(num_bytes < CHUNK_MAX) + + shape = { + "class": "H5S_SIMPLE", + "dims": [1000, 10, 1000], + "maxdims": [1000, 100, 1000], + } + layout = (10, 10, 10) + num_bytes = getChunkSize(layout, typesize) + self.assertTrue(num_bytes < CHUNK_MIN) + expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) + num_bytes = getChunkSize(expanded, typesize) + self.assertTrue(num_bytes > CHUNK_MIN) + self.assertTrue(num_bytes < CHUNK_MAX) + + shape = { + "class": "H5S_SIMPLE", + "dims": [1000, 0, 1000], + "maxdims": [1000, 100, 1000], + } + layout = (10, 10, 10) + num_bytes = getChunkSize(layout, typesize) + self.assertTrue(num_bytes < CHUNK_MIN) + expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) + num_bytes = getChunkSize(expanded, typesize) + self.assertTrue(num_bytes > CHUNK_MIN) + self.assertTrue(num_bytes < CHUNK_MAX) + + shape = { + "class": "H5S_SIMPLE", + "dims": [1000, 10, 1000], + "maxdims": [1000, 0, 1000], + } + layout = (10, 10, 10) + num_bytes = getChunkSize(layout, typesize) + self.assertTrue(num_bytes < CHUNK_MIN) + expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) + num_bytes = getChunkSize(expanded, typesize) + self.assertTrue(num_bytes > CHUNK_MIN) + self.assertTrue(num_bytes < CHUNK_MAX) + + def testGetContiguousLayout(self): + typesize = 4 + chunk_min = 400 + chunk_max = 800 + + kwargs = {"chunk_min": chunk_min, "chunk_max": chunk_max} + + def 
get_num_bytes(dims):
+            num_bytes = typesize
+            for n in dims:
+                num_bytes *= n
+            return num_bytes
+
+        try:
+            shape = {"class": "H5S_SIMPLE", "dims": [100, 100]}
+            layout = getContiguousLayout(shape, "H5T_VARIABLE", **kwargs)
+            self.assertTrue(False)
+        except ValueError:
+            pass  # expected
+
+        shape = {"class": "H5S_NULL"}
+        layout = getContiguousLayout(shape, typesize, **kwargs)
+        self.assertTrue(layout is None)
+
+        shape = {"class": "H5S_SCALAR"}
+        layout = getContiguousLayout(shape, typesize, **kwargs)
+        self.assertEqual(layout, (1,))
+
+        for extent in (1, 100, 10000):
+            dims = [
+                extent,
+            ]
+            shape = {"class": "H5S_SIMPLE", "dims": dims}
+            layout = getContiguousLayout(shape, typesize, **kwargs)
+            self.assertTrue(len(layout), 1)
+            chunk_bytes = get_num_bytes(layout)
+            space_bytes = get_num_bytes(dims)
+            if space_bytes > chunk_min:
+                self.assertTrue(chunk_bytes >= chunk_min)
+
+            self.assertTrue(chunk_bytes <= chunk_max)
+
+        for extent in (1, 9, 90):
+            dims = [extent, extent]
+            shape = {"class": "H5S_SIMPLE", "dims": dims}
+            layout = getContiguousLayout(shape, typesize, **kwargs)
+            self.assertTrue(len(layout), 2)
+            for i in range(2):
+                self.assertTrue(layout[i] >= 1)
+                self.assertTrue(layout[i] <= extent)
+            self.assertEqual(layout[1], extent)
+            chunk_bytes = get_num_bytes(layout)
+            space_bytes = get_num_bytes(dims)
+
+            if space_bytes > chunk_min:
+                self.assertTrue(chunk_bytes >= chunk_min)
+                self.assertTrue(chunk_bytes <= chunk_max)
+
+        for extent in (1, 10, 100):
+            dims = [extent, extent, 50]
+            shape = {"class": "H5S_SIMPLE", "dims": dims}
+            layout = getContiguousLayout(shape, typesize, **kwargs)
+            self.assertTrue(len(layout), 3)
+            for i in range(3):
+                self.assertTrue(layout[i] >= 1)
+                self.assertTrue(layout[i] <= dims[i])
+
+            chunk_bytes = get_num_bytes(layout)
+            space_bytes = get_num_bytes(dims)
+
+            if space_bytes > chunk_min:
+                self.assertTrue(chunk_bytes >= chunk_min)
+                self.assertTrue(chunk_bytes <= chunk_max)
+
+        for extent in (1, 100, 1000):
+            dims = [extent, 4]
+            shape = {"class": "H5S_SIMPLE", "dims": dims}
+            layout = getContiguousLayout(shape, typesize, **kwargs)
+            self.assertTrue(len(layout), 2)
+            for i in range(2):
+                self.assertTrue(layout[i] >= 1)
+                self.assertTrue(layout[i] <= dims[i])
+
+            chunk_bytes = get_num_bytes(layout)
+            space_bytes = get_num_bytes(dims)
+
+            if space_bytes > chunk_min:
+                self.assertTrue(chunk_bytes >= chunk_min)
+                self.assertTrue(chunk_bytes <= chunk_max)
+
     def testGetHyperslabSelection(self):
         # getHyperslabSelection(dsetshape, start, stop, step)
         # 1-D case

From 52f42f3898cab1916cdf816f4926f281ef0d9d17 Mon Sep 17 00:00:00 2001
From: John Readey
Date: Wed, 21 May 2025 12:51:10 +0200
Subject: [PATCH 27/49] refactor post dataset args to service_lib.py

---
 hsds/chunk_sn.py             |   1 +
 hsds/ctype_sn.py             |   2 +-
 hsds/dset_dn.py              |   2 +
 hsds/dset_sn.py              | 152 +++++++++++++++++++++--------------
 hsds/post_crawl.py           |   1 +
 hsds/servicenode_lib.py      |  93 +++++----------------
 tests/integ/dataset_test.py  |  29 ++++---
 tests/integ/datatype_test.py |   1 +
 8 files changed, 137 insertions(+), 144 deletions(-)

diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py
index 87f2fdb4..a039a911 100755
--- a/hsds/chunk_sn.py
+++ b/hsds/chunk_sn.py
@@ -515,6 +515,7 @@ async def PUT_Value(request):
     # get state for dataset from DN - will need this to validate
     # some of the query parameters
     dset_json = await getDsetJson(app, dset_id, bucket=bucket)
+    log.debug(f"got dset_json: {dset_json}")
     datashape = dset_json["shape"]
     if isNullSpace(dset_json):
diff --git a/hsds/ctype_sn.py b/hsds/ctype_sn.py
index 
b2a3d260..83f581a6 100755 --- a/hsds/ctype_sn.py +++ b/hsds/ctype_sn.py @@ -210,7 +210,7 @@ async def POST_Datatype(request): log.warn(msg) raise HTTPBadRequest(reason=msg) kwargs = getCreateArgs(item, root_id=root_id, bucket=bucket) - kwargs["ignore_link"] = True + kwargs["ignore_link"] = True # will create parent links later kwarg_list.append(kwargs) kwargs = {"bucket": bucket, "root_id": root_id} log.debug(f"createDatatypeObjects, items: {kwarg_list}") diff --git a/hsds/dset_dn.py b/hsds/dset_dn.py index bca36457..5b28f69c 100755 --- a/hsds/dset_dn.py +++ b/hsds/dset_dn.py @@ -170,6 +170,8 @@ async def POST_Dataset(request): resp_json["shape"] = shape_json resp_json["lastModified"] = dset_json["lastModified"] resp_json["attributeCount"] = len(attrs) + if layout is not None: + resp_json["layout"] = layout resp = json_response(resp_json, status=201) log.response(request, resp=resp) diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 75110bb7..a60f87fd 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -15,15 +15,15 @@ # from json import JSONDecodeError -from aiohttp.web_exceptions import HTTPBadRequest, HTTPNotFound +from aiohttp.web_exceptions import HTTPBadRequest, HTTPNotFound, HTTPInternalServerError -#from h5json.hdf5dtype import createDataType -from h5json.array_util import getNumElements #, jsonToArray +from h5json.hdf5dtype import createDataType +from h5json.array_util import getNumElements, jsonToArray from h5json.objid import isValidUuid, isSchema2Id from .util.httpUtil import getHref, respJsonAssemble from .util.httpUtil import jsonResponse, getBooleanParam -from .util.dsetUtil import getPreviewQuery# , getShapeDims, validateChunkLayout +from .util.dsetUtil import getPreviewQuery, getShapeDims from .util.authUtil import getUserPasswordFromRequest, aclCheck from .util.authUtil import validateUserPassword from .util.domainUtil import getDomainFromRequest, getPathForDomain, isValidDomain @@ -31,7 +31,7 @@ from .servicenode_lib import getDomainJson, getObjectJson, getDsetJson, getPathForObjectId from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo from .servicenode_lib import getDatasetCreateArgs, createDataset, deleteObject -from .dset_lib import updateShape, deleteAllChunks #, doHyperslabWrite +from .dset_lib import updateShape, deleteAllChunks, doHyperslabWrite from .post_crawl import createDatasets from .domain_crawl import DomainCrawler from . 
import hsds_logger as log @@ -497,12 +497,23 @@ async def POST_Dataset(request): post_rsp = None datatype_json = None + init_values = [] # value initializer for each object + + def _updateInitValuesList(kwargs): + # remove value key from kwargs and append + # to init_values list + if "value" in kwargs: + init_values.append(kwargs["value"]) + del kwargs["value"] + else: + # add a placeholder + init_values.append(None) # # handle case of committed type input # if isinstance(body, dict) and "type" in body: - + body_type = body["type"] log.debug(f"got datatype: {body_type}") if isinstance(body_type, str) and body_type.startswith("t-"): @@ -532,10 +543,11 @@ async def POST_Dataset(request): elif count == 1: # just create one object in typical way kwargs = getDatasetCreateArgs(body[0], - root_id=root_id, - type=datatype_json, - bucket=bucket, - implicit=implicit) + root_id=root_id, + type=datatype_json, + bucket=bucket, + implicit=implicit) + _updateInitValuesList(kwargs) else: # create multiple dataset objects kwarg_list = [] # list of kwargs for each object @@ -546,7 +558,11 @@ async def POST_Dataset(request): msg = f"Post_Dataset - invalid item type: {type(item)}" log.warn(msg) raise HTTPBadRequest(reason=msg) - kwargs = getDatasetCreateArgs(item, root_id=root_id, type=datatype_json, bucket=bucket) + kwargs = getDatasetCreateArgs(item, + root_id=root_id, + type=datatype_json, + bucket=bucket) + _updateInitValuesList(kwargs) kwargs["ignore_link"] = True kwarg_list.append(kwargs) kwargs = {"bucket": bucket, "root_id": root_id} @@ -556,7 +572,12 @@ async def POST_Dataset(request): post_rsp = await createDatasets(app, kwarg_list, **kwargs) else: # single object create - kwargs = getDatasetCreateArgs(body, root_id=root_id, type=datatype_json, bucket=bucket, implicit=implicit) + kwargs = getDatasetCreateArgs(body, + root_id=root_id, + type=datatype_json, + bucket=bucket, + implicit=implicit) + _updateInitValuesList(kwargs) log.debug(f"kwargs for dataset create: {kwargs}") if post_rsp is None: @@ -569,52 +590,33 @@ async def POST_Dataset(request): # add any links in multi request objects = post_rsp["objects"] obj_count = len(objects) - log.debug(f"Post datatype multi create: {obj_count} objects") + log.debug(f"Post dataset multi create: {obj_count} objects") if len(body) != obj_count: msg = f"Expected {obj_count} objects but got {len(body)}" log.warn(msg) raise HTTPBadRequest(reason=msg) - parent_ids = {} - for index in range(obj_count): - item = body[index] - if "link" in item: - link_item = item["link"] - parent_id = link_item.get("id") - title = link_item.get("name") - if parent_id and title: - # add a hard link - object = objects[index] - obj_id = object["id"] - if parent_id not in parent_ids: - parent_ids[parent_id] = {} - links = parent_ids[parent_id] - links[title] = {"id": obj_id} - if parent_ids: - log.debug(f"POST dataset multi - adding links: {parent_ids}") - kwargs = {"action": "put_link", "bucket": bucket} - kwargs["replace"] = True - - crawler = DomainCrawler(app, parent_ids, **kwargs) - - # will raise exception on not found, server busy, etc. 
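A minimal sketch of the value-initializer bookkeeping used above (condensed
here with dict.pop; the kwarg_list contents are illustrative — in the handler
each kwargs dict comes from getDatasetCreateArgs):

    init_values = []

    def _updateInitValuesList(kwargs):
        # pop "value" so the create call never sees it; keep a positional
        # placeholder so init_values[i] lines up with objects[i]
        init_values.append(kwargs.pop("value", None))

    kwarg_list = [{"type": "H5T_STD_I32LE", "value": 42}, {"type": "H5T_STD_I32LE"}]
    for kwargs in kwarg_list:
        _updateInitValuesList(kwargs)
    # init_values == [42, None] and no kwargs dict still has a "value" key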
-            await crawler.crawl()
-
-            status = crawler.get_status()
-
-            log.info(f"DomainCrawler done for put_links action, status: {status}")
-    """
-    if "value" in body and body["value"]:
-        # data to initialize dataset included in request
-        input_data = body["value"]
-        msg = "input data doesn't match request type and shape"
+    else:
+        obj_count = 1  # single object create
+        objects = [post_rsp, ]  # treat as an array to make the following code more consistent
+
+    if len(init_values) != obj_count:
+        msg = f"Expected {obj_count} init values"
+        log.error(msg)
+        raise HTTPInternalServerError()
+
+    # write any init data values
+    for index in range(obj_count):
+        init_data = init_values[index]
+        if init_data is None:
+            continue
+        dset_json = objects[index]
+        log.debug(f"init value, post_rsp: {dset_json}")
+        shape_json = dset_json["shape"]
+        type_json = dset_json["type"]
+        arr_dtype = createDataType(type_json)
+        msg = "input data doesn't match request type and shape"
         dims = getShapeDims(shape_json)
-        if not dims:
-            log.warn(msg)
-            raise HTTPBadRequest(reason=msg)
-        arr_dtype = createDataType(datatype)
-
-        try:
-            input_arr = jsonToArray(dims, arr_dtype, input_data)
+        try:
+            input_arr = jsonToArray(dims, arr_dtype, init_data)
         except ValueError:
             log.warn(f"ValueError: {msg}")
             raise HTTPBadRequest(reason=msg)
@@ -625,14 +627,9 @@ async def POST_Dataset(request):
             log.warn(f"IndexError: {msg}")
             raise HTTPBadRequest(reason=msg)
         log.debug(f"got json arr: {input_arr.shape}")
-    else:
-        input_arr = None
-
-    # write data if provided
-    if input_arr is not None:
+        # write data if provided
         log.debug(f"write input_arr: {input_arr}")
-        # mixin the layout
-        dset_json["layout"] = layout
         # make selection for entire dataspace
         dims = getShapeDims(shape_json)
         slices = []
@@ -648,7 +645,44 @@ async def POST_Dataset(request):
         kwargs["data"] = input_arr
         # do write
         await doHyperslabWrite(app, request, **kwargs)
-    """
+
+    if "objects" in post_rsp:
+        # add any links in multi request
+        objects = post_rsp["objects"]
+        obj_count = len(objects)
+        log.debug(f"Post dataset multi create: {obj_count} objects")
+        if len(body) != obj_count:
+            msg = f"Expected {obj_count} objects but got {len(body)}"
+            log.warn(msg)
+            raise HTTPBadRequest(reason=msg)
+        parent_ids = {}
+        for index in range(obj_count):
+            item = body[index]
+            if "link" in item:
+                link_item = item["link"]
+                parent_id = link_item.get("id")
+                title = link_item.get("name")
+                if parent_id and title:
+                    # add a hard link
+                    object = objects[index]
+                    obj_id = object["id"]
+                    if parent_id not in parent_ids:
+                        parent_ids[parent_id] = {}
+                    links = parent_ids[parent_id]
+                    links[title] = {"id": obj_id}
+        if parent_ids:
+            log.debug(f"POST dataset multi - adding links: {parent_ids}")
+            kwargs = {"action": "put_link", "bucket": bucket}
+            kwargs["replace"] = True
+
+            crawler = DomainCrawler(app, parent_ids, **kwargs)
+
+            # will raise exception on not found, server busy, etc.
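+            # (the parent_ids map groups the new links by parent group, so
+            #  the crawler issues one put_link action per distinct parent
+            #  rather than one per created dataset)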
+ await crawler.crawl() + status = crawler.get_status() + + log.info(f"DomainCrawler done for put_links action, status: {status}") + # dataset creation successful resp = await jsonResponse(request, post_rsp, status=201) log.response(request, resp=resp) diff --git a/hsds/post_crawl.py b/hsds/post_crawl.py index 198b1492..2c79b47e 100644 --- a/hsds/post_crawl.py +++ b/hsds/post_crawl.py @@ -259,6 +259,7 @@ async def createDatatypeObjs(app, items: list, root_id=None, bucket=None): rsp_json = await _createObjects(app, items=items, root_id=root_id, bucket=bucket) return rsp_json + async def createDatasets(app, items: list, root_id=None, bucket=None): """ create dataset objects based on parameters in items list """ diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index bc089420..cbc71b0a 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -1300,7 +1300,7 @@ def validateDatasetCreationProps(creation_props, type_json=None, shape=None): msg = "shape and type must be set to use fillValue" log.warn(msg) raise HTTPBadRequest(reason=msg) - + # validate fill value compatible with type dt = createDataType(type_json) fill_value = creation_props["fillValue"] @@ -1327,14 +1327,14 @@ def validateDatasetCreationProps(creation_props, type_json=None, shape=None): msg = f"invalid fill value: {fill_value}" log.warn(msg) raise HTTPBadRequest(reason=msg) - + if "filters" in creation_props: if not type_json or not shape: msg = "shape and type must be set to use filters" log.warn(msg) raise HTTPBadRequest(reason=msg) - - supported_filters = getSupportedFilters() + + supported_filters = getSupportedFilters() # will raise bad request exception if not valid supported_filters = getSupportedFilters(include_compressors=True) log.debug(f"supported_filters: {supported_filters}") @@ -1343,6 +1343,7 @@ def validateDatasetCreationProps(creation_props, type_json=None, shape=None): log.debug(f"setting filters to: {filters_out}") creation_props["filters"] = filters_out + def getCreateArgs(body, root_id=None, bucket=None, @@ -1487,13 +1488,14 @@ def getCreateArgs(body, pass # no type return kwargs + def getDatasetCreateArgs(body, - root_id=None, - bucket=None, - type=None, - implicit=False, - chunk_table=None, - ignore_link=False): + root_id=None, + bucket=None, + type=None, + implicit=False, + chunk_table=None, + ignore_link=False): """ get args for createDataset from request body """ @@ -1504,12 +1506,12 @@ def getDatasetCreateArgs(body, type=type, implicit=implicit, ignore_link=ignore_link) - - if not "type" in kwargs: + + if "type" not in kwargs: msg = "no type specified for create dataset" log.warn(msg) raise HTTPBadRequest(reason=msg) - + type_json = kwargs["type"] # # Validate shape if present @@ -1518,7 +1520,7 @@ def getDatasetCreateArgs(body, # will return scalar shape if no shape key in body shape_json = getShapeJson(body) kwargs["shape"] = shape_json - + # get layout for dataset creation log.debug("getting dataset creation settings") layout_props = None @@ -1584,7 +1586,7 @@ def getDatasetCreateArgs(body, msg = f"chunk_size: {chunk_size}, min: {min_chunk_size}, " msg += f"max: {max_chunk_size}" log.debug(msg) - + # adjust the chunk shape if chunk size is too small or too big adjusted_chunk_dims = None if chunk_size < min_chunk_size: @@ -1599,7 +1601,7 @@ def getDatasetCreateArgs(body, log.debug(msg) opts = {"chunk_max": max_chunk_size} adjusted_chunk_dims = shrinkChunk(chunk_dims, item_size, **opts) - + if adjusted_chunk_dims: msg = f"requested chunk_dimensions: {chunk_dims} modified " 
msg += f"dimensions: {adjusted_chunk_dims}" @@ -1676,12 +1678,12 @@ def getDatasetCreateArgs(body, msg += f"{max_chunk_size}, for {layout_class} dataset" log.warn(msg) layout["dims"] = chunk_dims - + if layout: log.debug(f"setting layout to: {layout}") kwargs["layout"] = layout - # + # # get input data if present # if "value" in body and body["value"]: @@ -1690,7 +1692,7 @@ def getDatasetCreateArgs(body, msg = "null shape datasets can not have initial values" log.warn(msg) raise HTTPBadRequest(reason=msg) - + input_data = body["value"] msg = "input data doesn't match request type and shape" dims = getShapeDims(shape_json) @@ -1712,7 +1714,7 @@ def getDatasetCreateArgs(body, raise HTTPBadRequest(reason=msg) log.debug(f"got json arr: {input_arr.shape}") kwargs["value"] = input_data - + return kwargs @@ -1969,7 +1971,6 @@ async def createDataset(app, obj_id=None, creation_props=None, layout=None, - value=None, attrs=None, links=None, implicit=None, @@ -2001,54 +2002,4 @@ async def createDataset(app, kwargs["bucket"] = bucket dset_json = await createObject(app, **kwargs) - if value: - log.debug(f"tbd - set dataset value to: {value}") - shape_json = kwargs["shape"] - type_json = kwargs["type"] - # data to initialize dataset included in request - msg = "input data doesn't match request type and shape" - dims = getShapeDims(shape_json) - if not dims: - log.warn(msg) - raise HTTPBadRequest(reason=msg) - arr_dtype = createDataType(type_json) - - try: - input_arr = jsonToArray(dims, arr_dtype, value) - except ValueError: - log.warn(f"ValueError: {msg}") - raise HTTPBadRequest(reason=msg) - except TypeError: - log.warn(f"TypeError: {msg}") - raise HTTPBadRequest(reason=msg) - except IndexError: - log.warn(f"IndexError: {msg}") - raise HTTPBadRequest(reason=msg) - log.debug(f"got json arr: {input_arr.shape}") - else: - input_arr = None - - # write data if provided - if input_arr is not None: - log.debug(f"write input_arr: {input_arr}") - # mixin the layout - dset_json["layout"] = layout - # make selection for entire dataspace - dims = getShapeDims(shape_json) - slices = [] - for dim in dims: - s = slice(0, dim, 1) - slices.append(s) - # make a one page list to handle the write in one chunk crawler run - # (larger write request should user binary streaming) - kwargs = {"page_number": 0, "page": slices} - kwargs["dset_json"] = dset_json - kwargs["bucket"] = bucket - kwargs["select_dtype"] = input_arr.dtype - kwargs["data"] = input_arr - log.debug(f"kwargs for hyperslab write: {kwargs}") - # do write - #request = None # don't need in this case since not reading from input stream - #await doHyperslabWrite(app, request, **kwargs) - return dset_json diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py index 3d4610a4..12fe3c1f 100755 --- a/tests/integ/dataset_test.py +++ b/tests/integ/dataset_test.py @@ -64,31 +64,34 @@ def testScalarDataset(self): rsp = self.session.post(req, data=json.dumps(data), headers=headers) self.assertEqual(rsp.status_code, 201) rspJson = json.loads(rsp.text) - self.assertEqual(rspJson["attributeCount"], 0) - dset_id = rspJson["id"] - self.assertTrue(helper.validateId(dset_id)) - - # read back the obj - req = self.endpoint + "/datasets/" + dset_id - rsp = self.session.get(req, headers=headers) - self.assertEqual(rsp.status_code, 200) - rspJson = json.loads(rsp.text) expected_keys = [ "id", "shape", - "hrefs", "layout", - "creationProperties", "attributeCount", "created", "lastModified", "root", - "domain", ] - for name in expected_keys: self.assertTrue(name in 
rspJson)
+
+        # additional keys expected for GET response
+        expected_keys.append("hrefs")
+        expected_keys.append("creationProperties")
+        expected_keys.append("domain")
+
+        self.assertEqual(rspJson["attributeCount"], 0)
+        dset_id = rspJson["id"]
+        self.assertTrue(helper.validateId(dset_id))
+
+        # read back the obj
+        req = self.endpoint + "/datasets/" + dset_id
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+
+        self.assertEqual(rspJson["id"], dset_id)
         self.assertEqual(rspJson["root"], root_uuid)
         self.assertEqual(rspJson["domain"], domain)
diff --git a/tests/integ/datatype_test.py b/tests/integ/datatype_test.py
index ce7418cb..7bf90d09 100755
--- a/tests/integ/datatype_test.py
+++ b/tests/integ/datatype_test.py
@@ -475,6 +475,7 @@ def testPostWithLink(self):
         rsp = self.session.get(req, headers=headers)
         self.assertEqual(rsp.status_code, 200)  # link doesn't exist yet
         rspJson = json.loads(rsp.text)
+        self.assertTrue("link" in rspJson)
         link_json = rspJson["link"]
         self.assertEqual(link_json["collection"], "datatypes")

From ce45804747568c426721d6ed33723a6eebb0e365 Mon Sep 17 00:00:00 2001
From: John Readey
Date: Wed, 21 May 2025 21:04:10 +0200
Subject: [PATCH 28/49] add multi-dataset test with init data

---
 tests/integ/dataset_test.py | 89 +++++++++++++++++++++++++++++++++++--
 tests/integ/value_test.py   | 80 +++++++++++++++++++++++++++++++++
 2 files changed, 165 insertions(+), 4 deletions(-)

diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py
index 12fe3c1f..cf15ada2 100755
--- a/tests/integ/dataset_test.py
+++ b/tests/integ/dataset_test.py
@@ -20,9 +20,8 @@
 import config

 # min/max chunk size - these can be set by config, but
-# practially the min config value should be larger than
-# CHUNK_MIN and the max config value should less than
-# CHUNK_MAX
+# practically the min config value should be larger than
+# CHUNK_MIN and the max config value should be less than CHUNK_MAX
 CHUNK_MIN = 1024  # lower limit (1024b)
 CHUNK_MAX = 50 * 1024 * 1024  # upper limit (50M)

@@ -2751,7 +2750,7 @@ def testExtendibleDatasetChunkPartitioning(self):
         req = self.endpoint + "/datasets"
         # 50K x 80K x 90K dataset
         dims = [0, 80000, 90000]
-        # unlimited extend in dim 0, fixeed in dimension 2, extenbile by 10x in dim 3
+        # unlimited extent in dim 0, fixed in dim 1, extensible by 10x in dim 2
         max_dims = [0, 80000, 900000]

         payload = {"type": "H5T_IEEE_F32LE", "shape": dims, "maxdims": max_dims}
@@ -2815,6 +2814,88 @@ def testDatasetEmptyChunkExtent(self):
         # Should fail with Bad Request due to invalid layout value
         self.assertEqual(rsp.status_code, 400)  # create dataset

+    def testDatasetPostMulti(self):
+        # test POST with multi-object creation
+        domain = self.base_domain + "/testDatasetPostMulti.h5"
+        helper.setupDomain(domain)
+        print("testDatasetPostMulti", domain)
+        headers = helper.getRequestHeaders(domain=domain)
+
+        # get root id
+        req = helper.getEndpoint() + "/"
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        root_uuid = rspJson["root"]
+        helper.validateId(root_uuid)
+
+        # get root group and verify link count is 0
+        req = helper.getEndpoint() + "/groups/" + root_uuid
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        self.assertEqual(rspJson["linkCount"], 0)
+
+        dataset_count = 3
+        datatype = "H5T_STD_I32LE"
+        payload = []
+        for _ in range(dataset_count):
+            dataset_args = {"type": datatype}
+            payload.append(dataset_args)
+
+        req = helper.getEndpoint() + "/datasets"
+        rsp = self.session.post(req, data=json.dumps(payload), headers=headers)
+        self.assertEqual(rsp.status_code, 201)
+        rspJson = json.loads(rsp.text)
+        self.assertTrue("objects" in rspJson)
+        rsp_objs = rspJson["objects"]
+        self.assertEqual(len(rsp_objs), dataset_count)
+
+        expected_keys = [
+            "id",
+            "shape",
+            "layout",
+            "attributeCount",
+            "created",
+            "lastModified",
+            "root",
+        ]
+
+        for i in range(dataset_count):
+            obj_json = rsp_objs[i]
+            self.assertEqual(obj_json["attributeCount"], 0)
+            dset_id = obj_json["id"]
+            self.assertTrue(helper.validateId(dset_id))
+            self.assertTrue(dset_id.startswith("d-"))
+            for key in expected_keys:
+                self.assertTrue(key in obj_json)
+
+        # create a set of linked datasets
+        for i in range(dataset_count):
+            item = payload[i]
+            item["link"] = {"id": root_uuid, "name": f"dset_{i + 1}"}
+        rsp = self.session.post(req, data=json.dumps(payload), headers=headers)
+        self.assertEqual(rsp.status_code, 201)
+        rspJson = json.loads(rsp.text)
+        self.assertTrue("objects" in rspJson)
+        rsp_objs = rspJson["objects"]
+        self.assertEqual(len(rsp_objs), dataset_count)
+        for i in range(dataset_count):
+            json_rsp = rsp_objs[i]
+            self.assertEqual(json_rsp["attributeCount"], 0)
+            dset_id = json_rsp["id"]
+            self.assertTrue(helper.validateId(dset_id))
+            for key in expected_keys:
+                self.assertTrue(key in json_rsp)
+
+        # get root group and verify link count is dataset_count
+        req = helper.getEndpoint() + "/groups/" + root_uuid
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        self.assertEqual(rspJson["linkCount"], dataset_count)
+

 if __name__ == "__main__":
     # setup test files
diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py
index 9ad5c5e6..1f34a99e 100755
--- a/tests/integ/value_test.py
+++ b/tests/integ/value_test.py
@@ -1012,6 +1012,86 @@ def testScalarDatasetInitData(self):
         self.assertTrue("value" in rspJson)
         self.assertEqual(rspJson["value"], 42)

+    def testScalarDatasetInitDataMulti(self):
+        # Test creation of multiple scalar dataset objects along with initial data
+        print("testScalarDatasetInitDataMulti", self.base_domain)
+        headers = helper.getRequestHeaders(domain=self.base_domain)
+        req = self.endpoint + "/"
+
+        # Get root uuid
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        root_uuid = rspJson["root"]
+        helper.validateId(root_uuid)
+
+        dataset_count = 3
+        datatype = "H5T_STD_I32LE"
+        payload = []
+        for i in range(dataset_count):
+            dataset_args = {"type": datatype}
+            dataset_args["value"] = i
+            payload.append(dataset_args)
+
+        # create dataset objects
+        req = self.endpoint + "/datasets"
+        rsp = self.session.post(req, data=json.dumps(payload), headers=headers)
+        self.assertEqual(rsp.status_code, 201)
+        rspJson = json.loads(rsp.text)
+
+        self.assertTrue("objects" in rspJson)
+        rsp_objs = rspJson["objects"]
+        self.assertEqual(len(rsp_objs), dataset_count)
+
+        for i in range(dataset_count):
+            obj_json = rsp_objs[i]
+            self.assertEqual(obj_json["attributeCount"], 0)
+            dset_id = obj_json["id"]
+            self.assertTrue(helper.validateId(dset_id))
+            self.assertTrue(dset_id.startswith("d-"))
+
+        # read back the obj
+        for i in range(dataset_count):
+            dset_id = rsp_objs[i]["id"]
+            req = self.endpoint + "/datasets/" + dset_id
+            rsp = self.session.get(req, headers=headers)
+            
self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + + expected_keys = [ + "id", + "shape", + "hrefs", + "layout", + "creationProperties", + "attributeCount", + "created", + "lastModified", + "root", + "domain", + ] + + for name in expected_keys: + self.assertTrue(name in rspJson) + self.assertEqual(rspJson["id"], dset_id) + self.assertEqual(rspJson["root"], root_uuid) + self.assertEqual(rspJson["domain"], self.base_domain) + self.assertEqual(rspJson["attributeCount"], 0) + shape_json = rspJson["shape"] + self.assertTrue(shape_json["class"], "H5S_SCALAR") + self.assertTrue(rspJson["type"], "H5T_STD_I32LE") + + # read the data back + for i in range(dataset_count): + dset_id = rsp_objs[i]["id"] + req = self.endpoint + "/datasets/" + dset_id + "/value" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("hrefs" in rspJson) + self.assertTrue("value" in rspJson) + self.assertEqual(rspJson["value"], i) + def testNullSpaceDataset(self): # Test attempted read/write to null space dataset print("testNullSpaceDataset", self.base_domain) From 88e06919b3f41045abfbf6afcba00fe3300774da Mon Sep 17 00:00:00 2001 From: John Readey Date: Fri, 6 Jun 2025 20:32:21 +0200 Subject: [PATCH 29/49] allow client group id for PUT domain --- hsds/chunk_dn.py | 8 ++-- hsds/chunk_sn.py | 6 +-- hsds/ctype_dn.py | 5 +++ hsds/ctype_sn.py | 8 ++-- hsds/domain_sn.py | 39 +++++++++++++--- hsds/dset_dn.py | 5 +++ hsds/dset_sn.py | 14 +++--- hsds/group_dn.py | 4 ++ hsds/group_sn.py | 6 +-- hsds/servicenode_lib.py | 8 +++- tests/integ/domain_test.py | 91 ++++++++++++++++++++++++++++++++++++++ 11 files changed, 164 insertions(+), 30 deletions(-) diff --git a/hsds/chunk_dn.py b/hsds/chunk_dn.py index 97e86f01..02545b85 100644 --- a/hsds/chunk_dn.py +++ b/hsds/chunk_dn.py @@ -65,7 +65,7 @@ async def PUT_Chunk(request): log.error(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(chunk_id, "Chunk"): + if not isValidUuid(chunk_id, obj_class="chunks"): msg = f"Invalid chunk id: {chunk_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -339,7 +339,7 @@ async def GET_Chunk(request): msg = "Missing chunk id" log.error(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(chunk_id, "Chunk"): + if not isValidUuid(chunk_id, obj_class="chunks"): msg = f"Invalid chunk id: {chunk_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -655,7 +655,7 @@ async def POST_Chunk(request): chunk_index = getChunkIndex(chunk_id) log.debug(f"chunk_index: {chunk_index}") - if not isValidUuid(chunk_id, "Chunk"): + if not isValidUuid(chunk_id, obj_class="chunks"): msg = f"Invalid chunk id: {chunk_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -842,7 +842,7 @@ async def DELETE_Chunk(request): raise HTTPBadRequest(reason=msg) log.info(f"DELETE chunk: {chunk_id}") - if not isValidUuid(chunk_id, "Chunk"): + if not isValidUuid(chunk_id, obj_class="chunks"): msg = f"Invalid chunk id: {chunk_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index a039a911..285617f6 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -497,7 +497,7 @@ async def PUT_Value(request): msg = "Missing dataset id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(dset_id, "Dataset"): + if not isValidUuid(dset_id, obj_class="datasets"): msg = f"Invalid dataset id: {dset_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -784,7 +784,7 @@ async def GET_Value(request): msg = "Missing 
dataset id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(dset_id, "Dataset"): + if not isValidUuid(dset_id, obj_class="datasets"): msg = f"Invalid dataset id: {dset_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -1062,7 +1062,7 @@ async def POST_Value(request): msg = "Missing dataset id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(dset_id, "Dataset"): + if not isValidUuid(dset_id, obj_class="datasets"): msg = f"Invalid dataset id: {dset_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) diff --git a/hsds/ctype_dn.py b/hsds/ctype_dn.py index 0b14ab41..834e02cb 100755 --- a/hsds/ctype_dn.py +++ b/hsds/ctype_dn.py @@ -95,6 +95,11 @@ async def POST_Datatype(request): log.error("Unexpected type_id: {ctype_id}") raise HTTPInternalServerError() + deleted_ids = app["deleted_ids"] + if ctype_id in deleted_ids: + log.warn(f"POST Dataset has id: {ctype_id} that has previously been deleted") + deleted_ids.remove(ctype_id) + # verify the id doesn't already exist obj_found = await check_metadata_obj(app, ctype_id, bucket=bucket) if obj_found: diff --git a/hsds/ctype_sn.py b/hsds/ctype_sn.py index 83f581a6..dfa96f98 100755 --- a/hsds/ctype_sn.py +++ b/hsds/ctype_sn.py @@ -51,7 +51,7 @@ async def GET_Datatype(request): include_attrs = True if ctype_id: - if not isValidUuid(ctype_id, "datatypes"): + if not isValidUuid(ctype_id, obj_class="datatypes"): msg = f"Invalid type id: {ctype_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -62,7 +62,7 @@ async def GET_Datatype(request): group_id = None if "grpid" in params: group_id = params["grpid"] - if not isValidUuid(group_id, "groups"): + if not isValidUuid(group_id, obj_class="groups"): msg = f"Invalid parent group id: {group_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -103,7 +103,7 @@ async def GET_Datatype(request): # throws 404 if not found kwargs = {"bucket": bucket, "domain": domain} ctype_id, domain, _ = await getObjectIdByPath(app, group_id, h5path, **kwargs) - if not isValidUuid(ctype_id, "datatypes"): + if not isValidUuid(ctype_id, obj_class="datatypes"): msg = f"No datatype exist with the path: {h5path}" log.warn(msg) raise HTTPGone() @@ -280,7 +280,7 @@ async def DELETE_Datatype(request): msg = "Missing committed type id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(ctype_id, "datatypes"): + if not isValidUuid(ctype_id, obj_class="datatypes"): msg = f"Invalid committed type id: {ctype_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) diff --git a/hsds/domain_sn.py b/hsds/domain_sn.py index 4436db37..1ef469d5 100755 --- a/hsds/domain_sn.py +++ b/hsds/domain_sn.py @@ -18,12 +18,12 @@ import os.path as op from aiohttp.web_exceptions import HTTPBadRequest, HTTPForbidden, HTTPNotFound -from aiohttp.web_exceptions import HTTPInternalServerError +from aiohttp.web_exceptions import HTTPInternalServerError, HTTPGone from aiohttp.web_exceptions import HTTPConflict, HTTPServiceUnavailable from aiohttp.web import json_response from h5json.objid import createObjId, getCollectionForId -from h5json.objid import isValidUuid, isSchema2Id +from h5json.objid import isValidUuid, isRootObjId, isSchema2Id from .util.nodeUtil import getNodeCount, getDataNodeUrl from .util.httpUtil import getObjectClass, http_post, http_put, http_delete @@ -99,7 +99,7 @@ async def get_collections(app, root_id, bucket=None, max_objects_limit=None): async def getDomainObjects(app, root_id, include_attrs=False, bucket=None): - """Iterate through all objects in heirarchy and add to obj_dict + 
"""Iterate through all objects in hierarchy and add to obj_dict keyed by obj id """ @@ -754,7 +754,7 @@ async def PUT_Domain(request): username, pswd = getUserPasswordFromRequest(request) await validateUserPassword(app, username, pswd) - # inital perms for owner and default + # initial perms for owner and default owner_perm = { "create": True, "read": True, @@ -858,7 +858,7 @@ async def PUT_Domain(request): if "root" in domain_json: # nothing to update for folders root_id = domain_json["root"] - if not isValidUuid(root_id): + if not isValidUuid(root_id, obj_class="groups"): msg = f"domain: {domain} with invalid root id: {root_id}" log.error(msg) raise HTTPInternalServerError() @@ -985,8 +985,33 @@ async def PUT_Domain(request): if not is_folder and not linked_json: # create a root group for the new domain - root_id = createObjId("groups") - log.debug(f"new root group id: {root_id}") + if body and "root_id" in body: + root_id = body["root_id"] + if not isRootObjId(root_id): + msg = f"invalid client provided root id: {root_id}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + # verify that the group object doesn't already exist + log.debug(f"attempting to fetch root id: {root_id}") + kwargs = { + "refresh": True, + "include_links": False, + "include_attrs": False, + "bucket": bucket, + } + try: + await getObjectJson(app, root_id, **kwargs) + msg = "client specified root_id already exists" + log.warn(msg) + raise HTTPConflict() + except HTTPNotFound: + log.debug(f"root_id: {root_id} not found (expected)") + except HTTPGone: + log.debug(f"root_id: {root_id} has been removed (expected)") + log.debug(f"using client supplied root_id: {root_id}") + else: + root_id = createObjId("groups") + log.debug(f"new root group id: {root_id}") group_json = {"id": root_id, "root": root_id, "domain": domain} log.debug(f"create group for domain, body: {group_json}") diff --git a/hsds/dset_dn.py b/hsds/dset_dn.py index 5b28f69c..fc949203 100755 --- a/hsds/dset_dn.py +++ b/hsds/dset_dn.py @@ -98,6 +98,11 @@ async def POST_Dataset(request): log.error(f"Unexpected dataset_id: {dset_id}") raise HTTPInternalServerError() + deleted_ids = app["deleted_ids"] + if dset_id in deleted_ids: + log.warn(f"POST Dataset has id: {dset_id} that has previously been deleted") + deleted_ids.remove(dset_id) + # verify the id doesn't already exist obj_found = await check_metadata_obj(app, dset_id, bucket=bucket) if obj_found: diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index a60f87fd..48ee8609 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -85,7 +85,7 @@ async def GET_Dataset(request): include_attrs = True if dset_id: - if not isValidUuid(dset_id, "Dataset"): + if not isValidUuid(dset_id, obj_class="datasets"): msg = f"Invalid dataset id: {dset_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -96,7 +96,7 @@ async def GET_Dataset(request): group_id = None if "grpid" in params: group_id = params["grpid"] - if not isValidUuid(group_id, "Group"): + if not isValidUuid(group_id, obj_class="groups"): msg = f"Invalid parent group id: {group_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -140,7 +140,7 @@ async def GET_Dataset(request): # throws 404 if not found kwargs = {"bucket": bucket, "domain": domain} dset_id, domain, _ = await getObjectIdByPath(app, group_id, h5path, **kwargs) - if not isValidUuid(dset_id, "Dataset"): + if not isValidUuid(dset_id, obj_class="datasets"): msg = f"No dataset exist with the path: {h5path}" log.warn(msg) raise HTTPNotFound() @@ -228,7 +228,7 @@ async def GET_DatasetType(request): 
msg = "Missing dataset id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(dset_id, "Dataset"): + if not isValidUuid(dset_id, obj_class="datasets"): msg = f"Invalid dataset id: {dset_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -280,7 +280,7 @@ async def GET_DatasetShape(request): msg = "Missing dataset id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(dset_id, "Dataset"): + if not isValidUuid(dset_id, obj_class="datasets"): msg = f"Invalid dataset id: {dset_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -337,7 +337,7 @@ async def PUT_DatasetShape(request): msg = "Missing dataset id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(dset_id, "Dataset"): + if not isValidUuid(dset_id, obj_class="datasets"): msg = f"Invalid dataset id: {dset_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -700,7 +700,7 @@ async def DELETE_Dataset(request): msg = "Missing dataset id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(dset_id, "Dataset"): + if not isValidUuid(dset_id, obj_class="datasets"): msg = f"Invalid dataset id: {dset_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) diff --git a/hsds/group_dn.py b/hsds/group_dn.py index 0a93bed4..e2b69eef 100755 --- a/hsds/group_dn.py +++ b/hsds/group_dn.py @@ -98,6 +98,10 @@ async def POST_Group(request): raise HTTPBadRequest(reason=msg) group_id = get_obj_id(request, body=body) + deleted_ids = app["deleted_ids"] + if group_id in deleted_ids: + log.warn(f"POST Group has id: {group_id} that has previously been deleted") + deleted_ids.remove(group_id) log.info(f"POST group: {group_id} bucket: {bucket} body: {body}") if not isValidUuid(group_id, obj_class="groups"): diff --git a/hsds/group_sn.py b/hsds/group_sn.py index 4d83e5c7..1011b883 100755 --- a/hsds/group_sn.py +++ b/hsds/group_sn.py @@ -52,7 +52,7 @@ async def GET_Group(request): if group_id: log.info(f"GET_Group, id: {group_id}") # is the id a group id and not something else? 
- if not isValidUuid(group_id, "Group"): + if not isValidUuid(group_id, obj_class="groups"): msg = f"Invalid group id: {group_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -99,7 +99,7 @@ async def GET_Group(request): kwargs = {"bucket": bucket, "domain": domain} group_id, domain, obj_json = await getObjectIdByPath(app, group_id, h5path, **kwargs) - if not isValidUuid(group_id, "Group"): + if not isValidUuid(group_id, obj_class="groups"): msg = f"No group exist with the path: {h5path}" log.warn(msg) raise HTTPNotFound() @@ -296,7 +296,7 @@ async def DELETE_Group(request): msg = "Missing group id" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not isValidUuid(group_id, "Group"): + if not isValidUuid(group_id, obj_class="groups"): msg = f"Invalid group id: {group_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index cbc71b0a..1c17edad 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -1367,7 +1367,7 @@ def getCreateArgs(body, msg = "link can't be used with h5path" log.warn(msg) raise HTTPBadRequest(reason=msg) - # if ingore_link is set, parent_links will be created post object creation + # if ignore_link is set, parent_links will be created post object creation link_body = body["link"] log.debug(f"link_body: {link_body}") if "id" in link_body and not ignore_link: @@ -1417,7 +1417,11 @@ def getCreateArgs(body, if "id" in body: obj_id = body["id"] - # tbd: validate this is a group id + if not isValidUuid(obj_id): + msg = f"Invalid id: {obj_id}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + kwargs["obj_id"] = obj_id log.debug(f"createObject will use client id: {obj_id}") diff --git a/tests/integ/domain_test.py b/tests/integ/domain_test.py index 4f21d44e..1c68cb5e 100755 --- a/tests/integ/domain_test.py +++ b/tests/integ/domain_test.py @@ -13,6 +13,9 @@ import time import json from os import path as pp + +from h5json.objid import createObjId + import config import helper @@ -489,6 +492,94 @@ def testCreateDomain(self): self.assertTrue(k in rspJson) # we should get the same value for root id self.assertEqual(root_id, rspJson["root"]) + + def testCreateDomainWithId(self): + domain = self.base_domain + "/newdomainwithid.h5" + print("testCreateDomainWithId", domain) + headers = helper.getRequestHeaders(domain=domain) + + root_id = createObjId("groups") + body = {"root_id": root_id} + req = helper.getEndpoint() + "/" + + rsp = self.session.put(req, data=json.dumps(body), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + for k in ( + "root", + "owner", + "acls", + "created", + "lastModified", + "version", + "limits", + "compressors", + ): + self.assertTrue(k in rspJson) + + self.assertEqual(rspJson["root"], root_id) + + limit_keys = ("min_chunk_size", "max_chunk_size", "max_request_size") + limits = rspJson["limits"] + for k in limit_keys: + self.assertTrue(k in limits) + limit = limits[k] + self.assertTrue(isinstance(limit, int)) + self.assertTrue(limit > 0) + compressors = rspJson["compressors"] + for compressor in EXPECTED_COMPRESSORS: + self.assertTrue(compressor in compressors) + + # do a get on the new domain + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + for k in ( + "root", + "owner", + "class", + "created", + "lastModified", + "limits", + "version", + ): + self.assertTrue(k in rspJson) + # we should get the same value for root id + self.assertEqual(root_id, 
rspJson["root"]) + # should get limits here too + limits = rspJson["limits"] + for k in limit_keys: + self.assertTrue(k in limits) + limit = limits[k] + self.assertTrue(isinstance(limit, int)) + self.assertTrue(limit > 0) + + # verify we can access root groups + root_req = helper.getEndpoint() + "/groups/" + root_id + headers = helper.getRequestHeaders(domain=domain) + rsp = self.session.get(root_req, headers=headers) + self.assertEqual(rsp.status_code, 200) + + # verify that putting the same domain again fails with a 409 error + rsp = self.session.put(req, headers=headers) + self.assertEqual(rsp.status_code, 409) + + # PUT with a different domain name should also give a 409 + # (due to the root_id conflicting) + domain2 = self.base_domain + "/newdomainwithid2.h5" + headers2 = helper.getRequestHeaders(domain=domain2) + rsp = self.session.put(req, data=json.dumps(body), headers=headers2) + self.assertEqual(rsp.status_code, 409) + + # Delete the original domain + headers = helper.getRequestHeaders(domain=domain) + rsp = self.session.delete(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + + # re-create the domain with the same root id + rsp = self.session.put(req, data=json.dumps(body), headers=headers) + self.assertEqual(rsp.status_code, 201) + """ def testCreateDomainWithCustomClass(self): domain = self.base_domain + "/newclassdomain.h6" From 7561534242e745fea750fca7ac037f8dce712b44 Mon Sep 17 00:00:00 2001 From: John Readey Date: Sun, 8 Jun 2025 12:06:45 +0200 Subject: [PATCH 30/49] fix np.frombuffer error --- hsds/attr_sn.py | 6 +----- hsds/chunk_sn.py | 7 +++---- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/hsds/attr_sn.py b/hsds/attr_sn.py index d3b05ca0..d7f05f75 100755 --- a/hsds/attr_sn.py +++ b/hsds/attr_sn.py @@ -851,11 +851,7 @@ async def PUT_AttributeValue(request): msg += f"but got {len(binary_data)}" log.warn(msg) raise HTTPBadRequest(reason=msg) - arr = np.fromstring(binary_data, dtype=np_dtype) - if attr_shape["class"] == "H5S_SCALAR": - arr = arr.reshape([]) - else: - arr = arr.reshape(np_shape) # conform to selection shape + arr = bytesToArray(binary_data, np_dtype, np_shape) log.debug(f"got array {arr} from binary data") else: try: diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 285617f6..86d7539f 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -657,7 +657,7 @@ async def PUT_Value(request): log.warn(f"bytesToArray value error: {ve}") raise HTTPBadRequest() else: - # fixed item size + # fixed item size - check against number of bytes if len(input_data) % item_size != 0: msg = f"Expected request size to be a multiple of {item_size}, " msg += f"but {len(input_data)} bytes received" @@ -668,8 +668,7 @@ async def PUT_Value(request): msg = f"expected {item_size * num_elements} bytes but got {len(input_data)}" log.warn(msg) raise HTTPBadRequest(reason=msg) - - arr = np.fromstring(input_data, dtype=dset_dtype) + arr = np.frombuffer(input_data, dtype=dset_dtype) log.debug(f"read fixed type array: {arr}") if bc_shape: @@ -1166,7 +1165,7 @@ async def POST_Value(request): log.warn(msg) raise HTTPBadRequest(reason=msg) num_points = request.content_length // point_dt.itemsize - points = np.fromstring(binary_data, dtype=point_dt) + points = np.frombuffer(binary_data, dtype=point_dt) # reshape the data based on the rank (num_points x rank) if rank > 1: if len(points) % rank != 0: From 25c4cf37d2888b800a2a3f8017b92bccd965179e Mon Sep 17 00:00:00 2001 From: John Readey Date: Sun, 8 Jun 2025 13:29:37 +0200 Subject: [PATCH 31/49] fix dsetUtil 
flake errors --- hsds/util/dsetUtil.py | 50 +++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/hsds/util/dsetUtil.py b/hsds/util/dsetUtil.py index b259aae9..a1d20cbf 100644 --- a/hsds/util/dsetUtil.py +++ b/hsds/util/dsetUtil.py @@ -19,7 +19,6 @@ from .. import hsds_logger as log from .. import config -#from .chunkUtil import getChunkSize, guessChunk, expandChunk, shrinkChunk CHUNK_MIN = 512 * 1024 # Soft lower limit (512k) CHUNK_MAX = 2048 * 1024 # Hard upper limit (2M) @@ -84,6 +83,7 @@ "H5D_CONTIGUOUS_REF", ) + def get_dset_size(shape_json, typesize): """Return the size of the dataspace. For any unlimited dimensions, assume a value of 1. @@ -106,6 +106,7 @@ def get_dset_size(shape_json, typesize): dset_size *= shape[n] return dset_size + def getFilterItem(key): """ Return filter code, id, and name, based on an id, a name or a code. @@ -121,15 +122,15 @@ def getFilterItem(key): def getFiltersJson(create_props, supported_filters=None): - """ return standardized filter representation from creation properties + """ return standardized filter representation from creation properties raise bad request if invalid """ - + # refer to https://hdf5-json.readthedocs.io/en/latest/bnf/\ # filters.html#grammar-token-filter_list if "filters" not in create_props: return {} # null set - + f_in = create_props["filters"] log.debug(f"filters provided in creation_prop: {f_in}") @@ -189,11 +190,11 @@ def getFiltersJson(create_props, supported_filters=None): msg = f"Unexpected type for filter: {filter}" log.warn(msg) raise HTTPBadRequest(reason=msg) - + # return standardized filter representation log.debug(f"using filters: {f_out}") return f_out - + def getFilters(dset_json): """Return list of filters, or empty list""" @@ -298,7 +299,8 @@ def getFilterOps(app, dset_id, filters, dtype=None, chunk_shape=None): return filter_ops else: return None - + + def getShapeJson(body): """ Return normalized json description of data space """ @@ -310,8 +312,8 @@ def getShapeJson(body): shape_class = "H5S_SCALAR" log.debug("not shape given - using H5S_SCALAR") return {"class": shape_class} - - body_shape = body["shape"] + + body_shape = body["shape"] log.debug(f"got shape: {body_shape}") if isinstance(body_shape, int): @@ -326,11 +328,11 @@ def getShapeJson(body): else: shape_class = "H5S_SIMPLE" dims = body_shape - else: + else: msg = "invalid shape: {body_shape}" log.warn(msg) raise ValueError(msg) - + if shape_class not in ("H5S_NULL", "H5S_SCALAR", "H5S_SIMPLE"): msg = f"invalid shape class: {shape_class}" log.warn(msg) @@ -386,7 +388,7 @@ def getShapeJson(body): msg = "max_dims rank doesn't match dims" log.warn(msg) raise ValueError(msg) - + # return json description of shape shape_json = {"class": shape_class} if shape_class == "H5S_SIMPLE": @@ -396,6 +398,7 @@ def getShapeJson(body): log.debug(f"returning shape_json: {shape_json}") return shape_json + def getShapeClass(data_shape): """ Return shape class of the given data shape """ @@ -404,11 +407,12 @@ def getShapeClass(data_shape): if "class" not in data_shape: raise KeyError("expected 'class' key for data shape")\ - + return data_shape["class"] + def getRank(data_shape): - """ Return rank of given data shape_json """ + """ Return rank of given data shape_json """ shape_class = getShapeClass(data_shape) @@ -423,6 +427,7 @@ def getRank(data_shape): else: raise ValueError(f"unexpected data shape class: {shape_class}") + def getDsetRank(dset_json): """Get rank returning 0 for scalar or NULL data shapes""" 
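# Per the docstring: H5S_SCALAR and H5S_NULL shapes report rank 0 here,
# while an H5S_SIMPLE shape such as {"class": "H5S_SIMPLE", "dims": [10, 20]}
# has the rank of its dims list (here 2).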
data_shape = dset_json["shape"] @@ -445,7 +450,7 @@ def isScalarSpace(dset_json): shape_class = getShapeClass(data_shape) if shape_class == "H5S_NULL": return False - + rank = getRank(data_shape) return True if rank == 0 else False @@ -458,7 +463,7 @@ def getContiguousLayout(shape_json, item_size, chunk_min=None, chunk_max=None): msg = "ContiguousLayout can only be used with fixed-length types" log.warn(msg) raise ValueError(msg) - + if chunk_min is None: msg = "chunk_min not set" log.warn(msg) @@ -470,7 +475,7 @@ def getContiguousLayout(shape_json, item_size, chunk_min=None, chunk_max=None): if chunk_max < chunk_min: raise ValueError("chunk_max cannot be less than chunk_min") - + if shape_json is None or shape_json["class"] == "H5S_NULL": return None if shape_json["class"] == "H5S_SCALAR": @@ -507,6 +512,7 @@ def getContiguousLayout(shape_json, item_size, chunk_min=None, chunk_max=None): return layout + def getChunkSize(layout, type_size): """Return chunk size given layout. i.e. just the product of the values in the list. @@ -521,6 +527,7 @@ def getChunkSize(layout, type_size): chunk_size *= n return chunk_size + def validateChunkLayout(shape_json, item_size, layout, chunk_table=None): """ Use chunk layout given in the creationPropertiesList (if defined and @@ -668,7 +675,7 @@ def validateChunkLayout(shape_json, item_size, layout, chunk_table=None): msg = f"Invalid chunk table id: {chunk_table_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) - + elif layout_class == "H5D_CHUNKED": if "dims" not in layout: msg = "dims key not found in layout for creation property list" @@ -695,7 +702,8 @@ def validateChunkLayout(shape_json, item_size, layout, chunk_table=None): msg = f"Unexpected layout: {layout_class}" log.warn(msg) raise ValueError(msg) - + + def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN, layout_class="H5D_CHUNKED"): """Compute an increased chunk shape with a size in bytes greater than chunk_min.""" if shape_json is None or shape_json["class"] == "H5S_NULL": @@ -833,7 +841,7 @@ def guessChunk(shape_json, typesize): def getLayoutJson(creation_props, shape=None, type_json=None, chunk_min=None, chunk_max=None): """ Get the layout json given by creation_props. 
Raise bad request error if invalid """ - + min_chunk_size = int(config.get("min_chunk_size")) max_chunk_size = int(config.get("max_chunk_size")) @@ -853,7 +861,7 @@ def getLayoutJson(creation_props, shape=None, type_json=None, chunk_min=None, ch layout_props = creation_props["layout"] else: layout_props = None - + if layout_props: if "class" not in layout_props: msg = "expected class key in layout props" From 5cc77e7843e573f01ac794cb961573ed1b6e862e Mon Sep 17 00:00:00 2001 From: John Readey Date: Mon, 14 Jul 2025 20:57:19 +0100 Subject: [PATCH 32/49] expanded link test --- hsds/link_sn.py | 11 +++++++++++ hsds/servicenode_lib.py | 7 +++++-- hsds/util/dsetUtil.py | 2 +- hsds/util/linkUtil.py | 3 ++- tests/integ/link_test.py | 17 ++++++++++++++++- tests/integ/value_test.py | 4 +--- 6 files changed, 36 insertions(+), 8 deletions(-) diff --git a/hsds/link_sn.py b/hsds/link_sn.py index 938f78c2..8b90af12 100755 --- a/hsds/link_sn.py +++ b/hsds/link_sn.py @@ -336,6 +336,16 @@ async def PUT_Links(request): msg = "Unable to load JSON body" log.warn(msg) raise HTTPBadRequest(reason=msg) + + if not body: + msg = "PUT links with empty body" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + if not isinstance(body, dict): + msg = f"PUT links expected dictionary body but got: {type(body)}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) domain = getDomainFromRequest(request) if not isValidDomain(domain): @@ -432,6 +442,7 @@ async def PUT_Links(request): link_item = link_items[title] getLinkClass(link_item) except ValueError: + log.warn(f"invalid link for {title}: {link_item}") raise HTTPBadRequest(reason="invalid link item") grp_ids[grp_id] = link_items diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 1c17edad..65214aa0 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -1020,10 +1020,13 @@ async def getAttributeFromRequest(app, req_json, obj_id=None, bucket=None): created = req_json["created"] # allow "pre-dated" attributes if the timestamp is within the last 10 seconds predate_max_time = config.get("predate_max_time", default=10.0) - if now - created > predate_max_time: + if now - created < predate_max_time: attr_item["created"] = created else: - log.warn("stale created timestamp for attribute, ignoring") + msg = "stale created timestamp for attribute, ignoring " + msg += f"predate config: {predate_max_time:6.2f} " + msg += f"age: {(now - created):6.2f}" + log.warn(msg) if "created" not in attr_item: attr_item["created"] = now diff --git a/hsds/util/dsetUtil.py b/hsds/util/dsetUtil.py index a1d20cbf..5bf3afc5 100644 --- a/hsds/util/dsetUtil.py +++ b/hsds/util/dsetUtil.py @@ -329,7 +329,7 @@ def getShapeJson(body): shape_class = "H5S_SIMPLE" dims = body_shape else: - msg = "invalid shape: {body_shape}" + msg = f"invalid shape: {body_shape}" log.warn(msg) raise ValueError(msg) diff --git a/hsds/util/linkUtil.py b/hsds/util/linkUtil.py index 0090b045..d0063a39 100644 --- a/hsds/util/linkUtil.py +++ b/hsds/util/linkUtil.py @@ -33,6 +33,7 @@ def validateLinkName(name): def getLinkClass(link_json): """ verify this is a valid link returns the link class """ + log.debug(f"getLinkClass({link_json})") if "class" in link_json: link_class = link_json["class"] else: @@ -183,7 +184,7 @@ def getRequestLink(title, link_json, predate_max_time=0.0): if "created" in link_json: created = link_json["created"] # allow "pre-dated" attributes if recent enough - if now - created > predate_max_time: + if now - created < predate_max_time: link_item["created"] = created else: 
log.warn("stale created timestamp for link, ignoring") diff --git a/tests/integ/link_test.py b/tests/integ/link_test.py index 7c909435..801f5d0c 100755 --- a/tests/integ/link_test.py +++ b/tests/integ/link_test.py @@ -68,6 +68,14 @@ def testHardLink(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 404) # link doesn't exist yet + # try creating link with no body + rsp = self.session.put(req, headers=headers) + self.assertEqual(rsp.status_code, 400) + + # try creating link with no items + rsp = self.session.put(req, headers=headers, data=json.dumps({})) + self.assertEqual(rsp.status_code, 400) + # try creating a link with a different user (should fail) if test_user2: headers = helper.getRequestHeaders(domain=domain, username=test_user2) @@ -1481,7 +1489,14 @@ def testPutLinkMultiple(self): links = {} for i in range(grp_count): title = grp_names[i] - links[title] = {"id": grp_ids[i]} + if i%2 == 0: + # create a hardlink implicitly + links[title] = {"id": grp_ids[i]} + else: + # for variety, create a hardlink by providing full link json + links[title] = {"class": "H5L_TYPE_HARD", "id": grp_ids[i]} + print("putLinkMulti:", links[title]) + self.assertTrue(False) # add a soft and external link as well links["softlink"] = {"h5path": "a_path"} diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index 1f34a99e..293c625d 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -111,9 +111,7 @@ def testPut1DDataset(self): rspJson = json.loads(rsp.text) self.assertTrue("hrefs" in rspJson) self.assertTrue("value" in rspJson) - expect_value = [ - 0, - ] + expect_value = [0, ] expect_value *= data["shape"] self.assertEqual(rspJson["value"], expect_value) From 45f3aa5b934227224bf19226713fc4521e40c74a Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 16 Jul 2025 20:33:11 +0100 Subject: [PATCH 33/49] added config to test high latency storage --- admin/config/config.yml | 3 ++- hsds/util/fileClient.py | 23 ++++++++++++++++++++++- tests/integ/link_test.py | 4 +--- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/admin/config/config.yml b/admin/config/config.yml index 6e92d65b..998d3f36 100755 --- a/admin/config/config.yml +++ b/admin/config/config.yml @@ -70,7 +70,7 @@ admin_group: null # enable admin privileges for any user in this group openid_provider: azure # OpenID authentication provider openid_url: null # OpenID connect endpoint if provider is not azure or google openid_audience: null # OpenID audience. This is synonymous with azure_resource_id for azure -openid_claims: unique_name,appid,roles # Comma seperated list of claims to resolve to usernames. +openid_claims: unique_name,appid,roles # Comma separated list of claims to resolve to usernames. chaos_die: 0 # if > 0, have nodes randomly die after n seconds (for testing) standalone_app: false # True when run as a single application blosc_nthreads: 2 # number of threads to use for blosc compression. 
Set to 0 to have blosc auto-determine thread count @@ -89,6 +89,7 @@ allow_any_bucket_write: true # enable writes to buckets other than default bucke bit_shuffle_default_blocksize: 2048 # default blocksize for bitshuffle filter max_rangeget_gap: 1024 # max gap in byte for intelligent range get requests predate_maxtime: 10.0 # max delta between object created timestamp in request and actual time +posix_delay: 0.0 # delay for POSIX IO operations for simulating cloud storage latencies # DEPRECATED - the remaining config values are not used in currently but kept for backward compatibility with older container images aws_lambda_chunkread_function: null # name of aws lambda function for chunk reading aws_lambda_threshold: 4 # number of chunks per node per request to reach before using lambda diff --git a/hsds/util/fileClient.py b/hsds/util/fileClient.py index feebe2c1..0d7d88ba 100644 --- a/hsds/util/fileClient.py +++ b/hsds/util/fileClient.py @@ -173,6 +173,12 @@ async def get_object(self, key, bucket=None, offset=0, length=-1): msg = f"Unexpected Exception {type(e)} get get_object {key}: {e}" log.error(msg) raise HTTPInternalServerError() + + posix_delay = config.get("posix_delay", default=0.0) + if posix_delay > 0.0: + log.warn(f"posix_delay for get_object, sleep for: {posix_delay}") + await asyncio.sleep(posix_delay) + return data def _mkdir(self, dirpath): @@ -254,6 +260,12 @@ async def put_object(self, key, data, bucket=None): msg = f"fileClient.put_object {key} complete, " msg += f"write_rsp: {write_rsp}" log.debug(msg) + + posix_delay = config.get("posix_delay", default=0.0) + if posix_delay > 0.0: + log.warn(f"posix_delay for put_object, sleep for: {posix_delay}") + await asyncio.sleep(posix_delay) + return write_rsp async def delete_object(self, key, bucket=None): @@ -294,7 +306,11 @@ async def delete_object(self, key, bucket=None): msg = f"Unexpected Exception {type(e)} deleting file obj {key}: {e}" log.error(msg) raise HTTPInternalServerError() - await asyncio.sleep(0) # for async compat + + posix_delay = config.get("posix_delay", default=0.0) + if posix_delay > 0.0: + log.warn(f"posix_delay for delete_object , sleep for: {posix_delay}") + await asyncio.sleep(posix_delay) # for async compat async def is_object(self, key, bucket=None): self._validateBucket(bucket) @@ -429,6 +445,11 @@ async def list_keys( msg == f"got {len(key_names)}" log.warning(msg) + posix_delay = config.get("posix_delay", default=0.0) + if posix_delay > 0.0: + log.warn(f"posix_delay for list_keys, sleep for: {posix_delay}") + await asyncio.sleep(posix_delay) + return key_names async def releaseClient(self): diff --git a/tests/integ/link_test.py b/tests/integ/link_test.py index 801f5d0c..a796155f 100755 --- a/tests/integ/link_test.py +++ b/tests/integ/link_test.py @@ -1489,14 +1489,12 @@ def testPutLinkMultiple(self): links = {} for i in range(grp_count): title = grp_names[i] - if i%2 == 0: + if i % 2 == 0: # create a hardlink implicitly links[title] = {"id": grp_ids[i]} else: # for variety, create a hardlink by providing full link json links[title] = {"class": "H5L_TYPE_HARD", "id": grp_ids[i]} - print("putLinkMulti:", links[title]) - self.assertTrue(False) # add a soft and external link as well links["softlink"] = {"h5path": "a_path"} From ff1c04367063662e633849b1f49c90e5ff76f2a2 Mon Sep 17 00:00:00 2001 From: John Readey Date: Tue, 22 Jul 2025 12:50:43 +0100 Subject: [PATCH 34/49] added put_data action for DomainCrawler --- admin/config/config.yml | 1 + hsds/domain_crawl.py | 55 
+++++++++++++++++++++++++++++++++++++++++ hsds/dset_dn.py | 2 +- hsds/dset_sn.py | 47 +++++++++++++++++++++++++++++---- hsds/link_sn.py | 4 +-- 5 files changed, 96 insertions(+), 13 deletions(-) diff --git a/admin/config/config.yml b/admin/config/config.yml index 998d3f36..4b9f40d5 100755 --- a/admin/config/config.yml +++ b/admin/config/config.yml @@ -90,6 +90,7 @@ bit_shuffle_default_blocksize: 2048 # default blocksize for bitshuffle filter max_rangeget_gap: 1024 # max gap in byte for intelligent range get requests predate_maxtime: 10.0 # max delta between object created timestamp in request and actual time posix_delay: 0.0 # delay for POSIX IO operations for simulating cloud storage latencies +max_compact_dset_size: 65536 # size in bytes for maximum compact storage size # DEPRECATED - the remaining config values are not used in currently but kept for backward compatibility with older container images aws_lambda_chunkread_function: null # name of aws lambda function for chunk reading aws_lambda_threshold: 4 # number of chunks per node per request to reach before using lambda diff --git a/hsds/domain_crawl.py b/hsds/domain_crawl.py index 35b20bf9..5c1fe13a 100644 --- a/hsds/domain_crawl.py +++ b/hsds/domain_crawl.py @@ -19,9 +19,10 @@ from aiohttp.web_exceptions import HTTPInternalServerError, HTTPNotFound, HTTPGone from h5json.objid import getCollectionForId +from h5json.array_util import arrayToBytes from .util.nodeUtil import getDataNodeUrl -from .util.httpUtil import isOK +from .util.httpUtil import isOK, http_put from .util.globparser import globmatch from .servicenode_lib import getObjectJson, getAttributes, putAttributes, getLinks, putLinks from . import hsds_logger as log @@ -233,7 +234,7 @@ async def put_attributes(self, obj_id, attr_items): try: status = await putAttributes(self._app, obj_id, attr_items, **kwargs) except HTTPConflict: - log.warn("DomainCrawler - got HTTPConflict from http_put") + log.warn("DomainCrawler - got HTTPConflict from putAttributes") status = 409 except HTTPServiceUnavailable: status = 503 @@ -419,8 +420,10 @@ async def put_links(self, grp_id, link_items): log.warn("DomainCrawler - got HTTPConflict from http_put") status = 409 except HTTPServiceUnavailable: + log.warn("DomainCrawler - got HTTPServiceUnavailable exception") status = 503 except HTTPInternalServerError: + log.warn("DomainCrawler - got 500 error from DN") status = 500 except Exception as e: log.error(f"unexpected exception {e}") @@ -428,6 +431,38 @@ async def put_links(self, grp_id, link_items): log.debug(f"DomainCrawler fetch for {grp_id} - returning status: {status}") self._obj_dict[grp_id] = {"status": status} + async def put_data(self, chunk_id, arr): + # write a one-chunk dataset value + log.debug(f"DomainCrawler put_data for {chunk_id}, arr: {arr}") + req = getDataNodeUrl(self._app, chunk_id) + req += "/chunks/" + chunk_id + log.debug(f"put_data req: {req}") + params = {"bucket": self._bucket} + + data = arrayToBytes(arr) + + log.debug(f"DomainCrawler - put_data req: {req}, {len(data)} bytes") + + try: + # TBD: setup an http client?
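# Note: as first written here, the success path never assigns `status`, so
# the log.debug call after this try block would raise UnboundLocalError;
# PATCH 35 below ("fix for hang in DomainCrawler put_data handler") corrects
# this by capturing the http_put response and setting status = 200.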
+ await http_put(self._app, req, data=data, params=params, client=None) + log.debug("http_put return") + except HTTPConflict: + log.warn("DomainCrawler - got HTTPConflict from http_put") + status = 409 + except HTTPServiceUnavailable: + log.warn("DomainCrawler - got HTTPServiceUnavailable exception") + status = 503 + except HTTPInternalServerError: + log.warn("DomainCrawler - got 500 error from DN") + status = 500 + except Exception as e: + log.error(f"unexpected exception {e}") + status = 500 + + log.debug(f"DomainCrawler put_data for {chunk_id} - returning status: {status}") + self._obj_dict[chunk_id] = {"status": status} + def get_status(self): """ return the highest status of any of the returned objects """ status = None @@ -528,7 +563,7 @@ async def fetch(self, obj_id): await self.put_attributes(obj_id, attr_items) elif self._action == "get_link": - log.debug("DomainCrawlwer - get links") + log.debug("DomainCrawler - get links") log.debug(f"self._objs: {self._objs}, type: {type(self._objs)}") if self._objs is None or obj_id not in self._objs: @@ -548,7 +583,7 @@ async def fetch(self, obj_id): log.debug(f"DomainCrawler - get link titles: {link_titles}") await self.get_links(obj_id, link_titles) elif self._action == "put_link": - log.debug("DomainCrawlwer - put links") + log.debug("DomainCrawler - put links") # write links if self._objs and obj_id not in self._objs: log.error(f"couldn't find {obj_id} in self._objs") @@ -557,11 +592,21 @@ async def fetch(self, obj_id): log.debug(f"got {len(link_items)} link items for {obj_id}") await self.put_links(obj_id, link_items) + elif self._action == "put_data": + log.debug("DomainCrawler - put data") + # write one chunk per dataset + if self._objs and obj_id not in self._objs: + log.error(f"couldn't find {obj_id} in self._objs") + return + data = self._objs[obj_id] + log.debug(f"got {data} data for {obj_id}") + + await self.put_data(obj_id, data) else: msg = f"DomainCrawler: unexpected action: {self._action}" log.error(msg) msg = f"DomainCrawler - fetch complete obj_id: {obj_id}, " - msg += f"{len(self._obj_dict)} objects found" + msg += f"{len(self._obj_dict)} objects processed" log.debug(msg) log.debug(f"obj_dict: {len(self._obj_dict)} items") diff --git a/hsds/dset_dn.py b/hsds/dset_dn.py index fc949203..b2b640e4 100755 --- a/hsds/dset_dn.py +++ b/hsds/dset_dn.py @@ -137,7 +137,7 @@ async def POST_Dataset(request): if "layout" in body: layout = body["layout"] # client specified chunk layout - # ok - all set, create committed type obj + # ok - all set, create dataset obj now = getNow(app) if "attributes" in body: diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 48ee8609..b5c912c8 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -23,7 +23,8 @@ from .util.httpUtil import getHref, respJsonAssemble from .util.httpUtil import jsonResponse, getBooleanParam -from .util.dsetUtil import getPreviewQuery, getShapeDims +from .util.chunkUtil import getChunkIds +from .util.dsetUtil import getPreviewQuery, getShapeDims, getChunkLayout, getDatasetLayoutClass from .util.authUtil import getUserPasswordFromRequest, aclCheck from .util.authUtil import validateUserPassword from .util.domainUtil import getDomainFromRequest, getPathForDomain, isValidDomain @@ -31,7 +32,7 @@ from .servicenode_lib import getDomainJson, getObjectJson, getDsetJson, getPathForObjectId from .servicenode_lib import getObjectIdByPath, validateAction, getRootInfo from .servicenode_lib import getDatasetCreateArgs, createDataset, deleteObject -from .dset_lib import updateShape, 
deleteAllChunks, doHyperslabWrite +from .dset_lib import updateShape, deleteAllChunks from .post_crawl import createDatasets from .domain_crawl import DomainCrawler from . import hsds_logger as log @@ -605,12 +606,35 @@ def _updateInitValuesList(kwargs): raise HTTPInternalServerError() # write any init data values + init_chunks = {} for index in range(obj_count): init_data = init_values[index] if init_data is None: - continue + continue # no data to initialize dset_json = objects[index] + dset_id = dset_json["id"] log.debug(f"init value, post_rsp: {dset_json}") + layout_class = getDatasetLayoutClass(dset_json) + log.debug(f"layout_class: {layout_class}") + if layout_class != "H5D_CHUNKED": + msg = f"dataset init_data used with unsupported layout_class: {layout_class}" + log.error(msg) + raise HTTPInternalServerError() + layout_dims = getChunkLayout(dset_json) + log.debug(f"init data layout is: {layout_dims}") + # make selection for entire dataspace + dims = getShapeDims(dset_json["shape"]) + slices = [] + for dim in dims: + s = slice(0, dim, 1) + slices.append(s) + chunk_ids = getChunkIds(dset_id, slices, layout_dims) + log.debug(f"init data, got chunk_ids: {chunk_ids}") + if not chunk_ids or len(chunk_ids) != 1: + msg = f"expected one chunk for init_data but got: {chunk_ids}" + log.error(msg) + raise HTTPInternalServerError() + chunk_id = chunk_ids[0] shape_json = dset_json["shape"] type_json = dset_json["type"] arr_dtype = createDataType(type_json) @@ -627,7 +651,8 @@ def _updateInitValuesList(kwargs): log.warn(f"IndexError: {msg}") raise HTTPBadRequest(reason=msg) log.debug(f"got json arr: {input_arr.shape}") - + init_chunks[chunk_id] = input_arr + """ # write data if provided log.debug(f"write input_arr: {input_arr}") # make selection for entire dataspace dims = getShapeDims(shape_json) slices = [] for dim in dims: s = slice(0, dim, 1) slices.append(s) - # make a one page list to handle the write in one chunk crawler run + #make a one page list to handle the write in one chunk crawler run # (larger write request should user binary streaming) kwargs = {"page_number": 0, "page": slices} kwargs["dset_json"] = dset_json @@ -645,6 +670,18 @@ def _updateInitValuesList(kwargs): kwargs["bucket"] = bucket kwargs["select_dtype"] = input_arr.dtype kwargs["data"] = input_arr # do write await doHyperslabWrite(app, request, **kwargs) + """ + if init_chunks: + # write dataset init values using the Domain Crawler + log.debug(f"POST dataset - setting init values: {list(init_chunks.keys())}") + kwargs = {"action": "put_data", "bucket": bucket} + + crawler = DomainCrawler(app, init_chunks, **kwargs) + + # will raise exception on not found, server busy, etc.
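# init_chunks maps each dataset's single chunk id to the numpy array of
# initial values; the crawler issues one put_data request per entry and
# get_status() reduces the per-chunk results to the highest (worst) status.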
+ await crawler.crawl() + status = crawler.get_status() + log.info(f"DomainCrawler done for put_data action, status: {status}") if "objects" in post_rsp: # add any links in multi request diff --git a/hsds/link_sn.py b/hsds/link_sn.py index 8b90af12..94139ce5 100755 --- a/hsds/link_sn.py +++ b/hsds/link_sn.py @@ -336,12 +336,12 @@ async def PUT_Links(request): msg = "Unable to load JSON body" log.warn(msg) raise HTTPBadRequest(reason=msg) - + if not body: msg = "PUT links with empty body" log.warn(msg) raise HTTPBadRequest(reason=msg) - + if not isinstance(body, dict): msg = f"PUT links expected dictionary body but got: {type(body)}" log.warn(msg) From cda56cf819767e94fdfc971bbab2c27be1d3f87f Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 23 Jul 2025 11:08:45 +0100 Subject: [PATCH 35/49] fix for hang in DomainCrawler put_data handler --- hsds/domain_crawl.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/hsds/domain_crawl.py b/hsds/domain_crawl.py index 5c1fe13a..15dc2c6d 100644 --- a/hsds/domain_crawl.py +++ b/hsds/domain_crawl.py @@ -442,11 +442,10 @@ async def put_data(self, chunk_id, arr): data = arrayToBytes(arr) log.debug(f"DomainCrawler - put_data req: {req}, {len(data)} bytes") - try: - # TBD: setup an http client? - await http_put(self._app, req, data=data, params=params, client=None) - log.debug("http_put return") + rsp = await http_put(self._app, req, data=data, params=params) + log.debug(f"http_put return: {rsp}") + status = 200 except HTTPConflict: log.warn("DomainCrawler - got HTTPConflict from http_put") status = 409 @@ -459,6 +458,8 @@ async def put_data(self, chunk_id, arr): except Exception as e: log.error(f"unexpected exception {e}") status = 500 + finally: + log.debug("DomainCrawler put_data end try") log.debug(f"DomainCrawler put_data for {chunk_id} - returning status: {status}") self._obj_dict[chunk_id] = {"status": status} From 5a2d4d682ba4ad798d179935bad99c843f2b9474 Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 23 Jul 2025 12:50:15 +0100 Subject: [PATCH 36/49] reduce log verbosity --- hsds/domain_crawl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hsds/domain_crawl.py b/hsds/domain_crawl.py index 15dc2c6d..19eee5df 100644 --- a/hsds/domain_crawl.py +++ b/hsds/domain_crawl.py @@ -600,7 +600,7 @@ async def fetch(self, obj_id): log.error(f"couldn't find {obj_id} in self._objs") return data = self._objs[obj_id] - log.debug(f"got {data} data for {obj_id}") + log.debug(f"got {len(data)} data for {obj_id}") await self.put_data(obj_id, data) else: From 053395c5bf41238cc6d157ca7846bfcf9609dcd1 Mon Sep 17 00:00:00 2001 From: John Readey Date: Tue, 29 Jul 2025 11:10:16 +0100 Subject: [PATCH 37/49] fix for regression with h5pyd master branch --- hsds/dset_sn.py | 20 +----------------- hsds/group_sn.py | 1 + hsds/servicenode_lib.py | 45 ++++++++++++++++++++++------------------- 3 files changed, 26 insertions(+), 40 deletions(-) diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index b5c912c8..84a983c9 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -652,25 +652,7 @@ def _updateInitValuesList(kwargs): raise HTTPBadRequest(reason=msg) log.debug(f"got json arr: {input_arr.shape}") init_chunks[chunk_id] = input_arr - """ - # write data if provided - log.debug(f"write input_arr: {input_arr}") - # make selection for entire dataspace - dims = getShapeDims(shape_json) - slices = [] - for dim in dims: - s = slice(0, dim, 1) - slices.append(s) - #make a one page list to handle the write in one chunk crawler run - # 
(larger write request should user binary streaming) - kwargs = {"page_number": 0, "page": slices} - kwargs["dset_json"] = dset_json - kwargs["bucket"] = bucket - kwargs["select_dtype"] = input_arr.dtype - kwargs["data"] = input_arr - # do write - await doHyperslabWrite(app, request, **kwargs) - """ + if init_chunks: # write dataset init values using the Domain Crawler log.debug(f"POST dataset - setting init values: {list(init_chunks.keys())}") diff --git a/hsds/group_sn.py b/hsds/group_sn.py index 1011b883..991b50bd 100755 --- a/hsds/group_sn.py +++ b/hsds/group_sn.py @@ -233,6 +233,7 @@ async def POST_Group(request): kwargs = {"root_id": root_id, "bucket": bucket} if post_rsp is None: + log.debug(f"post_rsp is None, call createGroup with kwargs: {kwargs}") # Handle cases other than multi-group create here if "type" in kwargs: msg = "type key is not allowed for Group creation" diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 65214aa0..a36eaeac 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -1357,6 +1357,8 @@ def getCreateArgs(body, """ get args for createObject from request body """ log.debug(f"getCreateArgs with body keys: {list(body.keys())}") + if ignore_link: + log.debug("getCreateArgs, ignore_link is set") kwargs = {"bucket": bucket} predate_max_time = config.get("predate_max_time", default=10.0) @@ -1365,9 +1367,29 @@ def getCreateArgs(body, obj_id = None h5path = None + if "parent_id" not in body: + parent_id = root_id + else: + parent_id = body["parent_id"] + + if "h5path" in body: + h5path = body["h5path"] + # normalize the h5path + if h5path.startswith("/"): + if parent_id == root_id: + # just adjust the path to be relative + h5path = h5path[1:] + else: + msg = f"PostCrawler expecting relative h5path, but got: {h5path}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + if h5path.endswith("/"): + h5path = h5path[:-1] # makes iterating through the links a bit easier + if "link" in body: if "h5path" in body: - msg = "link can't be used with h5path" + msg = "'link' key in body can't be used with h5path" log.warn(msg) raise HTTPBadRequest(reason=msg) # if ignore_link is set, parent_links will be created post object creation @@ -1389,26 +1411,7 @@ def getCreateArgs(body, log.debug(f"parent id: {parent_id}, link_title: {link_title}") if not ignore_link: h5path = link_title # just use the link name as the h5path - - if "parent_id" not in body: - parent_id = root_id - else: - parent_id = body["parent_id"] - - if "h5path" in body: - h5path = body["h5path"] - # normalize the h5path - if h5path.startswith("/"): - if parent_id == root_id: - # just adjust the path to be relative - h5path = h5path[1:] - else: - msg = f"PostCrawler expecting relative h5path, but got: {h5path}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - if h5path.endswith("/"): - h5path = h5path[:-1] # makes iterating through the links a bit easier + log.debug(f"set h5path to {link_title}") if parent_id and h5path: # these are used by createObjectByPath From 78127f11c989480d443b5d1fb0528b01cf3b2e29 Mon Sep 17 00:00:00 2001 From: John Readey Date: Mon, 8 Sep 2025 17:22:51 +0100 Subject: [PATCH 38/49] enable client-based timestamps for attribute and link creation --- admin/config/config.yml | 1 + hsds/attr_dn.py | 18 ++- hsds/link_dn.py | 16 ++- hsds/link_sn.py | 3 + hsds/servicenode_lib.py | 9 +- tests/integ/attr_test.py | 61 +++++++++ tests/integ/link_test.py | 266 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 365 insertions(+), 9 deletions(-) diff --git 
a/admin/config/config.yml b/admin/config/config.yml index 4b9f40d5..a4303edd 100755 --- a/admin/config/config.yml +++ b/admin/config/config.yml @@ -91,6 +91,7 @@ max_rangeget_gap: 1024 # max gap in byte for intelligent range get requests predate_maxtime: 10.0 # max delta between object created timestamp in request and actual time posix_delay: 0.0 # delay for POSIX IO operations for simulating cloud storage latencies max_compact_dset_size: 65536 # size in bytes for maximum compact storage size +max_timestamp_drift: 300 # number of seconds a client-based timestamp can differ from current time # DEPRECATED - the remaining config values are not used in currently but kept for backward compatibility with older container images aws_lambda_chunkread_function: null # name of aws lambda function for chunk reading aws_lambda_threshold: 4 # number of chunks per node per request to reach before using lambda diff --git a/hsds/attr_dn.py b/hsds/attr_dn.py index cb002623..3c640c3f 100755 --- a/hsds/attr_dn.py +++ b/hsds/attr_dn.py @@ -12,7 +12,6 @@ # # attribute handling routines # -import time from bisect import bisect_left from aiohttp.web_exceptions import HTTPBadRequest, HTTPConflict, HTTPNotFound, HTTPGone @@ -28,6 +27,8 @@ from .util.dsetUtil import getShapeDims from .util.domainUtil import isValidBucketName from .datanode_lib import get_obj_id, get_metadata_obj, save_metadata_obj +from .util.timeUtil import getNow +from . import config from . import hsds_logger as log @@ -362,6 +363,8 @@ async def PUT_Attributes(request): params = request.rel_url.query log.debug(f"got PUT_Attributes params: {params}") obj_id = get_obj_id(request) + now = getNow(app) + max_timestamp_drift = int(config.get("max_timestamp_drift", default=300)) if not request.has_body: log.error("PUT_Attribute with no body") @@ -459,11 +462,18 @@ async def PUT_Attributes(request): attributes = obj_json["attributes"] - create_time = time.time() # check for conflicts new_attributes = set() # attribute names that are new or replacements for attr_name in items: attribute = items[attr_name] + if attribute.get("created"): + create_time = attribute["created"] + log.debug(f"attribute {attr_name} has create time: {create_time}") + if abs(create_time - now) > max_timestamp_drift: + log.warn(f"attribute {attr_name} create time stale, ignoring") + create_time = now + else: + create_time = now if attr_name in attributes: log.debug(f"attribute {attr_name} exists") old_item = attributes[attr_name] @@ -511,7 +521,7 @@ async def PUT_Attributes(request): if new_attributes: # update the obj lastModified - now = time.time() + now = getNow(app) obj_json["lastModified"] = now # write back to S3, save to metadata cache await save_metadata_obj(app, obj_id, obj_json, bucket=bucket) @@ -610,7 +620,7 @@ async def DELETE_Attributes(request): if save_obj: # update the object lastModified - now = time.time() + now = getNow(app) obj_json["lastModified"] = now await save_metadata_obj(app, obj_id, obj_json, bucket=bucket) diff --git a/hsds/link_dn.py b/hsds/link_dn.py index 1ad6133e..a35acf17 100755 --- a/hsds/link_dn.py +++ b/hsds/link_dn.py @@ -27,6 +27,7 @@ from .util.domainUtil import isValidBucketName from .util.timeUtil import getNow from .datanode_lib import get_obj_id, get_metadata_obj, save_metadata_obj +from . import config from . 
import hsds_logger as log @@ -285,6 +286,8 @@ async def PUT_Links(request): params = request.rel_url.query group_id = get_obj_id(request) log.info(f"PUT links: {group_id}") + now = getNow(app) + max_timestamp_drift = int(config.get("max_timestamp_drift", default=300)) if not isValidUuid(group_id, obj_class="groups"): log.error(f"Unexpected group_id: {group_id}") @@ -365,11 +368,16 @@ async def PUT_Links(request): link_delete_set = deleted_links[group_id] else: link_delete_set = set() - - create_time = getNow(app) - for title in new_links: item = items[title] + if item.get("created"): + create_time = item["created"] + log.debug(f"link {title} has create time: {create_time}") + if abs(create_time - now) > max_timestamp_drift: + log.warn(f"link {title} create time stale, ignoring") + create_time = now + else: + create_time = now item["created"] = create_time links[title] = item log.debug(f"added link {title}: {item}") @@ -378,7 +386,7 @@ async def PUT_Links(request): if new_links: # update the group lastModified - group_json["lastModified"] = create_time + group_json["lastModified"] = now # write back to S3, save to metadata cache await save_metadata_obj(app, group_id, group_json, bucket=bucket) diff --git a/hsds/link_sn.py b/hsds/link_sn.py index 94139ce5..2048dd7c 100755 --- a/hsds/link_sn.py +++ b/hsds/link_sn.py @@ -299,6 +299,9 @@ async def PUT_Link(request): kwargs["tgt_id"] = body.get("id") kwargs["h5path"] = body.get("h5path") kwargs["h5domain"] = body.get("h5domain") + created = body.get("created") + if created: + kwargs["created"] = created status = await putLink(app, group_id, link_title, **kwargs) diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index a36eaeac..3e70a71a 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -448,7 +448,12 @@ async def getLink(app, group_id, title, bucket=None): return link_json -async def putLink(app, group_id, title, tgt_id=None, h5path=None, h5domain=None, bucket=None): +async def putLink(app, group_id, title, + tgt_id=None, + h5path=None, + h5domain=None, + bucket=None, + created=None): """ create a new link. Return 201 if this is a new link, or 200 if it's a duplicate of an existing link. 
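Pass tgt_id for a hard link, h5path for a soft link, or h5path together with h5domain for an external link. If created is given, the client-supplied timestamp is forwarded to the DN and used as the link's creation time, subject to the server's max_timestamp_drift check. A sketch of a hard-link call (assuming a valid group id, target id, and bucket): status = await putLink(app, grp_id, "data", tgt_id=dset_id, bucket=bucket)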
""" @@ -469,6 +474,8 @@ async def putLink(app, group_id, title, tgt_id=None, h5path=None, h5domain=None, link_json["h5path"] = h5path if h5domain: link_json["h5domain"] = h5domain + if created: + link_json["created"] = created try: link_class = getLinkClass(link_json) diff --git a/tests/integ/attr_test.py b/tests/integ/attr_test.py index de54c5ea..e986a464 100644 --- a/tests/integ/attr_test.py +++ b/tests/integ/attr_test.py @@ -12,6 +12,7 @@ from copy import copy import unittest import json +import time import numpy as np import base64 import helper @@ -500,6 +501,66 @@ def testPutFixedString(self): self.assertTrue("length" in type_json) self.assertEqual(type_json["length"], 7) + def testUseTimestamp(self): + # Test PUT value for 1d attribute with timestamp included + print("testUseTimestamp", self.base_domain) + + headers = helper.getRequestHeaders(domain=self.base_domain) + req = self.endpoint + "/" + + # Get root uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + def _create_attr(attr_name, ts=None): + + # create attr + fixed_str_type = { + "charSet": "H5T_CSET_ASCII", + "class": "H5T_STRING", + "length": 12, + "strPad": "H5T_STR_NULLPAD", + } + data = {"type": fixed_str_type, "value": "XYZ"} + if ts: + data["created"] = ts + req = self.endpoint + "/groups/" + root_uuid + "/attributes/" + attr_name + rsp = self.session.put(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + + def _check_attr_ts(attr_name, min_ts=None, max_ts=None): + # read attr + req = self.endpoint + "/groups/" + root_uuid + "/attributes/" + attr_name + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("hrefs" in rspJson) + self.assertTrue("value" in rspJson) + self.assertEqual(rspJson["value"], "XYZ") + self.assertTrue("type" in rspJson) + self.assertTrue("created" in rspJson) + if min_ts: + self.assertGreaterEqual(rspJson["created"], min_ts) + if max_ts: + self.assertLessEqual(rspJson["created"], max_ts) + + now = time.time() + # server-based timestamp + _create_attr("a1") + _check_attr_ts("a1", min_ts=(now - 1), max_ts=(now + 1)) + # client assigned timestamp + _create_attr("a2", ts=now) + _check_attr_ts("a2", min_ts=now, max_ts=now) + # client assigned with small time-skew, ok + _create_attr("a3", ts=int(now)) + _check_attr_ts("a3", min_ts=int(now), max_ts=int(now)) + # client assigned with large time-skew, ignored + _create_attr("a4", ts=999) + _check_attr_ts("a4", min_ts=now, max_ts=(now + 1)) + def testPutFixedStringNullTerm(self): # Test PUT value for 1d attribute with fixed length string/null terminated types print("testPutFixedStringNullTerm", self.base_domain) diff --git a/tests/integ/link_test.py b/tests/integ/link_test.py index a796155f..a6142aaf 100755 --- a/tests/integ/link_test.py +++ b/tests/integ/link_test.py @@ -1630,6 +1630,220 @@ def testPutLinkMultiple(self): else: self.assertTrue(False) # unexpected + def testPutLinkMultipleWithTimestamps(self): + domain = self.base_domain + "/testPutLinkMultipleWithTImestamps.h5" + helper.setupDomain(domain) + print("testPutLinkMultipleWithTimestamps", domain) + headers = helper.getRequestHeaders(domain=domain) + req = self.endpoint + "/" + + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_id = rspJson["root"] + + # create a 
group + req = self.endpoint + "/groups" + rsp = self.session.post(req, headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + grpA_id = rspJson["id"] + self.assertTrue(helper.validateId(grpA_id)) + + # link new obj as '/grpA' + req = self.endpoint + "/groups/" + root_id + "/links/grpA" + payload = {"id": grpA_id} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) # created + + # create some groups under grp1 + grp_count = 3 + + grp_names = [f"grp{(i + 1):04d}" for i in range(grp_count)] + grp_ids = [] + + for grp_name in grp_names: + # create sub_groups + req = self.endpoint + "/groups" + rsp = self.session.post(req, headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + grp_id = rspJson["id"] + self.assertTrue(helper.validateId(grp_id)) + grp_ids.append(grp_id) + + # create some links + links = {} + for i in range(grp_count): + title = grp_names[i] + if i % 2 == 0: + # create a hardlink implicitly + links[title] = {"id": grp_ids[i]} + else: + # for variety, create a hardlink by providing full link json + links[title] = {"class": "H5L_TYPE_HARD", "id": grp_ids[i]} + + # add a soft and external link as well + links["softlink"] = {"h5path": "a_path"} + links["extlink"] = {"h5path": "another_path", "h5domain": "/a_domain"} + link_count = len(links) + # add timestamp + timestamps = set() + for title in links: + link = links[title] + now = time.time() + link["created"] = now + timestamps.add(now) + + # write links to the grpA + data = {"links": links} + req = self.endpoint + "/groups/" + grpA_id + "/links" + rsp = self.session.put(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # do a get on the links + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + ret_links = rspJson["links"] + self.assertEqual(len(ret_links), link_count) + for link in ret_links: + self.assertTrue("title" in link) + title = link["title"] + self.assertTrue("class" in link) + link_class = link["class"] + if link_class == "H5L_TYPE_HARD": + self.assertTrue("id" in link) + self.assertTrue(link["id"] in grp_ids) + self.assertTrue(title in grp_names) + elif link_class == "H5L_TYPE_SOFT": + self.assertTrue("h5path" in link) + h5path = link["h5path"] + self.assertEqual(h5path, "a_path") + elif link_class == "H5L_TYPE_EXTERNAL": + self.assertTrue("h5path" in link) + h5path = link["h5path"] + self.assertEqual(h5path, "another_path") + self.assertTrue("h5domain" in link) + h5domain = link["h5domain"] + self.assertEqual(h5domain, "/a_domain") + else: + self.assertTrue(False) # unexpected + self.assertTrue("created" in link) + self.assertTrue(link["created"] in timestamps) + + # try writing again, should get 200 (no new links) + rsp = self.session.put(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 200) + + # write some links to three group objects + links = {} + links["hardlink_multicast"] = {"id": root_id} + links["softlink_multicast"] = {"h5path": "multi_path"} + links["extlink_multicast"] = {"h5path": "multi_path", "h5domain": "/another_domain"} + link_count = len(links) + timestamps = set() + for title in links: + link = links[title] + now = time.time() + link["created"] = now + timestamps.add(now) + + data = {"links": links, "grp_ids": grp_ids} + req = self.endpoint + "/groups/" + root_id + "/links" + rsp = 
self.session.put(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # check that the links got created + for grp_id in grp_ids: + req = self.endpoint + "/groups/" + grp_id + "/links" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + ret_links = rspJson["links"] + self.assertEqual(len(ret_links), 3) + for ret_link in ret_links: + self.assertTrue("class" in ret_link) + link_class = ret_link["class"] + if link_class == "H5L_TYPE_HARD": + self.assertTrue("id" in ret_link) + self.assertEqual(ret_link["id"], root_id) + elif link_class == "H5L_TYPE_SOFT": + self.assertTrue("h5path" in ret_link) + self.assertEqual(ret_link["h5path"], "multi_path") + elif link_class == "H5L_TYPE_EXTERNAL": + self.assertTrue("h5path" in ret_link) + self.assertEqual(ret_link["h5path"], "multi_path") + self.assertTrue("h5domain" in ret_link) + self.assertEqual(ret_link["h5domain"], "/another_domain") + else: + self.assertTrue(False) # unexpected + self.assertTrue("created" in ret_link) + self.assertTrue(ret_link["created"] in timestamps) + + # write different links to three group objects + link_data = {} + timestamps = set() + for i in range(grp_count): + grp_id = grp_ids[i] + links = {} + links[f"hardlink_{i}"] = {"id": root_id} + links[f"softlink_{i}"] = {"h5path": f"multi_path_{i}"} + ext_link = {"h5path": f"multi_path_{i}", "h5domain": f"/another_domain/{i}"} + links[f"extlink_{i}"] = ext_link + for title in links: + link = links[title] + now = time.time() + link["created"] = now + timestamps.add(now) + link_data[grp_id] = {"links": links} + + data = {"grp_ids": link_data} + req = self.endpoint + "/groups/" + root_id + "/links" + rsp = self.session.put(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 201) + + # check that the new links got created + for i in range(grp_count): + grp_id = grp_ids[i] + titles = [f"hardlink_{i}", f"softlink_{i}", f"extlink_{i}", ] + data = {"titles": titles} + # do a post to just return the links we are interested in + req = self.endpoint + "/groups/" + grp_id + "/links" + rsp = self.session.post(req, data=json.dumps(data), headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("links" in rspJson) + ret_links = rspJson["links"] + self.assertEqual(len(ret_links), len(titles)) + for j in range(len(titles)): + ret_link = ret_links[j] + self.assertTrue("class" in ret_link) + link_class = ret_link["class"] + self.assertTrue("title" in ret_link) + link_title = ret_link["title"] + if link_class == "H5L_TYPE_HARD": + self.assertEqual(link_title, f"hardlink_{i}") + self.assertTrue("id" in ret_link) + self.assertEqual(ret_link["id"], root_id) + elif link_class == "H5L_TYPE_SOFT": + self.assertEqual(link_title, f"softlink_{i}") + self.assertTrue("h5path" in ret_link) + self.assertEqual(ret_link["h5path"], f"multi_path_{i}") + elif link_class == "H5L_TYPE_EXTERNAL": + self.assertEqual(link_title, f"extlink_{i}") + self.assertTrue("h5path" in ret_link) + self.assertEqual(ret_link["h5path"], f"multi_path_{i}") + self.assertTrue("h5domain" in ret_link) + self.assertEqual(ret_link["h5domain"], f"/another_domain/{i}") + else: + self.assertTrue(False) # unexpected + self.assertTrue("created" in ret_link) + self.assertTrue(ret_link["created"] in timestamps) + print(timestamps) + def testDeleteLinkMultiple(self): domain = self.base_domain + "/testDeleteLinkMultiple.h5" 
helper.setupDomain(domain) @@ -1805,6 +2019,58 @@ def testLinkCreationOrder(self): self.assertEqual(prev_link['title'], sorted(link_names)[i]) self.assertEqual(link['title'], sorted(link_names)[i + 1]) + def testUseTimestamp(self): + # Test PUT value for link with timestamp included + domain = self.base_domain + "/testLinkUseTimestamp.h5" + + helper.setupDomain(domain) + print("testUseTimestamp", domain) + headers = helper.getRequestHeaders(domain=domain) + req = helper.getEndpoint() + "/" + + # Get root uuid + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + root_uuid = rspJson["root"] + helper.validateId(root_uuid) + + def _create_link(title, ts=None): + # create link + req = helper.getEndpoint() + f"/groups/{root_uuid}/links/{title}" + body = {"h5path": "some_path"} + if ts: + body["created"] = ts + rsp = self.session.put(req, data=json.dumps(body), headers=headers) + self.assertEqual(rsp.status_code, 201) + + def _check_link_ts(title, min_ts=None, max_ts=None): + # read link + req = helper.getEndpoint() + f"/groups/{root_uuid}/links/{title}" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("hrefs" in rspJson) + self.assertTrue("created" in rspJson) + if min_ts: + self.assertGreaterEqual(rspJson["created"], min_ts) + if max_ts: + self.assertLessEqual(rspJson["created"], max_ts) + + now = time.time() + # server-based timestamp + _create_link("a1", ts=None) + _check_link_ts("a1", min_ts=(now - 1), max_ts=(now + 1)) + # client assigned timestamp + _create_link("a2", ts=now) + _check_link_ts("a2", min_ts=now, max_ts=now) + # client assigned with small time-skew, ok + _create_link("a3", ts=int(now)) + _check_link_ts("a3", min_ts=int(now), max_ts=int(now)) + # client assigned with large time-skew, ignored + _create_link("a4", ts=999) + _check_link_ts("a4", min_ts=now, max_ts=(now + 1)) + if __name__ == "__main__": # setup test files From f96b34c4dac935846080ec6da39e89480cd4c8d5 Mon Sep 17 00:00:00 2001 From: John Readey Date: Tue, 9 Sep 2025 10:46:01 +0100 Subject: [PATCH 39/49] remove python 3.9 from .git workflow --- .github/workflows/python-package.yml | 2 +- pyproject.toml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 7e24ec14..5175ea54 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -17,7 +17,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-22.04, ubuntu-latest, windows-latest] - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12"] build-method: ["manual", "docker"] runs-on: ${{ matrix.os }} diff --git a/pyproject.toml b/pyproject.toml index 3f1dc4de..8e260cda 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ classifiers = [ "Topic :: Database", "Topic :: Software Development :: Libraries :: Python Modules", ] -requires-python = ">=3.8" +requires-python = ">=3.10" version = "0.9.2" dependencies = [ @@ -45,7 +45,7 @@ dependencies = [ "h5json@git+https://github.com/HDFGroup/hdf5-json@abstract", "importlib_resources", "numcodecs <= 0.15.1", - "numpy >=2.0.0rc1; python_version>='3.9'", + "numpy >=2.0.0", "psutil", "pyjwt", "pytz", From 03e413f4d5a3a890c5d83d97ff2fbd4e5bea7348 Mon Sep 17 00:00:00 2001 From: John Readey Date: Tue, 9 Sep 2025 10:53:53 +0100 Subject: [PATCH 40/49] adjust min time for time skew test --- 
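Note on the skew handling exercised by testUseTimestamp above: a "created" value supplied by the client is kept only when it is close to the server's clock; a value that is far off (the ts=999 case) is ignored and the server substitutes its own time. The lower bound in the final assertion is relaxed by one second because the test client's clock and the server's clock may differ slightly. A minimal sketch of this behavior, assuming the link code applies a drift check similar to the max_timestamp_drift config used in attr_dn.py's PUT_Attributes; the helper name and the 300-second default below are illustrative, not the actual server code:

    import time

    def resolve_created(client_ts=None, max_drift=300.0):
        # Illustrative only, not the HSDS implementation.
        now = time.time()
        if client_ts is None:
            return now              # no client value: stamp with server time
        if abs(now - client_ts) <= max_drift:
            return client_ts        # small skew: honor the client-assigned time
        return now                  # large skew (e.g. ts=999): fall back to server time

Under this model a1 gets a server timestamp, a2 and a3 keep their client-assigned values, and a4 is stamped with server time, which is why its expected range is checked against (now - 1, now + 1) after this patch.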
tests/integ/link_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integ/link_test.py b/tests/integ/link_test.py index a6142aaf..511bdf4e 100755 --- a/tests/integ/link_test.py +++ b/tests/integ/link_test.py @@ -2069,7 +2069,7 @@ def _check_link_ts(title, min_ts=None, max_ts=None): _check_link_ts("a3", min_ts=int(now), max_ts=int(now)) # client assigned with large time-skew, ignored _create_link("a4", ts=999) - _check_link_ts("a4", min_ts=now, max_ts=(now + 1)) + _check_link_ts("a4", min_ts=(now - 1), max_ts=(now + 1)) if __name__ == "__main__": From b6016e078229277b0a45ed87e775cd748593805b Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 29 Oct 2025 14:07:06 +0000 Subject: [PATCH 41/49] use hdf5-json util classes --- admin/config/config.yml | 1 + hsds/async_lib.py | 7 +- hsds/attr_dn.py | 10 +- hsds/attr_sn.py | 7 +- hsds/chunk_crawl.py | 7 +- hsds/chunk_dn.py | 11 +- hsds/chunk_sn.py | 10 +- hsds/datanode_lib.py | 69 ++- hsds/dset_dn.py | 20 +- hsds/dset_lib.py | 23 +- hsds/dset_sn.py | 6 +- hsds/servicenode_lib.py | 281 ++++----- hsds/util/dsetUtil.py | 1034 +--------------------------------- tests/integ/dataset_test.py | 321 +++++------ tests/unit/dset_util_test.py | 274 +-------- 15 files changed, 429 insertions(+), 1652 deletions(-) diff --git a/admin/config/config.yml b/admin/config/config.yml index a4303edd..cb46522a 100755 --- a/admin/config/config.yml +++ b/admin/config/config.yml @@ -47,6 +47,7 @@ flush_sleep_interval: 1 # time to wait between checking on dirty objects flush_timeout: 10 # max time to wait on all I/O operations to complete for a flush min_chunk_size: 1m # 1 MB max_chunk_size: 4m # 4 MB +default_vlen_type_size: 128 # guess for average size of variable length types max_request_size: 100m # 100 MB - should be no smaller than client_max_body_size in nginx tmpl (if using nginx) max_chunks_per_folder: 0 # max number of chunks per s3 folder. 0 for unlimiited max_task_count: 100 # maximum number of concurrent tasks per node before server will return 503 error diff --git a/hsds/async_lib.py b/hsds/async_lib.py index 715e7985..9888d3dd 100755 --- a/hsds/async_lib.py +++ b/hsds/async_lib.py @@ -20,12 +20,15 @@ from h5json.array_util import getNumElements, bytesToArray from h5json.objid import isValidUuid, isSchema2Id, getS3Key, isS3ObjKey from h5json.objid import getObjId, isValidChunkId, getCollectionForId +from h5json.filters import getFilters +from h5json.shape_util import getShapeDims +from h5json.dset_util import getDatasetLayoutClass, getDatasetLayout, getChunkDims from .util.chunkUtil import getDatasetId, getNumChunks, ChunkIterator -from .util.dsetUtil import getHyperslabSelection, getFilterOps, getChunkDims, getFilters -from .util.dsetUtil import getDatasetLayoutClass, getDatasetLayout, getShapeDims +from .util.dsetUtil import getHyperslabSelection from .util.storUtil import getStorKeys, putStorJSONObj, getStorJSONObj from .util.storUtil import deleteStorObj, getStorBytes, isStorObj +from .datanode_lib import getFilterOps from . import hsds_logger as log from . 
import config import time diff --git a/hsds/attr_dn.py b/hsds/attr_dn.py index 3c640c3f..43c04232 100755 --- a/hsds/attr_dn.py +++ b/hsds/attr_dn.py @@ -21,10 +21,10 @@ from h5json.hdf5dtype import getItemSize, createDataType from h5json.array_util import arrayToBytes, jsonToArray, decodeData from h5json.array_util import bytesToArray, bytesArrayToList, getNumElements +from h5json.shape_util import getShapeDims from .util.attrUtil import validateAttributeName, isEqualAttr from .util.globparser import globmatch -from .util.dsetUtil import getShapeDims from .util.domainUtil import isValidBucketName from .datanode_lib import get_obj_id, get_metadata_obj, save_metadata_obj from .util.timeUtil import getNow @@ -361,7 +361,7 @@ async def PUT_Attributes(request): log.request(request) app = request.app params = request.rel_url.query - log.debug(f"got PUT_Attributes params: {params}") + log.debug(f"got PUT_Attributes params: {dict(params)}") obj_id = get_obj_id(request) now = getNow(app) max_timestamp_drift = int(config.get("max_timestamp_drift", default=300)) @@ -371,7 +371,7 @@ async def PUT_Attributes(request): raise HTTPBadRequest(message="body expected") body = await request.json() - log.debug(f"got body: {body}") + log.debug(f"PUT_Attributes got body: {body}") if "bucket" in params: bucket = params["bucket"] elif "bucket" in body: @@ -440,8 +440,8 @@ async def PUT_Attributes(request): data = arr.tolist() try: json_data = bytesArrayToList(data) - log.debug(f"converted encoded data to {json_data}") - if attr_shape["class"] == "H5S_SCALAR": + log.debug(f"converted encoded data to '{json_data}'") + if attr_shape["class"] == "H5S_SCALAR" and isinstance(json_data, list): attr_json["value"] = json_data[0] # just store the scalar else: attr_json["value"] = json_data diff --git a/hsds/attr_sn.py b/hsds/attr_sn.py index d7f05f75..44346929 100755 --- a/hsds/attr_sn.py +++ b/hsds/attr_sn.py @@ -22,6 +22,7 @@ from h5json.array_util import jsonToArray, getNumElements from h5json.array_util import bytesToArray, arrayToBytes, decodeData, encodeData from h5json.objid import isValidUuid +from h5json.shape_util import getShapeDims from .util.httpUtil import getAcceptType, jsonResponse, getHref, getBooleanParam from .util.globparser import globmatch @@ -29,7 +30,6 @@ from .util.domainUtil import getDomainFromRequest, isValidDomain from .util.domainUtil import getBucketForDomain, verifyRoot from .util.attrUtil import validateAttributeName, getRequestCollectionName -from .util.dsetUtil import getShapeDims from .servicenode_lib import getDomainJson, getAttributeFromRequest, getAttributesFromRequest from .servicenode_lib import getAttributes, putAttributes, deleteAttributes, validateAction @@ -358,7 +358,7 @@ async def PUT_Attribute(request): kwargs = {"bucket": bucket} if "replace" in params and params["replace"]: # allow attribute to be overwritten - log.debug("setting replace for PUT Atttribute") + log.debug("setting replace for PUT Attribute") kwargs["replace"] = True else: log.debug("replace is not set for PUT Attribute") @@ -819,7 +819,7 @@ async def PUT_AttributeValue(request): log.debug("PUT AttributeValue - request_type is binary") request_type = "binary" elif "application/json" in content_type: - log.debug("PUT AttribueValue - request type is json") + log.debug("PUT AttributeValue - request type is json") else: msg = f"Unknown content_type: {content_type}" log.warn(msg) @@ -896,6 +896,7 @@ async def PUT_AttributeValue(request): attr_body["value"] = data.decode("ascii") attr_body["encoding"] = "base64" 
attr_json = {attr_name: attr_body} + log.debug(f"putting attr {attr_name} to DN: {attr_json}") kwargs = {"bucket": bucket, "replace": True} diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py index 47b4b114..a92bdf36 100755 --- a/hsds/chunk_crawl.py +++ b/hsds/chunk_crawl.py @@ -27,12 +27,13 @@ from h5json.hdf5dtype import createDataType from h5json.array_util import jsonToArray, getNumpyValue from h5json.array_util import getNumElements, arrayToBytes, bytesToArray +from h5json.shape_util import getShapeDims +from h5json.dset_util import getChunkDims from .util.nodeUtil import getDataNodeUrl, getNodeCount from .util.httpUtil import http_get, http_put, http_post, get_http_client from .util.httpUtil import isUnixDomainUrl -from .util.dsetUtil import getSliceQueryParam, getShapeDims -from .util.dsetUtil import getSelectionShape, getChunkLayout +from .util.dsetUtil import getSliceQueryParam, getSelectionShape from .util.chunkUtil import getChunkCoverage, getDataCoverage from .util.chunkUtil import getChunkIdForPartition, getQueryDtype @@ -108,7 +109,7 @@ async def write_chunk_hyperslab( log.debug(f"setting fields_param to: {fields_param}") params["fields"] = fields_param - layout = getChunkLayout(dset_json) + layout = getChunkDims(dset_json) log.debug(f"getChunkCoverage({chunk_id}, {slices}, {layout})") chunk_sel = getChunkCoverage(chunk_id, slices, layout) if chunk_sel is None: diff --git a/hsds/chunk_dn.py b/hsds/chunk_dn.py index 02545b85..839da5ac 100644 --- a/hsds/chunk_dn.py +++ b/hsds/chunk_dn.py @@ -23,11 +23,12 @@ from h5json.hdf5dtype import createDataType, getSubType from h5json.array_util import bytesToArray, arrayToBytes, getBroadcastShape from h5json.objid import getS3Key, isValidUuid +from h5json.shape_util import getShapeDims +from h5json.dset_util import getChunkDims from .util.httpUtil import request_read, getContentType from .util.storUtil import isStorObj, deleteStorObj -from .util.dsetUtil import getSelectionList, getChunkLayout, getShapeDims -from .util.dsetUtil import getSelectionShape, getChunkInitializer +from .util.dsetUtil import getSelectionList, getSelectionShape, getChunkInitializer from .util.chunkUtil import getChunkIndex, getDatasetId, chunkQuery from .util.chunkUtil import chunkWriteSelection, chunkReadSelection from .util.chunkUtil import chunkWritePoints, chunkReadPoints @@ -131,7 +132,7 @@ async def PUT_Chunk(request): dset_json = await get_metadata_obj(app, dset_id, bucket=bucket) # TBD - does this work with linked datasets? 
- dims = getChunkLayout(dset_json) + dims = getChunkDims(dset_json) rank = len(dims) type_json = dset_json["type"] @@ -435,7 +436,7 @@ async def GET_Chunk(request): dset_json = await get_metadata_obj(app, dset_id, bucket=bucket) shape_dims = getShapeDims(dset_json["shape"]) log.debug(f"shape_dims: {shape_dims}") - dims = getChunkLayout(dset_json) + dims = getChunkDims(dset_json) log.debug(f"GET_Chunk - got dims: {dims}") # get chunk selection from query params @@ -682,7 +683,7 @@ async def POST_Chunk(request): dset_json = await get_metadata_obj(app, dset_id, bucket=bucket) log.debug(f"get_metadata_obj for {dset_id} returned {dset_json}") - dims = getChunkLayout(dset_json) + dims = getChunkDims(dset_json) rank = len(dims) type_json = dset_json["type"] diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 86d7539f..a1cd3d06 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -28,14 +28,14 @@ from h5json.array_util import bytesArrayToList, jsonToArray, getNumElements, arrayToBytes from h5json.array_util import bytesToArray, squeezeArray, getBroadcastShape from h5json.objid import isValidUuid +from h5json.shape_util import isNullSpace, isScalar, getShapeDims +from h5json.dset_util import getChunkDims, isExtensible, getDsetMaxDims from .util.httpUtil import getHref, getAcceptType, getContentType from .util.httpUtil import request_read, jsonResponse, isAWSLambda from .util.domainUtil import getDomainFromRequest, isValidDomain from .util.domainUtil import getBucketForDomain -from .util.dsetUtil import isNullSpace, isScalarSpace, get_slices, getShapeDims -from .util.dsetUtil import isExtensible, getSelectionPagination -from .util.dsetUtil import getSelectionShape, getDsetMaxDims, getChunkLayout +from .util.dsetUtil import getSelectionShape, getSelectionPagination, get_slices from .util.authUtil import getUserPasswordFromRequest, validateUserPassword from .servicenode_lib import getDsetJson, validateAction from .dset_lib import getSelectionData, getParser, extendShape, doPointWrite, doHyperslabWrite @@ -819,7 +819,7 @@ async def GET_Value(request): log.debug(f"dset shape: {dims}") rank = len(dims) - layout = getChunkLayout(dset_json) + layout = getChunkDims(dset_json) log.debug(f"chunk layout: {layout}") await validateAction(app, domain, dset_id, username, "read") @@ -1103,7 +1103,7 @@ async def POST_Value(request): msg = "POST value not supported for datasets with NULL shape" log.warn(msg) raise HTTPBadRequest(reason=msg) - if isScalarSpace(dset_json): + if isScalar(dset_json): msg = "POST value not supported for datasets with SCALAR shape" log.warn(msg) raise HTTPBadRequest(reason=msg) diff --git a/hsds/datanode_lib.py b/hsds/datanode_lib.py index 48843a25..9bd2b0a5 100644 --- a/hsds/datanode_lib.py +++ b/hsds/datanode_lib.py @@ -20,11 +20,14 @@ from aiohttp.web_exceptions import HTTPNotFound, HTTPForbidden from aiohttp.web_exceptions import HTTPServiceUnavailable, HTTPBadRequest -from h5json.hdf5dtype import createDataType +from h5json.hdf5dtype import createDataType, isVlen from h5json.array_util import arrayToBytes, bytesToArray, jsonToArray +from h5json.filters import getFilters, getCompressionFilter, getShuffleFilter from h5json.objid import getS3Key, isValidUuid from h5json.objid import isValidChunkId, isSchema2Id from h5json.objid import getRootObjId, isRootObjId +from h5json.shape_util import getShapeDims +from h5json.dset_util import getChunkDims from .util.nodeUtil import getDataNodeUrl from .util.storUtil import getStorJSONObj, putStorJSONObj, putStorBytes @@ -33,8 +36,7 @@ from 
.util.domainUtil import isValidDomain, getBucketForDomain from .util.attrUtil import getRequestCollectionName from .util.httpUtil import http_post -from .util.dsetUtil import getChunkLayout, getFilterOps, getShapeDims -from .util.dsetUtil import getChunkInitializer, getSliceQueryParam, getFilters +from .util.dsetUtil import getChunkInitializer, getSliceQueryParam from .util.chunkUtil import getDatasetId, getChunkSelection, getChunkIndex from .util.nodeUtil import validateInPartition from .util.rangegetUtil import ChunkLocation, chunkMunge, getHyperChunkIndex, getHyperChunkFactors @@ -558,6 +560,54 @@ async def delete_metadata_obj(app, obj_id, notify=True, root_id=None, bucket=Non log.debug(f"delete_metadata_obj for {obj_id} done") +def getFilterOps(app, dset_id, filters, dtype=None, chunk_shape=None): + """Get list of filter operations to be used for this dataset""" + filter_map = app["filter_map"] + + if dset_id in filter_map: + return filter_map[dset_id] + + compressionFilter = getCompressionFilter(filters) + + filter_ops = {} + + shuffleFilter = getShuffleFilter(filters) + + if shuffleFilter and not isVlen(dtype): + shuffle_name = shuffleFilter["name"] + if shuffle_name == "shuffle": + filter_ops["shuffle"] = 1 # use regular shuffle + elif shuffle_name == "bitshuffle": + filter_ops["shuffle"] = 2 # use bitshuffle + else: + filter_ops["shuffle"] = 0 # no shuffle + else: + filter_ops["shuffle"] = 0 # no shuffle + + if compressionFilter: + if compressionFilter["class"] == "H5Z_FILTER_DEFLATE": + filter_ops["compressor"] = "zlib" # blosc compressor + else: + if "name" in compressionFilter: + filter_ops["compressor"] = compressionFilter["name"] + else: + filter_ops["compressor"] = "lz4" # default to lz4 + if "level" not in compressionFilter: + filter_ops["level"] = 5 # medium level + else: + filter_ops["level"] = int(compressionFilter["level"]) + + if filter_ops: + # save the chunk shape and dtype + filter_ops["chunk_shape"] = chunk_shape + filter_ops["dtype"] = dtype + filter_map[dset_id] = filter_ops # save + + return filter_ops + else: + return None + + def arange_chunk_init( app, initializer, @@ -588,9 +638,8 @@ def arange_chunk_init( log.warn(msg) raise None - try: - chunk_layout = getChunkLayout(dset_json) - except HTTPInternalServerError: + chunk_layout = getChunkDims(dset_json) + if chunk_layout is None: msg = "non-chunked dataset" log.warning(msg) raise None @@ -714,7 +763,7 @@ async def run_chunk_initializer( dims = getShapeDims(datashape) log.debug(f"dataset shape: {dims}") # get the chunk layout for this dataset - layout = getChunkLayout(dset_json) + layout = getChunkDims(dset_json) log.debug(f"chunk layout: {layout}") rank = len(dims) @@ -1008,12 +1057,12 @@ async def get_chunk( log.debug(msg) chunk_arr = None - dims = getChunkLayout(dset_json) + dims = getChunkDims(dset_json) type_json = dset_json["type"] dt = createDataType(type_json) layout_json = dset_json["layout"] layout_class = layout_json.get("class") - chunk_dims = getChunkLayout(dset_json) + chunk_dims = getChunkDims(dset_json) fill_value = getFillValue(dset_json) # note - officially we should follow the order in which the filters are @@ -1167,7 +1216,7 @@ def save_chunk(app, chunk_id, dset_json, chunk_arr, bucket=None): dset_id = dset_json["id"] dtype = createDataType(dset_json["type"]) - chunk_shape = getChunkLayout(dset_json) + chunk_shape = getChunkDims(dset_json) # will store filter options into app['filter_map'] filters = getFilters(dset_json) diff --git a/hsds/dset_dn.py b/hsds/dset_dn.py index 
b2b640e4..7b2029f8 100755 --- a/hsds/dset_dn.py +++ b/hsds/dset_dn.py @@ -57,9 +57,12 @@ async def GET_Dataset(request): resp_json["shape"] = dset_json["shape"] resp_json["attributeCount"] = len(dset_json["attributes"]) if "creationProperties" in dset_json: - resp_json["creationProperties"] = dset_json["creationProperties"] + cpl = dset_json["creationProperties"] + else: + cpl = {} if "layout" in dset_json: - resp_json["layout"] = dset_json["layout"] + cpl["layout"] = dset_json["layout"] + resp_json["creationProperties"] = cpl if "include_attrs" in params and params["include_attrs"]: resp_json["attributes"] = dset_json["attributes"] @@ -133,9 +136,8 @@ async def POST_Dataset(request): raise HTTPInternalServerError() shape_json = body["shape"] - layout = None if "layout" in body: - layout = body["layout"] # client specified chunk layout + log.error("unexpected key for POST Dataset: 'layout'") # ok - all set, create dataset obj now = getNow(app) @@ -160,9 +162,10 @@ async def POST_Dataset(request): } if "creationProperties" in body: - dset_json["creationProperties"] = body["creationProperties"] - if layout is not None: - dset_json["layout"] = layout + cpl = body["creationProperties"] + else: + cpl = {} + dset_json["creationProperties"] = cpl kwargs = {"bucket": bucket, "notify": True, "flush": True} await save_metadata_obj(app, dset_id, dset_json, **kwargs) @@ -175,8 +178,7 @@ async def POST_Dataset(request): resp_json["shape"] = shape_json resp_json["lastModified"] = dset_json["lastModified"] resp_json["attributeCount"] = len(attrs) - if layout is not None: - resp_json["layout"] = layout + resp_json["creationProperties"] = cpl resp = json_response(resp_json, status=201) log.response(request, resp=resp) diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py index fc1d3626..384defe7 100755 --- a/hsds/dset_lib.py +++ b/hsds/dset_lib.py @@ -23,11 +23,12 @@ from h5json.hdf5dtype import createDataType, getItemSize, getDtypeItemSize from h5json.array_util import getNumpyValue, bytesToArray from h5json.objid import isSchema2Id, getS3Key, getObjId +from h5json.shape_util import isNullSpace, getShapeDims +from h5json.dset_util import getChunkDims, getDatasetLayout, getDatasetLayoutClass from .util.nodeUtil import getDataNodeUrl from .util.boolparser import BooleanParser -from .util.dsetUtil import isNullSpace, getDatasetLayout, getDatasetLayoutClass, get_slices -from .util.dsetUtil import getShapeDims, getSelectionShape, getChunkLayout +from .util.dsetUtil import get_slices, getSelectionShape from .util.chunkUtil import getChunkCoordinate, getChunkIndex, getChunkSuffix from .util.chunkUtil import getNumChunks, getChunkIds, getChunkId from .util.chunkUtil import getChunkCoverage, getDataCoverage @@ -370,7 +371,7 @@ def get_chunk_selections(chunk_map, chunk_ids, slices, dset_json): log.debug("no slices set, returning") return # nothing to do log.debug(f"slices: {slices}") - layout = getChunkLayout(dset_json) + layout = getChunkDims(dset_json) for chunk_id in chunk_ids: if chunk_id in chunk_map: item = chunk_map[chunk_id] @@ -448,7 +449,7 @@ async def getSelectionData( log.error("getSelectionData - expected either slices or points to be set") raise HTTPInternalServerError() - layout = getChunkLayout(dset_json) + layout = getChunkDims(dset_json) chunkinfo = {} @@ -861,7 +862,15 @@ async def reduceShape(app, dset_json, shape_update, bucket=None): arr = np.zeros([1], dtype=dt, order="C") # and the chunk layout - layout = tuple(getChunkLayout(dset_json)) + layout = getChunkDims(dset_json) + if not layout: + 
layout = dset_json.get("layout") # older storage version put layout here + if layout: + log.warn(f"got layout for {dset_id} from dataset_json") + if not layout: + msg = f"no layout found for {dset_id}" + log.error(msg) + raise HTTPInternalServerError() log.debug(f"got layout: {layout}") # get all chunk ids for chunks that have been allocated @@ -1073,7 +1082,7 @@ async def doPointWrite(app, num_points = len(points) log.debug(f"doPointWrite - num_points: {num_points}") dset_id = dset_json["id"] - layout = getChunkLayout(dset_json) + layout = getChunkDims(dset_json) datashape = dset_json["shape"] dims = getShapeDims(datashape) rank = len(dims) @@ -1172,7 +1181,7 @@ async def doHyperslabWrite(app, log.error(msg) raise HTTPInternalServerError() - layout = getChunkLayout(dset_json) + layout = getChunkDims(dset_json) num_chunks = getNumChunks(page, layout) log.debug(f"num_chunks: {num_chunks}") diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 84a983c9..7a05999f 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -20,11 +20,13 @@ from h5json.hdf5dtype import createDataType from h5json.array_util import getNumElements, jsonToArray from h5json.objid import isValidUuid, isSchema2Id +from h5json.shape_util import getShapeDims +from h5json.dset_util import getChunkDims, getDatasetLayoutClass from .util.httpUtil import getHref, respJsonAssemble from .util.httpUtil import jsonResponse, getBooleanParam from .util.chunkUtil import getChunkIds -from .util.dsetUtil import getPreviewQuery, getShapeDims, getChunkLayout, getDatasetLayoutClass +from .util.dsetUtil import getPreviewQuery from .util.authUtil import getUserPasswordFromRequest, aclCheck from .util.authUtil import validateUserPassword from .util.domainUtil import getDomainFromRequest, getPathForDomain, isValidDomain @@ -620,7 +622,7 @@ def _updateInitValuesList(kwargs): msg = f"dataset init_data used with unsupported layout_class: {layout_class}" log.error(msg) raise HTTPInternalServerError() - layout_dims = getChunkLayout(dset_json) + layout_dims = getChunkDims(dset_json) log.debug(f"init data layout is: {layout_dims}") # make selection for entire dataspace dims = getShapeDims(dset_json["shape"]) diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 3e70a71a..b26ba8ee 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -29,8 +29,12 @@ from h5json.array_util import jsonToArray, getNumpyValue from h5json.objid import getCollectionForId, createObjId, getRootObjId from h5json.objid import isSchema2Id, getS3Key, isValidUuid -from h5json.hdf5dtype import getBaseTypeJson, validateTypeItem, createDataType -from h5json.hdf5dtype import getItemSize +from h5json.hdf5dtype import getBaseTypeJson, validateTypeItem, createDataType, getItemSize +from h5json.filters import getFiltersJson +from h5json.shape_util import getShapeDims, getShapeClass +from h5json.dset_util import guessChunk, getChunkSize +from h5json.dset_util import validateChunkLayout, getDataSize, getDsetMaxDims +from h5json.dset_util import LAYOUT_CLASSES from .util.nodeUtil import getDataNodeUrl from .util.authUtil import getAclKeys @@ -39,10 +43,8 @@ from .util.authUtil import aclCheck from .util.httpUtil import http_get, http_put, http_post, http_delete from .util.domainUtil import getBucketForDomain, verifyRoot, getLimits +from .util.dsetUtil import getShapeJson from .util.storUtil import getCompressors -from .util.dsetUtil import getShapeDims, getShapeJson, getFiltersJson, validateChunkLayout -from .util.dsetUtil import getContiguousLayout, guessChunk, 
getChunkSize -from .util.dsetUtil import expandChunk, shrinkChunk from .basenode import getVersion from . import hsds_logger as log @@ -976,8 +978,7 @@ def getShapeFromRequest(body): raise HTTPBadRequest(reason=msg) elif shape_class == "H5S_SCALAR": shape_json["class"] = "H5S_SCALAR" - dims = getShapeDims(shape_body) - if len(dims) != 1 or dims[0] != 1: + if "dims" in shape_body: msg = "dimensions aren't valid for scalar attribute" log.warn(msg) raise HTTPBadRequest(reason=msg) @@ -1230,7 +1231,7 @@ async def putAttributes(app, req = getDataNodeUrl(app, obj_id) collection = getCollectionForId(obj_id) req += f"/{collection}/{obj_id}/attributes" - log.info(f"putAttribute: {req}") + log.info(f"putAttributes: {req}") params = {} if replace: @@ -1304,7 +1305,7 @@ async def deleteObject(app, obj_id, bucket=None): def validateDatasetCreationProps(creation_props, type_json=None, shape=None): """ validate creation props """ - log.debug(f"validateCreationProps: {creation_props}") + log.debug(f"validateDatasetCreationProps: {creation_props}") if "fillValue" in creation_props: if not type_json or not shape: msg = "shape and type must be set to use fillValue" @@ -1344,12 +1345,16 @@ def validateDatasetCreationProps(creation_props, type_json=None, shape=None): log.warn(msg) raise HTTPBadRequest(reason=msg) - supported_filters = getSupportedFilters() - # will raise bad request exception if not valid supported_filters = getSupportedFilters(include_compressors=True) log.debug(f"supported_filters: {supported_filters}") - filters_out = getFiltersJson(creation_props, supported_filters=supported_filters) - # replace filters with our starndardized list + try: + filters_out = getFiltersJson(creation_props, supported_filters=supported_filters) + except (KeyError, ValueError): + # raise bad request exception if not valid + msg = "invalid filter provided" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + # replace filters with our standardized list log.debug(f"setting filters to: {filters_out}") creation_props["filters"] = filters_out @@ -1536,6 +1541,16 @@ def getDatasetCreateArgs(body, # will return scalar shape if no shape key in body shape_json = getShapeJson(body) + try: + shape_class = getShapeClass(shape_json) + shape_dims = getShapeDims(shape_json) + except (KeyError, TypeError, ValueError): + msg = f"Invalid shape: {shape_json}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + + log.debug(f"got createArgs: {list(kwargs.keys())}") + kwargs["shape"] = shape_json # get layout for dataset creation @@ -1545,6 +1560,8 @@ def getDatasetCreateArgs(body, max_chunk_size = int(config.get("max_chunk_size")) type_json = kwargs["type"] item_size = getItemSize(type_json) + if item_size == "H5T_VARIABLE": + item_size = config.get("default_vlen_type_size", default=128) creation_props = kwargs["creation_props"] layout_props = None @@ -1558,125 +1575,129 @@ def getDatasetCreateArgs(body, msg = f"invalid chunk layout: {layout_props}" log.warn(msg) raise HTTPBadRequest(reason=msg) + else: + creation_props = {} - # TBD: check for invalid layout class... 
+ layout_class = None + chunk_dims = None if layout_props: - if layout_props["class"] == "H5D_CONTIGUOUS": - # treat contiguous as chunked - layout_class = "H5D_CHUNKED" - else: - layout_class = layout_props["class"] - elif shape_json["class"] != "H5S_NULL": - layout_class = "H5D_CHUNKED" - else: - layout_class = None - log.debug(f"using layout_class: {layout_class}") + layout_class = layout_props.get("class") - if layout_class == "H5D_COMPACT": - layout = {"class": "H5D_COMPACT"} - elif layout_class: - # initialize to H5D_CHUNKED - layout = {"class": "H5D_CHUNKED"} - else: - # null space - no layout - layout = None + if layout_class: + if layout_class not in LAYOUT_CLASSES: + msg = f"unknown layout_class: {layout_class}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + # check dims is defined for any chunked layout + if layout_class.startswith("H5D_CHUNKED"): + if "dims" not in layout_props: + msg = "chunked layout specified without dims" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + chunk_dims = layout_props["dims"] + if len(chunk_dims) != len(shape_dims): + msg = "chunk dimensions have different rank than dataset" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + elif layout_class == "H5D_CONTIGUOUS_REF" and getItemSize(type_json) == "H5T_VARIABLE": + # ref dataset does not work with vlen type + msg = "H5D_CONTIGUOUS_REF cannot be used with variable length types" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + else: + pass - if layout_props and "dims" in layout_props: - chunk_dims = layout_props["dims"] + elif shape_class == "H5S_NULL": + layout_class = None + log.debug("using None layout for H5S_NULL dataset") + elif shape_class == "H5S_SCALAR": + layout_class = "H5D_CONTIGUOUS" + log.debug("Using H5D_CONTIGUOUS for H5S_SCALAR dataset") + elif shape_class == "H5S_SIMPLE": + dset_size = getDataSize(shape_dims, item_size) + if dset_size <= min_chunk_size: + # default to contiguous + layout_class = "H5D_CONTIGUOUS" + log.debug(f"Using H5D_CONTIGUOUS for small (<{min_chunk_size}) dataset") + else: + layout_class = "H5D_CHUNKED" + log.debug(f"shape_json: {shape_json}") + log.debug(f"item_size: {item_size}") + log.debug(f"chunk_min: {min_chunk_size}") + log.debug(f"chunk_max: {max_chunk_size}") + kwargs = {"chunk_min": min_chunk_size, "chunk_max": max_chunk_size} + chunk_dims = guessChunk(shape_json, item_size, **kwargs) + log.debug(f"initial autochunk layout: {chunk_dims}") + chunk_size = getChunkSize(chunk_dims, item_size) + + # log warning if the chunk shape if chunk size is too small or too big + if chunk_size < min_chunk_size: + msg = f"chunk size: {chunk_size} less than recommended min size: {min_chunk_size}" + log.warn(msg) + elif chunk_size > max_chunk_size: + msg = f"chunk size: {chunk_size} greater than recommended " + msg += f"max size: {max_chunk_size}" + log.debug(msg) else: - chunk_dims = None - - if layout_class == "H5D_CONTIGUOUS_REF": - opts = {"chunk_min": min_chunk_size, "chunk_max": max_chunk_size} - chunk_dims = getContiguousLayout(shape_json, item_size, **opts) - layout["dims"] = chunk_dims - log.debug(f"autoContiguous layout: {layout}") - - if layout_class == "H5D_CHUNKED" and chunk_dims is None: - # do auto-chunking - chunk_dims = guessChunk(shape_json, item_size) - log.debug(f"initial autochunk layout: {chunk_dims}") - - if layout_class == "H5D_CHUNKED": - chunk_size = getChunkSize(chunk_dims, item_size) - - msg = f"chunk_size: {chunk_size}, min: {min_chunk_size}, " - msg += f"max: {max_chunk_size}" - log.debug(msg) + msg = f"unexpected 
shape_class: {shape_class}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) - # adjust the chunk shape if chunk size is too small or too big - adjusted_chunk_dims = None - if chunk_size < min_chunk_size: - msg = f"chunk size: {chunk_size} less than min size: " - msg += f"{min_chunk_size}, expanding" - log.debug(msg) - opts = {"chunk_min": min_chunk_size, "layout_class": layout_class} - adjusted_chunk_dims = expandChunk(chunk_dims, item_size, shape_json, **opts) - elif chunk_size > max_chunk_size: - msg = f"chunk size: {chunk_size} greater than max size: " - msg += f"{max_chunk_size}, shrinking" + if not layout_props: + layout_props = {"class": layout_class} + if chunk_dims: + layout_props["dims"] = chunk_dims + log.debug(f"using dataset layout: {layout_props}") + creation_props["layout"] = layout_props + + # set partition_count if needed: + max_chunks_per_folder = int(config.get("max_chunks_per_folder")) + set_partition = False + if max_chunks_per_folder > 0: + if "dims" in layout_props: + set_partition = True + + if set_partition: + log.debug(f"updating layout for partition constraint: {max_chunks_per_folder}") + shape_dims = getShapeDims(shape_json) + max_dims = getDsetMaxDims(shape_json) + + num_chunks = 1 + rank = len(shape_dims) + unlimited_count = 0 + if max_dims: + for i in range(rank): + if max_dims[i] == 0: + unlimited_count += 1 + msg = f"number of unlimited dimensions: {unlimited_count}" log.debug(msg) - opts = {"chunk_max": max_chunk_size} - adjusted_chunk_dims = shrinkChunk(chunk_dims, item_size, **opts) - if adjusted_chunk_dims: - msg = f"requested chunk_dimensions: {chunk_dims} modified " - msg += f"dimensions: {adjusted_chunk_dims}" - log.debug(msg) - layout["dims"] = adjusted_chunk_dims - else: - layout["dims"] = chunk_dims # don't need to adjust chunk size - - # set partition_count if needed: - max_chunks_per_folder = int(config.get("max_chunks_per_folder")) - set_partition = False - if max_chunks_per_folder > 0: - if "dims" in shape_json and "dims" in layout: - set_partition = True - - if set_partition: - chunk_dims = layout["dims"] - shape_dims = shape_json["dims"] - if "maxdims" in shape_json: - max_dims = shape_json["maxdims"] - else: - max_dims = None - num_chunks = 1 - rank = len(shape_dims) - unlimited_count = 0 + for i in range(rank): + max_dim = 1 if max_dims: - for i in range(rank): - if max_dims[i] == 0: - unlimited_count += 1 - msg = f"number of unlimited dimensions: {unlimited_count}" - log.debug(msg) - - for i in range(rank): - max_dim = 1 - if max_dims: - max_dim = max_dims[i] - if max_dim == 0: - # don't really know what the ultimate extent - # could be, but assume 10^6 for total number of - # elements and square-shaped array... - MAX_ELEMENT_GUESS = 10.0 ** 6 - exp = 1 / unlimited_count - max_dim = int(math.pow(MAX_ELEMENT_GUESS, exp)) - else: - max_dim = shape_dims[i] - num_chunks *= math.ceil(max_dim / chunk_dims[i]) - - if num_chunks > max_chunks_per_folder: - partition_count = math.ceil(num_chunks / max_chunks_per_folder) - msg = f"set partition count to: {partition_count}, " - msg += f"num_chunks: {num_chunks}" - log.info(msg) - layout["partition_count"] = partition_count + max_dim = max_dims[i] + if max_dim == 0: + # don't really know what the ultimate extent + # could be, but assume 10^6 for total number of + # elements and square-shaped array... 
+ MAX_ELEMENT_GUESS = 10.0 ** 6 + exp = 1 / unlimited_count + max_dim = int(math.pow(MAX_ELEMENT_GUESS, exp)) else: - msg = "do not need chunk partitions, num_chunks: " - msg += f"{num_chunks} max_chunks_per_folder: " - msg += f"{max_chunks_per_folder}" - log.info(msg) + max_dim = shape_dims[i] + num_chunks *= math.ceil(max_dim / chunk_dims[i]) + + if num_chunks > max_chunks_per_folder: + partition_count = math.ceil(num_chunks / max_chunks_per_folder) + msg = f"set partition count to: {partition_count}, " + msg += f"num_chunks: {num_chunks}" + log.info(msg) + layout_props["partition_count"] = partition_count + else: + msg = "do not need chunk partitions, num_chunks: " + msg += f"{num_chunks} max_chunks_per_folder: " + msg += f"{max_chunks_per_folder}" + log.info(msg) if layout_class in ("H5D_CHUNKED_REF", "H5D_CHUNKED_REF_INDIRECT"): chunk_size = getChunkSize(chunk_dims, item_size) @@ -1694,11 +1715,10 @@ def getDatasetCreateArgs(body, msg = f"chunk size: {chunk_size} greater than max size: " msg += f"{max_chunk_size}, for {layout_class} dataset" log.warn(msg) - layout["dims"] = chunk_dims + layout_props["dims"] = chunk_dims - if layout: - log.debug(f"setting layout to: {layout}") - kwargs["layout"] = layout + creation_props["layout"] = layout_props + kwargs["creation_props"] = creation_props # # get input data if present @@ -1813,7 +1833,6 @@ async def createObject(app, obj_id=None, type=None, shape=None, - layout=None, creation_props=None, attrs=None, links=None, @@ -1846,8 +1865,6 @@ async def createObject(app, log.debug(f" type: {type}") if shape: log.debug(f" shape: {shape}") - if layout: - log.debug(f" layout: {layout}") if creation_props: log.debug(f" cprops: {creation_props}") if attrs: @@ -1886,10 +1903,10 @@ async def createObject(app, obj_json["type"] = type if shape: obj_json["shape"] = shape - if layout: - obj_json["layout"] = layout if creation_props: obj_json["creationProperties"] = creation_props + else: + obj_json["creationProperties"] = {} if attrs: kwargs = {"obj_id": obj_id, "bucket": bucket} attrs_json = {"attributes": attrs} @@ -1987,7 +2004,6 @@ async def createDataset(app, h5path=None, obj_id=None, creation_props=None, - layout=None, attrs=None, links=None, implicit=None, @@ -2011,7 +2027,6 @@ async def createDataset(app, kwargs["shape"] = shape kwargs["h5path"] = h5path kwargs["obj_id"] = obj_id - kwargs["layout"] = layout kwargs["creation_props"] = creation_props kwargs["attrs"] = attrs kwargs["links"] = links diff --git a/hsds/util/dsetUtil.py b/hsds/util/dsetUtil.py index 5bf3afc5..3704822d 100644 --- a/hsds/util/dsetUtil.py +++ b/hsds/util/dsetUtil.py @@ -10,297 +10,14 @@ # request a copy from help@hdfgroup.org. # ############################################################################## -from aiohttp.web_exceptions import HTTPBadRequest, HTTPInternalServerError +from aiohttp.web_exceptions import HTTPBadRequest import math -from h5json.hdf5dtype import getItemSize, isVlen -from h5json.objid import isValidUuid +from h5json.shape_util import getShapeDims from .. import hsds_logger as log -from .. import config - - -CHUNK_MIN = 512 * 1024 # Soft lower limit (512k) -CHUNK_MAX = 2048 * 1024 # Hard upper limit (2M) -DEFAULT_TYPE_SIZE = 128 # Type size case when it is variable - -""" -Filters that are known to HSDS. -Format is: - FILTER_CODE, FILTER_ID, Name - - H5Z_FILTER_FLETCHER32, H5Z_FILTER_SZIP, H5Z_FILTER_NBIT, - and H5Z_FILTER_SCALEOFFSET, are not currently supported. 
- - Non-supported filters metadata will be stored, but are - not (currently) used for compression/decompression. -""" - -FILTER_DEFS = ( - ("H5Z_FILTER_NONE", 0, "none"), - ("H5Z_FILTER_DEFLATE", 1, "gzip"), # aka as "zlib" for blosc - ("H5Z_FILTER_SHUFFLE", 2, "shuffle"), - ("H5Z_FILTER_FLETCHER32", 3, "fletcher32"), - ("H5Z_FILTER_SZIP", 4, "szip"), - ("H5Z_FILTER_NBIT", 5, "nbit"), - ("H5Z_FILTER_SCALEOFFSET", 6, "scaleoffset"), - ("H5Z_FILTER_LZF", 32000, "lzf"), - ("H5Z_FILTER_BLOSC", 32001, "blosclz"), - ("H5Z_FILTER_SNAPPY", 32003, "snappy"), - ("H5Z_FILTER_LZ4", 32004, "lz4"), - ("H5Z_FILTER_LZ4HC", 32005, "lz4hc"), - ("H5Z_FILTER_BITSHUFFLE", 32008, "bitshuffle"), - ("H5Z_FILTER_ZSTD", 32015, "zstd"), -) - -COMPRESSION_FILTER_IDS = ( - "H5Z_FILTER_DEFLATE", - "H5Z_FILTER_SZIP", - "H5Z_FILTER_SCALEOFFSET", - "H5Z_FILTER_LZF", - "H5Z_FILTER_BLOSC", - "H5Z_FILTER_SNAPPY", - "H5Z_FILTER_LZ4", - "H5Z_FILTER_LZ4HC", - "H5Z_FILTER_ZSTD", -) - -COMPRESSION_FILTER_NAMES = ( - "gzip", - "szip", - "lzf", - "blosclz", - "snappy", - "lz4", - "lz4hc", - "zstd", -) - -CHUNK_LAYOUT_CLASSES = ( - "H5D_CHUNKED", - "H5D_CHUNKED_REF", - "H5D_CHUNKED_REF_INDIRECT", - "H5D_CONTIGUOUS_REF", -) - - -def get_dset_size(shape_json, typesize): - """Return the size of the dataspace. For - any unlimited dimensions, assume a value of 1. - (so the return size will be the absolute minimum) - """ - if shape_json is None or shape_json["class"] == "H5S_NULL": - return None - if shape_json["class"] == "H5S_SCALAR": - return typesize # just return size for one item - if typesize == "H5T_VARIABLE": - typesize = DEFAULT_TYPE_SIZE # just take a guess at the item size - dset_size = typesize - shape = shape_json["dims"] - rank = len(shape) - - for n in range(rank): - if shape[n] == 0: - # extendable extent with value of 0 - continue # assume this is one - dset_size *= shape[n] - return dset_size - - -def getFilterItem(key): - """ - Return filter code, id, and name, based on an id, a name or a code. 
- """ - - if key == "deflate": - key = "gzip" # use gzip as equivalent - for item in FILTER_DEFS: - for i in range(3): - if key == item[i]: - return {"class": item[0], "id": item[1], "name": item[2]} - return None # not found - - -def getFiltersJson(create_props, supported_filters=None): - """ return standardized filter representation from creation properties - raise bad request if invalid """ - - # refer to https://hdf5-json.readthedocs.io/en/latest/bnf/\ - # filters.html#grammar-token-filter_list - - if "filters" not in create_props: - return {} # null set - - f_in = create_props["filters"] - - log.debug(f"filters provided in creation_prop: {f_in}") - - if not isinstance(f_in, list): - msg = "Expected filters in creation_props to be a list" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - f_out = [] - for filter in f_in: - if isinstance(filter, int) or isinstance(filter, str): - item = getFilterItem(filter) - if not item: - msg = f"filter {filter} not recognized" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - if item["name"] not in supported_filters: - msg = f"filter {filter} is not supported" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - f_out.append(item) - elif isinstance(filter, dict): - if "class" not in filter: - msg = "expected 'class' key for filter property" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if filter["class"] != "H5Z_FILTER_USER": - item = getFilterItem(filter["class"]) - elif "id" in filter: - item = getFilterItem(filter["id"]) - elif "name" in filter: - item = getFilterItem(filter["name"]) - else: - item = None - if not item: - msg = f"filter {filter['class']} not recognized" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if "id" not in filter: - filter["id"] = item["id"] - elif item["id"] != filter["id"]: - msg = f"Expected {filter['class']} to have id: " - msg += f"{item['id']} but got {filter['id']}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - if "name" not in filter: - filter["name"] = item["name"] - if filter["name"] not in supported_filters: - msg = f"filter {filter} is not supported" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - f_out.append(filter) - else: - msg = f"Unexpected type for filter: {filter}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - # return standardized filter representation - log.debug(f"using filters: {f_out}") - return f_out - - -def getFilters(dset_json): - """Return list of filters, or empty list""" - if "creationProperties" not in dset_json: - return [] - creationProperties = dset_json["creationProperties"] - if "filters" not in creationProperties: - return [] - filters = creationProperties["filters"] - return filters - - -def getCompressionFilter(filters): - """Return compression filter from filters, or None""" - for filter in filters: - if "class" not in filter: - msg = f"filter option: {filter} with no class key" - log.warn(msg) - continue - filter_class = filter["class"] - if filter_class in COMPRESSION_FILTER_IDS: - return filter - if all( - ( - filter_class == "H5Z_FILTER_USER", - "name" in filter, - filter["name"] in COMPRESSION_FILTER_NAMES, - ) - ): - return filter - return None - - -def getShuffleFilter(filters): - """Return shuffle filter, or None""" - FILTER_CLASSES = ("H5Z_FILTER_SHUFFLE", "H5Z_FILTER_BITSHUFFLE") - for filter in filters: - log.debug(f"filter: {filter}") - if "class" not in filter: - log.warn(f"filter option: {filter} with no class key") - continue - filter_class = filter["class"] - if filter_class in FILTER_CLASSES: - log.debug(f"found filter: 
{filter}") - return filter - - log.debug("Shuffle filter not used") - return None - - -def getFilterOps(app, dset_id, filters, dtype=None, chunk_shape=None): - """Get list of filter operations to be used for this dataset""" - filter_map = app["filter_map"] - - try: - if dset_id in filter_map: - log.debug(f"returning filter from filter_map for dset: {dset_id}") - return filter_map[dset_id] - except TypeError: - log.error(f"getFilterOps TypeError - dset_id: {dset_id} filter_map: {filter_map}") - raise - - compressionFilter = getCompressionFilter(filters) - log.debug(f"got compressionFilter: {compressionFilter}") - - filter_ops = {} - - shuffleFilter = getShuffleFilter(filters) - - if shuffleFilter and not isVlen(dtype): - shuffle_name = shuffleFilter["name"] - if shuffle_name == "shuffle": - filter_ops["shuffle"] = 1 # use regular shuffle - elif shuffle_name == "bitshuffle": - filter_ops["shuffle"] = 2 # use bitshuffle - else: - log.warn(f"unexpected shuffleFilter: {shuffle_name}") - filter_ops["shuffle"] = 0 # no shuffle - else: - filter_ops["shuffle"] = 0 # no shuffle - - if compressionFilter: - if compressionFilter["class"] == "H5Z_FILTER_DEFLATE": - filter_ops["compressor"] = "zlib" # blosc compressor - else: - if "name" in compressionFilter: - filter_ops["compressor"] = compressionFilter["name"] - else: - filter_ops["compressor"] = "lz4" # default to lz4 - if "level" not in compressionFilter: - filter_ops["level"] = 5 # medium level - else: - filter_ops["level"] = int(compressionFilter["level"]) - - if filter_ops: - # save the chunk shape and dtype - filter_ops["chunk_shape"] = chunk_shape - filter_ops["dtype"] = dtype - log.debug(f"save filter ops for {dset_id}") - filter_map[dset_id] = filter_ops # save - - return filter_ops - else: - return None - + def getShapeJson(body): """ Return normalized json description of data space """ @@ -399,608 +116,6 @@ def getShapeJson(body): return shape_json -def getShapeClass(data_shape): - """ Return shape class of the given data shape """ - - if not isinstance(data_shape, dict): - raise TypeError("expected dict object") - - if "class" not in data_shape: - raise KeyError("expected 'class' key for data shape")\ - - return data_shape["class"] - - -def getRank(data_shape): - """ Return rank of given data shape_json """ - - shape_class = getShapeClass(data_shape) - - if shape_class == "H5S_NULL": - return 0 - elif shape_class == "H5S_SCALAR": - return 0 - elif shape_class == "H5S_SIMPLE": - if "dims" not in data_shape: - raise KeyError("expected dims key for H5S_SIMPLE data shape") - return len(data_shape["dims"]) - else: - raise ValueError(f"unexpected data shape class: {shape_class}") - - -def getDsetRank(dset_json): - """Get rank returning 0 for scalar or NULL data shapes""" - data_shape = dset_json["shape"] - return getRank(data_shape) - - -def isNullSpace(dset_json): - """Return true if this dataset is a null data space""" - shape_class = getShapeClass(dset_json["shape"]) - if shape_class == "H5S_NULL": - return True - else: - return False - - -def isScalarSpace(dset_json): - """ return true if this is a scalar dataset """ - - data_shape = dset_json["shape"] - shape_class = getShapeClass(data_shape) - if shape_class == "H5S_NULL": - return False - - rank = getRank(data_shape) - return True if rank == 0 else False - - -def getContiguousLayout(shape_json, item_size, chunk_min=None, chunk_max=None): - """ - create a chunk layout for datasets use contiguous storage. 
- """ - if not isinstance(item_size, int): - msg = "ContiguousLayout can only be used with fixed-length types" - log.warn(msg) - raise ValueError(msg) - - if chunk_min is None: - msg = "chunk_min not set" - log.warn(msg) - raise ValueError(msg) - if chunk_max is None: - msg = "chunk_max not set" - log.warn(msg) - raise ValueError(msg) - - if chunk_max < chunk_min: - raise ValueError("chunk_max cannot be less than chunk_min") - - if shape_json is None or shape_json["class"] == "H5S_NULL": - return None - if shape_json["class"] == "H5S_SCALAR": - return (1,) # just enough to store one item - dims = shape_json["dims"] - rank = len(dims) - if rank == 0: - raise ValueError("rank must be positive for Contiguous Layout") - for dim in dims: - if dim < 0: - raise ValueError("extents must be positive for Contiguous Layout") - if dim == 0: - # data shape with no elements, just return dims as layout - return dims - - nsize = item_size - layout = [1,] * rank - - for i in range(rank): - dim = rank - i - 1 - extent = dims[dim] - if extent * nsize < chunk_max: - # just use the full extent as layout - layout[dim] = extent - nsize *= extent - else: - n = extent - while n > 1: - n = -(-n // 2) # use negatives so we round up on odds - if n * nsize < chunk_max: - break - layout[dim] = n - break # just use 1's for the rest of the layout - - return layout - - -def getChunkSize(layout, type_size): - """Return chunk size given layout. - i.e. just the product of the values in the list. - """ - if type_size == "H5T_VARIABLE": - type_size = DEFAULT_TYPE_SIZE - - chunk_size = type_size - for n in layout: - if n <= 0: - raise ValueError("Invalid chunk layout") - chunk_size *= n - return chunk_size - - -def validateChunkLayout(shape_json, item_size, layout, chunk_table=None): - """ - Use chunk layout given in the creationPropertiesList (if defined and - layout is valid). - Return chunk_layout_json - """ - - rank = 0 - space_dims = None - chunk_dims = None - max_dims = None - - if "dims" in shape_json: - space_dims = shape_json["dims"] - rank = len(space_dims) - - if "maxdims" in shape_json: - max_dims = shape_json["maxdims"] - if "dims" in layout: - chunk_dims = layout["dims"] - - if chunk_dims: - # validate that the chunk_dims are valid and correlates with the - # dataset shape - if isinstance(chunk_dims, int): - chunk_dims = [ - chunk_dims, - ] # promote to array - if len(chunk_dims) != rank: - msg = "Layout rank does not match shape rank" - log.warn(msg) - raise ValueError(msg) - for i in range(rank): - dim_extent = space_dims[i] - chunk_extent = chunk_dims[i] - if not isinstance(chunk_extent, int): - msg = "Layout dims must be integer or integer array" - log.warn(msg) - raise ValueError(msg) - if chunk_extent <= 0: - msg = "Invalid layout value" - log.warn(msg) - raise ValueError(msg) - if max_dims is None: - if chunk_extent > dim_extent: - msg = "Invalid layout value" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - elif max_dims[i] != 0: - if chunk_extent > max_dims[i]: - msg = "Invalid layout value for extensible dimension" - log.warn(msg) - raise ValueError(msg) - else: - pass # allow any positive value for unlimited dimensions - - if "class" not in layout: - msg = "class key not found in layout for creation property list" - log.warn(msg) - raise ValueError(msg) - - layout_class = layout["class"] - - if layout_class == "H5D_CONTIGUOUS_REF": - # reference to a dataset in a traditional HDF5 files with - # contiguous storage - if item_size == "H5T_VARIABLE": - # can't be used with variable types... 
- msg = "Datasets with variable types cannot be used with " - msg += "reference layouts" - log.warn(msg) - raise ValueError(msg) - if "file_uri" not in layout: - # needed for H5D_CONTIGUOUS_REF - msg = "'file_uri' key must be provided for " - msg += "H5D_CONTIGUOUS_REF layout" - log.warn(msg) - raise ValueError(msg) - if "offset" not in layout: - # needed for H5D_CONTIGUOUS_REF - msg = "'offset' key must be provided for " - msg += "H5D_CONTIGUOUS_REF layout" - log.warn(msg) - raise ValueError(msg) - if "size" not in layout: - # needed for H5D_CONTIGUOUS_REF - msg = "'size' key must be provided for " - msg += "H5D_CONTIGUOUS_REF layout" - log.warn(msg) - raise ValueError(msg) - if "dims" in layout: - # used defined chunk layout not allowed for H5D_CONTIGUOUS_REF - msg = "'dims' key can not be provided for " - msg += "H5D_CONTIGUOUS_REF layout" - log.warn(msg) - raise ValueError(msg) - elif layout_class == "H5D_CHUNKED_REF": - # reference to a dataset in a traditional HDF5 files with - # chunked storage - if item_size == "H5T_VARIABLE": - # can't be used with variable types.. - msg = "Datasets with variable types cannot be used with " - msg += "reference layouts" - log.warn(msg) - raise ValueError(msg) - if "file_uri" not in layout: - # needed for H5D_CHUNKED_REF - msg = "'file_uri' key must be provided for " - msg += "H5D_CHUNKED_REF layout" - log.warn(msg) - raise ValueError(msg) - if "dims" not in layout: - # needed for H5D_CHUNKED_REF - msg = "'dimns' key must be provided for " - msg += "H5D_CHUNKED_REF layout" - log.warn(msg) - raise ValueError(msg) - if "chunks" not in layout: - msg = "'chunks' key must be provided for " - msg += "H5D_CHUNKED_REF layout" - log.warn(msg) - raise ValueError(msg) - elif layout_class == "H5D_CHUNKED_REF_INDIRECT": - # reference to a dataset in a traditional HDF5 files with chunked - # storage using an auxiliary dataset - if item_size == "H5T_VARIABLE": - # can't be used with variable types.. 
- msg = "Datasets with variable types cannot be used with " - msg += "reference layouts" - log.warn(msg) - raise ValueError(msg) - if "dims" not in layout: - # needed for H5D_CHUNKED_REF_INDIRECT - msg = "'dims' key must be provided for " - msg += "H5D_CHUNKED_REF_INDIRECT layout" - log.warn(msg) - raise ValueError(msg) - if "chunk_table" not in layout: - msg = "'chunk_table' key must be provided for " - msg += "H5D_CHUNKED_REF_INDIRECT layout" - log.warn(msg) - raise ValueError(msg) - chunk_table_id = layout["chunk_table"] - if not isValidUuid(chunk_table_id, "Dataset"): - msg = f"Invalid chunk table id: {chunk_table_id}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - elif layout_class == "H5D_CHUNKED": - if "dims" not in layout: - msg = "dims key not found in layout for creation property list" - log.warn(msg) - raise ValueError(msg) - if shape_json["class"] != "H5S_SIMPLE": - msg = "Bad Request: chunked layout not valid with shape class: " - msg += f"{shape_json['class']}" - log.warn(msg) - raise ValueError(msg) - elif layout_class == "H5D_CONTIGUOUS": - if "dims" in layout: - msg = "dims key found in layout for creation property list " - msg += "for H5D_CONTIGUOUS storage class" - log.warn(msg) - raise ValueError(msg) - elif layout_class == "H5D_COMPACT": - if "dims" in layout: - msg = "dims key found in layout for creation property list " - msg += "for H5D_COMPACT storage class" - log.warn(msg) - raise ValueError(msg) - else: - msg = f"Unexpected layout: {layout_class}" - log.warn(msg) - raise ValueError(msg) - - -def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN, layout_class="H5D_CHUNKED"): - """Compute an increased chunk shape with a size in bytes greater than chunk_min.""" - if shape_json is None or shape_json["class"] == "H5S_NULL": - return None - if shape_json["class"] == "H5S_SCALAR": - return (1,) # just enough to store one item - - layout = list(layout) - log.debug(f"expandChunk layout: {layout} typesize: {typesize}") - dims = shape_json["dims"] - rank = len(dims) - extendable_dims = 0 # number of dimensions that are extenable - maxdims = None - if "maxdims" in shape_json: - maxdims = shape_json["maxdims"] - for n in range(rank): - if maxdims[n] == 0 or maxdims[n] > dims[n]: - extendable_dims += 1 - - dset_size = get_dset_size(shape_json, typesize) - if dset_size <= chunk_min and extendable_dims == 0: - # just use the entire dataspace shape as one big chunk - return tuple(dims) - - chunk_size = getChunkSize(layout, typesize) - if chunk_size >= chunk_min: - return tuple(layout) # good already - while chunk_size < chunk_min: - # just adjust along extendable dimensions first - old_chunk_size = chunk_size - for n in range(rank): - dim = rank - n - 1 # start from last dim - - if extendable_dims > 0: - if maxdims[dim] == 0: - # infinitely extendable dimensions - layout[dim] *= 2 - chunk_size = getChunkSize(layout, typesize) - if chunk_size > chunk_min: - break - elif maxdims[dim] > layout[dim]: - # can only be extended so much - layout[dim] *= 2 - if layout[dim] >= dims[dim]: - layout[dim] = maxdims[dim] # trim back - extendable_dims -= 1 # one less extenable dimension - - chunk_size = getChunkSize(layout, typesize) - if chunk_size > chunk_min: - break - else: - pass # ignore non-extensible for now - else: - # no extendable dimensions - if dims[dim] > layout[dim]: - # can expand chunk along this dimension - layout[dim] *= 2 - if layout[dim] > dims[dim]: - layout[dim] = dims[dim] # trim back - chunk_size = getChunkSize(layout, typesize) - if chunk_size > 
chunk_min: - break - else: - pass # can't extend chunk along this dimension - if chunk_size <= old_chunk_size: - # stop iteration if we haven't increased the chunk size - log.debug("stopping expandChunk iteration") - break - elif chunk_size > chunk_min: - break # we're good - else: - pass # do another round - return tuple(layout) - - -def shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX, layout_class="H5D_CHUNKED"): - """Compute a reduced chunk shape with a size in bytes less than chunk_max.""" - layout = list(layout) - chunk_size = getChunkSize(layout, typesize) - if chunk_size <= chunk_max: - return tuple(layout) # good already - log.debug(f"shrinkChunk layout: {layout} typesize: {typesize}") - rank = len(layout) - - while chunk_size > chunk_max: - # just adjust along extendable dimensions first - old_chunk_size = chunk_size - for dim in range(rank): - if layout[dim] > 1: - # tricky way to do x // 2 with ceil - layout[dim] = -(-layout[dim] // 2) - chunk_size = getChunkSize(layout, typesize) - if chunk_size <= chunk_max: - break - else: - pass # can't shrink chunk along this dimension - if chunk_size >= old_chunk_size: - # reality check to see if we'll ever break out of the while loop - log.warning("Unexpected error in shrink_chunk") - break - elif chunk_size <= chunk_max: - break # we're good - else: - pass # do another round - return tuple(layout) - - -def guessChunk(shape_json, typesize): - """Guess an appropriate chunk layout for a dataset, given its shape and - the size of each element in bytes. Will allocate chunks only as large - as MAX_SIZE. Chunks are generally close to some power-of-2 fraction of - each axis, slightly favoring bigger values for the last index. - - Undocumented and subject to change without warning. - """ - if shape_json is None or shape_json["class"] == "H5S_NULL": - return None - if shape_json["class"] == "H5S_SCALAR": - return (1,) # just enough to store one item - - if "maxdims" in shape_json: - shape = shape_json["maxdims"] - else: - shape = shape_json["dims"] - - if typesize == "H5T_VARIABLE": - typesize = 128 # just take a guess at the item size - - # For unlimited dimensions we have to guess. use 1024 - shape = tuple((x if x != 0 else 1024) for i, x in enumerate(shape)) - - return shape - - -def getLayoutJson(creation_props, shape=None, type_json=None, chunk_min=None, chunk_max=None): - """ Get the layout json given by creation_props. 
- Raise bad request error if invalid """ - - min_chunk_size = int(config.get("min_chunk_size")) - max_chunk_size = int(config.get("max_chunk_size")) - - item_size = getItemSize(type_json) - if chunk_min is None: - chunk_min = 1000 * 1000 - if chunk_max is None: - chunk_max = 4 * 1000 * 1000 - - if chunk_min > chunk_max: - msg = "chunk_max must be larger than chunk_min" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - layout = None - if "layout" in creation_props: - layout_props = creation_props["layout"] - else: - layout_props = None - - if layout_props: - if "class" not in layout_props: - msg = "expected class key in layout props" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - layout_class = layout_props["class"] - if layout_class == "H5D_CONTIGUOUS": - # treat contiguous as chunked - layout_class = "H5D_CHUNKED" - else: - layout_class = layout_props["class"] - elif shape["class"] != "H5S_NULL": - layout_class = "H5D_CHUNKED" - else: - layout_class = None - - if layout_class == "H5D_COMPACT": - layout = {"class": "H5D_COMPACT"} - elif layout_class: - # initialize to H5D_CHUNKED - layout = {"class": "H5D_CHUNKED"} - else: - # null space - no layout - layout = None - - if layout_props and "dims" in layout_props: - chunk_dims = layout_props["dims"] - else: - chunk_dims = None - - if layout_class == "H5D_CONTIGUOUS_REF": - kwargs = {"chunk_min": min_chunk_size, "chunk_max": max_chunk_size} - chunk_dims = getContiguousLayout(shape, item_size, **kwargs) - layout["dims"] = chunk_dims - log.debug(f"autoContiguous layout: {layout}") - - if layout_class == "H5D_CHUNKED" and chunk_dims is None: - # do auto-chunking - chunk_dims = guessChunk(shape, item_size) - log.debug(f"initial autochunk layout: {chunk_dims}") - - if layout_class == "H5D_CHUNKED": - chunk_size = getChunkSize(chunk_dims, item_size) - - msg = f"chunk_size: {chunk_size}, min: {min_chunk_size}, " - msg += f"max: {max_chunk_size}" - log.debug(msg) - # adjust the chunk shape if chunk size is too small or too big - adjusted_chunk_dims = None - if chunk_size < min_chunk_size: - msg = f"chunk size: {chunk_size} less than min size: " - msg += f"{min_chunk_size}, expanding" - log.debug(msg) - kwargs = {"chunk_min": min_chunk_size, "layout_class": layout_class} - adjusted_chunk_dims = expandChunk(chunk_dims, item_size, shape, **kwargs) - elif chunk_size > max_chunk_size: - msg = f"chunk size: {chunk_size} greater than max size: " - msg += f"{max_chunk_size}, shrinking" - log.debug(msg) - kwargs = {"chunk_max": max_chunk_size} - adjusted_chunk_dims = shrinkChunk(chunk_dims, item_size, **kwargs) - if adjusted_chunk_dims: - msg = f"requested chunk_dimensions: {chunk_dims} modified " - msg += f"dimensions: {adjusted_chunk_dims}" - log.debug(msg) - layout["dims"] = adjusted_chunk_dims - else: - layout["dims"] = chunk_dims # don't need to adjust chunk size - - # set partition_count if needed: - max_chunks_per_folder = int(config.get("max_chunks_per_folder")) - set_partition = False - if max_chunks_per_folder > 0: - if "dims" in shape and "dims" in layout: - set_partition = True - - if set_partition: - chunk_dims = layout["dims"] - shape_dims = shape["dims"] - if "maxdims" in shape: - max_dims = shape["maxdims"] - else: - max_dims = None - num_chunks = 1 - rank = len(shape_dims) - unlimited_count = 0 - if max_dims: - for i in range(rank): - if max_dims[i] == 0: - unlimited_count += 1 - msg = f"number of unlimited dimensions: {unlimited_count}" - log.debug(msg) - - for i in range(rank): - max_dim = 1 - if max_dims: - max_dim = max_dims[i] 
- if max_dim == 0: - # don't really know what the ultimate extent - # could be, but assume 10^6 for total number of - # elements and square-shaped array... - MAX_ELEMENT_GUESS = 10.0 ** 6 - exp = 1 / unlimited_count - max_dim = int(math.pow(MAX_ELEMENT_GUESS, exp)) - else: - max_dim = shape_dims[i] - num_chunks *= math.ceil(max_dim / chunk_dims[i]) - - if num_chunks > max_chunks_per_folder: - partition_count = math.ceil(num_chunks / max_chunks_per_folder) - msg = f"set partition count to: {partition_count}, " - msg += f"num_chunks: {num_chunks}" - log.info(msg) - layout["partition_count"] = partition_count - else: - msg = "do not need chunk partitions, num_chunks: " - msg += f"{num_chunks} max_chunks_per_folder: " - msg += f"{max_chunks_per_folder}" - log.info(msg) - - if layout_class in ("H5D_CHUNKED_REF", "H5D_CHUNKED_REF_INDIRECT"): - chunk_size = getChunkSize(chunk_dims, item_size) - - msg = f"chunk_size: {chunk_size}, min: {min_chunk_size}, " - msg += f"max: {max_chunk_size}" - log.debug(msg) - # nothing to do about inefficiently small chunks, but large chunks - # can be subdivided - if chunk_size < min_chunk_size: - msg = f"chunk size: {chunk_size} less than min size: " - msg += f"{min_chunk_size} for {layout_class} dataset" - log.warn(msg) - elif chunk_size > max_chunk_size: - msg = f"chunk size: {chunk_size} greater than max size: " - msg += f"{max_chunk_size}, for {layout_class} dataset" - log.warn(msg) - layout["dims"] = chunk_dims - - def getHyperslabSelection(dims, start=None, stop=None, step=None): """ Get slices given lists of start, stop, step values @@ -1117,40 +232,6 @@ def getSelectionShape(selection): return shape -def getShapeDims(shape): - """ - Get dims from a given shape json. Return [1,] for Scalar datasets, - None for null dataspaces - """ - dims = None - if isinstance(shape, int): - dims = [shape, ] - elif isinstance(shape, list) or isinstance(shape, tuple): - dims = shape # can use as is - elif isinstance(shape, str): - # only valid string value is H5S_NULL - if shape != "H5S_NULL": - raise ValueError("Invalid value for shape") - dims = None - elif isinstance(shape, dict): - if "class" not in shape: - raise ValueError("'class' key not found in shape") - if shape["class"] == "H5S_NULL": - dims = None - elif shape["class"] == "H5S_SCALAR": - dims = [1,] - elif shape["class"] == "H5S_SIMPLE": - if "dims" not in shape: - raise ValueError("'dims' key expected for shape") - dims = shape["dims"] - else: - raise ValueError("Unknown shape class: {}".format(shape["class"])) - else: - raise ValueError(f"Unexpected shape class: {type(shape)}") - - return dims - - def isSelectAll(slices, dims): """ return True if the selection covers the entire dataspace """ if len(slices) != len(dims): @@ -1605,58 +686,10 @@ def setChunkDimQueryParam(params, dims): extent = dims[i] dim_param += str(extent) dim_param += "]" - log.debug("dim query param: {}".format(dim_param)) + log.debug(f"dim query param: {dim_param}") params["dim"] = dim_param -def getDsetMaxDims(dset_json): - """ - Get maxdims from a given shape. Return [1,] for Scalar datasets - - Use with H5S_NULL datasets will throw a 400 error. 
- """ - if "shape" not in dset_json: - log.error("No shape found in dset_json") - raise HTTPInternalServerError() - shape_json = dset_json["shape"] - maxdims = None - if shape_json["class"] == "H5S_NULL": - msg = "Expected shape class other than H5S_NULL" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - elif shape_json["class"] == "H5S_SCALAR": - maxdims = [ - 1, - ] - elif shape_json["class"] == "H5S_SIMPLE": - if "maxdims" in shape_json: - maxdims = shape_json["maxdims"] - else: - log.error("Unexpected shape class: {}".format(shape_json["class"])) - raise HTTPInternalServerError() - return maxdims - - -def getChunkLayout(dset_json): - """Get chunk layout. Throw 500 if used with non-H5D_CHUNKED layout""" - if "layout" not in dset_json: - log.error("No layout found in dset_json") - raise HTTPInternalServerError() - layout_json = dset_json["layout"] - if "class" not in layout_json: - log.error(f"Expected class key for layout: {layout_json}") - raise HTTPInternalServerError() - layout_class = layout_json["class"] - if layout_class not in CHUNK_LAYOUT_CLASSES: - log.error(f"Unexpected shape layout: {layout_class}") - raise HTTPInternalServerError() - if "dims" not in layout_json: - log.error(f"Expected dims key in layout: {layout_json}") - raise HTTPInternalServerError() - layout = layout_json["dims"] - return layout - - def getChunkInitializer(dset_json): """ get initializer application and arguments if set """ initializer = None @@ -1708,65 +741,6 @@ def getPreviewQuery(dims): return select -def isExtensible(dims, maxdims): - """ - Determine if the dataset can be extended - """ - if maxdims is None or len(dims) == 0: - return False - rank = len(dims) - if len(maxdims) != rank: - raise ValueError("rank of maxdims does not match dataset") - for n in range(rank): - # TBD - shouldn't have H5S_UNLIMITED in any new files. 
- # Remove check once this is confirmed - if maxdims[n] in (0, "H5S_UNLIMITED") or maxdims[n] > dims[n]: - return True - return False - - -def getDatasetLayout(dset_json): - """ Return layout json from creation property list or layout json """ - layout = None - - if "creationProperties" in dset_json: - cp = dset_json["creationProperties"] - if "layout" in cp: - layout = cp["layout"] - if not layout and "layout" in dset_json: - layout = dset_json["layout"] - if not layout: - log.warn(f"no layout for {dset_json}") - return layout - - -def getDatasetLayoutClass(dset_json): - """ return layout class """ - layout = getDatasetLayout(dset_json) - if layout and "class" in layout: - layout_class = layout["class"] - else: - layout_class = None - return layout_class - - -def getChunkDims(dset_json): - """ get chunk shape for given dset_json """ - - layout = getDatasetLayout(dset_json) - if layout and "dims" in layout: - return layout["dims"] - else: - # H5D_COMPACT and H5D_CONTIGUOUS will not have a dims key - # Check the layout dict in dset_json to see if it's - # defined there - if "layout" in dset_json: - layout = dset_json["layout"] - if "dims" in layout: - return layout["dims"] - return None - - class ItemIterator: """ Class to iterator through items in a selection diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py index cf15ada2..e8d4feb3 100755 --- a/tests/integ/dataset_test.py +++ b/tests/integ/dataset_test.py @@ -19,12 +19,6 @@ import helper import config -# min/max chunk size - these can be set by config, but -# practically the min config value should be larger than -# CHUNK_MIN and the max config value should less than CHUNK_MAX -CHUNK_MIN = 1024 # lower limit (1024b) -CHUNK_MAX = 50 * 1024 * 1024 # upper limit (50M) - class DatasetTest(unittest.TestCase): def __init__(self, *args, **kwargs): @@ -67,7 +61,7 @@ def testScalarDataset(self): expected_keys = [ "id", "shape", - "layout", + "creationProperties", "attributeCount", "created", "lastModified", @@ -232,7 +226,6 @@ def testPostDatasetWithId(self): "id", "shape", "hrefs", - "layout", "creationProperties", "attributeCount", "created", @@ -423,7 +416,6 @@ def testGet(self): "id", "shape", "hrefs", - "layout", "creationProperties", "attributeCount", "created", @@ -447,7 +439,8 @@ def testGet(self): self.assertEqual(shape["dims"], [10, 10]) self.assertEqual(shape["maxdims"], [10, 10]) - layout = rspJson["layout"] + cpl = rspJson["creationProperties"] + layout = cpl["layout"] self.assertEqual(layout["class"], "H5D_CHUNKED") self.assertEqual(layout["dims"], [10, 10]) self.assertTrue("partition_count" not in layout) @@ -517,7 +510,6 @@ def testGetByPath(self): "id", "shape", "hrefs", - "layout", "creationProperties", "attributeCount", "created", @@ -539,7 +531,6 @@ def testGetByPath(self): "id", "shape", "hrefs", - "layout", "creationProperties", "attributeCount", "created", @@ -601,7 +592,6 @@ def testGetVerbose(self): "id", "shape", "hrefs", - "layout", "creationProperties", "attributeCount", "created", @@ -1265,6 +1255,95 @@ def testExtend3DDataset(self): rsp = self.session.put(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 409) # tried to extend a non-extensible dimension + def testInvalidDatasetLayout(self): + # test that various invalid layouts fail with a 400 status + domain = self.base_domain + "/testInvalidDatasetLayout.h5" + helper.setupDomain(domain) + + print("testInvalidDatasetLayout", domain) + headers = helper.getRequestHeaders(domain=domain) + # get domain + req = 
helper.getEndpoint() + "/" + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("root" in rspJson) + + # dataset create + req = self.endpoint + "/datasets" + dims = [365, 780, 1024] + maxdims = [0, 780, 1024] + payload = { + "type": "H5T_IEEE_F32LE", + "shape": dims, + "maxdims": maxdims, + } + # bad layout class + payload["creationProperties"] = { + "layout": {"class": "H5D_XYZ", "dims": [1, 390, 512]}, + } + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 400) # create dataset + + # chunked layout with mismatched dimensions + payload["creationProperties"] = { + "layout": {"class": "H5D_CHUNKED", "dims": [1, 390]}, + } + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 400) # create dataset + + # chunked layout with negative dimensions + payload["creationProperties"] = { + "layout": {"class": "H5D_CHUNKED", "dims": [100, 200, -300]}, + } + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 400) # create dataset + + file_uri = "s3://a-storage-bucket/some-file.h5" + offset = 1234 + size = dims[0] * dims[1] * dims[2] * 4 + + # H5D_CONTIGUOUS_REF layout, omitting one required key at a time... + for key in ("file_uri", "offset", "size"): + layout = {"class": "H5D_CONTIGUOUS_REF"} + if key != "file_uri": + layout["file_uri"] = file_uri + if key != "offset": + layout["offset"] = offset + if key != "size": + layout["size"] = size + + payload["creationProperties"] = { + "layout": layout + } + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 400) # create dataset + + # H5D_CONTIGUOUS_REF with a vlen type + type_vstr = { + "charSet": "H5T_CSET_ASCII", + "class": "H5T_STRING", + "strPad": "H5T_STR_NULLTERM", + "length": "H5T_VARIABLE", + } + payload = { + "type": type_vstr, + "shape": dims, + } + layout = { + "class": "H5D_CONTIGUOUS_REF", + "file_uri": file_uri, + "offset": offset, + "size": size + } + payload["creationProperties"] = { + "layout": layout + } + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 400) # create dataset + def testCreationPropertiesLayoutDataset(self): # test Dataset with creation property list domain = self.base_domain + "/testCreationPropertiesLayoutDataset.h5" @@ -1288,6 +1367,7 @@ def testCreationPropertiesLayoutDataset(self): "shape": [365, 780, 1024], "maxdims": [0, 780, 1024], } + # define a chunk layout with 4 chunks per 'slice' # chunk size is 798720 bytes gzip_filter = { @@ -1308,7 +1388,6 @@ def testCreationPropertiesLayoutDataset(self): fletcher32_filter, ], } - req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) # create dataset rspJson = json.loads(rsp.text) @@ -1326,12 +1405,14 @@ def testCreationPropertiesLayoutDataset(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] + self.assertTrue("creationProperties" in rspJson) + cpl = rspJson["creationProperties"] + self.assertTrue("layout" in cpl) + layout_json = cpl["layout"] self.assertTrue("class" in layout_json) self.assertEqual(layout_json["class"], 
"H5D_CHUNKED") self.assertTrue("dims" in layout_json) - self.assertEqual(layout_json["dims"], [1, 390, 1024]) + self.assertEqual(layout_json["dims"], [1, 390, 512]) if config.get("max_chunks_per_folder") > 0: self.assertTrue("partition_count" in layout_json) self.assertEqual(layout_json["partition_count"], 10) @@ -1407,16 +1488,13 @@ def testCreationPropertiesContiguousDataset(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] - self.assertTrue("class" in layout_json) - self.assertEqual(layout_json["class"], "H5D_CHUNKED") - self.assertTrue("dims" in layout_json) - self.assertEqual(layout_json["dims"], [10, 20]) - # verify creation properties are preserved self.assertTrue("creationProperties" in rspJson) cpl = rspJson["creationProperties"] self.assertTrue("layout" in cpl) + layout_json = cpl["layout"] + self.assertTrue("class" in layout_json) + self.assertEqual(layout_json["class"], "H5D_CONTIGUOUS") + self.assertFalse("dims" in layout_json) def testCompressionFiltersDataset(self): # test Dataset with creation property list @@ -1463,10 +1541,12 @@ def testCompressionFiltersDataset(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] + self.assertTrue("creationProperties" in rspJson) + cpl = rspJson["creationProperties"] + self.assertTrue("layout" in cpl) + layout_json = cpl["layout"] self.assertTrue("class" in layout_json) - self.assertEqual(layout_json["class"], "H5D_CHUNKED") + self.assertEqual(layout_json["class"], "H5D_CONTIGUOUS") # verify compression self.assertTrue("creationProperties" in rspJson) @@ -1525,10 +1605,13 @@ def testCompressionFilterOptionDataset(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] + self.assertTrue("creationProperties" in rspJson) + cpl = rspJson["creationProperties"] + self.assertTrue("layout" in cpl) + layout_json = cpl["layout"] self.assertTrue("class" in layout_json) - self.assertEqual(layout_json["class"], "H5D_CHUNKED") + self.assertEqual(layout_json["class"], "H5D_CONTIGUOUS") + self.assertFalse("dims" in layout_json) # verify compression self.assertTrue("creationProperties" in rspJson) @@ -1762,9 +1845,7 @@ def testAutoChunk1dDataset(self): req = self.endpoint + "/datasets" # 50K x 80K dataset extent = 1000 * 1000 * 1000 - dims = [ - extent, - ] + dims = [extent, ] fields = ( {"name": "x", "type": "H5T_IEEE_F64LE"}, {"name": "y", "type": "H5T_IEEE_F64LE"}, @@ -1773,13 +1854,12 @@ def testAutoChunk1dDataset(self): datatype = {"class": "H5T_COMPOUND", "fields": fields} payload = {"type": datatype, "shape": dims} - # the following should get ignored as too small + # the following specifies an inefficiently small chunk size + chunk_dims = [10,] payload["creationProperties"] = { "layout": { "class": "H5D_CHUNKED", - "dims": [ - 10, - ], + "dims": chunk_dims } } req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) @@ -1802,19 +1882,14 @@ def testAutoChunk1dDataset(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] + self.assertTrue("creationProperties" in rspJson) + cpl = rspJson["creationProperties"] + 
self.assertTrue("layout" in cpl) + layout_json = cpl["layout"] self.assertTrue("class" in layout_json) self.assertEqual(layout_json["class"], "H5D_CHUNKED") self.assertTrue("dims" in layout_json) - self.assertTrue("partition_count" not in layout_json) - layout = layout_json["dims"] - self.assertEqual(len(layout), 1) - self.assertTrue(layout[0] < dims[0]) - chunk_size = layout[0] * 8 * 3 # three 64bit - # chunk size should be between chunk min and max - self.assertTrue(chunk_size >= CHUNK_MIN) - self.assertTrue(chunk_size <= CHUNK_MAX) + self.assertEqual(layout_json["dims"], chunk_dims) def testAutoChunk2dDataset(self): # test Dataset where chunk layout is set automatically @@ -1855,8 +1930,10 @@ def testAutoChunk2dDataset(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] + self.assertTrue("creationProperties" in rspJson) + cpl = rspJson["creationProperties"] + self.assertTrue("layout" in cpl) + layout_json = cpl["layout"] self.assertTrue("class" in layout_json) self.assertEqual(layout_json["class"], "H5D_CHUNKED") self.assertTrue("dims" in layout_json) @@ -1865,64 +1942,10 @@ def testAutoChunk2dDataset(self): self.assertTrue(layout[0] < dims[0]) self.assertTrue(layout[1] < dims[1]) chunk_size = layout[0] * layout[1] * 4 - # chunk size should be between chunk min and max - self.assertTrue(chunk_size >= CHUNK_MIN) - self.assertTrue(chunk_size <= CHUNK_MAX) - - def testMinChunkSizeDataset(self): - # test Dataset where chunk layout is adjusted if provided - # layout is too small - domain = self.base_domain + "/testMinChunkSizeDataset.h5" - helper.setupDomain(domain) - print("testMinChunkSizeDataset", domain) - headers = helper.getRequestHeaders(domain=domain) - # get domain - req = helper.getEndpoint() + "/" - rsp = self.session.get(req, headers=headers) - rspJson = json.loads(rsp.text) - self.assertTrue("root" in rspJson) - root_uuid = rspJson["root"] - - # create the dataset - req = self.endpoint + "/datasets" - # 50K x 80K dataset - dims = [50000, 80000] - payload = {"type": "H5T_IEEE_F32LE", "shape": dims} - # define a chunk layout with lots of small chunks - payload["creationProperties"] = { - "layout": {"class": "H5D_CHUNKED", "dims": [10, 10]} - } - - req = self.endpoint + "/datasets" - rsp = self.session.post(req, data=json.dumps(payload), headers=headers) - self.assertEqual(rsp.status_code, 201) # create dataset - rspJson = json.loads(rsp.text) - dset_uuid = rspJson["id"] - self.assertTrue(helper.validateId(dset_uuid)) - # link new dataset as 'dset' - name = "dset" + helper.getRandomName() - req = self.endpoint + "/groups/" + root_uuid + "/links/" + name - payload = {"id": dset_uuid} - rsp = self.session.put(req, data=json.dumps(payload), headers=headers) - self.assertEqual(rsp.status_code, 201) - - # verify layout - req = helper.getEndpoint() + "/datasets/" + dset_uuid - rsp = self.session.get(req, headers=headers) - self.assertEqual(rsp.status_code, 200) - rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] - self.assertTrue("class" in layout_json) - self.assertEqual(layout_json["class"], "H5D_CHUNKED") - self.assertTrue("dims" in layout_json) - layout = layout_json["dims"] - self.assertEqual(len(layout), 2) - self.assertTrue(layout[0] < dims[0]) - self.assertTrue(layout[1] < dims[1]) - chunk_size = layout[0] * layout[1] * 4 - # chunk size should be between chunk min and max + # chunk size 
will be based on server config, but assume a min/max of 1MB to 1GB + CHUNK_MIN = 1024 * 1024 + CHUNK_MAX = 1024 * 1024 * 1024 self.assertTrue(chunk_size >= CHUNK_MIN) self.assertTrue(chunk_size <= CHUNK_MAX) @@ -2307,17 +2330,13 @@ def testContiguousRefDataset(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] + self.assertTrue("creationProperties" in rspJson) + cpl = rspJson["creationProperties"] + self.assertTrue("layout" in cpl) + layout_json = cpl["layout"] self.assertTrue("class" in layout_json) - self.assertEqual(layout_json["class"], "H5D_CHUNKED") - self.assertTrue("dims" in layout_json) - chunk_dims = layout_json["dims"] - self.assertEqual(len(chunk_dims), 2) - chunk_size = chunk_dims[0] * chunk_dims[1] * 4 - # chunk size should be between chunk min and max - self.assertTrue(chunk_size >= CHUNK_MIN) - self.assertTrue(chunk_size <= CHUNK_MAX) + self.assertEqual(layout_json["class"], "H5D_CONTIGUOUS_REF") + self.assertFalse("dims" in layout_json) # verify cpl self.assertTrue("creationProperties" in rspJson) @@ -2380,23 +2399,13 @@ def testContiguousRefZeroDimDataset(self): rsp = self.session.put(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) - # verify layout + # get dataset json req = helper.getEndpoint() + "/datasets/" + dset_uuid rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] - self.assertTrue("class" in layout_json) - self.assertEqual(layout_json["class"], "H5D_CHUNKED") - self.assertTrue("dims" in layout_json) - chunk_dims = layout_json["dims"] - self.assertEqual(len(chunk_dims), 2) - # layout should be same as the dims - self.assertEqual(chunk_dims[0], dims[0]) - self.assertEqual(chunk_dims[1], dims[1]) - # verify cpl + # verify layout self.assertTrue("creationProperties" in rspJson) cpl = rspJson["creationProperties"] self.assertTrue("layout" in cpl) @@ -2470,13 +2479,6 @@ def testChunkedRefDataset(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] - self.assertTrue("class" in layout_json) - self.assertEqual(layout_json["class"], "H5D_CHUNKED") - self.assertTrue("dims" in layout_json) - chunk_dims = layout_json["dims"] - self.assertEqual(len(chunk_dims), 2) self.assertTrue("creationProperties" in rspJson) cpl = rspJson["creationProperties"] self.assertTrue("layout" in cpl) @@ -2549,21 +2551,15 @@ def testChunkedRefIndirectDataset(self): rsp = self.session.put(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) - # verify layout + # get dataset json req = helper.getEndpoint() + "/datasets/" + dset_uuid rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] - self.assertTrue("class" in layout_json) - self.assertEqual(layout_json["class"], "H5D_CHUNKED") - self.assertTrue("chunks" not in layout_json) - chunk_dims = layout_json["dims"] - self.assertEqual(len(chunk_dims), 2) self.assertTrue("creationProperties" in rspJson) cpl = rspJson["creationProperties"] self.assertTrue("layout" in cpl) cpl_layout = cpl["layout"] 
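All of the test updates in this area follow one pattern: the chunk layout is no longer reported as a top-level "layout" key in the dataset GET response, but nested under "creationProperties". A minimal client-side sketch of the new access pattern (endpoint, dset_uuid, and headers assumed to be set up as in the tests):

    import json
    import requests

    rsp = requests.get(endpoint + "/datasets/" + dset_uuid, headers=headers)
    rsp_json = json.loads(rsp.text)
    cpl = rsp_json["creationProperties"]    # was: rsp_json["layout"]
    layout_json = cpl.get("layout", {})
    print(layout_json.get("class"), layout_json.get("dims"))

The CHUNK_MIN/CHUNK_MAX bounds assumed above are compared against a chunk byte count that is just the product of the chunk extents times the element size. In sketch form (the function name is illustrative; math.prod requires Python 3.8+):

    import math

    def chunk_num_bytes(chunk_dims, item_size):
        # bytes in one chunk: product of the extents times the element size
        return math.prod(chunk_dims) * item_size

    # matches the "chunk size is 798720 bytes" comment earlier:
    # a [1, 390, 512] chunk of 4-byte floats
    assert chunk_num_bytes([1, 390, 512], 4) == 798720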
self.assertTrue("class" in cpl_layout) @@ -2645,19 +2641,11 @@ def testChunkedRefIndirectS3UriDataset(self): rsp = self.session.put(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) - # verify layout + # fetch dataset json req = helper.getEndpoint() + "/datasets/" + dset_uuid rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] - self.assertTrue("class" in layout_json) - self.assertEqual(layout_json["class"], "H5D_CHUNKED") - self.assertTrue("chunks" not in layout_json) - self.assertTrue("dims" in layout_json) - chunk_dims = layout_json["dims"] - self.assertEqual(len(chunk_dims), 2) self.assertTrue("creationProperties" in rspJson) cpl = rspJson["creationProperties"] @@ -2711,8 +2699,10 @@ def testDatasetChunkPartitioning(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] + self.assertTrue("creationProperties" in rspJson) + cpl = rspJson["creationProperties"] + self.assertTrue("layout" in cpl) + layout_json = cpl["layout"] self.assertTrue("class" in layout_json) self.assertEqual(layout_json["class"], "H5D_CHUNKED") self.assertTrue("dims" in layout_json) @@ -2728,10 +2718,6 @@ def testDatasetChunkPartitioning(self): self.assertTrue(layout[0] < dims[0]) self.assertTrue(layout[1] < dims[1]) self.assertTrue(layout[2] < dims[2]) - chunk_size = layout[0] * layout[1] * layout[2] * 4 - # chunk size should be between chunk min and max - self.assertTrue(chunk_size >= CHUNK_MIN) - self.assertTrue(chunk_size <= CHUNK_MAX) def testExtendibleDatasetChunkPartitioning(self): # test Dataset partitioning logic for large datasets @@ -2750,9 +2736,17 @@ def testExtendibleDatasetChunkPartitioning(self): req = self.endpoint + "/datasets" # 50K x 80K x 90K dataset dims = [0, 80000, 90000] + # unlimited extent in dim 0, fixed in dim 1, extensible by 10x in dim 2 max_dims = [0, 80000, 900000] + chunk_shape = [1000, 1000, 1000] + layout = { + "class": "H5D_CHUNKED", + "dims": chunk_shape + } + cpl = {"layout": layout} payload = {"type": "H5T_IEEE_F32LE", "shape": dims, "maxdims": max_dims} + payload["creationProperties"] = cpl req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) @@ -2774,8 +2768,10 @@ def testExtendibleDatasetChunkPartitioning(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] + self.assertTrue("creationProperties" in rspJson) + cpl = rspJson["creationProperties"] + self.assertTrue("layout" in cpl) + layout_json = cpl["layout"] self.assertTrue("class" in layout_json) self.assertEqual(layout_json["class"], "H5D_CHUNKED") self.assertTrue("dims" in layout_json) @@ -2785,10 +2781,6 @@ layout = layout_json["dims"] self.assertEqual(len(layout), 3) - chunk_size = layout[0] * layout[1] * layout[2] * 4 - # chunk size should be between chunk min and max - self.assertTrue(chunk_size >= CHUNK_MIN) - self.assertTrue(chunk_size <= CHUNK_MAX) def testDatasetEmptyChunkExtent(self): # Attempting to create 0-extent chunks should respond with Bad Request @@ -2855,7 +2847,6 @@ def testDatasetPostMulti(self): expected_keys = [ "id", "shape", - "layout", 
"attributeCount", "created", "lastModified", diff --git a/tests/unit/dset_util_test.py b/tests/unit/dset_util_test.py index 7c2028b9..0e77ab1b 100755 --- a/tests/unit/dset_util_test.py +++ b/tests/unit/dset_util_test.py @@ -15,8 +15,7 @@ sys.path.append("../..") from hsds.util.dsetUtil import getHyperslabSelection, getSelectionShape -from hsds.util.dsetUtil import getSelectionList, ItemIterator, getSelectionPagination, expandChunk -from hsds.util.dsetUtil import guessChunk, shrinkChunk, getChunkSize, getContiguousLayout +from hsds.util.dsetUtil import getSelectionList, ItemIterator, getSelectionPagination class DsetUtilTest(unittest.TestCase): @@ -26,277 +25,6 @@ def __init__(self, *args, **kwargs): self.logger = logging.getLogger() self.logger.setLevel(logging.WARNING) - def testGuessChunk(self): - - typesize = "H5T_VARIABLE" - logging.debug("hello") - - shape = {"class": "H5S_NULL"} - layout = guessChunk(shape, typesize) - self.assertTrue(layout is None) - - shape = {"class": "H5S_SCALAR"} - layout = guessChunk(shape, typesize) - self.assertEqual(layout, (1,)) - - shape = {"class": "H5S_SIMPLE", "dims": [100, 100]} - layout = guessChunk(shape, typesize) - self.assertTrue(len(layout), 2) - for i in range(2): - self.assertTrue(layout[i] >= 1) - self.assertTrue(layout[i] <= 100) - - typesize = 8 - layout = guessChunk(shape, typesize) - self.assertTrue(len(layout), 2) - for i in range(2): - self.assertTrue(layout[i] >= 1) - self.assertTrue(layout[i] <= 100) - - shape = {"class": "H5S_SIMPLE", "dims": [5]} - layout = guessChunk(shape, typesize) - self.assertEqual(layout, (5,)) - - shape = {"class": "H5S_SIMPLE", "dims": [100, 100, 100]} - layout = guessChunk(shape, typesize) - print("layout:", layout) - self.assertTrue(len(layout), 3) - for i in range(3): - self.assertTrue(layout[i] >= 1) - self.assertTrue(layout[i] <= 100) - - shape = {"class": "H5S_SIMPLE", "dims": [100, 0], "maxdims": [100, 0]} - layout = guessChunk(shape, typesize) - self.assertTrue(len(layout), 2) - for i in range(2): - self.assertTrue(layout[i] >= 1) - self.assertTrue(layout[i] <= 1024) - - shape = {"class": "H5S_SCALAR"} - layout = guessChunk(shape, typesize) - self.assertEqual(layout, (1,)) - - shape = {"class": "H5S_NULL"} - layout = guessChunk(shape, typesize) - self.assertEqual(layout, None) - - def testShrinkChunk(self): - CHUNK_MIN = 500 - CHUNK_MAX = 5000 - typesize = 1 - layout = (1, 2, 3) - shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX) - self.assertEqual(shrunk, layout) - - layout = (100, 200, 300) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes > CHUNK_MAX) - shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX) - rank = len(layout) - for i in range(rank): - self.assertTrue(shrunk[i] >= 1) - self.assertTrue(shrunk[i] <= 1000 * (i + 1)) - num_bytes = getChunkSize(shrunk, typesize) - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - layout = (300, 200, 100) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes > CHUNK_MAX) - shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX) - rank = len(layout) - for i in range(rank): - self.assertTrue(shrunk[i] >= 1) - self.assertTrue(shrunk[i] <= 1000 * (3 - i)) - num_bytes = getChunkSize(shrunk, typesize) - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - CHUNK_MIN = 1 * 1024 * 1024 - CHUNK_MAX = 4 * 1024 * 1024 - typesize = 4 - layout = (117, 201, 189, 1) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes > 
CHUNK_MAX) - shrunk = shrinkChunk(layout, typesize, chunk_max=CHUNK_MAX) - self.assertEqual(shrunk, (59, 101, 95, 1)) - num_bytes = getChunkSize(shrunk, typesize) - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - def testExpandChunk(self): - CHUNK_MIN = 5000 - CHUNK_MAX = 50000 - - typesize = 20 - shape = {"class": "H5S_SIMPLE", "dims": [12, ], "maxdims": [20, ]} - layout = (20,) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes < CHUNK_MIN) - expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) - num_bytes = getChunkSize(expanded, typesize) - # chunk layout can't be larger than dataspace - self.assertTrue(num_bytes < CHUNK_MIN) - self.assertEqual(expanded, (20,)) - - typesize = 1 - shape = {"class": "H5S_SIMPLE", "dims": [10, 10, 10]} - layout = (10, 10, 10) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes < CHUNK_MIN) - expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) - num_bytes = getChunkSize(expanded, typesize) - # chunk layout can't be larger than dataspace - self.assertTrue(num_bytes < CHUNK_MIN) - self.assertEqual(expanded, (10, 10, 10)) - - shape = {"class": "H5S_SIMPLE", "dims": [1000, 2000, 3000]} - layout = (10, 10, 10) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes < CHUNK_MIN) - expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) - num_bytes = getChunkSize(expanded, typesize) - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - shape = {"class": "H5S_SIMPLE", "dims": [1000,]} - layout = (10,) - num_bytes = getChunkSize(layout, "H5T_VARIABLE") - self.assertTrue(num_bytes < CHUNK_MIN) - expanded = expandChunk(layout, "H5T_VARIABLE", shape, chunk_min=CHUNK_MIN) - num_bytes = getChunkSize(expanded, "H5T_VARIABLE") - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - shape = { - "class": "H5S_SIMPLE", - "dims": [1000, 10, 1000], - "maxdims": [1000, 100, 1000], - } - layout = (10, 10, 10) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes < CHUNK_MIN) - expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) - num_bytes = getChunkSize(expanded, typesize) - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - shape = { - "class": "H5S_SIMPLE", - "dims": [1000, 0, 1000], - "maxdims": [1000, 100, 1000], - } - layout = (10, 10, 10) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes < CHUNK_MIN) - expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) - num_bytes = getChunkSize(expanded, typesize) - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - shape = { - "class": "H5S_SIMPLE", - "dims": [1000, 10, 1000], - "maxdims": [1000, 0, 1000], - } - layout = (10, 10, 10) - num_bytes = getChunkSize(layout, typesize) - self.assertTrue(num_bytes < CHUNK_MIN) - expanded = expandChunk(layout, typesize, shape, chunk_min=CHUNK_MIN) - num_bytes = getChunkSize(expanded, typesize) - self.assertTrue(num_bytes > CHUNK_MIN) - self.assertTrue(num_bytes < CHUNK_MAX) - - def testGetContiguousLayout(self): - typesize = 4 - chunk_min = 400 - chunk_max = 800 - - kwargs = {"chunk_min": chunk_min, "chunk_max": chunk_max} - - def get_num_bytes(dims): - num_bytes = typesize - for n in dims: - num_bytes *= n - return num_bytes - - try: - shape = {"class": "H5S_SIMPLE", "dims": [100, 100]} - layout = getContiguousLayout(shape, "H5T_VARIABLE", **kwargs) - 
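The expected shrink result asserted above, (117, 201, 189, 1) reduced to (59, 101, 95, 1), comes from halving each extent with a ceiling so that odd extents round up and never collapse to zero. The -(-x // 2) idiom used by shrinkChunk does this with pure integer math:

    def halve_ceil(x):
        # ceiling division by 2: negate, floor-divide, negate again
        return -(-x // 2)

    assert halve_ceil(117) == 59
    assert halve_ceil(201) == 101
    assert halve_ceil(189) == 95
    assert halve_ceil(1) == 1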
self.assertTrue(False) - except ValueError: - pass # expected - - shape = {"class": "H5S_NULL"} - layout = getContiguousLayout(shape, typesize, **kwargs) - self.assertTrue(layout is None) - - shape = {"class": "H5S_SCALAR"} - layout = getContiguousLayout(shape, typesize, **kwargs) - self.assertEqual(layout, (1,)) - - for extent in (1, 100, 10000): - dims = [ - extent, - ] - shape = {"class": "H5S_SIMPLE", "dims": dims} - layout = getContiguousLayout(shape, typesize, **kwargs) - self.assertTrue(len(layout), 1) - chunk_bytes = get_num_bytes(layout) - space_bytes = get_num_bytes(dims) - if space_bytes > chunk_min: - self.assertTrue(chunk_bytes >= chunk_min) - - self.assertTrue(chunk_bytes <= chunk_max) - - for extent in (1, 9, 90): - dims = [extent, extent] - shape = {"class": "H5S_SIMPLE", "dims": dims} - layout = getContiguousLayout(shape, typesize, **kwargs) - self.assertTrue(len(layout), 2) - for i in range(2): - self.assertTrue(layout[i] >= 1) - self.assertTrue(layout[i] <= extent) - self.assertEqual(layout[1], extent) - chunk_bytes = get_num_bytes(layout) - space_bytes = get_num_bytes(dims) - - if space_bytes > chunk_min: - self.assertTrue(chunk_bytes >= chunk_min) - self.assertTrue(chunk_bytes <= chunk_max) - - for extent in (1, 10, 100): - dims = [extent, extent, 50] - shape = {"class": "H5S_SIMPLE", "dims": dims} - layout = getContiguousLayout(shape, typesize, **kwargs) - self.assertTrue(len(layout), 3) - for i in range(3): - self.assertTrue(layout[i] >= 1) - self.assertTrue(layout[i] <= dims[i]) - - chunk_bytes = get_num_bytes(layout) - space_bytes = get_num_bytes(dims) - - if space_bytes > chunk_min: - self.assertTrue(chunk_bytes >= chunk_min) - self.assertTrue(chunk_bytes <= chunk_max) - - for extent in (1, 100, 1000): - dims = [extent, 4] - shape = {"class": "H5S_SIMPLE", "dims": dims} - layout = getContiguousLayout(shape, typesize, **kwargs) - self.assertTrue(len(layout), 2) - for i in range(2): - self.assertTrue(layout[i] >= 1) - self.assertTrue(layout[i] <= dims[i]) - - chunk_bytes = get_num_bytes(layout) - space_bytes = get_num_bytes(dims) - - if space_bytes > chunk_min: - self.assertTrue(chunk_bytes >= chunk_min) - self.assertTrue(chunk_bytes <= chunk_max) - def testGetHyperslabSelection(self): # getHyperslabSelection(dsetshape, start, stop, step) # 1-D case From 61d38fd9d1229ddffc2b25c358fcb046534215df Mon Sep 17 00:00:00 2001 From: John Readey Date: Thu, 13 Nov 2025 16:56:52 +0100 Subject: [PATCH 42/49] update requirement.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 5aa9d39b..7dfad721 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ aiofiles azure-storage-blob cryptography h5py>=3.6.0 +hdf5-json>1.0.0 numcodecs numpy>=2.0.0rc1 psutil From 73d822313093edb3658f616b1d98718e2cc6300b Mon Sep 17 00:00:00 2001 From: Joshua Stillerman Date: Tue, 16 Dec 2025 08:15:49 +0000 Subject: [PATCH 43/49] updates to support h5json latest --- hsds/chunk_crawl.py | 4 +- hsds/chunk_sn.py | 14 ++- hsds/datanode_lib.py | 5 +- hsds/domain_crawl.py | 7 +- hsds/dset_sn.py | 14 +-- hsds/servicenode_lib.py | 222 ++++++++++++---------------------- hsds/util/chunkUtil.py | 26 +++- hsds/util/dsetUtil.py | 36 ++++-- hsds/util/storUtil.py | 2 +- tests/integ/dataset_test.py | 101 ++++++++++------ tests/integ/filter_test.py | 23 +++- tests/integ/link_test.py | 1 - tests/integ/pointsel_test.py | 6 +- tests/integ/value_test.py | 7 +- tests/unit/chunk_util_test.py | 33 +++++ tests/unit/dset_util_test.py | 34 +++++- 16 
files changed, 302 insertions(+), 233 deletions(-) diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py index a92bdf36..142577d8 100755 --- a/hsds/chunk_crawl.py +++ b/hsds/chunk_crawl.py @@ -87,9 +87,7 @@ async def write_chunk_hyperslab( msg += f"bucket: {bucket}" msg += f" dset_json: {dset_json}" log.info(msg) - if "layout" not in dset_json: - log.error(f"No layout found in dset_json: {dset_json}") - raise HTTPInternalServerError() + partition_chunk_id = getChunkIdForPartition(chunk_id, dset_json) if partition_chunk_id != chunk_id: log.debug(f"using partition_chunk_id: {partition_chunk_id}") diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index a1cd3d06..f278e4e9 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -201,7 +201,7 @@ def _getSelect(params, dset_json, body=None): """ return selection region if any as a list of slices. """ slices = None - log.debug(f"_getSelect params: {params} body: {body}") + log.debug(f"_getSelect params: {dict(params)} body: {body}") try: if body and isinstance(body, dict): if "select" in body and body["select"]: @@ -214,6 +214,7 @@ def _getSelect(params, dset_json, body=None): if slices: msg = "select defined in both request body and query parameters" raise ValueError(msg) + log.debug(f"_getSelect - select param: {select}") slices = get_slices(select, dset_json) except ValueError as ve: log.warn(f"Invalid selection: {ve}") @@ -226,12 +227,17 @@ def _getSelect(params, dset_json, body=None): if not slices: # just return the entire dataspace + log.debug("_getSelect - no selection, using entire dataspace") datashape = dset_json["shape"] dims = getShapeDims(datashape) slices = [] - for dim in dims: - s = slice(0, dim, 1) - slices.append(s) + if dims: + for dim in dims: + s = slice(0, dim, 1) + slices.append(s) + else: + # scalar dataset + slices.append(slice(0, 1, 1)) log.debug(f"_getSelect returning: {slices}") return slices diff --git a/hsds/datanode_lib.py b/hsds/datanode_lib.py index 9bd2b0a5..a6adbe28 100644 --- a/hsds/datanode_lib.py +++ b/hsds/datanode_lib.py @@ -27,7 +27,7 @@ from h5json.objid import isValidChunkId, isSchema2Id from h5json.objid import getRootObjId, isRootObjId from h5json.shape_util import getShapeDims -from h5json.dset_util import getChunkDims +from h5json.dset_util import getChunkDims, getDatasetLayoutClass from .util.nodeUtil import getDataNodeUrl from .util.storUtil import getStorJSONObj, putStorJSONObj, putStorBytes @@ -1060,8 +1060,7 @@ async def get_chunk( dims = getChunkDims(dset_json) type_json = dset_json["type"] dt = createDataType(type_json) - layout_json = dset_json["layout"] - layout_class = layout_json.get("class") + layout_class = getDatasetLayoutClass(dset_json) chunk_dims = getChunkDims(dset_json) fill_value = getFillValue(dset_json) diff --git a/hsds/domain_crawl.py b/hsds/domain_crawl.py index 19eee5df..d9285d45 100644 --- a/hsds/domain_crawl.py +++ b/hsds/domain_crawl.py @@ -433,10 +433,9 @@ async def put_links(self, grp_id, link_items): async def put_data(self, chunk_id, arr): # write a one-chunk dataset value - log.debug(f"DomainCrawler put_data for {chunk_id}, arr: {arr}") + log.debug(f"DomainCrawler put_data for {chunk_id}, arr.shape: {arr.shape}") req = getDataNodeUrl(self._app, chunk_id) req += "/chunks/" + chunk_id - log.debug(f"put_data req: {req}") params = {"bucket": self._bucket} data = arrayToBytes(arr) @@ -600,7 +599,9 @@ async def fetch(self, obj_id): log.error(f"couldn't find {obj_id} in self._objs") return data = self._objs[obj_id] - log.debug(f"got {len(data)} data for {obj_id}") + if data 
is None: + log.error(f"no data found for {obj_id}") + return await self.put_data(obj_id, data) else: diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index 7a05999f..e314e22c 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -26,7 +26,7 @@ from .util.httpUtil import getHref, respJsonAssemble from .util.httpUtil import jsonResponse, getBooleanParam from .util.chunkUtil import getChunkIds -from .util.dsetUtil import getPreviewQuery +from .util.dsetUtil import getPreviewQuery, getHyperslabSelection from .util.authUtil import getUserPasswordFromRequest, aclCheck from .util.authUtil import validateUserPassword from .util.domainUtil import getDomainFromRequest, getPathForDomain, isValidDomain @@ -476,7 +476,7 @@ async def POST_Dataset(request): log.warn(msg) raise HTTPBadRequest(reason=msg) - log.debug(f"got body: {body}") + log.debug(f"POST_Dataset got body: {body}") # get domain, check authorization domain = getDomainFromRequest(request) if not isValidDomain(domain): @@ -507,6 +507,7 @@ def _updateInitValuesList(kwargs): # to init_values list if "value" in kwargs: init_values.append(kwargs["value"]) + log.debug(f"init value appended: {kwargs['value']}") del kwargs["value"] else: # add a placeholder @@ -613,12 +614,13 @@ def _updateInitValuesList(kwargs): init_data = init_values[index] if init_data is None: continue # no data to initialize + log.debug(f"init data: {init_data}") dset_json = objects[index] dset_id = dset_json["id"] log.debug(f"init value, post_rsp: {dset_json}") layout_class = getDatasetLayoutClass(dset_json) log.debug(f"layout_class: {layout_class}") - if layout_class != "H5D_CHUNKED": + if layout_class not in ("H5D_CONTIGUOUS", "H5D_CHUNKED"): msg = f"dataset init_data used with unsupported layout_class: {layout_class}" log.error(msg) raise HTTPInternalServerError() @@ -626,10 +628,8 @@ def _updateInitValuesList(kwargs): log.debug(f"init data layout is: {layout_dims}") # make selection for entire dataspace dims = getShapeDims(dset_json["shape"]) - slices = [] - for dim in dims: - s = slice(0, dim, 1) - slices.append(s) + slices = getHyperslabSelection(dims) + chunk_ids = getChunkIds(dset_id, slices, layout_dims) log.debug(f"init data, got chunk_ids: {chunk_ids}") if not chunk_ids or len(chunk_ids) != 1: diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index b26ba8ee..b9a179c7 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -26,15 +26,14 @@ from aiohttp import ClientResponseError from h5json.array_util import encodeData, decodeData, bytesToArray, bytesArrayToList -from h5json.array_util import jsonToArray, getNumpyValue +from h5json.array_util import jsonToArray from h5json.objid import getCollectionForId, createObjId, getRootObjId from h5json.objid import isSchema2Id, getS3Key, isValidUuid from h5json.hdf5dtype import getBaseTypeJson, validateTypeItem, createDataType, getItemSize -from h5json.filters import getFiltersJson from h5json.shape_util import getShapeDims, getShapeClass -from h5json.dset_util import guessChunk, getChunkSize -from h5json.dset_util import validateChunkLayout, getDataSize, getDsetMaxDims -from h5json.dset_util import LAYOUT_CLASSES +from h5json.filters import getFiltersJson +from h5json.dset_util import guessChunk, getChunkSize, validateDatasetCreationProps +from h5json.dset_util import getDataSize, isExtensible from .util.nodeUtil import getDataNodeUrl from .util.authUtil import getAclKeys @@ -1302,63 +1301,6 @@ async def deleteObject(app, obj_id, bucket=None): del meta_cache[obj_id] # remove from cache -def 
validateDatasetCreationProps(creation_props, type_json=None, shape=None): - """ validate creation props """ - - log.debug(f"validateDatasetCreationProps: {creation_props}") - if "fillValue" in creation_props: - if not type_json or not shape: - msg = "shape and type must be set to use fillValue" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - # validate fill value compatible with type - dt = createDataType(type_json) - fill_value = creation_props["fillValue"] - log.debug(f"got fill_value: {fill_value}") - if "fillValue_encoding" in creation_props: - fill_value_encoding = creation_props["fillValue_encoding"] - if fill_value_encoding not in ("None", "base64"): - msg = f"unexpected value for fill_value_encoding: {fill_value_encoding}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - else: - # should see a string in this case - if not isinstance(fill_value, str): - msg = f"unexpected fill value: {fill_value} " - msg += f"for encoding: {fill_value_encoding}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - else: - fill_value_encoding = None - - try: - getNumpyValue(fill_value, dt=dt, encoding=fill_value_encoding) - except ValueError: - msg = f"invalid fill value: {fill_value}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - if "filters" in creation_props: - if not type_json or not shape: - msg = "shape and type must be set to use filters" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - - supported_filters = getSupportedFilters(include_compressors=True) - log.debug(f"supported_filters: {supported_filters}") - try: - filters_out = getFiltersJson(creation_props, supported_filters=supported_filters) - except (KeyError, ValueError): - # raise bad request exception if not valid - msg = "invalid filter provided" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - # replace filters with our standardized list - log.debug(f"setting filters to: {filters_out}") - creation_props["filters"] = filters_out - - def getCreateArgs(body, root_id=None, bucket=None, @@ -1544,10 +1486,18 @@ def getDatasetCreateArgs(body, try: shape_class = getShapeClass(shape_json) shape_dims = getShapeDims(shape_json) + if "maxdims" in shape_json: + max_dims = shape_json["maxdims"] + is_extensible = isExtensible(shape_dims, max_dims) + else: + max_dims = None + is_extensible = False except (KeyError, TypeError, ValueError): msg = f"Invalid shape: {shape_json}" log.warn(msg) raise HTTPBadRequest(reason=msg) + + log.debug(f"shape_class: {shape_class}, shape_dims: {shape_dims}") log.debug(f"got createArgs: {list(kwargs.keys())}") @@ -1555,58 +1505,73 @@ def getDatasetCreateArgs(body, # get layout for dataset creation log.debug("getting dataset creation settings") - layout_props = None min_chunk_size = int(config.get("min_chunk_size")) max_chunk_size = int(config.get("max_chunk_size")) type_json = kwargs["type"] + item_size = getItemSize(type_json) if item_size == "H5T_VARIABLE": item_size = config.get("default_vlen_type_size", default=128) - creation_props = kwargs["creation_props"] - layout_props = None - - if creation_props: - validateDatasetCreationProps(creation_props, type_json=type_json, shape=shape_json) - if "layout" in creation_props: - layout_props = creation_props["layout"] - try: - validateChunkLayout(shape_json, item_size, layout_props, chunk_table=chunk_table) - except ValueError: - msg = f"invalid chunk layout: {layout_props}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + if shape_dims is None: + dset_size = 0 else: - creation_props = {} + dset_size = getDataSize(shape_dims, item_size) + 
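When the request does not specify an explicit layout, the selection logic that follows boils down to: null shapes get no layout, scalar and small non-extensible simple datasets become contiguous, and everything else is chunked. A condensed sketch of that branching (the function name is illustrative, not part of the patch):

    def pick_layout_class(shape_class, dset_size, is_extensible, min_chunk_size):
        if shape_class == "H5S_NULL":
            return None                 # no storage layout at all
        if shape_class == "H5S_SCALAR":
            return "H5D_CONTIGUOUS"
        if dset_size <= min_chunk_size and not is_extensible:
            return "H5D_CONTIGUOUS"     # small fixed-size dataset
        return "H5D_CHUNKED"

    assert pick_layout_class("H5S_SIMPLE", 500, False, 1024 * 1024) == "H5D_CONTIGUOUS"
    assert pick_layout_class("H5S_SIMPLE", 500, True, 1024 * 1024) == "H5D_CHUNKED"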
creation_props = kwargs["creation_props"] layout_class = None + layout_json = {} chunk_dims = None - if layout_props: - layout_class = layout_props.get("class") + partition_count = None - if layout_class: - if layout_class not in LAYOUT_CLASSES: - msg = f"unknown layout_class: {layout_class}" + if creation_props: + log.debug(f"POST_Dataset creation props: {creation_props}") + try: + validateDatasetCreationProps(creation_props, type_json=type_json, shape=shape_json) + except ValueError as ve: + msg = f"Provided creation properties are invalid: {ve}" log.warn(msg) raise HTTPBadRequest(reason=msg) - # check dims is defined for any chunked layout - if layout_class.startswith("H5D_CHUNKED"): - if "dims" not in layout_props: - msg = "chunked layout specified without dims" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - chunk_dims = layout_props["dims"] - if len(chunk_dims) != len(shape_dims): - msg = "chunk dimensions have different rank than dataset" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - elif layout_class == "H5D_CONTIGUOUS_REF" and getItemSize(type_json) == "H5T_VARIABLE": + log.debug(f"create_props after validation: {creation_props}") + if creation_props.get("layout"): + layout_json = creation_props["layout"] + layout_class = layout_json.get("class") + if "filters" in creation_props: + # normalize filter format + filters = getFiltersJson(creation_props) + supported_filters = getSupportedFilters() + log.debug(f"supported filters: {supported_filters}") + for filter_item in filters: + if filter_item["name"] not in supported_filters: + msg = f"Unsupported filter id: {filter_item['id']}" + log.warn(msg) + raise HTTPBadRequest(reason=msg) + creation_props["filters"] = filters + log.debug(f"post validate creation properties: {creation_props}") + + if layout_class: + if layout_class == "H5D_CONTIGUOUS_REF" and getItemSize(type_json) == "H5T_VARIABLE": # ref dataset does not work with vlen type - msg = "H5D_CONTIGUOUS_REF cannot be used with variable length types" + msg = "H5D_CONTIGUOUS_REF datasets cannot be used with variable length types" log.warn(msg) raise HTTPBadRequest(reason=msg) + + if "dims" in layout_json: + chunk_dims = layout_json["dims"] + if chunk_dims: + # log a warning if the chunk size is too small or too big + chunk_size = getChunkSize(chunk_dims, item_size) + if chunk_size < min_chunk_size: + msg = f"chunk size: {chunk_size} less than recommended min size: {min_chunk_size}" + log.warn(msg) + elif chunk_size > max_chunk_size: + msg = f"chunk size: {chunk_size} greater than recommended " + msg += f"max size: {max_chunk_size}" + log.debug(msg) else: - pass - + # log a warning if a contiguous layout is used with too large a dataset + if dset_size > max_chunk_size: + msg = f"dataset larger than recommended {max_chunk_size} for CONTIGUOUS storage" + log.warn(msg) elif shape_class == "H5S_NULL": layout_class = None log.debug("using None layout for H5S_NULL dataset") @@ -1614,8 +1579,7 @@ def getDatasetCreateArgs(body, layout_class = "H5D_CONTIGUOUS" log.debug("Using H5D_CONTIGUOUS for H5S_SCALAR dataset") elif shape_class == "H5S_SIMPLE": - dset_size = getDataSize(shape_dims, item_size) - if dset_size <= min_chunk_size: + if dset_size <= min_chunk_size and not is_extensible: # default to contiguous layout_class = "H5D_CONTIGUOUS" log.debug(f"Using H5D_CONTIGUOUS for small (<{min_chunk_size}) dataset") @@ -1625,42 +1589,25 @@ log.debug(f"item_size: {item_size}") log.debug(f"chunk_min: {min_chunk_size}") log.debug(f"chunk_max: 
{max_chunk_size}") - kwargs = {"chunk_min": min_chunk_size, "chunk_max": max_chunk_size} - chunk_dims = guessChunk(shape_json, item_size, **kwargs) + args = {"chunk_min": min_chunk_size, "chunk_max": max_chunk_size} + chunk_dims = guessChunk(shape_json, item_size, **args) log.debug(f"initial autochunk layout: {chunk_dims}") chunk_size = getChunkSize(chunk_dims, item_size) - - # log warning if the chunk shape if chunk size is too small or too big - if chunk_size < min_chunk_size: - msg = f"chunk size: {chunk_size} less than recommended min size: {min_chunk_size}" - log.warn(msg) - elif chunk_size > max_chunk_size: - msg = f"chunk size: {chunk_size} greater than recommended " - msg += f"max size: {max_chunk_size}" - log.debug(msg) + log.debug(f"chunk_size: {chunk_size}") else: msg = f"unexpected shape_class: {shape_class}" log.warn(msg) raise HTTPBadRequest(reason=msg) - if not layout_props: - layout_props = {"class": layout_class} - if chunk_dims: - layout_props["dims"] = chunk_dims - log.debug(f"using dataset layout: {layout_props}") - creation_props["layout"] = layout_props - # set partition_count if needed: max_chunks_per_folder = int(config.get("max_chunks_per_folder")) set_partition = False if max_chunks_per_folder > 0: - if "dims" in layout_props: + if "dims" in layout_json: set_partition = True - if set_partition: + if set_partition and dset_size > max_chunk_size: log.debug(f"updating layout for partition constraint: {max_chunks_per_folder}") - shape_dims = getShapeDims(shape_json) - max_dims = getDsetMaxDims(shape_json) num_chunks = 1 rank = len(shape_dims) @@ -1692,33 +1639,22 @@ def getDatasetCreateArgs(body, msg = f"set partition count to: {partition_count}, " msg += f"num_chunks: {num_chunks}" log.info(msg) - layout_props["partition_count"] = partition_count else: msg = "do not need chunk partitions, num_chunks: " msg += f"{num_chunks} max_chunks_per_folder: " msg += f"{max_chunks_per_folder}" log.info(msg) - if layout_class in ("H5D_CHUNKED_REF", "H5D_CHUNKED_REF_INDIRECT"): - chunk_size = getChunkSize(chunk_dims, item_size) - - msg = f"chunk_size: {chunk_size}, min: {min_chunk_size}, " - msg += f"max: {max_chunk_size}" - log.debug(msg) - # nothing to do about inefficiently small chunks, but large chunks - # can be subdivided - if chunk_size < min_chunk_size: - msg = f"chunk size: {chunk_size} less than min size: " - msg += f"{min_chunk_size} for {layout_class} dataset" - log.warn(msg) - elif chunk_size > max_chunk_size: - msg = f"chunk size: {chunk_size} greater than max size: " - msg += f"{max_chunk_size}, for {layout_class} dataset" - log.warn(msg) - layout_props["dims"] = chunk_dims - - creation_props["layout"] = layout_props + if layout_class: + # should be set if shape is not H5S_NULL + if "class" not in layout_json: + layout_json["class"] = layout_class + if chunk_dims: + layout_json["dims"] = chunk_dims + log.debug(f"using dataset layout: {layout_json}") + creation_props["layout"] = layout_json kwargs["creation_props"] = creation_props + log.debug(f"updated creation props: {creation_props}") # # get input data if present @@ -1733,9 +1669,7 @@ def getDatasetCreateArgs(body, input_data = body["value"] msg = "input data doesn't match request type and shape" dims = getShapeDims(shape_json) - if not dims: - log.warn(msg) - raise HTTPBadRequest(reason=msg) + arr_dtype = createDataType(type_json) try: diff --git a/hsds/util/chunkUtil.py b/hsds/util/chunkUtil.py index 9dd51bf9..7d299d9a 100644 --- a/hsds/util/chunkUtil.py +++ b/hsds/util/chunkUtil.py @@ -1,6 +1,7 @@ import 
numpy as np from h5json.array_util import ndarray_compare +from h5json.dset_util import getDatasetLayout from .. import hsds_logger as log @@ -37,7 +38,16 @@ def getNumChunks(selection, layout): If selection is provided (a list of slices), return the number of chunks that intersect with the selection. """ + print(f"getNumChunks: {selection}, layout: {layout}") + if len(selection) == 0: + print("zero length selection") + return 0 + rank = len(layout) + if rank == 1 and layout[0] == 1: + # scalar dataset + print("scalar dset") + return 1 if len(selection) != rank: msg = f"selection list has {len(selection)} items, but rank is {rank}" raise ValueError(msg) @@ -47,10 +57,12 @@ def getNumChunks(selection, layout): if isinstance(s, slice): if s.stop <= s.start: log.debug("null selection") + print("null selection") return 0 else: # coordinate list if len(s) == 0: + print("null coordinate list") return 0 # first, get the number of chunks needed for any coordinate selection chunk_indices = [] @@ -80,6 +92,8 @@ def getNumChunks(selection, layout): else: num_chunks = 1 + print("num_chunks:", num_chunks) + # now deal with any slices in the selection for i in range(len(selection)): s = selection[i] @@ -207,11 +221,8 @@ def getPartitionKey(chunk_id, partition_count): def getChunkIdForPartition(chunk_id, dset_json): """Return the partition specific chunk id for given chunk""" - if "layout" not in dset_json: - msg = "No layout found in dset_json" - log.error(msg) - raise KeyError(msg) - layout_json = dset_json["layout"] + + layout_json = getDatasetLayout(dset_json) if "partition_count" in layout_json: partition_count = layout_json["partition_count"] partition = getChunkPartition(chunk_id) @@ -250,7 +261,12 @@ def chunk_id_to_index(chunk_id): indices.append(x) return indices + log.debug(f"getChunkIds - dset_id: {dset_id}, selection: {selection}, layout: {layout}") + if prefix: + log.debug(f"prefix: {prefix}") + num_chunks = getNumChunks(selection, layout) + log.debug(f"getChunkIds - num_chunks: {num_chunks}") if num_chunks == 0: return [] # empty list if prefix is None: diff --git a/hsds/util/dsetUtil.py b/hsds/util/dsetUtil.py index 3704822d..49723750 100644 --- a/hsds/util/dsetUtil.py +++ b/hsds/util/dsetUtil.py @@ -17,7 +17,7 @@ from .. 
import hsds_logger as log - + def getShapeJson(body): """ Return normalized json description of data space """ @@ -27,11 +27,11 @@ if "shape" not in body: shape_class = "H5S_SCALAR" - log.debug("not shape given - using H5S_SCALAR") + log.debug("getShapeJson - no shape given, using H5S_SCALAR") return {"class": shape_class} body_shape = body["shape"] - log.debug(f"got shape: {body_shape}") + log.debug(f"getShapeJson - got shape: {body_shape}") if isinstance(body_shape, int): shape_class = "H5S_SIMPLE" @@ -112,7 +112,7 @@ def getShapeJson(body): shape_json["dims"] = dims if maxdims: shape_json["maxdims"] = maxdims - log.debug(f"returning shape_json: {shape_json}") + log.debug(f"getShapeJson - returning shape_json: {shape_json}") return shape_json @@ -123,6 +123,11 @@ def getHyperslabSelection(dims, start=None, stop=None, step=None): TBD: for step>1, adjust the slice to not extend beyond last data point returned """ + + if len(dims) == 0: + # scalar dataset + dims = (1,) + rank = len(dims) if start: if not isinstance(start, (list, tuple)): @@ -494,20 +499,25 @@ def get_slices(select, dset_json): dset_id = dset_json["id"] datashape = dset_json["shape"] - if datashape["class"] == "H5S_NULL": + shape_class = datashape["class"] + if shape_class == "H5S_NULL": msg = "Null space datasets can not be used as target for GET value" log.warn(msg) raise HTTPBadRequest(reason=msg) - dims = getShapeDims(datashape) # throws 400 for HS_NULL dsets + if shape_class == "H5S_SCALAR": + # return single slice + slices = [slice(0, 1, 1), ] + else: + dims = getShapeDims(datashape) # throws 400 for H5S_NULL dsets - try: - slices = getSelectionList(select, dims) - except ValueError: - msg = f"Invalid selection: {select} on dims: {dims} " - msg += f"for dataset: {dset_id}" - log.warn(msg) - raise + try: + slices = getSelectionList(select, dims) + except ValueError: + msg = f"Invalid selection: {select} on dims: {dims} " + msg += f"for dataset: {dset_id}" + log.warn(msg) + raise return slices diff --git a/hsds/util/storUtil.py b/hsds/util/storUtil.py index 3bbb073c..b37e25bc 100644 --- a/hsds/util/storUtil.py +++ b/hsds/util/storUtil.py @@ -69,7 +69,7 @@ def getCompressors(): def getSupportedFilters(include_compressors=True): """return list of other supported filters""" filters = [ - # "bitshuffle", + "bitshuffle", "shuffle", "fletcher32", "nbit", # No-op diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py index e8d4feb3..5979e042 100755 --- a/tests/integ/dataset_test.py +++ b/tests/integ/dataset_test.py @@ -785,10 +785,19 @@ def testResizableDataset(self): # create the dataset req = self.endpoint + "/datasets" payload = {"type": "H5T_IEEE_F32LE", "shape": 10, "maxdims": 20} - payload["creationProperties"] = {"fillValue": 3.12} + contiguous_layout = {"class": "H5D_CONTIGUOUS"} + cpl = {"fillValue": 3.12, "layout": contiguous_layout} + payload["creationProperties"] = cpl + req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) - self.assertEqual(rsp.status_code, 201) # create dataset + self.assertEqual(rsp.status_code, 400) # need chunk layout for resizable + + # if we remove the layout, HSDS will set up a chunked layout for us + del cpl["layout"] + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) dset_uuid = rspJson["id"] self.assertTrue(helper.validateId(dset_uuid)) @@ -816,8 +825,16 @@ def testResizableDataset(self): 
self.assertTrue("maxdims" in shape) self.assertEqual(shape["maxdims"][0], 20) + self.assertTrue("creationProperties" in rspJson) creationProps = rspJson["creationProperties"] + self.assertTrue("fillValue" in creationProps) self.assertEqual(creationProps["fillValue"], 3.12) + self.assertTrue("layout" in creationProps) + layout = creationProps["layout"] + self.assertTrue("class" in layout) + self.assertEqual(layout["class"], "H5D_CHUNKED") + self.assertTrue("dims" in layout) + self.assertEqual(len(layout["dims"]), 1) # verify shape using the GET shape request req = req + "/shape" @@ -1271,8 +1288,11 @@ def testInvalidDatasetLayout(self): # dataset create req = self.endpoint + "/datasets" - dims = [365, 780, 1024] - maxdims = [0, 780, 1024] + # TBD: the larger dimensions are causing SN to crash + # dims = [365, 780, 1024] + dims = [365, 780, 10] + # maxdims = [0, 780, 1024] + maxdims = [0, 780, 10] payload = { "type": "H5T_IEEE_F32LE", "shape": dims, @@ -1455,7 +1475,7 @@ def testCreationPropertiesContiguousDataset(self): req = self.endpoint + "/datasets" # Create ~1GB dataset - layout = {"class": "H5D_CONTIGUOUS"} + layout = {"class": "H5D_CHUNKED", "dims": [10, 20]} gzip_filter = { "class": "H5Z_FILTER_DEFLATE", "id": 1, @@ -1493,8 +1513,9 @@ def testCreationPropertiesContiguousDataset(self): self.assertTrue("layout" in cpl) layout_json = cpl["layout"] self.assertTrue("class" in layout_json) - self.assertEqual(layout_json["class"], "H5D_CONTIGUOUS") - self.assertFalse("dims" in layout_json) + self.assertEqual(layout_json["class"], "H5D_CHUNKED") + self.assertTrue("dims" in layout_json) + self.assertEqual(layout_json["dims"], [10, 20]) def testCompressionFiltersDataset(self): # test Dataset with creation property list @@ -1519,11 +1540,10 @@ def testCompressionFiltersDataset(self): req = self.endpoint + "/datasets" payload = {"type": "H5T_IEEE_F32LE", "shape": [40, 80]} - payload["creationProperties"] = { - "filters": [ - compressor, - ] - } + filters = [compressor, ] + layout = {"class": "H5D_CHUNKED", "dims": [10, 20]} + cpl = {"filters": filters, "layout": layout} + payload["creationProperties"] = cpl req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) # create dataset @@ -1546,7 +1566,7 @@ def testCompressionFiltersDataset(self): self.assertTrue("layout" in cpl) layout_json = cpl["layout"] self.assertTrue("class" in layout_json) - self.assertEqual(layout_json["class"], "H5D_CONTIGUOUS") + self.assertEqual(layout_json["class"], "H5D_CHUNKED") # verify compression self.assertTrue("creationProperties" in rspJson) @@ -1580,14 +1600,14 @@ def testCompressionFilterOptionDataset(self): # create the dataset req = self.endpoint + "/datasets" - compressor = {"class": "H5Z_FILTER_USER", "name": "lz4", "level": 5} + compressor = {"class": "H5Z_FILTER_LZ4", "name": "lz4", "level": 5} + filters = [compressor, ] payload = {"type": "H5T_IEEE_F32LE", "shape": [40, 80]} - payload["creationProperties"] = { - "filters": [ - compressor, - ] - } + layout = {"class": "H5D_CHUNKED", "dims": [10, 20]} + cpl = {"filters": filters, "layout": layout} + payload["creationProperties"] = cpl + req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) # create dataset @@ -1610,8 +1630,8 @@ def testCompressionFilterOptionDataset(self): self.assertTrue("layout" in cpl) layout_json = cpl["layout"] self.assertTrue("class" in layout_json) - 
self.assertEqual(layout_json["class"], "H5D_CONTIGUOUS") - self.assertFalse("dims" in layout_json) + self.assertEqual(layout_json["class"], "H5D_CHUNKED") + self.assertTrue("dims" in layout_json) # verify compression self.assertTrue("creationProperties" in rspJson) @@ -1622,7 +1642,7 @@ def testCompressionFilterOptionDataset(self): filter = filters[0] self.assertTrue(isinstance(filter, dict)) self.assertTrue("class" in filter) - self.assertEqual(filter["class"], "H5Z_FILTER_USER") + self.assertEqual(filter["class"], "H5Z_FILTER_LZ4") self.assertTrue("id" in filter) self.assertTrue("name" in filter) self.assertEqual(filter["name"], "lz4") @@ -1852,16 +1872,11 @@ def testAutoChunk1dDataset(self): {"name": "z", "type": "H5T_IEEE_F64LE"}, ) datatype = {"class": "H5T_COMPOUND", "fields": fields} + item_size = 12 # 3 fields of 4 bytes each + cpl = {"fillValue": 3.12} # no layout given + + payload = {"type": datatype, "shape": dims, "creationProperties": cpl} - payload = {"type": datatype, "shape": dims} - # the following specifies an efficiently small chunk size - chunk_dims = [10,] - payload["creationProperties"] = { - "layout": { - "class": "H5D_CHUNKED", - "dims": chunk_dims - } - } req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) # create dataset @@ -1884,12 +1899,23 @@ def testAutoChunk1dDataset(self): rspJson = json.loads(rsp.text) self.assertTrue("creationProperties" in rspJson) cpl = rspJson["creationProperties"] + self.assertTrue("fillValue" in cpl) self.assertTrue("layout" in cpl) layout_json = cpl["layout"] self.assertTrue("class" in layout_json) self.assertEqual(layout_json["class"], "H5D_CHUNKED") self.assertTrue("dims" in layout_json) - self.assertEqual(layout_json["dims"], chunk_dims) + chunk_dims = layout_json["dims"] + self.assertEqual(len(chunk_dims), 1) + self.assertTrue(chunk_dims[0] < dims[0]) + + chunk_size = chunk_dims[0] * item_size + + # chunk size will be based on server config, but assume a min/max of 1MB to 1GB + CHUNK_MIN = 1024 * 1024 + CHUNK_MAX = 1024 * 1024 * 1024 + self.assertTrue(chunk_size >= CHUNK_MIN) + self.assertTrue(chunk_size <= CHUNK_MAX) def testAutoChunk2dDataset(self): # test Dataset where chunk layout is set automatically @@ -1908,6 +1934,7 @@ def testAutoChunk2dDataset(self): req = self.endpoint + "/datasets" # 50K x 80K dataset dims = [50000, 80000] + item_size = 4 # 4 bytes per float32 payload = {"type": "H5T_IEEE_F32LE", "shape": dims} req = self.endpoint + "/datasets" @@ -1937,11 +1964,11 @@ def testAutoChunk2dDataset(self): self.assertTrue("class" in layout_json) self.assertEqual(layout_json["class"], "H5D_CHUNKED") self.assertTrue("dims" in layout_json) - layout = layout_json["dims"] - self.assertEqual(len(layout), 2) - self.assertTrue(layout[0] < dims[0]) - self.assertTrue(layout[1] < dims[1]) - chunk_size = layout[0] * layout[1] * 4 + chunk_dims = layout_json["dims"] + self.assertEqual(len(chunk_dims), 2) + self.assertTrue(chunk_dims[0] < dims[0]) + self.assertTrue(chunk_dims[1] < dims[1]) + chunk_size = chunk_dims[0] * chunk_dims[1] * item_size # chunk size will be based on server config, but assume a min/max of 1MB to 1GB CHUNK_MIN = 1024 * 1024 diff --git a/tests/integ/filter_test.py b/tests/integ/filter_test.py index ea2df637..0cd7fdb5 100755 --- a/tests/integ/filter_test.py +++ b/tests/integ/filter_test.py @@ -58,6 +58,8 @@ def testDeflateCompression(self): # Create ~1MB dataset payload = {"type": "H5T_STD_I8LE", "shape": [1024, 1024]} + # 
use a chunked layout for compression + layout = {"class": "H5D_CHUNKED", "dims": [64, 64]} # define deflate compression gzip_filter = { "class": "H5Z_FILTER_DEFLATE", @@ -65,7 +67,7 @@ def testDeflateCompression(self): "level": 9, "name": "deflate", } - payload["creationProperties"] = {"filters": [gzip_filter]} + payload["creationProperties"] = {"layout": layout, "filters": [gzip_filter]} req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) # create dataset @@ -120,7 +122,9 @@ def testShuffleFilter(self): payload = {"type": "H5T_STD_I32LE", "shape": [1024, 1024]} # define sshufle compression shuffle_filter = {"class": "H5Z_FILTER_SHUFFLE", "id": 2, "name": "shuffle"} - payload["creationProperties"] = {"filters": [shuffle_filter]} + # use chunked layout for compression + layout = {"class": "H5D_CHUNKED", "dims": [64, 64]} + payload["creationProperties"] = {"filters": [shuffle_filter], "layout": layout} req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) # create dataset @@ -182,7 +186,11 @@ def testShuffleAndDeflate(self): } # and shuffle compression shuffle_filter = {"class": "H5Z_FILTER_SHUFFLE", "id": 2, "name": "shuffle"} - payload["creationProperties"] = {"filters": [shuffle_filter, gzip_filter]} + filters = [shuffle_filter, gzip_filter] + # use chunked layout + layout = {"class": "H5D_CHUNKED", "dims": [64, 64]} + payload["creationProperties"] = {"layout": layout, "filters": filters} + req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) # create dataset @@ -237,7 +245,9 @@ def testBitShuffle(self): # bit shuffle bitshuffle_filter = {"class": "H5Z_FILTER_BITSHUFFLE", "id": 32008, "name": "bitshuffle"} - payload["creationProperties"] = {"filters": [bitshuffle_filter, ]} + # use chunked layout + layout = {"class": "H5D_CHUNKED", "dims": [64, 64]} + payload["creationProperties"] = {"filters": [bitshuffle_filter], "layout": layout} req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) # create dataset @@ -299,7 +309,10 @@ def testBitShuffleAndDeflate(self): } # and bit shuffle bitshuffle_filter = {"class": "H5Z_FILTER_BITSHUFFLE", "id": 32008, "name": "bitshuffle"} - payload["creationProperties"] = {"filters": [bitshuffle_filter, gzip_filter]} + filters = [bitshuffle_filter, gzip_filter] + # use chunked layout + layout = {"class": "H5D_CHUNKED", "dims": [64, 64]} + payload["creationProperties"] = {"filters": filters, "layout": layout} req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) # create dataset diff --git a/tests/integ/link_test.py b/tests/integ/link_test.py index 511bdf4e..244f8f5d 100755 --- a/tests/integ/link_test.py +++ b/tests/integ/link_test.py @@ -1842,7 +1842,6 @@ def testPutLinkMultipleWithTimestamps(self): self.assertTrue(False) # unexpected self.assertTrue("created" in ret_link) self.assertTrue(ret_link["created"] in timestamps) - print(timestamps) def testDeleteLinkMultiple(self): domain = self.base_domain + "/testDeleteLinkMultiple.h5" diff --git a/tests/integ/pointsel_test.py b/tests/integ/pointsel_test.py index 194eb2ce..57949114 100755 --- a/tests/integ/pointsel_test.py +++ b/tests/integ/pointsel_test.py @@ 
-1370,8 +1370,10 @@ def testDatasetChunkPartitioning(self): rsp = self.session.get(req, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) - self.assertTrue("layout" in rspJson) - layout_json = rspJson["layout"] + self.assertTrue("creationProperties" in rspJson) + creation_props = rspJson["creationProperties"] + self.assertTrue("layout" in creation_props) + layout_json = creation_props["layout"] self.assertTrue("class" in layout_json) self.assertEqual(layout_json["class"], "H5D_CHUNKED") self.assertTrue("dims" in layout_json) diff --git a/tests/integ/value_test.py b/tests/integ/value_test.py index 293c625d..02a4f990 100755 --- a/tests/integ/value_test.py +++ b/tests/integ/value_test.py @@ -982,7 +982,6 @@ def testScalarDatasetInitData(self): "id", "shape", "hrefs", - "layout", "creationProperties", "attributeCount", "created", @@ -1060,7 +1059,6 @@ def testScalarDatasetInitDataMulti(self): "id", "shape", "hrefs", - "layout", "creationProperties", "attributeCount", "created", @@ -2133,7 +2131,7 @@ def testResizable1DValue(self): # read values from the extended region req = self.endpoint + "/datasets/" + dset_uuid + "/value" - params = {"select": "[{}:{}]".format(0, num_elements)} + params = {"select": f"[0:{num_elements}]"} rsp = self.session.get(req, params=params, headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) @@ -3236,7 +3234,7 @@ def testARangeInitializerDataset(self): extent = 1_000_000_000 # one billion elements dset_dims = [extent, ] layout = {"class": "H5D_CHUNKED"} - layout["dims"] = dset_dims + layout["dims"] = [1_000, ] range_start = 0 # -0.25 range_step = 1 @@ -3286,6 +3284,7 @@ def testARangeInitializerDataset(self): def testIntelligentRangeGet1D(self): test_name = "testIntelligentRangeGet1D" + print(test_name, self.base_domain) headers = helper.getRequestHeaders(domain=self.base_domain) diff --git a/tests/unit/chunk_util_test.py b/tests/unit/chunk_util_test.py index 7ae16bd5..623f26ca 100755 --- a/tests/unit/chunk_util_test.py +++ b/tests/unit/chunk_util_test.py @@ -115,11 +115,44 @@ def testGetNumChunks(self): selection = getHyperslabSelection(datashape, (0, 0), (100, 100), (20, 40)) count = getNumChunks(selection, layout) self.assertEqual(count, 15) + # test with scalar + datashape = () + layout = (1, ) + selection = getHyperslabSelection(datashape, 0, 1) + print("selection:", selection) + count = getNumChunks(selection, layout) + self.assertEqual(count, 1) def testGetChunkIds(self): # getChunkIds(dset_id, selection, layout, dim=0, prefix=None, chunk_ids=None): dset_id = "d-12345678-1234-1234-1234-1234567890ab" + datashape = [] + layout = (1,) + + selection = getHyperslabSelection(datashape, 0, 1) + num_chunks = getNumChunks(selection, layout) + + self.assertEqual(num_chunks, 1) + chunk_ids = getChunkIds(dset_id, selection, layout) + self.assertEqual(len(chunk_ids), 1) + chunk_id = chunk_ids[0] + self.assertTrue(chunk_id.startswith("c-")) + self.assertTrue(chunk_id.endswith("_0")) + self.assertEqual(chunk_id[2:-2], dset_id[2:]) + self.assertEqual(len(chunk_id), 2 + 36 + 2) + self.assertEqual(getDatasetId(chunk_id), dset_id) + + selection = getHyperslabSelection(datashape) + chunk_ids = getChunkIds(dset_id, selection, layout) + self.assertEqual(len(chunk_ids), 1) + chunk_id = chunk_ids[0] + self.assertTrue(chunk_id.startswith("c-")) + self.assertTrue(chunk_id.endswith("_0")) + self.assertEqual(chunk_id[2:-2], dset_id[2:]) + self.assertEqual(len(chunk_id), 2 + 36 + 2) + 
self.assertEqual(getDatasetId(chunk_id), dset_id) + datashape = [1,] layout = (1,) selection = getHyperslabSelection(datashape) diff --git a/tests/unit/dset_util_test.py b/tests/unit/dset_util_test.py index 0e77ab1b..f89690d9 100755 --- a/tests/unit/dset_util_test.py +++ b/tests/unit/dset_util_test.py @@ -14,7 +14,7 @@ import sys sys.path.append("../..") -from hsds.util.dsetUtil import getHyperslabSelection, getSelectionShape +from hsds.util.dsetUtil import getHyperslabSelection, getSelectionShape, get_slices from hsds.util.dsetUtil import getSelectionList, ItemIterator, getSelectionPagination @@ -25,8 +25,40 @@ def __init__(self, *args, **kwargs): self.logger = logging.getLogger() self.logger.setLevel(logging.WARNING) + def testGetSlices(self): + dset_json = {"id": "d-b4b3b3d6-94343adc-1727-28bebf-12caac"} + datashape = {"class": "H5S_SCALAR"} + cprops = {"layout": {"class": "H5D_CONTIGUOUS"}} + dtype_json = {"class": "H5T_INTEGER", "base": "H5T_STD_I32LE"} + dset_json["shape"] = datashape + dset_json["creationProperties"] = cprops + dset_json["type"] = dtype_json + + slices = get_slices("", dset_json) + self.assertEqual(len(slices), 1) + self.assertEqual(slices[0], slice(0, 1, 1)) + + slices = get_slices(None, dset_json) + self.assertEqual(len(slices), 1) + self.assertEqual(slices[0], slice(0, 1, 1)) + def testGetHyperslabSelection(self): # getHyperslabSelection(dsetshape, start, stop, step) + + # Scalar case + datashape = [] + slices = getHyperslabSelection(datashape) + self.assertEqual(len(slices), 1) + self.assertEqual(slices[0], slice(0, 1, 1)) + + slices = getHyperslabSelection(datashape, 0) + self.assertEqual(len(slices), 1) + self.assertEqual(slices[0], slice(0, 1, 1)) + + slices = getHyperslabSelection(datashape, 0, 1) + self.assertEqual(len(slices), 1) + self.assertEqual(slices[0], slice(0, 1, 1)) + # 1-D case datashape = [100,] slices = getHyperslabSelection(datashape) From a2ca1ee159f8ac2b1d8dc7f86a0a0a93c2126300 Mon Sep 17 00:00:00 2001 From: John Readey Date: Fri, 26 Dec 2025 14:25:01 +0800 Subject: [PATCH 44/49] updated for new hdf5-json methods --- hsds/async_lib.py | 9 +- hsds/attr_dn.py | 3 +- hsds/chunk_crawl.py | 11 ++- hsds/chunk_sn.py | 8 +- hsds/chunklocator.py | 6 +- hsds/ctype_dn.py | 3 +- hsds/datanode_lib.py | 3 +- hsds/domain_dn.py | 3 +- hsds/domain_sn.py | 3 +- hsds/dset_dn.py | 2 +- hsds/dset_sn.py | 4 +- hsds/folder_crawl.py | 9 +- hsds/group_dn.py | 2 +- hsds/headnode.py | 13 ++- hsds/hsds_app.py | 18 ++-- hsds/link_dn.py | 2 +- hsds/servicenode.py | 7 +- hsds/servicenode_lib.py | 159 +++++++++++------------------------- hsds/util/chunkUtil.py | 9 +- hsds/util/timeUtil.py | 83 ------------------- pyproject.toml | 2 +- tests/integ/dataset_test.py | 76 ++++++++++------- 22 files changed, 157 insertions(+), 278 deletions(-) delete mode 100755 hsds/util/timeUtil.py diff --git a/hsds/async_lib.py b/hsds/async_lib.py index 9888d3dd..674caa98 100755 --- a/hsds/async_lib.py +++ b/hsds/async_lib.py @@ -23,6 +23,7 @@ from h5json.filters import getFilters from h5json.shape_util import getShapeDims from h5json.dset_util import getDatasetLayoutClass, getDatasetLayout, getChunkDims +from h5json.time_util import getNow from .util.chunkUtil import getDatasetId, getNumChunks, ChunkIterator from .util.dsetUtil import getHyperslabSelection @@ -31,7 +32,7 @@ from .datanode_lib import getFilterOps from . import hsds_logger as log from . import config -import time + # List all keys under given root and optionally update info.json # Note: only works with schema v2 domains! 
@@ -78,7 +79,7 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None): return type_json = dset_json["type"] item_size = getItemSize(type_json) - if "layout" not in dset_json: + if not getDatasetLayout(dset_json): msg = "updateDatasetInfo - expected to find layout in dataset_json " msg += f"for {dset_id}" log.warn(msg) @@ -387,7 +388,7 @@ async def scanRoot(app, rootid, update=False, bucket=None): results["logical_bytes"] = 0 results["checksums"] = {} # map of objid to checksums results["bucket"] = bucket - results["scan_start"] = time.time() + results["scan_start"] = getNow(app=app) app["scanRoot_results"] = results app["scanRoot_keyset"] = set() @@ -442,7 +443,7 @@ async def scanRoot(app, rootid, update=False, bucket=None): # free up memory used by the checksums del results["checksums"] - results["scan_complete"] = time.time() + results["scan_complete"] = getNow(app=app) if update: # write .info object back to S3 diff --git a/hsds/attr_dn.py b/hsds/attr_dn.py index 43c04232..a660b836 100755 --- a/hsds/attr_dn.py +++ b/hsds/attr_dn.py @@ -22,12 +22,13 @@ from h5json.array_util import arrayToBytes, jsonToArray, decodeData from h5json.array_util import bytesToArray, bytesArrayToList, getNumElements from h5json.shape_util import getShapeDims +from h5json.time_util import getNow from .util.attrUtil import validateAttributeName, isEqualAttr from .util.globparser import globmatch from .util.domainUtil import isValidBucketName from .datanode_lib import get_obj_id, get_metadata_obj, save_metadata_obj -from .util.timeUtil import getNow + from . import config from . import hsds_logger as log diff --git a/hsds/chunk_crawl.py b/hsds/chunk_crawl.py index 142577d8..38da4d3e 100755 --- a/hsds/chunk_crawl.py +++ b/hsds/chunk_crawl.py @@ -15,7 +15,6 @@ # import asyncio -import time import traceback import random from asyncio import CancelledError @@ -29,6 +28,7 @@ from h5json.array_util import getNumElements, arrayToBytes, bytesToArray from h5json.shape_util import getShapeDims from h5json.dset_util import getChunkDims +from h5json.time_util import getNow from .util.nodeUtil import getDataNodeUrl, getNodeCount from .util.httpUtil import http_get, http_put, http_post, get_http_client @@ -87,7 +87,7 @@ async def write_chunk_hyperslab( msg += f"bucket: {bucket}" msg += f" dset_json: {dset_json}" log.info(msg) - + partition_chunk_id = getChunkIdForPartition(chunk_id, dset_json) if partition_chunk_id != chunk_id: log.debug(f"using partition_chunk_id: {partition_chunk_id}") @@ -676,6 +676,9 @@ def __init__( app["cc_clients"] = {} self._clients = app["cc_clients"] + def now(self): + return getNow(app=self._app) + def get_status(self): if len(self._status_map) != len(self._chunk_ids): msg = "get_status code while crawler not complete" @@ -720,7 +723,7 @@ async def work(self): log.info(f"ChunkCrawler - client_name: {client_name}") while True: try: - start = time.time() + start = self.now() chunk_id = await self._q.get() if self._limit > 0 and self._hits >= self._limit: msg = f"ChunkCrawler - maxhits exceeded, skipping fetch for chunk: {chunk_id}" @@ -745,7 +748,7 @@ async def work(self): await self.do_work(chunk_id, client=client) self._q.task_done() - elapsed = time.time() - start + elapsed = self.now() - start msg = f"ChunkCrawler - task {chunk_id} start: {start:.3f} " msg += f"elapsed: {elapsed:.3f}" log.debug(msg) diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index f278e4e9..1cca80f1 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -28,8 +28,8 @@ from h5json.array_util import 
bytesArrayToList, jsonToArray, getNumElements, arrayToBytes from h5json.array_util import bytesToArray, squeezeArray, getBroadcastShape from h5json.objid import isValidUuid -from h5json.shape_util import isNullSpace, isScalar, getShapeDims -from h5json.dset_util import getChunkDims, isExtensible, getDsetMaxDims +from h5json.shape_util import isNullSpace, isScalar, getShapeDims, getMaxDims +from h5json.dset_util import getChunkDims, isExtensible from .util.httpUtil import getHref, getAcceptType, getContentType from .util.httpUtil import request_read, jsonResponse, isAWSLambda @@ -163,8 +163,8 @@ def _getAppendRows(params, dset_json, body=None): datashape = dset_json["shape"] dims = getShapeDims(datashape) rank = len(dims) - maxdims = getDsetMaxDims(dset_json) - if not isExtensible(dims, maxdims): + maxdims = getMaxDims(datashape) + if not isExtensible(datashape): msg = "Dataset shape must be extensible for packet updates" log.warn(msg) raise HTTPBadRequest(reason=msg) diff --git a/hsds/chunklocator.py b/hsds/chunklocator.py index 2f8bfbaf..f2cd93d7 100644 --- a/hsds/chunklocator.py +++ b/hsds/chunklocator.py @@ -1,11 +1,11 @@ import sys -import time import h5py import s3fs import numpy as np from . import config from . import hsds_logger as log +from h5json.time_util import getNow from h5json.array_util import bytesArrayToList, getNumElements from .util.dsetUtil import getSelectionList, getSelectionShape @@ -191,7 +191,7 @@ def main(): prefix = config.get("log_prefix") log_timestamps = config.get("log_timestamps", default=False) log.setLogConfig(log_level, prefix=prefix, timestamps=log_timestamps) - start_time = time.time() + start_time = getNow() log.info(f"chunklocator start: {start_time:.2f}") cmd_options = get_cmd_options() @@ -231,6 +231,6 @@ def main(): sys.exit(1) log.info('done') - stop_time = time.time() + stop_time = getNow() log.info(f"chunklocator stop: {stop_time:.2f}") log.info(f"chunklocator elapsed: {(stop_time - start_time):.2f}") diff --git a/hsds/ctype_dn.py b/hsds/ctype_dn.py index 834e02cb..465d8916 100755 --- a/hsds/ctype_dn.py +++ b/hsds/ctype_dn.py @@ -19,11 +19,12 @@ from aiohttp.web import json_response from h5json.objid import isValidUuid, validateUuid +from h5json.time_util import getNow from .datanode_lib import get_obj_id, get_metadata_obj, save_metadata_obj from .datanode_lib import delete_metadata_obj, check_metadata_obj from .util.domainUtil import isValidBucketName -from .util.timeUtil import getNow + from . import hsds_logger as log diff --git a/hsds/datanode_lib.py b/hsds/datanode_lib.py index a6adbe28..6b76e37a 100644 --- a/hsds/datanode_lib.py +++ b/hsds/datanode_lib.py @@ -28,6 +28,7 @@ from h5json.objid import getRootObjId, isRootObjId from h5json.shape_util import getShapeDims from h5json.dset_util import getChunkDims, getDatasetLayoutClass +from h5json.time_util import getNow from .util.nodeUtil import getDataNodeUrl from .util.storUtil import getStorJSONObj, putStorJSONObj, putStorBytes @@ -40,7 +41,7 @@ from .util.chunkUtil import getDatasetId, getChunkSelection, getChunkIndex from .util.nodeUtil import validateInPartition from .util.rangegetUtil import ChunkLocation, chunkMunge, getHyperChunkIndex, getHyperChunkFactors -from .util.timeUtil import getNow + from . import config from . 
import hsds_logger as log from .dset_lib import getFillValue diff --git a/hsds/domain_dn.py b/hsds/domain_dn.py index 0fe0d01c..5b14ba70 100755 --- a/hsds/domain_dn.py +++ b/hsds/domain_dn.py @@ -16,10 +16,11 @@ from aiohttp.web_exceptions import HTTPConflict, HTTPInternalServerError from aiohttp.web import json_response +from h5json.time_util import getNow + from .util.authUtil import getAclKeys from .util.domainUtil import isValidDomain, getBucketForDomain from .util.nodeUtil import validateInPartition -from .util.timeUtil import getNow from .datanode_lib import get_metadata_obj, save_metadata_obj from .datanode_lib import delete_metadata_obj, check_metadata_obj from . import hsds_logger as log diff --git a/hsds/domain_sn.py b/hsds/domain_sn.py index 1ef469d5..cc634526 100755 --- a/hsds/domain_sn.py +++ b/hsds/domain_sn.py @@ -24,6 +24,8 @@ from h5json.objid import createObjId, getCollectionForId from h5json.objid import isValidUuid, isRootObjId, isSchema2Id +from h5json.time_util import getNow + from .util.nodeUtil import getNodeCount, getDataNodeUrl from .util.httpUtil import getObjectClass, http_post, http_put, http_delete @@ -37,7 +39,6 @@ from .util.storUtil import getStorKeys, getCompressors from .util.boolparser import BooleanParser from .util.globparser import globmatch -from .util.timeUtil import getNow from .servicenode_lib import getDomainJson, getObjectJson, getObjectIdByPath from .servicenode_lib import getRootInfo, checkBucketAccess, doFlush, getDomainResponse from .basenode import getVersion diff --git a/hsds/dset_dn.py b/hsds/dset_dn.py index 7b2029f8..5b99711d 100755 --- a/hsds/dset_dn.py +++ b/hsds/dset_dn.py @@ -18,9 +18,9 @@ from aiohttp.web import json_response from h5json.objid import isValidUuid, validateUuid +from h5json.time_util import getNow from .util.domainUtil import isValidBucketName -from .util.timeUtil import getNow from .datanode_lib import get_obj_id, check_metadata_obj, get_metadata_obj from .datanode_lib import save_metadata_obj, delete_metadata_obj from . 
import hsds_logger as log diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index e314e22c..bd7ad394 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -507,7 +507,7 @@ def _updateInitValuesList(kwargs): # to init_values list if "value" in kwargs: init_values.append(kwargs["value"]) - log.debug(f"init value appended: {kwargs['value']}") + log.debug(f"init value appended: {kwargs['value']}") del kwargs["value"] else: # add a placeholder @@ -629,7 +629,7 @@ def _updateInitValuesList(kwargs): # make selection for entire dataspace dims = getShapeDims(dset_json["shape"]) slices = getHyperslabSelection(dims) - + chunk_ids = getChunkIds(dset_id, slices, layout_dims) log.debug(f"init data, got chunk_ids: {chunk_ids}") if not chunk_ids or len(chunk_ids) != 1: diff --git a/hsds/folder_crawl.py b/hsds/folder_crawl.py index 05048758..f1b3fcf2 100644 --- a/hsds/folder_crawl.py +++ b/hsds/folder_crawl.py @@ -13,12 +13,12 @@ # service node of hsds cluster # -import time import asyncio from aiohttp.web_exceptions import HTTPBadRequest, HTTPForbidden, HTTPNotFound from aiohttp.web_exceptions import HTTPGone, HTTPInternalServerError from aiohttp.web_exceptions import HTTPServiceUnavailable +from h5json.time_util import getNow from .servicenode_lib import getObjectJson, getDomainResponse, getDomainJson from .util.nodeUtil import getNodeCount @@ -51,6 +51,9 @@ def __init__( else: self._max_tasks = len(domains) + def now(self): + return getNow(app=self._app) + async def crawl(self): workers = [asyncio.Task(self.work()) for _ in range(self._max_tasks)] # When all work is done, exit. @@ -68,11 +71,11 @@ async def crawl(self): async def work(self): while True: - start = time.time() + start = self.now() domain = await self._q.get() await self.fetch(domain) self._q.task_done() - elapsed = time.time() - start + elapsed = self.now() - start msg = f"FolderCrawler - task {domain} start: {start:.3f} " msg += f"elapsed: {elapsed:.3f}" log.debug(msg) diff --git a/hsds/group_dn.py b/hsds/group_dn.py index e2b69eef..69fa35d4 100755 --- a/hsds/group_dn.py +++ b/hsds/group_dn.py @@ -20,9 +20,9 @@ from aiohttp.web import json_response from h5json.objid import isValidUuid, isSchema2Id, isRootObjId, getRootObjId +from h5json.time_util import getNow from .util.domainUtil import isValidBucketName -from .util.timeUtil import getNow from .datanode_lib import get_obj_id, check_metadata_obj, get_metadata_obj from .datanode_lib import save_metadata_obj, delete_metadata_obj from . import hsds_logger as log diff --git a/hsds/headnode.py b/hsds/headnode.py index 354a17bc..41501d00 100755 --- a/hsds/headnode.py +++ b/hsds/headnode.py @@ -15,13 +15,12 @@ import asyncio import os -import time from aiohttp.web import Application, StreamResponse, run_app, json_response from aiohttp.web_exceptions import HTTPBadRequest, HTTPInternalServerError +from h5json.time_util import unixTimeToUTC, elapsedTime, getNow from . import config -from .util.timeUtil import unixTimeToUTC, elapsedTime from .util.nodeUtil import createNodeId from . 
import hsds_logger as log from .util import query_marathon as marathonClient @@ -46,7 +45,7 @@ def __init__(self, node_id=None, node_type=None, node_host=None, node_port=None) self._type = node_type self._host = node_host self._port = node_port - now = time.time() + now = getNow() self._create_time = now self._last_poll = now self._stats = {} @@ -87,13 +86,13 @@ def get_info(self): return info def poll_update(self): - now = time.time() + now = getNow() self._last_poll = now def is_healthy(self): sleep_sec = int(config.get("node_sleep_time")) - now = time.time() + now = getNow() if now - self._last_poll < sleep_sec * 2: return True else: @@ -301,7 +300,7 @@ async def register(request): answer["dn_ids"] = dn_ids answer["req_ip"] = node_host log.debug(f"register returning: {answer}") - app["last_health_check"] = int(time.time()) + app["last_health_check"] = int(getNow()) resp = json_response(answer) log.response(request, resp=resp) @@ -475,7 +474,7 @@ async def init(): app["nodes"] = nodes app["dead_node_ids"] = set() - app["start_time"] = int(time.time()) # seconds after epoch + app["start_time"] = int(getNow()) # seconds after epoch app["last_health_check"] = 0 app["max_task_count"] = config.get("max_task_count") app.router.add_get("/", info) diff --git a/hsds/hsds_app.py b/hsds/hsds_app.py index e690b68d..d0d25d6d 100644 --- a/hsds/hsds_app.py +++ b/hsds/hsds_app.py @@ -3,12 +3,14 @@ from pathlib import Path import site import subprocess -import time import queue import threading +import time import logging from shutil import which +from h5json.time_util import getNow + def _enqueue_output(out, queue, loglevel): try: @@ -318,7 +320,7 @@ def run(self): self._threads.append(t) # wait to sockets are initialized - start_ts = time.time() + start_ts = getNow() SLEEP_TIME = 1 # time to sleep between checking on socket connection MAX_INIT_TIME = 10.0 # max time to wait for socket to be initialized @@ -329,7 +331,7 @@ def run(self): if os.path.exists(socket_path): ready += 1 else: - if time.time() > start_ts + 5: + if getNow() > start_ts + 5: # TBD - put a real ready check here ready = count if ready == count: @@ -339,12 +341,12 @@ def run(self): self.log.debug(f"{ready}/{count} ready") self.log.debug(f"sleeping for {SLEEP_TIME}") time.sleep(SLEEP_TIME) - if time.time() > start_ts + MAX_INIT_TIME: + if getNow() > start_ts + MAX_INIT_TIME: msg = f"failed to initialize after {MAX_INIT_TIME} seconds" self.log.error(msg) raise IOError(msg) - self.log.info(f"Ready after: {(time.time() - start_ts):4.2f} s") + self.log.info(f"Ready after: {(getNow() - start_ts):4.2f} s") self._ready = True def stop(self): @@ -352,7 +354,7 @@ def stop(self): if not self._processes: return - now = time.time() + now = getNow() logging.info(f"hsds app stop at {now}") for pname in self._processes: @@ -363,7 +365,7 @@ def stop(self): # wait for sub-proccesses to exit SLEEP_TIME = 0.1 # time to sleep between checking on process state MAX_WAIT_TIME = 10.0 # max time to wait for sub-process to terminate - start_ts = time.time() + start_ts = getNow() while True: is_alive_cnt = 0 for pname in self._processes: @@ -380,7 +382,7 @@ def stop(self): else: logging.debug("all subprocesses exited") break - if time.time() > start_ts + MAX_WAIT_TIME: + if getNow() > start_ts + MAX_WAIT_TIME: msg = f"failed to terminate after {MAX_WAIT_TIME} seconds" self.log.error(msg) break diff --git a/hsds/link_dn.py b/hsds/link_dn.py index a35acf17..f602a405 100755 --- a/hsds/link_dn.py +++ b/hsds/link_dn.py @@ -21,11 +21,11 @@ from aiohttp.web import 
json_response from h5json.objid import isValidUuid +from h5json.time_util import getNow from .util.globparser import globmatch from .util.linkUtil import validateLinkName, getLinkClass, isEqualLink from .util.domainUtil import isValidBucketName -from .util.timeUtil import getNow from .datanode_lib import get_obj_id, get_metadata_obj, save_metadata_obj from . import config from . import hsds_logger as log diff --git a/hsds/servicenode.py b/hsds/servicenode.py index 8a5ddaee..7ec4f5a3 100755 --- a/hsds/servicenode.py +++ b/hsds/servicenode.py @@ -14,9 +14,10 @@ # import asyncio -import time from aiohttp.web import run_app import aiohttp_cors +from h5json.time_util import getNow + from .util.lruCache import LruCache from .util.httpUtil import isUnixDomainUrl, bindToSocket, getPortFromUrl from .util.httpUtil import release_http_client, jsonResponse @@ -217,10 +218,10 @@ async def preStop(request): log.request(request) app = request.app - shutdown_start = time.time() + shutdown_start = getNow() log.warn(f"preStop request calling on_shutdown at {shutdown_start:.2f}") await on_shutdown(app) - shutdown_elapse_time = time.time() - shutdown_start + shutdown_elapse_time = getNow() - shutdown_start msg = f"shutdown took: {shutdown_elapse_time:.2f} seconds" if shutdown_elapse_time > 2.0: # 2.0 is the default grace period for kubernetes diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index b9a179c7..48a5538b 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -15,8 +15,6 @@ import asyncio import json -import math -import time import numpy as np from aiohttp.web_exceptions import HTTPBadRequest, HTTPForbidden, HTTPGone, HTTPConflict @@ -31,9 +29,9 @@ from h5json.objid import isSchema2Id, getS3Key, isValidUuid from h5json.hdf5dtype import getBaseTypeJson, validateTypeItem, createDataType, getItemSize from h5json.shape_util import getShapeDims, getShapeClass -from h5json.filters import getFiltersJson -from h5json.dset_util import guessChunk, getChunkSize, validateDatasetCreationProps -from h5json.dset_util import getDataSize, isExtensible +from h5json.dset_util import getChunkSize, generateLayout +from h5json.dset_util import getDataSize, validateDatasetCreationProps +from h5json.time_util import getNow from .util.nodeUtil import getDataNodeUrl from .util.authUtil import getAclKeys @@ -1022,7 +1020,7 @@ async def getAttributeFromRequest(app, req_json, obj_id=None, bucket=None): else: attr_item["value"] = None - now = time.time() + now = getNow() if "created" in req_json: created = req_json["created"] # allow "pre-dated" attributes if the timestamp is within the last 10 seconds @@ -1453,12 +1451,28 @@ def getCreateArgs(body, return kwargs +def genLayout(shape_json, item_size, has_filters=False): + """ create a chunked or contiguous layout based on shape and itemsize """ + + min_chunk_size = int(config.get("min_chunk_size")) + max_chunk_size = int(config.get("max_chunk_size")) + max_chunks_per_folder = int(config.get("max_chunks_per_folder", default=0)) + kwargs = {"chunk_min": min_chunk_size, "chunk_max": max_chunk_size} + # pass the chunk partition constraint on to generateLayout when configured + if max_chunks_per_folder > 0: + kwargs["max_chunks_per_folder"] = max_chunks_per_folder + if has_filters: + kwargs["chunks"] = True # force a chunked layout to support compression + + layout_json = generateLayout(shape_json, item_size, **kwargs) + return layout_json + + def getDatasetCreateArgs(body, root_id=None, bucket=None, type=None, implicit=False, - chunk_table=None, 
ignore_link=False): """ get args for createDataset from request body """ @@ -1486,17 +1500,11 @@ try: shape_class = getShapeClass(shape_json) shape_dims = getShapeDims(shape_json) - if "maxdims" in shape_json: - max_dims = shape_json["maxdims"] - is_extensible = isExtensible(shape_dims, max_dims) - else: - max_dims = None - is_extensible = False except (KeyError, TypeError, ValueError): msg = f"Invalid shape: {shape_json}" log.warn(msg) raise HTTPBadRequest(reason=msg) - + log.debug(f"shape_class: {shape_class}, shape_dims: {shape_dims}") log.debug(f"got createArgs: {list(kwargs.keys())}") @@ -1517,27 +1525,27 @@ else: dset_size = getDataSize(shape_dims, item_size) - creation_props = kwargs["creation_props"] - layout_class = None - layout_json = {} + if "creationProperties" in body: + creation_props = body["creationProperties"] + else: + creation_props = None + layout_json = None chunk_dims = None - partition_count = None + has_filters = False if creation_props: log.debug(f"POST_Dataset creation props: {creation_props}") try: validateDatasetCreationProps(creation_props, type_json=type_json, shape=shape_json) - except ValueError as ve: - msg = f"Provided creation properties are invalid: {ve}" + except (KeyError, TypeError, ValueError) as e: + msg = f"Provided creation properties are invalid: {e}" log.warn(msg) raise HTTPBadRequest(reason=msg) log.debug(f"create_props after validation: {creation_props}") - if creation_props.get("layout"): - layout_json = creation_props["layout"] - layout_class = layout_json.get("class") + if "filters" in creation_props: - # normalize filter format - filters = getFiltersJson(creation_props) + # check that the given filters are supported by HSDS + filters = creation_props["filters"] supported_filters = getSupportedFilters() log.debug(f"supported filters: {supported_filters}") for filter_item in filters: @@ -1545,19 +1553,21 @@ msg = f"Unsupported filter: {filter_item['name']}" log.warn(msg) raise HTTPBadRequest(reason=msg) + if filters: + has_filters = True creation_props["filters"] = filters log.debug(f"post validate creation properties: {creation_props}") + if "layout" in creation_props: + layout_json = creation_props["layout"] + else: + creation_props = {} - if layout_class: - if layout_class == "H5D_CONTIGUOUS_REF" and getItemSize(type_json) == "H5T_VARIABLE": - # ref dataset does not work with vlen type - msg = "H5D_CONTIGUOUS_REF datasets cannot be used with variable length types" - log.warn(msg) - raise HTTPBadRequest(reason=msg) - + if "layout" in creation_props: + layout_json = creation_props["layout"] + # layout_json was validated in validateDatasetCreationProps, but issue + # a warning if the chunk size is outside suggested range if "dims" in layout_json: chunk_dims = layout_json["dims"] - if chunk_dims: # log a warning if the chunk size is too small or too big chunk_size = getChunkSize(chunk_dims, item_size) if chunk_size < min_chunk_size: @@ -1572,87 +1582,12 @@ if dset_size > max_chunk_size: msg = f"dataset larger than recommended {max_chunk_size} for CONTIGUOUS storage" log.warn(msg) - elif shape_class == "H5S_NULL": - layout_class = None - log.debug("using None layout for H5S_NULL dataset") - elif shape_class == "H5S_SCALAR": - layout_class = "H5D_CONTIGUOUS" - log.debug("Using H5D_CONTIGUOUS for H5S_SCALAR dataset") - elif shape_class == "H5S_SIMPLE": - if dset_size <= min_chunk_size and not is_extensible: - 
# default to contiguous - layout_class = "H5D_CONTIGUOUS" - log.debug(f"Using H5D_CONTIGUOUS for small (<{min_chunk_size}) dataset") - else: - layout_class = "H5D_CHUNKED" - log.debug(f"shape_json: {shape_json}") - log.debug(f"item_size: {item_size}") - log.debug(f"chunk_min: {min_chunk_size}") - log.debug(f"chunk_max: {max_chunk_size}") - args = {"chunk_min": min_chunk_size, "chunk_max": max_chunk_size} - chunk_dims = guessChunk(shape_json, item_size, **args) - log.debug(f"initial autochunk layout: {chunk_dims}") - chunk_size = getChunkSize(chunk_dims, item_size) - log.debug(f"chunk_size: {chunk_size}") else: - msg = f"unexpected shape_class: {shape_class}" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + # no layout, create one based on shape and itemsize + layout_json = genLayout(shape_json, item_size, has_filters=has_filters) + log.info(f"created chunk layout for new dset: {layout_json}") + creation_props["layout"] = layout_json - # set partition_count if needed: - max_chunks_per_folder = int(config.get("max_chunks_per_folder")) - set_partition = False - if max_chunks_per_folder > 0: - if "dims" in layout_json: - set_partition = True - - if set_partition and dset_size > max_chunk_size: - log.debug(f"updating layout for partition constraint: {max_chunks_per_folder}") - - num_chunks = 1 - rank = len(shape_dims) - unlimited_count = 0 - if max_dims: - for i in range(rank): - if max_dims[i] == 0: - unlimited_count += 1 - msg = f"number of unlimited dimensions: {unlimited_count}" - log.debug(msg) - - for i in range(rank): - max_dim = 1 - if max_dims: - max_dim = max_dims[i] - if max_dim == 0: - # don't really know what the ultimate extent - # could be, but assume 10^6 for total number of - # elements and square-shaped array... - MAX_ELEMENT_GUESS = 10.0 ** 6 - exp = 1 / unlimited_count - max_dim = int(math.pow(MAX_ELEMENT_GUESS, exp)) - else: - max_dim = shape_dims[i] - num_chunks *= math.ceil(max_dim / chunk_dims[i]) - - if num_chunks > max_chunks_per_folder: - partition_count = math.ceil(num_chunks / max_chunks_per_folder) - msg = f"set partition count to: {partition_count}, " - msg += f"num_chunks: {num_chunks}" - log.info(msg) - else: - msg = "do not need chunk partitions, num_chunks: " - msg += f"{num_chunks} max_chunks_per_folder: " - msg += f"{max_chunks_per_folder}" - log.info(msg) - - if layout_class: - # should be set if shape is not H5S_NULL - if "class" not in layout_json: - layout_json["class"] = layout_class - if chunk_dims: - layout_json["dims"] = chunk_dims - log.debug(f"using dataset layout: {layout_json}") - creation_props["layout"] = layout_json kwargs["creation_props"] = creation_props log.debug(f"updated creation props: {creation_props}") diff --git a/hsds/util/chunkUtil.py b/hsds/util/chunkUtil.py index 7d299d9a..8715ac3d 100644 --- a/hsds/util/chunkUtil.py +++ b/hsds/util/chunkUtil.py @@ -38,15 +38,14 @@ def getNumChunks(selection, layout): If selection is provided (a list of slices), return the number of chunks that intersect with the selection. 
""" - print(f"getNumChunks: {selection}, layout: {layout}") + if len(selection) == 0: - print("zero length selection") + # zero length selection return 0 rank = len(layout) if rank == 1 and layout[0] == 1: # scalar dataset - print("scalar dset") return 1 if len(selection) != rank: msg = f"selection list has {len(selection)} items, but rank is {rank}" @@ -57,12 +56,10 @@ def getNumChunks(selection, layout): if isinstance(s, slice): if s.stop <= s.start: log.debug("null selection") - print("null selection") return 0 else: # coordinate list if len(s) == 0: - print("null coordinate list") return 0 # first, get the number of chunks needed for any coordinate selection chunk_indices = [] @@ -92,8 +89,6 @@ def getNumChunks(selection, layout): else: num_chunks = 1 - print("num_chunks:", num_chunks) - # now deal with any slices in the selection for i in range(len(selection)): s = selection[i] diff --git a/hsds/util/timeUtil.py b/hsds/util/timeUtil.py deleted file mode 100755 index e4ae9d3f..00000000 --- a/hsds/util/timeUtil.py +++ /dev/null @@ -1,83 +0,0 @@ -############################################################################## -# Copyright by The HDF Group. # -# All rights reserved. # -# # -# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # -# Utilities. The full HSDS copyright notice, including # -# terms governing use, modification, and redistribution, is contained in # -# the file COPYING, which can be found at the root of the source code # -# distribution tree. If you do not have access to this file, you may # -# request a copy from help@hdfgroup.org. # -############################################################################## -from datetime import datetime -import time -import os -import pytz - - -def unixTimeToUTC(timestamp): - """Convert unix timestamp (seconds since Jan 1, 1970, to ISO-8601 - compatible UTC time string. - - """ - utc = pytz.utc - dtTime = datetime.fromtimestamp(timestamp, utc) - iso_str = dtTime.isoformat() - # isoformat returns a string like this: - # '2014-10-30T04:25:21+00:00' - # strip off the '+00:00' and replace - # with 'Z' (both are ISO-8601 compatible) - npos = iso_str.rfind("+") - iso_z = iso_str[:npos] + "Z" - return iso_z - - -def elapsedTime(timestamp): - """Get Elapsed time from given timestamp""" - delta = int(time.time()) - timestamp - if delta < 0: - return "Invalid timestamp!" - day_length = 24 * 60 * 60 - days = 0 - hour_length = 60 * 60 - hours = 0 - minute_length = 60 - minutes = 0 - ret_str = "" - - if delta > day_length: - days = delta // day_length - delta = delta % day_length - ret_str += "{} days ".format(days) - if delta > hour_length or days > 0: - hours = delta // hour_length - delta = delta % hour_length - ret_str += "{} hours ".format(hours) - if delta > minute_length or days > 0 or hours > 0: - minutes = delta // minute_length - delta = delta % minute_length - ret_str += "{} minutes ".format(minutes) - ret_str += "{} seconds".format(delta) - return ret_str - - -def getNow(app): - """ - Get current time in unix timestamp - - Returns a precise timestamp even on platforms where - time.time() has low resolution (e.g. 
Windows) - """ - system = os.name - current_time = 0 - - if system == "nt": - # Windows - current_time = (time.perf_counter() - app["start_time_relative"]) + app["start_time"] - elif system == "posix": - # Unix - current_time = time.time() - else: - raise ValueError(f"Unsupported OS: {system}") - - return current_time diff --git a/pyproject.toml b/pyproject.toml index 8e260cda..422750fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ dependencies = [ "bitshuffle >=0.5.2", "cryptography", "h5py >= 3.6.0", - "h5json@git+https://github.com/HDFGroup/hdf5-json@abstract", + "h5json >= 1.0.0", "importlib_resources", "numcodecs <= 0.15.1", "numpy >=2.0.0", diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py index 5979e042..697258da 100755 --- a/tests/integ/dataset_test.py +++ b/tests/integ/dataset_test.py @@ -15,6 +15,7 @@ import numpy as np from h5json.objid import createObjId +from h5json.filters import getFilterItem import helper import config @@ -1401,25 +1402,21 @@ def testCreationPropertiesLayoutDataset(self): "id": 3, "name": "fletcher32" } - payload["creationProperties"] = { - "layout": {"class": "H5D_CHUNKED", "dims": [1, 390, 512]}, + contiguous_layout = {"class": "H5D_CONTIGUOUS"} + chunked_layout = {"class": "H5D_CHUNKED", "dims": [1, 390, 512]} + creationProps = { "filters": [ gzip_filter, fletcher32_filter, ], } + payload["creationProperties"] = creationProps rsp = self.session.post(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) # create dataset rspJson = json.loads(rsp.text) dset_uuid = rspJson["id"] self.assertTrue(helper.validateId(dset_uuid)) - # link new dataset as 'chunktest' - name = "chunktest" - req = self.endpoint + "/groups/" + root_uuid + "/links/" + name - payload = {"id": dset_uuid} - rsp = self.session.put(req, data=json.dumps(payload), headers=headers) - self.assertEqual(rsp.status_code, 201) # verify layout req = helper.getEndpoint() + "/datasets/" + dset_uuid rsp = self.session.get(req, headers=headers) @@ -1431,12 +1428,41 @@ def testCreationPropertiesLayoutDataset(self): layout_json = cpl["layout"] self.assertTrue("class" in layout_json) self.assertEqual(layout_json["class"], "H5D_CHUNKED") + self.assertTrue("dims" in layout_json) # layout created automatically + + # add an explicit layout to creation props and verify contiguous + creationProps["layout"] = contiguous_layout + payload["creationProperties"] = creationProps + req = self.endpoint + "/datasets" + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 400) + + # use a chunk layout to creation props and verify success + creationProps["layout"] = chunked_layout + payload["creationProperties"] = creationProps + rsp = self.session.post(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) + self.assertTrue("creationProperties" in rspJson) + cpl = rspJson["creationProperties"] + self.assertTrue("layout" in cpl) + layout_json = cpl["layout"] + self.assertTrue("class" in layout_json) + self.assertEqual(layout_json["class"], "H5D_CHUNKED") self.assertTrue("dims" in layout_json) + self.assertEqual(layout_json["dims"], [1, 390, 512]) if config.get("max_chunks_per_folder") > 0: self.assertTrue("partition_count" in layout_json) self.assertEqual(layout_json["partition_count"], 10) + # link new dataset as 'chunktest' + name = "chunktest" + req = self.endpoint + "/groups/" + root_uuid + "/links/" + name + payload = {"id": 
+        rsp = self.session.put(req, data=json.dumps(payload), headers=headers)
+        self.assertEqual(rsp.status_code, 201)
+
         # verify compression
         self.assertTrue("creationProperties" in rspJson)
         cpl = rspJson["creationProperties"]
@@ -1540,7 +1566,8 @@ def testCompressionFiltersDataset(self):
             req = self.endpoint + "/datasets"
             payload = {"type": "H5T_IEEE_F32LE", "shape": [40, 80]}
-            filters = [compressor, ]
+            filter_item = getFilterItem(compressor)
+            filters = [filter_item, ]
             layout = {"class": "H5D_CHUNKED", "dims": [10, 20]}
             cpl = {"filters": filters, "layout": layout}
             payload["creationProperties"] = cpl
@@ -1600,8 +1627,9 @@ def testCompressionFilterOptionDataset(self):
         # create the dataset
         req = self.endpoint + "/datasets"
-        compressor = {"class": "H5Z_FILTER_LZ4", "name": "lz4", "level": 5}
-        filters = [compressor, ]
+        filter_item = getFilterItem("lz4", options={"level": 4})
+        filters = [filter_item, ]
 
         payload = {"type": "H5T_IEEE_F32LE", "shape": [40, 80]}
         layout = {"class": "H5D_CHUNKED", "dims": [10, 20]}
@@ -1661,25 +1689,15 @@ def testInvalidCompressionFilter(self):
         rspJson = json.loads(rsp.text)
         self.assertTrue("root" in rspJson)
 
-        bad_compressors = ("shrink-o-rama")
-        for compressor_name in bad_compressors:
-            # create the dataset
-            req = self.endpoint + "/datasets"
-            compressor = {
-                "class": "H5Z_FILTER_USER",
-                "name": compressor_name,
-                "level": 5,
-            }
+        # try creating a dataset with an unknown filter class
+        filter_item = {"class": "H5Z_FILTER_FOOBAR", "id": 123, "name": "foobar"}
+        req = self.endpoint + "/datasets"
 
-            payload = {"type": "H5T_IEEE_F32LE", "shape": [40, 80]}
-            payload["creationProperties"] = {
-                "filters": [
-                    compressor,
-                ]
-            }
-            req = self.endpoint + "/datasets"
-            rsp = self.session.post(req, data=json.dumps(payload), headers=headers)
-            self.assertEqual(rsp.status_code, 400)  # create dataset
+        payload = {"type": "H5T_IEEE_F32LE", "shape": [40, 80]}
+        payload["creationProperties"] = {"filters": [filter_item, ]}
+        rsp = self.session.post(req, data=json.dumps(payload), headers=headers)
+        self.assertEqual(rsp.status_code, 400)  # invalid filter is rejected
 
     def testInvalidFillValue(self):
         # test Dataset with simple type and fill value that is incompatible with the type
 
From 55c85981796d7c42d45799aa2ac3f489057dffef Mon Sep 17 00:00:00 2001
From: John Readey
Date: Sun, 4 Jan 2026 17:09:01 +0800
Subject: [PATCH 45/49] update for h5json changes

---
 hsds/servicenode_lib.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py
index 48a5538b..717912cc 100644
--- a/hsds/servicenode_lib.py
+++ b/hsds/servicenode_lib.py
@@ -1451,7 +1451,7 @@ def getCreateArgs(body,
     return kwargs
 
 
-def genLayout(shape_json, item_size, has_filters=False):
-    """ create a chunked or contiguous layout based on shape and itemsize """
+def genLayout(shape_json, type_json, has_filters=False):
+    """ create a chunked or contiguous layout based on shape and type """
 
     min_chunk_size = int(config.get("min_chunk_size"))
@@ -1464,7 +1464,7 @@ def genLayout(shape_json, item_size, has_filters=False):
     if has_filters:
         kwargs["chunks"] = True  # force a chunked layout to support compression
 
-    layout_json = generateLayout(shape_json, item_size, **kwargs)
+    layout_json = generateLayout(shape_json, type_json, **kwargs)
 
     return layout_json
 
@@ -1584,7 +1584,7 @@ def getDatasetCreateArgs(body,
             log.warn(msg)
     else:
-        # no layout, create one based on shape and itemsize
-        layout_json = genLayout(shape_json, item_size, has_filters=has_filters)
+        # no layout, create one based on shape and type
+        layout_json = genLayout(shape_json, type_json, has_filters=has_filters)
         log.info(f"created chunk layout for new dset: {layout_json}")
         creation_props["layout"] = layout_json
 
From 77042d85a70eb54b1aca2c30e6768569c70016c5 Mon Sep 17 00:00:00 2001
From: John Readey
Date: Tue, 6 Jan 2026 09:39:00 +0800
Subject: [PATCH 46/49] added consolidated metadata support

---
 hsds/async_lib.py           | 188 ++++++++++++++++++++++++++++++++++--
 hsds/datanode_lib.py        |   2 +-
 hsds/domain_sn.py           |  20 ++--
 hsds/servicenode_lib.py     |  35 ++++++-
 hsds/util/storUtil.py       |   2 +-
 tests/integ/dataset_test.py |  24 +++++
 tests/integ/vlen_test.py    |  10 +-
 7 files changed, 250 insertions(+), 31 deletions(-)

diff --git a/hsds/async_lib.py b/hsds/async_lib.py
index 674caa98..997432cc 100755
--- a/hsds/async_lib.py
+++ b/hsds/async_lib.py
@@ -17,15 +17,15 @@
 from aiohttp.web_exceptions import HTTPForbidden
 from h5json.hdf5dtype import getItemSize
 from h5json.hdf5dtype import createDataType
-from h5json.array_util import getNumElements, bytesToArray
+from h5json.array_util import getNumElements, bytesToArray, bytesArrayToList
 from h5json.objid import isValidUuid, isSchema2Id, getS3Key, isS3ObjKey
 from h5json.objid import getObjId, isValidChunkId, getCollectionForId
 from h5json.filters import getFilters
-from h5json.shape_util import getShapeDims
+from h5json.shape_util import getShapeDims, getDataSize
 from h5json.dset_util import getDatasetLayoutClass, getDatasetLayout, getChunkDims
 from h5json.time_util import getNow
 
-from .util.chunkUtil import getDatasetId, getNumChunks, ChunkIterator
-from .util.dsetUtil import getHyperslabSelection
+from .util.chunkUtil import getDatasetId, getNumChunks, ChunkIterator, getChunkIndex, getChunkIds
+from .util.dsetUtil import getHyperslabSelection, getFilterOps
 from .util.storUtil import getStorKeys, putStorJSONObj, getStorJSONObj
 from .util.storUtil import deleteStorObj, getStorBytes, isStorObj
@@ -77,6 +77,7 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None):
         msg += f"{dset_id}"
         log.warn(msg)
         return
+
     type_json = dset_json["type"]
     item_size = getItemSize(type_json)
     if not getDatasetLayout(dset_json):
@@ -112,7 +113,7 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None):
     if layout_class == "H5D_CONTIGUOUS_REF":
         # In H5D_CONTIGUOUS_REF a non-compressed part of the HDF5 is divided
         # into equal size chunks, so we can just compute link bytes and num
-        # chunks based on the size of the coniguous dataset
+        # chunks based on the size of the contiguous dataset
         layout_dims = getChunkDims(dset_json)
         num_chunks = getNumChunks(selection, layout_dims)
         chunk_size = item_size
@@ -268,20 +269,26 @@ def scanRootCallback(app, s3keys):
     results = app["scanRoot_results"]
     scanRoot_keyset = app["scanRoot_keyset"]
     checksums = results["checksums"]
+
     for s3key in s3keys.keys():
         if not isS3ObjKey(s3key):
-            log.info(f"not s3obj key, ignoring: {s3key}")
+            log.info(f"scanRoot - not s3obj key, ignoring: {s3key}")
             continue
         if s3key in scanRoot_keyset:
             log.warn(f"scanRoot - dejavu for key: {s3key}")
             continue
         scanRoot_keyset.add(s3key)
-        msg = f"scanRoot adding key: {s3key} to keyset, "
+        msg = f"scanRoot - adding key: {s3key} to keyset, "
         msg += f"{len(scanRoot_keyset)} keys"
         log.debug(msg)
         objid = getObjId(s3key)
+
+        if objid in app["deleted_ids"]:
+            log.debug(f"scanRoot - skipping deleted id: {objid}")
+            continue
+
         etag = None
         obj_size = None
         lastModified = None
@@ -306,8 +313,15 @@ def scanRootCallback(app, s3keys):
             is_chunk = True
             results["num_chunks"] += 1
             results["allocated_bytes"] += obj_size
+            chunk_index = getChunkIndex(objid)
+            if max(chunk_index) == 0:
+                # save the id of the first chunk (if present) to the obj_ids set;
+                # it will be used later to read dataset values for small datasets
+                results["obj_ids"].add(objid)
         else:
             results["metadata_bytes"] += obj_size
+            results["obj_ids"].add(objid)
 
         if is_chunk or getCollectionForId(objid) == "datasets":
             if is_chunk:
@@ -345,6 +359,144 @@ def scanRootCallback(app, s3keys):
     log.error(msg)
 
 
+async def _getDatasetValueJson(app, dset_id, dset_json, obj_ids, size_limit=None, bucket=None):
+    """ If the dataset size is less than size_limit, and the chunk_ids for the dataset are
+    available, return a JSON representation of the dataset values. Otherwise, return None """
+
+    dims = getShapeDims(dset_json)
+    if dims is None:
+        return None  # null dataspace
+    if "type" not in dset_json:
+        msg = f"_getDatasetValueJson - expected to find type in dataset_json for {dset_id}"
+        log.warn(msg)
+        return None
+    type_json = dset_json["type"]
+    item_size = getItemSize(type_json)
+    if item_size == "H5T_VARIABLE":
+        item_size = 1024  # make a guess for variable length types
+    dataset_size = getDataSize(dims, item_size)
+    if size_limit is not None and dataset_size > size_limit:
+        log.debug(f"_getDatasetValueJson - dataset size {dataset_size} exceeds limit {size_limit}")
+        return None
+
+    chunk_dims = getChunkDims(dset_json)
+    if not chunk_dims:
+        log.warn(f"_getDatasetValueJson - no layout found for dataset: {dset_id}")
+        return None
+    if chunk_dims != dims:
+        msg = f"_getDatasetValueJson - dataset layout {chunk_dims} does not match dims {dims} "
+        msg += f"for dataset: {dset_id}, ignoring"
+        log.warn(msg)
+        return None
+    select_all = getHyperslabSelection(dims)  # select the entire dataspace
+    chunk_ids = getChunkIds(dset_id, select_all, dims)
+    if len(chunk_ids) == 0:
+        log.debug(f"_getDatasetValueJson - no chunk ids found for dataset: {dset_id}")
+        return None
+    if len(chunk_ids) > 1:
+        log.debug(f"_getDatasetValueJson - more than one chunk id found for dataset: {dset_id}")
+        return None
+    chunk_id = chunk_ids[0]
+    if chunk_id not in obj_ids:
+        log.debug(f"_getDatasetValueJson - chunk id {chunk_id} not in scanned obj_ids")
+        return None
+    log.debug(f"using chunk: {chunk_id} to get dataset value for {dset_id}")
+
+    # fetch the chunk - using getStorBytes since this will not be used with
+    # chunk cache or chunk crawlers
+    # TBD: need parameters for s3path, s3offset, s3size for ref layouts
+    # regular store read
+
+    filters = getFilters(dset_json)
+    dt = createDataType(type_json)
+    filter_ops = getFilterOps(app, dset_id, filters, dtype=dt, chunk_shape=chunk_dims)
+
+    kwargs = {
+        "filter_ops": filter_ops,
+        "offset": None,
+        "length": None,
+        "bucket": bucket
+    }
+    s3key = getS3Key(chunk_id)
+
+    try:
+        chunk_bytes = await getStorBytes(app, s3key, **kwargs)
+    except HTTPNotFound:
+        log.warn(f"_getDatasetValueJson - HTTPNotFound for chunk {chunk_id} bucket:{bucket}")
+        return None
+    except HTTPForbidden:
+        log.warn(f"_getDatasetValueJson - HTTPForbidden for chunk {chunk_id} bucket:{bucket}")
+        return None
+    except HTTPInternalServerError:
+        msg = "_getDatasetValueJson - "
+        msg += f"HTTPInternalServerError for chunk {chunk_id} bucket:{bucket}"
+        log.warn(msg)
+        return None
+
+    if chunk_bytes is None:
+        msg = f"_getDatasetValueJson - read {chunk_id} bucket: {bucket} returned None"
+        log.warn(msg)
+        return None
+
+    arr = bytesToArray(chunk_bytes, dt, chunk_dims)
+
+    json_value = bytesArrayToList(arr)
+    log.debug(f"_getDatasetValueJson - returning {json_value}")
+
+    return json_value
+
+
+async def getConsolidatedMetaData(app, obj_ids, bucket=None):
+    # create a consolidated metadata summary for all objects in the domain
+    # return a dict of obj_ids to their metadata summaries
+    log.info("getConsolidatedMetaData - creating consolidated metadata summary")
+    consolidated_metadata = {}
+    for obj_id in obj_ids:
+        if isValidChunkId(obj_id):
+            # skip chunks - we may use the chunk later when processing its dataset object
+            continue
+        s3_key = getS3Key(obj_id)
+        try:
+            obj_json = await getStorJSONObj(app, s3_key, bucket=bucket)
+        except HTTPNotFound:
+            log.warn(f"HTTPNotFound for {s3_key} bucket:{bucket}")
+            continue
+        except HTTPForbidden:
+            log.warn(f"HTTPForbidden error for {s3_key} bucket:{bucket}")
+            continue
+        except HTTPInternalServerError:
+            msg = f"HTTPInternalServerError error for {s3_key} bucket:{bucket}"
+            log.warn(msg)
+            continue
+        log.debug(f"getConsolidatedMetaData - got json for obj_id: {obj_id}: {obj_json}")
+        # extract relevant metadata
+        metadata_summary = {}
+        if "type" in obj_json:
+            metadata_summary["type"] = obj_json["type"]
+        if "shape" in obj_json:
+            metadata_summary["shape"] = obj_json["shape"]
+        if "attributes" in obj_json:
+            metadata_summary["attributes"] = obj_json["attributes"]
+        if "links" in obj_json:
+            metadata_summary["links"] = obj_json["links"]
+        if "creationProperties" in obj_json:
+            metadata_summary["creationProperties"] = obj_json["creationProperties"]
+        if getCollectionForId(obj_id) == "datasets":
+            log.debug("getConsolidatedMetaData - got dataset")
+            size_limit = 4096  # TBD - make this a config
+            kwargs = {"size_limit": size_limit, "bucket": bucket}
+            json_value = await _getDatasetValueJson(app, obj_id, obj_json, obj_ids, **kwargs)
+            if json_value is not None:
+                log.debug(f"adding dataset value to metadata summary for dataset: {obj_id}")
+                metadata_summary["value"] = json_value
+        else:
+            log.debug("getConsolidatedMetaData - not a dataset")
+
+        consolidated_metadata[obj_id] = metadata_summary
+    log.info("getConsolidatedMetaData - done creating consolidated metadata summary")
+    return consolidated_metadata
+
+
 async def scanRoot(app, rootid, update=False, bucket=None):
     # iterate through all s3 keys under the given root.
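# Reviewer reference - a minimal sketch (not part of the patch) of the summary
# dict that getConsolidatedMetaData() returns and that scanRoot() writes to the
# .summary.json key. The ids, type, shape, and value below are illustrative only:
#
#   {
#       "g-1e76d862-...": {
#           "attributes": {},
#           "links": {"dset1": {"class": "H5L_TYPE_HARD", "id": "d-5ab417a1-..."}}
#       },
#       "d-5ab417a1-...": {
#           "type": {"class": "H5T_INTEGER", "base": "H5T_STD_I32LE"},
#           "shape": {"class": "H5S_SIMPLE", "dims": [4]},
#           "creationProperties": {"layout": {"class": "H5D_CHUNKED", "dims": [4]}},
#           "value": [1, 2, 3, 4]
#       }
#   }
#
# The "value" key is only present for small datasets whose single chunk was seen
# during the scan (see _getDatasetValueJson above).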
@@ -386,7 +538,8 @@ async def scanRoot(app, rootid, update=False, bucket=None):
     results["num_linked_chunks"] = 0
     results["linked_bytes"] = 0
     results["logical_bytes"] = 0
-    results["checksums"] = {}  # map of objid to checksums
+    results["obj_ids"] = set()  # set of object ids scanned (plus the first chunk id of each dataset)
+    results["checksums"] = {}  # map of objid to checksums
     results["bucket"] = bucket
     results["scan_start"] = getNow(app=app)
@@ -405,6 +558,9 @@ async def scanRoot(app, rootid, update=False, bucket=None):
     num_objects += len(results["datasets"])
     num_objects += results["num_chunks"]
     log.info(f"scanRoot - got {num_objects} keys for rootid: {rootid}")
+    obj_ids = results["obj_ids"]
+    log.info(f"scanRoot - got {len(obj_ids)} unique object ids")
+    log.debug(f"scanRoot - obj_ids: {obj_ids}")
 
     dataset_results = results["datasets"]
     for dsetid in dataset_results:
@@ -445,6 +601,11 @@ async def scanRoot(app, rootid, update=False, bucket=None):
 
     results["scan_complete"] = getNow(app=app)
 
+    # extract the obj_ids set - it won't be written to .info.json
+    obj_ids = results["obj_ids"]
+    del results["obj_ids"]
+    log.debug(f"obj_ids set: {obj_ids}")
+
     if update:
         # write .info object back to S3
         info_key = root_prefix + ".info.json"
@@ -452,6 +613,17 @@ async def scanRoot(app, rootid, update=False, bucket=None):
         msg += f"{results}"
         log.info(msg)
         await putStorJSONObj(app, info_key, results, bucket=bucket)
+
+        # create a json summary of objects in this domain
+        log.debug(f"Creating consolidated metadata summary for root {rootid}")
+        summary_key = root_prefix + ".summary.json"
+        summary_data = await getConsolidatedMetaData(app, obj_ids, bucket=bucket)
+        if summary_data:
+            log.info(f"Got consolidated metadata summary for root {rootid}")
+            log.debug(f"Summary data: {summary_data}")
+            await putStorJSONObj(app, summary_key, summary_data, bucket=bucket)
+        else:
+            log.info(f"No consolidated metadata summary for root {rootid}")
 
     return results
 
diff --git a/hsds/datanode_lib.py b/hsds/datanode_lib.py
index 6b76e37a..d2b1840c 100644
--- a/hsds/datanode_lib.py
+++ b/hsds/datanode_lib.py
@@ -1094,7 +1094,7 @@ async def get_chunk(
             log.debug(msg)
     else:
         s3key = getS3Key(chunk_id)
-        log.debug(f"getChunk chunkid: {chunk_id} bucket: {bucket}")
+        log.debug(f"getChunk chunkid: {chunk_id} bucket: {bucket} using key: {s3key}")
         if chunk_id in chunk_cache:
             log.debug(f"getChunk chunkid: {chunk_id} found in cache")
             chunk_arr = chunk_cache[chunk_id]
diff --git a/hsds/domain_sn.py b/hsds/domain_sn.py
index cc634526..46f2f890 100755
--- a/hsds/domain_sn.py
+++ b/hsds/domain_sn.py
@@ -462,6 +462,11 @@ async def GET_Domain(request):
     if "verbose" in params and params["verbose"]:
         verbose = True
 
+    getobjs = False
+    # include domain objects if requested
+    if params.get("getobjs"):
+        getobjs = True
+
     if not domain:
         log.info("no domain passed in, returning all top-level domains")
         # no domain passed in, return top-level domains for this request
@@ -543,22 +548,9 @@
         return resp
 
     # return just the keys as per the REST API
-    kwargs = {"verbose": verbose, "bucket": bucket}
+    kwargs = {"verbose": verbose, "getobjs": getobjs, "bucket": bucket}
     rsp_json = await getDomainResponse(app, domain_json, **kwargs)
 
-    # include domain objects if requested
-    if params.get("getobjs") and "root" in domain_json:
-
-        log.debug("getting all domain objects")
-        root_id = domain_json["root"]
-        kwargs = {"include_attrs": include_attrs, "bucket": bucket}
-        domain_objs = await getDomainObjects(app, root_id, **kwargs)
-        if domain_objs:
-            rsp_json["domain_objs"] = domain_objs
-
-    # include domain class if present
-    # if "class" in domain_json:
-    #    rsp_json["class"] = domain_json["class"]
 
     # include dn_ids if requested
     if "getdnids" in params and params["getdnids"]:
diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py
index 717912cc..921e2f2d 100644
--- a/hsds/servicenode_lib.py
+++ b/hsds/servicenode_lib.py
@@ -116,7 +116,7 @@ async def getDomainJson(app, domain, reload=False):
     return domain_json
 
 
-async def getDomainResponse(app, domain_json, bucket=None, verbose=False):
+async def getDomainResponse(app, domain_json, bucket=None, verbose=False, getobjs=False):
     """ construct JSON response for domain request """
     rsp_json = {}
     if "root" in domain_json:
@@ -189,6 +189,13 @@ async def getDomainResponse(app, domain_json, bucket=None, verbose=False):
             rsp_json["num_linked_chunks"] = num_linked_chunks
             rsp_json["md5_sum"] = md5_sum
 
+    if getobjs and "root" in domain_json:
+        root_id = domain_json["root"]
+        domain_objs = await getDomainObjs(app, root_id, bucket=bucket)
+        if domain_objs:
+            log.debug(f"returning {len(domain_objs)} objs for root_id: {root_id}")
+            rsp_json["domain_objs"] = domain_objs
+
     # pass back config parameters the client may care about
     rsp_json["limits"] = getLimits()
@@ -849,8 +856,32 @@ async def getRootInfo(app, root_id, bucket=None):
     return info_json
 
 
+async def getDomainObjs(app, root_id, bucket=None):
+    """ Return domain objects if available for this root id """
+    log.debug(f"getDomainObjs {root_id}")
+
+    s3_key = getS3Key(root_id)
+
+    parts = s3_key.split("/")
+    # the s3 key for a root id is in the format: db/<root>/.group.json
+    # derive the key for the summary object as: db/<root>/.summary.json
+    if len(parts) != 3:
+        log.error(f"Unexpected s3key format: {s3_key}")
+        return None
+
+    summary_key = f"db/{parts[1]}/.summary.json"
+
+    try:
+        summary_json = await getStorJSONObj(app, summary_key, bucket=bucket)
+    except HTTPNotFound:
+        log.warn(f".summary.json not found for key: {summary_key}")
+        return None
+
+    return summary_json
+
+
 async def doFlush(app, root_id, bucket=None):
-    """return wnen all DN nodes have wrote any pending changes to S3"""
+    """return when all DN nodes have written any pending changes to S3 """
     log.info(f"doFlush {root_id}")
     params = {"flush": 1}
     if bucket:
diff --git a/hsds/util/storUtil.py b/hsds/util/storUtil.py
index b37e25bc..7b3b8a4e 100644
--- a/hsds/util/storUtil.py
+++ b/hsds/util/storUtil.py
@@ -493,7 +493,7 @@ async def getStorBytes(app,
         chunk_bytes = []
         for chunk_location in chunk_locations:
-            log.debug(f"getStoreBytes - processing chunk_location: {chunk_location}")
+            log.debug(f"getStorBytes - processing chunk_location: {chunk_location}")
             n = chunk_location.offset - offset
             if n < 0:
                 log.warn(f"getStorBytes - unexpected offset for chunk_location: {chunk_location}")
diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py
index 697258da..04d7f20b 100755
--- a/tests/integ/dataset_test.py
+++ b/tests/integ/dataset_test.py
@@ -342,6 +342,30 @@ def testPostDatasetWithAttributes(self):
         self.assertTrue("attributes" in rspJson)
         self.assertEqual(len(rspJson["attributes"]), attr_count)
 
+        # try fetching the domain objects in the domain response
+        req = helper.getEndpoint() + "/"
+        params = {"getobjs": 1}
+        for i in range(10):
+            # try a few times to allow for async update of summary info
+            time.sleep(5)
+            rsp = self.session.get(req, params=params, headers=headers)
+            self.assertEqual(rsp.status_code, 200)
+            rspJson = json.loads(rsp.text)
+            if "domain_objs" in rspJson:
+                break
+
+        self.assertTrue("domain_objs" in 
rspJson) + domain_objs = rspJson["domain_objs"] + self.assertTrue(root_uuid in domain_objs) + self.assertTrue(dset_id in domain_objs) + dset_json = domain_objs[dset_id] + self.assertTrue("attributes" in dset_json) + self.assertEqual(len(dset_json["attributes"]), attr_count) + self.assertTrue("type" in dset_json) + self.assertTrue("shape" in dset_json) + self.assertTrue("creationProperties" in dset_json) + self.assertFalse("value" in dset_json) # no data written yet + def testScalarEmptyDimsDataset(self): # Test creation/deletion of scalar dataset obj domain = self.base_domain + "/testScalarEmptyDimsDataset.h5" diff --git a/tests/integ/vlen_test.py b/tests/integ/vlen_test.py index d3d44ab5..28bb3e90 100755 --- a/tests/integ/vlen_test.py +++ b/tests/integ/vlen_test.py @@ -35,7 +35,7 @@ def tearDown(self): # main def testPutVLenInt(self): - # Test PUT value for 1d attribute with variable length int types + # Test PUT value for 1d dataset with variable length int types print("testPutVLenInt", self.base_domain) headers = helper.getRequestHeaders(domain=self.base_domain) @@ -120,7 +120,7 @@ def testPutVLenInt(self): self.assertEqual(value[1], [1, 2, 3, 4]) def testPutVLenIntBinary(self): - # Test PUT value for 1d attribute with variable length int types using binary transfer + # Test PUT value for 1d dataset with variable length int types using binary transfer print("testPutVLenIntBinary", self.base_domain) count = 4 @@ -217,7 +217,7 @@ def testPutVLenIntBinary(self): self.assertEqual(value[0], [1, 2, 3]) def testPutVLen2DInt(self): - # Test PUT value for 1d attribute with variable length int types + # Test PUT value for 1d dataset with variable length int types print("testPutVLen2DInt", self.base_domain) nrow = 2 ncol = 2 @@ -294,7 +294,7 @@ def testPutVLen2DInt(self): self.assertEqual(value[0][1], [1, 2]) def testPutVLenString(self): - # Test PUT value for 1d attribute with variable length string types + # Test PUT value for 1d dataset with variable length string types print("testPutVLenString", self.base_domain) headers = helper.getRequestHeaders(domain=self.base_domain) @@ -364,7 +364,7 @@ def testPutVLenString(self): self.assertEqual(value[1], data[3]) def testPutVLenStringBinary(self): - # Test PUT value for 1d attribute with variable length string types + # Test PUT value for 1d dataset with variable length string types print("testPutVLenStringBinary", self.base_domain) headers = helper.getRequestHeaders(domain=self.base_domain) From 23bb24bd8441428233c6ba1b513991cac181fb7e Mon Sep 17 00:00:00 2001 From: John Readey Date: Tue, 6 Jan 2026 18:03:31 +0800 Subject: [PATCH 47/49] fix for use of H5S_UNLIMITED in maxdims --- hsds/domain_sn.py | 1 - hsds/dset_dn.py | 2 +- hsds/dset_lib.py | 2 +- hsds/util/dsetUtil.py | 5 ----- tests/integ/dataset_test.py | 8 ++++---- 5 files changed, 6 insertions(+), 12 deletions(-) diff --git a/hsds/domain_sn.py b/hsds/domain_sn.py index 46f2f890..5758cd0d 100755 --- a/hsds/domain_sn.py +++ b/hsds/domain_sn.py @@ -551,7 +551,6 @@ async def GET_Domain(request): kwargs = {"verbose": verbose, "getobjs": getobjs, "bucket": bucket} rsp_json = await getDomainResponse(app, domain_json, **kwargs) - # include dn_ids if requested if "getdnids" in params and params["getdnids"]: rsp_json["dn_ids"] = app["dn_ids"] diff --git a/hsds/dset_dn.py b/hsds/dset_dn.py index 5b99711d..3d5a261e 100755 --- a/hsds/dset_dn.py +++ b/hsds/dset_dn.py @@ -290,7 +290,7 @@ async def PUT_DatasetShape(request): if i == extend_dim: lb = dims[i] ub = lb + extension - if maxdims[extend_dim] != 0 
and ub > maxdims[extend_dim]: + if maxdims[extend_dim] not in (0, "H5S_UNLIMITED") and ub > maxdims[extend_dim]: msg = "maximum extent exceeded" log.warn(msg) raise HTTPConflict() diff --git a/hsds/dset_lib.py b/hsds/dset_lib.py index 384defe7..a6c58b45 100755 --- a/hsds/dset_lib.py +++ b/hsds/dset_lib.py @@ -1007,7 +1007,7 @@ async def updateShape(app, dset_json, shape_update, bucket=None): raise HTTPBadRequest(reason=msg) decreasing_dims.append(i) elif shape_update[i] > dims[i]: - if maxdims[i] != 0 and shape_update[i] > maxdims[i]: + if maxdims[i] not in (0, "H5S_UNLIMITED") and shape_update[i] > maxdims[i]: msg = "Extension dimension can not be extended past max extent" log.warn(msg) raise HTTPConflict() diff --git a/hsds/util/dsetUtil.py b/hsds/util/dsetUtil.py index 49723750..fb7d21c5 100644 --- a/hsds/util/dsetUtil.py +++ b/hsds/util/dsetUtil.py @@ -50,11 +50,6 @@ def getShapeJson(body): log.warn(msg) raise ValueError(msg) - if shape_class not in ("H5S_NULL", "H5S_SCALAR", "H5S_SIMPLE"): - msg = f"invalid shape class: {shape_class}" - log.warn(msg) - raise ValueError(msg) - if shape_class in ("H5S_NULL", "H5S_SCALAR") and dims: msg = f"dims not valid for shape class: {body_shape}" log.warn(msg) diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py index 04d7f20b..17357119 100755 --- a/tests/integ/dataset_test.py +++ b/tests/integ/dataset_test.py @@ -938,7 +938,7 @@ def testResizableUnlimitedDataset(self): # create the dataset req = self.endpoint + "/datasets" - payload = {"type": "H5T_IEEE_F32LE", "shape": [10, 20], "maxdims": [30, 0]} + payload = {"type": "H5T_IEEE_F32LE", "shape": [10, 20], "maxdims": [30, "H5S_UNLIMITED"]} payload["creationProperties"] = {"fillValue": 3.12} req = self.endpoint + "/datasets" rsp = self.session.post(req, data=json.dumps(payload), headers=headers) @@ -970,7 +970,7 @@ def testResizableUnlimitedDataset(self): self.assertEqual(shape["dims"][1], 20) self.assertTrue("maxdims" in shape) self.assertEqual(shape["maxdims"][0], 30) - self.assertEqual(shape["maxdims"][1], 0) + self.assertEqual(shape["maxdims"][1], "H5S_UNLIMITED") # verify shape using the GET shape request req = req + "/shape" @@ -987,7 +987,7 @@ def testResizableUnlimitedDataset(self): self.assertTrue("maxdims" in shape) self.assertEqual(len(shape["maxdims"]), 2) self.assertEqual(shape["maxdims"][0], 30) - self.assertEqual(shape["maxdims"][1], 0) + self.assertEqual(shape["maxdims"][1], "H5S_UNLIMITED") # resize the second dimension to 500 elements payload = {"shape": [10, 500]} @@ -1009,7 +1009,7 @@ def testResizableUnlimitedDataset(self): self.assertTrue("maxdims" in shape) self.assertEqual(len(shape["maxdims"]), 2) self.assertEqual(shape["maxdims"][0], 30) - self.assertEqual(shape["maxdims"][1], 0) + self.assertEqual(shape["maxdims"][1], "H5S_UNLIMITED") def testExtendDataset(self): # test extending dataset From c66d632161f29e1753565d903b2041e52cb2be37 Mon Sep 17 00:00:00 2001 From: John Readey Date: Tue, 6 Jan 2026 21:17:22 +0800 Subject: [PATCH 48/49] fix for domain_test --- tests/integ/domain_test.py | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/tests/integ/domain_test.py b/tests/integ/domain_test.py index 1c68cb5e..f01dcc93 100755 --- a/tests/integ/domain_test.py +++ b/tests/integ/domain_test.py @@ -14,7 +14,7 @@ import json from os import path as pp -from h5json.objid import createObjId +from h5json.objid import createObjId, getCollectionForId import config import helper @@ -116,33 +116,21 @@ def 
testGetDomain(self):
         attr_count = 0
         for objid in domain_objs:
             obj_json = domain_objs[objid]
-            self.assertTrue("id" in obj_json)
-            self.assertTrue("attributeCount" in obj_json)
-            attr_count += obj_json["attributeCount"]
-            self.assertFalse("attributes" in obj_json)
+            collection_type = getCollectionForId(objid)
+            if collection_type == "datasets":
+                self.assertTrue("attributes" in obj_json)
+                self.assertTrue("type" in obj_json)
+                self.assertTrue("shape" in obj_json)
+                self.assertTrue("creationProperties" in obj_json)
+            elif collection_type == "groups":
+                self.assertTrue("attributes" in obj_json)
+                self.assertTrue("links" in obj_json)
+            else:
+                self.assertTrue(False)  # unexpected type
+            attr_count += len(obj_json["attributes"])
         self.assertEqual(attr_count, 4)
 
-        # get a dict of all objects in the domain including any attributes
-        params["include_attrs"] = 1
-        rsp = self.session.get(req, headers=headers, params=params)
-        self.assertEqual(rsp.status_code, 200)
-        rspJson = json.loads(rsp.text)
-        self.assertTrue("domain_objs" in rspJson)
-        domain_objs = rspJson["domain_objs"]
-        self.assertEqual(len(domain_objs), 10)
-        attr_count = 0
-        for objid in domain_objs:
-            obj_json = domain_objs[objid]
-            self.assertTrue("attributeCount" in obj_json)
-            self.assertTrue("attributes" in obj_json)
-            attributes = obj_json["attributes"]
-            for attr_name in attributes:
-                # only the names "attr1" and "attr2" are used in this domain
-                self.assertTrue(attr_name in ("attr1", "attr2"))
-                attr_count += 1
-        self.assertEqual(attr_count, 4)
-
         # passing domain via the host header is deprecated
         # Previously this returned 200, now it is a 400
         del headers["X-Hdf-domain"]
 
From 6917c5d8df1dc47ce3c11caeedd77bfc5632d5d4 Mon Sep 17 00:00:00 2001
From: John Readey
Date: Thu, 8 Jan 2026 13:17:36 +0800
Subject: [PATCH 49/49] refactor linkUtil with h5json

---
 hsds/link_dn.py          |  17 +++--
 hsds/link_sn.py          |  51 +++++++++-----
 hsds/servicenode_lib.py  |   7 +-
 hsds/util/linkUtil.py    | 139 ++------------------------------------
 tests/integ/link_test.py |  67 ++++++++++---------
 5 files changed, 92 insertions(+), 189 deletions(-)

diff --git a/hsds/link_dn.py b/hsds/link_dn.py
index f602a405..ef1c0438 100755
--- a/hsds/link_dn.py
+++ b/hsds/link_dn.py
@@ -22,9 +22,9 @@
 from h5json.objid import isValidUuid
 from h5json.time_util import getNow
+from h5json.link_util import validateLinkName, getLinkClass, isEqualLink
 
 from .util.globparser import globmatch
-from .util.linkUtil import validateLinkName, getLinkClass, isEqualLink
 from .util.domainUtil import isValidBucketName
 from .datanode_lib import get_obj_id, get_metadata_obj, save_metadata_obj
 from . import config
@@ -156,6 +156,10 @@ async def GET_Links(request):
         link = copy(link_dict[title])
         log.debug(f"link list[{i}]: {link}")
         link["title"] = title
+        if link.get("h5domain"):
+            # "h5domain" key is deprecated - replace with "file"
+            link["file"] = link["h5domain"]
+            del link["h5domain"]
         link_list.append(link)
 
     resp_json = {"links": link_list}
@@ -218,6 +222,7 @@ async def POST_Links(request):
             log.info(f"Link name {title} not found in group: {group_id}")
             continue
         link_json = links[title]
+        log.debug(f"POST Links got link_json: {link_json}")
         item = {}
         if "class" not in link_json:
             log.warn(f"expected to find class key for link: {title}")
@@ -245,15 +250,19 @@ async def POST_Links(request):
                 log.warn(f"expected to find h5path for external link: {title}")
                 continue
             item["h5path"] = link_json["h5path"]
-            if "h5domain" not in link_json:
-                log.warn(f"expted to find h5domain for external link: {title}")
+            if "h5domain" in link_json:
+                item["file"] = link_json["h5domain"]
+            elif "file" in link_json:
+                item["file"] = link_json["file"]
+            else:
+                log.warn(f"expected to find h5domain or file for external link: {title}")
                 continue
-            item["h5domain"] = link_json["h5domain"]
         else:
             log.warn(f"unexpected link class {link_class} for link: {title}")
             continue
 
         item["title"] = title
+        log.debug(f"adding link item: {item}")
 
         link_list.append(item)
diff --git a/hsds/link_sn.py b/hsds/link_sn.py
index 2048dd7c..a7dcc6a0 100755
--- a/hsds/link_sn.py
+++ b/hsds/link_sn.py
@@ -17,19 +17,23 @@
 from json import JSONDecodeError
 
 from h5json.objid import isValidUuid, getCollectionForId
+from h5json.link_util import validateLinkName, getLinkClass, getLinkId
+from h5json.link_util import getLinkPath, getLinkFilePath
 
 from .util.nodeUtil import getDataNodeUrl
 from .util.httpUtil import getHref, getBooleanParam
 from .util.httpUtil import jsonResponse
 from .util.globparser import globmatch
 from .util.authUtil import getUserPasswordFromRequest, validateUserPassword
-from .util.domainUtil import getDomainFromRequest, isValidDomain, verifyRoot
-from .util.domainUtil import getBucketForDomain
-from .util.linkUtil import validateLinkName, getLinkClass
+from .util.domainUtil import getDomainFromRequest, isValidDomain, verifyRoot, getBucketForDomain
+from .util.linkUtil import getRequestLink
+
+
 from .servicenode_lib import getDomainJson, validateAction
 from .servicenode_lib import getLink, putLink, putLinks, getLinks, deleteLinks
 from .domain_crawl import DomainCrawler
 from . import hsds_logger as log
+from . 
import config async def GET_Links(request): @@ -221,13 +225,13 @@ async def GET_Link(request): link_class = link_json["class"] resp_link["class"] = link_class if link_class == "H5L_TYPE_HARD": - resp_link["id"] = link_json["id"] + resp_link["id"] = getLinkId(link_json) resp_link["collection"] = getCollectionForId(link_json["id"]) elif link_class == "H5L_TYPE_SOFT": - resp_link["h5path"] = link_json["h5path"] + resp_link["h5path"] = getLinkPath(link_json) elif link_class == "H5L_TYPE_EXTERNAL": - resp_link["h5path"] = link_json["h5path"] - resp_link["h5domain"] = link_json["h5domain"] + resp_link["h5path"] = getLinkPath(link_json) + resp_link["file"] = getLinkFilePath(link_json) else: log.warn(f"Unexpected link class: {link_class}") resp_json = {} @@ -291,17 +295,32 @@ async def PUT_Link(request): msg = f"Invalid domain: {domain}" log.warn(msg) raise HTTPBadRequest(reason=msg) - bucket = getBucketForDomain(domain) await validateAction(app, domain, group_id, username, "create") - # putLink will validate these arguments - kwargs = {"bucket": bucket} - kwargs["tgt_id"] = body.get("id") - kwargs["h5path"] = body.get("h5path") - kwargs["h5domain"] = body.get("h5domain") - created = body.get("created") - if created: - kwargs["created"] = created + + predate_max_time = config.get("predate_max_time", default=10.0) + + try: + link_json = getRequestLink(link_title, body, predate_max_time=predate_max_time) + except (KeyError, TypeError, ValueError) as e: + raise HTTPBadRequest(reason=str(e)) + + link_class = getLinkClass(link_json) + + kwargs = {} + kwargs["bucket"] = getBucketForDomain(domain) + if link_class == "H5L_TYPE_HARD": + kwargs["tgt_id"] = getLinkId(link_json) + elif link_class == "H5L_TYPE_SOFT": + kwargs["h5path"] = getLinkPath(link_json) + elif link_class == "H5L_TYPE_EXTERNAL": + kwargs["h5path"] = getLinkPath(link_json) + kwargs["h5domain"] = getLinkFilePath(link_json) + else: + raise HTTPBadRequest(reason=f"unexpected link class: {link_class}") + + if "created" in link_json: + kwargs["created"] = link_json["created"] status = await putLink(app, group_id, link_title, **kwargs) diff --git a/hsds/servicenode_lib.py b/hsds/servicenode_lib.py index 921e2f2d..f20908bf 100644 --- a/hsds/servicenode_lib.py +++ b/hsds/servicenode_lib.py @@ -31,11 +31,12 @@ from h5json.shape_util import getShapeDims, getShapeClass from h5json.dset_util import getChunkSize, generateLayout from h5json.dset_util import getDataSize, validateDatasetCreationProps +from h5json.link_util import h5Join, validateLinkName, getLinkClass, getLinkFilePath from h5json.time_util import getNow from .util.nodeUtil import getDataNodeUrl from .util.authUtil import getAclKeys -from .util.linkUtil import h5Join, validateLinkName, getLinkClass, getRequestLinks +from .util.linkUtil import getRequestLinks from .util.storUtil import getStorJSONObj, isStorObj, getSupportedFilters from .util.authUtil import aclCheck from .util.httpUtil import http_get, http_put, http_post, http_delete @@ -479,7 +480,7 @@ async def putLink(app, group_id, title, if h5path: link_json["h5path"] = h5path if h5domain: - link_json["h5domain"] = h5domain + link_json["file"] = h5domain if created: link_json["created"] = created @@ -659,7 +660,7 @@ async def getObjectIdByPath(app, obj_id, h5path, bucket=None, refresh=False, dom raise HTTPBadRequest(reason=msg) # find domain object is stored under - domain = link_json["h5domain"] + domain = getLinkFilePath(link_json) if domain.startswith("hdf5:/"): # strip off prefix diff --git a/hsds/util/linkUtil.py 
b/hsds/util/linkUtil.py
index d0063a39..65939e7d 100644
--- a/hsds/util/linkUtil.py
+++ b/hsds/util/linkUtil.py
@@ -13,129 +13,12 @@
 # linkUtil:
 # link related functions
 #
-import time
+from h5json.time_util import getNow
+from h5json.link_util import validateLinkName, getLinkClass, getLinkPath, getLinkFilePath
 
 from .. import hsds_logger as log
 
 
-def validateLinkName(name):
-    """ verify the link name is valid """
-    if not isinstance(name, str):
-        msg = "Unexpected type for link name"
-        log.warn(msg)
-        raise ValueError(msg)
-    if name.find("/") >= 0:
-        msg = "link name contains slash"
-        log.warn(msg)
-        raise ValueError(msg)
-
-
-def getLinkClass(link_json):
-    """ verify this is a valid link
-        returns the link class """
-    log.debug(f"getLinkClass({link_json})")
-    if "class" in link_json:
-        link_class = link_json["class"]
-    else:
-        link_class = None
-    if "h5path" in link_json and "id" in link_json:
-        msg = "link tgt_id and h5path both set"
-        log.warn(msg)
-        raise ValueError(msg)
-    if "id" in link_json:
-        tgt_id = link_json["id"]
-        if not isinstance(tgt_id, str) or len(tgt_id) < 38:
-            msg = f"link with invalid id: {tgt_id}"
-            log.warn(msg)
-            raise ValueError(msg)
-        if tgt_id[:2] not in ("g-", "t-", "d-"):
-            msg = "link tgt must be group, datatype or dataset uuid"
-            log.warn(msg)
-            raise ValueError(msg)
-        if link_class:
-            if link_class != "H5L_TYPE_HARD":
-                msg = f"expected link class to be H5L_TYPE_HARD but got: {link_class}"
-                log.warn(msg)
-                raise ValueError(msg)
-        else:
-            link_class = "H5L_TYPE_HARD"
-    elif "h5path" in link_json:
-        h5path = link_json["h5path"]
-        log.debug(f"link path: {h5path}")
-        if "h5domain" in link_json:
-            if link_class:
-                if link_class != "H5L_TYPE_EXTERNAL":
-                    msg = f"expected link class to be H5L_TYPE_EXTERNAL but got: {link_class}"
-                    log.warn(msg)
-                    raise ValueError(msg)
-            else:
-                link_class = "H5L_TYPE_EXTERNAL"
-        else:
-            if link_class:
-                if link_class != "H5L_TYPE_SOFT":
-                    msg = f"expected link class to be H5L_TYPE_SOFT but got: {link_class}"
-                    log.warn(msg)
-                    raise ValueError(msg)
-            else:
-                link_class = "H5L_TYPE_SOFT"
-    else:
-        msg = "link with no id or h5path"
-        log.warn(msg)
-        raise ValueError(msg)
-
-    return link_class
-
-
-def isEqualLink(link1, link2):
-    """ Return True if the two links are the same """
-
-    for obj in (link1, link2):
-        if not isinstance(obj, dict):
-            raise TypeError(f"unexpected type: {type(obj)}")
-        if "class" not in obj:
-            raise TypeError("expected class key for link")
-    if link1["class"] != link2["class"]:
-        return False  # different link types
-    link_class = link1["class"]
-    if link_class == "H5L_TYPE_HARD":
-        for obj in (link1, link2):
-            if "id" not in obj:
-                raise TypeError(f"expected id key for link: {obj}")
-        if link1["id"] != link2["id"]:
-            return False
-    elif link_class == "H5L_TYPE_SOFT":
-        for obj in (link1, link2):
-            if "h5path" not in obj:
-                raise TypeError(f"expected h5path key for link: {obj}")
-        if link1["h5path"] != link2["h5path"]:
-            return False
-    elif link_class == "H5L_TYPE_EXTERNAL":
-        for obj in (link1, link2):
-            for k in ("h5path", "h5domain"):
-                if k not in obj:
-                    raise TypeError(f"expected {k} key for link: {obj}")
-        if link1["h5path"] != link2["h5path"]:
-            return False
-        if link1["h5domain"] != link2["h5domain"]:
-            return False
-    else:
-        raise TypeError(f"unexpected link class: {link_class}")
-    return True
-
-
-def h5Join(path, paths):
-    h5path = path
-    if not paths:
-        return h5path
-    if isinstance(paths, str):
-        paths = (paths,)
-    for s in paths:
-        if h5path[-1] != "/":
-            h5path += "/"
-        h5path += s
-    return h5path
-
-
 def getRequestLink(title, link_json, predate_max_time=0.0):
     """ return normalized link from request json.
        Throws ValueError if badly formatted """
@@ -148,16 +31,11 @@ def getRequestLink(title, link_json, predate_max_time=0.0):
 
     log.debug(f"getRequestLink title: {title} link_json: {link_json}")
 
     link_item = {}  # normalized link item to return
-    now = time.time()
+    now = getNow()
 
     validateLinkName(title)  # will raise ValueError if invalid
 
     link_class = getLinkClass(link_json)
 
-    if "class" in link_item:
-        if link_class != link_json["class"]:
-            msg = f"expected link class of: {link_class} but got {link_json}"
-            log.warn(msg)
-            raise ValueError(msg)
 
     link_item = {"class": link_class}
 
@@ -169,17 +47,10 @@ def getRequestLink(title, link_json, predate_max_time=0.0):
         link_item["id"] = link_json["id"]
     else:
         if link_class in ("H5L_TYPE_SOFT", "H5L_TYPE_EXTERNAL"):
-            if "h5path" not in link_json:
-                msg = "expected h5path key for soft link"
-                log.warn(msg)
-                raise ValueError(msg)
-            link_item["h5path"] = link_json["h5path"]
+            link_item["h5path"] = getLinkPath(link_json)
 
         if link_class == "H5L_TYPE_EXTERNAL":
-            if "h5domain" not in link_json:
-                msg = "expected h5domain key for external link"
-                log.warn(msg)
-                raise ValueError(msg)
+            link_item["file"] = getLinkFilePath(link_json)
 
     if "created" in link_json:
         created = link_json["created"]
diff --git a/tests/integ/link_test.py b/tests/integ/link_test.py
index 244f8f5d..d95e6834 100755
--- a/tests/integ/link_test.py
+++ b/tests/integ/link_test.py
@@ -270,7 +270,7 @@ def testExternalLink(self):
         target_path = "somewhere"
         link_title = "external_link"
         req = helper.getEndpoint() + "/groups/" + root_id + "/links/" + link_title
-        payload = {"h5path": target_path, "h5domain": target_domain}
+        payload = {"h5path": target_path, "file": target_domain}
         rsp = self.session.put(req, data=json.dumps(payload), headers=headers)
         self.assertEqual(rsp.status_code, 201)  # created
@@ -294,7 +294,7 @@ def testExternalLink(self):
         self.assertEqual(rspLink["title"], link_title)
         self.assertEqual(rspLink["class"], "H5L_TYPE_EXTERNAL")
         self.assertEqual(rspLink["h5path"], target_path)
-        self.assertEqual(rspLink["h5domain"], target_domain)
+        self.assertEqual(rspLink["file"], target_domain)
 
     def testGetLinks(self):
        domain = self.base_domain + "/testGetLinks.h5"
@@ -524,10 +524,10 @@ def testGet(self):
                 self.assertTrue(link["created"] < now - 10)
             else:
                 self.assertEqual(link_class, "H5L_TYPE_EXTERNAL")
-                for name in ("created", "class", "h5domain", "h5path", "title", "href"):
+                for name in ("created", "class", "file", "h5path", "title", "href"):
                     self.assertTrue(name in link)
                 self.assertEqual(link["title"], "extlink")
-                extlink_file = link["h5domain"]
+                extlink_file = link["file"]
                 self.assertEqual(extlink_file, "somefile")
                 self.assertEqual(link["h5path"], "somepath")
                 self.assertTrue(link["created"] < now - 10)
@@ -555,7 +555,8 @@ def testGet(self):
             self.assertTrue(name in link)
 
         self.assertEqual(link["class"], "H5L_TYPE_SOFT")
-        self.assertFalse("h5domain" in link)  # only for external links
+        self.assertFalse("h5domain" in link)  # deprecated name
+        self.assertFalse("file" in link)  # only for external links
         self.assertEqual(link["title"], "slink")
         self.assertEqual(link["h5path"], "somevalue")
@@ -618,12 +619,14 @@ def testGetRecursive(self):
                 softlink_count += 1
                 self.assertTrue("h5path" in link)
                 self.assertFalse("h5domain" in link)
+                self.assertFalse("file" in link)
                 self.assertFalse("id" in link)
                 self.assertTrue(link_title in expected_soft_links)
             elif link_class == "H5L_TYPE_EXTERNAL":
extlink_count += 1 self.assertTrue("h5path" in link) - self.assertTrue("h5domain" in link) + self.assertTrue("file" in link) + self.assertFalse("h5domain" in link) # deprecated name self.assertFalse("id" in link) self.assertTrue(link_title in expected_external_links) else: @@ -690,7 +693,7 @@ def testGetPattern(self): self.assertEqual(len(links), 1) # only extlink should be returned link = links[0] - for name in ("created", "class", "h5domain", "h5path", "title"): + for name in ("created", "class", "file", "h5path", "title"): self.assertTrue(name in link) if use_post: pass # no href with post @@ -698,7 +701,7 @@ def testGetPattern(self): self.assertTrue("href" in link) self.assertEqual(link["class"], "H5L_TYPE_EXTERNAL") self.assertEqual(link["title"], "extlink") - self.assertEqual(link["h5domain"], "somefile") + self.assertEqual(link["file"], "somefile") self.assertEqual(link["h5path"], "somepath") self.assertTrue(link["created"] < now - 10) @@ -926,7 +929,7 @@ def testExternalLinkTraversal(self): target_path = "/external_group" link_title = "external_link_to_group" req = helper.getEndpoint() + "/groups/" + root_id + "/links/" + link_title - payload = {"h5path": target_path, "h5domain": second_domain} + payload = {"h5path": target_path, "file": second_domain} headers = helper.getRequestHeaders(domain=domain) rsp = self.session.put(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) @@ -959,7 +962,7 @@ def testExternalLinkTraversal(self): target_path = "/external_group" link_title = "external_link_to_group_prefix" req = helper.getEndpoint() + "/groups/" + root_id + "/links/" + link_title - payload = {"h5path": target_path, "h5domain": f"hdf5:/{second_domain}"} + payload = {"h5path": target_path, "file": f"hdf5:/{second_domain}"} headers = helper.getRequestHeaders(domain=domain) rsp = self.session.put(req, data=json.dumps(payload), headers=headers) self.assertEqual(rsp.status_code, 201) @@ -1222,10 +1225,10 @@ def testPostLinkSingle(self): self.assertTrue(link["created"] < now - 10) else: self.assertEqual(link_class, "H5L_TYPE_EXTERNAL") - for name in ("created", "class", "h5domain", "h5path", "title"): + for name in ("created", "class", "file", "h5path", "title"): self.assertTrue(name in link) self.assertEqual(link["title"], "extlink") - extlink_file = link["h5domain"] + extlink_file = link["file"] self.assertEqual(extlink_file, "somefile") self.assertEqual(link["h5path"], "somepath") self.assertTrue(link["created"] < now - 10) @@ -1296,7 +1299,7 @@ def testPostLinkMultiple(self): # soft or external link self.assertEqual(link["h5path"], expected["h5path"]) if link_class == "H5L_TYPE_EXTERNAL": - self.assertEqual(link["h5domain"], expected["h5domain"]) + self.assertEqual(link["file"], expected["file"]) # get just the requested links for each group req = helper.getEndpoint() + "/groups/" + root_id + "/links" @@ -1498,7 +1501,7 @@ def testPutLinkMultiple(self): # add a soft and external link as well links["softlink"] = {"h5path": "a_path"} - links["extlink"] = {"h5path": "another_path", "h5domain": "/a_domain"} + links["extlink"] = {"h5path": "another_path", "file": "/a_domain"} link_count = len(links) # write links to the grpA @@ -1531,8 +1534,8 @@ def testPutLinkMultiple(self): self.assertTrue("h5path" in link) h5path = link["h5path"] self.assertEqual(h5path, "another_path") - self.assertTrue("h5domain" in link) - h5domain = link["h5domain"] + self.assertTrue("file" in link) + h5domain = link["file"] self.assertEqual(h5domain, "/a_domain") else: 
self.assertTrue(False) # unexpected @@ -1545,7 +1548,7 @@ def testPutLinkMultiple(self): links = {} links["hardlink_multicast"] = {"id": root_id} links["softlink_multicast"] = {"h5path": "multi_path"} - links["extlink_multicast"] = {"h5path": "multi_path", "h5domain": "/another_domain"} + links["extlink_multicast"] = {"h5path": "multi_path", "file": "/another_domain"} link_count = len(links) data = {"links": links, "grp_ids": grp_ids} req = self.endpoint + "/groups/" + root_id + "/links" @@ -1573,8 +1576,8 @@ def testPutLinkMultiple(self): elif link_class == "H5L_TYPE_EXTERNAL": self.assertTrue("h5path" in ret_link) self.assertEqual(ret_link["h5path"], "multi_path") - self.assertTrue("h5domain" in ret_link) - self.assertEqual(ret_link["h5domain"], "/another_domain") + self.assertTrue("file" in ret_link) + self.assertEqual(ret_link["file"], "/another_domain") else: self.assertTrue(False) # unexpected @@ -1585,7 +1588,7 @@ def testPutLinkMultiple(self): links = {} links[f"hardlink_{i}"] = {"id": root_id} links[f"softlink_{i}"] = {"h5path": f"multi_path_{i}"} - ext_link = {"h5path": f"multi_path_{i}", "h5domain": f"/another_domain/{i}"} + ext_link = {"h5path": f"multi_path_{i}", "file": f"/another_domain/{i}"} links[f"extlink_{i}"] = ext_link link_data[grp_id] = {"links": links} @@ -1625,8 +1628,8 @@ def testPutLinkMultiple(self): self.assertEqual(link_title, f"extlink_{i}") self.assertTrue("h5path" in ret_link) self.assertEqual(ret_link["h5path"], f"multi_path_{i}") - self.assertTrue("h5domain" in ret_link) - self.assertEqual(ret_link["h5domain"], f"/another_domain/{i}") + self.assertTrue("file" in ret_link) + self.assertEqual(ret_link["file"], f"/another_domain/{i}") else: self.assertTrue(False) # unexpected @@ -1685,7 +1688,7 @@ def testPutLinkMultipleWithTimestamps(self): # add a soft and external link as well links["softlink"] = {"h5path": "a_path"} - links["extlink"] = {"h5path": "another_path", "h5domain": "/a_domain"} + links["extlink"] = {"h5path": "another_path", "file": "/a_domain"} link_count = len(links) # add timestamp timestamps = set() @@ -1725,8 +1728,8 @@ def testPutLinkMultipleWithTimestamps(self): self.assertTrue("h5path" in link) h5path = link["h5path"] self.assertEqual(h5path, "another_path") - self.assertTrue("h5domain" in link) - h5domain = link["h5domain"] + self.assertTrue("file" in link) + h5domain = link["file"] self.assertEqual(h5domain, "/a_domain") else: self.assertTrue(False) # unexpected @@ -1741,7 +1744,7 @@ def testPutLinkMultipleWithTimestamps(self): links = {} links["hardlink_multicast"] = {"id": root_id} links["softlink_multicast"] = {"h5path": "multi_path"} - links["extlink_multicast"] = {"h5path": "multi_path", "h5domain": "/another_domain"} + links["extlink_multicast"] = {"h5path": "multi_path", "file": "/another_domain"} link_count = len(links) timestamps = set() for title in links: @@ -1776,8 +1779,8 @@ def testPutLinkMultipleWithTimestamps(self): elif link_class == "H5L_TYPE_EXTERNAL": self.assertTrue("h5path" in ret_link) self.assertEqual(ret_link["h5path"], "multi_path") - self.assertTrue("h5domain" in ret_link) - self.assertEqual(ret_link["h5domain"], "/another_domain") + self.assertTrue("file" in ret_link) + self.assertEqual(ret_link["file"], "/another_domain") else: self.assertTrue(False) # unexpected self.assertTrue("created" in ret_link) @@ -1791,7 +1794,7 @@ def testPutLinkMultipleWithTimestamps(self): links = {} links[f"hardlink_{i}"] = {"id": root_id} links[f"softlink_{i}"] = {"h5path": f"multi_path_{i}"} - ext_link = {"h5path": 
f"multi_path_{i}", "h5domain": f"/another_domain/{i}"} + ext_link = {"h5path": f"multi_path_{i}", "file": f"/another_domain/{i}"} links[f"extlink_{i}"] = ext_link for title in links: link = links[title] @@ -1836,8 +1839,8 @@ def testPutLinkMultipleWithTimestamps(self): self.assertEqual(link_title, f"extlink_{i}") self.assertTrue("h5path" in ret_link) self.assertEqual(ret_link["h5path"], f"multi_path_{i}") - self.assertTrue("h5domain" in ret_link) - self.assertEqual(ret_link["h5domain"], f"/another_domain/{i}") + self.assertTrue("file" in ret_link) + self.assertEqual(ret_link["file"], f"/another_domain/{i}") else: self.assertTrue(False) # unexpected self.assertTrue("created" in ret_link) @@ -1886,7 +1889,7 @@ def testDeleteLinkMultiple(self): links[title] = {"h5path": "a_path"} titles.append(title) title = "extlink" - links[title] = {"h5path": "another_path", "h5domain": "/a_domain"} + links[title] = {"h5path": "another_path", "file": "/a_domain"} titles.append(title) link_count = len(links)
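
For reference, a minimal client-side sketch (not part of the patch) of the
external-link key rename exercised by the tests above. The endpoint, domain
name, and link names are illustrative, and auth headers are omitted for brevity:

    import json
    import requests

    endpoint = "http://localhost:5101"  # assumed local HSDS endpoint
    headers = {"X-Hdf-domain": "/home/test_user1/linkdemo.h5"}  # illustrative domain

    # the domain response includes the root group id
    root_id = requests.get(endpoint + "/", headers=headers).json()["root"]

    # create an external link - the "file" key replaces the deprecated "h5domain"
    payload = {"h5path": "/somewhere", "file": "/home/test_user1/other.h5"}
    req = endpoint + "/groups/" + root_id + "/links/extlink"
    rsp = requests.put(req, data=json.dumps(payload), headers=headers)
    assert rsp.status_code == 201

    # the link response also uses "file" rather than "h5domain"
    link = requests.get(req, headers=headers).json()["link"]
    assert link["class"] == "H5L_TYPE_EXTERNAL"
    assert link["file"] == "/home/test_user1/other.h5"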