Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
edcf2f6
trying zarr >=3.x with zarr_format=2, #355
FedeMPouzols Apr 17, 2025
45296aa
sort by ps node name when generating summary(), prevent dataframe ind…
FedeMPouzols Apr 17, 2025
3ccdc01
exec_block_xds/info: convert numpy types before using them as attrs, …
FedeMPouzols Apr 17, 2025
9a49734
Merge remote-tracking branch 'origin/main' into 355-zarr-python-v3-an…
FedeMPouzols May 27, 2025
8b2020d
Merge remote-tracking branch 'origin/main' into 355-zarr-python-v3-an…
FedeMPouzols Jun 13, 2025
08932cb
use ZARR_FORMAT=2 in image, move config to /_utils/zarr, #355
FedeMPouzols Jun 13, 2025
0b9b550
Merge remote-tracking branch 'origin/main' into 355-zarr-python-v3-an…
FedeMPouzols Jul 17, 2025
598a27b
linting+black after large merge, #355
FedeMPouzols Jul 17, 2025
a8cea2d
update test cases to new 3.1 zarr behavior/msgs, #355
FedeMPouzols Jul 17, 2025
50f2b25
use zarr.codecs for Zstd, #355
FedeMPouzols Jul 17, 2025
1c0bc83
switch to zarr_format=3, #355
FedeMPouzols Jul 17, 2025
52786f2
prevent image/_decode_sub_xdses from getting confused with zarr 3 dir…
FedeMPouzols Jul 17, 2025
d8235b0
Merge remote-tracking branch 'origin/main' into 355-zarr-python-v3-an…
FedeMPouzols Sep 1, 2025
c6a1b74
, move import up, reorg after merge conflicts, #355
FedeMPouzols Sep 1, 2025
68e565c
update expected error msg, #355
FedeMPouzols Sep 1, 2025
25425a1
Merge remote-tracking branch 'origin/main' into 355-zarr-python-v3-an…
FedeMPouzols Sep 11, 2025
d1c8436
Merge remote-tracking branch 'origin/main' into 355-zarr-python-v3-an…
FedeMPouzols Sep 16, 2025
3f34c41
Merge remote-tracking branch 'origin/main' into 355-zarr-python-v3-an…
FedeMPouzols Oct 30, 2025
f3d993c
Merge remote-tracking branch 'origin/main' into 355-zarr-python-v3-an…
FedeMPouzols Dec 11, 2025
4801662
Merge remote-tracking branch 'origin/main' into 355-zarr-python-v3-an…
FedeMPouzols Jan 22, 2026
548acf1
extend a bit doc of ZARR_FORMAT option, #355
FedeMPouzols Jan 22, 2026
2601f15
use zarr versions consistently in all pyproject deps, #355
FedeMPouzols Jan 22, 2026
25ecfc9
Merge remote-tracking branch 'origin/main' into 355-zarr-python-v3-an…
FedeMPouzols Jan 22, 2026
4da2c94
black version update in testing/, #355
FedeMPouzols Jan 22, 2026
eb34497
stores/zarr: check zarr.json (v3) in addition to .zattrs, #355
FedeMPouzols Jan 22, 2026
78389d7
fix Intensity/sky in casacore_from_casatools written imgs, #355, #470
FedeMPouzols Jan 23, 2026
742e604
Merge remote-tracking branch 'origin/main' into 355-zarr-python-v3-an…
FedeMPouzols Jan 27, 2026
2b0d12f
Merge remote-tracking branch 'origin/main' into 355-zarr-python-v3-an…
FedeMPouzols Apr 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ zarr = [
's3fs',
'scipy',
'xarray',
'zarr>=2,<3',
'zarr>=3,<4',
'pyarrow',
'psutil' # psutil is needed so large FITS images are not loaded into memory
]
Expand All @@ -38,7 +38,7 @@ test = [
's3fs',
'scipy',
'xarray',
'zarr>=2,<3',
'zarr>=3,<4',
'pyarrow',
'psutil',
'pytest',
Expand All @@ -53,7 +53,7 @@ casacore = [
's3fs',
'scipy',
'xarray',
'zarr>=2,<3',
'zarr>=3,<4',
'pyarrow',
'psutil',
'python_casacore>=3.6.1; sys_platform != "darwin"'
Expand All @@ -65,7 +65,7 @@ interactive = [
's3fs',
'scipy',
'xarray',
'zarr>=2,<3',
'zarr>=3,<4',
'pyarrow',
'psutil',
'matplotlib',
Expand Down Expand Up @@ -95,7 +95,7 @@ all = [
's3fs',
'scipy',
'xarray',
'zarr>=2,<3',
'zarr>=3,<4',
'pyarrow',
'psutil',
'pytest',
Expand Down
2 changes: 1 addition & 1 deletion src/xradio/_utils/_casacore/casacore_from_casatools.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,7 @@ def imageinfo(self) -> dict:
if "imageinfo" in tb.keywordnames():
image_metadata = tb.getkeyword("imageinfo")
else:
image_metadata = {"imagetype": "Intensity", "objectname": ""}
image_metadata = {"imagetype": "sky", "objectname": ""}

image_metadata["imagetype"] = _validate_image_type(
image_metadata.get("imagetype", "Intensity")
Expand Down
4 changes: 4 additions & 0 deletions src/xradio/_utils/zarr/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Added for the transition from zarr-python 2.x to 3.x. Zarr-Python v3 still supports
# Zarr format v2 in addition to the new v3. This config option is set for the new/default
# format v3 but can be changed to use format v2.
ZARR_FORMAT = 3
4 changes: 2 additions & 2 deletions src/xradio/image/_util/_zarr/xds_from_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ def _decode_sub_xdses(xarrayObj, top_dir: str, id_dict: dict) -> None:
ky = d.name[len(_top_level_sub_xds) :]
xarrayObj.attrs[ky] = _read_zarr(path, id_dict)
# TODO if attrs that are xdses have attrs that are xdses ...
else:
# descend into the directory
elif d.name != "c":
# descend into the directory (except for the Zarr v3 "c" dirs)
_decode_sub_xdses(xarrayObj[d.name], path, id_dict)


Expand Down
6 changes: 4 additions & 2 deletions src/xradio/image/_util/_zarr/xds_to_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import os
from .common import _np_types, _top_level_sub_xds

from xradio._utils.zarr.config import ZARR_FORMAT


def _write_zarr(xds: xr.Dataset, zarr_store: str):
max_chunk_size = 0.95 * 2**30
Expand All @@ -25,7 +27,7 @@ def _write_zarr(xds: xr.Dataset, zarr_store: str):
)
xds_copy = xds.copy(deep=True)
sub_xds_dict = _encode(xds_copy, zarr_store)
z_obj = xds_copy.to_zarr(store=zarr_store, compute=True)
z_obj = xds_copy.to_zarr(store=zarr_store, compute=True, zarr_format=ZARR_FORMAT)
if sub_xds_dict:
_write_sub_xdses(sub_xds_dict)

Expand Down Expand Up @@ -62,4 +64,4 @@ def _encode_dict(my_dict: dict, top_path: str, sub_xds_dict) -> tuple:

def _write_sub_xdses(sub_xds: dict):
for k, v in sub_xds.items():
z_obj = v.to_zarr(store=k, compute=True)
z_obj = v.to_zarr(store=k, compute=True, zarr_format=ZARR_FORMAT)
2 changes: 1 addition & 1 deletion src/xradio/image/_util/image_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ def detect_store_type(store):
elif os.path.isdir(store):
if "table.info" in os.listdir(store):
store_type = "casa"
elif ".zattrs" in os.listdir(store):
elif ".zattrs" in os.listdir(store) or "zarr.json" in os.listdir(store):
store_type = "zarr"
else:
xradio_logger().error("Unknown directory structure.")
Expand Down
28 changes: 28 additions & 0 deletions src/xradio/measurement_set/_utils/_msv2/_tables/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,34 @@ def convert_casacore_time_to_mjd(rawtimes: np.ndarray) -> np.ndarray:
return rawtimes / SECS_IN_DAY


def casacore_numpy_to_json_safe_type(value: object) -> object:
"""
This funciton converts values with numpy types (commonly found in "raw" xds loaded with load_generic_table()) to
native types that can be safely written to JSON.

This is handy when loading values from table columns that one wants to put in attributes. Starting
with Zarr 3, numpy types are not converted/encoded before writing to JSON, and an exception is raised.

Parameters
----------
value : object
A value or numpy array of values, of presumably numpy type

Returns
-------
object
The same value converted to a JSON safe type (for example int(a_numpy_int32_value))
"""
if isinstance(value, np.ndarray):
return ",".join([scalar for scalar in value])
elif isinstance(value, np.integer):
return int(value)
elif isinstance(value, np.floating):
return float(value)
else:
return value


def make_taql_where_between_min_max(
min_max: Tuple[np.float64, np.float64],
path: str,
Expand Down
12 changes: 8 additions & 4 deletions src/xradio/measurement_set/_utils/_msv2/conversion.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from collections import deque
import datetime
import importlib
import numcodecs
import os
import pathlib
import time
import traceback
from typing import Callable, Dict, Union

import dask.array as da
import numpy as np
import xarray as xr
import traceback
import zarr.codecs

try:
from casacore import tables
Expand Down Expand Up @@ -1012,7 +1012,7 @@ def convert_and_write_partition(
ephemeris_interpolate: bool = False,
phase_cal_interpolate: bool = False,
sys_cal_interpolate: bool = False,
compressor: numcodecs.abc.Codec = numcodecs.Zstd(level=2),
compressor: zarr.abc.codec.BytesBytesCodec = zarr.codecs.ZstdCodec(level=2),
add_reshaping_indices: bool = False,
storage_backend="zarr",
parallel_mode: str = "none",
Expand Down Expand Up @@ -1412,8 +1412,12 @@ def get_observation_info(in_file, observation_id, scan_intents):
ms_xdt["/phased_array_xds"] = phased_array_xds

if storage_backend == "zarr":
from xradio._utils.zarr.config import ZARR_FORMAT

ms_xdt.to_zarr(
store=os.path.join(out_file, ms_v4_name), mode=persistence_mode
store=os.path.join(out_file, ms_v4_name),
mode=persistence_mode,
zarr_format=ZARR_FORMAT,
)
elif storage_backend == "netcdf":
# xds.to_netcdf(path=file_name+"/MAIN", mode=mode) #Does not work
Expand Down
14 changes: 8 additions & 6 deletions src/xradio/measurement_set/_utils/_msv2/msv4_info_dicts.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re

import numpy as np

import xarray as xr

try:
Expand All @@ -10,7 +11,11 @@


from .subtables import subt_rename_ids
from ._tables.read import load_generic_table, convert_casacore_time
from ._tables.read import (
casacore_numpy_to_json_safe_type,
convert_casacore_time,
load_generic_table,
)
from xradio._utils.list_and_array import check_if_consistent
from xradio._utils.logging import xradio_logger

Expand Down Expand Up @@ -191,7 +196,7 @@ def extract_optional_fields_asdm_asis_table(

Returns:
--------
info: dict
table_info: dict
info dict with description from an ASDM_* subtable, ready
for the MSv4 observation_info dict
"""
Expand All @@ -200,10 +205,7 @@ def extract_optional_fields_asdm_asis_table(
for field_msv4, col_msv2 in optional_fields.items():
if col_msv2 in asdm_asis_xds.data_vars:
msv2_value = asdm_asis_xds[col_msv2].values[0]
if isinstance(msv2_value, np.ndarray):
table_info[field_msv4] = ",".join([log for log in msv2_value])
else:
table_info[field_msv4] = msv2_value
table_info[field_msv4] = casacore_numpy_to_json_safe_type(msv2_value)

return table_info

Expand Down
2 changes: 1 addition & 1 deletion src/xradio/measurement_set/_utils/_zarr/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ def add_encoding(xds, compressor, chunks=None):

for da_name in list(xds.data_vars):
da_chunks = [chunks[dim_name] for dim_name in xds[da_name].sizes]
xds[da_name].encoding = {"compressor": compressor, "chunks": da_chunks}
xds[da_name].encoding = {"compressors": (compressor,), "chunks": da_chunks}
12 changes: 7 additions & 5 deletions src/xradio/measurement_set/convert_msv2_to_processing_set.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import numcodecs
from typing import Dict, Union, Literal
import time

import dask
import zarr.codecs

from xradio.measurement_set._utils._msv2.partition_queries import (
create_partitions,
Expand All @@ -11,6 +11,7 @@
convert_and_write_partition,
estimate_memory_and_cores_for_partitions,
)
from xradio._utils.zarr.config import ZARR_FORMAT
from xradio._utils.logging import xradio_logger


Expand Down Expand Up @@ -64,7 +65,7 @@ def convert_msv2_to_processing_set(
phase_cal_interpolate: bool = False,
sys_cal_interpolate: bool = False,
use_table_iter: bool = False,
compressor: numcodecs.abc.Codec = numcodecs.Zstd(level=2),
compressor: zarr.abc.codec.BytesBytesCodec = zarr.codecs.ZstdCodec(level=2),
add_reshaping_indices: bool = False,
storage_backend: Literal["zarr", "netcdf"] = "zarr",
parallel_mode: Literal["none", "partition", "time"] = "none",
Expand Down Expand Up @@ -125,7 +126,7 @@ def convert_msv2_to_processing_set(
if not str(out_file).endswith("ps.zarr"):
out_file += ".ps.zarr"

ps_dt.to_zarr(store=out_file, mode=persistence_mode)
ps_dt.to_zarr(store=out_file, mode=persistence_mode, zarr_format=ZARR_FORMAT)

# Check `parallel_mode` is valid
try:
Expand Down Expand Up @@ -226,6 +227,7 @@ def convert_msv2_to_processing_set(

import zarr

root_group = zarr.open(out_file, mode="r+") # Open in read/write mode
# Open in read/write mode
root_group = zarr.open(out_file, mode="r+", zarr_format=ZARR_FORMAT)
root_group.attrs["type"] = "processing_set" # Replace
zarr.convenience.consolidate_metadata(root_group.store)
zarr.consolidate_metadata(root_group.store)
2 changes: 1 addition & 1 deletion src/xradio/measurement_set/processing_set_xdt.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def _summary(self, data_group_name: str = None):
from astropy.coordinates import SkyCoord
import astropy.units as u

for key, value in self._xdt.items():
for key, value in sorted(self._xdt.items()):
partition_info = value.xr_ms.get_partition_info()
observation_info = value.observation_info

Expand Down
4 changes: 3 additions & 1 deletion tests/stakeholder/test_measure_set_stakeholder.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
ProcessingSetXdt,
)
from xradio.schema.check import check_datatree
from xradio._utils.zarr.config import ZARR_FORMAT

# relative_tolerance = 10 ** (-12)
relative_tolerance = 10 ** (-5)
Expand Down Expand Up @@ -364,7 +365,8 @@ def base_test(
else:
ps_copy_name = str(ps_name) + "_copy"

ps_lazy_xdt.to_zarr(ps_copy_name) # Test writing to disk.
# Test writing to disk.
ps_lazy_xdt.to_zarr(ps_copy_name, zarr_format=ZARR_FORMAT)

ms_xdt_name = list(ps_lazy_xdt.keys())[0]
ms_xds = ps_lazy_xdt[ms_xdt_name].ds
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/measurement_set/_utils/_msv2/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ def test_convert_and_write_partition_with_antenna1(ms_minimal_required):
)

# Will need a SD-like test ms. Otherwise the partition is empty:
with pytest.raises(FileNotFoundError, match="No such file or directory"):
with pytest.raises(FileNotFoundError, match="does not exist"):
msv4_xds = xr.open_dataset(
out_name
+ "/"
Expand Down
9 changes: 6 additions & 3 deletions tests/unit/measurement_set/_utils/_zarr/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_add_encoding_wo_chunks():

add_encoding(xds, single_encoding)
assert xds
assert xds.da.encoding == {"compressor": single_encoding, "chunks": [3]}
assert xds.da.encoding == {"compressors": (single_encoding,), "chunks": [3]}


def test_add_encoding_with_wrong_chunks():
Expand All @@ -38,7 +38,7 @@ def test_add_encoding_with_wrong_chunks():
chunks_size = 1
add_encoding(xds, single_encoding, chunks={"zz_not_there": chunks_size})
assert xds
assert xds.da.encoding == {"compressor": single_encoding, "chunks": [3]}
assert xds.da.encoding == {"compressors": (single_encoding,), "chunks": [3]}


def test_add_encoding_with_chunks():
Expand All @@ -56,4 +56,7 @@ def test_add_encoding_with_chunks():
chunks_size = 1
add_encoding(xds, single_encoding, chunks={"x": chunks_size})
assert xds
assert xds.da.encoding == {"compressor": single_encoding, "chunks": [chunks_size]}
assert xds.da.encoding == {
"compressors": (single_encoding,),
"chunks": [chunks_size],
}
Loading