From 9af7c7f4053ae67c38bcef80980e04ab44ffaf27 Mon Sep 17 00:00:00 2001 From: Peter Wortmann Date: Thu, 1 May 2025 18:55:51 +0100 Subject: [PATCH 01/12] Replace Python annotations with explicit type/literal descriptions This should make schemas portable to other languages and environments. --- src/xradio/measurement_set/schema.py | 39 ++++---- src/xradio/schema/check.py | 116 ++++++------------------ src/xradio/schema/dataclass.py | 131 ++++++++++++++++++++++++++- src/xradio/schema/metamodel.py | 36 +++++++- 4 files changed, 207 insertions(+), 115 deletions(-) diff --git a/src/xradio/measurement_set/schema.py b/src/xradio/measurement_set/schema.py index d676f2e0..2757ef41 100644 --- a/src/xradio/measurement_set/schema.py +++ b/src/xradio/measurement_set/schema.py @@ -79,24 +79,21 @@ # Units of quantities and measures -UnitsSeconds = list[Literal["s"]] -UnitsHertz = list[Literal["Hz"]] -UnitsMeters = list[Literal["m"]] - -UnitsOfSkyCoordInRadians = list[Literal["rad"], Literal["rad"]] -UnitsOfLocationInMetersOrRadians = Union[ - list[Literal["m"], Literal["m"], Literal["m"]], - list[Literal["rad"], Literal["rad"], Literal["m"]], -] -UnitsOfPositionInRadians = list[Literal["rad"], Literal["rad"], Literal["m"]] -UnitsOfDopplerShift = Union[list[Literal["ratio"]], list[Literal["m/s"]]] +UnitsSeconds = Literal[["s"]] +UnitsHertz = Literal[["Hz"]] +UnitsMeters = Literal[["m"]] + +UnitsOfSkyCoordInRadians = Literal[["rad", "rad"]] +UnitsOfLocationInMetersOrRadians = Literal[["m", "m", "m"], ["rad", "rad", "m"]] +UnitsOfPositionInRadians = Literal[["rad", "rad", "m"]] +UnitsOfDopplerShift = Literal[["ratio"], ["m/s"]] -UnitsRadians = list[Literal["rad"]] -UnitsKelvin = list[Literal["K"]] -UnitsKelvinPerJansky = list[Literal["K/Jy"]] -UnitsMetersPerSecond = list[Literal["m/s"]] -UnitsPascal = list[Literal["Pa"]] # hPa? 
(in MSv2) -UnitsPerSquareMeters = list[Literal["/m^2"]] +UnitsRadians = Literal[["rad"]] +UnitsKelvin = Literal[["K"]] +UnitsKelvinPerJansky = Literal[["K/Jy"]] +UnitsMetersPerSecond = Literal[["m/s"]] +UnitsPascal = Literal[["Pa"]] # hPa? (in MSv2) +UnitsPerSquareMeters = Literal[["/m^2"]] # Quantities @@ -497,7 +494,7 @@ class TimeWeatherCoordArray: ``format``). """ - type: Attr[Time] = "time_weather" + type: Attr[TimeWeather] = "time_weather" """ Coordinate type. Should be ``"time_weather"``. """ units: Attr[UnitsSeconds] = ("s",) @@ -1314,7 +1311,7 @@ class SpectrumArray: long_name: Optional[Attr[str]] = "Spectrum values" """ Long-form name to use for axis. Should be ``"Spectrum values"``""" - units: Attr[list[str]] = ("Jy",) + units: Attr[List[str]] = ("Jy",) @xarray_dataarray_schema @@ -1333,7 +1330,7 @@ class VisibilityArray: long_name: Optional[Attr[str]] = "Visibility values" """ Long-form name to use for axis. Should be ``"Visibility values"``""" - units: Attr[list[str]] = ("Jy",) + units: Attr[List[str]] = ("Jy",) allow_mutiple_versions: Optional[Attr[bool]] = True @@ -1371,7 +1368,7 @@ class VisibilityArray: @dict_schema class ObservationInfoDict: - observer: list + observer: list[str] """List of observer names.""" project: str """Project Code/Project_UID""" diff --git a/src/xradio/schema/check.py b/src/xradio/schema/check.py index c5e9b6e4..617e585d 100644 --- a/src/xradio/schema/check.py +++ b/src/xradio/schema/check.py @@ -1,3 +1,4 @@ +import builtins import dataclasses import typing import inspect @@ -334,30 +335,21 @@ def check_attributes( issues = SchemaIssues() for attr_schema in attrs_schema: - # Attribute missing? 
Note that a value of "None" is equivalent for the - # purpose of the check + # Attribute missing is equivalent to a value of "None" is + # equivalent for the purpose of the check val = attrs.get(attr_schema.name) if val is None: if not attr_schema.optional: - # Get options - if typing.get_origin(attr_schema.typ) is typing.Union: - options = typing.get_args(attr_schema.typ) - else: - options = [attr_schema.typ] - issues.add( SchemaIssue( path=[(attr_kind, attr_schema.name)], - message=f"Required attribute {attr_schema.name} is missing!", - expected=options, + message="Non-optional attribute is missing!", ) ) continue - # Check attribute value - issues += _check_value_union(val, attr_schema.typ).at_path( - attr_kind, attr_schema.name - ) + # Check actual value + issues += _check_value(val, attr_schema).at_path(attr_kind, attr_schema.name) # Extra attributes are always okay @@ -385,7 +377,6 @@ def check_data_vars( issues = SchemaIssues() for data_var_schema in data_vars_schema: - allow_mutiple_versions = False for attr in data_var_schema.attributes: if hasattr(attr, "name"): @@ -450,7 +441,7 @@ def check_dict( return check_attributes(dct, schema.attributes, attr_kind="") -def _check_value(val, ann): +def _check_value(val: typing.Any, ann: metamodel.AttrSchemaRef): """ Check whether value satisfies annotation @@ -462,7 +453,7 @@ def _check_value(val, ann): """ # Is supposed to be a data array? - if bases.is_dataarray_schema(ann): + if ann.type_name == "dataarray": # Attempt to convert dictionaries automatically if isinstance(val, dict): try: @@ -486,41 +477,28 @@ def _check_value(val, ann): if not isinstance(val, xarray.DataArray): # Fall through to plain type check - ann = xarray.DataArray + type_to_check = xarray.DataArray else: - return check_array(val, ann) - - # Is supposed to be a dataset? 
- if bases.is_dataset_schema(ann): - # Attempt to convert dictionaries automatically - if isinstance(val, dict): - try: - val = xarray.Dataset.from_dict(val) - except ValueError as e: - return SchemaIssues( - [ - SchemaIssue( - path=[], message=str(t), expected=[ann], found=type(val) - ) - ] - ) - if not isinstance(val, xarray.Dataset): - # Fall through to plain type check - ann = xarray.Dataset - else: - return check_dataset(val, ann) + return check_array(val, ann.array_schema) # Is supposed to be a dictionary? - if bases.is_dict_schema(ann): + elif ann.type_name == "dict": if not isinstance(val, dict): # Fall through to plain type check - ann = dict + type_to_check = dict else: - return check_dict(val, ann) + return check_dict(val, ann.dict_schema) + + elif ann.type_name == "list[str]": + type_to_check = typing.List[str] + elif ann.type_name in ["bool", "str", "int", "float"]: + type_to_check = getattr(builtins, ann.type_name) + else: + return ValueError(f"Invalid typ_name in schema: {ann.type_name}") # Otherwise straight type check using typeguard try: - check_type(val, ann) + check_type(val, type_to_check) except TypeCheckError as t: return SchemaIssues( [SchemaIssue(path=[], message=str(t), expected=[ann], found=type(val))] @@ -529,48 +507,6 @@ def _check_value(val, ann): return SchemaIssues() -def _check_value_union(val, ann): - """ - Check whether value satisfies annotations, including union types - - If the annotation is a data array or dataset schema, it will be checked. 
- - :param val: Value to check - :param ann: Type annotation of value - :returns: Schema issues - """ - - if ann is None or ann is inspect.Signature.empty: - return SchemaIssues() - - # Account for union types (this especially catches "Optional") - if typing.get_origin(ann) is typing.Union: - options = typing.get_args(ann) - else: - options = [ann] - - # Go through options, try to find one without issues - args_issues = None - okay = False - for option in options: - arg_issues = _check_value(val, option) - # We can immediately return if we find no issues with - # some schema check - if not arg_issues: - return SchemaIssues() - if args_issues is None: - args_issues = arg_issues - - # Crude merging of expected options (for "unexpected type") - elif len(args_issues) == 1 and len(arg_issues) == 1: - args_issues[0].expected += arg_issues[0].expected - - # Return representative issues list - if not args_issues: - raise ValueError("Empty union set?") - return args_issues - - _DATASET_TYPES = {} @@ -591,7 +527,7 @@ def register_dataset_type(schema: metamodel.DatasetSchema): continue # Type should be a kind of literal - if typing.get_origin(attr.typ) is not typing.Literal: + if attr.literal is None: warnings.warn( f"In dataset schema {schema.schema_name}:" 'Attribute "type" should be a literal!' @@ -599,7 +535,12 @@ def register_dataset_type(schema: metamodel.DatasetSchema): continue # Register type names - for typ in typing.get_args(attr.typ): + for typ in attr.literal: + assert isinstance(typ, str), ( + f"In dataset schema {schema.schema_name}:" + 'Attribute "type" should be a literal giving ' + "names of schema!" 
+ ) _DATASET_TYPES[typ] = schema @@ -621,7 +562,6 @@ def check_datatree( # Loop through all groups in datatree issues = SchemaIssues() for xds_name in datatree.groups: - # Ignore any leaf without data node = datatree[xds_name] if not node.has_data: diff --git a/src/xradio/schema/dataclass.py b/src/xradio/schema/dataclass.py index 82275b4d..956a13db 100644 --- a/src/xradio/schema/dataclass.py +++ b/src/xradio/schema/dataclass.py @@ -88,6 +88,113 @@ def _check_invalid_dims( return valid_dims +def _attr_type( + ann: typing.Any, klass_name: str, field_name: str +) -> (str, typing.Optional[typing.List[typing.Any]]): + """ + Take attribute type annotation and convert into type name and + - optionally - a list of literal allowed values + + :param ann: Annotation + :param klass_name: Name of class where annotation origins from + :param field_name: Name of field where annotation origins from + :returns: Pair of (type_name, dict_schema, array_schema, literals + """ + + # Is a type? + if isinstance(ann, type): + # Array type? + if hasattr(ann, "__xradio_array_schema"): + return ("dataarray", None, ann.__xradio_array_schema, None) + + # Dictionary type? + if hasattr(ann, "__xradio_dict_schema"): + return ("dict", ann.__xradio_dict_schema, None, None) + + # Check that it is an allowable type + if ann not in [bool, str, int, float, bool]: + raise ValueError( + f"In '{klass_name}', field '{field_name}' has" + f" type {ann} - but only str, int, float or list are allowed!" + ) + return (ann.__name__, None, None, None) + + # Is a list + if typing.get_origin(ann) in [typing.List, list]: + args = typing.get_args(ann) + + # Must be a string list + if args != (str,): + raise ValueError( + f"In '{klass_name}', field '{field_name}' has" + f" annotation {ann}, but only str, int, float, list[str] or Literal allowed!" + ) + + return ("list[str]", None, None, None) + + # Is a literal? 
+ if typing.get_origin(ann) is typing.Literal: + args = typing.get_args(ann) + + # Check that it is an allowable type + if len(args) == 0: + raise ValueError( + f"In '{klass_name}', field '{field_name}' has" + f" literal annotation, but allows no values!" + ) + + # String list? + typ = type(args[0]) + if typ is list: + elem_type = type(args[0][0]) + if elem_type is not str: + raise ValueError( + f"In '{klass_name}', field '{field_name}' has" + f" literal type list[{elem_type}] - but only list[str] is allowed!" + ) + for lit in args: + if not isinstance(lit, typ): + raise ValueError( + f"In '{klass_name}', field '{field_name}' literal" + f" {lit} has inconsistent type ({typ(lit)}) vs ({typ})!" + ) + for elem in lit: + if not isinstance(elem, elem_type): + raise ValueError( + f"In '{klass_name}', field '{field_name}' literal" + f" {lit} has inconsistent element type " + f"({typ(elem)}) vs ({elem_type})!" + ) + return ( + "list[str]", + None, + None, + [[str(elem) for elem in arg] for arg in args], + ) + + # Check that it is an allowable type + if typ not in [bool, str, int, float]: + raise ValueError( + f"In '{klass_name}', field '{field_name}' has" + f" literal type {typ} - but only str, int, float or list[str] are allowed!" + ) + + # Check that all literals have the same type + for lit in args: + if not isinstance(lit, typ): + raise ValueError( + f"In '{klass_name}', field '{field_name}' literal" + f" {lit} has inconsistent type ({typ(lit)}) vs ({typ})!" + ) + + return (typ.__name__, None, None, [typ(arg) for arg in args]) + + raise ValueError( + f"In '{klass_name}', field '{field_name}' has" + f" annotation {ann}, but only type or Literal allowed!" + ) + + def extract_xarray_dataclass(klass, allow_undefined_coords: bool = False): """ Go through dataclass fields and interpret them according to xarray-dataclass @@ -132,10 +239,23 @@ def check_invalid_dims(dims, field_name): # Is it an attribute? 
if role == Role.ATTR: + try: + ann = get_annotated(typ) + except TypeError as e: + raise ValueError( + f"Could not get annotation in '{klass.__name__}' field '{field.name}': {e}" + ) + type_name, dict_schema, array_schema, literal = _attr_type( + get_annotated(typ), klass.__name__, field.name + ) + attributes.append( AttrSchemaRef( name=field.name, - typ=get_annotated(typ), + type_name=type_name, + dict_schema=dict_schema, + array_schema=array_schema, + literal=literal, optional=is_optional(typ), default=field.default, docstring=field_docstrings.get(field.name), @@ -378,10 +498,17 @@ def xarray_dataclass_to_dict_schema(klass): else: typ = typing.Union.__getitem__[tuple(typs)] + type_name, dict_schema, array_schema, literal = _attr_type( + typ, klass.__name__, field.name + ) + attributes.append( AttrSchemaRef( name=field.name, - typ=typ, + type_name=type_name, + dict_schema=dict_schema, + array_schema=array_schema, + literal=literal, optional=optional, default=field.default, docstring=field_docstrings.get(field.name), diff --git a/src/xradio/schema/metamodel.py b/src/xradio/schema/metamodel.py index dfafefc9..5d4af6db 100644 --- a/src/xradio/schema/metamodel.py +++ b/src/xradio/schema/metamodel.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from dataclasses import dataclass import typing @@ -21,11 +23,37 @@ class AttrSchemaRef: """ name: str - """Name of attribute as given in data array / dataset.""" - typ: type """ - Python type of attribute. Note that this might again be a data - array or dataset, but we don't track that explicitly. + Name of attribute as given in data array / dataset. 
+ + * ``bool``: A boolean + * ``str``: A UTF-8 string + * ``int``: A 64-bit signed integer + * ``float``: A double-precision floating point number + * ``str_list``: A list of strings + * ``dataarray``: An xarray dataarray (encoded using to_dict) + """ + type_name: typing.Literal[ + "bool", "str", "int", "float", "list[str]", "dict", "dataarray" + ] + """ + Dictionary schema, if it is an xarray DataArray + """ + dict_schema: typing.Optional[DictSchema] + """ + Array schema, if it is an xarray DataArray + """ + array_schema: typing.Optional[ArraySchema] + """ + Python name of type. + + * str = Unicode string + * int = 64 bit integer + * float = 64 bit floating point number (double) + """ + literal: typing.Optional[typing.List[typing.Any]] + """ + Allowed literal values, if specified. """ optional: bool """Is the attribute optional?""" From 1401e70c70d5ba70115a0c52841731bf63146473 Mon Sep 17 00:00:00 2001 From: Peter Wortmann Date: Thu, 29 May 2025 12:36:41 +0100 Subject: [PATCH 02/12] Introduce separate ValueSchema so we can tackle decorator checks This should also finally fix the test cases... 
--- src/xradio/schema/check.py | 85 ++++++++++++++++++++++++++------- src/xradio/schema/dataclass.py | 86 ++++++++++++++++++---------------- src/xradio/schema/metamodel.py | 53 +++++++++++---------- tests/unit/test_schema.py | 65 +++++++++---------------- 4 files changed, 164 insertions(+), 125 deletions(-) diff --git a/src/xradio/schema/check.py b/src/xradio/schema/check.py index 617e585d..89a1e93b 100644 --- a/src/xradio/schema/check.py +++ b/src/xradio/schema/check.py @@ -16,6 +16,8 @@ xarray_dataclass_to_dataset_schema, xarray_dataclass_to_dict_schema, ) +from xradio.schema.dataclass import value_schema +from xradio.schema.metamodel import AttrSchemaRef, ValueSchema @dataclasses.dataclass @@ -344,6 +346,8 @@ def check_attributes( SchemaIssue( path=[(attr_kind, attr_schema.name)], message="Non-optional attribute is missing!", + found=None, + expected=[attr_schema.type_name], ) ) continue @@ -441,36 +445,50 @@ def check_dict( return check_attributes(dct, schema.attributes, attr_kind="") -def _check_value(val: typing.Any, ann: metamodel.AttrSchemaRef): +def _check_value(val: typing.Any, schema: metamodel.ValueSchema): """ Check whether value satisfies annotation If the annotation is a data array or dataset schema, it will be checked. :param val: Value to check - :param ann: Type annotation of value + :param schema: Schema of value :returns: Schema issues """ + # Unspecified? + if schema.type_name is None: + return SchemaIssues() + + # Optional? + if schema.optional and val is None: + return SchemaIssues() + # Is supposed to be a data array? 
- if ann.type_name == "dataarray": + if schema.type_name == "dataarray": # Attempt to convert dictionaries automatically if isinstance(val, dict): try: val = xarray.DataArray.from_dict(val) except ValueError as e: + expected = [DataArray] + if schema.optional: + expected.append(type(None)) return SchemaIssues( [ SchemaIssue( - path=[], message=str(e), expected=[ann], found=type(val) + path=[], message=str(e), expected=expected, found=type(val) ) ] ) except TypeError as e: + expected = [DataArray] + if schema.optional: + expected.append(type(None)) return SchemaIssues( [ SchemaIssue( - path=[], message=str(e), expected=[ann], found=type(val) + path=[], message=str(e), expected=expected, found=type(val) ) ] ) @@ -479,29 +497,48 @@ def _check_value(val: typing.Any, ann: metamodel.AttrSchemaRef): # Fall through to plain type check type_to_check = xarray.DataArray else: - return check_array(val, ann.array_schema) + return check_array(val, schema.array_schema) # Is supposed to be a dictionary? 
- elif ann.type_name == "dict": + elif schema.type_name == "dict": if not isinstance(val, dict): # Fall through to plain type check type_to_check = dict else: - return check_dict(val, ann.dict_schema) + return check_dict(val, schema.dict_schema) - elif ann.type_name == "list[str]": + elif schema.type_name == "list[str]": type_to_check = typing.List[str] - elif ann.type_name in ["bool", "str", "int", "float"]: - type_to_check = getattr(builtins, ann.type_name) + elif schema.type_name in ["bool", "str", "int", "float"]: + type_to_check = getattr(builtins, schema.type_name) else: - return ValueError(f"Invalid typ_name in schema: {ann.type_name}") + raise ValueError(f"Invalid typ_name in schema: {schema.type_name}") # Otherwise straight type check using typeguard try: check_type(val, type_to_check) except TypeCheckError as t: + expected = [type_to_check] + if schema.optional: + expected.append(type(None)) + return SchemaIssues( + [SchemaIssue(path=[], message=str(t), expected=expected, found=type(val))] + ) + + # List of literals given? 
+ if schema.literal is not None: + for lit in schema.literal: + if val == lit: + return SchemaIssues() return SchemaIssues( - [SchemaIssue(path=[], message=str(t), expected=[ann], found=type(val))] + [ + SchemaIssue( + path=[], + message=f"Disallowed literal value!", + expected=schema.literal, + found=val, + ) + ] ) return SchemaIssues() @@ -618,7 +655,7 @@ def schema_checked(fn, check_parameters: bool = True, check_return: bool = True) @functools.wraps(fn) def _check_fn(*args, **kwargs): # Hide this function in pytest tracebacks - __tracebackhide__ = True + # __tracebackhide__ = True # Bind parameters, collect (potential) issues bound = signature.bind(*args, **kwargs) @@ -628,7 +665,15 @@ def _check_fn(*args, **kwargs): continue # Get annotation - issues += _check_value_union(val, anns.get(arg)).at_path(arg) + vschema = value_schema(anns.get(arg), "function", arg) + pseudo_attr_schema = AttrSchemaRef( + name=arg, + **{ + fld.name: getattr(vschema, fld.name) + for fld in dataclasses.fields(ValueSchema) + }, + ) + issues += _check_value(val, pseudo_attr_schema).at_path(arg) # Any issues found? 
raise issues.expect() @@ -638,7 +683,15 @@ def _check_fn(*args, **kwargs): # Check return if check_return: - issues = _check_value_union(val, signature.return_annotation) + vschema = value_schema(anns.get(arg), "function", "return") + pseudo_attr_schema = AttrSchemaRef( + name="return", + **{ + fld.name: getattr(vschema, fld.name) + for fld in dataclasses.fields(ValueSchema) + }, + ) + issues = _check_value(val, pseudo_attr_schema) issues.at_path("return").expect() # Check return value diff --git a/src/xradio/schema/dataclass.py b/src/xradio/schema/dataclass.py index 956a13db..2f2949a3 100644 --- a/src/xradio/schema/dataclass.py +++ b/src/xradio/schema/dataclass.py @@ -76,7 +76,6 @@ def _check_invalid_dims( # Filter out dimension possibilities with undefined coordinates valid_dims = [ds for ds in dims if set(ds).issubset(all_coord_names)] - # print(f"{klass_name}.{field_name}", valid_dims, dims, all_coord_names) # Raise an exception if this makes the dimension set impossible if dims and not valid_dims: @@ -88,9 +87,7 @@ def _check_invalid_dims( return valid_dims -def _attr_type( - ann: typing.Any, klass_name: str, field_name: str -) -> (str, typing.Optional[typing.List[typing.Any]]): +def value_schema(ann: typing.Any, klass_name: str, field_name: str) -> "ValueSchema": """ Take attribute type annotation and convert into type name and - optionally - a list of literal allowed values @@ -98,18 +95,41 @@ def _attr_type( :param ann: Annotation :param klass_name: Name of class where annotation origins from :param field_name: Name of field where annotation origins from - :returns: Pair of (type_name, dict_schema, array_schema, literals + :returns: ValueSchema """ + # No annotation? + if ann is None: + return ValueSchema(None) + + # Optional? + if is_optional(ann): + + # Optional is actually represented as a union... Construct + # same union type without the "None" type. 
+ typs = [typ for typ in get_args(ann) if typ is not None.__class__] + if len(typs) == 1: + typ = typs[0] + else: + raise ValueError( + f"In '{klass_name}', field '{field_name}' has" + f" a union type, which is not allowed!" + ) + + # Convert to schema recursively + vschema = value_schema(typ, klass_name, field_name) + vschema.optional = True + return vschema + # Is a type? if isinstance(ann, type): # Array type? if hasattr(ann, "__xradio_array_schema"): - return ("dataarray", None, ann.__xradio_array_schema, None) + return ValueSchema("dataarray", array_schema=ann.__xradio_array_schema) # Dictionary type? if hasattr(ann, "__xradio_dict_schema"): - return ("dict", ann.__xradio_dict_schema, None, None) + return ValueSchema("dict", dict_schema=ann.__xradio_dict_schema) # Check that it is an allowable type if ann not in [bool, str, int, float, bool]: @@ -117,7 +137,7 @@ def _attr_type( f"In '{klass_name}', field '{field_name}' has" f" type {ann} - but only str, int, float or list are allowed!" ) - return (ann.__name__, None, None, None) + return ValueSchema(ann.__name__) # Is a list if typing.get_origin(ann) in [typing.List, list]: @@ -130,7 +150,7 @@ def _attr_type( f" annotation {ann}, but only str, int, float, list[str] or Literal allowed!" ) - return ("list[str]", None, None, None) + return ValueSchema("list[str]") # Is a literal? if typing.get_origin(ann) is typing.Literal: @@ -165,11 +185,9 @@ def _attr_type( f" {lit} has inconsistent element type " f"({typ(elem)}) vs ({elem_type})!" ) - return ( + return ValueSchema( "list[str]", - None, - None, - [[str(elem) for elem in arg] for arg in args], + literal=[[str(elem) for elem in arg] for arg in args], ) # Check that it is an allowable type @@ -187,7 +205,7 @@ def _attr_type( f" {lit} has inconsistent type ({typ(lit)}) vs ({typ})!" 
) - return (typ.__name__, None, None, [typ(arg) for arg in args]) + return ValueSchema(typ.__name__, literal=[typ(arg) for arg in args]) raise ValueError( f"In '{klass_name}', field '{field_name}' has" @@ -245,20 +263,19 @@ def check_invalid_dims(dims, field_name): raise ValueError( f"Could not get annotation in '{klass.__name__}' field '{field.name}': {e}" ) - type_name, dict_schema, array_schema, literal = _attr_type( - get_annotated(typ), klass.__name__, field.name - ) + vschema = value_schema(get_annotated(typ), klass.__name__, field.name) + if is_optional(typ): + vschema.optional = True attributes.append( AttrSchemaRef( name=field.name, - type_name=type_name, - dict_schema=dict_schema, - array_schema=array_schema, - literal=literal, - optional=is_optional(typ), default=field.default, docstring=field_docstrings.get(field.name), + **{ + fld.name: getattr(vschema, fld.name) + for fld in dataclasses.fields(ValueSchema) + }, ) ) continue @@ -271,7 +288,7 @@ def check_invalid_dims(dims, field_name): else: raise ValueError( f"Expected field '{field.name}' in '{klass.__name__}' " - "to be annotated with either Coord, Data or Attr!" + f"to be annotated with either Coord, Data or Attr!" ) # Defined using a dataclass, i.e. Coordof/Dataof? 
@@ -489,29 +506,16 @@ def xarray_dataclass_to_dict_schema(klass): for field in dataclasses.fields(klass): typ = type_hints[field.name] - # Handle optional value: Strip "None" from the types - optional = is_optional(typ) - if optional: - typs = [typ for typ in get_args(typ) if typ is not None.__class__] - if len(typs) == 1: - typ = typs[0] - else: - typ = typing.Union.__getitem__[tuple(typs)] - - type_name, dict_schema, array_schema, literal = _attr_type( - typ, klass.__name__, field.name - ) - + vschema = value_schema(typ, klass.__name__, field.name) attributes.append( AttrSchemaRef( name=field.name, - type_name=type_name, - dict_schema=dict_schema, - array_schema=array_schema, - literal=literal, - optional=optional, default=field.default, docstring=field_docstrings.get(field.name), + **{ + fld.name: getattr(vschema, fld.name) + for fld in dataclasses.fields(ValueSchema) + }, ) ) diff --git a/src/xradio/schema/metamodel.py b/src/xradio/schema/metamodel.py index 5d4af6db..dbf78eab 100644 --- a/src/xradio/schema/metamodel.py +++ b/src/xradio/schema/metamodel.py @@ -4,6 +4,7 @@ import typing __all__ = [ + "ValueSchema", "AttrSchemaRef", "ArraySchema", "ArraySchemaRef", @@ -12,19 +13,17 @@ ] -@dataclass(frozen=True) -class AttrSchemaRef: +@dataclass +class ValueSchema: """ - Schema information about an attribute as referenced from an array or - dataset schema. - - This includes the name and docstring associated with the attribute - in the array or dataset schema definition. + Schema information about a value in an attribute or dictionary. """ - name: str + type_name: typing.Literal[ + "bool", "str", "int", "float", "list[str]", "dict", "dataarray" + ] """ - Name of attribute as given in data array / dataset. 
+ Type of value * ``bool``: A boolean * ``str``: A UTF-8 string @@ -33,33 +32,37 @@ class AttrSchemaRef: * ``str_list``: A list of strings * ``dataarray``: An xarray dataarray (encoded using to_dict) """ - type_name: typing.Literal[ - "bool", "str", "int", "float", "list[str]", "dict", "dataarray" - ] + dict_schema: typing.Optional[DictSchema] = None """ Dictionary schema, if it is an xarray DataArray """ - dict_schema: typing.Optional[DictSchema] + array_schema: typing.Optional[ArraySchema] = None """ Array schema, if it is an xarray DataArray """ - array_schema: typing.Optional[ArraySchema] + literal: typing.Optional[typing.List[typing.Any]] = None """ - Python name of type. - - * str = Unicode string - * int = 64 bit integer - * float = 64 bit floating point number (double) + Allowed literal values, if specified. """ - literal: typing.Optional[typing.List[typing.Any]] + optional: bool = False + """Is the value optional?""" + + +@dataclass +class AttrSchemaRef(ValueSchema): """ - Allowed literal values, if specified. + Schema information about an attribute as referenced from an array or + dataset schema. + + This includes the name and docstring associated with the attribute + in the array or dataset schema definition. 
""" - optional: bool - """Is the attribute optional?""" - default: typing.Optional[typing.Any] + + name: str = "" + """Name of attribute as given in data array / dataset.""" + default: typing.Optional[typing.Any] = None """If optional: What is the default value?""" - docstring: str + docstring: str = "" """Documentation string of attribute reference""" diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 48a0787e..71351e99 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -82,21 +82,21 @@ class _TestArraySchema: attributes=[ AttrSchemaRef( name="attr1", - typ=str, + type_name="str", optional=False, default=dataclasses.MISSING, docstring="Required attribute", ), AttrSchemaRef( name="attr2", - typ=int, + type_name="int", optional=False, default=123, docstring="Required attribute with default", ), AttrSchemaRef( name="attr3", - typ=int, + type_name="int", optional=True, default=None, docstring="Optional attribute with default", @@ -433,7 +433,7 @@ def test_check_array_wrong_type(): assert results[1].expected == [int] assert results[2].path == [("attrs", "attr3")] assert results[2].found == float - assert results[2].expected == [int] + assert results[2].expected == [int, type(None)] def test_schema_checked_wrap(): @@ -563,21 +563,21 @@ class _TestDictSchema: attributes=[ AttrSchemaRef( name="attr1", - typ=str, + type_name="str", optional=False, default=dataclasses.MISSING, docstring="Required attribute", ), AttrSchemaRef( name="attr2", - typ=int, + type_name="int", optional=False, default=123, docstring="Required attribute with default", ), AttrSchemaRef( name="attr3", - typ=int, + type_name="int", optional=True, default=None, docstring="Optional attribute with default", @@ -650,7 +650,7 @@ def test_check_dict_missing(): assert len(results) == 1 assert results[0].path == [("", "attr2")] assert results[0].found == None - assert results[0].expected == [int] + assert results[0].expected == ["int"] with 
pytest.raises(SchemaIssues): results.expect() @@ -761,21 +761,21 @@ def _dataclass_to_dict(obj, ignore=[]): attributes=[ AttrSchemaRef( name="attr1", - typ=str, + type_name="str", optional=False, default=dataclasses.MISSING, docstring="Required attribute", ), AttrSchemaRef( name="attr2", - typ=int, + type_name="int", optional=False, default=123, docstring="Required attribute with default", ), AttrSchemaRef( name="attr3", - typ=int, + type_name="int", optional=True, default=None, docstring="Optional attribute with default", @@ -1068,38 +1068,6 @@ def test_check_dataset_optional_coordinate(): assert not issues -def test_check_dict_dataset_attribute(): - # Make dataset - attrs = {"attr1": "str", "attr2": 123, "attr3": 345} - coords = { - "coord": xarray.DataArray( - numpy.arange(10, dtype=float), dims=("coord",), attrs=attrs - ), - } - data_vars = { - "data_var": (("coord",), numpy.zeros(10, dtype=complex), attrs), - } - dataset = xarray.Dataset(data_vars, coords, attrs) - - # Check inside dictionary - @dict_schema - class _DictSchema: - ds: _TestDatasetSchema - - assert not check_dict( - { - "ds": dataset, - }, - _DictSchema, - ) - assert check_dict( - { - "ds": xarray.Dataset(data_vars, coords), - }, - _DictSchema, - ) - - def test_check_dict_array_attribute(): # Make array data = numpy.zeros(10, dtype=complex) @@ -1128,3 +1096,14 @@ class _DictSchema: {"da": {"attr1": "asd", "attr2": 234, "attr3": 345}}, _DictSchema ) assert check_dict({"da": {"attr2": 234, "attr3": 345}}, _DictSchema) + +def test_check_dict_dict_attribute(): + # Check inside dictionary + @dict_schema + class _DictSchema: + da: _TestDictSchema + + assert not check_dict( + {"da": {"attr1": "asd", "attr2": 234, "attr3": 345}}, _DictSchema + ) + assert check_dict({"da": {"attr2": 234, "attr3": 345}}, _DictSchema) From cc4dd2ba5c16f94a4eea69717e0addf885213d28 Mon Sep 17 00:00:00 2001 From: Peter Wortmann Date: Thu, 29 May 2025 14:58:50 +0100 Subject: [PATCH 03/12] Use string representation of 
numpy.dtype Somewhat surprisingly, this still passes the unit tests - numpy defines a "smart" equality operator for numpy.dtype that automatically considers the string representation. --- src/xradio/schema/check.py | 5 +++-- src/xradio/schema/dataclass.py | 2 +- src/xradio/schema/metamodel.py | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/xradio/schema/check.py b/src/xradio/schema/check.py index 89a1e93b..b5617ab6 100644 --- a/src/xradio/schema/check.py +++ b/src/xradio/schema/check.py @@ -298,7 +298,8 @@ def check_dtype(dtype: numpy.dtype, expected: [numpy.dtype]) -> SchemaIssues: :returns: List of :py:class:`SchemaIssue`s found """ - for exp_dtype in expected: + for exp_dtype_str in expected: + exp_dtype = numpy.dtype(exp_dtype_str) # If the expected dtype has no size (e.g. "U", a.k.a. a string of # arbitrary length), we don't check itemsize, only kind. if ( @@ -315,7 +316,7 @@ def check_dtype(dtype: numpy.dtype, expected: [numpy.dtype]) -> SchemaIssues: SchemaIssue( path=[("dtype", None)], message="Wrong numpy dtype", - found=dtype, + found=dtype.str, expected=list(expected), ) ] diff --git a/src/xradio/schema/dataclass.py b/src/xradio/schema/dataclass.py index 2f2949a3..318157e9 100644 --- a/src/xradio/schema/dataclass.py +++ b/src/xradio/schema/dataclass.py @@ -356,7 +356,7 @@ def check_invalid_dims(dims, field_name): docstring=field_docstrings.get(field.name), schema_name=None, dimensions=check_invalid_dims(dims, field.name), - dtypes=[numpy.dtype(typ) for typ in types], + dtypes=[numpy.dtype(typ).str for typ in types], coordinates=[], attributes=[], class_docstring=None, diff --git a/src/xradio/schema/metamodel.py b/src/xradio/schema/metamodel.py index dbf78eab..626ebe66 100644 --- a/src/xradio/schema/metamodel.py +++ b/src/xradio/schema/metamodel.py @@ -80,8 +80,9 @@ class ArraySchema: """(Class) name of the schema""" dimensions: typing.List[typing.List[str]] """List of possible dimensions""" - dtypes: 
typing.List[typing.List["numpy.dtype"]] - """List of possible (numpy) types""" + dtypes: typing.List[typing.List[str]] + """List of possible (numpy) types as array interface protocol + descriptors (e.g. `">f4"`)""" coordinates: typing.List["ArraySchemaRef"] """Coordinates data arrays giving values to dimensions""" From 13fb99ee94c196eb670c1f22d4353839da4929a7 Mon Sep 17 00:00:00 2001 From: Peter Wortmann Date: Thu, 29 May 2025 15:00:37 +0100 Subject: [PATCH 04/12] Rename type_name to type --- src/xradio/schema/check.py | 16 ++++++++-------- src/xradio/schema/metamodel.py | 4 ++-- tests/unit/test_schema.py | 19 ++++++++++--------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/xradio/schema/check.py b/src/xradio/schema/check.py index b5617ab6..82631b4a 100644 --- a/src/xradio/schema/check.py +++ b/src/xradio/schema/check.py @@ -348,7 +348,7 @@ def check_attributes( path=[(attr_kind, attr_schema.name)], message="Non-optional attribute is missing!", found=None, - expected=[attr_schema.type_name], + expected=[attr_schema.type], ) ) continue @@ -458,7 +458,7 @@ def _check_value(val: typing.Any, schema: metamodel.ValueSchema): """ # Unspecified? - if schema.type_name is None: + if schema.type is None: return SchemaIssues() # Optional? @@ -466,7 +466,7 @@ def _check_value(val: typing.Any, schema: metamodel.ValueSchema): return SchemaIssues() # Is supposed to be a data array? - if schema.type_name == "dataarray": + if schema.type == "dataarray": # Attempt to convert dictionaries automatically if isinstance(val, dict): try: @@ -501,19 +501,19 @@ def _check_value(val: typing.Any, schema: metamodel.ValueSchema): return check_array(val, schema.array_schema) # Is supposed to be a dictionary? 
- elif schema.type_name == "dict": + elif schema.type == "dict": if not isinstance(val, dict): # Fall through to plain type check type_to_check = dict else: return check_dict(val, schema.dict_schema) - elif schema.type_name == "list[str]": + elif schema.type == "list[str]": type_to_check = typing.List[str] - elif schema.type_name in ["bool", "str", "int", "float"]: - type_to_check = getattr(builtins, schema.type_name) + elif schema.type in ["bool", "str", "int", "float"]: + type_to_check = getattr(builtins, schema.type) else: - raise ValueError(f"Invalid typ_name in schema: {schema.type_name}") + raise ValueError(f"Invalid typ_name in schema: {schema.type}") # Otherwise straight type check using typeguard try: diff --git a/src/xradio/schema/metamodel.py b/src/xradio/schema/metamodel.py index 626ebe66..5e275f9a 100644 --- a/src/xradio/schema/metamodel.py +++ b/src/xradio/schema/metamodel.py @@ -1,6 +1,6 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, MISSING import typing __all__ = [ @@ -19,7 +19,7 @@ class ValueSchema: Schema information about a value in an attribute or dictionary. 
""" - type_name: typing.Literal[ + type: typing.Literal[ "bool", "str", "int", "float", "list[str]", "dict", "dataarray" ] """ diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 71351e99..5cd47978 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -82,21 +82,21 @@ class _TestArraySchema: attributes=[ AttrSchemaRef( name="attr1", - type_name="str", + type="str", optional=False, default=dataclasses.MISSING, docstring="Required attribute", ), AttrSchemaRef( name="attr2", - type_name="int", + type="int", optional=False, default=123, docstring="Required attribute with default", ), AttrSchemaRef( name="attr3", - type_name="int", + type="int", optional=True, default=None, docstring="Optional attribute with default", @@ -563,21 +563,21 @@ class _TestDictSchema: attributes=[ AttrSchemaRef( name="attr1", - type_name="str", + type="str", optional=False, default=dataclasses.MISSING, docstring="Required attribute", ), AttrSchemaRef( name="attr2", - type_name="int", + type="int", optional=False, default=123, docstring="Required attribute with default", ), AttrSchemaRef( name="attr3", - type_name="int", + type="int", optional=True, default=None, docstring="Optional attribute with default", @@ -761,21 +761,21 @@ def _dataclass_to_dict(obj, ignore=[]): attributes=[ AttrSchemaRef( name="attr1", - type_name="str", + type="str", optional=False, default=dataclasses.MISSING, docstring="Required attribute", ), AttrSchemaRef( name="attr2", - type_name="int", + type="int", optional=False, default=123, docstring="Required attribute with default", ), AttrSchemaRef( name="attr3", - type_name="int", + type="int", optional=True, default=None, docstring="Optional attribute with default", @@ -1097,6 +1097,7 @@ class _DictSchema: ) assert check_dict({"da": {"attr2": 234, "attr3": 345}}, _DictSchema) + def test_check_dict_dict_attribute(): # Check inside dictionary @dict_schema From a36f70dab39cbcea23055f5f834fc6ffe9d5bb2d Mon Sep 17 00:00:00 2001 From: 
Peter Wortmann Date: Thu, 26 Jun 2025 15:09:00 +0100 Subject: [PATCH 05/12] Add proof-of-concept of schema export Including re-import, as requested by Simon --- Makefile | 7 +++ src/xradio/schema/export.py | 99 ++++++++++++++++++++++++++++++++++ src/xradio/schema/metamodel.py | 2 +- 3 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 src/xradio/schema/export.py diff --git a/Makefile b/Makefile index 08a1f630..5b36a94a 100644 --- a/Makefile +++ b/Makefile @@ -2,3 +2,10 @@ # Format Python code using black python-format: black --config pyproject.toml src/ tests/ docs/source/ + + +# Export JSON schemas +schema-export: + @for schema in VisibilityXds SpectrumXds; do \ + PYTHONPATH=src python scripts/export_schema.py $$schema schemas/$$schema.json; \ + done diff --git a/src/xradio/schema/export.py b/src/xradio/schema/export.py new file mode 100644 index 00000000..536b3238 --- /dev/null +++ b/src/xradio/schema/export.py @@ -0,0 +1,99 @@ +import dataclasses +import json + +from xradio.schema import ( + bases, + metamodel, + xarray_dataclass_to_array_schema, + xarray_dataclass_to_dataset_schema, + xarray_dataclass_to_dict_schema, +) + +CLASS_ATTR = "$class" + + +class DataclassEncoder(json.JSONEncoder): + """ + General-purpose encoder that represents data classes as + dictionaries, omitting defaults and annotating the original class + as a ``'$class'`` attribute. 
+ """ + + def default(self, o): + if dataclasses.is_dataclass(o): + res = {CLASS_ATTR: o.__class__.__name__} + for fld in dataclasses.fields(type(o)): + if ( + getattr(o, fld.name) is not fld.default + and getattr(o, fld.name) is not dataclasses.MISSING + ): + res[fld.name] = getattr(o, fld.name) + return res + return super().default(o) + + +DATACLASS_MAP = { + cls.__name__: cls + for cls in [ + metamodel.DictSchema, + metamodel.ValueSchema, + metamodel.AttrSchemaRef, + metamodel.ArraySchema, + metamodel.ArraySchemaRef, + metamodel.DatasetSchema, + ] +} + + +class DataclassDecoder(json.JSONDecoder): + """ + General-purpose decoder that reads JSON as generated by + :py:class:`DataclassEncoder`. + """ + + def __init__(self, dataclass_map, *args, **kwargs): + self._dataclass_map = dataclass_map + super().__init__(*args, object_hook=self.object_hook, **kwargs) + + def object_hook(self, obj): + + # Detect dictionaries with '$class' annotation + if isinstance(obj, dict) and CLASS_ATTR in obj: + + # Identify the class + cls_name = obj[CLASS_ATTR] + cls = self._dataclass_map.get(cls_name) + if not cls: + raise ValueError( + f"Unknown $dataclass encountered while decoding JSON: {cls_name}" + ) + + # Instantiate + del obj["$dataclass"] + obj = cls(**obj) + + return obj + + +def export_schema_json_file(schema, fname): + """ + Exports given schema as a JSON file + """ + + # Check that this is actually a Dataset + if bases.is_dataset_schema(schema): + schema = xarray_dataclass_to_dataset_schema(schema) + if not isinstance(schema, metamodel.DatasetSchema): + raise TypeError( + f"export_schema_json_file: Expected DatasetSchema, but got {type(schema)}!" 
+ ) + + # Perform export + with open(fname, "w", encoding="utf8") as f: + json.dump(schema, f, cls=DataclassEncoder, ensure_ascii=False, indent=" ") + + +def import_schema_json_file(fname): + + with open(fname, "r", encoding="utf8") as f: + return json.load(f, cls=DataclassDecoder, dataclass_map=DATACLASS_MAP) diff --git a/src/xradio/schema/metamodel.py b/src/xradio/schema/metamodel.py index 5e275f9a..0af283ed 100644 --- a/src/xradio/schema/metamodel.py +++ b/src/xradio/schema/metamodel.py @@ -129,7 +129,7 @@ class ArraySchemaRef(ArraySchema): """Name of array schema as given in dataset.""" optional: bool """Is the data array optional?""" - default: typing.Optional[typing.Any] + default: typing.Optional[typing.Any] = None """If optional: What is the default value?""" docstring: typing.Optional[str] = None """Documentation string of array reference""" From a0ce42bf1cfe1fe31082d8b98f8f5a771beb0f8f Mon Sep 17 00:00:00 2001 From: Peter Wortmann Date: Thu, 26 Jun 2025 15:09:45 +0100 Subject: [PATCH 06/12] Add schemas as exported by exporter --- schemas/SpectrumXds.json | 3503 ++++++++++++++++++++++++++++++++ schemas/VisibilityXds.json | 3848 ++++++++++++++++++++++++++++++++++++ 2 files changed, 7351 insertions(+) create mode 100644 schemas/SpectrumXds.json create mode 100644 schemas/VisibilityXds.json diff --git a/schemas/SpectrumXds.json b/schemas/SpectrumXds.json new file mode 100644 index 00000000..5e6147ab --- /dev/null +++ b/schemas/SpectrumXds.json @@ -0,0 +1,3503 @@ +{ + "$class": "DatasetSchema", + "schema_name": "xradio.measurement_set.schema.SpectrumXds", + "dimensions": [ + [ + "time", + "antenna_name", + "frequency", + "polarization" + ] + ], + "coordinates": [ + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.TimeCoordArray", + "dimensions": [ + [ + "time" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + 
"default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": false, + "docstring": "Labels for polarization types, e.g. ``['XX','XY','YX','YY']``, ``['RR','RL','LR','LL']``." 
+ }, + { + "$class": "ArraySchemaRef", + "schema_name": null, + "dimensions": [ + [ + "time" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. 
", + "name": "polarization", + "optional": false + } + ], + "attributes": [ + { + "$class": "AttrSchemaRef", + "type": "str", + "optional": true, + "name": "long_name", + "default": "Spectrum values", + "docstring": "Long-form name to use for axis. Should be ``\"Spectrum values\"``" + }, + { + "$class": "AttrSchemaRef", + "type": "list[str]", + "name": "units", + "default": [ + "Jy" + ], + "docstring": null + } + ], + "class_docstring": "Definition of xr.DataArray for SPECTRUM data (single dish)", + "data_docstring": null, + "name": "SPECTRUM", + "optional": false, + "docstring": "Single dish data, either simulated or measured by an antenna." + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.FlagArray", + "dimensions": [ + [ + "time", + "antenna_name", + "frequency", + "polarization" + ] + ], + "dtypes": [ + "|b1" + ], + "coordinates": [ + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.TimeCoordArray", + "dimensions": [ + [ + "time" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. 
" + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": true + } + ], + "attributes": [ + { + "$class": "AttrSchemaRef", + "type": "str", + "optional": true, + "name": "long_name", + "default": "Visibility flags", + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "bool", + "optional": true, + "name": "allow_mutiple_versions", + "default": true, + "docstring": null + } + ], + "class_docstring": "An array of Boolean values with the same shape as `VISIBILITY`,\nrepresenting the cumulative flags applying to this data matrix. 
Data are\nflagged bad if the ``FLAG`` array element is ``True``.", + "data_docstring": null, + "name": "FLAG", + "optional": false + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.WeightArray", + "dimensions": [ + [ + "time", + "antenna_name", + "frequency", + "polarization" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. 
", + "name": "polarization", + "optional": true + } + ], + "attributes": [ + { + "$class": "AttrSchemaRef", + "type": "str", + "optional": true, + "name": "long_name", + "default": "Visibility weights", + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "bool", + "optional": true, + "name": "allow_mutiple_versions", + "default": true, + "docstring": null + } + ], + "class_docstring": "The weight for each channel, with the same shape as the associated\n:py:class:`VisibilityArray`, as assigned by the correlator or processor.\n\nWeight spectrum in ms v2 is renamed weight. Should be calculated as\n1/sigma^2 (sigma rms noise).", + "data_docstring": "Visibility weights", + "name": "WEIGHT", + "optional": false + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.QuantityInSecondsArray", + "dimensions": [ + [ + "time", + "antenna_name" + ], + [ + "time", + "antenna_name", + "frequency", + "polarization" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. 
" + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": true + } + ], + "attributes": [ + { + "$class": "AttrSchemaRef", + "type": "str", + "optional": true, + "name": "long_name", + "default": "Frequency sampling data", + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "list[str]", + "literal": [ + [ + "Hz" + ] + ], + "name": "units", + "default": [ + "Hz" + ], + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "REST", + "BARY", + "TOPO", + "gcrs", + "icrs", + "hcrs", + "lsrk", + "lsrd", + "lsr" + ], + "name": "observer", + "default": "icrs", + "docstring": "Astropy velocity reference frames (see :external:ref:`astropy-spectralcoord`).\nNote that Astropy does not use the name\n'topo' (telescope centric) velocity frame, rather it assumes if no velocity\nframe is given that this is the default." 
+ } + ], + "class_docstring": "Model of frequency related data variables of the main dataset, such as EFFECTIV_CHANNEL_WIDTH.", + "data_docstring": "Data about frequency sampling, such as centroid or integration\ntime. Concrete function depends on concrete data array within\n:py:class:`VisibilityXds` or :py:class:`SpectrumXds`.", + "name": "EFFECTIVE_CHANNEL_WIDTH", + "optional": true, + "docstring": "The channel bandwidth that includes the effects of missing data." + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.FrequencyCentroidArray", + "dimensions": [ + [ + "frequency" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. 
" + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": false, + "docstring": "Labels for polarization types, e.g. 
``['XX','XY','YX','YY']``, ``['RR','RL','LR','LL']``." + }, + { + "$class": "ArraySchemaRef", + "schema_name": null, + "dimensions": [ + [ + "time" + ] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": false + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.FrequencyArray", + "dimensions": [ + [ + "frequency" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. 
" + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. 
", + "name": "polarization", + "optional": true + } + ], + "attributes": [ + { + "$class": "AttrSchemaRef", + "type": "str", + "optional": true, + "name": "long_name", + "default": "Visibility flags", + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "bool", + "optional": true, + "name": "allow_mutiple_versions", + "default": true, + "docstring": null + } + ], + "class_docstring": "An array of Boolean values with the same shape as `VISIBILITY`,\nrepresenting the cumulative flags applying to this data matrix. Data are\nflagged bad if the ``FLAG`` array element is ``True``.", + "data_docstring": null, + "name": "FLAG", + "optional": false + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.WeightArray", + "dimensions": [ + [ + "time", + "baseline_id", + "frequency", + "polarization" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. 
" + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": true + } + ], + "attributes": [ + { + "$class": "AttrSchemaRef", + "type": "str", + "optional": true, + "name": "long_name", + "default": "Visibility weights", + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "bool", + "optional": true, + "name": "allow_mutiple_versions", + "default": true, + "docstring": null + } + ], + "class_docstring": "The weight for each channel, with the same shape as the associated\n:py:class:`VisibilityArray`, as assigned by the correlator or processor.\n\nWeight spectrum in ms v2 is renamed weight. 
Should be calculated as\n1/sigma^2 (sigma rms noise).", + "data_docstring": "Visibility weights", + "name": "WEIGHT", + "optional": false + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.UvwArray", + "dimensions": [ + [ + "time", + "baseline_id", + "uvw_label" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. 
", + "name": "polarization", + "optional": true + } + ], + "attributes": [ + { + "$class": "AttrSchemaRef", + "type": "str", + "optional": true, + "name": "long_name", + "default": "Frequency sampling data", + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "list[str]", + "literal": [ + [ + "Hz" + ] + ], + "name": "units", + "default": [ + "Hz" + ], + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "REST", + "BARY", + "TOPO", + "gcrs", + "icrs", + "hcrs", + "lsrk", + "lsrd", + "lsr" + ], + "name": "observer", + "default": "icrs", + "docstring": "Astropy velocity reference frames (see :external:ref:`astropy-spectralcoord`).\nNote that Astropy does not use the name\n'topo' (telescope centric) velocity frame, rather it assumes if no velocity\nframe is given that this is the default." + } + ], + "class_docstring": "Model of frequency related data variables of the main dataset, such as EFFECTIV_CHANNEL_WIDTH.", + "data_docstring": "Data about frequency sampling, such as centroid or integration\ntime. Concrete function depends on concrete data array within\n:py:class:`VisibilityXds` or :py:class:`SpectrumXds`.", + "name": "EFFECTIVE_CHANNEL_WIDTH", + "optional": true, + "docstring": "The channel bandwidth that includes the effects of missing data." 
+ }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.FrequencyCentroidArray", + "dimensions": [ + [ + "frequency" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + " Date: Thu, 26 Jun 2025 15:57:10 +0100 Subject: [PATCH 07/12] Add schema exporter script --- scripts/export_schema.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 scripts/export_schema.py diff --git a/scripts/export_schema.py b/scripts/export_schema.py new file mode 100644 index 00000000..ec481006 --- /dev/null +++ b/scripts/export_schema.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +import sys +import importlib + +from xradio.schema.export import export_schema_json_file + +SCHEMA_MAP = { + 'VisibilityXds': ('xradio.measurement_set.schema', 'VisibilityXds'), + 'SpectrumXds': ('xradio.measurement_set.schema', 'SpectrumXds') +} + +# Enough arguments? 
+if len(sys.argv) < 3 or sys.argv[1] not in SCHEMA_MAP: + print('Usage:') + print(' $ python export_schema.py [schema name] [file name]') + print() + print('Available schemas:', ', '.join(SCHEMA_MAP.keys())) + exit(1) + +# Import schema +mod_name, class_name = SCHEMA_MAP[sys.argv[1]] +mod = importlib.import_module(mod_name) +cls = getattr(mod, class_name) + +# Perform export +export_schema_json_file(cls, sys.argv[2]) From ed45ee34952949bf7a9dcb9a93a64fd7620d9695 Mon Sep 17 00:00:00 2001 From: Peter Wortmann Date: Thu, 26 Jun 2025 16:07:09 +0100 Subject: [PATCH 08/12] Reblackify --- Makefile | 2 +- scripts/export_schema.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 5b36a94a..47e365b6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # Format Python code using black python-format: - black --config pyproject.toml src/ tests/ docs/source/ + black --config pyproject.toml src/ tests/ docs/source/ scripts/ # Export JSON schemas diff --git a/scripts/export_schema.py b/scripts/export_schema.py index ec481006..837c472b 100644 --- a/scripts/export_schema.py +++ b/scripts/export_schema.py @@ -6,16 +6,16 @@ from xradio.schema.export import export_schema_json_file SCHEMA_MAP = { - 'VisibilityXds': ('xradio.measurement_set.schema', 'VisibilityXds'), - 'SpectrumXds': ('xradio.measurement_set.schema', 'SpectrumXds') + "VisibilityXds": ("xradio.measurement_set.schema", "VisibilityXds"), + "SpectrumXds": ("xradio.measurement_set.schema", "SpectrumXds"), } # Enough arguments? 
if len(sys.argv) < 3 or sys.argv[1] not in SCHEMA_MAP: - print('Usage:') - print(' $ python export_schema.py [schema name] [file name]') + print("Usage:") + print(" $ python export_schema.py [schema name] [file name]") print() - print('Available schemas:', ', '.join(SCHEMA_MAP.keys())) + print("Available schemas:", ", ".join(SCHEMA_MAP.keys())) exit(1) # Import schema From c34ad33d58020fa738358c73ffd7c61d28e20041 Mon Sep 17 00:00:00 2001 From: Peter Wortmann Date: Thu, 26 Jun 2025 17:01:24 +0100 Subject: [PATCH 09/12] Add tests, fix some small issues We were actually still generating numpy dtypes (they just converted to strings automatically), and dimensions were using tuples. There's now a test that checks that schemas don't change on the JSON round-trip. --- src/xradio/schema/dataclass.py | 18 ++- src/xradio/schema/export.py | 2 +- src/xradio/schema/metamodel.py | 5 +- src/xradio/schema/typing.py | 10 +- tests/unit/test_schema.py | 209 +++++++++++++++++++++++++++++---- 5 files changed, 207 insertions(+), 37 deletions(-) diff --git a/src/xradio/schema/dataclass.py b/src/xradio/schema/dataclass.py index 318157e9..1ce418be 100644 --- a/src/xradio/schema/dataclass.py +++ b/src/xradio/schema/dataclass.py @@ -270,7 +270,9 @@ def check_invalid_dims(dims, field_name): attributes.append( AttrSchemaRef( name=field.name, - default=field.default, + default=( + None if field.default is dataclasses.MISSING else field.default + ), docstring=field_docstrings.get(field.name), **{ fld.name: getattr(vschema, fld.name) @@ -310,7 +312,7 @@ def check_invalid_dims(dims, field_name): schema_ref = ArraySchemaRef( name=field.name, optional=is_optional(typ), - default=field.default, + default=None if field.default is dataclasses.MISSING else field.default, docstring=field_docstrings.get(field.name), **arr_schema_fields, ) @@ -343,7 +345,9 @@ def check_invalid_dims(dims, field_name): schema_ref = ArraySchemaRef( name=field.name, optional=is_optional(typ), - default=field.default, + 
default=( + None if field.default is dataclasses.MISSING else field.default + ), docstring=field_docstrings.get(field.name), **arr_schema_fields, ) @@ -352,7 +356,9 @@ def check_invalid_dims(dims, field_name): schema_ref = ArraySchemaRef( name=field.name, optional=is_optional(typ), - default=field.default, + default=( + None if field.default is dataclasses.MISSING else field.default + ), docstring=field_docstrings.get(field.name), schema_name=None, dimensions=check_invalid_dims(dims, field.name), @@ -418,7 +424,7 @@ def xarray_dataclass_to_array_schema(klass): schema = ArraySchema( schema_name=f"{klass.__module__}.{klass.__qualname__}", dimensions=data_vars[0].dimensions, - dtypes=data_vars[0].dtypes, + dtypes=[numpy.dtype(dt).str for dt in data_vars[0].dtypes], coordinates=coordinates, attributes=attributes, class_docstring=inspect.cleandoc(klass.__doc__), @@ -510,7 +516,7 @@ def xarray_dataclass_to_dict_schema(klass): attributes.append( AttrSchemaRef( name=field.name, - default=field.default, + default=None if field.default is dataclasses.MISSING else field.default, docstring=field_docstrings.get(field.name), **{ fld.name: getattr(vschema, fld.name) diff --git a/src/xradio/schema/export.py b/src/xradio/schema/export.py index 536b3238..9b4828c9 100644 --- a/src/xradio/schema/export.py +++ b/src/xradio/schema/export.py @@ -69,7 +69,7 @@ def object_hook(self, obj): ) # Instantiate - del obj["$dataclass"] + del obj[CLASS_ATTR] obj = cls(**obj) return obj diff --git a/src/xradio/schema/metamodel.py b/src/xradio/schema/metamodel.py index 0af283ed..ad78aee3 100644 --- a/src/xradio/schema/metamodel.py +++ b/src/xradio/schema/metamodel.py @@ -29,8 +29,9 @@ class ValueSchema: * ``str``: A UTF-8 string * ``int``: A 64-bit signed integer * ``float``: A double-precision floating point number - * ``str_list``: A list of strings - * ``dataarray``: An xarray dataarray (encoded using to_dict) + * ``list[str]``: A list of strings + * ``dict``: Dictionary + * ``dataarray``: An 
xarray dataarray (encoded using ``to_dict``) """ dict_schema: typing.Optional[DictSchema] = None """ diff --git a/src/xradio/schema/typing.py b/src/xradio/schema/typing.py index 28b1465d..2bbe1186 100644 --- a/src/xradio/schema/typing.py +++ b/src/xradio/schema/typing.py @@ -312,26 +312,26 @@ def get_dims(tp: Any) -> List[Dims]: dims_out = [] for dim in dims_in: - args = get_args(dim) + args = list(get_args(dim)) origin = get_origin(dim) # One-dimensional dimension if origin is Literal: - dims_out.append((str(args[0]),)) + dims_out.append([str(args[0])]) continue if not (origin is tuple or origin is Tuple): raise TypeError(f"Could not find any dims in {tp!r}.") # Zero-dimensions - if args == () or args == ((),): - dims_out.append(()) + if args == [] or args == [()]: + dims_out.append([]) continue if not all(get_origin(arg) is Literal for arg in args): raise TypeError(f"Could not find any dims in {tp!r}.") - dims_out.append(tuple(str(get_args(arg)[0]) for arg in args)) + dims_out.append([str(get_args(arg)[0]) for arg in args]) return dims_out diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 5cd47978..387e25e9 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -5,6 +5,7 @@ import dask.array import pytest import inspect +import json from xradio.schema.typing import Attr, Coord, Coordof, Data, Dataof, Name from xradio.schema.metamodel import ( @@ -31,6 +32,7 @@ xarray_dataset_schema, dict_schema, ) +from xradio.schema.export import export_schema_json_file, import_schema_json_file Dim1 = Literal["coord"] Dim2 = Literal["coord2"] @@ -60,23 +62,23 @@ class _TestArraySchema: # The equivalent of the above in the meta-model TEST_ARRAY_SCHEMA = ArraySchema( schema_name=__name__ + "._TestArraySchema", - dimensions=[("coord",)], + dimensions=[["coord"]], coordinates=[ ArraySchemaRef( schema_name=None, name="coord", - dtypes=[numpy.dtype(float)], - dimensions=[("coord",)], + dtypes=[numpy.dtype(float).str], + dimensions=[["coord"]], 
coordinates=[], attributes=[], class_docstring=None, data_docstring=None, optional=False, - default=dataclasses.MISSING, + default=None, docstring="Docstring of coordinate", ), ], - dtypes=[numpy.dtype(complex)], + dtypes=[numpy.dtype(complex).str], class_docstring="Docstring of array schema\n\nMultiple lines!", data_docstring="Docstring of data", attributes=[ @@ -84,7 +86,7 @@ class _TestArraySchema: name="attr1", type="str", optional=False, - default=dataclasses.MISSING, + default=None, docstring="Required attribute", ), AttrSchemaRef( @@ -362,7 +364,7 @@ def test_check_array_extra_coord(): assert len(results) == 1 assert results[0].path == [("dims", None)] assert results[0].found == ["coord", "coord2"] - assert results[0].expected == [("coord",)] + assert results[0].expected == [["coord"]] def test_check_array_missing_coord(): @@ -372,7 +374,7 @@ def test_check_array_missing_coord(): assert len(results) == 2 assert results[0].path == [("dims", None)] assert results[0].found == [] - assert results[0].expected == [("coord",)] + assert results[0].expected == [["coord"]] assert results[1].path == [("coords", "coord")] @@ -388,7 +390,7 @@ def test_check_array_wrong_coord(): assert results[0].found == [ "coord2", ] - assert results[0].expected == [("coord",)] + assert results[0].expected == [["coord"]] assert results[1].path == [("coords", "coord")] @@ -565,7 +567,7 @@ class _TestDictSchema: name="attr1", type="str", optional=False, - default=dataclasses.MISSING, + default=None, docstring="Required attribute", ), AttrSchemaRef( @@ -712,10 +714,10 @@ def _dataclass_to_dict(obj, ignore=[]): ArraySchemaRef( schema_name=__name__ + "._TestDatasetSchemaCoord", name="coord", - dtypes=[numpy.dtype(float)], - dimensions=[("coord",)], + dtypes=[numpy.dtype(float).str], + dimensions=[["coord"]], optional=False, - default=dataclasses.MISSING, + default=None, docstring="Docstring of coordinate", coordinates=[], attributes=_dataclass_to_dict(TEST_ARRAY_SCHEMA)["attributes"], @@ 
-725,14 +727,14 @@ def _dataclass_to_dict(obj, ignore=[]): ArraySchemaRef( schema_name=None, name="coord2", - dtypes=[numpy.dtype(int)], - dimensions=[("coord2",)], + dtypes=[numpy.dtype(int).str], + dimensions=[["coord2"]], coordinates=[], attributes=[], class_docstring=None, data_docstring=None, optional=True, - default=dataclasses.MISSING, + default=None, docstring="Docstring of second coordinate", ), ], @@ -740,21 +742,21 @@ def _dataclass_to_dict(obj, ignore=[]): ArraySchemaRef( name="data_var", optional=False, - default=dataclasses.MISSING, + default=None, docstring="Docstring of external data variable", **_dataclass_to_dict(TEST_ARRAY_SCHEMA), ), ArraySchemaRef( schema_name=None, name="data_var_simple", - dtypes=[numpy.dtype(numpy.float32)], - dimensions=[("coord2",)], + dtypes=[numpy.dtype(numpy.float32).str], + dimensions=[["coord2"]], coordinates=[], attributes=[], class_docstring=None, data_docstring=None, optional=True, - default=dataclasses.MISSING, + default=None, docstring="Docstring of simple optional data variable", ), ], @@ -763,7 +765,7 @@ def _dataclass_to_dict(obj, ignore=[]): name="attr1", type="str", optional=False, - default=dataclasses.MISSING, + default=None, docstring="Required attribute", ), AttrSchemaRef( @@ -995,7 +997,7 @@ def test_check_dataset_dtype_mismatch(): assert issues[0].expected == [numpy.dtype(int)] assert issues[0].found == numpy.dtype(float) assert issues[1].path == [("data_vars", "data_var_simple"), ("dtype", None)] - assert issues[1].expected == [numpy.float32] + assert issues[1].expected == [numpy.dtype(numpy.float32).str] assert issues[1].found == numpy.dtype(float) @@ -1015,7 +1017,11 @@ def test_check_dataset_wrong_dim(): issues = check_dataset(dataset, TEST_DATASET_SCHEMA) assert len(issues) == 1 assert issues[0].path == [("data_vars", "data_var_simple"), ("dims", None)] - assert issues[0].expected == [("coord2",)] + assert issues[0].expected == [ + [ + "coord2", + ] + ] assert issues[0].found == ["coord"] @@ 
-1108,3 +1114,160 @@ class _DictSchema: {"da": {"attr1": "asd", "attr2": 234, "attr3": 345}}, _DictSchema ) assert check_dict({"da": {"attr2": 234, "attr3": 345}}, _DictSchema) + + +TEST_DATASET_SCHEMA_JSON = { + "$class": "DatasetSchema", + "schema_name": "tests.unit.test_schema._TestDatasetSchema", + "dimensions": [["coord"], ["coord", "coord2"]], + "coordinates": [ + { + "$class": "ArraySchemaRef", + "schema_name": "tests.unit.test_schema._TestDatasetSchemaCoord", + "dimensions": [["coord"]], + "dtypes": [" Date: Fri, 27 Jun 2025 15:27:24 +0100 Subject: [PATCH 10/12] Make test cases pass Not sure about all of these... --- src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py | 2 ++ src/xradio/measurement_set/schema.py | 4 ++-- tests/unit/image/test_image.py | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py b/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py index 53af07c4..d753941e 100644 --- a/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +++ b/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py @@ -386,6 +386,8 @@ def create_weather_xds(in_file: str, ant_position_with_ids: xr.DataArray): for data_var in weather_xds: if weather_xds.data_vars[data_var].dtype != np.float64: weather_xds[data_var] = weather_xds[data_var].astype(np.float64) + if "time_weather" in weather_xds.coords: + weather_xds.coords["time_weather"].attrs["type"] = "time_weather" return weather_xds diff --git a/src/xradio/measurement_set/schema.py b/src/xradio/measurement_set/schema.py index 068fa484..58cbf5b0 100644 --- a/src/xradio/measurement_set/schema.py +++ b/src/xradio/measurement_set/schema.py @@ -88,9 +88,9 @@ ] # name consistent with casacore measures UnitsSeconds = Literal[["s"]] UnitsHertz = Literal[["Hz"]] -UnitsMeters = Literal[["m"]] +UnitsMeters = Literal[["m"], ["m", "m", "m"]] -UnitsOfSkyCoordInRadians = Literal[["rad", "rad"]] +UnitsOfSkyCoordInRadians = Literal[["rad", 
"rad"], ["rad", "rad", "m"]] UnitsOfLocationInMetersOrRadians = Literal[["m", "m", "m"], ["rad", "rad", "m"]] UnitsOfPositionInRadians = Literal[["rad", "rad", "m"]] UnitsOfDopplerShift = Literal[["ratio"], ["m/s"]] diff --git a/tests/unit/image/test_image.py b/tests/unit/image/test_image.py index 8e99728a..e89c566e 100644 --- a/tests/unit/image/test_image.py +++ b/tests/unit/image/test_image.py @@ -108,7 +108,9 @@ def dict_equality(self, dict1, dict2, dict1_name, dict2_name, exclude_keys=[]): ) elif isinstance(one[0], numbers.Number): self.assertTrue( - np.isclose(np.array(one), np.array(two)).all(), + np.isclose( + np.array(one), np.array(two), rtol=1e-3, atol=1e-7 + ).all(), f"{dict1_name}[{k}] != {dict2_name}[{k}], " f"{one} != {two}", ) From 676dab15815cbbe02956ea857a0fc610f38e173c Mon Sep 17 00:00:00 2001 From: Peter Wortmann Date: Fri, 27 Jun 2025 15:49:12 +0100 Subject: [PATCH 11/12] Add test to confirm that exported schemas are synchronised --- schemas/VisibilityXds.json | 5 +++++ tests/unit/schema/test_export.py | 34 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 tests/unit/schema/test_export.py diff --git a/schemas/VisibilityXds.json b/schemas/VisibilityXds.json index 5179dbab..de8a4a6e 100644 --- a/schemas/VisibilityXds.json +++ b/schemas/VisibilityXds.json @@ -2259,6 +2259,11 @@ "literal": [ [ "m" + ], + [ + "m", + "m", + "m" ] ], "name": "units", diff --git a/tests/unit/schema/test_export.py b/tests/unit/schema/test_export.py new file mode 100644 index 00000000..a6d47f1e --- /dev/null +++ b/tests/unit/schema/test_export.py @@ -0,0 +1,34 @@ +import json +import pytest +import pathlib + +from xradio.measurement_set.schema import VisibilityXds, SpectrumXds +from xradio.schema.export import export_schema_json_file, import_schema_json_file + + +@pytest.mark.parametrize("schema", [VisibilityXds, SpectrumXds]) +def test_schema_export_in_synch(tmp_path, schema): + """ + Checks whether JSON schemas in the repository tree 
match + the Python definitions. + """ + + # Export schema + schema_fname = f"{schema.__name__}.json" + export_schema_json_file(schema, tmp_path / schema_fname) + with open(tmp_path / schema_fname, "r", encoding="utf8") as f: + python_schema_json = json.load(f) + + # Load existing schema + repository_root = pathlib.Path(__file__).parent.parent.parent.parent + assert ( + repository_root / "schemas" + ).is_dir(), "Schema directory doesn't exist in expected location" + with open(repository_root / "schemas" / schema_fname, "r", encoding="utf8") as f: + repo_schema_json = json.load(f) + + # Check that schemas are synchronised + assert python_schema_json == repo_schema_json, ( + "Exported schemas not consistent with Python definitions! " + "Run 'make schema-export' from repository root!" + ) From a06a6cd13c5cff9fad130ef55b4184ca6ece03a4 Mon Sep 17 00:00:00 2001 From: Peter Wortmann Date: Thu, 7 Aug 2025 15:05:10 +0100 Subject: [PATCH 12/12] Add image schema Very rough indeed, test cases fail, and likely incompatible with JW's newer changes. Notable changes in the infrastructure: * Add float arrays as a primitive type * Move dataclasses to be kw_only so that we can use subclassing (this requires Python >=3.10, but that isn't a problem at this point). 
--- src/xradio/image/schema.py | 282 ++++++++++++++++++ .../_utils/_msv2/msv4_sub_xdss.py | 2 +- src/xradio/measurement_set/schema.py | 8 +- src/xradio/schema/bases.py | 6 +- src/xradio/schema/dataclass.py | 10 +- tests/unit/image/test_image.py | 15 + 6 files changed, 312 insertions(+), 11 deletions(-) create mode 100644 src/xradio/image/schema.py diff --git a/src/xradio/image/schema.py b/src/xradio/image/schema.py new file mode 100644 index 00000000..eb18c5ee --- /dev/null +++ b/src/xradio/image/schema.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +from typing import Literal, Optional, Union +from xradio.schema.bases import ( + xarray_dataset_schema, + xarray_dataarray_schema, + dict_schema, +) +from xradio.schema.typing import Attr, Coord, Coordof, Data, Dataof, Name +from xradio.measurement_set.schema import ( + ZD, + Time, + TimeCoordArray, + Frequency, + FrequencyArray, + SkyCoordArray, + UnitsDimensionless, + Polarization, + PolarizationArray, + SkyDirLabel, + SkyPosLabel, + QuantityInRadiansArray, + QuantityInMetersArray +) +import numpy +import dataclasses + + +# https://docs.google.com/spreadsheets/d/1WW0Gl6z85cJVPgtdgW4dxucurHFa06OKGjgoK8OREFA + +IMAGE_SCHEMA_VERSION = "4.0.-9999" + +LCoord = Literal["l"] +MCoord = Literal["m"] +UCoord = Literal["u"] +VCoord = Literal["v"] +LMLabelIn = Literal["lm_in"] +LMLabelOut = Literal["lm_out"] +BeamParam = Literal["beam_param"] +UnitsJansky = Literal["Jy"] + +@xarray_dataarray_schema +class CosineArray: + """ + Directional cosine coordinate. + """ + + data: Data[ZD, numpy.float64] + + units: Attr[UnitsDimensionless] = "" + +@xarray_dataarray_schema +class ApertureCoordArray: + """ + Directional cosine coordinate. + """ + + data: Data[ZD, numpy.float64] + + units: Attr[UnitsDimensionless] = "" + + # Likely not right, should be in lambda (wavelengths?) + reference_value: Attr[QuantityInMetersArray] + """ + World reference value. 
Note crpix purposefully omitted because
+    crpix cannot be reliably updated when selecting regions/subimages
+    using standard xarray selection methods. Use a world2pix function
+    if crpix is required in other computations.
+    """
+
+# TODO: Jy/beam? rad?!
+UnitsImage = Literal[["Jy/beam"], ["Jy/pixel"], ["rad"], [""]]
+
+@xarray_dataarray_schema
+class ImageArray:
+    """
+    Astronomical image - mapping of sky coordinates to intensities
+    """
+
+    data: Data[
+        Union[
+            tuple[Time, Frequency, Polarization, LCoord, MCoord]
+        ],
+        Union[numpy.float32, numpy.float64, numpy.complex64, numpy.complex128]
+    ]
+
+    image_type: Optional[Attr[str]] = None
+    """type of image. eg, 'Intensity', 'spix', "mask", "beam" etc, can be blank"""
+
+    active_mask: Optional[Attr[str]] = None
+    """ Default mask that should be used by processing applications
+    (would be interesting to allow an expression here for eg boolean
+    operations on multiple masks, eg ANDing two masks together, but
+    that's probably best left to individual package implementations) """
+
+    units: Attr[UnitsImage] = ("Jy",)
+
+
+@xarray_dataarray_schema
+class ApertureArray:
+    """
+    Aperture image - mapping of u/v coordinates to intensities
+    """
+
+    data: Data[
+        Union[
+            tuple[Time, Frequency, Polarization, UCoord, VCoord]
+        ],
+        Union[numpy.complex64, numpy.complex128]
+    ]
+
+    # "attributes are generally the same as for sky images"
+
+    image_type: Optional[Attr[str]] = None
+    """type of image. 
eg, 'Intensity', 'spix', "mask", "beam" etc, can be blank"""
+
+    active_mask: Optional[Attr[str]] = None
+    """ Default mask that should be used by processing applications
+    (would be interesting to allow an expression here for eg boolean
+    operations on multiple masks, eg ANDing two masks together, but
+    that's probably best left to individual package implementations) """
+
+    units: Attr[UnitsImage] = ("Jy",)
+
+@xarray_dataarray_schema
+class LinearTransformArray:
+    """
+    Matrix describing linear transform
+    Directional cosine coordinate.
+    """
+
+    data: Data[tuple[LMLabelOut, LMLabelIn], numpy.float64]
+
+    lm_in: Coord[LMLabelIn, str] = ('l', 'm')
+    lm_out: Coord[LMLabelIn, str] = ("l'", "m'")
+
+    units: Attr[UnitsImage] = ("",)
+
+AllowedProjections = Literal[
+    "AZP", "TAN", "SIN", "STG", "ARC",
+    "ZPN", "ZEA", "AIR", "CYP", "CAR", "MER", "CEA", "COP", "COD",
+    "COE", "COO", "BON", "PCO", "SFL", "PAR", "AIT", "MOL", "CSC",
+    "QSC", "TSC", "SZP", "HPX"
+]
+
+@xarray_dataarray_schema
+class BeamParamArray:
+    """
+    Coordinate axis to make up ``("major", "minor", "pa")`` tuple
+    """
+
+    data: Data[BeamParam, str] = ("major", "minor", "pa")
+    """Should be ``('major','minor','pa')``"""
+    long_name: Optional[Attr[str]] = "Beam parameter label"
+    """ Long-form name to use for axis. Should be ``"Beam parameter label"``"""
+
+@dict_schema
+class DirectionDict:
+    latpole: QuantityInRadiansArray
+    """Latitude of pole for reference frame, in radians"""
+    lonpol: QuantityInRadiansArray
+    """Longitude of pole for reference frame, in radians"""
+    projection: AllowedProjections
+    """Direction coordinate projection, eg SIN"""
+    reference: SkyCoordArray
+    """Reference world coordinate for direction (essentially crval +
+    unit). 
Note that crpix is purposefully excluded; see notes for l + and m above)""" + pc: LinearTransformArray = ((1.0, 0.0), (0.0, 1.0)) + """Matrix describing linear transform""" + projection_parameters: list[float] = (0.0, 0.0) + """ Array describing projection, number of elements depends on the projection""" + +@dataclasses.dataclass(frozen=True) +class BaseImageXds: + + # --- Coordinates --- + + time: Coordof[TimeCoordArray] + """ + Normally one or a small number of planes. If unity, value + should be the same as in the image coordinate system obsdate + """ + + frequency: Coordof[FrequencyArray] + """ + frequency -> chan mapping + """ + + polarization: Coordof[PolarizationArray] + + beam_param: Coordof[BeamParamArray] + + # --- Attributes --- + + reference_frequency: Attr[FrequencyArray] + """ TODO: Document """ + + # Note that this would *not* actually share the Frequency + # dimension. Okay, as it is an attribute? + rest_frequencies: Attr[FrequencyArray] + """ List of relevant rest frequencies. At a minimum will include frequency.rest_frequency""" + # Not actually an array, just a naked quantity + rest_frequency: Attr[FrequencyArray] + """ Frequency used for velocity conversion. Must be in the frequency.rest_frequencies list. """ + + single_beam: Optional[Attr[bool]] + """ + Indicates if there is a single, global beam, i.e. + ``BEAM`` is the same for all time steps, frequencies and + polarisations. + """ + + # TODO: History? 
+
+    # --- Data variables ---
+
+    IMAGE_CENTER: Optional[Data[tuple[Time], SkyCoordArray]]
+    """
+    Pointing center information
+
+    Identifies the on-sky direction of the center of the image
+    """
+
+    velocity: Optional[Coord[Frequency, numpy.float64]]
+    """ velocity, optional, allows for direct chan -> velocity or freq -> velocity mapping """
+
+    sky_dir_label: Optional[Coord[SkyDirLabel, str]] = None
+    """ Coordinate labels of sky directions (typically shape 2 and 'ra', 'dec') """
+
+    sky_pos_label: Optional[Coord[SkyPosLabel, str]] = None
+    """ Coordinate labels of sky positions (typically shape 3 and 'ra', 'dec', 'dist') """
+
+
+@xarray_dataset_schema
+class AstroImageXds(BaseImageXds):
+
+    l: Coord[LCoord, CosineArray]
+    """l direction cosine. Increases into direction of right ascension (RA) axis in image centre, but is not a longitude."""
+    m: Coord[MCoord, CosineArray]
+    """m direction cosine. Increases into direction of declination (Dec) axis in image centre, but is not a latitude."""
+
+    # --- Data variables ---
+
+    SKY: Optional[Dataof[ImageArray]]
+    SKY_MODEL: Optional[Dataof[ImageArray]]
+    PSF: Optional[Dataof[ImageArray]]
+    RESIDUAL: Optional[Dataof[ImageArray]]
+
+    MASK: Optional[Dataof[ImageArray]]
+    """
+    Image mask. Use names like ``MASK_[name]`` if there are multiple
+    masks.
+
+    Expected to use mask convention inverse of CASA6, True=good,
+    False=bad.
+
+    Synthesized beam data vars have coordinates time,
+    polarization, frequency, and beam_param. In the case of a single,
+    global beam, the values are repeated for each (time, polarization,
+    frequency) tuple. """
+
+    BEAM: Optional[Data[tuple[Time, Polarization, Frequency, BeamParam], float]]
+    """
+    Synthesized beam parameters (minor and major axis as well as
+    position angle).
+
+    In the case of a single, global beam, the values are repeated
+    for each (time, polarization, frequency) tuple. 
+    """
+
+@xarray_dataset_schema
+class ApertureImageXds(BaseImageXds):
+
+    u: Coord[UCoord, ApertureCoordArray]
+    """For Fourier images, u coordinate"""
+    v: Coord[VCoord, ApertureCoordArray]
+    """For Fourier images, v coordinate"""
+
+    # --- Data variables ---
+
+    APERTURE: Dataof[ApertureArray]
diff --git a/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py b/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py
index 6cc99eeb..e00ef1dd 100644
--- a/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py
+++ b/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py
@@ -835,7 +835,7 @@ def make_data_variable(raw_name: str, dim_names: list[str]) -> xr.DataArray:
         }
         data_vars["COORDINATE_AXES"].attrs = {
             "type": "rotation_matrix",
-            "units": ["dimensionless", "dimensionless", "dimensionless"],
+            "units": ["", "", ""],
         }
         # Remove the "frame" attribute if it exists, because ELEMENT_OFFSET is
         # defined in a station-local frame for which no standard name exists
diff --git a/src/xradio/measurement_set/schema.py b/src/xradio/measurement_set/schema.py
index 579bac0f..805e278b 100644
--- a/src/xradio/measurement_set/schema.py
+++ b/src/xradio/measurement_set/schema.py
@@ -84,8 +84,8 @@
 # Units of quantities and measures
 
 UnitsDimensionless = Literal[
-    ["dimensionless"]
-]  # name consistent with casacore measures
+    [""]
+]  # name consistent with astropy
 UnitsSeconds = Literal[["s"]]
 UnitsHertz = Literal[["Hz"]]
 UnitsMeters = Literal[["m"], ["m", "m", "m"]]
@@ -1319,7 +1319,7 @@ class SpectrumArray:
 
     long_name: Optional[Attr[str]] = "Spectrum values"
     """ Long-form name to use for axis. Should be ``"Spectrum values"``"""
-    units: Attr[List[str]] = ("Jy",)
+    units: Attr[list[str]] = ("Jy",)
 
 
 @xarray_dataarray_schema
@@ -1338,7 +1338,7 @@ class VisibilityArray:
 
     long_name: Optional[Attr[str]] = "Visibility values"
     """ Long-form name to use for axis. 
Should be ``"Visibility values"``""" - units: Attr[List[str]] = ("Jy",) + units: Attr[list[str]] = ("Jy",) allow_mutiple_versions: Optional[Attr[bool]] = True diff --git a/src/xradio/schema/bases.py b/src/xradio/schema/bases.py index b68acddd..b79c2d02 100644 --- a/src/xradio/schema/bases.py +++ b/src/xradio/schema/bases.py @@ -239,7 +239,7 @@ class TestArray: """ # Make into a dataclass (might not even be needed at some point?) - cls = dataclasses.dataclass(cls, init=True, repr=False, eq=False, frozen=True) + cls = dataclasses.dataclass(cls, init=True, repr=False, eq=False, frozen=True, kw_only=True) # Make schema cls.__xradio_array_schema = dataclass.xarray_dataclass_to_array_schema(cls) @@ -394,7 +394,7 @@ class constructor will be overwritten to generate schema-confirming """ # Make into a dataclass (might not even be needed at some point?) - cls = dataclasses.dataclass(cls, init=True, repr=False, eq=False, frozen=True) + cls = dataclasses.dataclass(cls, init=True, repr=False, eq=False, frozen=True, kw_only=True) # Make schema schema = dataclass.xarray_dataclass_to_dataset_schema(cls) @@ -453,7 +453,7 @@ def dict_schema(cls): """ # Make into a dataclass (might not even be needed at some point?) 
- cls = dataclasses.dataclass(cls, init=True, repr=False, eq=False, frozen=True) + cls = dataclasses.dataclass(cls, init=True, repr=False, eq=False, frozen=True, kw_only=True) # Make schema cls.__xradio_dict_schema = dataclass.xarray_dataclass_to_dict_schema(cls) diff --git a/src/xradio/schema/dataclass.py b/src/xradio/schema/dataclass.py index 1ce418be..0f44aa20 100644 --- a/src/xradio/schema/dataclass.py +++ b/src/xradio/schema/dataclass.py @@ -143,14 +143,17 @@ def value_schema(ann: typing.Any, klass_name: str, field_name: str) -> "ValueSch if typing.get_origin(ann) in [typing.List, list]: args = typing.get_args(ann) - # Must be a string list - if args != (str,): + # Must be a string or flota list + if args != (str,) and args != (float,): raise ValueError( f"In '{klass_name}', field '{field_name}' has" f" annotation {ann}, but only str, int, float, list[str] or Literal allowed!" ) - return ValueSchema("list[str]") + if args[0] == str: + return ValueSchema("list[str]") + else: + return ValueSchema("list[float]") # Is a literal? 
if typing.get_origin(ann) is typing.Literal: @@ -238,6 +241,7 @@ def check_invalid_dims(dims, field_name): for field in dataclasses.fields(klass) if get_role(type_hints[field.name]) == Role.COORD } + print(klass, all_coord_names) def check_invalid_dims(dims, field_name): return _check_invalid_dims( diff --git a/tests/unit/image/test_image.py b/tests/unit/image/test_image.py index e89c566e..9ec7aa9b 100644 --- a/tests/unit/image/test_image.py +++ b/tests/unit/image/test_image.py @@ -34,6 +34,7 @@ from xradio.image._util._casacore.common import _open_image_ro as open_image_ro from xradio.image._util.common import _image_type as image_type from xradio.image._util._casacore.common import _object_name +from xradio.image.schema import AstroImageXds, ApertureImageXds from xradio.image._util._casacore.common import ( _open_image_ro as open_image_ro, @@ -41,6 +42,8 @@ ) from toolviper.dask.client import local_client +from xradio.schema.check import check_dataset + sky = "SKY" @@ -2064,6 +2067,10 @@ def test_attrs(self): for skel in [self.skel_im(), self.skel_im_no_sky()]: self.run_attrs_tests(skel) + def test_schema(self): + for skel in [self.skel_im(), self.skel_im_no_sky()]: + check_dataset(skel, AstroImageXds).expect() + class make_empty_aperture_image_tests(make_empty_image_tests): """Test making skeleton image""" @@ -2117,6 +2124,10 @@ def test_attrs(self): skel = self.skel_im() self.run_attrs_tests(skel) + def test_schema(self): + for skel in [self.skel_im()]: + check_dataset(skel, ApertureImageXds).expect() + class make_empty_lmuv_image_tests(make_empty_image_tests): """Tests making image with l, m, u, v coordinates""" @@ -2214,6 +2225,10 @@ def test_attrs(self): skel = self.skel_im() self.run_attrs_tests(skel) + def test_schema(self): + for skel in [self.skel_im()]: + check_dataset(skel, ApertureImageXds).expect() + class write_image_test(xds_from_image_test):