diff --git a/Makefile b/Makefile index 08a1f630..47e365b6 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,11 @@ # Format Python code using black python-format: - black --config pyproject.toml src/ tests/ docs/source/ + black --config pyproject.toml src/ tests/ docs/source/ scripts/ + + +# Export JSON schemas +schema-export: + @for schema in VisibilityXds SpectrumXds; do \ + PYTHONPATH=src python scripts/export_schema.py $$schema schemas/$$schema.json; \ + done diff --git a/schemas/SpectrumXds.json b/schemas/SpectrumXds.json new file mode 100644 index 00000000..5e6147ab --- /dev/null +++ b/schemas/SpectrumXds.json @@ -0,0 +1,3503 @@ +{ + "$class": "DatasetSchema", + "schema_name": "xradio.measurement_set.schema.SpectrumXds", + "dimensions": [ + [ + "time", + "antenna_name", + "frequency", + "polarization" + ] + ], + "coordinates": [ + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.TimeCoordArray", + "dimensions": [ + [ + "time" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": false, + "docstring": "Labels for polarization types, e.g. ``['XX','XY','YX','YY']``, ``['RR','RL','LR','LL']``." + }, + { + "$class": "ArraySchemaRef", + "schema_name": null, + "dimensions": [ + [ + "time" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": false + } + ], + "attributes": [ + { + "$class": "AttrSchemaRef", + "type": "str", + "optional": true, + "name": "long_name", + "default": "Spectrum values", + "docstring": "Long-form name to use for axis. Should be ``\"Spectrum values\"``" + }, + { + "$class": "AttrSchemaRef", + "type": "list[str]", + "name": "units", + "default": [ + "Jy" + ], + "docstring": null + } + ], + "class_docstring": "Definition of xr.DataArray for SPECTRUM data (single dish)", + "data_docstring": null, + "name": "SPECTRUM", + "optional": false, + "docstring": "Single dish data, either simulated or measured by an antenna." + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.FlagArray", + "dimensions": [ + [ + "time", + "antenna_name", + "frequency", + "polarization" + ] + ], + "dtypes": [ + "|b1" + ], + "coordinates": [ + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.TimeCoordArray", + "dimensions": [ + [ + "time" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": true + } + ], + "attributes": [ + { + "$class": "AttrSchemaRef", + "type": "str", + "optional": true, + "name": "long_name", + "default": "Visibility flags", + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "bool", + "optional": true, + "name": "allow_mutiple_versions", + "default": true, + "docstring": null + } + ], + "class_docstring": "An array of Boolean values with the same shape as `VISIBILITY`,\nrepresenting the cumulative flags applying to this data matrix. Data are\nflagged bad if the ``FLAG`` array element is ``True``.", + "data_docstring": null, + "name": "FLAG", + "optional": false + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.WeightArray", + "dimensions": [ + [ + "time", + "antenna_name", + "frequency", + "polarization" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": true + } + ], + "attributes": [ + { + "$class": "AttrSchemaRef", + "type": "str", + "optional": true, + "name": "long_name", + "default": "Visibility weights", + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "bool", + "optional": true, + "name": "allow_mutiple_versions", + "default": true, + "docstring": null + } + ], + "class_docstring": "The weight for each channel, with the same shape as the associated\n:py:class:`VisibilityArray`, as assigned by the correlator or processor.\n\nWeight spectrum in ms v2 is renamed weight. Should be calculated as\n1/sigma^2 (sigma rms noise).", + "data_docstring": "Visibility weights", + "name": "WEIGHT", + "optional": false + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.QuantityInSecondsArray", + "dimensions": [ + [ + "time", + "antenna_name" + ], + [ + "time", + "antenna_name", + "frequency", + "polarization" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": true + } + ], + "attributes": [ + { + "$class": "AttrSchemaRef", + "type": "str", + "optional": true, + "name": "long_name", + "default": "Frequency sampling data", + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "list[str]", + "literal": [ + [ + "Hz" + ] + ], + "name": "units", + "default": [ + "Hz" + ], + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "REST", + "BARY", + "TOPO", + "gcrs", + "icrs", + "hcrs", + "lsrk", + "lsrd", + "lsr" + ], + "name": "observer", + "default": "icrs", + "docstring": "Astropy velocity reference frames (see :external:ref:`astropy-spectralcoord`).\nNote that Astropy does not use the name\n'topo' (telescope centric) velocity frame, rather it assumes if no velocity\nframe is given that this is the default." + } + ], + "class_docstring": "Model of frequency related data variables of the main dataset, such as EFFECTIV_CHANNEL_WIDTH.", + "data_docstring": "Data about frequency sampling, such as centroid or integration\ntime. Concrete function depends on concrete data array within\n:py:class:`VisibilityXds` or :py:class:`SpectrumXds`.", + "name": "EFFECTIVE_CHANNEL_WIDTH", + "optional": true, + "docstring": "The channel bandwidth that includes the effects of missing data." + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.FrequencyCentroidArray", + "dimensions": [ + [ + "frequency" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": false, + "docstring": "Labels for polarization types, e.g. ``['XX','XY','YX','YY']``, ``['RR','RL','LR','LL']``." + }, + { + "$class": "ArraySchemaRef", + "schema_name": null, + "dimensions": [ + [ + "time" + ] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": false + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.FrequencyArray", + "dimensions": [ + [ + "frequency" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": true + } + ], + "attributes": [ + { + "$class": "AttrSchemaRef", + "type": "str", + "optional": true, + "name": "long_name", + "default": "Visibility flags", + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "bool", + "optional": true, + "name": "allow_mutiple_versions", + "default": true, + "docstring": null + } + ], + "class_docstring": "An array of Boolean values with the same shape as `VISIBILITY`,\nrepresenting the cumulative flags applying to this data matrix. Data are\nflagged bad if the ``FLAG`` array element is ``True``.", + "data_docstring": null, + "name": "FLAG", + "optional": false + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.WeightArray", + "dimensions": [ + [ + "time", + "baseline_id", + "frequency", + "polarization" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": true + } + ], + "attributes": [ + { + "$class": "AttrSchemaRef", + "type": "str", + "optional": true, + "name": "long_name", + "default": "Visibility weights", + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "bool", + "optional": true, + "name": "allow_mutiple_versions", + "default": true, + "docstring": null + } + ], + "class_docstring": "The weight for each channel, with the same shape as the associated\n:py:class:`VisibilityArray`, as assigned by the correlator or processor.\n\nWeight spectrum in ms v2 is renamed weight. Should be calculated as\n1/sigma^2 (sigma rms noise).", + "data_docstring": "Visibility weights", + "name": "WEIGHT", + "optional": false + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.UvwArray", + "dimensions": [ + [ + "time", + "baseline_id", + "uvw_label" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + "`_:\n\n* ``I``, ``Q``, ``U``, ``V`` (standard stokes parameters)\n* ``RR``, ``RL``, ``LR``, ``LL`` (circular correlation products)\n* ``XX``, ``XY``, ``YX``, ``YY`` (linear correlation products)\n* ``RX``, ``RY``, ``LX``, ``LY``, ``XR``, ``XL``, ``YR``, ``YL`` (mixed correlation products)\n* ``PP``, ``PQ``, ``QP``, ``QQ`` (general quasi-orthogonal correlation products)\n* ``RCircular``, ``LCircular``, ``Linear`` (single dish polarization types)\n* ``Ptotal`` (polarized intensity: ``sqrt(Q²+U²+V²)``)\n* ``Plinear`` (linearly polarized intensity: ``sqrt(Q²+U²)``)\n* ``PFtotal`` (polarization fraction: ``Ptotal/I``)\n* ``PFlinear`` (linear polarization fraction: ``Plinear/I``)\n* ``Pangle`` (linear polarization angle: ``0.5 arctan(U/Q)`` in radians)", + "data_docstring": "Polarization names. ", + "name": "polarization", + "optional": true + } + ], + "attributes": [ + { + "$class": "AttrSchemaRef", + "type": "str", + "optional": true, + "name": "long_name", + "default": "Frequency sampling data", + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "list[str]", + "literal": [ + [ + "Hz" + ] + ], + "name": "units", + "default": [ + "Hz" + ], + "docstring": null + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "REST", + "BARY", + "TOPO", + "gcrs", + "icrs", + "hcrs", + "lsrk", + "lsrd", + "lsr" + ], + "name": "observer", + "default": "icrs", + "docstring": "Astropy velocity reference frames (see :external:ref:`astropy-spectralcoord`).\nNote that Astropy does not use the name\n'topo' (telescope centric) velocity frame, rather it assumes if no velocity\nframe is given that this is the default." + } + ], + "class_docstring": "Model of frequency related data variables of the main dataset, such as EFFECTIV_CHANNEL_WIDTH.", + "data_docstring": "Data about frequency sampling, such as centroid or integration\ntime. Concrete function depends on concrete data array within\n:py:class:`VisibilityXds` or :py:class:`SpectrumXds`.", + "name": "EFFECTIVE_CHANNEL_WIDTH", + "optional": true, + "docstring": "The channel bandwidth that includes the effects of missing data." + }, + { + "$class": "ArraySchemaRef", + "schema_name": "xradio.measurement_set.schema.FrequencyCentroidArray", + "dimensions": [ + [ + "frequency" + ] + ], + "dtypes": [ + "gcrs, LSRK=>lsrk, LSRD=>lsrd" + }, + { + "$class": "AttrSchemaRef", + "type": "str", + "literal": [ + "spectral_coord" + ], + "name": "type", + "default": "spectral_coord", + "docstring": null + } + ], + "class_docstring": "Measures array for data variables and attributes that are spectral coordinates.", + "data_docstring": null + }, + "name": "reference_frequency", + "docstring": "A frequency representative of the spectral window, usually the sky\nfrequency corresponding to the DC edge of the baseband. Used by the calibration\nsystem if a fixed scaling frequency is required or in algorithms to identify the\nobserving band. " + }, + { + "$class": "AttrSchemaRef", + "type": "dataarray", + "array_schema": { + "$class": "ArraySchema", + "schema_name": "xradio.measurement_set.schema.QuantityInHertzArray", + "dimensions": [ + [] + ], + "dtypes": [ + " chan mapping + """ + + polarization: Coordof[PolarizationArray] + + beam_param: Coordof[BeamParamArray] + + # --- Attributes --- + + reference_frequency: Attr[FrequencyArray] + """ TODO: Document """ + + # Note that this would *not* actually share the Frequency + # dimension. Okay, as it is an attribute? + rest_frequencies: Attr[FrequencyArray] + """ List of relevant rest frequencies. At a minimum will include frequency.rest_frequency""" + # Not actually an array, just a naked quantity + rest_frequency: Attr[FrequencyArray] + """ Frequency used for velocity conversion. Must be in the frequency.rest_frequencies list. """ + + single_beam: Optional[Attr[bool]] + """ + Indicates if there is a single, global beam, i.e. + ``BEAM`` is the same for all time steps, frequencies and + polarisations. + """ + + # TODO: History? + + # --- Data variables --- + + IMAGE_CENTER: Optional[Data[tuple[Time], SkyCoordArray]] + """ + Pointing center information + + Identifies the on-sky direction of the center of the image + """ + + velocity: Optional[Coord[Frequency, numpy.float64]] + """ velocity, optional, allows for direct chan -> velocity or freq -> velocity mapping """ + + sky_dir_label: Optional[Coord[SkyDirLabel, str]] = None + """ Coordinate labels of sky directions (typically shape 2 and 'ra', 'dec') """ + + sky_pos_label: Optional[Coord[SkyPosLabel, str]] = None + """ Coordinate lables of sky positions (typically shape 3 and 'ra', 'dec', 'dist') """ + + +@xarray_dataset_schema +class AstroImageXds(BaseImageXds): + + l: Coord[LCoord, CosineArray] + """l direction cosine. Increases into direction of right ascension (RA) axis in image centre, but is not a longitude.""" + m: Coord[MCoord, CosineArray] + """m direction cosine. Increases into direction of declination (Dec) axis in image centre, but is not a latitude.""" + + # --- Data variables --- + + SKY: Optional[Dataof[ImageArray]] + SKY_MODEL: Optional[Dataof[ImageArray]] + PSF: Optional[Dataof[ImageArray]] + RESIDUAL: Optional[Dataof[ImageArray]] + + MASK: Optional[Dataof[ImageArray]] + """ + Image mask. Use names like ``MASK_[name]`` if there are multiple + masks. + + Expected to use mask convention inverse of CASA6, True=good, + False=bad. + + Synthesized beam data vars have coordinates time, + polarization, frequency, and beam_param. In the case of a single, + global beam, the values are repeated for each (time, polarization, + frequency) tuple. """ + + BEAM: Optional[Data[tuple[Time, Polarization, Frequency, BeamParam], float]] + """ + Synthesized beam parameters (minor and major axis as well as + position angle). + + In the case of a single, global beam, the values are repeated + for each (time, polarization, frequency) tuple. + """ + +@xarray_dataset_schema +class ApertureImageXds(BaseImageXds): + + u: Coord[UCoord, ApertureCoordArray] + """For Fourier images, u coordinate""" + v: Coord[VCoord, ApertureCoordArray] + """For Fourier images, v coordinate""" + + # --- Data variables --- + + APERTURE: Dataof[ApertureArray] diff --git a/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py b/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py index 837f23dd..e00ef1dd 100644 --- a/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py +++ b/src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py @@ -386,6 +386,8 @@ def create_weather_xds(in_file: str, ant_position_with_ids: xr.DataArray): for data_var in weather_xds: if weather_xds.data_vars[data_var].dtype != np.float64: weather_xds[data_var] = weather_xds[data_var].astype(np.float64) + if "time_weather" in weather_xds.coords: + weather_xds.coords["time_weather"].attrs["type"] = "time_weather" return weather_xds @@ -833,7 +835,7 @@ def make_data_variable(raw_name: str, dim_names: list[str]) -> xr.DataArray: } data_vars["COORDINATE_AXES"].attrs = { "type": "rotation_matrix", - "units": ["dimensionless", "dimensionless", "dimensionless"], + "units": ["", "", ""], } # Remove the "frame" attribute if it exists, because ELEMENT_OFFSET is # defined in a station-local frame for which no standard name exists diff --git a/src/xradio/measurement_set/schema.py b/src/xradio/measurement_set/schema.py index 364b196d..805e278b 100644 --- a/src/xradio/measurement_set/schema.py +++ b/src/xradio/measurement_set/schema.py @@ -83,27 +83,24 @@ RotationMatrix = Literal["rotation_matrix"] # Units of quantities and measures -UnitsDimensionless = list[ - Literal["dimensionless"] -] # name consistent with casacore measures -UnitsSeconds = list[Literal["s"]] -UnitsHertz = list[Literal["Hz"]] -UnitsMeters = list[Literal["m"]] - -UnitsOfSkyCoordInRadians = list[Literal["rad"], Literal["rad"]] -UnitsOfLocationInMetersOrRadians = Union[ - list[Literal["m"], Literal["m"], Literal["m"]], - list[Literal["rad"], Literal["rad"], Literal["m"]], -] -UnitsOfPositionInRadians = list[Literal["rad"], Literal["rad"], Literal["m"]] -UnitsOfDopplerShift = Union[list[Literal["ratio"]], list[Literal["m/s"]]] - -UnitsRadians = list[Literal["rad"]] -UnitsKelvin = list[Literal["K"]] -UnitsKelvinPerJansky = list[Literal["K/Jy"]] -UnitsMetersPerSecond = list[Literal["m/s"]] -UnitsPascal = list[Literal["Pa"]] # hPa? (in MSv2) -UnitsPerSquareMeters = list[Literal["/m^2"]] +UnitsDimensionless = Literal[ + [""] +] # name consistent astropy +UnitsSeconds = Literal[["s"]] +UnitsHertz = Literal[["Hz"]] +UnitsMeters = Literal[["m"], ["m", "m", "m"]] + +UnitsOfSkyCoordInRadians = Literal[["rad", "rad"], ["rad", "rad", "m"]] +UnitsOfLocationInMetersOrRadians = Literal[["m", "m", "m"], ["rad", "rad", "m"]] +UnitsOfPositionInRadians = Literal[["rad", "rad", "m"]] +UnitsOfDopplerShift = Literal[["ratio"], ["m/s"]] + +UnitsRadians = Literal[["rad"]] +UnitsKelvin = Literal[["K"]] +UnitsKelvinPerJansky = Literal[["K/Jy"]] +UnitsMetersPerSecond = Literal[["m/s"]] +UnitsPascal = Literal[["Pa"]] # hPa? (in MSv2) +UnitsPerSquareMeters = Literal[["/m^2"]] # Quantities @@ -504,7 +501,7 @@ class TimeWeatherCoordArray: ``format``). """ - type: Attr[Time] = "time_weather" + type: Attr[TimeWeather] = "time_weather" """ Coordinate type. Should be ``"time_weather"``. """ units: Attr[UnitsSeconds] = ("s",) @@ -1379,7 +1376,7 @@ class VisibilityArray: @dict_schema class ObservationInfoDict: - observer: list + observer: list[str] """List of observer names.""" project: str """Project Code/Project_UID""" @@ -2231,7 +2228,7 @@ class PhasedArrayElementOffsetArray: float, ] - units: Attr[list[Literal["m"]]] + units: Attr[Literal[["m"]]] type: Attr[Location] """ Measure type. Should be ``"location"``.""" diff --git a/src/xradio/schema/bases.py b/src/xradio/schema/bases.py index b68acddd..b79c2d02 100644 --- a/src/xradio/schema/bases.py +++ b/src/xradio/schema/bases.py @@ -239,7 +239,7 @@ class TestArray: """ # Make into a dataclass (might not even be needed at some point?) - cls = dataclasses.dataclass(cls, init=True, repr=False, eq=False, frozen=True) + cls = dataclasses.dataclass(cls, init=True, repr=False, eq=False, frozen=True, kw_only=True) # Make schema cls.__xradio_array_schema = dataclass.xarray_dataclass_to_array_schema(cls) @@ -394,7 +394,7 @@ class constructor will be overwritten to generate schema-confirming """ # Make into a dataclass (might not even be needed at some point?) - cls = dataclasses.dataclass(cls, init=True, repr=False, eq=False, frozen=True) + cls = dataclasses.dataclass(cls, init=True, repr=False, eq=False, frozen=True, kw_only=True) # Make schema schema = dataclass.xarray_dataclass_to_dataset_schema(cls) @@ -453,7 +453,7 @@ def dict_schema(cls): """ # Make into a dataclass (might not even be needed at some point?) - cls = dataclasses.dataclass(cls, init=True, repr=False, eq=False, frozen=True) + cls = dataclasses.dataclass(cls, init=True, repr=False, eq=False, frozen=True, kw_only=True) # Make schema cls.__xradio_dict_schema = dataclass.xarray_dataclass_to_dict_schema(cls) diff --git a/src/xradio/schema/check.py b/src/xradio/schema/check.py index 92cf2457..062ccebb 100644 --- a/src/xradio/schema/check.py +++ b/src/xradio/schema/check.py @@ -1,3 +1,4 @@ +import builtins import dataclasses import typing import inspect @@ -15,6 +16,8 @@ xarray_dataclass_to_dataset_schema, xarray_dataclass_to_dict_schema, ) +from xradio.schema.dataclass import value_schema +from xradio.schema.metamodel import AttrSchemaRef, ValueSchema @dataclasses.dataclass @@ -295,7 +298,8 @@ def check_dtype(dtype: numpy.dtype, expected: [numpy.dtype]) -> SchemaIssues: :returns: List of :py:class:`SchemaIssue`s found """ - for exp_dtype in expected: + for exp_dtype_str in expected: + exp_dtype = numpy.dtype(exp_dtype_str) # If the expected dtype has no size (e.g. "U", a.k.a. a string of # arbitrary length), we don't check itemsize, only kind. if ( @@ -312,7 +316,7 @@ def check_dtype(dtype: numpy.dtype, expected: [numpy.dtype]) -> SchemaIssues: SchemaIssue( path=[("dtype", None)], message="Wrong numpy dtype", - found=dtype, + found=dtype.str, expected=list(expected), ) ] @@ -334,30 +338,23 @@ def check_attributes( issues = SchemaIssues() for attr_schema in attrs_schema: - # Attribute missing? Note that a value of "None" is equivalent for the - # purpose of the check + # Attribute missing is equivalent to a value of "None" is + # equivalent for the purpose of the check val = attrs.get(attr_schema.name) if val is None: if not attr_schema.optional: - # Get options - if typing.get_origin(attr_schema.typ) is typing.Union: - options = typing.get_args(attr_schema.typ) - else: - options = [attr_schema.typ] - issues.add( SchemaIssue( path=[(attr_kind, attr_schema.name)], - message=f"Required attribute {attr_schema.name} is missing!", - expected=options, + message="Non-optional attribute is missing!", + found=None, + expected=[attr_schema.type], ) ) continue - # Check attribute value - issues += _check_value_union(val, attr_schema.typ).at_path( - attr_kind, attr_schema.name - ) + # Check actual value + issues += _check_value(val, attr_schema).at_path(attr_kind, attr_schema.name) # Extra attributes are always okay @@ -385,7 +382,6 @@ def check_data_vars( issues = SchemaIssues() for data_var_schema in data_vars_schema: - allow_mutiple_versions = False for attr in data_var_schema.attributes: if hasattr(attr, "name"): @@ -450,125 +446,103 @@ def check_dict( return check_attributes(dct, schema.attributes, attr_kind="") -def _check_value(val, ann): +def _check_value(val: typing.Any, schema: metamodel.ValueSchema): """ Check whether value satisfies annotation If the annotation is a data array or dataset schema, it will be checked. :param val: Value to check - :param ann: Type annotation of value + :param schema: Schema of value :returns: Schema issues """ + # Unspecified? + if schema.type is None: + return SchemaIssues() + + # Optional? + if schema.optional and val is None: + return SchemaIssues() + # Is supposed to be a data array? - if bases.is_dataarray_schema(ann): + if schema.type == "dataarray": # Attempt to convert dictionaries automatically if isinstance(val, dict): try: val = xarray.DataArray.from_dict(val) except ValueError as e: + expected = [DataArray] + if schema.optional: + expected.append(type(None)) return SchemaIssues( [ SchemaIssue( - path=[], message=str(e), expected=[ann], found=type(val) + path=[], message=str(e), expected=expected, found=type(val) ) ] ) except TypeError as e: + expected = [DataArray] + if schema.optional: + expected.append(type(None)) return SchemaIssues( [ SchemaIssue( - path=[], message=str(e), expected=[ann], found=type(val) + path=[], message=str(e), expected=expected, found=type(val) ) ] ) if not isinstance(val, xarray.DataArray): # Fall through to plain type check - ann = xarray.DataArray - else: - return check_array(val, ann) - - # Is supposed to be a dataset? - if bases.is_dataset_schema(ann): - # Attempt to convert dictionaries automatically - if isinstance(val, dict): - try: - val = xarray.Dataset.from_dict(val) - except ValueError as e: - return SchemaIssues( - [ - SchemaIssue( - path=[], message=str(t), expected=[ann], found=type(val) - ) - ] - ) - if not isinstance(val, xarray.Dataset): - # Fall through to plain type check - ann = xarray.Dataset + type_to_check = xarray.DataArray else: - return check_dataset(val, ann) + return check_array(val, schema.array_schema) # Is supposed to be a dictionary? - if bases.is_dict_schema(ann): + elif schema.type == "dict": if not isinstance(val, dict): # Fall through to plain type check - ann = dict + type_to_check = dict else: - return check_dict(val, ann) + return check_dict(val, schema.dict_schema) + + elif schema.type == "list[str]": + type_to_check = typing.List[str] + elif schema.type in ["bool", "str", "int", "float"]: + type_to_check = getattr(builtins, schema.type) + else: + raise ValueError(f"Invalid typ_name in schema: {schema.type}") # Otherwise straight type check using typeguard try: - check_type(val, ann) + check_type(val, type_to_check) except TypeCheckError as t: + expected = [type_to_check] + if schema.optional: + expected.append(type(None)) return SchemaIssues( - [SchemaIssue(path=[], message=str(t), expected=[ann], found=type(val))] + [SchemaIssue(path=[], message=str(t), expected=expected, found=type(val))] ) - return SchemaIssues() - - -def _check_value_union(val, ann): - """ - Check whether value satisfies annotations, including union types - - If the annotation is a data array or dataset schema, it will be checked. - - :param val: Value to check - :param ann: Type annotation of value - :returns: Schema issues - """ - - if ann is None or ann is inspect.Signature.empty: - return SchemaIssues() - - # Account for union types (this especially catches "Optional") - if typing.get_origin(ann) is typing.Union: - options = typing.get_args(ann) - else: - options = [ann] - - # Go through options, try to find one without issues - args_issues = None - okay = False - for option in options: - arg_issues = _check_value(val, option) - # We can immediately return if we find no issues with - # some schema check - if not arg_issues: - return SchemaIssues() - if args_issues is None: - args_issues = arg_issues - - # Crude merging of expected options (for "unexpected type") - elif len(args_issues) == 1 and len(arg_issues) == 1: - args_issues[0].expected += arg_issues[0].expected + # List of literals given? + if schema.literal is not None: + for lit in schema.literal: + if val == lit: + return SchemaIssues() + return SchemaIssues( + [ + SchemaIssue( + path=[], + message=f"Disallowed literal value!", + expected=schema.literal, + found=val, + ) + ] + ) - # Return representative issues list - if not args_issues: - raise ValueError("Empty union set?") - return args_issues + return SchemaIssues() _DATASET_TYPES = {} @@ -591,7 +565,7 @@ def register_dataset_type(schema: metamodel.DatasetSchema): continue # Type should be a kind of literal - if typing.get_origin(attr.typ) is not typing.Literal: + if attr.literal is None: warnings.warn( f"In dataset schema {schema.schema_name}:" 'Attribute "type" should be a literal!' @@ -599,7 +573,12 @@ def register_dataset_type(schema: metamodel.DatasetSchema): continue # Register type names - for typ in typing.get_args(attr.typ): + for typ in attr.literal: + assert isinstance(typ, str), ( + f"In dataset schema {schema.schema_name}:" + 'Attribute "type" should be a literal giving ' + "names of schema!" + ) _DATASET_TYPES[typ] = schema @@ -621,7 +600,6 @@ def check_datatree( # Loop through all groups in datatree issues = SchemaIssues() for xds_name in datatree.groups: - # Ignore any leaf without data node = datatree[xds_name] if not node.has_data: @@ -679,7 +657,7 @@ def schema_checked(fn, check_parameters: bool = True, check_return: bool = True) @functools.wraps(fn) def _check_fn(*args, **kwargs): # Hide this function in pytest tracebacks - __tracebackhide__ = True + # __tracebackhide__ = True # Bind parameters, collect (potential) issues bound = signature.bind(*args, **kwargs) @@ -689,7 +667,15 @@ def _check_fn(*args, **kwargs): continue # Get annotation - issues += _check_value_union(val, anns.get(arg)).at_path(arg) + vschema = value_schema(anns.get(arg), "function", arg) + pseudo_attr_schema = AttrSchemaRef( + name=arg, + **{ + fld.name: getattr(vschema, fld.name) + for fld in dataclasses.fields(ValueSchema) + }, + ) + issues += _check_value(val, pseudo_attr_schema).at_path(arg) # Any issues found? raise issues.expect() @@ -699,7 +685,15 @@ def _check_fn(*args, **kwargs): # Check return if check_return: - issues = _check_value_union(val, signature.return_annotation) + vschema = value_schema(anns.get(arg), "function", "return") + pseudo_attr_schema = AttrSchemaRef( + name="return", + **{ + fld.name: getattr(vschema, fld.name) + for fld in dataclasses.fields(ValueSchema) + }, + ) + issues = _check_value(val, pseudo_attr_schema) issues.at_path("return").expect() # Check return value diff --git a/src/xradio/schema/dataclass.py b/src/xradio/schema/dataclass.py index 82275b4d..0f44aa20 100644 --- a/src/xradio/schema/dataclass.py +++ b/src/xradio/schema/dataclass.py @@ -76,7 +76,6 @@ def _check_invalid_dims( # Filter out dimension possibilities with undefined coordinates valid_dims = [ds for ds in dims if set(ds).issubset(all_coord_names)] - # print(f"{klass_name}.{field_name}", valid_dims, dims, all_coord_names) # Raise an exception if this makes the dimension set impossible if dims and not valid_dims: @@ -88,6 +87,135 @@ def _check_invalid_dims( return valid_dims +def value_schema(ann: typing.Any, klass_name: str, field_name: str) -> "ValueSchema": + """ + Take attribute type annotation and convert into type name and + - optionally - a list of literal allowed values + + :param ann: Annotation + :param klass_name: Name of class where annotation origins from + :param field_name: Name of field where annotation origins from + :returns: ValueSchema + """ + + # No annotation? + if ann is None: + return ValueSchema(None) + + # Optional? + if is_optional(ann): + + # Optional is actually represented as a union... Construct + # same union type without the "None" type. + typs = [typ for typ in get_args(ann) if typ is not None.__class__] + if len(typs) == 1: + typ = typs[0] + else: + raise ValueError( + f"In '{klass_name}', field '{field_name}' has" + f" a union type, which is not allowed!" + ) + + # Convert to schema recursively + vschema = value_schema(typ, klass_name, field_name) + vschema.optional = True + return vschema + + # Is a type? + if isinstance(ann, type): + # Array type? + if hasattr(ann, "__xradio_array_schema"): + return ValueSchema("dataarray", array_schema=ann.__xradio_array_schema) + + # Dictionary type? + if hasattr(ann, "__xradio_dict_schema"): + return ValueSchema("dict", dict_schema=ann.__xradio_dict_schema) + + # Check that it is an allowable type + if ann not in [bool, str, int, float, bool]: + raise ValueError( + f"In '{klass_name}', field '{field_name}' has" + f" type {ann} - but only str, int, float or list are allowed!" + ) + return ValueSchema(ann.__name__) + + # Is a list + if typing.get_origin(ann) in [typing.List, list]: + args = typing.get_args(ann) + + # Must be a string or flota list + if args != (str,) and args != (float,): + raise ValueError( + f"In '{klass_name}', field '{field_name}' has" + f" annotation {ann}, but only str, int, float, list[str] or Literal allowed!" + ) + + if args[0] == str: + return ValueSchema("list[str]") + else: + return ValueSchema("list[float]") + + # Is a literal? + if typing.get_origin(ann) is typing.Literal: + args = typing.get_args(ann) + + # Check that it is an allowable type + if len(args) == 0: + raise ValueError( + f"In '{klass_name}', field '{field_name}' has" + f" literal annotation, but allows no values!" + ) + + # String list? + typ = type(args[0]) + if typ is list: + elem_type = type(args[0][0]) + if elem_type is not str: + raise ValueError( + f"In '{klass_name}', field '{field_name}' has" + f" literal type list[{elem_type}] - but only list[str] is allowed!" + ) + for lit in args: + if not isinstance(lit, typ): + raise ValueError( + f"In '{klass_name}', field '{field_name}' literal" + f" {lit} has inconsistent type ({typ(lit)}) vs ({typ})!" + ) + for elem in lit: + if not isinstance(elem, elem_type): + raise ValueError( + f"In '{klass_name}', field '{field_name}' literal" + f" {lit} has inconsistent element type " + f"({typ(elem)}) vs ({elem_type})!" + ) + return ValueSchema( + "list[str]", + literal=[[str(elem) for elem in arg] for arg in args], + ) + + # Check that it is an allowable type + if typ not in [bool, str, int, float]: + raise ValueError( + f"In '{klass_name}', field '{field_name}' has" + f" literal type {typ} - but only str, int, float or list[str] are allowed!" + ) + + # Check that all literals have the same type + for lit in args: + if not isinstance(lit, typ): + raise ValueError( + f"In '{klass_name}', field '{field_name}' literal" + f" {lit} has inconsistent type ({typ(lit)}) vs ({typ})!" + ) + + return ValueSchema(typ.__name__, literal=[typ(arg) for arg in args]) + + raise ValueError( + f"In '{klass_name}', field '{field_name}' has" + f" annotation {ann}, but only type or Literal allowed!" + ) + + def extract_xarray_dataclass(klass, allow_undefined_coords: bool = False): """ Go through dataclass fields and interpret them according to xarray-dataclass @@ -113,6 +241,7 @@ def check_invalid_dims(dims, field_name): for field in dataclasses.fields(klass) if get_role(type_hints[field.name]) == Role.COORD } + print(klass, all_coord_names) def check_invalid_dims(dims, field_name): return _check_invalid_dims( @@ -132,13 +261,27 @@ def check_invalid_dims(dims, field_name): # Is it an attribute? if role == Role.ATTR: + try: + ann = get_annotated(typ) + except TypeError as e: + raise ValueError( + f"Could not get annotation in '{klass.__name__}' field '{field.name}': {e}" + ) + vschema = value_schema(get_annotated(typ), klass.__name__, field.name) + if is_optional(typ): + vschema.optional = True + attributes.append( AttrSchemaRef( name=field.name, - typ=get_annotated(typ), - optional=is_optional(typ), - default=field.default, + default=( + None if field.default is dataclasses.MISSING else field.default + ), docstring=field_docstrings.get(field.name), + **{ + fld.name: getattr(vschema, fld.name) + for fld in dataclasses.fields(ValueSchema) + }, ) ) continue @@ -151,7 +294,7 @@ def check_invalid_dims(dims, field_name): else: raise ValueError( f"Expected field '{field.name}' in '{klass.__name__}' " - "to be annotated with either Coord, Data or Attr!" + f"to be annotated with either Coord, Data or Attr!" ) # Defined using a dataclass, i.e. Coordof/Dataof? @@ -173,7 +316,7 @@ def check_invalid_dims(dims, field_name): schema_ref = ArraySchemaRef( name=field.name, optional=is_optional(typ), - default=field.default, + default=None if field.default is dataclasses.MISSING else field.default, docstring=field_docstrings.get(field.name), **arr_schema_fields, ) @@ -206,7 +349,9 @@ def check_invalid_dims(dims, field_name): schema_ref = ArraySchemaRef( name=field.name, optional=is_optional(typ), - default=field.default, + default=( + None if field.default is dataclasses.MISSING else field.default + ), docstring=field_docstrings.get(field.name), **arr_schema_fields, ) @@ -215,11 +360,13 @@ def check_invalid_dims(dims, field_name): schema_ref = ArraySchemaRef( name=field.name, optional=is_optional(typ), - default=field.default, + default=( + None if field.default is dataclasses.MISSING else field.default + ), docstring=field_docstrings.get(field.name), schema_name=None, dimensions=check_invalid_dims(dims, field.name), - dtypes=[numpy.dtype(typ) for typ in types], + dtypes=[numpy.dtype(typ).str for typ in types], coordinates=[], attributes=[], class_docstring=None, @@ -281,7 +428,7 @@ def xarray_dataclass_to_array_schema(klass): schema = ArraySchema( schema_name=f"{klass.__module__}.{klass.__qualname__}", dimensions=data_vars[0].dimensions, - dtypes=data_vars[0].dtypes, + dtypes=[numpy.dtype(dt).str for dt in data_vars[0].dtypes], coordinates=coordinates, attributes=attributes, class_docstring=inspect.cleandoc(klass.__doc__), @@ -369,22 +516,16 @@ def xarray_dataclass_to_dict_schema(klass): for field in dataclasses.fields(klass): typ = type_hints[field.name] - # Handle optional value: Strip "None" from the types - optional = is_optional(typ) - if optional: - typs = [typ for typ in get_args(typ) if typ is not None.__class__] - if len(typs) == 1: - typ = typs[0] - else: - typ = typing.Union.__getitem__[tuple(typs)] - + vschema = value_schema(typ, klass.__name__, field.name) attributes.append( AttrSchemaRef( name=field.name, - typ=typ, - optional=optional, - default=field.default, + default=None if field.default is dataclasses.MISSING else field.default, docstring=field_docstrings.get(field.name), + **{ + fld.name: getattr(vschema, fld.name) + for fld in dataclasses.fields(ValueSchema) + }, ) ) diff --git a/src/xradio/schema/export.py b/src/xradio/schema/export.py new file mode 100644 index 00000000..9b4828c9 --- /dev/null +++ b/src/xradio/schema/export.py @@ -0,0 +1,99 @@ +import dataclasses +import json + +from xradio.schema import ( + bases, + metamodel, + xarray_dataclass_to_array_schema, + xarray_dataclass_to_dataset_schema, + xarray_dataclass_to_dict_schema, +) + +CLASS_ATTR = "$class" + + +class DataclassEncoder(json.JSONEncoder): + """ + General-purpose encoder that represents data classes as + dictionaries, omitting defaults and annotating the original class + as a ``'$class'`` attribute. + """ + + def default(self, o): + if dataclasses.is_dataclass(o): + res = {CLASS_ATTR: o.__class__.__name__} + for fld in dataclasses.fields(type(o)): + if ( + getattr(o, fld.name) is not fld.default + and getattr(o, fld.name) is not dataclasses.MISSING + ): + res[fld.name] = getattr(o, fld.name) + return res + return super().default(o) + + +DATACLASS_MAP = { + cls.__name__: cls + for cls in [ + metamodel.DictSchema, + metamodel.ValueSchema, + metamodel.AttrSchemaRef, + metamodel.ArraySchema, + metamodel.ArraySchemaRef, + metamodel.DatasetSchema, + ] +} + + +class DataclassDecoder(json.JSONDecoder): + """ + General-purpose decoder that reads JSON as generated by + :py:class:`DataclassEncoder`. + """ + + def __init__(self, dataclass_map, *args, **kwargs): + self._dataclass_map = dataclass_map + super().__init__(*args, object_hook=self.object_hook, **kwargs) + + def object_hook(self, obj): + + # Detect dictionaries with '$class' annotation + if isinstance(obj, dict) and CLASS_ATTR in obj: + + # Identify the class + cls_name = obj[CLASS_ATTR] + cls = self._dataclass_map.get(cls_name) + if not cls: + raise ValueError( + f"Unknown $dataclass encountered while decoding JSON: {cls_name}" + ) + + # Instantiate + del obj[CLASS_ATTR] + obj = cls(**obj) + + return obj + + +def export_schema_json_file(schema, fname): + """ + Exports given schema as a JSON file + """ + + # Check that this is actually a Dataset + if bases.is_dataset_schema(schema): + schema = xarray_dataclass_to_dataset_schema(schema) + if not isinstance(schema, metamodel.DatasetSchema): + raise TypeError( + f"export_schema_json_file: Expected DatasetSchema, but got {type(schema)}!" + ) + + # Perform export + with open(fname, "w", encoding="utf8") as f: + json.dump(schema, f, cls=DataclassEncoder, ensure_ascii=False, indent=" ") + + +def import_schema_json_file(fname): + + with open(fname, "r", encoding="utf8") as f: + return json.load(f, cls=DataclassDecoder, dataclass_map=DATACLASS_MAP) diff --git a/src/xradio/schema/metamodel.py b/src/xradio/schema/metamodel.py index dfafefc9..ad78aee3 100644 --- a/src/xradio/schema/metamodel.py +++ b/src/xradio/schema/metamodel.py @@ -1,7 +1,10 @@ -from dataclasses import dataclass +from __future__ import annotations + +from dataclasses import dataclass, MISSING import typing __all__ = [ + "ValueSchema", "AttrSchemaRef", "ArraySchema", "ArraySchemaRef", @@ -10,8 +13,44 @@ ] -@dataclass(frozen=True) -class AttrSchemaRef: +@dataclass +class ValueSchema: + """ + Schema information about a value in an attribute or dictionary. + """ + + type: typing.Literal[ + "bool", "str", "int", "float", "list[str]", "dict", "dataarray" + ] + """ + Type of value + + * ``bool``: A boolean + * ``str``: A UTF-8 string + * ``int``: A 64-bit signed integer + * ``float``: A double-precision floating point number + * ``list[str]``: A list of strings + * ``dict``: Dictionary + * ``dataarray``: An xarray dataarray (encoded using ``to_dict``) + """ + dict_schema: typing.Optional[DictSchema] = None + """ + Dictionary schema, if it is an xarray DataArray + """ + array_schema: typing.Optional[ArraySchema] = None + """ + Array schema, if it is an xarray DataArray + """ + literal: typing.Optional[typing.List[typing.Any]] = None + """ + Allowed literal values, if specified. + """ + optional: bool = False + """Is the value optional?""" + + +@dataclass +class AttrSchemaRef(ValueSchema): """ Schema information about an attribute as referenced from an array or dataset schema. @@ -20,18 +59,11 @@ class AttrSchemaRef: in the array or dataset schema definition. """ - name: str + name: str = "" """Name of attribute as given in data array / dataset.""" - typ: type - """ - Python type of attribute. Note that this might again be a data - array or dataset, but we don't track that explicitly. - """ - optional: bool - """Is the attribute optional?""" - default: typing.Optional[typing.Any] + default: typing.Optional[typing.Any] = None """If optional: What is the default value?""" - docstring: str + docstring: str = "" """Documentation string of attribute reference""" @@ -49,8 +81,9 @@ class ArraySchema: """(Class) name of the schema""" dimensions: typing.List[typing.List[str]] """List of possible dimensions""" - dtypes: typing.List[typing.List["numpy.dtype"]] - """List of possible (numpy) types""" + dtypes: typing.List[typing.List[str]] + """List of possible (numpy) types as array interface protocol + descriptors (e.g. `">f4"`)""" coordinates: typing.List["ArraySchemaRef"] """Coordinates data arrays giving values to dimensions""" @@ -97,7 +130,7 @@ class ArraySchemaRef(ArraySchema): """Name of array schema as given in dataset.""" optional: bool """Is the data array optional?""" - default: typing.Optional[typing.Any] + default: typing.Optional[typing.Any] = None """If optional: What is the default value?""" docstring: typing.Optional[str] = None """Documentation string of array reference""" diff --git a/src/xradio/schema/typing.py b/src/xradio/schema/typing.py index 28b1465d..2bbe1186 100644 --- a/src/xradio/schema/typing.py +++ b/src/xradio/schema/typing.py @@ -312,26 +312,26 @@ def get_dims(tp: Any) -> List[Dims]: dims_out = [] for dim in dims_in: - args = get_args(dim) + args = list(get_args(dim)) origin = get_origin(dim) # One-dimensional dimension if origin is Literal: - dims_out.append((str(args[0]),)) + dims_out.append([str(args[0])]) continue if not (origin is tuple or origin is Tuple): raise TypeError(f"Could not find any dims in {tp!r}.") # Zero-dimensions - if args == () or args == ((),): - dims_out.append(()) + if args == [] or args == [()]: + dims_out.append([]) continue if not all(get_origin(arg) is Literal for arg in args): raise TypeError(f"Could not find any dims in {tp!r}.") - dims_out.append(tuple(str(get_args(arg)[0]) for arg in args)) + dims_out.append([str(get_args(arg)[0]) for arg in args]) return dims_out diff --git a/tests/unit/image/test_image.py b/tests/unit/image/test_image.py index 8e99728a..9ec7aa9b 100644 --- a/tests/unit/image/test_image.py +++ b/tests/unit/image/test_image.py @@ -34,6 +34,7 @@ from xradio.image._util._casacore.common import _open_image_ro as open_image_ro from xradio.image._util.common import _image_type as image_type from xradio.image._util._casacore.common import _object_name +from xradio.image.schema import AstroImageXds, ApertureImageXds from xradio.image._util._casacore.common import ( _open_image_ro as open_image_ro, @@ -41,6 +42,8 @@ ) from toolviper.dask.client import local_client +from xradio.schema.check import check_dataset + sky = "SKY" @@ -108,7 +111,9 @@ def dict_equality(self, dict1, dict2, dict1_name, dict2_name, exclude_keys=[]): ) elif isinstance(one[0], numbers.Number): self.assertTrue( - np.isclose(np.array(one), np.array(two)).all(), + np.isclose( + np.array(one), np.array(two), rtol=1e-3, atol=1e-7 + ).all(), f"{dict1_name}[{k}] != {dict2_name}[{k}], " f"{one} != {two}", ) @@ -2062,6 +2067,10 @@ def test_attrs(self): for skel in [self.skel_im(), self.skel_im_no_sky()]: self.run_attrs_tests(skel) + def test_schema(self): + for skel in [self.skel_im(), self.skel_im_no_sky()]: + check_dataset(skel, AstroImageXds).expect() + class make_empty_aperture_image_tests(make_empty_image_tests): """Test making skeleton image""" @@ -2115,6 +2124,10 @@ def test_attrs(self): skel = self.skel_im() self.run_attrs_tests(skel) + def test_schema(self): + for skel in [self.skel_im()]: + check_dataset(skel, ApertureImageXds).expect() + class make_empty_lmuv_image_tests(make_empty_image_tests): """Tests making image with l, m, u, v coordinates""" @@ -2212,6 +2225,10 @@ def test_attrs(self): skel = self.skel_im() self.run_attrs_tests(skel) + def test_schema(self): + for skel in [self.skel_im()]: + check_dataset(skel, ApertureImageXds).expect() + class write_image_test(xds_from_image_test): diff --git a/tests/unit/schema/test_export.py b/tests/unit/schema/test_export.py new file mode 100644 index 00000000..a6d47f1e --- /dev/null +++ b/tests/unit/schema/test_export.py @@ -0,0 +1,34 @@ +import json +import pytest +import pathlib + +from xradio.measurement_set.schema import VisibilityXds, SpectrumXds +from xradio.schema.export import export_schema_json_file, import_schema_json_file + + +@pytest.mark.parametrize("schema", [VisibilityXds, SpectrumXds]) +def test_schema_export_in_synch(tmp_path, schema): + """ + Checks whether JSON schemas in the repository tree match + the Python definitions. + """ + + # Export schema + schema_fname = f"{schema.__name__}.json" + export_schema_json_file(schema, tmp_path / schema_fname) + with open(tmp_path / schema_fname, "r", encoding="utf8") as f: + python_schema_json = json.load(f) + + # Load existing schema + repository_root = pathlib.Path(__file__).parent.parent.parent.parent + assert ( + repository_root / "schemas" + ).is_dir(), "Schema directory doesn't exist in expected location" + with open(repository_root / "schemas" / schema_fname, "r", encoding="utf8") as f: + repo_schema_json = json.load(f) + + # Check that schemas are synchronised + assert python_schema_json == repo_schema_json, ( + "Exported schemas not consistent with Python definitions! " + "Run 'make schema-export' from repository root!" + ) diff --git a/tests/unit/schema/test_schema.py b/tests/unit/schema/test_schema.py index 48a0787e..6d8a5192 100644 --- a/tests/unit/schema/test_schema.py +++ b/tests/unit/schema/test_schema.py @@ -5,6 +5,7 @@ import dask.array import pytest import inspect +import json from xradio.schema.typing import Attr, Coord, Coordof, Data, Dataof, Name from xradio.schema.metamodel import ( @@ -31,6 +32,7 @@ xarray_dataset_schema, dict_schema, ) +from xradio.schema.export import export_schema_json_file, import_schema_json_file Dim1 = Literal["coord"] Dim2 = Literal["coord2"] @@ -60,43 +62,43 @@ class _TestArraySchema: # The equivalent of the above in the meta-model TEST_ARRAY_SCHEMA = ArraySchema( schema_name=__name__ + "._TestArraySchema", - dimensions=[("coord",)], + dimensions=[["coord"]], coordinates=[ ArraySchemaRef( schema_name=None, name="coord", - dtypes=[numpy.dtype(float)], - dimensions=[("coord",)], + dtypes=[numpy.dtype(float).str], + dimensions=[["coord"]], coordinates=[], attributes=[], class_docstring=None, data_docstring=None, optional=False, - default=dataclasses.MISSING, + default=None, docstring="Docstring of coordinate", ), ], - dtypes=[numpy.dtype(complex)], + dtypes=[numpy.dtype(complex).str], class_docstring="Docstring of array schema\n\nMultiple lines!", data_docstring="Docstring of data", attributes=[ AttrSchemaRef( name="attr1", - typ=str, + type="str", optional=False, - default=dataclasses.MISSING, + default=None, docstring="Required attribute", ), AttrSchemaRef( name="attr2", - typ=int, + type="int", optional=False, default=123, docstring="Required attribute with default", ), AttrSchemaRef( name="attr3", - typ=int, + type="int", optional=True, default=None, docstring="Optional attribute with default", @@ -362,7 +364,7 @@ def test_check_array_extra_coord(): assert len(results) == 1 assert results[0].path == [("dims", None)] assert results[0].found == ["coord", "coord2"] - assert results[0].expected == [("coord",)] + assert results[0].expected == [["coord"]] def test_check_array_missing_coord(): @@ -372,7 +374,7 @@ def test_check_array_missing_coord(): assert len(results) == 2 assert results[0].path == [("dims", None)] assert results[0].found == [] - assert results[0].expected == [("coord",)] + assert results[0].expected == [["coord"]] assert results[1].path == [("coords", "coord")] @@ -388,7 +390,7 @@ def test_check_array_wrong_coord(): assert results[0].found == [ "coord2", ] - assert results[0].expected == [("coord",)] + assert results[0].expected == [["coord"]] assert results[1].path == [("coords", "coord")] @@ -433,7 +435,7 @@ def test_check_array_wrong_type(): assert results[1].expected == [int] assert results[2].path == [("attrs", "attr3")] assert results[2].found == float - assert results[2].expected == [int] + assert results[2].expected == [int, type(None)] def test_schema_checked_wrap(): @@ -563,21 +565,21 @@ class _TestDictSchema: attributes=[ AttrSchemaRef( name="attr1", - typ=str, + type="str", optional=False, - default=dataclasses.MISSING, + default=None, docstring="Required attribute", ), AttrSchemaRef( name="attr2", - typ=int, + type="int", optional=False, default=123, docstring="Required attribute with default", ), AttrSchemaRef( name="attr3", - typ=int, + type="int", optional=True, default=None, docstring="Optional attribute with default", @@ -650,7 +652,7 @@ def test_check_dict_missing(): assert len(results) == 1 assert results[0].path == [("", "attr2")] assert results[0].found == None - assert results[0].expected == [int] + assert results[0].expected == ["int"] with pytest.raises(SchemaIssues): results.expect() @@ -712,10 +714,10 @@ def _dataclass_to_dict(obj, ignore=[]): ArraySchemaRef( schema_name=__name__ + "._TestDatasetSchemaCoord", name="coord", - dtypes=[numpy.dtype(float)], - dimensions=[("coord",)], + dtypes=[numpy.dtype(float).str], + dimensions=[["coord"]], optional=False, - default=dataclasses.MISSING, + default=None, docstring="Docstring of coordinate", coordinates=[], attributes=_dataclass_to_dict(TEST_ARRAY_SCHEMA)["attributes"], @@ -725,14 +727,14 @@ def _dataclass_to_dict(obj, ignore=[]): ArraySchemaRef( schema_name=None, name="coord2", - dtypes=[numpy.dtype(int)], - dimensions=[("coord2",)], + dtypes=[numpy.dtype(int).str], + dimensions=[["coord2"]], coordinates=[], attributes=[], class_docstring=None, data_docstring=None, optional=True, - default=dataclasses.MISSING, + default=None, docstring="Docstring of second coordinate", ), ], @@ -740,42 +742,42 @@ def _dataclass_to_dict(obj, ignore=[]): ArraySchemaRef( name="data_var", optional=False, - default=dataclasses.MISSING, + default=None, docstring="Docstring of external data variable", **_dataclass_to_dict(TEST_ARRAY_SCHEMA), ), ArraySchemaRef( schema_name=None, name="data_var_simple", - dtypes=[numpy.dtype(numpy.float32)], - dimensions=[("coord2",)], + dtypes=[numpy.dtype(numpy.float32).str], + dimensions=[["coord2"]], coordinates=[], attributes=[], class_docstring=None, data_docstring=None, optional=True, - default=dataclasses.MISSING, + default=None, docstring="Docstring of simple optional data variable", ), ], attributes=[ AttrSchemaRef( name="attr1", - typ=str, + type="str", optional=False, - default=dataclasses.MISSING, + default=None, docstring="Required attribute", ), AttrSchemaRef( name="attr2", - typ=int, + type="int", optional=False, default=123, docstring="Required attribute with default", ), AttrSchemaRef( name="attr3", - typ=int, + type="int", optional=True, default=None, docstring="Optional attribute with default", @@ -995,7 +997,7 @@ def test_check_dataset_dtype_mismatch(): assert issues[0].expected == [numpy.dtype(int)] assert issues[0].found == numpy.dtype(float) assert issues[1].path == [("data_vars", "data_var_simple"), ("dtype", None)] - assert issues[1].expected == [numpy.float32] + assert issues[1].expected == [numpy.dtype(numpy.float32).str] assert issues[1].found == numpy.dtype(float) @@ -1015,7 +1017,11 @@ def test_check_dataset_wrong_dim(): issues = check_dataset(dataset, TEST_DATASET_SCHEMA) assert len(issues) == 1 assert issues[0].path == [("data_vars", "data_var_simple"), ("dims", None)] - assert issues[0].expected == [("coord2",)] + assert issues[0].expected == [ + [ + "coord2", + ] + ] assert issues[0].found == ["coord"] @@ -1068,38 +1074,6 @@ def test_check_dataset_optional_coordinate(): assert not issues -def test_check_dict_dataset_attribute(): - # Make dataset - attrs = {"attr1": "str", "attr2": 123, "attr3": 345} - coords = { - "coord": xarray.DataArray( - numpy.arange(10, dtype=float), dims=("coord",), attrs=attrs - ), - } - data_vars = { - "data_var": (("coord",), numpy.zeros(10, dtype=complex), attrs), - } - dataset = xarray.Dataset(data_vars, coords, attrs) - - # Check inside dictionary - @dict_schema - class _DictSchema: - ds: _TestDatasetSchema - - assert not check_dict( - { - "ds": dataset, - }, - _DictSchema, - ) - assert check_dict( - { - "ds": xarray.Dataset(data_vars, coords), - }, - _DictSchema, - ) - - def test_check_dict_array_attribute(): # Make array data = numpy.zeros(10, dtype=complex) @@ -1128,3 +1102,172 @@ class _DictSchema: {"da": {"attr1": "asd", "attr2": 234, "attr3": 345}}, _DictSchema ) assert check_dict({"da": {"attr2": 234, "attr3": 345}}, _DictSchema) + + +def test_check_dict_dict_attribute(): + # Check inside dictionary + @dict_schema + class _DictSchema: + da: _TestDictSchema + + assert not check_dict( + {"da": {"attr1": "asd", "attr2": 234, "attr3": 345}}, _DictSchema + ) + assert check_dict({"da": {"attr2": 234, "attr3": 345}}, _DictSchema) + + +TEST_DATASET_SCHEMA_JSON = { + "$class": "DatasetSchema", + "schema_name": "tests.unit.schema.test_schema._TestDatasetSchema", + "dimensions": [["coord"], ["coord", "coord2"]], + "coordinates": [ + { + "$class": "ArraySchemaRef", + "schema_name": "tests.unit.schema.test_schema._TestDatasetSchemaCoord", + "dimensions": [["coord"]], + "dtypes": ["