diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml index c66ca999..ef13b7bc 100644 --- a/.github/workflows/doc.yml +++ b/.github/workflows/doc.yml @@ -21,7 +21,7 @@ jobs: uses: actions/checkout@v6 with: { submodules: recursive } - uses: prefix-dev/setup-pixi@v0.9.3 - with: { pixi-version: v0.50.2 } + with: { pixi-version: v0.63.2 } - name: build documentation run: | pixi run doc diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index dc1f92a6..2d4a7957 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -18,7 +18,7 @@ jobs: - name: checkout uses: actions/checkout@v6 - uses: prefix-dev/setup-pixi@v0.9.3 - with: { pixi-version: v0.50.2 } + with: { pixi-version: v0.63.2 } - name: pylint run: pixi run pylint - name: black diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4903b2eb..963b3db0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,7 +33,7 @@ jobs: with: { submodules: recursive } - uses: prefix-dev/setup-pixi@v0.9.3 with: - pixi-version: v0.50.2 + pixi-version: v0.63.2 - name: doctest run: | pixi run -e ${{ matrix.environment }} doctest ${{ matrix.remote-data == 'remote' && '--remote-data' || '' }} diff --git a/doc/news/load-metadata.rst b/doc/news/load-metadata.rst new file mode 100644 index 00000000..450b3a76 --- /dev/null +++ b/doc/news/load-metadata.rst @@ -0,0 +1,24 @@ +**Added:** + +* Added . + +**Changed:** + +* Changed . + +**Removed:** + +* Removed deprecated module `cv_collection`. +* Removed deprecated module `cv_entry`. + +**Fixed:** + +* Fixed . + +**Deprecated:** + +* Deprecated . + +**Performance:** + +* Improved . 
diff --git a/examples/from_csv/from_csv_multiple_headers.csv b/examples/from_csv/from_csv_multiple_headers.csv new file mode 100644 index 00000000..e8b13072 --- /dev/null +++ b/examples/from_csv/from_csv_multiple_headers.csv @@ -0,0 +1,4 @@ +E,j +V,A / cm2 +1,2 +3,4 diff --git a/pixi.lock b/pixi.lock index 2c94af5a..23458182 100644 --- a/pixi.lock +++ b/pixi.lock @@ -5,6 +5,8 @@ environments: - url: https://conda.anaconda.org/conda-forge/ indexes: - https://pypi.org/simple + options: + pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -1579,6 +1581,8 @@ environments: - url: https://conda.anaconda.org/conda-forge/ indexes: - https://pypi.org/simple + options: + pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -3386,6 +3390,8 @@ environments: - url: https://conda.anaconda.org/conda-forge/ indexes: - https://pypi.org/simple + options: + pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -5074,6 +5080,8 @@ environments: - url: https://conda.anaconda.org/conda-forge/ indexes: - https://pypi.org/simple + options: + pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -6219,6 +6227,8 @@ environments: - url: https://conda.anaconda.org/conda-forge/ indexes: - https://pypi.org/simple + options: + pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -7816,6 +7826,8 @@ environments: - url: https://conda.anaconda.org/conda-forge/ indexes: - https://pypi.org/simple + options: + pypi-prerelease-mode: 
if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -9428,6 +9440,8 @@ environments: - url: https://conda.anaconda.org/conda-forge/ indexes: - https://pypi.org/simple + options: + pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -11031,6 +11045,8 @@ environments: - url: https://conda.anaconda.org/conda-forge/ indexes: - https://pypi.org/simple + options: + pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -12644,6 +12660,8 @@ environments: - url: https://conda.anaconda.org/conda-forge/ indexes: - https://pypi.org/simple + options: + pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -40365,7 +40383,7 @@ packages: - pypi: ./ name: unitpackage version: 0.11.2 - sha256: 6ee96f38df146d7ad06c80c2571eed38cdc16d6de7f0262440a1b092a35f6364 + sha256: af8d578e9c797241a93a5f9807b13eee883afc722b8fef79022d4fcb8beeca0c requires_dist: - astropy>=5,<8 - click>=8,<9 @@ -40375,7 +40393,6 @@ packages: - plotly>=5,<7 - pybtex>=0.25,<0.26 requires_python: '>=3.10' - editable: true - conda: https://conda.anaconda.org/conda-forge/noarch/uri-template-1.3.0-pyhd8ed1ab_1.conda sha256: e0eb6c8daf892b3056f08416a96d68b0a358b7c46b99c8a50481b22631a4dfc0 md5: e7cb0f5745e4c5035a460248334af7eb diff --git a/pyproject.toml b/pyproject.toml index b2c5f981..ce01c110 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,6 @@ unitpackage = "unitpackage.entrypoint:cli" [tool.setuptools] packages = [ "unitpackage", - "unitpackage.cv", "unitpackage.database", "unitpackage.electrochemistry", "unitpackage.loaders", diff --git a/unitpackage/collection.py 
b/unitpackage/collection.py index e4c1f0ff..5a8300ed 100644 --- a/unitpackage/collection.py +++ b/unitpackage/collection.py @@ -20,7 +20,7 @@ Search the collection for entries, for example, from a single publication providing its DOI:: - >>> collection.filter(lambda entry: entry.source.url == 'https://doi.org/10.1039/C0CP01001D') # doctest: +REMOTE_DATA + >>> collection.filter(lambda entry: entry.echemdb.source.url == 'https://doi.org/10.1039/C0CP01001D') # doctest: +REMOTE_DATA [Entry('alves_2011_electrochemistry_6010_f1a_solid'), ... """ @@ -28,7 +28,7 @@ # ******************************************************************** # This file is part of unitpackage. # -# Copyright (C) 2021-2025 Albert Engstfeld +# Copyright (C) 2021-2026 Albert Engstfeld # Copyright (C) 2021 Johannes Hermann # Copyright (C) 2021-2022 Julian Rüth # Copyright (C) 2021 Nicolas Hörmann @@ -47,7 +47,6 @@ # along with unitpackage. If not, see . # ******************************************************************** import logging -from functools import cached_property from frictionless import Package @@ -138,60 +137,6 @@ def create_example(cls): package=package, ) - @cached_property - def bibliography(self): - r""" - Return a pybtex database of all bibtex bibliography files, - associated with the entries. - - EXAMPLES:: - - >>> collection = Collection.create_example() - >>> collection.bibliography - BibliographyData( - entries=OrderedCaseInsensitiveDict([ - ('alves_2011_electrochemistry_6010', Entry('article', - ... - ('engstfeld_2018_polycrystalline_17743', Entry('article', - ... - - A derived collection includes only the bibliographic entries of the remaining entries:: - - >>> collection.filter(lambda entry: entry.source.citationKey != 'alves_2011_electrochemistry_6010').bibliography - BibliographyData( - entries=OrderedCaseInsensitiveDict([ - ('engstfeld_2018_polycrystalline_17743', Entry('article', - ... 
- - A collection with entries without bibliography:: - - >>> collection = Collection.create_example()["no_bibliography"] - >>> collection.bibliography - '' - - """ - from pybtex.database import BibliographyData - - bib_data = BibliographyData( - { - entry.bibliography.key: entry.bibliography - for entry in self - if entry.bibliography - } - ) - - if isinstance(bib_data, str): - return bib_data - - # Remove duplicates from the bibliography - bib_data_ = BibliographyData() - - for key, entry in bib_data.entries.items(): - if key not in bib_data_.entries: - bib_data_.add_entry(key, entry) - - return bib_data_ - def filter(self, predicate): r""" Return the subset of the collection that satisfies predicate. @@ -199,7 +144,7 @@ def filter(self, predicate): EXAMPLES:: >>> collection = Collection.create_example() - >>> collection.filter(lambda entry: entry.source.url == 'https://doi.org/10.1039/C0CP01001D') + >>> collection.filter(lambda entry: entry.echemdb.source.url == 'https://doi.org/10.1039/C0CP01001D') [Entry('alves_2011_electrochemistry_6010_f1a_solid')] @@ -610,7 +555,7 @@ def from_remote(cls, url=None, data=None, outdir=None): >>> from unitpackage.collection import Collection >>> collection = Collection.from_remote() # doctest: +REMOTE_DATA - >>> collection.filter(lambda entry: entry.source.url == 'https://doi.org/10.1039/C0CP01001D') # doctest: +REMOTE_DATA + >>> collection.filter(lambda entry: entry.echemdb.source.url == 'https://doi.org/10.1039/C0CP01001D') # doctest: +REMOTE_DATA [Entry('alves_2011_electrochemistry_6010_f1a_solid'), Entry('alves_2011_electrochemistry_6010_f2_red')] The folder containing the data in the zip can be specified with the :param data:. 
diff --git a/unitpackage/cv/__init__.py b/unitpackage/cv/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/unitpackage/cv/cv_collection.py b/unitpackage/cv/cv_collection.py deleted file mode 100644 index f7db1bc3..00000000 --- a/unitpackage/cv/cv_collection.py +++ /dev/null @@ -1,128 +0,0 @@ -r""" -A Collection of Cyclic Voltammograms. It provides additional functionalities compared to -the :class:`Collection` specific to Cyclic Voltammograms and electrochemical data. - -EXAMPLES: - -Create a collection from local `frictionless Data Packages `__ -in the `data/` directory:: - - >>> from unitpackage.cv.cv_collection import CVCollection - >>> collection = CVCollection.from_local('data/') - -Create a collection from the Data Packages published in the `echemdb data repository -`_ displayed on the `echemdb website -`_.:: - - >>> collection = CVCollection.from_remote() # doctest: +REMOTE_DATA - -Search the collection for entries from a single publication:: - - >>> collection.filter(lambda entry: entry.source.url == 'https://doi.org/10.1039/C0CP01001D') # doctest: +REMOTE_DATA - [CVEntry('alves_2011_electrochemistry_6010_f1a_solid'), ... - -""" - -# ******************************************************************** -# This file is part of unitpackage. -# -# Copyright (C) 2021-2025 Albert Engstfeld -# Copyright (C) 2021 Johannes Hermann -# Copyright (C) 2021-2022 Julian Rüth -# Copyright (C) 2021 Nicolas Hörmann -# -# unitpackage is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# unitpackage is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with unitpackage. If not, see . -# ******************************************************************** -import logging -import warnings - -from unitpackage.collection import Collection - -logger = logging.getLogger("unitpackage") - - -class CVCollection(Collection): - r""" - A collection of `frictionless Data Packages `__. - - Essentially this is just a list of data packages with some additional - convenience wrap for use in the `echemdb data repository `_ - displayed on the `echemdb website `_. - - EXAMPLES: - - An example collection:: - - >>> collection = Collection.create_example() - >>> collection.package.resource_names # doctest: +NORMALIZE_WHITESPACE - ['alves_2011_electrochemistry_6010_f1a_solid', - 'engstfeld_2018_polycrystalline_17743_f4b_1', - 'no_bibliography'] - - """ - - from unitpackage.cv.cv_entry import CVEntry - - Entry = CVEntry - - def __init__(self, *args, **kwargs): - warnings.warn( - f"{self.__class__.__name__} is deprecated. Loading the echemdb database has been moved to `echemdb.Echemdb` and will be removed or refactored in a future version.", - category=DeprecationWarning, - stacklevel=2, - ) - super().__init__(*args, **kwargs) - - def materials(self): - r""" - Return the substrate materials in the collection. - - EXAMPLES:: - - >>> collection = CVCollection.create_example() - >>> collection.materials() == {'Cu', 'Ru'} - True - - """ - # pylint: disable=R0801 - import pandas as pd - - return set( - pd.unique(pd.Series([entry.get_electrode("WE").material for entry in self])) - ) - - def describe(self): - r""" - Return some statistics about the collection. - - EXAMPLES:: - - >>> collection = CVCollection.create_example() - >>> collection.describe() == \ - ... {'number of references': 2, - ... 'number of entries': 3, - ... 
'materials': {'Cu', 'Ru'}} - True - - """ - # pylint: disable=R0801 - return { - "number of references": ( - 0 - if isinstance(self.bibliography, str) - else len(self.bibliography.entries) - ), - "number of entries": len(self), - "materials": self.materials(), - } diff --git a/unitpackage/cv/cv_entry.py b/unitpackage/cv/cv_entry.py deleted file mode 100644 index 805b5bda..00000000 --- a/unitpackage/cv/cv_entry.py +++ /dev/null @@ -1,286 +0,0 @@ -r""" -A Data Package describing a Cyclic Voltammogram (CV) found in the field of electrochemistry. -It provides additional functionalities compared to the class :class:`~unitpackage.entry.Entry`. - -These are the individual elements of a :class:`~unitpackage.cv.cv_collection.CVCollection`. - -EXAMPLES: - -Create a collection from local `frictionless Data Packages `__ -in the `data/` directory:: - - >>> from unitpackage.cv.cv_collection import CVCollection - >>> collection = CVCollection.from_local('data/') - -We can directly access the material of an electrode used in the experiment, -such as the WE, CE or REF:: - - >>> from unitpackage.cv.cv_collection import CVCollection - >>> db = CVCollection.create_example() - >>> entry = db['alves_2011_electrochemistry_6010_f1a_solid'] - >>> entry.get_electrode('WE').material - 'Ru' - -The :meth:`~unitpackage.cv.cv_entry.CVEntry.plot` creates a typical representation of a CV, -where ``I`` or. ``j`` is plotted vs. ``U`` or. ``E``:: - - >>> entry.plot() - Figure(...) - -""" - -# ******************************************************************** -# This file is part of unitpackage. 
-# -# Copyright (C) 2021-2025 Albert Engstfeld -# Copyright (C) 2021 Johannes Hermann -# Copyright (C) 2021-2022 Julian Rüth -# Copyright (C) 2021 Nicolas Hörmann -# -# unitpackage is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# unitpackage is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with unitpackage. If not, see . -# ******************************************************************** -import logging -import warnings - -from unitpackage.entry import Entry - -logger = logging.getLogger("unitpackage") - - -class CVEntry(Entry): - r""" - A `frictionless Data Package `_ describing a CV. - - EXAMPLES: - - An entry can be created directly from a Data Package that has been created - with `svgdigitizer's `_ `cv` command. - However, entries are normally obtained by opening a :class:`~unitpackage.cv.cv_collection.CVCollection` of entries:: - - >>> from unitpackage.cv.cv_collection import CVCollection - >>> collection = CVCollection.create_example() - >>> entry = next(iter(collection)) - - """ - - def __init__(self, *args, **kwargs): - warnings.warn( - f"{self.__class__.__name__} is deprecated. Use `echemdb.echemdb_entry.EchemdbEntry` instead.", - category=DeprecationWarning, - stacklevel=2, - ) - super().__init__(*args, **kwargs) - - def __repr__(self): - r""" - Return a printable representation of this entry. 
- - EXAMPLES:: - - >>> entry = CVEntry.create_examples()[0] - >>> entry - CVEntry('alves_2011_electrochemistry_6010_f1a_solid') - - """ - return f"CVEntry({self.identifier!r})" - - def get_electrode(self, name): - r""" - Returns an electrode with the specified name. - - EXAMPLES:: - - >>> entry = CVEntry.create_examples()[0] - >>> entry.get_electrode('WE') # doctest: +NORMALIZE_WHITESPACE - {'name': 'WE', 'function': 'workingElectrode', 'type': 'single crystal', - 'crystallographicOrientation': '0001', 'material': 'Ru', - 'preparationProcedure': 'Sputtering and flash annealing under UHV - conditions with repeated cycles of oxygen adsorption and desorption.', - 'shape': {'height': {'unit': 'mm', 'value': 2}, 'type': 'hat shaped'}, - 'source': {'supplier': 'Mateck'}} - - TESTS:: - - >>> entry.get_electrode('foo') # doctest: +NORMALIZE_WHITESPACE - Traceback (most recent call last): - ... - KeyError: "Electrode with name 'foo' does not exist" - - """ - # pylint: disable=R0801 - for electrode in self.system.electrodes: - if electrode["name"] == name: - return electrode - - raise KeyError(f"Electrode with name '{name}' does not exist") - - def rescale(self, units): - r""" - Return a rescaled :class:`~unitpackage.cv.cv_entry.CVEntry` with axes in the specified ``units``. - - Usage is essentially the same as for :meth:`~unitpackage.entry.Entry.rescale`, i.e., - new units are expected as dict, where the key is the axis name and the value - the new unit, such as ``{'j': 'uA / cm2', 't': 'h'}``. - - Additionally, the entry can be rescaled to the axes' units of the original data. 
- These units must be defined in the metadata of the resource, - within the key ``figure_description.fields``:: - - >>> entry = CVEntry.create_examples()[0] - >>> rescaled_entry = entry.rescale(units='original') - >>> rescaled_entry.mutable_resource.schema.fields # doctest: +NORMALIZE_WHITESPACE - [{'name': 't', 'type': 'number', 'unit': 's'}, - {'name': 'E', 'type': 'number', 'unit': 'V', 'reference': 'RHE'}, - {'name': 'j', 'type': 'number', 'unit': 'mA / cm2'}] - - """ - # pylint: disable=R0801 - if units == "original": - units = { - field["name"]: field["unit"] for field in self.figureDescription.fields - } - - return super().rescale(units) - - def _normalize_field_name(self, field_name): - r""" - Return the name of a field name of the `unitpackage` resource. - - If 'j' is requested but is not present in the resource, - 'I' is returned instead. - - EXAMPLES:: - - >>> entry = CVEntry.create_examples()[0] - >>> entry._normalize_field_name('j') - 'j' - >>> entry._normalize_field_name('x') - Traceback (most recent call last): - ... - ValueError: No axis with name 'x' found. - - """ - # pylint: disable=R0801 - if field_name in self.mutable_resource.schema.field_names: - return field_name - if field_name == "j": - return self._normalize_field_name("I") - raise ValueError(f"No axis with name '{field_name}' found.") - - def thumbnail(self, width=96, height=72, dpi=72, **kwds): - r""" - Return a thumbnail of the entry's curve as a PNG byte stream. - - EXAMPLES:: - - >>> entry = CVEntry.create_examples()[0] - >>> thumb = entry.thumbnail() - >>> thumb.startswith(b'\x89PNG') # different python versions may produce different binary outputs using " or '. - True - - The PNG's ``width`` and ``height`` can be specified in pixels. 
- Additional keyword arguments are passed to the data frame plotting - method:: - - >>> thumb = entry.thumbnail(width=4, height=2, color='red', linewidth=2) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE - >>> thumb.startswith(b'\x89PNG') # different python versions may produce different binary outputs using " or '. - True - - """ - # pylint: disable=R0801 - kwds.setdefault("color", "b") - kwds.setdefault("linewidth", 1) - kwds.setdefault("legend", False) - - import matplotlib.pyplot - - # A reasonable DPI setting that should work for most screens is the default value of 72. - fig, axis = matplotlib.pyplot.subplots( - 1, 1, figsize=[width / dpi, height / dpi], dpi=dpi - ) - self.df.plot( - "E", - self._normalize_field_name("j"), - ax=axis, - **kwds, - ) - - matplotlib.pyplot.axis("off") - matplotlib.pyplot.close(fig) - - import io - - buffer = io.BytesIO() - fig.savefig(buffer, format="png", transparent=True, dpi=dpi) - - buffer.seek(0) - return buffer.read() - - def plot(self, x_label="E", y_label="j", name=None): - r""" - Return a plot of this entry. - The default plot is a Cyclic Voltammogram ('j vs E'). - When `j` is not present in the data, `I` is used instead. - - EXAMPLES:: - - >>> entry = CVEntry.create_examples()[0] - >>> entry.plot() - Figure(...) - - The plot can also be returned with custom axis dimensions (field names) available in the resource:: - - >>> entry.plot(x_label='t', y_label='E') - Figure(...) - - A plot resembling the original figure can be obtained by first rescaling:: - - >>> rescaled_entry = entry.rescale('original') - >>> rescaled_entry.plot() - Figure(...) - - """ - # pylint: disable=R0801 - x_label = self._normalize_field_name(x_label) - y_label = self._normalize_field_name(y_label) - - def figure_name(): - if ( - hasattr(self.resource, "source") - and hasattr(self.resource.source, "figure") - and hasattr(self.resource.source, "curve") - ): - return f"Fig. 
{self.source.figure}: {self.source.curve}" - - return self.identifier - - fig = super().plot(x_label=x_label, y_label=y_label, name=name or figure_name()) - - def reference(label): - if not label == "E": - return "" - field = self.mutable_resource.schema.get_field(label).to_dict() - if "reference" not in field: - return "" - return f" vs. {field['reference']}" - - def axis_label(label): - return f"{label} [{self.field_unit(label)}{reference(label)}]" - - fig.update_layout( - xaxis_title=axis_label(x_label), - yaxis_title=axis_label(y_label), - ) - - return fig diff --git a/unitpackage/database/echemdb.py b/unitpackage/database/echemdb.py index 38928a3b..99c09ec2 100644 --- a/unitpackage/database/echemdb.py +++ b/unitpackage/database/echemdb.py @@ -22,7 +22,7 @@ # ******************************************************************** # This file is part of unitpackage. # -# Copyright (C) 2021-2025 Albert Engstfeld +# Copyright (C) 2021-2026 Albert Engstfeld # Copyright (C) 2021 Johannes Hermann # Copyright (C) 2021-2022 Julian Rüth # Copyright (C) 2021 Nicolas Hörmann @@ -41,6 +41,7 @@ # along with unitpackage. If not, see . # ******************************************************************** import logging +from functools import cached_property from unitpackage.collection import Collection @@ -114,3 +115,58 @@ def describe(self): "number of entries": len(self), "materials": self.materials(), } + + @cached_property + def bibliography(self): + r""" + Return a pybtex database of all bibtex bibliography files, + associated with the entries. + + EXAMPLES:: + + >>> from unitpackage.database.echemdb import Echemdb + >>> collection = Echemdb.create_example() + >>> collection.bibliography + BibliographyData( + entries=OrderedCaseInsensitiveDict([ + ('alves_2011_electrochemistry_6010', Entry('article', + ... + ('engstfeld_2018_polycrystalline_17743', Entry('article', + ... 
+ + A derived collection includes only the bibliographic entries of the remaining entries:: + + >>> collection.filter(lambda entry: entry.source.citationKey != 'alves_2011_electrochemistry_6010').bibliography + BibliographyData( + entries=OrderedCaseInsensitiveDict([ + ('engstfeld_2018_polycrystalline_17743', Entry('article', + ... + + A collection with entries without bibliography:: + + >>> collection = Echemdb.create_example()["no_bibliography"] + >>> collection.bibliography + '' + + """ + from pybtex.database import BibliographyData + + bib_data = BibliographyData( + { + entry.bibliography.key: entry.bibliography + for entry in self + if entry.bibliography + } + ) + + if isinstance(bib_data, str): + return bib_data + + # Remove duplicates from the bibliography + bib_data_ = BibliographyData() + + for key, entry in bib_data.entries.items(): + if key not in bib_data_.entries: + bib_data_.add_entry(key, entry) + + return bib_data_ diff --git a/unitpackage/database/echemdb_entry.py b/unitpackage/database/echemdb_entry.py index d08d02df..e4ec0610 100644 --- a/unitpackage/database/echemdb_entry.py +++ b/unitpackage/database/echemdb_entry.py @@ -21,12 +21,32 @@ >>> entry.plot() Figure(...) 
+ Data Entries containing published data + also contain information on the source of the data:: + + >>> from unitpackage.database.echemdb import Echemdb + >>> db = Echemdb.create_example() + >>> entry = db['alves_2011_electrochemistry_6010_f1a_solid'] + >>> entry.bibliography # doctest: +NORMALIZE_WHITESPACE +REMOTE_DATA + Entry('article', + fields=[ + ('title', 'Electrochemistry at Ru(0001) in a flowing CO-saturated electrolyte—reactive and inert adlayer phases'), + ('journal', 'Physical Chemistry Chemical Physics'), + ('volume', '13'), + ('number', '13'), + ('pages', '6010--6021'), + ('year', '2011'), + ('publisher', 'Royal Society of Chemistry'), + ('abstract', 'We investigated ...')], + persons={'author': [Person('Alves, Otavio B'), Person('Hoster, Harry E'), Person('Behm, Rolf J{\\"u}rgen')]}) + + """ # ******************************************************************** # This file is part of unitpackage. # -# Copyright (C) 2021-2025 Albert Engstfeld +# Copyright (C) 2021-2026 Albert Engstfeld # Copyright (C) 2021 Johannes Hermann # Copyright (C) 2021-2022 Julian Rüth # Copyright (C) 2021 Nicolas Hörmann @@ -66,6 +86,9 @@ class EchemdbEntry(Entry): """ + default_metadata_key = "echemdb" + """Use 'echemdb' key to access descriptor metadata.""" + def __repr__(self): r""" Return a printable representation of this entry. @@ -79,6 +102,108 @@ def __repr__(self): """ return f"Echemdb({self.identifier!r})" + @property + def bibliography(self): + r""" + Return a pybtex bibliography object associated with this entry. + + EXAMPLES:: + + >>> entry = EchemdbEntry.create_examples()[0] + >>> entry.bibliography # doctest: +NORMALIZE_WHITESPACE + Entry('article', + fields=[ + ('title', ... + ... 
+ + >>> entry_no_bib = EchemdbEntry.create_examples(name="no_bibliography")[0] + >>> entry_no_bib.bibliography + '' + + """ + metadata = self._default_metadata.setdefault("source", {}) + citation = metadata.setdefault("bibdata", "") + + if not citation: + logger.warning(f"Entry with name {self.identifier} has no bibliography.") + return citation + + from pybtex.database import parse_string + + bibliography = parse_string(citation, "bibtex") + return bibliography.entries[self.source.citationKey] + + def citation(self, backend="text"): + r""" + Return a formatted reference for the entry's bibliography such as: + + J. Doe, et al., Journal Name, volume (YEAR) page, "Title" + + Rendering default is plain text 'text', but can be changed to any format + supported by pybtex, such as markdown 'md', 'latex' or 'html'. + + EXAMPLES:: + + >>> entry = EchemdbEntry.create_examples()[0] + >>> entry.citation(backend='text') + 'O. B. Alves et al. Electrochemistry at Ru(0001) in a flowing CO-saturated electrolyte—reactive and inert adlayer phases. Physical Chemistry Chemical Physics, 13(13):6010–6021, 2011.' + >>> print(entry.citation(backend='md')) + O\. B\. Alves *et al\.* + *Electrochemistry at Ru\(0001\) in a flowing CO\-saturated electrolyte—reactive and inert adlayer phases*\. + *Physical Chemistry Chemical Physics*, 13\(13\):6010–6021, 2011\. + + """ + from pybtex.style.formatting.unsrt import Style + + # TODO:: Remove `class EchemdbStyle` from citation and improve citation style. (see #104) + class EchemdbStyle(Style): + r""" + A citation style for the echemdb website. 
+ """ + + def format_names(self, role, as_sentence=True): + from pybtex.style.template import node + + @node + def names(_, context, role): + persons = context["entry"].persons[role] + style = context["style"] + + names = [ + style.format_name(person, style.abbreviate_names) + for person in persons + ] + + if len(names) == 1: + return names[0].format_data(context) + + from pybtex.style.template import tag, words + + # pylint: disable=no-value-for-parameter + return words(sep=" ")[names[0], tag("i")["et al."]].format_data( + context + ) + + # pylint: disable=no-value-for-parameter + names = names(role) + + from pybtex.style.template import sentence + + return sentence[names] if as_sentence else names + + def format_title(self, e, which_field, as_sentence=True): + from pybtex.style.template import field, sentence, tag + + # pylint: disable=no-value-for-parameter + title = tag("i")[field(which_field)] + return sentence[title] if as_sentence else title + + return ( + EchemdbStyle(abbreviate_names=True) + .format_entry("unused", self.bibliography) + .text.render_as(backend) + ) + def get_electrode(self, name): r""" Returns an electrode with the specified name. diff --git a/unitpackage/descriptor.py b/unitpackage/descriptor.py index f1c0345a..7dc10924 100644 --- a/unitpackage/descriptor.py +++ b/unitpackage/descriptor.py @@ -45,7 +45,7 @@ # ******************************************************************** # This file is part of unitpackage. 
# -# Copyright (C) 2021-2023 Albert Engstfeld +# Copyright (C) 2021-2026 Albert Engstfeld # Copyright (C) 2021 Johannes Hermann # Copyright (C) 2021 Julian Rüth # Copyright (C) 2021 Nicolas Hörmann @@ -171,7 +171,7 @@ class QuantityDescriptor(GenericDescriptor): >>> from unitpackage.entry import Entry >>> entry = Entry.create_examples()[0] - >>> temperature = entry.system.electrolyte.temperature + >>> temperature = entry.echemdb.system.electrolyte.temperature >>> temperature 298.15 K @@ -188,7 +188,7 @@ def quantity(self): >>> from unitpackage.entry import Entry >>> entry = Entry.create_examples()[0] - >>> temperature = entry.system.electrolyte.temperature + >>> temperature = entry.echemdb.system.electrolyte.temperature >>> temperature.quantity @@ -205,7 +205,7 @@ def __repr__(self): >>> from unitpackage.entry import Entry >>> entry = Entry.create_examples()[0] - >>> temperature = entry.system.electrolyte.temperature + >>> temperature = entry.echemdb.system.electrolyte.temperature >>> temperature 298.15 K diff --git a/unitpackage/entry.py b/unitpackage/entry.py index 5ddfca10..569b6661 100644 --- a/unitpackage/entry.py +++ b/unitpackage/entry.py @@ -14,8 +14,9 @@ Metadata included in an entry is accessible as an attribute:: + >>> from unitpackage.entry import Entry >>> entry = Entry.create_examples()[0] - >>> entry.source # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + >>> entry.echemdb.source # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE {'citationKey': 'alves_2011_electrochemistry_6010', 'url': 'https://doi.org/10.1039/C0CP01001D', 'figure': '1a', @@ -31,31 +32,34 @@ 1 0.020000 -0.102158 -0.981762 ... 
-Data Entries containing published data, -also contain information on the source of the data.:: - - >>> from unitpackage.collection import Collection - >>> db = Collection.create_example() - >>> entry = db['alves_2011_electrochemistry_6010_f1a_solid'] - >>> entry.bibliography # doctest: +NORMALIZE_WHITESPACE +REMOTE_DATA - Entry('article', - fields=[ - ('title', 'Electrochemistry at Ru(0001) in a flowing CO-saturated electrolyte—reactive and inert adlayer phases'), - ('journal', 'Physical Chemistry Chemical Physics'), - ('volume', '13'), - ('number', '13'), - ('pages', '6010--6021'), - ('year', '2011'), - ('publisher', 'Royal Society of Chemistry'), - ('abstract', 'We investigated ...')], - persons={'author': [Person('Alves, Otavio B'), Person('Hoster, Harry E'), Person('Behm, Rolf J{\\"u}rgen')]}) +Entries can be created from various sources, such as CSV files or pandas DataFrames:: + + >>> entry = Entry.from_csv(csvname='examples/from_csv/from_csv.csv') + >>> entry + Entry('from_csv') + +Information on the fields, such as units, can be updated:: + + >>> fields = [{'name':'E', 'unit': 'mV'}, {'name':'I', 'unit': 'A'}] + >>> entry = entry.update_fields(fields=fields) + >>> entry.mutable_resource.schema.fields # doctest: +NORMALIZE_WHITESPACE + [{'name': 'E', 'type': 'integer', 'unit': 'mV'}, + {'name': 'I', 'type': 'integer', 'unit': 'A'}] + +Metadata of the resource can be updated in place:: + + >>> metadata = {'echemdb': {'source': {'citationKey': 'new_key'}}} + >>> entry.metadata.from_dict(metadata) + >>> entry.metadata + {'echemdb': {'source': {'citationKey': 'new_key'}}} + """ # ******************************************************************** # This file is part of unitpackage. 
# -# Copyright (C) 2021-2025 Albert Engstfeld +# Copyright (C) 2021-2026 Albert Engstfeld # Copyright (C) 2021 Johannes Hermann # Copyright (C) 2021-2022 Julian Rüth # Copyright (C) 2021 Nicolas Hörmann @@ -77,6 +81,7 @@ import os.path from unitpackage.descriptor import Descriptor +from unitpackage.metadata import MetadataDescriptor logger = logging.getLogger("unitpackage") @@ -115,9 +120,101 @@ class Entry: """ + default_metadata_key = "" + """Default metadata key to use when accessing the descriptor. + If empty string, the entire metadata dict is used. Subclasses can override this.""" + def __init__(self, resource): self.resource = resource + @property + def metadata(self): + r""" + Access and manage entry metadata. + + Returns a MetadataDescriptor that supports both dict and attribute-style access. + Allows loading metadata from various sources. Modifications are applied in-place. + + EXAMPLES:: + + >>> entry = Entry.create_examples()[0] + >>> entry.metadata['echemdb']['source']['citationKey'] + 'alves_2011_electrochemistry_6010' + + >>> entry.metadata.echemdb['source']['citationKey'] + 'alves_2011_electrochemistry_6010' + + Load metadata from a dict:: + + >>> new_entry = Entry.create_examples()[0] + >>> new_entry.metadata.from_dict({'echemdb': {'test': 'data'}}) + >>> new_entry.metadata['echemdb']['test'] + 'data' + + """ + return MetadataDescriptor(self) + + def load_metadata(self, filename, file_format=None, key=None): + r""" + Load metadata from a file and return self for method chaining. + + The file format is auto-detected from the extension if not specified. + Supported formats are 'yaml' and 'json'. + + EXAMPLES: + + Load metadata from a YAML file:: + + >>> import os + >>> import tempfile + >>> import yaml + >>> entry = Entry.create_examples()[0] + >>> with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f: + ... yaml.dump({'source': {'citationKey': 'chain_test'}}, f) + ... 
temp_path = f.name + >>> entry.load_metadata(temp_path, key='echemdb').metadata.echemdb.source.citationKey + 'chain_test' + >>> os.unlink(temp_path) + + Load metadata from a JSON file with auto-detection:: + + >>> import os + >>> import json + >>> import tempfile + >>> entry = Entry.create_examples()[0] + >>> with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: + ... json.dump({'custom': {'data': 'value'}}, f) + ... temp_path = f.name + >>> entry.load_metadata(temp_path).metadata.custom.data + 'value' + >>> os.unlink(temp_path) + + + """ + # Auto-detect format from file extension if not specified + if file_format is None: + if filename.endswith(".yaml") or filename.endswith(".yml"): + file_format = "yaml" + elif filename.endswith(".json"): + file_format = "json" + else: + raise ValueError( + f"Cannot auto-detect format for '{filename}'. " + "Please specify file_format='yaml' or file_format='json'" + ) + + # Load metadata using the appropriate method + if file_format == "yaml": + self.metadata.from_yaml(filename, key=key) + elif file_format == "json": + self.metadata.from_json(filename, key=key) + else: + raise ValueError( + f"Unsupported format '{file_format}'. Use 'yaml' or 'json'" + ) + + return self + @property def identifier(self): r""" @@ -142,10 +239,10 @@ def __dir__(self): >>> entry = Entry.create_examples()[0] >>> dir(entry) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE - [... 'bibliography', 'citation', 'create_examples', 'curation', - 'dataDescription', 'df', 'experimental', 'field_unit', 'figureDescription', - 'from_csv', 'from_df', 'from_local', 'identifier', 'mutable_resource', 'plot', - 'rename_fields', 'rescale', 'resource', 'save', 'source', 'system', 'yaml'] + [... 
'create_examples', 'default_metadata_key', 'df', 'echemdb', 'field_unit', + 'from_csv', 'from_df', 'from_local', 'identifier', 'load_metadata', + 'metadata', 'mutable_resource', 'plot', 'rename_fields', 'rescale', + 'resource', 'save', 'update_fields', 'yaml'] """ return list(set(dir(self._descriptor) + object.__dir__(self))) @@ -157,7 +254,7 @@ def __getattr__(self, name): EXAMPLES:: >>> entry = Entry.create_examples()[0] - >>> entry.source # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + >>> entry.echemdb.source # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE {'citationKey': 'alves_2011_electrochemistry_6010', 'url': 'https://doi.org/10.1039/C0CP01001D', 'figure': '1a', @@ -166,7 +263,7 @@ def __getattr__(self, name): The returned descriptor can again be accessed in the same way:: - >>> entry.system.electrolyte.components[0].name + >>> entry.echemdb.system.electrolyte.components[0].name 'H2O' """ @@ -179,7 +276,7 @@ def __getitem__(self, name): EXAMPLES:: >>> entry = Entry.create_examples()[0] - >>> entry["source"] # doctest: +NORMALIZE_WHITESPACE + >>> entry["echemdb"]["source"] # doctest: +NORMALIZE_WHITESPACE {'citationKey': 'alves_2011_electrochemistry_6010', 'url': 'https://doi.org/10.1039/C0CP01001D', 'figure': '1a', @@ -191,123 +288,70 @@ def __getitem__(self, name): @property def _descriptor(self): - return Descriptor(self.resource.custom["metadata"]["echemdb"]) - - @property - def _metadata(self): r""" - Returns the metadata named "echemdb" associated with this entry. + Return a Descriptor object wrapping the entry's metadata. - EXAMPLES:: + The metadata structure depends on the :attr:`default_metadata_key` class attribute: - >>> entry = Entry.create_examples()[0] - >>> entry._metadata # doctest: +NORMALIZE_WHITESPACE - {...'source': {'citationKey': 'alves_2011_electrochemistry_6010',...} + - If ``default_metadata_key`` is an empty string (default in :class:`Entry`), + the entire ``metadata`` dict is wrapped as the descriptor. 
+ - If ``default_metadata_key`` is set to a non-empty string (e.g., "echemdb" in subclasses), + the descriptor wraps only the metadata under that specific key. - """ - return self.resource.custom["metadata"]["echemdb"] - - @property - def bibliography(self): - r""" - Return a pybtex bibliography object associated with this entry. + This allows subclasses to work with different metadata structures while maintaining + a consistent interface through the Descriptor class. EXAMPLES:: >>> entry = Entry.create_examples()[0] - >>> entry.bibliography # doctest: +NORMALIZE_WHITESPACE - Entry('article', - fields=[ - ('title', ... - ... + >>> entry._descriptor # doctest: +ELLIPSIS + {'echemdb': ...} - >>> entry_no_bib = Entry.create_examples(name="no_bibliography")[0] - >>> entry_no_bib.bibliography - '' - - """ - metadata = self._metadata.setdefault("source", {}) - citation = metadata.setdefault("bibdata", "") + >>> entry.echemdb.source.citationKey + 'alves_2011_electrochemistry_6010' - if not citation: - logger.warning(f"Entry with name {self.identifier} has no bibliography.") - return citation - from pybtex.database import parse_string - - bibliography = parse_string(citation, "bibtex") - return bibliography.entries[self.source.citationKey] + """ + return Descriptor(self._default_metadata) - def citation(self, backend="text"): + @property + def _metadata(self): r""" - Return a formatted reference for the entry's bibliography such as: + Returns the metadata associated with this entry. - J. Doe, et al., Journal Name, volume (YEAR) page, "Title" - - Rendering default is plain text 'text', but can be changed to any format - supported by pybtex, such as markdown 'md', 'latex' or 'html'. + The metadata may contain keys which nest entire metadata schemas (e.g., "echemdb", "myExperiment", etc.). + Use :attr:`_default_metadata` to access the subset determined by :attr:`default_metadata_key`. EXAMPLES:: >>> entry = Entry.create_examples()[0] - >>> entry.citation(backend='text') - 'O. 
B. Alves et al. Electrochemistry at Ru(0001) in a flowing CO-saturated electrolyte—reactive and inert adlayer phases. Physical Chemistry Chemical Physics, 13(13):6010–6021, 2011.' - >>> print(entry.citation(backend='md')) - O\. B\. Alves *et al\.* - *Electrochemistry at Ru\(0001\) in a flowing CO\-saturated electrolyte—reactive and inert adlayer phases*\. - *Physical Chemistry Chemical Physics*, 13\(13\):6010–6021, 2011\. + >>> entry._metadata # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + {...'echemdb': {...'source': {'citationKey': 'alves_2011_electrochemistry_6010',...}...} """ - from pybtex.style.formatting.unsrt import Style - - # TODO:: Remove `class EchemdbStyle` from citation and improve citation style. (see #104) - class EchemdbStyle(Style): - r""" - A citation style for the echemdb website. - """ - - def format_names(self, role, as_sentence=True): - from pybtex.style.template import node - - @node - def names(_, context, role): - persons = context["entry"].persons[role] - style = context["style"] - - names = [ - style.format_name(person, style.abbreviate_names) - for person in persons - ] + return self.resource.custom.setdefault("metadata", {}) - if len(names) == 1: - return names[0].format_data(context) - - from pybtex.style.template import tag, words - - # pylint: disable=no-value-for-parameter - return words(sep=" ")[names[0], tag("i")["et al."]].format_data( - context - ) - - # pylint: disable=no-value-for-parameter - names = names(role) + @property + def _default_metadata(self): + r""" + Returns the metadata subset based on :attr:`default_metadata_key`. - from pybtex.style.template import sentence + If :attr:`default_metadata_key` is empty, returns the entire metadata dict. + Otherwise, returns the metadata under the specified key. - return sentence[names] if as_sentence else names + This is useful for subclasses that want to work with a specific metadata structure. 
- def format_title(self, e, which_field, as_sentence=True): - from pybtex.style.template import field, sentence, tag + EXAMPLES:: - # pylint: disable=no-value-for-parameter - title = tag("i")[field(which_field)] - return sentence[title] if as_sentence else title + >>> entry = Entry.create_examples()[0] + >>> entry._default_metadata # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + {...'echemdb': {...'source': {'citationKey': 'alves_2011_electrochemistry_6010',...}...} - return ( - EchemdbStyle(abbreviate_names=True) - .format_entry("unused", self.bibliography) - .text.render_as(backend) - ) + """ + metadata = self._metadata + if self.default_metadata_key and self.default_metadata_key in metadata: + return metadata[self.default_metadata_key] + return metadata def field_unit(self, field_name): r""" @@ -503,7 +547,7 @@ def add_offset(self, field_name=None, offset=None, unit=""): @property def mutable_resource(self): r""" - Return the data of this entry's "MutableResource" as a data frame. + Return the entry's "MutableResource". EXAMPLES:: @@ -524,11 +568,24 @@ def mutable_resource(self): self.resource.custom.setdefault("MutableResource", "") if not self.resource.custom["MutableResource"]: - from frictionless import Schema + if self.resource.format not in ["csv", "pandas"]: + raise ValueError( + "MutableResource can only be created from resources of format 'csv' or 'pandas'." 
+ ) + + if self.resource.format == "csv": - from unitpackage.local import create_df_resource + from unitpackage.local import create_df_resource_from_tabular_resource + + self.resource.custom["MutableResource"] = ( + create_df_resource_from_tabular_resource(self.resource) + ) + + elif self.resource.format == "pandas": + self.resource.custom["MutableResource"] = self.resource + + from frictionless import Schema - self.resource.custom["MutableResource"] = create_df_resource(self.resource) self.resource.custom["MutableResource"].schema = Schema.from_descriptor( self.resource.schema.to_dict() ) @@ -556,6 +613,18 @@ def df(self): {'name': 'E', 'type': 'number', 'unit': 'V', 'reference': 'RHE'}, {'name': 'j', 'type': 'number', 'unit': 'A / m2'}] + TESTS:: + + >>> import pandas as pd + >>> from unitpackage.entry import Entry + >>> df = pd.DataFrame({'x':[1,2,3], 'y':[2,3,4]}) + >>> entry = Entry.from_df(df=df, basename='test_df') + >>> entry.df + x y + 0 1 2 + 1 2 3 + 2 3 4 + """ return self.mutable_resource.data @@ -590,6 +659,13 @@ def add_columns(self, df, new_fields): >>> new_entry.field_unit('P/A') Unit("A V / m2") + TESTS: + + Validate that the identifier is preserved:: + + >>> new_entry.identifier + 'alves_2011_electrochemistry_6010_f1a_solid' + """ import pandas as pd @@ -598,9 +674,10 @@ def add_columns(self, df, new_fields): fields = [field.to_dict() for field in self.mutable_resource.schema.fields] fields.extend(new_fields) - return self.from_df( - df=df_, metadata=self._metadata, basename=self.identifier, fields=fields - ) + entry = self.from_df(df=df_, basename=self.identifier).update_fields(fields) + entry.metadata.from_dict(self._metadata) + + return entry def __repr__(self): r""" @@ -709,18 +786,73 @@ def plot(self, x_label=None, y_label=None, name=None): return fig - @classmethod - def from_csv(cls, csvname, metadata=None, fields=None): + def update_fields(self, fields): r""" - Returns an entry constructed from a CSV with a single header line. 
+ Update the fields in the MutableResource in-place and return this entry. - EXAMPLES: + The :param fields: list must be structured such as + `[{'name':'E', 'unit': 'mV'}, {'name':'T', 'unit': 'K'}]`. - Units describing the fields can be provided:: + EXAMPLES:: - >>> import os - >>> fields = [{'name':'E', 'unit': 'mV'}, {'name':'I', 'unit': 'A'}] - >>> entry = Entry.from_csv(csvname='examples/from_csv/from_csv.csv', fields=fields) + >>> from unitpackage.entry import Entry + >>> entry = Entry.create_examples()[0] + >>> entry.mutable_resource.schema.fields # doctest: +NORMALIZE_WHITESPACE + [{'name': 't', 'type': 'number', 'unit': 's'}, + {'name': 'E', 'type': 'number', 'unit': 'V', 'reference': 'RHE'}, + {'name': 'j', 'type': 'number', 'unit': 'A / m2'}] + + Updating the fields returns the same entry with updated field metadata:: + + >>> fields = [{'name':'E', 'unit': 'mV'}, + ... {'name':'j', 'unit': 'uA / cm2'}, + ... {'name':'x', 'unit': 'm'}] + >>> entry.update_fields(fields) + Entry('alves_2011_electrochemistry_6010_f1a_solid') + + >>> entry.mutable_resource.schema.fields # doctest: +NORMALIZE_WHITESPACE + [{'name': 't', 'type': 'number', 'unit': 's'}, + {'name': 'E', 'type': 'number', 'unit': 'mV', 'reference': 'RHE'}, + {'name': 'j', 'type': 'number', 'unit': 'uA / cm2'}] + + >>> new_entry = entry.update_fields(fields) + >>> new_entry.mutable_resource.schema.fields # doctest: +NORMALIZE_WHITESPACE + [{'name': 't', 'type': 'number', 'unit': 's'}, + {'name': 'E', 'type': 'number', 'unit': 'mV', 'reference': 'RHE'}, + {'name': 'j', 'type': 'number', 'unit': 'uA / cm2'}] + + """ + from unitpackage.local import update_fields + + updated_fields = update_fields( + self.mutable_resource.schema.to_dict()["fields"], fields + ) + + from frictionless import Schema + + original_schema = self.mutable_resource.schema.to_dict() + original_schema["fields"] = updated_fields + self.mutable_resource.schema = Schema.from_descriptor(original_schema) + + return self + + @classmethod +
def from_csv( + cls, + csvname, + encoding=None, + header_lines=None, + column_header_lines=None, + decimal=None, + delimiters=None, + ): + r""" + Returns an entry constructed from a CSV. + + EXAMPLES:: + + >>> from unitpackage.entry import Entry + >>> entry = Entry.from_csv(csvname='examples/from_csv/from_csv.csv') >>> entry Entry('from_csv') @@ -728,23 +860,12 @@ def from_csv(cls, csvname, metadata=None, fields=None): {'name': 'from_csv', ... - Metadata can be appended:: - - >>> import os - >>> fields = [{'name':'E', 'unit': 'mV'}, {'name':'I', 'unit': 'A'}] - >>> metadata = {'user':'Max Doe'} - >>> entry = Entry.from_csv(csvname='examples/from_csv/from_csv.csv', metadata=metadata, fields=fields) - >>> entry.user - 'Max Doe' - .. important:: Upper case filenames are converted to lower case entry identifiers! A filename containing upper case characters:: - >>> import os - >>> fields = [{'name':'E', 'unit': 'mV'}, {'name':'I', 'unit': 'A'}] - >>> entry = Entry.from_csv(csvname='examples/from_csv/UpperCase.csv', fields=fields) + >>> entry = Entry.from_csv(csvname='examples/from_csv/UpperCase.csv') >>> entry Entry('uppercase') @@ -756,12 +877,40 @@ def from_csv(cls, csvname, metadata=None, fields=None): 'path': 'UpperCase.csv', ... 
+ An entry can also be constructed from a CSV with a more complex structure, such as multiple header lines:: + + >>> filename = 'examples/from_csv/from_csv_multiple_headers.csv' + >>> entry = Entry.from_csv(csvname='examples/from_csv/from_csv_multiple_headers.csv', column_header_lines=2) + >>> entry.resource # doctest: +NORMALIZE_WHITESPACE + {'name': 'from_csv_multiple_headers', + 'type': 'table', + 'data': [], + 'format': 'pandas', + 'mediatype': 'application/pandas', + 'schema': {'fields': [{'name': 'E / V', 'type': 'integer'}, + {'name': 'j / A / cm2', 'type': 'integer'}]}} + """ - from unitpackage.local import create_unitpackage + from unitpackage.local import create_tabular_resource_from_csv + + # pylint: disable=duplicate-code + resource = create_tabular_resource_from_csv( + csvname=csvname, + encoding=encoding, + header_lines=header_lines, + column_header_lines=column_header_lines, + decimal=decimal, + delimiters=delimiters, + ) - package = create_unitpackage(csvname=csvname, metadata=metadata, fields=fields) + from pathlib import Path - return cls(resource=package.resources[0]) + if resource.name == "memory": + resource.name = Path( + csvname + ).stem.lower() # Use stem (filename without extension) + + return cls(resource) @classmethod def _modify_fields(cls, original, alternative, keep_original_name_as=None): @@ -891,9 +1040,13 @@ def from_local(cls, filename): return cls(resource=package.resources[0]) @classmethod - def from_df(cls, df, metadata=None, fields=None, outdir=None, *, basename): + def from_df(cls, df, *, basename): r""" Returns an entry constructed from a pandas dataframe. + A name `basename` for the entry must be provided. + The name must be lower-case and contain only alphanumeric + characters along with `.`, `_` or `-`. + (Upper case characters are converted to lower case.)
EXAMPLES:: @@ -909,9 +1062,10 @@ def from_df(cls, df, metadata=None, fields=None, outdir=None, *, basename): >>> import os >>> fields = [{'name':'x', 'unit': 'm'}, {'name':'P', 'unit': 'um'}, {'name':'E', 'unit': 'V'}] >>> metadata = {'user':'Max Doe'} - >>> entry = Entry.from_df(df=df, basename='test_df', metadata=metadata, fields=fields) - >>> entry.user - 'Max Doe' + >>> entry = Entry.from_df(df=df, basename='test_df').update_fields(fields=fields) + >>> entry.metadata.from_dict(metadata) + >>> entry.metadata + {'user': 'Max Doe'} Save the entry:: @@ -935,27 +1089,17 @@ def from_df(cls, df, metadata=None, fields=None, outdir=None, *, basename): Verify that all fields are properly created even when they are not specified as fields:: >>> fields = [{'name':'x', 'unit': 'm'}, {'name':'P', 'unit': 'um'}, {'name':'E', 'unit': 'V'}] - >>> metadata = {'user':'Max Doe'} - >>> entry = Entry.from_df(df=df, basename='test_df', metadata=metadata, fields=fields) + >>> entry = Entry.from_df(df=df, basename='test_df').update_fields(fields=fields) >>> entry.resource.schema.fields [{'name': 'x', 'type': 'integer', 'unit': 'm'}, {'name': 'y', 'type': 'integer'}] """ - if outdir is None: - import atexit - import shutil - import tempfile - - outdir = tempfile.mkdtemp() - atexit.register(shutil.rmtree, outdir) + from unitpackage.local import create_df_resource_from_df - csvname = basename + ".csv" + resource = create_df_resource_from_df(df) + resource.name = basename.lower() - df.to_csv(os.path.join(outdir, csvname), index=False) - - return cls.from_csv( - os.path.join(outdir, csvname), metadata=metadata, fields=fields - ) + return cls(resource) def save(self, *, outdir, basename=None): r""" @@ -1018,7 +1162,8 @@ def save(self, *, outdir, basename=None): >>> from unitpackage.entry import Entry >>> df = pd.DataFrame({'x':[1,2,3], 'y':[2,3,4]}) >>> basename = 'save_datetime' - >>> entry = Entry.from_df(df=df, basename=basename, metadata={'currentTime':datetime.now()}) + >>> entry = 
Entry.from_df(df=df, basename=basename) + >>> entry.metadata.from_dict({'currentTime':datetime.now()}) >>> entry.save(outdir='./test/generated') >>> os.path.exists(f'test/generated/{basename}.json') and os.path.exists(f'test/generated/{basename}.csv') True @@ -1028,6 +1173,7 @@ def save(self, *, outdir, basename=None): os.makedirs(outdir) basename = basename or self.identifier + basename = basename.lower() csv_name = os.path.join(outdir, basename + ".csv") json_name = os.path.join(outdir, basename + ".json") @@ -1038,6 +1184,13 @@ def save(self, *, outdir, basename=None): self.resource.path = basename + ".csv" self.resource.name = basename + # convert a pandas resource into a csv resource + if self.resource.format == "pandas": + self.resource.format = "csv" + self.resource.mediatype = "text/csv" + if hasattr(self.resource, "data"): + del self.resource.data + resource = self.resource.to_dict() # update the fields from the main resource with those from the "MutableResource"resource diff --git a/unitpackage/entrypoint.py b/unitpackage/entrypoint.py index 1d68f68f..7d2df5c2 100644 --- a/unitpackage/entrypoint.py +++ b/unitpackage/entrypoint.py @@ -114,9 +114,11 @@ def convert(csv, device, outdir, metadata): with open(csv, "r") as file: # pylint: disable=unspecified-encoding loader = BaseLoader(file) - entry = Entry.from_df( - df=loader.df, basename=Path(csv).stem, metadata=metadata, fields=fields - ) + entry = Entry.from_df(df=loader.df, basename=Path(csv).stem) + if fields: + entry = entry.update_fields(fields=fields) + if metadata: + entry.metadata.from_dict(metadata) entry.save(outdir=outdir) diff --git a/unitpackage/loaders/baseloader.py b/unitpackage/loaders/baseloader.py index e5dd4b6c..37f0e5bb 100644 --- a/unitpackage/loaders/baseloader.py +++ b/unitpackage/loaders/baseloader.py @@ -422,6 +422,20 @@ def df(self): 0 0 0 1 1 1 + A file with two column header lines, which is sometimes, for example, + used for storing units to the values:: + + >>> from io import 
StringIO + >>> file = StringIO(r'''a,b + ... m,s + ... 0,0 + ... 1,1''') + >>> csv = BaseLoader(file, column_header_lines=2) + >>> csv.df + a / m b / s + 0 0 0 + 1 1 1 + """ import pandas as pd diff --git a/unitpackage/local.py b/unitpackage/local.py index 9f05eaa9..d3ba1abe 100644 --- a/unitpackage/local.py +++ b/unitpackage/local.py @@ -6,7 +6,7 @@ # ******************************************************************** # This file is part of unitpackage. # -# Copyright (C) 2021-2025 Albert Engstfeld +# Copyright (C) 2021-2026 Albert Engstfeld # Copyright (C) 2021 Johannes Hermann # Copyright (C) 2021 Julian Rüth # Copyright (C) 2021 Nicolas Hörmann @@ -36,15 +36,155 @@ logger = logging.getLogger("unitpackage") -def create_df_resource(resource): +def create_tabular_resource_from_csv( + csvname, + encoding=None, + header_lines=None, + column_header_lines=None, + decimal=None, + delimiters=None, +): + r""" + Return a resource built from a provided CSV. + + EXAMPLES:: + + For standard CSV files (single header line and subsequent + lines with data, using `.` as decimal separator.) + a tabular data resource is created:: + + >>> filename = './examples/from_csv/from_csv.csv' + >>> resource = create_tabular_resource_from_csv(filename) + >>> resource # doctest: +NORMALIZE_WHITESPACE + {'name': 'from_csv', + 'type': 'table', + 'path': 'from_csv.csv', + 'scheme': 'file', + 'format': 'csv', + 'mediatype': 'text/csv', ... 
+ + For CSV files with a more complex structure (header, multiple column header lines, or other separators) + a pandas dataframe resource is created instead:: + + >>> filename = 'examples/from_csv/from_csv_multiple_headers.csv' + >>> resource = create_tabular_resource_from_csv(csvname=filename, column_header_lines=2) + >>> resource # doctest: +NORMALIZE_WHITESPACE + {'name': 'memory', + 'type': 'table', + 'data': [], + 'format': 'pandas', + 'mediatype': 'application/pandas', + 'schema': {'fields': [{'name': 'E / V', 'type': 'integer'}, + {'name': 'j / A / cm2', 'type': 'integer'}]}} + + + """ + csv_basename = os.path.basename(csvname) + + if not header_lines and not column_header_lines and not decimal and not delimiters: + resource = Resource( + path=csv_basename, + basepath=os.path.dirname(csvname) or ".", + ) + resource.infer() + return resource + + # pylint: disable=duplicate-code + return create_df_resource_from_csv( + csvname, + encoding=encoding, + header_lines=header_lines, + column_header_lines=column_header_lines, + decimal=decimal, + delimiters=delimiters, + ) + + +def create_df_resource_from_csv( + csvname, + encoding=None, + header_lines=None, + column_header_lines=None, + decimal=None, + delimiters=None, +): + r""" + Create a pandas dataframe resource from a CSV file. 
+ + EXAMPLES:: + + >>> from unitpackage.local import create_df_resource_from_csv + >>> filename = 'examples/from_csv/from_csv_multiple_headers.csv' + >>> resource = create_df_resource_from_csv(csvname='examples/from_csv/from_csv_multiple_headers.csv', column_header_lines=2) + >>> resource # doctest: +NORMALIZE_WHITESPACE + {'name': 'memory', + 'type': 'table', + 'data': [], + 'format': 'pandas', + 'mediatype': 'application/pandas', + 'schema': {'fields': [{'name': 'E / V', 'type': 'integer'}, + {'name': 'j / A / cm2', 'type': 'integer'}]}} + + """ + + from unitpackage.loaders.baseloader import BaseLoader + + with open(csvname, "r", encoding=encoding or "utf-8") as f: + csv = BaseLoader( + f, + header_lines=header_lines, + column_header_lines=column_header_lines, + decimal=decimal, + delimiters=delimiters, + ) + + return create_df_resource_from_df(csv.df) + + +def create_df_resource_from_df(df): + r""" + Return a pandas dataframe resource for a pandas DataFrame. + + EXAMPLES:: + + >>> data = {'x': [1, 2, 3], 'y': [4, 5, 6]} + >>> import pandas as pd + >>> df = pd.DataFrame(data) + >>> from unitpackage.local import create_df_resource_from_df + >>> resource = create_df_resource_from_df(df) + >>> resource # doctest: +NORMALIZE_WHITESPACE + {'name': 'memory', + 'type': 'table', + 'data': [], + 'format': 'pandas', ... + + >>> resource.data + x y + 0 1 4 + 1 2 5 + 2 3 6 + + >>> resource.format + 'pandas' + + + """ + df_resource = Resource(df) + df_resource.infer() + + return df_resource + + +def create_df_resource_from_tabular_resource(resource): r""" Return a pandas dataframe resource for a frictionless Tabular Resource. 
EXAMPLES:: >>> from frictionless import Package - >>> resource = Package("./examples/local/no_bibliography/no_bibliography.json").resources[0] - >>> df_resource = create_df_resource(resource) # doctest: +NORMALIZE_WHITESPACE + >>> from unitpackage.local import create_df_resource_from_tabular_resource + >>> tabular_resource = Package("./examples/local/no_bibliography/no_bibliography.json").resources[0] + >>> df_resource = create_df_resource_from_tabular_resource(tabular_resource) # doctest: +NORMALIZE_WHITESPACE >>> df_resource {'name': 'memory', ... @@ -70,10 +210,6 @@ def create_df_resource(resource): 2 3 6 """ - if not resource: - raise ValueError( - "dataframe resource can not be created since the Data Package has no resources." - ) descriptor_path = ( resource.basepath + "/" + resource.path if resource.basepath else resource.path ) @@ -123,28 +259,38 @@ def collect_datapackages(data): return [Package(package) for package in packages] -def create_unitpackage(csvname, metadata=None, fields=None): +def update_fields(original_fields, new_fields): r""" - Return a Data Package built from a :param metadata: dict and tabular data - in :param csvname: str. + Return a new list of fields where a list of fields has been updated + based on a new list of fields. - The :param fields: list must must be structured such as - `[{'name':'E', 'unit': 'mV'}, {'name':'T', 'unit': 'K'}]`. + The :param original_fields: list and :param new_fields: list + must be structured such as + `[{'name':'E', 'unit': 'mV'}, {'name':'T', 'unit': 'K'}]` + and each entry must contain a key `name` corresponding to a field name + in the original fields. EXAMPLES:: - >>> fields = [{'name':'E', 'unit': 'mV'}, {'name':'I', 'unit': 'A'}] - >>> package = create_unitpackage("./examples/from_csv/from_csv.csv", fields=fields) - >>> package # doctest: +NORMALIZE_WHITESPACE - {'resources': [{'name': - ...
+ >>> from unitpackage.local import update_fields, create_tabular_resource_from_csv + >>> schema = create_tabular_resource_from_csv("./examples/from_csv/from_csv.csv").schema + >>> original_fields = schema.to_dict()['fields'] + >>> original_fields # doctest: +NORMALIZE_WHITESPACE + [{'name': 'E', 'type': 'integer'}, + {'name': 'I', 'type': 'integer'}] + + >>> new_fields = [{'name':'E', 'unit': 'mV'}, {'name':'I', 'unit': 'A'}, {'name':'x', 'unit': 'm'}] + >>> updated_fields = update_fields(original_fields, new_fields) + >>> updated_fields # doctest: +NORMALIZE_WHITESPACE + [{'name': 'E', 'type': 'integer', 'unit': 'mV'}, + {'name': 'I', 'type': 'integer', 'unit': 'A'}] TESTS: Invalid fields:: >>> fields = 'not a list' - >>> package = create_unitpackage("./examples/from_csv/from_csv.csv", fields=fields) # doctest: +NORMALIZE_WHITESPACE + >>> updated_fields = update_fields(original_fields, fields) Traceback (most recent call last): ... ValueError: 'fields' must be a list such as @@ -154,30 +300,22 @@ def create_unitpackage(csvname, metadata=None, fields=None): More fields than required:: >>> fields = [{'name':'E', 'unit': 'mV'}, {'name':'I', 'unit': 'A'}, {'name':'x', 'unit': 'm'}] - >>> package = create_unitpackage("./examples/from_csv/from_csv.csv", fields=fields) # doctest: +NORMALIZE_WHITESPACE + >>> updated_fields = update_fields(original_fields, fields) + >>> updated_fields # doctest: +NORMALIZE_WHITESPACE + [{'name': 'E', 'type': 'integer', 'unit': 'mV'}, + {'name': 'I', 'type': 'integer', 'unit': 'A'}] Part of the fields specified: >>> fields = [{'name':'E', 'unit': 'mV'}] - >>> package = create_unitpackage("./examples/from_csv/from_csv.csv", fields=fields) # doctest: +NORMALIZE_WHITESPACE + >>> updated_fields = update_fields(original_fields, fields) + >>> updated_fields # doctest: +NORMALIZE_WHITESPACE + [{'name': 'E', 'type': 'integer', 'unit': 'mV'}, + {'name': 'I', 'type': 'integer'}] """ - csv_basename = os.path.basename(csvname) - - resource = Resource( 
- path=csv_basename, - basepath=os.path.dirname(csvname) or ".", - ) - - resource.infer() - - resource.custom.setdefault("metadata", {}) - resource.custom["metadata"].setdefault("echemdb", metadata) - - if fields: - # Update fields in the Resource describing the data in the CSV - resource_schema = resource.schema + def validate_field_structure(fields): if not isinstance(fields, list): raise ValueError( "'fields' must be a list such as \ @@ -195,35 +333,77 @@ def create_unitpackage(csvname, metadata=None, fields=None): e.g., `{'name':'j', 'unit': 'uA / cm2'}`" ) - provided_schema = Schema.from_descriptor({"fields": fields}, allow_invalid=True) + validate_field_structure(original_fields) + validate_field_structure(new_fields) + + original_schema = Schema({"fields": original_fields}) + + # Create a lookup dict for provided fields by name + provided_fields_dict = { + field["name"]: field for field in new_fields if "name" in field + } + + updated_fields = [] + unspecified_fields = [] + unused_provided_fields = [] + + # First, update fields that exist in the original schema, + # and record which original fields have no additional information provided. + for name in original_schema.field_names: + if name in provided_fields_dict: + # Start with original field, then update only the keys provided in the input + updated_field = original_schema.get_field(name).to_dict() + updated_field.update(provided_fields_dict[name]) + updated_fields.append(updated_field) + else: + unspecified_fields.append(name) + updated_fields.append(original_schema.get_field(name).to_dict()) + + # Then, record any provided fields that are not present in the original schema. + for name in provided_fields_dict.keys(): + if name not in original_schema.field_names: + unused_provided_fields.append(name) + if len(unspecified_fields) != 0: + logger.warning( + f"Additional information was not provided for fields {unspecified_fields}." 
+ ) - new_fields = [] - unspecified_fields = [] + if len(unused_provided_fields) != 0: + logger.warning( + f"Fields with names {unused_provided_fields} were provided but do not appear in the field names of tabular resource {original_schema.field_names}." + ) - for name in resource_schema.field_names: - if name in provided_schema.field_names: - new_fields.append( - provided_schema.get_field(name).to_dict() - | resource_schema.get_field(name).to_dict() - ) - else: - new_fields.append(resource_schema.get_field(name).to_dict()) + return updated_fields - if len(unspecified_fields) != 0: - logger.warning( - f"Additional information were not provided for fields {unspecified_fields}." - ) - unused_provided_fields = [] - for name in provided_schema.field_names: - if name not in resource_schema.field_names: - unused_provided_fields.append(name) - if len(unused_provided_fields) != 0: - logger.warning( - f"Fields with names {unused_provided_fields} was provided but does not appear in the field names of tabular resource {resource_schema.field_names}." - ) +def create_unitpackage(resource, metadata=None, fields=None): + r""" + Return a Data Package built from a :param metadata: dict and tabular data + in :param resource: frictionless.Resource. + + The :param fields: list must be structured such as + `[{'name':'E', 'unit': 'mV'}, {'name':'T', 'unit': 'K'}]`. + + EXAMPLES:: + + >>> from unitpackage.local import create_tabular_resource_from_csv, create_unitpackage + >>> resource = create_tabular_resource_from_csv("./examples/from_csv/from_csv.csv") + >>> new_fields = [{'name':'E', 'unit': 'mV'}, {'name':'I', 'unit': 'A'}] + >>> package = create_unitpackage(resource=resource, fields=new_fields) + >>> package # doctest: +NORMALIZE_WHITESPACE + {'resources': [{'name': + ... 
- resource.schema = Schema.from_descriptor({"fields": new_fields}) + """ + resource.custom.setdefault("metadata", {}) + resource.custom["metadata"] = metadata + + if fields: + # Update fields in the Resource describing the data in the CSV + updated_fields = update_fields(resource.schema.to_dict()["fields"], fields) + original_schema = resource.schema.to_dict() + original_schema["fields"] = updated_fields + resource.schema = Schema.from_descriptor(original_schema) package = Package(resources=[resource]) diff --git a/unitpackage/metadata.py b/unitpackage/metadata.py new file mode 100644 index 00000000..ef9a5735 --- /dev/null +++ b/unitpackage/metadata.py @@ -0,0 +1,218 @@ +r""" +Metadata management for unitpackage entries. + +This module provides the MetadataDescriptor class that manages metadata +for Entry objects, supporting both dict and attribute access, and providing +methods to load metadata from various sources (YAML, JSON, dict). + +EXAMPLES: + +Access metadata with dict-style or attribute-style syntax:: + + >>> from unitpackage.entry import Entry + >>> entry = Entry.create_examples()[0] + >>> entry.metadata['echemdb']['source']['citationKey'] + 'alves_2011_electrochemistry_6010' + + >>> entry.metadata.echemdb.source.citationKey + 'alves_2011_electrochemistry_6010' + +Load metadata from external sources:: + + >>> entry.metadata.from_dict({'custom': {'key': 'value'}}) + >>> entry.metadata['custom']['key'] + 'value' + +""" + +# ******************************************************************** +# This file is part of unitpackage. +# +# Copyright (C) 2026 Albert Engstfeld +# +# unitpackage is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
class MetadataDescriptor:
    r"""
    Manages metadata for an Entry, supporting both dict and attribute access,
    and providing methods to load metadata from various sources.

    The metadata itself is not stored on this object; it lives in
    ``entry.resource.custom["metadata"]`` and is looked up again on every
    access, so changes made through this object are visible on the entry's
    resource and vice versa.

    EXAMPLES::

        >>> from unitpackage.entry import Entry
        >>> entry = Entry.create_examples()[0]
        >>> entry.metadata # doctest: +ELLIPSIS
        {'echemdb': {'experimental': ...

        >>> entry.metadata['echemdb']['source']['citationKey']
        'alves_2011_electrochemistry_6010'

        >>> entry.metadata.echemdb.source.citationKey
        'alves_2011_electrochemistry_6010'

    """

    def __init__(self, entry):
        # Stored via object.__setattr__ so the attribute lands directly in the
        # instance dict regardless of any attribute hooks on this class.
        object.__setattr__(self, "_entry", entry)

    def __repr__(self):
        return repr(self._metadata)

    @property
    def _metadata(self):
        r"""
        Return the metadata dict stored on the entry's resource, creating an
        empty one under the ``"metadata"`` key if none exists yet.
        """
        return self._entry.resource.custom.setdefault("metadata", {})

    @property
    def _descriptor(self):
        r"""
        Return a fresh ``Descriptor`` wrapping the current metadata.
        """
        return Descriptor(self._metadata)

    def __getitem__(self, key):
        r"""
        Dict-style access to metadata with descriptor support.

        EXAMPLES::

            >>> from unitpackage.entry import Entry
            >>> entry = Entry.create_examples()[0]
            >>> entry.metadata['echemdb']['source']['citationKey']
            'alves_2011_electrochemistry_6010'

        """
        return self._descriptor[key]

    def __setitem__(self, key, value):
        r"""
        Dict-style assignment to metadata.

        EXAMPLES::

            >>> from unitpackage.entry import Entry
            >>> entry = Entry.create_examples()[0]
            >>> entry.metadata['custom_key'] = {'data': 'value'}
            >>> entry.metadata['custom_key']
            {'data': 'value'}

        """
        self._metadata[key] = value

    def __getattr__(self, name):
        r"""
        Attribute-style access to metadata with full descriptor support.

        Only called when normal attribute lookup fails. Lookups of ``_entry``
        and of dunder names raise ``AttributeError`` instead of being
        forwarded to the metadata.

        EXAMPLES::

            >>> from unitpackage.entry import Entry
            >>> entry = Entry.create_examples()[0]
            >>> entry.metadata.echemdb.source.citationKey
            'alves_2011_electrochemistry_6010'

        """
        # If "_entry" is not in the instance dict (the object was created
        # without __init__, as copy/pickle do before restoring state), then
        # delegating would go _descriptor -> _metadata -> _entry ->
        # __getattr__ again and recurse without bound. Protocol probes such
        # as hasattr(obj, "__setstate__") must get a plain AttributeError too.
        if name == "_entry" or (name.startswith("__") and name.endswith("__")):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        return getattr(self._descriptor, name)

    def from_dict(self, data):
        r"""
        Load metadata from a dictionary, replacing all existing metadata.

        EXAMPLES::

            >>> from unitpackage.entry import Entry
            >>> entry = Entry.create_examples()[0]
            >>> entry.metadata.from_dict({'echemdb': {'source': {'citationKey': 'test'}}})
            >>> entry.metadata['echemdb']['source']['citationKey']
            'test'

        """
        self._entry.resource.custom["metadata"] = data

    def _add_metadata(self, key, data):
        r"""
        Add metadata under a specific key.

        If ``key`` is falsy (``None`` or an empty string), ``data`` replaces
        the entire metadata instead.

        EXAMPLES::

            >>> from unitpackage.entry import Entry
            >>> entry = Entry.create_examples()[0]
            >>> entry.metadata._add_metadata('custom_key', {'data': 'value'})
            >>> entry.metadata['custom_key']
            {'data': 'value'}

        """
        if key:
            # Go through the _metadata property so that a resource without a
            # "metadata" entry gets one created instead of raising KeyError,
            # consistent with __setitem__.
            self._metadata[key] = data
        else:
            self.from_dict(data)

    def from_yaml(self, filename, key=None):
        r"""
        Load metadata from a YAML file.

        If a key is provided, the loaded data is stored under that key.
        Otherwise, it replaces the entire metadata dict.

        EXAMPLES::

            >>> import os
            >>> import tempfile
            >>> import yaml
            >>> from unitpackage.entry import Entry
            >>> entry = Entry.create_examples()[0]
            >>> with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
            ...     yaml.dump({'source': {'citationKey': 'yaml_test'}}, f)
            ...     temp_path = f.name
            >>> entry.metadata.from_yaml(temp_path, key='echemdb')
            >>> entry.metadata['echemdb']['source']['citationKey']
            'yaml_test'
            >>> os.unlink(temp_path)

        """
        import yaml

        with open(filename, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)

        self._add_metadata(key, data)

    def from_json(self, filename, key=None):
        r"""
        Load metadata from a JSON file.

        If a key is provided, the loaded data is stored under that key.
        Otherwise, it replaces the entire metadata dict.

        EXAMPLES::

            >>> import os
            >>> import json
            >>> import tempfile
            >>> from unitpackage.entry import Entry
            >>> entry = Entry.create_examples()[0]
            >>> with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
            ...     json.dump({'source': {'citationKey': 'json_test'}}, f)
            ...     temp_path = f.name
            >>> entry.metadata.from_json(temp_path, key='echemdb')
            >>> entry.metadata['echemdb']['source']['citationKey']
            'json_test'
            >>> os.unlink(temp_path)

        """
        import json

        with open(filename, "r", encoding="utf-8") as f:
            data = json.load(f)

        self._add_metadata(key, data)