Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/doc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
uses: actions/checkout@v6
with: { submodules: recursive }
- uses: prefix-dev/setup-pixi@v0.9.3
with: { pixi-version: v0.50.2 }
with: { pixi-version: v0.63.2 }
- name: build documentation
run: |
pixi run doc
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: checkout
uses: actions/checkout@v6
- uses: prefix-dev/setup-pixi@v0.9.3
with: { pixi-version: v0.50.2 }
with: { pixi-version: v0.63.2 }
- name: pylint
run: pixi run pylint
- name: black
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
with: { submodules: recursive }
- uses: prefix-dev/setup-pixi@v0.9.3
with:
pixi-version: v0.50.2
pixi-version: v0.63.2
- name: doctest
run: |
pixi run -e ${{ matrix.environment }} doctest ${{ matrix.remote-data == 'remote' && '--remote-data' || '' }}
Expand Down
4 changes: 4 additions & 0 deletions examples/from_csv/from_csv_multiple_headers.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
E,j
V,A / cm2
1,2
3,4
19 changes: 18 additions & 1 deletion pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

102 changes: 78 additions & 24 deletions unitpackage/entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
# ********************************************************************
# This file is part of unitpackage.
#
# Copyright (C) 2021-2025 Albert Engstfeld
# Copyright (C) 2021-2026 Albert Engstfeld
# Copyright (C) 2021 Johannes Hermann
# Copyright (C) 2021-2022 Julian Rüth
# Copyright (C) 2021 Nicolas Hörmann
Expand Down Expand Up @@ -503,7 +503,7 @@ def add_offset(self, field_name=None, offset=None, unit=""):
@property
def mutable_resource(self):
r"""
Return the data of this entry's "MutableResource" as a data frame.
Return the entry's "MutableResource".

EXAMPLES::

Expand All @@ -524,11 +524,24 @@ def mutable_resource(self):
self.resource.custom.setdefault("MutableResource", "")

if not self.resource.custom["MutableResource"]:
from frictionless import Schema
if self.resource.format not in ["csv", "pandas"]:
raise ValueError(
"MutableResource can only be created from resources of format 'csv' or 'pandas'."
)

if self.resource.format == "csv":

from unitpackage.local import create_df_resource_from_tabular_resource

self.resource.custom["MutableResource"] = (
create_df_resource_from_tabular_resource(self.resource)
)

from unitpackage.local import create_df_resource
elif self.resource.format == "pandas":
self.resource.custom["MutableResource"] = self.resource

from frictionless import Schema

self.resource.custom["MutableResource"] = create_df_resource(self.resource)
self.resource.custom["MutableResource"].schema = Schema.from_descriptor(
self.resource.schema.to_dict()
)
Expand Down Expand Up @@ -556,6 +569,18 @@ def df(self):
{'name': 'E', 'type': 'number', 'unit': 'V', 'reference': 'RHE'},
{'name': 'j', 'type': 'number', 'unit': 'A / m2'}]

TESTS::

>>> import pandas as pd
>>> from unitpackage.entry import Entry
>>> df = pd.DataFrame({'x':[1,2,3], 'y':[2,3,4]})
>>> entry = Entry.from_df(df=df, basename='test_df')
>>> entry.df
x y
0 1 2
1 2 3
2 3 4

"""
return self.mutable_resource.data

Expand Down Expand Up @@ -710,15 +735,25 @@ def plot(self, x_label=None, y_label=None, name=None):
return fig

@classmethod
def from_csv(cls, csvname, metadata=None, fields=None):
def from_csv(
cls,
csvname,
encoding=None,
header_lines=None,
column_header_lines=None,
decimal=None,
delimiters=None,
metadata=None,
fields=None,
):
r"""
Returns an entry constructed from a CSV with a single header line.

EXAMPLES:

Units describing the fields can be provided::

>>> import os
>>> from unitpackage.entry import Entry
>>> fields = [{'name':'E', 'unit': 'mV'}, {'name':'I', 'unit': 'A'}]
>>> entry = Entry.from_csv(csvname='examples/from_csv/from_csv.csv', fields=fields)
>>> entry
Expand All @@ -730,7 +765,6 @@ def from_csv(cls, csvname, metadata=None, fields=None):

Metadata can be appended::

>>> import os
>>> fields = [{'name':'E', 'unit': 'mV'}, {'name':'I', 'unit': 'A'}]
>>> metadata = {'user':'Max Doe'}
>>> entry = Entry.from_csv(csvname='examples/from_csv/from_csv.csv', metadata=metadata, fields=fields)
Expand All @@ -742,7 +776,6 @@ def from_csv(cls, csvname, metadata=None, fields=None):

A filename containing upper case characters::

>>> import os
>>> fields = [{'name':'E', 'unit': 'mV'}, {'name':'I', 'unit': 'A'}]
>>> entry = Entry.from_csv(csvname='examples/from_csv/UpperCase.csv', fields=fields)
>>> entry
Expand All @@ -757,9 +790,24 @@ def from_csv(cls, csvname, metadata=None, fields=None):
...

"""
from unitpackage.local import create_unitpackage
from unitpackage.local import (
create_tabular_resource_from_csv,
create_unitpackage,
)

package = create_unitpackage(csvname=csvname, metadata=metadata, fields=fields)
# pylint: disable=duplicate-code
resource = create_tabular_resource_from_csv(
csvname=csvname,
encoding=encoding,
header_lines=header_lines,
column_header_lines=column_header_lines,
decimal=decimal,
delimiters=delimiters,
)

package = create_unitpackage(
resource=resource, metadata=metadata, fields=fields
)

return cls(resource=package.resources[0])

Expand Down Expand Up @@ -891,9 +939,13 @@ def from_local(cls, filename):
return cls(resource=package.resources[0])

@classmethod
def from_df(cls, df, metadata=None, fields=None, outdir=None, *, basename):
def from_df(cls, df, metadata=None, fields=None, *, basename):
r"""
Returns an entry constructed from a pandas dataframe.
A name `basename` for the entry must be provided.
The name must be lower-case and contain only alphanumeric
characters along with `.`, `_` or `-` characters.
(Upper case characters are converted to lower case.)

EXAMPLES::

Expand Down Expand Up @@ -941,21 +993,15 @@ def from_df(cls, df, metadata=None, fields=None, outdir=None, *, basename):
[{'name': 'x', 'type': 'integer', 'unit': 'm'}, {'name': 'y', 'type': 'integer'}]

"""
if outdir is None:
import atexit
import shutil
import tempfile

outdir = tempfile.mkdtemp()
atexit.register(shutil.rmtree, outdir)
from unitpackage.local import create_df_resource_from_df, create_unitpackage

csvname = basename + ".csv"
resource = create_df_resource_from_df(df)
resource.name = basename.lower()

df.to_csv(os.path.join(outdir, csvname), index=False)

return cls.from_csv(
os.path.join(outdir, csvname), metadata=metadata, fields=fields
package = create_unitpackage(
resource=resource, metadata=metadata, fields=fields
)
return cls(resource=package.resources[0])

def save(self, *, outdir, basename=None):
r"""
Expand Down Expand Up @@ -1028,6 +1074,7 @@ def save(self, *, outdir, basename=None):
os.makedirs(outdir)

basename = basename or self.identifier
basename = basename.lower()
csv_name = os.path.join(outdir, basename + ".csv")
json_name = os.path.join(outdir, basename + ".json")

Expand All @@ -1038,6 +1085,13 @@ def save(self, *, outdir, basename=None):
self.resource.path = basename + ".csv"
self.resource.name = basename

# convert a pandas resource into a csv resource
if self.resource.format == "pandas":
self.resource.format = "csv"
self.resource.mediatype = "text/csv"
if hasattr(self.resource, "data"):
del self.resource.data

resource = self.resource.to_dict()

# update the fields from the main resource with those from the "MutableResource" resource
Expand Down
14 changes: 14 additions & 0 deletions unitpackage/loaders/baseloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,20 @@ def df(self):
0 0 0
1 1 1

A file with two column header lines; the second line is sometimes used,
for example, to store the units of the values::

>>> from io import StringIO
>>> file = StringIO(r'''a,b
... m,s
... 0,0
... 1,1''')
>>> csv = BaseLoader(file, column_header_lines=2)
>>> csv.df
a / m b / s
0 0 0
1 1 1

"""
import pandas as pd

Expand Down
Loading
Loading