From 3adf831fdb574466e68a81a77d61efcc4802c258 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Wed, 12 Nov 2025 19:30:58 +0500 Subject: [PATCH] feat: add support for xlsx named table --- .github/workflows/CI.yml | 2 +- .pre-commit-config.yaml | 15 +- pyproject.toml | 2 + python/python_calamine/__init__.py | 10 +- python/python_calamine/_python_calamine.pyi | 148 ++++++++++++-- src/lib.rs | 19 +- src/types/errors.rs | 68 ++++++- src/types/mod.rs | 5 +- src/types/table.rs | 71 +++++++ src/types/workbook.rs | 203 +++++++++++++++++--- src/utils.rs | 55 ------ tests/data/table-multiple.xlsx | Bin 0 -> 11520 bytes tests/test_tables.py | 73 +++++++ 13 files changed, 561 insertions(+), 110 deletions(-) create mode 100644 src/types/table.rs delete mode 100644 src/utils.rs create mode 100644 tests/data/table-multiple.xlsx create mode 100644 tests/test_tables.py diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index c3bba80..d2a9ead 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -117,7 +117,7 @@ jobs: py ${{ env.pythonLocation }} - - run: pip install pre-commit + - run: pip install pre-commit mypy if: steps.cache-py.outputs.cache-hit != 'true' - run: pip install . 
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 53eb6ce..e18896c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,11 +34,20 @@ repos: rev: 24.1.1 hooks: - id: black - - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.8.0 + - repo: local hooks: - id: mypy - exclude: ^tests/.*$ + name: mypy + entry: mypy + language: python + pass_filenames: false + - id: mypy-stubtest + name: mypy-stubtest + entry: stubtest + args: + - python_calamine + language: python + pass_filenames: false - repo: local hooks: - id: rust-linting diff --git a/pyproject.toml b/pyproject.toml index ad48dd7..563837c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ dynamic = ["version"] dev = [ "maturin~=1.0", "pre-commit~=4.3", + "mypy~=1.18.2", "pytest~=9.0", "pandas[excel]~=2.2", ] @@ -36,6 +37,7 @@ profile = "black" [tool.mypy] python_version = "3.10" +packages = ["python_calamine"] ignore_missing_imports = false disallow_untyped_defs = true check_untyped_defs = true diff --git a/python/python_calamine/__init__.py b/python/python_calamine/__init__.py index d2f57ea..c14d5db 100644 --- a/python/python_calamine/__init__.py +++ b/python/python_calamine/__init__.py @@ -1,11 +1,15 @@ from ._python_calamine import ( CalamineError, CalamineSheet, + CalamineTable, CalamineWorkbook, PasswordError, SheetMetadata, SheetTypeEnum, SheetVisibleEnum, + TableNotFound, + TablesNotLoaded, + TablesNotSupported, WorkbookClosed, WorksheetNotFound, XmlError, @@ -16,14 +20,18 @@ __all__ = ( "CalamineError", "CalamineSheet", + "CalamineTable", "CalamineWorkbook", "PasswordError", "SheetMetadata", "SheetTypeEnum", "SheetVisibleEnum", + "TableNotFound", + "TablesNotLoaded", + "TablesNotSupported", + "WorkbookClosed", "WorksheetNotFound", "XmlError", "ZipError", - "WorkbookClosed", "load_workbook", ) diff --git a/python/python_calamine/_python_calamine.pyi b/python/python_calamine/_python_calamine.pyi index 32854b8..c59c067 100644 --- 
a/python/python_calamine/_python_calamine.pyi +++ b/python/python_calamine/_python_calamine.pyi @@ -1,6 +1,6 @@ +# Some documentations from upstream under MIT License. See authors in https://github.com/tafia/calamine from __future__ import annotations -import contextlib import datetime import enum import os @@ -23,34 +23,57 @@ class SheetTypeEnum(enum.Enum): @typing.final class SheetVisibleEnum(enum.Enum): Visible = ... + """Visible.""" Hidden = ... + """Hidden.""" VeryHidden = ... + """The sheet is hidden and cannot be displayed using the user interface. It is supported only by Excel formats.""" @typing.final class SheetMetadata: name: str + """Name of sheet.""" typ: SheetTypeEnum + """Type of sheet. + + Only Excel formats support this. Default value for ODS is `WorkSheet`. + """ visible: SheetVisibleEnum + """Visible of sheet.""" - def __init__( - self, name: str, typ: SheetTypeEnum, visible: SheetVisibleEnum - ) -> None: ... + def __new__( + cls, name: str, typ: SheetTypeEnum, visible: SheetVisibleEnum + ) -> SheetMetadata: ... @typing.final class CalamineSheet: name: str @property - def height(self) -> int: ... + def height(self) -> int: + """Get the row height of a sheet data. + + The height is defined as the number of rows between the start and end positions. + """ + @property - def width(self) -> int: ... + def width(self) -> int: + """Get the column width of a sheet data. + + The width is defined as the number of columns between the start and end positions. + """ + @property def total_height(self) -> int: ... @property def total_width(self) -> int: ... @property - def start(self) -> tuple[int, int] | None: ... + def start(self) -> tuple[int, int] | None: + """Get top left cell position of a sheet data.""" + @property - def end(self) -> tuple[int, int] | None: ... 
+ def end(self) -> tuple[int, int] | None: + """Get bottom right cell position of a sheet data.""" + def to_python( self, skip_empty_area: bool = True, nrows: int | None = None ) -> list[ @@ -102,34 +125,96 @@ class CalamineSheet: """ @typing.final -class CalamineWorkbook(contextlib.AbstractContextManager): +class CalamineTable: + name: str + """Get the name of the table.""" + sheet: str + """Get the name of the parent worksheet for a table.""" + columns: list[str] + """Get the header names of the table columns. + + In Excel table headers can be hidden but the table will still have + column header names. + """ + @property + def height(self) -> int: + """Get the row height of a table data. + + The height is defined as the number of rows between the start and end positions. + """ + + @property + def width(self) -> int: + """Get the column width of a table data. + + The width is defined as the number of columns between the start and end positions. + """ + + @property + def start(self) -> tuple[int, int] | None: + """Get top left cell position of a table data.""" + + @property + def end(self) -> tuple[int, int] | None: + """Get bottom right cell position of a table data.""" + + def to_python( + self, + ) -> list[ + list[ + int + | float + | str + | bool + | datetime.time + | datetime.date + | datetime.datetime + | datetime.timedelta + ] + ]: + """Returning data from table as list of lists.""" + +@typing.final +class CalamineWorkbook: path: str | None + """Path to file. 
`None` if bytes was loaded.""" sheet_names: list[str] + """All sheet names of this workbook, in workbook order.""" sheets_metadata: list[SheetMetadata] + """All sheets metadata of this workbook, in workbook order.""" + table_names: list[str] | None + """All table names of this workbook.""" @classmethod def from_object( - cls, path_or_filelike: str | os.PathLike | ReadBuffer + cls, path_or_filelike: str | os.PathLike | ReadBuffer, load_tables: bool = False ) -> "CalamineWorkbook": """Determining type of pyobject and reading from it. Args: path_or_filelike (str | os.PathLike | ReadBuffer): path to file or IO (must imlpement read/seek methods). + load_tables (bool): load Excel tables (supported for XLSX only). """ @classmethod - def from_path(cls, path: str | os.PathLike) -> "CalamineWorkbook": + def from_path( + cls, path: str | os.PathLike, load_tables: bool = False + ) -> "CalamineWorkbook": """Reading file from path. Args: path (str | os.PathLike): path to file. + load_tables (bool): load Excel tables (supported for XLSX only). """ @classmethod - def from_filelike(cls, filelike: ReadBuffer) -> "CalamineWorkbook": + def from_filelike( + cls, filelike: ReadBuffer, load_tables: bool = False + ) -> "CalamineWorkbook": """Reading file from IO. Args: filelike : IO (must imlpement read/seek methods). + load_tables (bool): load Excel tables (supported for XLSX only). """ def close(self) -> None: @@ -177,18 +262,55 @@ class CalamineWorkbook(contextlib.AbstractContextManager): WorksheetNotFound: If worksheet not found in workbook. """ + def get_table_by_name(self, name: str) -> CalamineTable: + """Get table by name. + + Args: + name(str): name of table + + Returns: + CalamineTable + + Raises: + WorkbookClosed: If workbook already closed. + TableNotFound: If table not found in workbook. + """ + class CalamineError(Exception): ... class PasswordError(CalamineError): ... class WorksheetNotFound(CalamineError): ... class XmlError(CalamineError): ... 
class ZipError(CalamineError): ... class WorkbookClosed(CalamineError): ... +class TablesNotLoaded(CalamineError): ... +class TablesNotSupported(CalamineError): ... +class TableNotFound(CalamineError): ... def load_workbook( - path_or_filelike: str | os.PathLike | ReadBuffer, + path_or_filelike: str | os.PathLike | ReadBuffer, load_tables: bool = False ) -> CalamineWorkbook: """Determining type of pyobject and reading from it. Args: path_or_filelike (str | os.PathLike | ReadBuffer): path to file or IO (must imlpement read/seek methods). + load_tables (bool): load Excel tables (supported for XLSX only). """ + +__all__ = [ + "CalamineError", + "CalamineSheet", + "CalamineTable", + "CalamineWorkbook", + "PasswordError", + "SheetMetadata", + "SheetTypeEnum", + "SheetVisibleEnum", + "TableNotFound", + "TablesNotLoaded", + "TablesNotSupported", + "WorkbookClosed", + "WorksheetNotFound", + "XmlError", + "ZipError", + "load_workbook", +] diff --git a/src/lib.rs b/src/lib.rs index 58caf6b..f969bf9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,20 @@ use pyo3::prelude::*; mod types; -mod utils; use crate::types::{ - CalamineError, CalamineSheet, CalamineWorkbook, CellValue, Error, PasswordError, SheetMetadata, - SheetTypeEnum, SheetVisibleEnum, WorkbookClosed, WorksheetNotFound, XmlError, ZipError, + CalamineError, CalamineSheet, CalamineTable, CalamineWorkbook, CellValue, Error, PasswordError, + SheetMetadata, SheetTypeEnum, SheetVisibleEnum, TableNotFound, TablesNotLoaded, + TablesNotSupported, WorkbookClosed, WorksheetNotFound, XmlError, ZipError, }; #[pyfunction] -fn load_workbook(py: Python, path_or_filelike: Py) -> PyResult { - CalamineWorkbook::from_object(py, path_or_filelike) +#[pyo3(signature = (path_or_filelike, load_tables=false))] +fn load_workbook( + py: Python, + path_or_filelike: Py, + load_tables: bool, +) -> PyResult { + CalamineWorkbook::from_object(py, path_or_filelike, load_tables) } #[pymodule(gil_used = false)] @@ -20,11 +25,15 @@ fn 
_python_calamine(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add("CalamineError", py.get_type::())?; m.add("PasswordError", py.get_type::())?; m.add("WorksheetNotFound", py.get_type::())?; m.add("XmlError", py.get_type::())?; m.add("ZipError", py.get_type::())?; + m.add("TablesNotSupported", py.get_type::())?; + m.add("TablesNotLoaded", py.get_type::())?; + m.add("TableNotFound", py.get_type::())?; m.add("WorkbookClosed", py.get_type::())?; Ok(()) } diff --git a/src/types/errors.rs b/src/types/errors.rs index a4220b9..64ab8e5 100644 --- a/src/types/errors.rs +++ b/src/types/errors.rs @@ -1,10 +1,12 @@ -use calamine::Error as CalamineCrateError; -use pyo3::create_exception; -use pyo3::exceptions::PyException; +use calamine::{Error as CalamineCrateError, OdsError, XlsError, XlsbError, XlsxError}; +use pyo3::exceptions::{PyException, PyIOError}; +use pyo3::{create_exception, PyErr}; #[derive(Debug)] pub enum Error { Calamine(CalamineCrateError), + TablesNotSupported, + TablesNotLoaded, WorkbookClosed, } @@ -14,3 +16,63 @@ create_exception!(python_calamine, WorksheetNotFound, CalamineError); create_exception!(python_calamine, XmlError, CalamineError); create_exception!(python_calamine, ZipError, CalamineError); create_exception!(python_calamine, WorkbookClosed, CalamineError); +create_exception!(python_calamine, TablesNotSupported, CalamineError); +create_exception!(python_calamine, TablesNotLoaded, CalamineError); +create_exception!(python_calamine, TableNotFound, CalamineError); + +impl From for PyErr { + fn from(val: Error) -> Self { + match val { + Error::Calamine(calamine_error) => match calamine_error { + CalamineCrateError::Io(err) => PyIOError::new_err(err.to_string()), + CalamineCrateError::Ods(ref err) => match err { + OdsError::Io(error) => PyIOError::new_err(error.to_string()), + OdsError::Zip(error) => ZipError::new_err(error.to_string()), + OdsError::Xml(error) => 
XmlError::new_err(error.to_string()), + OdsError::XmlAttr(error) => XmlError::new_err(error.to_string()), + OdsError::Password => PasswordError::new_err(err.to_string()), + OdsError::WorksheetNotFound(error) => { + WorksheetNotFound::new_err(error.to_string()) + } + _ => CalamineError::new_err(err.to_string()), + }, + CalamineCrateError::Xls(ref err) => match err { + XlsError::Io(error) => PyIOError::new_err(error.to_string()), + XlsError::Password => PasswordError::new_err(err.to_string()), + XlsError::WorksheetNotFound(error) => { + WorksheetNotFound::new_err(error.to_string()) + } + _ => CalamineError::new_err(err.to_string()), + }, + CalamineCrateError::Xlsx(ref err) => match err { + XlsxError::Io(error) => PyIOError::new_err(error.to_string()), + XlsxError::Zip(error) => ZipError::new_err(error.to_string()), + XlsxError::Xml(error) => XmlError::new_err(error.to_string()), + XlsxError::XmlAttr(error) => XmlError::new_err(error.to_string()), + XlsxError::XmlEof(error) => XmlError::new_err(error.to_string()), + XlsxError::Password => PasswordError::new_err(err.to_string()), + XlsxError::TableNotFound(error) => TableNotFound::new_err(error.to_string()), + XlsxError::WorksheetNotFound(error) => { + WorksheetNotFound::new_err(error.to_string()) + } + _ => CalamineError::new_err(err.to_string()), + }, + CalamineCrateError::Xlsb(ref err) => match err { + XlsbError::Io(error) => PyIOError::new_err(error.to_string()), + XlsbError::Zip(error) => ZipError::new_err(error.to_string()), + XlsbError::Xml(error) => XmlError::new_err(error.to_string()), + XlsbError::XmlAttr(error) => XmlError::new_err(error.to_string()), + XlsbError::Password => PasswordError::new_err(err.to_string()), + XlsbError::WorksheetNotFound(error) => { + WorksheetNotFound::new_err(error.to_string()) + } + _ => CalamineError::new_err(err.to_string()), + }, + _ => CalamineError::new_err(calamine_error.to_string()), + }, + Error::WorkbookClosed => WorkbookClosed::new_err("".to_string()), + 
Error::TablesNotLoaded => TablesNotLoaded::new_err("".to_string()), + Error::TablesNotSupported => TablesNotSupported::new_err("".to_string()), + } + } +} diff --git a/src/types/mod.rs b/src/types/mod.rs index 60fed03..c5cff3d 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -1,10 +1,13 @@ mod cell; mod errors; mod sheet; +mod table; mod workbook; pub use cell::CellValue; pub use errors::{ - CalamineError, Error, PasswordError, WorkbookClosed, WorksheetNotFound, XmlError, ZipError, + CalamineError, Error, PasswordError, TableNotFound, TablesNotLoaded, TablesNotSupported, + WorkbookClosed, WorksheetNotFound, XmlError, ZipError, }; pub use sheet::{CalamineSheet, SheetMetadata, SheetTypeEnum, SheetVisibleEnum}; +pub use table::CalamineTable; pub use workbook::CalamineWorkbook; diff --git a/src/types/table.rs b/src/types/table.rs new file mode 100644 index 0000000..0bf72ac --- /dev/null +++ b/src/types/table.rs @@ -0,0 +1,71 @@ +use std::sync::Arc; + +use calamine::{Data, Range}; +use pyo3::prelude::*; +use pyo3::types::PyList; + +use crate::CellValue; + +#[pyclass] +#[derive(Clone, PartialEq)] +pub struct CalamineTable { + #[pyo3(get)] + name: String, + #[pyo3(get)] + sheet: String, + #[pyo3(get)] + columns: Vec, + range: Arc>, +} + +impl CalamineTable { + pub fn new(name: String, sheet_name: String, columns: Vec, range: Range) -> Self { + CalamineTable { + name, + sheet: sheet_name, + columns, + range: Arc::new(range), + } + } +} + +#[pymethods] +impl CalamineTable { + fn __repr__(&self) -> PyResult { + Ok(format!("CalamineTable(name='{}')", self.name)) + } + + #[getter] + fn height(&self) -> usize { + self.range.height() + } + + #[getter] + fn width(&self) -> usize { + self.range.width() + } + + #[getter] + fn start(&self) -> Option<(u32, u32)> { + self.range.start() + } + + #[getter] + fn end(&self) -> Option<(u32, u32)> { + self.range.end() + } + + fn to_python(slf: PyRef<'_, Self>) -> PyResult> { + let range = Arc::clone(&slf.range); + + let py_list = 
PyList::empty(slf.py()); + + for row in range.rows() { + let py_row = PyList::new(slf.py(), row.iter().map(<&Data as Into>::into))?; + + py_list.append(py_row)?; + } + + Ok(py_list) + } +} diff --git a/src/types/workbook.rs b/src/types/workbook.rs index c197d46..a96644f 100644 --- a/src/types/workbook.rs +++ b/src/types/workbook.rs @@ -10,8 +10,7 @@ use pyo3::prelude::*; use pyo3::types::PyType; use pyo3_file::PyFileLikeObject; -use crate::utils::err_to_py; -use crate::{CalamineSheet, Error, SheetMetadata, WorksheetNotFound}; +use crate::{CalamineSheet, CalamineTable, Error, SheetMetadata, WorksheetNotFound}; enum SheetsEnum { File(Sheets>), @@ -19,6 +18,33 @@ enum SheetsEnum { None, } +enum WorkbookType { + Xls, + Xlsx, + Xlsb, + Ods, +} + +impl From<&SheetsEnum> for WorkbookType { + fn from(sheets: &SheetsEnum) -> Self { + match sheets { + SheetsEnum::File(f) => match f { + Sheets::Xls(_) => WorkbookType::Xls, + Sheets::Xlsx(_) => WorkbookType::Xlsx, + Sheets::Xlsb(_) => WorkbookType::Xlsb, + Sheets::Ods(_) => WorkbookType::Ods, + }, + SheetsEnum::FileLike(f) => match f { + Sheets::Xls(_) => WorkbookType::Xls, + Sheets::Xlsx(_) => WorkbookType::Xlsx, + Sheets::Xlsb(_) => WorkbookType::Xlsb, + Sheets::Ods(_) => WorkbookType::Ods, + }, + SheetsEnum::None => unreachable!(), + } + } +} + impl SheetsEnum { fn sheets_metadata(&self) -> Vec { match self { @@ -75,17 +101,94 @@ impl SheetsEnum { SheetsEnum::None => Err(Error::WorkbookClosed), } } + + fn load_tables(&mut self) -> Result<(), Error> { + match self { + SheetsEnum::File(f) => match f { + Sheets::Xlsx(xlsx_f) => xlsx_f + .load_tables() + .map_err(CalamineCrateError::Xlsx) + .map_err(Error::Calamine), + _ => Err(Error::TablesNotSupported), + }, + SheetsEnum::FileLike(f) => match f { + Sheets::Xlsx(xlsx_f) => xlsx_f + .load_tables() + .map_err(CalamineCrateError::Xlsx) + .map_err(Error::Calamine), + _ => Err(Error::TablesNotSupported), + }, + SheetsEnum::None => Err(Error::WorkbookClosed), + } + } + + fn 
table_names(&self) -> Result, Error> { + match self { + SheetsEnum::File(f) => match f { + Sheets::Xlsx(xlsx_f) => Ok(xlsx_f.table_names()), + _ => Err(Error::TablesNotSupported), + }, + SheetsEnum::FileLike(f) => match f { + Sheets::Xlsx(xlsx_f) => Ok(xlsx_f.table_names()), + _ => Err(Error::TablesNotSupported), + }, + SheetsEnum::None => Err(Error::WorkbookClosed), + } + .map(|v| { + v.iter() + .map(|s| s.to_owned().to_owned()) + .collect::>() + }) + } + + fn get_table_by_name(&mut self, name: &str) -> Result { + match self { + SheetsEnum::File(f) => match f { + Sheets::Xlsx(xlsx_f) => xlsx_f + .table_by_name(name) + .map_err(CalamineCrateError::Xlsx) + .map_err(Error::Calamine) + .map(|t| { + CalamineTable::new( + t.name().to_owned(), + t.sheet_name().to_owned(), + t.columns().iter().map(|s| s.to_owned()).collect(), + t.data().to_owned(), + ) + }), + _ => Err(Error::TablesNotSupported), + }, + SheetsEnum::FileLike(f) => match f { + Sheets::Xlsx(xlsx_f) => xlsx_f + .table_by_name(name) + .map_err(CalamineCrateError::Xlsx) + .map_err(Error::Calamine) + .map(|t| { + CalamineTable::new( + t.name().to_owned(), + t.sheet_name().to_owned(), + t.columns().iter().map(|s| s.to_owned()).collect(), + t.data().to_owned(), + ) + }), + _ => Err(Error::TablesNotSupported), + }, + SheetsEnum::None => Err(Error::WorkbookClosed), + } + } } #[pyclass] pub struct CalamineWorkbook { #[pyo3(get)] path: Option, + workbook_type: WorkbookType, sheets: SheetsEnum, #[pyo3(get)] sheets_metadata: Vec, #[pyo3(get)] sheet_names: Vec, + table_names: Option>, } #[pymethods] @@ -98,31 +201,38 @@ impl CalamineWorkbook { } #[classmethod] - #[pyo3(name = "from_object")] + #[pyo3(name = "from_object", signature = (path_or_filelike, load_tables=false))] fn py_from_object( _cls: &Bound<'_, PyType>, py: Python<'_>, path_or_filelike: Py, + load_tables: bool, ) -> PyResult { - Self::from_object(py, path_or_filelike) + Self::from_object(py, path_or_filelike, load_tables) } #[classmethod] - #[pyo3(name = 
"from_filelike")] + #[pyo3(name = "from_filelike", signature = (filelike, load_tables=false))] fn py_from_filelike( _cls: &Bound<'_, PyType>, py: Python<'_>, filelike: Py, + load_tables: bool, ) -> PyResult { - py.detach(|| Self::from_filelike(filelike)) + py.detach(|| Self::from_filelike(filelike, load_tables)) } #[classmethod] - #[pyo3(name = "from_path")] - fn py_from_path(_cls: &Bound<'_, PyType>, py: Python<'_>, path: Py) -> PyResult { + #[pyo3(name = "from_path", signature = (path, load_tables=false))] + fn py_from_path( + _cls: &Bound<'_, PyType>, + py: Python<'_>, + path: Py, + load_tables: bool, + ) -> PyResult { if let Ok(string_ref) = path.extract::(py) { let path = string_ref.to_string_lossy().to_string(); - return py.detach(|| Self::from_path(&path)); + return py.detach(|| Self::from_path(&path, load_tables)); } Err(PyTypeError::new_err("")) @@ -138,15 +248,30 @@ impl CalamineWorkbook { py.detach(|| self.get_sheet_by_index(index)) } + #[getter] + fn table_names(&self) -> PyResult> { + match &self.workbook_type { + WorkbookType::Xlsx => match &self.table_names { + Some(v) => Ok(v.clone()), + None => Err(Error::TablesNotLoaded.into()), + }, + _ => Err(Error::TablesNotSupported.into()), + } + } + + #[pyo3(name = "get_table_by_name")] + fn py_get_table_by_name(&mut self, py: Python<'_>, name: &str) -> PyResult { + py.detach(|| self.get_table_by_name(name)) + } + fn close(&mut self) -> PyResult<()> { match self.sheets { - SheetsEnum::None => Err(Error::WorkbookClosed), + SheetsEnum::None => Err(Error::WorkbookClosed.into()), _ => { self.sheets = SheetsEnum::None; Ok(()) } } - .map_err(err_to_py) } fn __enter__(slf: Py) -> Py { @@ -164,56 +289,68 @@ impl CalamineWorkbook { } impl CalamineWorkbook { - pub fn from_object(py: Python<'_>, path_or_filelike: Py) -> PyResult { + pub fn from_object( + py: Python<'_>, + path_or_filelike: Py, + load_tables: bool, + ) -> PyResult { if let Ok(string_ref) = path_or_filelike.extract::(py) { let path = 
string_ref.to_string_lossy().to_string(); - return py.detach(|| Self::from_path(&path)); + return py.detach(|| Self::from_path(&path, load_tables)); } - py.detach(|| Self::from_filelike(path_or_filelike)) + py.detach(|| Self::from_filelike(path_or_filelike, load_tables)) } - pub fn from_filelike(filelike: Py) -> PyResult { + pub fn from_filelike(filelike: Py, load_tables: bool) -> PyResult { let mut buf = vec![]; PyFileLikeObject::with_requirements(filelike, true, false, true, false)? .read_to_end(&mut buf)?; let reader = Cursor::new(buf); - let sheets = SheetsEnum::FileLike( - open_workbook_auto_from_rs(reader) - .map_err(Error::Calamine) - .map_err(err_to_py)?, - ); + let mut sheets = + SheetsEnum::FileLike(open_workbook_auto_from_rs(reader).map_err(Error::Calamine)?); let sheet_names = sheets.sheet_names().to_owned(); let sheets_metadata = sheets.sheets_metadata().to_owned(); + let mut table_names: Option> = None; + if load_tables { + sheets.load_tables()?; + table_names = Some(sheets.table_names()?); + } + Ok(Self { path: None, + workbook_type: WorkbookType::from(&sheets), sheets, sheets_metadata, sheet_names, + table_names, }) } - pub fn from_path(path: &str) -> PyResult { - let sheets = SheetsEnum::File( - open_workbook_auto(path) - .map_err(Error::Calamine) - .map_err(err_to_py)?, - ); + pub fn from_path(path: &str, load_tables: bool) -> PyResult { + let mut sheets = SheetsEnum::File(open_workbook_auto(path).map_err(Error::Calamine)?); let sheet_names = sheets.sheet_names().to_owned(); let sheets_metadata = sheets.sheets_metadata().to_owned(); + let mut table_names: Option> = None; + if load_tables { + sheets.load_tables()?; + table_names = Some(sheets.table_names()?); + } Ok(Self { path: Some(path.to_string()), + workbook_type: WorkbookType::from(&sheets), sheets, sheets_metadata, sheet_names, + table_names, }) } fn get_sheet_by_name(&mut self, name: &str) -> PyResult { - let range = self.sheets.worksheet_range(name).map_err(err_to_py)?; - let 
merge_cells_range = self.sheets.worksheet_merge_cells(name).map_err(err_to_py)?; + let range = self.sheets.worksheet_range(name)?; + let merge_cells_range = self.sheets.worksheet_merge_cells(name)?; Ok(CalamineSheet::new( name.to_owned(), range, @@ -229,4 +366,14 @@ impl CalamineWorkbook { .to_string(); self.get_sheet_by_name(&name) } + + fn get_table_by_name(&mut self, name: &str) -> PyResult { + match &self.workbook_type { + WorkbookType::Xlsx => match &self.table_names { + Some(_) => Ok(self.sheets.get_table_by_name(name)?), + None => Err(Error::TablesNotLoaded.into()), + }, + _ => Err(Error::TablesNotSupported.into()), + } + } } diff --git a/src/utils.rs b/src/utils.rs deleted file mode 100644 index 185c33d..0000000 --- a/src/utils.rs +++ /dev/null @@ -1,55 +0,0 @@ -use calamine::{Error as CalamineCrateError, OdsError, XlsError, XlsbError, XlsxError}; -use pyo3::exceptions::PyIOError; -use pyo3::PyErr; - -use crate::{ - CalamineError, Error, PasswordError, WorkbookClosed, WorksheetNotFound, XmlError, ZipError, -}; - -pub fn err_to_py(e: Error) -> PyErr { - match e { - Error::Calamine(calamine_error) => match calamine_error { - CalamineCrateError::Io(err) => PyIOError::new_err(err.to_string()), - CalamineCrateError::Ods(ref err) => match err { - OdsError::Io(error) => PyIOError::new_err(error.to_string()), - OdsError::Zip(error) => ZipError::new_err(error.to_string()), - OdsError::Xml(error) => XmlError::new_err(error.to_string()), - OdsError::XmlAttr(error) => XmlError::new_err(error.to_string()), - OdsError::Password => PasswordError::new_err(err.to_string()), - OdsError::WorksheetNotFound(error) => WorksheetNotFound::new_err(error.to_string()), - _ => CalamineError::new_err(err.to_string()), - }, - CalamineCrateError::Xls(ref err) => match err { - XlsError::Io(error) => PyIOError::new_err(error.to_string()), - XlsError::Password => PasswordError::new_err(err.to_string()), - XlsError::WorksheetNotFound(error) => WorksheetNotFound::new_err(error.to_string()), - 
_ => CalamineError::new_err(err.to_string()), - }, - CalamineCrateError::Xlsx(ref err) => match err { - XlsxError::Io(error) => PyIOError::new_err(error.to_string()), - XlsxError::Zip(error) => ZipError::new_err(error.to_string()), - XlsxError::Xml(error) => XmlError::new_err(error.to_string()), - XlsxError::XmlAttr(error) => XmlError::new_err(error.to_string()), - XlsxError::XmlEof(error) => XmlError::new_err(error.to_string()), - XlsxError::Password => PasswordError::new_err(err.to_string()), - XlsxError::WorksheetNotFound(error) => { - WorksheetNotFound::new_err(error.to_string()) - } - _ => CalamineError::new_err(err.to_string()), - }, - CalamineCrateError::Xlsb(ref err) => match err { - XlsbError::Io(error) => PyIOError::new_err(error.to_string()), - XlsbError::Zip(error) => ZipError::new_err(error.to_string()), - XlsbError::Xml(error) => XmlError::new_err(error.to_string()), - XlsbError::XmlAttr(error) => XmlError::new_err(error.to_string()), - XlsbError::Password => PasswordError::new_err(err.to_string()), - XlsbError::WorksheetNotFound(error) => { - WorksheetNotFound::new_err(error.to_string()) - } - _ => CalamineError::new_err(err.to_string()), - }, - _ => CalamineError::new_err(calamine_error.to_string()), - }, - Error::WorkbookClosed => WorkbookClosed::new_err("".to_string()), - } -} diff --git a/tests/data/table-multiple.xlsx b/tests/data/table-multiple.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..77d022ddd7d885921faa3dba7a3329b42a0d56cc GIT binary patch literal 11520 zcmeHtWn7f)w*J7-4T5xcs~{nr(hU;Q4Ba6mozmSQ9fEX7HwZ`y2qWDM(tSqu-pBVv z_wWDzdd}I;hj~8CtZSZUt!v%)wbr^7q=7Kl05|{w001Bd@CdCACqe-L@8JP}M}Y58 zS|T>qjv#ACT@_bbkb^dhiVS2}6O5);TLPjjYVUm1 zR8=v#j_oX?r3Bi#2o0jYB}^%tUs($aDTqO(NHlkal%(TFI!|^UHuU-7aH)v*mXMyP z1718;+H@V{FDP3CB99<5i&?XGSpxfda<)G#IvPR&izo4_2HmgvxM}dQp(zc8#^uak zlk?s*aMw|21altdoXb zqn5Dp^wH!R17=3!EWZ0S4Z69bBOl6Lan5#b#s?tVqM)bC%QBVqcH zO`X%`WIXsp&UKbfkdy4oA6p*89zX!#?hXi0_*;(Fs6L@Qfe=pyl7h&9At+sYkfj4F 
z%g;};?Ee4c=Kq?U42v0->wJRJvoFykvU0U$ErF1g*C->=NUlQCMzu#;6P`mQcy{U! z^p!FMkDjg%3uw*lt(mEi5*e_{bvDomRj?VjPno-vW~!mTH=r;v!|o-BAku#|J7=lu zk{g$5Fm+W}85&B^5O+;!z)mcWjeNLcpBMc^v0vYGG#0-Q^gV8sFR+zFfth!r-5del z9ln_VlJ}yrj$PKqg>}utAW<$%Fyj%=z?)SOW8|V?aAAt@fTSPJQQA1>=PL?BUZ>31U%3rD>3Ar6Au+deFrrbXw?L%d4`{)@6k=lbp4=qkF9u>H>UIm8<} zXvlu_6_N=A00bx(OV)qT-Py+8!obGH;^&U^PwK<`q(0=g|F^HE*kSW~()cy|je6L{ zScBqTG=~Y5X`KOR6qiQ2I1)~2Z3JRP8U#PHWa0WzU3&QwIb4h8pHO9oP{RgJ@8{-@>6OvjkyxdD zWjRhU#3$Hva-Br&=asE{c0J*3kWU{G?`vDaK_=%HIdo^y*kCRF)^61#GNDYp&jxqM z2!lIhYlMt3Nt3GgGtIj$va-$<&cM%!dX9Rs9O;Hi{-hd@noT+-p8LrwJVHs52YLZ@@g@Bg!W{L-YIIK#Y z#9WNO{e_vMc&wrf;zkETku$!BuE6Yf#U;lVX-h9MuXOB^g%kSCVS*tcmY< zB@r(dySRhD^6>|LI<*$LG-DH{K*?TWV6Dp|)A->77b`IfHhR{osMoB7oBzHk75GT~2oc^?c% zQ;-#i_5Sne&t%@wkhcMIV7H=wb0%}Nv7)Udgm()0R*_mNOW0){L}Helpst)PI40>q zIJ4{x)!Cq*2-Udz#FB9Yy3sL{Mk}Zkc!_f=AytVtAUy_0I5`m{O5yePZho24>14() zI7@_rYSj+>LHnaya2G%6IG3rrbKs3MAxR8Uui>DJ%* zYW;426m@ER$gw`&zyL4#(?Qw#;Bwzrm?}Ncq)@ZLs*}YN#?&3`rwU<|WbW?IviOBW zWXHh$!Do0)JyKgxneTjPWYciqcrqs0g@-_ih>pP;U?9JtCp#5-To7IY63vXx#C22u zZysK@ zlu0b*j?84;cuMgF$#8+018e|qb5c5R>4F1! 
z8`wh;BPWX0Lk~PyE&(HgfjHsODYQnu9>Gg25#qiEUjQ~(uahc7T3!>3HANl6%Bp~` zp0r0{BDD&Pm$&bWOyF^ zIK*9b(dfFn?4)hIzINrtcxFmI8p@h^eY*2Gw2c4erm>g$z{=;P*m;z=Xtvf00`UQY zBU_*yH7%uG3r;77vI#7j&l|+e3?$-(Qf^~VJ(^v(^DDtaI-pMGtg{iZzd^n!;yR)J z3gC>(gqTGN)mG$)LEVKb)X#p+K&^{d@2z@IW2j+oO|u5HDjTv?@9At5m%$zz5hGDl z5?${4d0Z6M?w;b-H>b!;Uuk~D4H7Y88EM&Pxs!FVPoZlZjAV_D^l>%_%4 z?iNJUAR&P;DCJLzwU%`FB7>eNKALDL7zfq!(T8%GzV_GWVz1>H4jcvXUN%3CuC)AO z1$Fs2%@Zin%;0oIlKPTGiS7k+j_V@_9nrl5GtMqWr9k2lE+zwkC}jl2u+vWre*1Qu zBF8-EP`!+^9*J1Jvjw=mJnXijnlQl`$%wnsbYJCe!cDK8yOR_PdpO|caF%Q9P_>jb zq;@js+CSLdwRbf7nH=)_sO!$T%uUQXJ59N7Z|@bq=C`QP##S$EZg4I4sMw6jiI@H1 z|BOSyLEKtuoL2f9Q;!GU*X=>5=#`*+D*xWF6~pd z%OZs~l?X3QQDpVp0Dmz)EvUkP1xWTr>TT|@Yk_)cTWQ|s7O1)^uA;t=Exy`o&76ly zkK2)1H=FpPBbZodXhTO8!4}Ua2}pj}jh)iimIUjp7wu;pV})5?gAu-eAwms;h2 zUrM9VK7<}v*x@pk3?8@tpv*pCgv{xhHlR0$wB%!*S4p@!R2Cw6fvl8=*K^0Fdh7{3 z1-osy3q@*e4=esc7fgc7u%ZwU6LskU74BC=jKOS6j69B^Vw-7si)>$b2TrSEm(lDj zk|!PkAR=8wBf*kiDI~}orwf-P>}e-ZNh2l)GXyg}Vmu8d!s7V)yu9e_4SE2^R(?t@ zsBJ;6C-W-9c!X|5R_^J18bQo{GY9982nwCb)~6))I5G-TtQ zSgPu9C3F%_t*=_jX&HS7cb%qIMFP8yg;9qOZsPO6_9k+vg)#U|f$}@WMYbzDVVgn` zPe2V$dmWrCN(Wr_WsDVX6id__TbS@z;_CKXNsaOR(%4$fS`A&>g482EeM{zPjcX4% z!}Tx814Ey?L)qV$>-SE+vCJ!KucB4P4y2hD>f!2nJHg=TvXpbRut8kEl=+le_DQ=5 zwNKUwQ4+8BN*&RcvC>w^hV;7vJ&;p&setH*IEVtp`=uWoOhF(=2iE)Bv!DA`Sgfr4 z_a_*E`|+2gO!I_1jj6ubRKYmHwuS!19kZ!2SfO1JmisI>XT;#{^78nU*qey;_r^Id!Bp(eKEinH<~TdiBQl?>7q(vzxvQ*Pm;%Tmg8i*q=J-@w?yAofo> zI$G1?SM4HHl_%>6X5~#8KSnDv)_e1mtYLv&-m?3`nFG1Vlqzn`3$I}xgv{A6JN*8& zKVFi5=k+cfu5@Z~tmzRfCeTB+?8shAVN!+sN34I2hcit}RQlxbobCKX+*QHOI2f$Z z=SNO_Y#{I{^U?+09*_9VZn!69nkhq0B+Q4;6;7cC&7TKw)+XEz2~>{{T}cRZDu8en zcQdv7R=A&gq7vVKs8Wa&V0X1g%&Jvfod!jt(3YHNOui^r`%GP#-Cq^zRnB3ek=mx| zr6(3|BGxY7BJ1WHV{G?W8|FAwT86Ed7yMR+O^$uY1QU3TVI0*TgXl&CKPmR^-7<BD(K;hjD5> z=7tfS-yA&hVFSKovlg{hvQK=pm2*K%=zx6(?aH{ooaBgV4}03JAS(_Q!M?|N5|v=6 zpwk@L_fZ2H?Y=~3EMAJ+r@{z!{r;D{Gi?;gL>73;NIZGp!q_RLN&7$rXLdbty*@grDGKC3jy$1^YiMnHI7(m0Sc9l0a5&5yZ5<5{bzym 
zO8wVNg$+u&t6R8#e0lHZ+v@*cFMrSU|6wm*nQ)<@;FQxB!k?kX#e6Xn5I9 zl`bb7)3jh<%#glo@EH3i6)ywXke}NlCWvqPXmD*MBq=(mT}g|`k5OI7!O>8m`OA0( zW~GR$Tbmw)n)T9ftJH?7UVj(v9aQ5QxaIn-w5*)-$H z7D}X(&!aQOi-Uc?;T6f86waLyu%C! zz93C8MaD=?oh)A@;FHb$Y|WR77Ll5=uY?1Iw1=ZO6tN1cQ0uF|u(w%t^?tO@K}Fay zW@$;K+oI9DdvCoSBChX1sqeD`x7+|UNgf(A7EcsU1%Ox6EI&44B+!*~^=aP5ZmLQ6 zxy=azV_>nhf>3|VtA*gR<@@QAND zk7Gj|X||xZduLh!n0zPO{P+uIWaD79+dsA)tC5AoSs_LACzyZBko&bUENeLuH*8 z1-L?bAjFz-%C93)1%k@RFJD$6Ls|~@nJTZ^xll}mm{mG?0s<~bQJIja^~3FTMV4P+ z0^@B}TAAZ(d8O^eT58FS-%DJ`R;zdO_F>6;s@PVW><_-%g=2_9vnK$USQ%<{t#cei z$mpjMyzOf~37;iQ5FtN}cW+xpS7;oI4la-^2^MKBQDdJ$G^uep9M0+2megM~CFvcX zzGhgH*(>e5+*~>AlWX&v>UhWW)WwLMs(93VG=f)CrcisDz@|@XqxMGRF-^OVc>?YH zkMFq6XQe|PRdcu)AK)Wcp?gW5M`h{zp9#Ma62s_tx+NWcGk8D+gN>b@F7GvPv zZzRYK^3T-E$9<|Ci7&xSU)w7KpTuH#iWqP^iu|Q3iUw1}G&&1KrwIg-0UVe9}9U@8)9_U>gmm~Y5qTx!3ilDuP zxRkmbD@n?gYRiiuJ|*Ym#ca?VMP@a*n`~2fx|B!tOm5nW&@M+R{A!Je29A}WR5l_I zt5H3;SmLfr63C`iRGf{7kxiChLixyV2F$&b2>0q`VyEUVaTr_kK-Y@tyCXD_!Q6i8 zL~6SjpgQVKB@|6@_LL~cQU<3%dr-ytkG|O?P=399c4J}h?jBgxHw8&ej5vF7PgmC? 
zzZlw5+LFCtFXhMj zku-|6vQ5Kg&gjOXM&qg#)cvV>!lauK%$Yy+gJQTbuJtGu`jb?!o2?+44Q#=7RX(?_ zX8GqwWZFjqU%qI3u%!*NIsrJUqf5KCvrg8x->K*5%+ilgDfT0K4#m#!%P7lj?<+R{ zamip`kM9_TMDb_;4f~(bG}X5U87Vv3n^~LO2h}L5U8+kEC2-$&|H;apoGM2ACKCff z#%ujEC}BddkDPH@uEG3VG#>&p_l<4~6-M3NyV#3-3UWK-sYW;yyTr>j5#)m9?Fl|S z#TAl*uCE=!(C%KYi=tn@Y9u@spSU(Hb)*!F)9&x8IZBoiEv-ONvnSsHQH^m;r{;gE z!UXzljty$=&-o^Ezx{G-4~M>x4zvHYm0rwbg6Ar6c&FY-v#KDi0QIf-#~QVH8xp&%r(dM~x#9#98C^cYgh?X4^s6e^wVm9S-J{kk_C2~zXX+LAwtOf>9v^*Sa%xeW%H#%n zxUe6;Y%TeVFPr$zF)6_f*#yz)@v%&eiE@PK9V$rHLN2>wr}4ECqz)ERFpBu*CHzg= zaun7$gT3PgJQzgkT5jl9s>G*7!f;|Awa}0a@pHJU`VG82(Q2JCMDaC;k!a{g67i2o zOy5FlK6`Oax`P@UtG|q@ydGu3vr%u?4Ie7qirVY8y=UBf9ujXhYuI3wt&=3ddp&n} zHWS(s)eu1>>$kapST2p0rThE}NbmVavC_69>rV@D=;fcCdcO@>Dr(DjJi+$hJQQF* z(|$olKyBF-#YC?72-Zsw3_8jNM7rx-O5F`+hJ`yvWy3y1t2Q^YSF%@NrEQ60GN`Y1ZY5~z7qL*Yhn^cJ+ zJ!wUD6>+kJjVxBEB=>SvQv^3CW_-y&%iD}IlodzJ!NNf^g{P}nXX^O}Tp7$ZLgj{_ zj#Qo!M(5zeM}2F>`iT~u8{PP-#v%)OY~guiwI9jl^a)@+qM=$5I!zbr%4ucf%s8#W z?=bUeJ1la7pVtvfW?cB^Y^P}XF*3|n+~$L;;WH~n!aah)y~dwTeS-Y8dr91tyxv>G zpEmTIliQ8TaR;}IuFy)HLUi8mRZtMkP#GS?sbhb3?7jRqiqewqdV&!!4|^w)Mb?ZJJnphdho$Rod8f(=WW}La6|f3BQTyl}iPv4cvl?+)7}-7o-MR=&eX>9jwV+ z^`uj{LA%oM*awXHXWS63Ucg*!IGR}M^vj_iuiEDU^{jZV%E-Fq8Au2^!w(ocGKYhusSb4cJ zt8lq*7Xu9&K{7n(**HEORJBA*nY@6rl7j+ssZAF?&-kqb_mwkQa1AR&58x0*>#4og zkeQ#_{qcrwWyub{b;CQnDY1B+^xIXc#=vV5A;GGg8lj(5B85fLv6B=2c)(7B|=;amNh^{+wSTspsUuV}!h6TEs`U4N{~Ra4p+ zda}fpkneQ1S&8VbPjUF9S?m2&#<4(=R!olBlXM=aH=oImON|A{x@rID)e3NhiwMM{ z8W0_d2^j|(*%&I=+t@m=8rs-{e!7$_`#)1ai01%dF}l*7Pq4e^VOvE$yQfyqstAL9 z`boNc^u!LCshqVcBt@`xPj_YSO4Qc;q7uJ5{AgU)i77>^L~CjLR;w(1aO3 zv$~P@I*605Jc7P`OI#=-LT|LVzNxQ1zv=0L5dmtCsvb~YDZxHHr`aLJ!198Gsw&!< zj@6owGWqCLRcFc}dMz*w!#;qTizdmi9vJ9Gk&Q`z?*BD2e-Fynuu#a?{&BBjm{${8 zShW}bMtrie+Fs#}jNx}~tplO7@9Z5f$>{!x}k8A7vc!@ zK0fUhsd|Nlo%+t5E{SWbeQtlsDTCBXq@)L-bLB1jm+Y3*tj-&2lpq?G;WFeH8;rsA|>v24F zPI5|are`mQWZI2`M!NVBNo1yYOaqvcf14~TGz;TMgXI|lM*_*dg>Vh0zb9{Afu{GIQd+DdYI}e6R+sioFLh! 
zz9TI}cV7iz*NF`1?xgyi4KI-aQ+NKi(q?z90xuJWW`wGrHXK$$XPh&l)(dW6MOjeVDuNU?6&)Ww}q25 zqq@Om1zo_-KAt5@x0Z{zebE;Zp;8u$e+trn2l)F5q<|AjOLImh^5#Q)Iv;SlsQPEAg^*A38^V1-^bp~p*8hcYNBSGWJxLxSJk+?q z5G2WeL%3JG4-x*Z=YH7(0GSk!CeTB5_t5