diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index cad4c49..60c29eb 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -117,7 +117,7 @@ jobs: py ${{ env.pythonLocation }} - - run: pip install pre-commit + - run: pip install pre-commit mypy if: steps.cache-py.outputs.cache-hit != 'true' - run: pip install . diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 53eb6ce..e18896c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,11 +34,20 @@ repos: rev: 24.1.1 hooks: - id: black - - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.8.0 + - repo: local hooks: - id: mypy - exclude: ^tests/.*$ + name: mypy + entry: mypy + language: python + pass_filenames: false + - id: mypy-stubtest + name: mypy-stubtest + entry: stubtest + args: + - python_calamine + language: python + pass_filenames: false - repo: local hooks: - id: rust-linting diff --git a/pyproject.toml b/pyproject.toml index ad48dd7..563837c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ dynamic = ["version"] dev = [ "maturin~=1.0", "pre-commit~=4.3", + "mypy~=1.18.2", "pytest~=9.0", "pandas[excel]~=2.2", ] @@ -36,6 +37,7 @@ profile = "black" [tool.mypy] python_version = "3.10" +packages = ["python_calamine"] ignore_missing_imports = false disallow_untyped_defs = true check_untyped_defs = true diff --git a/python/python_calamine/__init__.py b/python/python_calamine/__init__.py index d2f57ea..c14d5db 100644 --- a/python/python_calamine/__init__.py +++ b/python/python_calamine/__init__.py @@ -1,11 +1,15 @@ from ._python_calamine import ( CalamineError, CalamineSheet, + CalamineTable, CalamineWorkbook, PasswordError, SheetMetadata, SheetTypeEnum, SheetVisibleEnum, + TableNotFound, + TablesNotLoaded, + TablesNotSupported, WorkbookClosed, WorksheetNotFound, XmlError, @@ -16,14 +20,18 @@ __all__ = ( "CalamineError", "CalamineSheet", + "CalamineTable", "CalamineWorkbook", "PasswordError", "SheetMetadata", 
"SheetTypeEnum", "SheetVisibleEnum", + "TableNotFound", + "TablesNotLoaded", + "TablesNotSupported", + "WorkbookClosed", "WorksheetNotFound", "XmlError", "ZipError", - "WorkbookClosed", "load_workbook", ) diff --git a/python/python_calamine/_python_calamine.pyi b/python/python_calamine/_python_calamine.pyi index 32854b8..c59c067 100644 --- a/python/python_calamine/_python_calamine.pyi +++ b/python/python_calamine/_python_calamine.pyi @@ -1,6 +1,6 @@ +# Some documentations from upstream under MIT License. See authors in https://github.com/tafia/calamine from __future__ import annotations -import contextlib import datetime import enum import os @@ -23,34 +23,57 @@ class SheetTypeEnum(enum.Enum): @typing.final class SheetVisibleEnum(enum.Enum): Visible = ... + """Visible.""" Hidden = ... + """Hidden.""" VeryHidden = ... + """The sheet is hidden and cannot be displayed using the user interface. It is supported only by Excel formats.""" @typing.final class SheetMetadata: name: str + """Name of sheet.""" typ: SheetTypeEnum + """Type of sheet. + + Only Excel formats support this. Default value for ODS is `WorkSheet`. + """ visible: SheetVisibleEnum + """Visible of sheet.""" - def __init__( - self, name: str, typ: SheetTypeEnum, visible: SheetVisibleEnum - ) -> None: ... + def __new__( + cls, name: str, typ: SheetTypeEnum, visible: SheetVisibleEnum + ) -> SheetMetadata: ... @typing.final class CalamineSheet: name: str @property - def height(self) -> int: ... + def height(self) -> int: + """Get the row height of a sheet data. + + The height is defined as the number of rows between the start and end positions. + """ + @property - def width(self) -> int: ... + def width(self) -> int: + """Get the column width of a sheet data. + + The width is defined as the number of columns between the start and end positions. + """ + @property def total_height(self) -> int: ... @property def total_width(self) -> int: ... @property - def start(self) -> tuple[int, int] | None: ... 
+ def start(self) -> tuple[int, int] | None: + """Get top left cell position of a sheet data.""" + @property - def end(self) -> tuple[int, int] | None: ... + def end(self) -> tuple[int, int] | None: + """Get bottom right cell position of a sheet data.""" + def to_python( self, skip_empty_area: bool = True, nrows: int | None = None ) -> list[ @@ -102,34 +125,96 @@ class CalamineSheet: """ @typing.final -class CalamineWorkbook(contextlib.AbstractContextManager): +class CalamineTable: + name: str + """Get the name of the table.""" + sheet: str + """Get the name of the parent worksheet for a table.""" + columns: list[str] + """Get the header names of the table columns. + + In Excel table headers can be hidden but the table will still have + column header names. + """ + @property + def height(self) -> int: + """Get the row height of a table data. + + The height is defined as the number of rows between the start and end positions. + """ + + @property + def width(self) -> int: + """Get the column width of a table data. + + The width is defined as the number of columns between the start and end positions. + """ + + @property + def start(self) -> tuple[int, int] | None: + """Get top left cell position of a table data.""" + + @property + def end(self) -> tuple[int, int] | None: + """Get bottom right cell position of a table data.""" + + def to_python( + self, + ) -> list[ + list[ + int + | float + | str + | bool + | datetime.time + | datetime.date + | datetime.datetime + | datetime.timedelta + ] + ]: + """Returning data from table as list of lists.""" + +@typing.final +class CalamineWorkbook: path: str | None + """Path to file. 
`None` if bytes was loaded.""" sheet_names: list[str] + """All sheet names of this workbook, in workbook order.""" sheets_metadata: list[SheetMetadata] + """All sheets metadata of this workbook, in workbook order.""" + table_names: list[str] | None + """All table names of this workbook.""" @classmethod def from_object( - cls, path_or_filelike: str | os.PathLike | ReadBuffer + cls, path_or_filelike: str | os.PathLike | ReadBuffer, load_tables: bool = False ) -> "CalamineWorkbook": """Determining type of pyobject and reading from it. Args: path_or_filelike (str | os.PathLike | ReadBuffer): path to file or IO (must imlpement read/seek methods). + load_tables (bool): load Excel tables (supported for XLSX only). """ @classmethod - def from_path(cls, path: str | os.PathLike) -> "CalamineWorkbook": + def from_path( + cls, path: str | os.PathLike, load_tables: bool = False + ) -> "CalamineWorkbook": """Reading file from path. Args: path (str | os.PathLike): path to file. + load_tables (bool): load Excel tables (supported for XLSX only). """ @classmethod - def from_filelike(cls, filelike: ReadBuffer) -> "CalamineWorkbook": + def from_filelike( + cls, filelike: ReadBuffer, load_tables: bool = False + ) -> "CalamineWorkbook": """Reading file from IO. Args: filelike : IO (must imlpement read/seek methods). + load_tables (bool): load Excel tables (supported for XLSX only). """ def close(self) -> None: @@ -177,18 +262,55 @@ class CalamineWorkbook(contextlib.AbstractContextManager): WorksheetNotFound: If worksheet not found in workbook. """ + def get_table_by_name(self, name: str) -> CalamineTable: + """Get table by name. + + Args: + name(str): name of table + + Returns: + CalamineTable + + Raises: + WorkbookClosed: If workbook already closed. + TableNotFound: If table not found in workbook. + """ + class CalamineError(Exception): ... class PasswordError(CalamineError): ... class WorksheetNotFound(CalamineError): ... class XmlError(CalamineError): ... 
class ZipError(CalamineError): ... class WorkbookClosed(CalamineError): ... +class TablesNotLoaded(CalamineError): ... +class TablesNotSupported(CalamineError): ... +class TableNotFound(CalamineError): ... def load_workbook( - path_or_filelike: str | os.PathLike | ReadBuffer, + path_or_filelike: str | os.PathLike | ReadBuffer, load_tables: bool = False ) -> CalamineWorkbook: """Determining type of pyobject and reading from it. Args: path_or_filelike (str | os.PathLike | ReadBuffer): path to file or IO (must imlpement read/seek methods). + load_tables (bool): load Excel tables (supported for XLSX only). """ + +__all__ = [ + "CalamineError", + "CalamineSheet", + "CalamineTable", + "CalamineWorkbook", + "PasswordError", + "SheetMetadata", + "SheetTypeEnum", + "SheetVisibleEnum", + "TableNotFound", + "TablesNotLoaded", + "TablesNotSupported", + "WorkbookClosed", + "WorksheetNotFound", + "XmlError", + "ZipError", + "load_workbook", +] diff --git a/src/lib.rs b/src/lib.rs index 58caf6b..f969bf9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,20 @@ use pyo3::prelude::*; mod types; -mod utils; use crate::types::{ - CalamineError, CalamineSheet, CalamineWorkbook, CellValue, Error, PasswordError, SheetMetadata, - SheetTypeEnum, SheetVisibleEnum, WorkbookClosed, WorksheetNotFound, XmlError, ZipError, + CalamineError, CalamineSheet, CalamineTable, CalamineWorkbook, CellValue, Error, PasswordError, + SheetMetadata, SheetTypeEnum, SheetVisibleEnum, TableNotFound, TablesNotLoaded, + TablesNotSupported, WorkbookClosed, WorksheetNotFound, XmlError, ZipError, }; #[pyfunction] -fn load_workbook(py: Python, path_or_filelike: Py) -> PyResult { - CalamineWorkbook::from_object(py, path_or_filelike) +#[pyo3(signature = (path_or_filelike, load_tables=false))] +fn load_workbook( + py: Python, + path_or_filelike: Py, + load_tables: bool, +) -> PyResult { + CalamineWorkbook::from_object(py, path_or_filelike, load_tables) } #[pymodule(gil_used = false)] @@ -20,11 +25,15 @@ fn 
_python_calamine(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add("CalamineError", py.get_type::())?; m.add("PasswordError", py.get_type::())?; m.add("WorksheetNotFound", py.get_type::())?; m.add("XmlError", py.get_type::())?; m.add("ZipError", py.get_type::())?; + m.add("TablesNotSupported", py.get_type::())?; + m.add("TablesNotLoaded", py.get_type::())?; + m.add("TableNotFound", py.get_type::())?; m.add("WorkbookClosed", py.get_type::())?; Ok(()) } diff --git a/src/types/errors.rs b/src/types/errors.rs index a4220b9..64ab8e5 100644 --- a/src/types/errors.rs +++ b/src/types/errors.rs @@ -1,10 +1,12 @@ -use calamine::Error as CalamineCrateError; -use pyo3::create_exception; -use pyo3::exceptions::PyException; +use calamine::{Error as CalamineCrateError, OdsError, XlsError, XlsbError, XlsxError}; +use pyo3::exceptions::{PyException, PyIOError}; +use pyo3::{create_exception, PyErr}; #[derive(Debug)] pub enum Error { Calamine(CalamineCrateError), + TablesNotSupported, + TablesNotLoaded, WorkbookClosed, } @@ -14,3 +16,63 @@ create_exception!(python_calamine, WorksheetNotFound, CalamineError); create_exception!(python_calamine, XmlError, CalamineError); create_exception!(python_calamine, ZipError, CalamineError); create_exception!(python_calamine, WorkbookClosed, CalamineError); +create_exception!(python_calamine, TablesNotSupported, CalamineError); +create_exception!(python_calamine, TablesNotLoaded, CalamineError); +create_exception!(python_calamine, TableNotFound, CalamineError); + +impl From for PyErr { + fn from(val: Error) -> Self { + match val { + Error::Calamine(calamine_error) => match calamine_error { + CalamineCrateError::Io(err) => PyIOError::new_err(err.to_string()), + CalamineCrateError::Ods(ref err) => match err { + OdsError::Io(error) => PyIOError::new_err(error.to_string()), + OdsError::Zip(error) => ZipError::new_err(error.to_string()), + OdsError::Xml(error) => 
XmlError::new_err(error.to_string()), + OdsError::XmlAttr(error) => XmlError::new_err(error.to_string()), + OdsError::Password => PasswordError::new_err(err.to_string()), + OdsError::WorksheetNotFound(error) => { + WorksheetNotFound::new_err(error.to_string()) + } + _ => CalamineError::new_err(err.to_string()), + }, + CalamineCrateError::Xls(ref err) => match err { + XlsError::Io(error) => PyIOError::new_err(error.to_string()), + XlsError::Password => PasswordError::new_err(err.to_string()), + XlsError::WorksheetNotFound(error) => { + WorksheetNotFound::new_err(error.to_string()) + } + _ => CalamineError::new_err(err.to_string()), + }, + CalamineCrateError::Xlsx(ref err) => match err { + XlsxError::Io(error) => PyIOError::new_err(error.to_string()), + XlsxError::Zip(error) => ZipError::new_err(error.to_string()), + XlsxError::Xml(error) => XmlError::new_err(error.to_string()), + XlsxError::XmlAttr(error) => XmlError::new_err(error.to_string()), + XlsxError::XmlEof(error) => XmlError::new_err(error.to_string()), + XlsxError::Password => PasswordError::new_err(err.to_string()), + XlsxError::TableNotFound(error) => TableNotFound::new_err(error.to_string()), + XlsxError::WorksheetNotFound(error) => { + WorksheetNotFound::new_err(error.to_string()) + } + _ => CalamineError::new_err(err.to_string()), + }, + CalamineCrateError::Xlsb(ref err) => match err { + XlsbError::Io(error) => PyIOError::new_err(error.to_string()), + XlsbError::Zip(error) => ZipError::new_err(error.to_string()), + XlsbError::Xml(error) => XmlError::new_err(error.to_string()), + XlsbError::XmlAttr(error) => XmlError::new_err(error.to_string()), + XlsbError::Password => PasswordError::new_err(err.to_string()), + XlsbError::WorksheetNotFound(error) => { + WorksheetNotFound::new_err(error.to_string()) + } + _ => CalamineError::new_err(err.to_string()), + }, + _ => CalamineError::new_err(calamine_error.to_string()), + }, + Error::WorkbookClosed => WorkbookClosed::new_err("".to_string()), + 
Error::TablesNotLoaded => TablesNotLoaded::new_err("".to_string()), + Error::TablesNotSupported => TablesNotSupported::new_err("".to_string()), + } + } +} diff --git a/src/types/mod.rs b/src/types/mod.rs index 60fed03..c5cff3d 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -1,10 +1,13 @@ mod cell; mod errors; mod sheet; +mod table; mod workbook; pub use cell::CellValue; pub use errors::{ - CalamineError, Error, PasswordError, WorkbookClosed, WorksheetNotFound, XmlError, ZipError, + CalamineError, Error, PasswordError, TableNotFound, TablesNotLoaded, TablesNotSupported, + WorkbookClosed, WorksheetNotFound, XmlError, ZipError, }; pub use sheet::{CalamineSheet, SheetMetadata, SheetTypeEnum, SheetVisibleEnum}; +pub use table::CalamineTable; pub use workbook::CalamineWorkbook; diff --git a/src/types/table.rs b/src/types/table.rs new file mode 100644 index 0000000..0bf72ac --- /dev/null +++ b/src/types/table.rs @@ -0,0 +1,71 @@ +use std::sync::Arc; + +use calamine::{Data, Range}; +use pyo3::prelude::*; +use pyo3::types::PyList; + +use crate::CellValue; + +#[pyclass] +#[derive(Clone, PartialEq)] +pub struct CalamineTable { + #[pyo3(get)] + name: String, + #[pyo3(get)] + sheet: String, + #[pyo3(get)] + columns: Vec, + range: Arc>, +} + +impl CalamineTable { + pub fn new(name: String, sheet_name: String, columns: Vec, range: Range) -> Self { + CalamineTable { + name, + sheet: sheet_name, + columns, + range: Arc::new(range), + } + } +} + +#[pymethods] +impl CalamineTable { + fn __repr__(&self) -> PyResult { + Ok(format!("CalamineTable(name='{}')", self.name)) + } + + #[getter] + fn height(&self) -> usize { + self.range.height() + } + + #[getter] + fn width(&self) -> usize { + self.range.width() + } + + #[getter] + fn start(&self) -> Option<(u32, u32)> { + self.range.start() + } + + #[getter] + fn end(&self) -> Option<(u32, u32)> { + self.range.end() + } + + fn to_python(slf: PyRef<'_, Self>) -> PyResult> { + let range = Arc::clone(&slf.range); + + let py_list = 
PyList::empty(slf.py()); + + for row in range.rows() { + let py_row = PyList::new(slf.py(), row.iter().map(<&Data as Into>::into))?; + + py_list.append(py_row)?; + } + + Ok(py_list) + } +} diff --git a/src/types/workbook.rs b/src/types/workbook.rs index c197d46..a96644f 100644 --- a/src/types/workbook.rs +++ b/src/types/workbook.rs @@ -10,8 +10,7 @@ use pyo3::prelude::*; use pyo3::types::PyType; use pyo3_file::PyFileLikeObject; -use crate::utils::err_to_py; -use crate::{CalamineSheet, Error, SheetMetadata, WorksheetNotFound}; +use crate::{CalamineSheet, CalamineTable, Error, SheetMetadata, WorksheetNotFound}; enum SheetsEnum { File(Sheets>), @@ -19,6 +18,33 @@ enum SheetsEnum { None, } +enum WorkbookType { + Xls, + Xlsx, + Xlsb, + Ods, +} + +impl From<&SheetsEnum> for WorkbookType { + fn from(sheets: &SheetsEnum) -> Self { + match sheets { + SheetsEnum::File(f) => match f { + Sheets::Xls(_) => WorkbookType::Xls, + Sheets::Xlsx(_) => WorkbookType::Xlsx, + Sheets::Xlsb(_) => WorkbookType::Xlsb, + Sheets::Ods(_) => WorkbookType::Ods, + }, + SheetsEnum::FileLike(f) => match f { + Sheets::Xls(_) => WorkbookType::Xls, + Sheets::Xlsx(_) => WorkbookType::Xlsx, + Sheets::Xlsb(_) => WorkbookType::Xlsb, + Sheets::Ods(_) => WorkbookType::Ods, + }, + SheetsEnum::None => unreachable!(), + } + } +} + impl SheetsEnum { fn sheets_metadata(&self) -> Vec { match self { @@ -75,17 +101,94 @@ impl SheetsEnum { SheetsEnum::None => Err(Error::WorkbookClosed), } } + + fn load_tables(&mut self) -> Result<(), Error> { + match self { + SheetsEnum::File(f) => match f { + Sheets::Xlsx(xlsx_f) => xlsx_f + .load_tables() + .map_err(CalamineCrateError::Xlsx) + .map_err(Error::Calamine), + _ => Err(Error::TablesNotSupported), + }, + SheetsEnum::FileLike(f) => match f { + Sheets::Xlsx(xlsx_f) => xlsx_f + .load_tables() + .map_err(CalamineCrateError::Xlsx) + .map_err(Error::Calamine), + _ => Err(Error::TablesNotSupported), + }, + SheetsEnum::None => Err(Error::WorkbookClosed), + } + } + + fn 
table_names(&self) -> Result, Error> { + match self { + SheetsEnum::File(f) => match f { + Sheets::Xlsx(xlsx_f) => Ok(xlsx_f.table_names()), + _ => Err(Error::TablesNotSupported), + }, + SheetsEnum::FileLike(f) => match f { + Sheets::Xlsx(xlsx_f) => Ok(xlsx_f.table_names()), + _ => Err(Error::TablesNotSupported), + }, + SheetsEnum::None => Err(Error::WorkbookClosed), + } + .map(|v| { + v.iter() + .map(|s| s.to_owned().to_owned()) + .collect::>() + }) + } + + fn get_table_by_name(&mut self, name: &str) -> Result { + match self { + SheetsEnum::File(f) => match f { + Sheets::Xlsx(xlsx_f) => xlsx_f + .table_by_name(name) + .map_err(CalamineCrateError::Xlsx) + .map_err(Error::Calamine) + .map(|t| { + CalamineTable::new( + t.name().to_owned(), + t.sheet_name().to_owned(), + t.columns().iter().map(|s| s.to_owned()).collect(), + t.data().to_owned(), + ) + }), + _ => Err(Error::TablesNotSupported), + }, + SheetsEnum::FileLike(f) => match f { + Sheets::Xlsx(xlsx_f) => xlsx_f + .table_by_name(name) + .map_err(CalamineCrateError::Xlsx) + .map_err(Error::Calamine) + .map(|t| { + CalamineTable::new( + t.name().to_owned(), + t.sheet_name().to_owned(), + t.columns().iter().map(|s| s.to_owned()).collect(), + t.data().to_owned(), + ) + }), + _ => Err(Error::TablesNotSupported), + }, + SheetsEnum::None => Err(Error::WorkbookClosed), + } + } } #[pyclass] pub struct CalamineWorkbook { #[pyo3(get)] path: Option, + workbook_type: WorkbookType, sheets: SheetsEnum, #[pyo3(get)] sheets_metadata: Vec, #[pyo3(get)] sheet_names: Vec, + table_names: Option>, } #[pymethods] @@ -98,31 +201,38 @@ impl CalamineWorkbook { } #[classmethod] - #[pyo3(name = "from_object")] + #[pyo3(name = "from_object", signature = (path_or_filelike, load_tables=false))] fn py_from_object( _cls: &Bound<'_, PyType>, py: Python<'_>, path_or_filelike: Py, + load_tables: bool, ) -> PyResult { - Self::from_object(py, path_or_filelike) + Self::from_object(py, path_or_filelike, load_tables) } #[classmethod] - #[pyo3(name = 
"from_filelike")] + #[pyo3(name = "from_filelike", signature = (filelike, load_tables=false))] fn py_from_filelike( _cls: &Bound<'_, PyType>, py: Python<'_>, filelike: Py, + load_tables: bool, ) -> PyResult { - py.detach(|| Self::from_filelike(filelike)) + py.detach(|| Self::from_filelike(filelike, load_tables)) } #[classmethod] - #[pyo3(name = "from_path")] - fn py_from_path(_cls: &Bound<'_, PyType>, py: Python<'_>, path: Py) -> PyResult { + #[pyo3(name = "from_path", signature = (path, load_tables=false))] + fn py_from_path( + _cls: &Bound<'_, PyType>, + py: Python<'_>, + path: Py, + load_tables: bool, + ) -> PyResult { if let Ok(string_ref) = path.extract::(py) { let path = string_ref.to_string_lossy().to_string(); - return py.detach(|| Self::from_path(&path)); + return py.detach(|| Self::from_path(&path, load_tables)); } Err(PyTypeError::new_err("")) @@ -138,15 +248,30 @@ impl CalamineWorkbook { py.detach(|| self.get_sheet_by_index(index)) } + #[getter] + fn table_names(&self) -> PyResult> { + match &self.workbook_type { + WorkbookType::Xlsx => match &self.table_names { + Some(v) => Ok(v.clone()), + None => Err(Error::TablesNotLoaded.into()), + }, + _ => Err(Error::TablesNotSupported.into()), + } + } + + #[pyo3(name = "get_table_by_name")] + fn py_get_table_by_name(&mut self, py: Python<'_>, name: &str) -> PyResult { + py.detach(|| self.get_table_by_name(name)) + } + fn close(&mut self) -> PyResult<()> { match self.sheets { - SheetsEnum::None => Err(Error::WorkbookClosed), + SheetsEnum::None => Err(Error::WorkbookClosed.into()), _ => { self.sheets = SheetsEnum::None; Ok(()) } } - .map_err(err_to_py) } fn __enter__(slf: Py) -> Py { @@ -164,56 +289,68 @@ impl CalamineWorkbook { } impl CalamineWorkbook { - pub fn from_object(py: Python<'_>, path_or_filelike: Py) -> PyResult { + pub fn from_object( + py: Python<'_>, + path_or_filelike: Py, + load_tables: bool, + ) -> PyResult { if let Ok(string_ref) = path_or_filelike.extract::(py) { let path = 
string_ref.to_string_lossy().to_string(); - return py.detach(|| Self::from_path(&path)); + return py.detach(|| Self::from_path(&path, load_tables)); } - py.detach(|| Self::from_filelike(path_or_filelike)) + py.detach(|| Self::from_filelike(path_or_filelike, load_tables)) } - pub fn from_filelike(filelike: Py) -> PyResult { + pub fn from_filelike(filelike: Py, load_tables: bool) -> PyResult { let mut buf = vec![]; PyFileLikeObject::with_requirements(filelike, true, false, true, false)? .read_to_end(&mut buf)?; let reader = Cursor::new(buf); - let sheets = SheetsEnum::FileLike( - open_workbook_auto_from_rs(reader) - .map_err(Error::Calamine) - .map_err(err_to_py)?, - ); + let mut sheets = + SheetsEnum::FileLike(open_workbook_auto_from_rs(reader).map_err(Error::Calamine)?); let sheet_names = sheets.sheet_names().to_owned(); let sheets_metadata = sheets.sheets_metadata().to_owned(); + let mut table_names: Option> = None; + if load_tables { + sheets.load_tables()?; + table_names = Some(sheets.table_names()?); + } + Ok(Self { path: None, + workbook_type: WorkbookType::from(&sheets), sheets, sheets_metadata, sheet_names, + table_names, }) } - pub fn from_path(path: &str) -> PyResult { - let sheets = SheetsEnum::File( - open_workbook_auto(path) - .map_err(Error::Calamine) - .map_err(err_to_py)?, - ); + pub fn from_path(path: &str, load_tables: bool) -> PyResult { + let mut sheets = SheetsEnum::File(open_workbook_auto(path).map_err(Error::Calamine)?); let sheet_names = sheets.sheet_names().to_owned(); let sheets_metadata = sheets.sheets_metadata().to_owned(); + let mut table_names: Option> = None; + if load_tables { + sheets.load_tables()?; + table_names = Some(sheets.table_names()?); + } Ok(Self { path: Some(path.to_string()), + workbook_type: WorkbookType::from(&sheets), sheets, sheets_metadata, sheet_names, + table_names, }) } fn get_sheet_by_name(&mut self, name: &str) -> PyResult { - let range = self.sheets.worksheet_range(name).map_err(err_to_py)?; - let 
merge_cells_range = self.sheets.worksheet_merge_cells(name).map_err(err_to_py)?; + let range = self.sheets.worksheet_range(name)?; + let merge_cells_range = self.sheets.worksheet_merge_cells(name)?; Ok(CalamineSheet::new( name.to_owned(), range, @@ -229,4 +366,14 @@ impl CalamineWorkbook { .to_string(); self.get_sheet_by_name(&name) } + + fn get_table_by_name(&mut self, name: &str) -> PyResult { + match &self.workbook_type { + WorkbookType::Xlsx => match &self.table_names { + Some(_) => Ok(self.sheets.get_table_by_name(name)?), + None => Err(Error::TablesNotLoaded.into()), + }, + _ => Err(Error::TablesNotSupported.into()), + } + } } diff --git a/src/utils.rs b/src/utils.rs deleted file mode 100644 index 185c33d..0000000 --- a/src/utils.rs +++ /dev/null @@ -1,55 +0,0 @@ -use calamine::{Error as CalamineCrateError, OdsError, XlsError, XlsbError, XlsxError}; -use pyo3::exceptions::PyIOError; -use pyo3::PyErr; - -use crate::{ - CalamineError, Error, PasswordError, WorkbookClosed, WorksheetNotFound, XmlError, ZipError, -}; - -pub fn err_to_py(e: Error) -> PyErr { - match e { - Error::Calamine(calamine_error) => match calamine_error { - CalamineCrateError::Io(err) => PyIOError::new_err(err.to_string()), - CalamineCrateError::Ods(ref err) => match err { - OdsError::Io(error) => PyIOError::new_err(error.to_string()), - OdsError::Zip(error) => ZipError::new_err(error.to_string()), - OdsError::Xml(error) => XmlError::new_err(error.to_string()), - OdsError::XmlAttr(error) => XmlError::new_err(error.to_string()), - OdsError::Password => PasswordError::new_err(err.to_string()), - OdsError::WorksheetNotFound(error) => WorksheetNotFound::new_err(error.to_string()), - _ => CalamineError::new_err(err.to_string()), - }, - CalamineCrateError::Xls(ref err) => match err { - XlsError::Io(error) => PyIOError::new_err(error.to_string()), - XlsError::Password => PasswordError::new_err(err.to_string()), - XlsError::WorksheetNotFound(error) => WorksheetNotFound::new_err(error.to_string()), - 
_ => CalamineError::new_err(err.to_string()), - }, - CalamineCrateError::Xlsx(ref err) => match err { - XlsxError::Io(error) => PyIOError::new_err(error.to_string()), - XlsxError::Zip(error) => ZipError::new_err(error.to_string()), - XlsxError::Xml(error) => XmlError::new_err(error.to_string()), - XlsxError::XmlAttr(error) => XmlError::new_err(error.to_string()), - XlsxError::XmlEof(error) => XmlError::new_err(error.to_string()), - XlsxError::Password => PasswordError::new_err(err.to_string()), - XlsxError::WorksheetNotFound(error) => { - WorksheetNotFound::new_err(error.to_string()) - } - _ => CalamineError::new_err(err.to_string()), - }, - CalamineCrateError::Xlsb(ref err) => match err { - XlsbError::Io(error) => PyIOError::new_err(error.to_string()), - XlsbError::Zip(error) => ZipError::new_err(error.to_string()), - XlsbError::Xml(error) => XmlError::new_err(error.to_string()), - XlsbError::XmlAttr(error) => XmlError::new_err(error.to_string()), - XlsbError::Password => PasswordError::new_err(err.to_string()), - XlsbError::WorksheetNotFound(error) => { - WorksheetNotFound::new_err(error.to_string()) - } - _ => CalamineError::new_err(err.to_string()), - }, - _ => CalamineError::new_err(calamine_error.to_string()), - }, - Error::WorkbookClosed => WorkbookClosed::new_err("".to_string()), - } -} diff --git a/tests/data/table-multiple.xlsx b/tests/data/table-multiple.xlsx new file mode 100644 index 0000000..77d022d Binary files /dev/null and b/tests/data/table-multiple.xlsx differ diff --git a/tests/test_tables.py b/tests/test_tables.py new file mode 100644 index 0000000..791e8e2 --- /dev/null +++ b/tests/test_tables.py @@ -0,0 +1,73 @@ +from pathlib import Path + +import pytest +from python_calamine import ( + CalamineWorkbook, + TableNotFound, + TablesNotLoaded, + TablesNotSupported, +) + +PATH = Path(__file__).parent / "data" + + +# NOTE: "table-multiple.xlsx" from tafia/calamine +def test_table_names_xlsx(): + reader = CalamineWorkbook.from_object( + PATH / 
"table-multiple.xlsx", load_tables=True + ) + + assert reader.table_names == ["Inventory", "Pricing", "Sales_Bob", "Sales_Alice"] + + +def test_table_names_not_loaded(): + reader = CalamineWorkbook.from_object(PATH / "table-multiple.xlsx") + + with pytest.raises(TablesNotLoaded): + reader.table_names + + +def test_table_names_not_supported(): + with pytest.raises(TablesNotSupported): + CalamineWorkbook.from_object(PATH / "base.xlsb", load_tables=True) + + +def test_table_get_by_name(): + reader = CalamineWorkbook.from_object( + PATH / "table-multiple.xlsx", load_tables=True + ) + + table = reader.get_table_by_name("Inventory") + + assert table.sheet == "Sheet1" + assert table.name == "Inventory" + assert table.columns == ["Item", "Type", "Quantity"] + + assert table.height == 4 + assert table.width == 3 + + assert table.start == (1, 0) + assert table.end == (4, 2) + + assert table.to_python() == [ + [1.0, "Apple", 50.0], + [2.0, "Banana", 200.0], + [3.0, "Orange", 60.0], + [4.0, "Pear", 100.0], + ] + + +def test_table_get_by_name_not_found(): + reader = CalamineWorkbook.from_object( + PATH / "table-multiple.xlsx", load_tables=True + ) + + with pytest.raises(TableNotFound): + reader.get_table_by_name("not found table") + + +def test_table_get_by_name_not_supported(): + reader = CalamineWorkbook.from_object(PATH / "base.xlsb") + + with pytest.raises(TablesNotSupported): + reader.get_table_by_name("not found table")