Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ jobs:
include:
- { python: "3.12", os: "ubuntu-latest", session: "pre-commit" }
- { python: "3.12", os: "ubuntu-latest", session: "safety" }
# - { python: "3.12", os: "ubuntu-latest", session: "mypy" }
# - { python: "3.11", os: "ubuntu-latest", session: "mypy" }
# - { python: "3.10", os: "ubuntu-latest", session: "mypy" }
# - { python: "3.9", os: "ubuntu-latest", session: "mypy" }
# - { python: "3.8", os: "ubuntu-latest", session: "mypy" }
- { python: "3.12", os: "ubuntu-latest", session: "mypy" }
- { python: "3.11", os: "ubuntu-latest", session: "mypy" }
- { python: "3.10", os: "ubuntu-latest", session: "mypy" }
- { python: "3.9", os: "ubuntu-latest", session: "mypy" }
- { python: "3.8", os: "ubuntu-latest", session: "mypy" }
- { python: "3.12", os: "ubuntu-latest", session: "tests" }
- { python: "3.11", os: "ubuntu-latest", session: "tests" }
- { python: "3.10", os: "ubuntu-latest", session: "tests" }
Expand Down
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def safety(session: nox.Session) -> None:
)


@nox.session(python=[python_versions[0], python_versions[-1]])
@nox.session(python=python_versions)
def mypy(session: nox.Session) -> None:
"""Type-check using mypy."""
args = session.posargs or ["src", "tests", "docs/conf.py"]
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,12 @@ warn_unreachable = true
pretty = true
show_column_numbers = true
show_error_context = true
disallow_untyped_defs = true
disallow_any_unimported = true
no_implicit_optional = true
check_untyped_defs = true
warn_return_any = true
warn_unused_ignores = true

[tool.ruff]
src = ["src", "tests"]
Expand Down
94 changes: 46 additions & 48 deletions src/invoice2data/extract/plugins/tables.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
"""Plugin to extract tables from an invoice."""

import re
from collections import OrderedDict
from logging import getLogger
from typing import TYPE_CHECKING
from typing import Any
from typing import Dict
from typing import Optional

from ..utils import _apply_grouping


if TYPE_CHECKING:
from ..invoice_template import InvoiceTemplate


logger = getLogger(__name__)

DEFAULT_OPTIONS = {"field_separator": r"\s+", "line_separator": r"\n"}


def extract(
self: "OrderedDict[str, Any]", content: str, output: Dict[str, Any]
self: "InvoiceTemplate", content: str, output: Dict[str, Any]
) -> Optional[Dict[str, Any]]:
"""Try to extract tables from an invoice.

Expand Down Expand Up @@ -61,7 +65,7 @@ def extract(


def _extract_and_validate_settings(
self: "OrderedDict[str, Any]",
self: "InvoiceTemplate",
table: Dict[str, Any],
) -> Optional[Dict[str, Any]]:
"""Extract and validate table settings.
Expand Down Expand Up @@ -114,7 +118,7 @@ def _extract_table_body(content: str, table: Dict[str, Any]) -> Optional[str]:


def _process_table_lines(
self: "OrderedDict[str, Any]",
self: "InvoiceTemplate",
table: Dict[str, Any],
table_body: str,
) -> Optional[Dict[str, Any]]:
Expand Down Expand Up @@ -154,7 +158,7 @@ def _process_table_lines(


def _process_table_line( # noqa: C901
self: "OrderedDict[str, Any]",
self: "InvoiceTemplate",
table: Dict[str, Any],
line: str,
types: Dict[str, Any],
Expand All @@ -173,49 +177,43 @@ def _process_table_line( # noqa: C901
bool: True if processing is successful, False if date parsing fails.
"""
match = re.search(table["body"], line)
if match:
for field, value in match.groupdict().items():
logger.debug(
(
"field=\033[1m\033[93m%s\033[0m |"
"regex=\033[36m%s\033[0m | "
"matches=\033[1m\033[92m['%s']\033[0m"
),
field,
match.re.pattern,
value,
)

if field.startswith("date") or field.endswith("date"):
value = self.parse_date(value) # type: ignore[attr-defined]
if not value:
logger.error("Date parsing failed on date *%s*", value)
return False
elif field.startswith("amount"):
value = self.parse_number(value) # type: ignore[attr-defined]
elif field in types:
value = self.coerce_type(value, types[field]) # type: ignore[attr-defined]
elif table.get("fields"):
# Writing templates is hard, so we also support the following format
# in case someone mixes up the syntax:
# fields:
# example_field:
# type: float
# group: sum
field_set = table["fields"].get(field, {})
if "type" in field_set:
value = self.coerce_type(value, field_set.get("type")) # type: ignore[attr-defined]

if field in output:
# Ensure output[field] is a list before appending
if not isinstance(output[field], list):
output[field] = [output[field]]
output[field].append(value)
else:
output[field] = value
# Return True if a match is found and processed successfully
return True
else:
if not match:
logger.debug("The following line doesn't match anything:\n*%s*", line)
# Return True to continue processing even if a line doesn't match
return True

for field, value in match.groupdict().items():
logger.debug(
(
"field=\033[1m\033[93m%s\033[0m |"
"regex=\033[36m%s\033[0m | "
"matches=\033[1m\033[92m['%s']\033[0m"
),
field,
match.re.pattern,
value,
)

if field.startswith("date") or field.endswith("date"):
value = self.parse_date(value)
if not value:
logger.error("Date parsing failed on date *%s*", value)
return False
elif field.startswith("amount"):
value = self.parse_number(value)
elif field in types:
value = self.coerce_type(value, types[field])
elif table.get("fields"):
field_set = table["fields"].get(field, {})
if "type" in field_set:
value = self.coerce_type(value, field_set.get("type"))

if field in output:
# Ensure output[field] is a list before appending
if not isinstance(output[field], list):
output[field] = [output[field]]
output[field].append(value)
else:
output[field] = value
# Return True if a match is found and processed successfully
return True
1 change: 1 addition & 0 deletions src/invoice2data/py.typed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
partial
Loading
Loading