From e1cefdc342d93e8f0c475544cc2ec3d20821e4d7 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Wed, 7 Jan 2026 21:50:53 +0100 Subject: [PATCH 1/9] Login mit hinterlegtem Passwort (Flask Session Cookie) --- Dockerfile | 2 +- README.md | 5 +++ app/config.py | 9 +++++- app/routes.py | 66 +++++++++++++++++++++++++++++++++------- app/server.py | 1 + app/static/css/style.css | 4 +++ app/templates/index.html | 1 + app/templates/login.html | 39 ++++++++++++++++++++++++ docker-compose.yaml | 1 - docker/entrypoint.sh | 7 ----- tests/config.py | 8 ++++- tests/conftest.py | 1 + 12 files changed, 122 insertions(+), 22 deletions(-) create mode 100644 app/templates/login.html delete mode 100755 docker/entrypoint.sh diff --git a/Dockerfile b/Dockerfile index 4dc0bd0..31199e9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,4 +21,4 @@ RUN ln -sf /dev/stdout /var/log/apache2/access.log && \ EXPOSE 80 -ENTRYPOINT ["/app/docker/entrypoint.sh"] +ENTRYPOINT ["apachectl", "-D", "FOREGROUND"] diff --git a/README.md b/README.md index f1690ce..4299d7f 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,9 @@ docker compose build docker compose down && docker compose up -d ``` +**Ändere `AUTH_PASSWORD` in der `docker-compose.yaml` !** + + ### Standalone non-Docker Setup ``` @@ -36,6 +39,8 @@ pip install -r requirements.txt .venv/bin/python3.12 app/server.py ``` +**Ändere das Login Passwort in der `app/config.py` !** + ### Start - Importiere Kontoumsätze über CSV Listen oder PDF Kontoauszüge deiner Bank ([unterstützte Banken](#unterstützte-banken)) diff --git a/app/config.py b/app/config.py index 54a9ae3..74ba86c 100644 --- a/app/config.py +++ b/app/config.py @@ -1,12 +1,19 @@ #!/usr/bin/python3 """App Settings zum Zeitpunkt der Initalisierung von PynanceParser""" +import os + # Logging (will also log in webserver logs if used via wsgi) LOG_ACCESS_FILE = '/tmp/pynance_access.log' LOG_ERROR_FILE = '/tmp/pynance_error.log' # Options: -DATABASE_BACKEND = 'tiny' # or 'mongo' + +# - Login Password (overwrite to not use the system env variable) +PASSWORD = os.getenv('AUTH_PASSWORD', 'change_this_password') + +# - Database Backend ('tiny' or 'mongo') +DATABASE_BACKEND = 'tiny' # For tiny: Path to the Folder (/path/to) # For mongo: MongoDB URI diff --git a/app/routes.py b/app/routes.py index cf99dfa..81ccae9 100644 --- a/app/routes.py +++ b/app/routes.py @@ -4,7 +4,8 @@ import os from datetime import datetime from flask import request, current_app, render_template, redirect, \ - make_response, send_from_directory + make_response, send_from_directory, session +import secrets class Routes: @@ -37,6 +38,59 @@ def version_string(): 'version': current_app.config.get('VERSION', 'unknown') } + @current_app.before_request + def require_login(): + """ + Before Request Handler, der sicherstellt, dass der User eingeloggt ist. + Falls nicht, wird dieser immer zur Login Seite umeleitet- + """ + # Allow PyTest Client + if current_app.config.get('TESTING', False): + return + + # Allow access to login route + if request.endpoint == "login": + return + + # Allow access to CSS files + if request.endpoint == "static" and request.path.endswith(".css"): + return + + # Allow access to JS files + if request.endpoint == "static" and request.path.endswith(".js"): + return + + # Block everything else unless logged in + if not session.get("logged_in"): + return redirect('/login') + + @current_app.route("/login", methods=["GET", "POST"]) + def login(): + """ + Login Seite, die ohne gültiges Cookie immer aufgerufen wird. + Args (form): + password, str: Passwort für den Login + Returns: + html: Login Formular + """ + error = None + + if request.method == "POST": + password = request.form.get("password", "") + if secrets.compare_digest(password, current_app.config['PASSWORD']): + session["logged_in"] = True + return redirect('/') + + error = "Invalid password" + + return render_template('login.html', error=error) + + @current_app.route("/logout") + def logout(): + """Logout Seite, welche das Cookie löscht und zur Loin Seite weiterleitet.""" + session.clear() + return redirect('/login') + @current_app.route('/', methods=['GET']) def welcome() -> str: """ @@ -215,16 +269,6 @@ def show_stats(iban) -> str: return render_template('stats.html', sums=sums, IBAN=iban, filters=frontend_filters) - @current_app.route('/logout', methods=['GET']) - def logout(): - """ - Loggt den User aus der Session aus und leitet zur Startseite weiter. - - Returns: - redirect: Weiterleitung zur Startseite - """ - return redirect('/') - @current_app.route('/sw.js') def sw(): response = make_response( diff --git a/app/server.py b/app/server.py index 3483387..b364551 100755 --- a/app/server.py +++ b/app/server.py @@ -39,6 +39,7 @@ def create_app(config_path: str) -> Flask: template_folder=os.path.join(parent_dir, 'app', 'templates'), static_folder=os.path.join(parent_dir, 'app', 'static') ) + app.secret_key = os.urandom(24).hex() # Global Config app.config.from_pyfile(config_path) diff --git a/app/static/css/style.css b/app/static/css/style.css index f429d14..6f742ec 100644 --- a/app/static/css/style.css +++ b/app/static/css/style.css @@ -38,6 +38,10 @@ main { @media (max-width: 1023px) {.hide-m {display:none !important;}} /* Color classes */ +.error { + color: var(--pico-color-red-600); + font-weight: bold; +} .delete { color: white; border-color: var(--pico-color-red-600); diff --git a/app/templates/index.html b/app/templates/index.html index 413b17e..e474266 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -55,6 +55,7 @@

Pynance Parser

Das Konto wird bei einem Import automatisch erstellt.

+

Logout

diff --git a/app/templates/login.html b/app/templates/login.html new file mode 100644 index 0000000..acd46f0 --- /dev/null +++ b/app/templates/login.html @@ -0,0 +1,39 @@ +{% extends 'layout.html' %} + +{% block content %} + +
+
+

Pynance Parser

+

Manage Bankaccounts like a Boss !

+
+
+ +
+ +
+ +
+ Login Passwort +
+
+
+
+ + +
+
+
+ +
+ + {% if error %} +
+

{{ error }}

+
+ {% endif %} + +
+ + +{% endblock %} diff --git a/docker-compose.yaml b/docker-compose.yaml index 6fac8ef..510e43c 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -7,7 +7,6 @@ services: volumes: - ./settings:/app/settings environment: - - AUTH_USER=username - AUTH_PASSWORD=yourpasswordhere mongo: diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh deleted file mode 100755 index 7ceed2e..0000000 --- a/docker/entrypoint.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -# Generate the .htpasswd file -htpasswd -cb /etc/apache2/.htpasswd "$AUTH_USER" "$AUTH_PASSWORD" - -# Start Apache in the foreground -exec apachectl -D FOREGROUND diff --git a/tests/config.py b/tests/config.py index 43a980c..8e9a37a 100644 --- a/tests/config.py +++ b/tests/config.py @@ -1,13 +1,19 @@ #!/usr/bin/python3 """App Settings zum Zeitpunkt der Initalisierung von PynanceParser (Testinstanz)""" +import os LOG_ACCESS_FILE = '/tmp/pynance_access.log' LOG_ERROR_FILE = '/tmp/pynance_error.log' # Options: -DATABASE_BACKEND = 'tiny' + +# - Login Password (overwrite to not use the system env variable) +PASSWORD = os.getenv('AUTH_PASSWORD', 'change_this_password') + +# - Database Backend ('tiny' or 'mongo') #DATABASE_BACKEND = 'mongo' +DATABASE_BACKEND = 'tiny' #DATABASE_URI = 'mongodb://testuser:testpassword@localhost:27017' # For mongo (URI) DATABASE_URI = '/tmp/pynance-test' # For tiny (/path/to/) diff --git a/tests/conftest.py b/tests/conftest.py index 361bb1d..90822ae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -29,6 +29,7 @@ def test_app(): # App Context app = create_app(config_path) + app.config['TESTING'] = True with app.app_context(): yield app From 97730b879a62daf4f643fa8342d63b543138f244 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Thu, 8 Jan 2026 20:44:17 +0100 Subject: [PATCH 2/9] Fix BasicAuth to Cookie Sec --- docker/apache2.conf | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docker/apache2.conf b/docker/apache2.conf index c425473..031933a 100644 --- a/docker/apache2.conf +++ b/docker/apache2.conf @@ -6,10 +6,9 @@ WSGIScriptAlias / /app/app/server.py - AuthType Basic - AuthName "PynanceParser Basic Auth" - AuthUserFile /etc/apache2/.htpasswd - Require valid-user + Options Indexes FollowSymLinks + AllowOverride None + Require all granted From 19f6665aad4358823d3c4aa2597df39b88d3bd91 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Thu, 8 Jan 2026 20:58:54 +0100 Subject: [PATCH 3/9] Login Form Styling --- app/static/css/style.css | 5 ++--- app/templates/login.html | 28 +++++++++++++--------------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/app/static/css/style.css b/app/static/css/style.css index 6f742ec..f8ba1f3 100644 --- a/app/static/css/style.css +++ b/app/static/css/style.css @@ -15,9 +15,8 @@ main { flex: 1; } -.container.hero { - margin-top: 10%; -} +.container.hero {margin-top: 10%;} + main.container.hero {margin-top: 5%;} /* Pico Tooltip Feature with newlines*/ .long-tooltip:before { diff --git a/app/templates/login.html b/app/templates/login.html index acd46f0..dac2f99 100644 --- a/app/templates/login.html +++ b/app/templates/login.html @@ -9,21 +9,19 @@

Pynance Parser

-
- -
- -
- Login Passwort -
-
-
-
- - -
-
-
+
+ +
+ +
+
+ + +
+
From ba56ab13b6f60e7f032c5d238c94eb5fb00341c9 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Thu, 8 Jan 2026 22:17:25 +0100 Subject: [PATCH 4/9] Logout route and style --- app/static/css/style.css | 4 ++++ app/templates/iban.html | 2 +- app/templates/index.html | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/app/static/css/style.css b/app/static/css/style.css index f8ba1f3..5881f53 100644 --- a/app/static/css/style.css +++ b/app/static/css/style.css @@ -18,6 +18,10 @@ main { .container.hero {margin-top: 10%;} main.container.hero {margin-top: 5%;} +.margin-top{ + margin-top: 2em; +} + /* Pico Tooltip Feature with newlines*/ .long-tooltip:before { white-space: wrap; diff --git a/app/templates/iban.html b/app/templates/iban.html index 6dc3c89..fdac2fd 100644 --- a/app/templates/iban.html +++ b/app/templates/iban.html @@ -26,7 +26,7 @@

📊 Statistik
  • - 🚪 Logout + 📚 Konten
  • diff --git a/app/templates/index.html b/app/templates/index.html index e474266..b885618 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -5,7 +5,7 @@

    Pynance Parser

    -

    Manage Bankaccounts like a Boss !

    +

    Manage Bankaccounts like a Boss !

    @@ -55,7 +55,7 @@

    Pynance Parser

    Das Konto wird bei einem Import automatisch erstellt.

    -

    Logout

    +

    Logout

    From 7593454e84e5aefc3da71b43d65711de2c0dd161 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Thu, 8 Jan 2026 22:26:04 +0100 Subject: [PATCH 5/9] Login Input quirk --- app/templates/login.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/templates/login.html b/app/templates/login.html index dac2f99..f868953 100644 --- a/app/templates/login.html +++ b/app/templates/login.html @@ -11,7 +11,7 @@

    Pynance Parser

    -
    +
    From 0c74cdc9e00bad83a626d45dac7ac955690be39e Mon Sep 17 00:00:00 2001 From: Pitastic Date: Thu, 15 Jan 2026 20:31:23 +0100 Subject: [PATCH 6/9] testfile enhancement (multiple files loop for import tests) --- tests/test_unit_reader_Commerzbank.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tests/test_unit_reader_Commerzbank.py b/tests/test_unit_reader_Commerzbank.py index afc369f..4881547 100644 --- a/tests/test_unit_reader_Commerzbank.py +++ b/tests/test_unit_reader_Commerzbank.py @@ -26,15 +26,27 @@ def test_read_from_csv(test_app): check_transaktion_list(transaction_list) -def test_read_from_pdf(test_app): +# Look for test files and create a tuple list +test_folder = os.path.join('/tmp', 'commerzbank') +test_files = [] +if not os.path.isdir(test_folder): + test_files = [()] +else: + for file in os.listdir(test_folder): + test_files.append( + (os.path.join(test_folder, file)) + ) + +# Using every test file in its own test +@pytest.mark.parametrize("full_path", test_files) +def test_read_from_pdf(test_app, full_path): """Testet das Einlesen einer PDF Datei mit Kontoumsätzen""" - test_file_pdf = os.path.join('/tmp', 'commerzbank.pdf') - if not os.path.isfile(test_file_pdf): - # Test file not provided (sensitive data is not part of git repo) - pytest.skip("Testfile /tmp/commerzbank.pdf not found....skipping") + if not full_path: + # Test files not provided (sensitive data is not part of git repo) + pytest.skip("Testfile not provided....skipping") with test_app.app_context(): - transaction_list = Commerzbank().from_pdf(test_file_pdf) + transaction_list = Commerzbank().from_pdf(full_path) # Check Reader Ergebnisse check_transaktion_list(transaction_list) From 8d11b558ca1345c73612109289150d28e995b9c0 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Fri, 16 Jan 2026 21:51:14 +0100 Subject: [PATCH 7/9] =?UTF-8?q?Datestring=20handling=20geh=C3=A4rtet;=20Ba?= =?UTF-8?q?nk=20Layout-fails=20ber=C3=BCcksichtigt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- reader/Commerzbank.py | 15 ++++++++++----- reader/Generic.py | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/reader/Commerzbank.py b/reader/Commerzbank.py index 769e811..72a1e2b 100644 --- a/reader/Commerzbank.py +++ b/reader/Commerzbank.py @@ -107,24 +107,29 @@ def from_pdf(self, filepath): date_tx = 0 date_tx_year = "1970" # Default Year if not found yet enumerated_table = enumerate(self.all_rows[start_index:end_index]) + for i, row in enumerated_table: if row[0].startswith('Buchungsdatum: '): # All following rows have this 'date_tx' date_tx_year = row[0][-4:] - date_tx = datetime.datetime.strptime( + date_tx = self._parse_from_strftime( row[0][-10:], "%d.%m.%Y" - ).replace(tzinfo=datetime.timezone.utc).timestamp() + ) continue # Skip Header Rows + if len(row[1]) < 5: + # No valid date in this row, skip + # This is needed because the bank itself does not honor + # their own layout and breaking boundaries to neighbour values. + continue + # negativer Betrag in Spalte "Lasten" oder positiv "zu Gunsten" amount = f"-{row[2][:-1]}" if row[2] else row[3] line = { 'date_tx': date_tx, - 'valuta': datetime.datetime.strptime( - f"{row[1]}.{date_tx_year}", "%d.%m.%Y" - ).replace(tzinfo=datetime.timezone.utc).timestamp(), + 'valuta': self._parse_from_strftime(f"{row[1]}.{date_tx_year}", "%d.%m.%Y"), 'art': "", 'text_tx': row[0], 'amount': float(amount.replace('.', '').replace(',', '.')), diff --git a/reader/Generic.py b/reader/Generic.py index 8b3603b..9b05502 100644 --- a/reader/Generic.py +++ b/reader/Generic.py @@ -3,6 +3,7 @@ import datetime import csv +import re class Reader: @@ -95,3 +96,43 @@ def from_http(self, url): ausgelesenen Kontoumsätzen entspricht. """ raise NotImplementedError() + + def _parse_from_strftime(self, date_string, date_format): + """ + Hilfsmethode um ein Datum aus einem String mit einem Format in einen UTC-Timestamp + umzuwandeln. + + Args: + date_string (str): Datum als String + date_format (str): Formatstring wie von `datetime.strptime` verwendet + + Returns: + int: UTC-Timestamp des übergebenen Datums + """ + try: + return datetime.datetime.strptime( + date_string, date_format + ).replace(tzinfo=datetime.timezone.utc).timestamp() + + except ValueError as e: + if "day is out of range for month" in str(e): + # Handle invalid dates like 31.11.2023 -> 30.11.2023 + split_char = re.search(r'[^\d]', date_string) + if not split_char: + raise e # No valid split character found + + # Replace just the day part + split_char = split_char.group(0) + day_index = date_format.split(split_char).index('%d') + if day_index == -1: + raise e # No day part found in format + + date_string_list = date_string.split(split_char) + date_string_list[day_index] = int(date_string_list[day_index]) - 1 + date_string = split_char.join(map(str, date_string_list)) + + return self._parse_from_strftime( + date_string, date_format + ) + + raise e # Re-raise other ValueErrors From 8abce1084c9f907e0b28f26182414b1e51e8e329 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Fri, 16 Jan 2026 22:01:52 +0100 Subject: [PATCH 8/9] =?UTF-8?q?Datefunktionen=20und=20Testschleife=20?= =?UTF-8?q?=C3=BCbernommen?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- reader/Comdirect.py | 8 ++----- reader/Volksbank_Mittelhessen.py | 14 +++-------- tests/test_unit_reader_Comdirect.py | 24 ++++++++++++++----- ...test_unit_reader_Volksbank-Mittelhessen.py | 24 ++++++++++++++----- 4 files changed, 41 insertions(+), 29 deletions(-) diff --git a/reader/Comdirect.py b/reader/Comdirect.py index 7811ce6..f8ed9dc 100644 --- a/reader/Comdirect.py +++ b/reader/Comdirect.py @@ -42,12 +42,8 @@ def from_csv(self, filepath): continue amount = float(row['Umsatz in EUR'].replace(',', '.')) - date_tx = datetime.datetime.strptime( - date_tx, date_format - ).replace(tzinfo=datetime.timezone.utc).timestamp() - valuta = datetime.datetime.strptime( - row['Wertstellung (Valuta)'], date_format - ).replace(tzinfo=datetime.timezone.utc).timestamp() + date_tx = self._parse_from_strftime(date_tx, date_format) + valuta = self._parse_from_strftime(row['Wertstellung (Valuta)'], date_format) text_tx = row['Buchungstext'] match = rx.match(text_tx) diff --git a/reader/Volksbank_Mittelhessen.py b/reader/Volksbank_Mittelhessen.py index d95c186..4070924 100644 --- a/reader/Volksbank_Mittelhessen.py +++ b/reader/Volksbank_Mittelhessen.py @@ -83,11 +83,7 @@ def from_pdf(self, filepath): flavor="stream", table_areas=["60,629,573,51"], columns=["75,112,440,526"], - split_text=True, - #layout_kwargs={ # übernommen von Commerzbank, da ähnliches Layout - # "char_margin": 2, - # "word_margin": 0.5, - #}, + split_text=True ) # Tabellen aller Seiten zusammenfügen @@ -133,12 +129,8 @@ def from_pdf(self, filepath): amount = amount[:-2].replace('.', '').replace(',', '.') line = { - 'date_tx': datetime.datetime.strptime( - f"{row[0]}{date_tx_year}", "%d.%m.%Y" - ).replace(tzinfo=datetime.timezone.utc).timestamp(), - 'valuta': datetime.datetime.strptime( - f"{row[1]}{date_tx_year}", "%d.%m.%Y" - ).replace(tzinfo=datetime.timezone.utc).timestamp(), + 'date_tx': self._parse_from_strftime(f"{row[0]}{date_tx_year}", "%d.%m.%Y"), + 'valuta': self._parse_from_strftime(f"{row[1]}{date_tx_year}", "%d.%m.%Y"), 'art': row[2], 'text_tx': "", 'amount': float(amount), diff --git a/tests/test_unit_reader_Comdirect.py b/tests/test_unit_reader_Comdirect.py index 5e19257..a14ee54 100644 --- a/tests/test_unit_reader_Comdirect.py +++ b/tests/test_unit_reader_Comdirect.py @@ -27,15 +27,27 @@ def test_read_from_csv(test_app): check_transaktion_list(transaction_list) -def test_read_from_pdf(test_app): +# Look for test files and create a tuple list +test_folder = os.path.join('/tmp', 'comdirect') +test_files = [] +if not os.path.isdir(test_folder): + test_files = [()] +else: + for file in os.listdir(test_folder): + test_files.append( + (os.path.join(test_folder, file)) + ) + +# Using every test file in its own test +@pytest.mark.parametrize("full_path", test_files) +def test_read_from_pdf(test_app, full_path): """Testet das Einlesen einer PDF Datei mit Kontoumsätzen""" - test_file_pdf = os.path.join('/tmp', 'comdirect.pdf') - if not os.path.isfile(test_file_pdf): - # Test file not provided (sensitive data is not part of git repo) - pytest.skip("Testfile /tmp/comdirect.pdf not found....skipping") + if not full_path: + # Test files not provided (sensitive data is not part of git repo) + pytest.skip("Testfile not provided....skipping") with test_app.app_context(): - transaction_list = Comdirect().from_pdf(test_file_pdf) + transaction_list = Comdirect().from_pdf(full_path) # Check Reader Ergebnisse check_transaktion_list(transaction_list) diff --git a/tests/test_unit_reader_Volksbank-Mittelhessen.py b/tests/test_unit_reader_Volksbank-Mittelhessen.py index dbf293a..e9d2313 100644 --- a/tests/test_unit_reader_Volksbank-Mittelhessen.py +++ b/tests/test_unit_reader_Volksbank-Mittelhessen.py @@ -27,15 +27,27 @@ def test_read_from_csv(test_app): check_transaktion_list(transaction_list) -def test_read_from_pdf(test_app): +# Look for test files and create a tuple list +test_folder = os.path.join('/tmp', 'volksbank-mittelhessen') +test_files = [] +if not os.path.isdir(test_folder): + test_files = [()] +else: + for file in os.listdir(test_folder): + test_files.append( + (os.path.join(test_folder, file)) + ) + +# Using every test file in its own test +@pytest.mark.parametrize("full_path", test_files) +def test_read_from_pdf(test_app, full_path): """Testet das Einlesen einer PDF Datei mit Kontoumsätzen""" - test_file_pdf = os.path.join('/tmp', 'volksbank-mittelhessen.pdf') - if not os.path.isfile(test_file_pdf): - # Test file not provided (sensitive data is not part of git repo) - pytest.skip("Testfile /tmp/volksbank-mittelhessen.pdf not found....skipping") + if not full_path: + # Test files not provided (sensitive data is not part of git repo) + pytest.skip("Testfile not provided....skipping") with test_app.app_context(): - transaction_list = Volksbank_Mittelhessen().from_pdf(test_file_pdf) + transaction_list = Volksbank_Mittelhessen().from_pdf(full_path) # Check Reader Ergebnisse check_transaktion_list(transaction_list) From bf978d06deca902dc14b2267b72a4bbdddd392e5 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Sat, 17 Jan 2026 22:38:20 +0100 Subject: [PATCH 9/9] Hard-Testing Importers with fixes and more Tests --- reader/Comdirect.py | 6 +++-- reader/Volksbank_Mittelhessen.py | 9 ++++--- tests/test_unit_reader_Comdirect.py | 3 ++- tests/test_unit_reader_Commerzbank.py | 3 ++- tests/test_unit_reader_Generic.py | 26 +++++++++++++++++++ ...test_unit_reader_Volksbank-Mittelhessen.py | 3 ++- 6 files changed, 41 insertions(+), 9 deletions(-) diff --git a/reader/Comdirect.py b/reader/Comdirect.py index f8ed9dc..26ab0f6 100644 --- a/reader/Comdirect.py +++ b/reader/Comdirect.py @@ -82,8 +82,9 @@ def from_pdf(self, filepath): filepath, pages="2-end", flavor="stream", - row_tol=10, - columns=["115,187,305,500"]*16 + row_tol=12, + columns=["115,187,305,500"], + table_areas=["0,830,590,0"] ) #TODO: Hack-araound: https://github.com/atlanhq/camelot/issues/357#issuecomment-520986016 @@ -118,6 +119,7 @@ def from_pdf(self, filepath): for i, row in enumerated_table: if re_datecheck.match(row[0]) is None: + print("skip row", row) continue # Skip Header and unvalid Rows amount = float(row[4].replace('.', '').replace(',', '.')) diff --git a/reader/Volksbank_Mittelhessen.py b/reader/Volksbank_Mittelhessen.py index 4070924..2964c50 100644 --- a/reader/Volksbank_Mittelhessen.py +++ b/reader/Volksbank_Mittelhessen.py @@ -82,7 +82,7 @@ def from_pdf(self, filepath): pages="all", # End -1 flavor="stream", table_areas=["60,629,573,51"], - columns=["75,112,440,526"], + columns=["75,115,455"], split_text=True ) @@ -103,11 +103,11 @@ def from_pdf(self, filepath): if row[2].replace(' ', '').lower().startswith('alterkontostand'): # Last row before transactions start_index = self.all_rows.index(row) + 1 - date_tx_year = row[2][-4:] # Jahr für die Transaktionen merken if row[2].replace(' ', '').lower().startswith('neuerkontostand'): # First row after transactions + final line end_index = self.all_rows.index(row) - 1 + date_tx_year = row[2][-4:] # Jahr für die Transaktionen merken break if date_tx_year is None or re.match(r'^\d{4}$', date_tx_year) is None: @@ -125,8 +125,9 @@ def from_pdf(self, filepath): continue # Skip Header and unvalid Rows # Positives 'Haben' oder negatives 'Soll' - amount = f'-{row[3]}' if row[3] else row[4] - amount = amount[:-2].replace('.', '').replace(',', '.') + amount_prefix = '' if row[3][-1] == 'H' else '-' + amount = f"{amount_prefix}{re.sub(r'H|S', '', row[3]).strip()}" + amount = amount.replace('.', '').replace(',', '.') line = { 'date_tx': self._parse_from_strftime(f"{row[0]}{date_tx_year}", "%d.%m.%Y"), diff --git a/tests/test_unit_reader_Comdirect.py b/tests/test_unit_reader_Comdirect.py index a14ee54..0cb157d 100644 --- a/tests/test_unit_reader_Comdirect.py +++ b/tests/test_unit_reader_Comdirect.py @@ -28,7 +28,7 @@ def test_read_from_csv(test_app): # Look for test files and create a tuple list -test_folder = os.path.join('/tmp', 'comdirect') +test_folder = os.path.join('/tmp', 'Comdirect') test_files = [] if not os.path.isdir(test_folder): test_files = [()] @@ -48,6 +48,7 @@ def test_read_from_pdf(test_app, full_path): with test_app.app_context(): transaction_list = Comdirect().from_pdf(full_path) + assert transaction_list, "No transactions found in PDF file" # Check Reader Ergebnisse check_transaktion_list(transaction_list) diff --git a/tests/test_unit_reader_Commerzbank.py b/tests/test_unit_reader_Commerzbank.py index 4881547..6663496 100644 --- a/tests/test_unit_reader_Commerzbank.py +++ b/tests/test_unit_reader_Commerzbank.py @@ -27,7 +27,7 @@ def test_read_from_csv(test_app): # Look for test files and create a tuple list -test_folder = os.path.join('/tmp', 'commerzbank') +test_folder = os.path.join('/tmp', 'Commerzbank') test_files = [] if not os.path.isdir(test_folder): test_files = [()] @@ -47,6 +47,7 @@ def test_read_from_pdf(test_app, full_path): with test_app.app_context(): transaction_list = Commerzbank().from_pdf(full_path) + assert transaction_list, "No transactions found in PDF file" # Check Reader Ergebnisse check_transaktion_list(transaction_list) diff --git a/tests/test_unit_reader_Generic.py b/tests/test_unit_reader_Generic.py index 9b036c0..36ee396 100644 --- a/tests/test_unit_reader_Generic.py +++ b/tests/test_unit_reader_Generic.py @@ -36,3 +36,29 @@ def test_read_from_pdf(): def test_read_from_http(): """Testet das Einlesen Kontoumsätzen aus einer Online-Quelle""" return None + +def test_strftime_parser(test_app): + """Testet den strftime Parser des Generic Readers""" + with test_app.app_context(): + reader = Generic() + + # Teste verschiedene Datumsformate + date_formats = [ + ("%d.%m.%Y", "25.12.2023", 1703462400.0), + ("%Y-%m-%d", "2023-12-25", 1703462400.0), + ("%m/%d/%Y", "12/25/2023", 1703462400.0), + ] + + for fmt, date_str, expected_timestamp in date_formats: + ts = reader._parse_from_strftime(date_str, fmt) # pylint: disable=protected-access + assert ts == expected_timestamp, f"Failed for format {fmt}" + + # Teste automatische Korrektur (31.11. / 30.02. etc.) + date_formats = [ + ("%d.%m.%Y", "31.11.2023", 1701302400.0), # Korrigiert zu 30.11.2023 + ("%d.%m.%Y", "30.02.2024", 1709164800.0), # Korrigiert zu 28.02.2024 + ] + + for fmt, date_str, expected_timestamp in date_formats: + ts = reader._parse_from_strftime(date_str, fmt) # pylint: disable=protected-access + assert ts == expected_timestamp, f"Failed for format {fmt}" diff --git a/tests/test_unit_reader_Volksbank-Mittelhessen.py b/tests/test_unit_reader_Volksbank-Mittelhessen.py index e9d2313..c06e952 100644 --- a/tests/test_unit_reader_Volksbank-Mittelhessen.py +++ b/tests/test_unit_reader_Volksbank-Mittelhessen.py @@ -28,7 +28,7 @@ def test_read_from_csv(test_app): # Look for test files and create a tuple list -test_folder = os.path.join('/tmp', 'volksbank-mittelhessen') +test_folder = os.path.join('/tmp', 'Volksbank_Mittelhessen') test_files = [] if not os.path.isdir(test_folder): test_files = [()] @@ -48,6 +48,7 @@ def test_read_from_pdf(test_app, full_path): with test_app.app_context(): transaction_list = Volksbank_Mittelhessen().from_pdf(full_path) + assert transaction_list, "No transactions found in PDF file" # Check Reader Ergebnisse check_transaktion_list(transaction_list)