diff --git a/scripts/_business_recon.py b/scripts/_business_recon.py index b59775d..4bfaee5 100644 --- a/scripts/_business_recon.py +++ b/scripts/_business_recon.py @@ -1,225 +1,12 @@ -# SPDX-License-Identifier: MIT -"""Business reconciliation helpers (Track D, Chapter 6). +"""Backwards-compatible shim for Track D reconciliation helpers. -Chapter 6 treats reconciliations as *data validation*. +The repo uses ``scripts/_business_recon.py`` in Business Track chapters. -This module provides: -- bank reconciliation helpers (bank feed vs book cash activity) -- AR rollforward tie-outs (AR subledger vs trial balance control) - -The intent is to keep Chapter 6 code readable and testable. +The installed package exposes the canonical implementation at +``pystatsv1.trackd.recon``. This shim keeps existing imports working for students +running scripts directly from the repo. """ from __future__ import annotations -import json -from dataclasses import dataclass -from pathlib import Path -from typing import Any - -import numpy as np -import pandas as pd - - -def write_json(obj: Any, path: str | Path) -> Path: - """Write a JSON file (pretty-printed) and return the written path.""" - p = Path(path) - p.parent.mkdir(parents=True, exist_ok=True) - with p.open("w", encoding="utf-8") as f: - json.dump(obj, f, indent=2, sort_keys=True) - f.write("\n") - return p - - -def build_cash_txns_from_gl(gl: pd.DataFrame) -> pd.DataFrame: - """Group cash lines in GL into one row per txn_id with net cash impact.""" - cash_lines = gl.loc[gl["account_id"].astype(str) == "1000", ["txn_id", "date", "description", "debit", "credit"]].copy() - if cash_lines.empty: - return pd.DataFrame(columns=["txn_id", "date", "description", "amount"]) - - cash_lines["cash_net"] = cash_lines["debit"].astype(float) - cash_lines["credit"].astype(float) - - cash_txn = ( - cash_lines.groupby("txn_id", observed=True) - .agg(date=("date", "min"), description=("description", "first"), amount=("cash_net", "sum")) - .reset_index() - ) - cash_txn = cash_txn.loc[cash_txn["amount"].abs() > 1e-9].copy() - cash_txn = cash_txn.sort_values(["date", "txn_id"], kind="mergesort").reset_index(drop=True) - return cash_txn - - -def build_cash_txn_from_gl(gl: pd.DataFrame) -> pd.DataFrame: - """Alias for build_cash_txns_from_gl (keeps chapter script imports stable).""" - return build_cash_txns_from_gl(gl) - - -@dataclass(frozen=True) -class BankReconOutputs: - cash_txns: pd.DataFrame - matches: pd.DataFrame - exceptions: pd.DataFrame - - -def bank_reconcile(*, bank_statement: pd.DataFrame, cash_txns: pd.DataFrame, amount_tol: float = 0.01) -> BankReconOutputs: - """Reconcile bank statement lines against book cash transactions.""" - bank = bank_statement.copy() - if bank.empty: - empty = pd.DataFrame() - return BankReconOutputs(cash_txns=cash_txns.copy(), matches=empty, exceptions=empty) - - bank["gl_txn_id"] = pd.to_numeric(bank["gl_txn_id"], errors="coerce").astype("Int64") - bank["amount"] = bank["amount"].astype(float) - - cash = cash_txns.copy() - cash["txn_id"] = cash["txn_id"].astype(int) - cash["amount"] = cash["amount"].astype(float) - - matches = bank.merge( - cash.rename(columns={"txn_id": "gl_txn_id", "amount": "gl_amount", "date": "gl_date"}), - on="gl_txn_id", - how="left", - validate="m:1", - ) - - # convenience flag for summaries - matches["is_matched"] = matches["gl_amount"].notna() - - exceptions: list[dict[str, Any]] = [] - - # 1) duplicate bank_txn_id - dup_mask = matches["bank_txn_id"].astype(str).duplicated(keep=False) - if dup_mask.any(): - for _, r in matches.loc[dup_mask].iterrows(): - exceptions.append( - { - "exception_type": "bank_duplicate_txn_id", - "month": str(r.get("month", "")), - "bank_txn_id": str(r.get("bank_txn_id")), - "posted_date": str(r.get("posted_date")), - "gl_txn_id": (int(r["gl_txn_id"]) if pd.notna(r["gl_txn_id"]) else np.nan), - "bank_amount": float(r.get("amount", 0.0)), - "gl_amount": float(r.get("gl_amount", np.nan)) if pd.notna(r.get("gl_amount", np.nan)) else np.nan, - "details": "Duplicate bank_txn_id appears multiple times in bank feed.", - } - ) - - # 2) unmatched bank item - unmatched_bank = matches["gl_txn_id"].isna() | matches["gl_amount"].isna() - if unmatched_bank.any(): - for _, r in matches.loc[unmatched_bank].iterrows(): - exceptions.append( - { - "exception_type": "bank_unmatched_item", - "month": str(r.get("month", "")), - "bank_txn_id": str(r.get("bank_txn_id")), - "posted_date": str(r.get("posted_date")), - "gl_txn_id": (int(r["gl_txn_id"]) if pd.notna(r["gl_txn_id"]) else np.nan), - "bank_amount": float(r.get("amount", 0.0)), - "gl_amount": np.nan, - "details": "Bank statement line has no matching GL cash transaction.", - } - ) - - # 3) amount mismatch - matched = matches.loc[matches["gl_amount"].notna()].copy() - mism = matched.loc[(matched["amount"] - matched["gl_amount"]).abs() > float(amount_tol)] - if not mism.empty: - for _, r in mism.iterrows(): - exceptions.append( - { - "exception_type": "amount_mismatch", - "month": str(r.get("month", "")), - "bank_txn_id": str(r.get("bank_txn_id")), - "posted_date": str(r.get("posted_date")), - "gl_txn_id": int(r["gl_txn_id"]), - "bank_amount": float(r.get("amount", np.nan)), - "gl_amount": float(r.get("gl_amount", np.nan)), - "details": f"Bank amount differs from book by > {amount_tol}.", - } - ) - - # 4) book-only transactions (cash txn not seen on bank) - bank_gl_ids = set(matches.loc[matches["gl_txn_id"].notna(), "gl_txn_id"].astype(int).tolist()) - book_only = cash.loc[~cash["txn_id"].isin(bank_gl_ids)].copy() - if not book_only.empty: - for _, r in book_only.iterrows(): - exceptions.append( - { - "exception_type": "book_unmatched_cash_txn", - "month": str(r["date"])[:7], - "bank_txn_id": np.nan, - "posted_date": np.nan, - "gl_txn_id": int(r["txn_id"]), - "bank_amount": np.nan, - "gl_amount": float(r["amount"]), - "details": "Cash transaction in GL does not appear in bank feed.", - } - ) - - exc_df = pd.DataFrame(exceptions) - if not exc_df.empty: - exc_df = exc_df.sort_values(["exception_type", "month", "bank_txn_id"], kind="mergesort").reset_index(drop=True) - - return BankReconOutputs(cash_txns=cash, matches=matches, exceptions=exc_df) - - -def reconcile_bank_statement(bank_statement: pd.DataFrame, gl_journal: pd.DataFrame, *, amount_tol: float = 0.01) -> BankReconOutputs: - """Wrapper used by chapter script: bank feed vs GL.""" - cash_txns = build_cash_txns_from_gl(gl_journal) - return bank_reconcile(bank_statement=bank_statement, cash_txns=cash_txns, amount_tol=amount_tol) - - -def _ending_balance_from_tb(tb_month: pd.DataFrame, account_id: str) -> float: - """Return balance in its normal direction (positive if normal-side).""" - hit = tb_month.loc[tb_month["account_id"].astype(str) == str(account_id)] - if hit.empty: - return 0.0 - normal = str(hit.iloc[0]["normal_side"]) - ending_side = str(hit.iloc[0]["ending_side"]) - bal = float(hit.iloc[0]["ending_balance"]) - return bal if ending_side == normal else -bal - - -def ar_rollforward_vs_tb(trial_balance_monthly: pd.DataFrame, ar_events: pd.DataFrame) -> pd.DataFrame: - """Compute AR rollforward (begin + activity = end) and compare to TB.""" - tb = trial_balance_monthly.copy() - tb["month"] = tb["month"].astype(str) - - months = sorted(tb["month"].unique().tolist()) - - ar_monthly = ( - ar_events.assign(month=lambda d: d["month"].astype(str)) - .groupby("month", observed=True)["ar_delta"] - .sum() - .reindex(months, fill_value=0.0) - ) - - rows: list[dict[str, Any]] = [] - ar_begin = 0.0 - for m in months: - ar_delta = float(ar_monthly.loc[m]) - ar_end_events = float(ar_begin + ar_delta) - - tb_m = tb.loc[tb["month"] == m] - ar_end_tb = float(_ending_balance_from_tb(tb_m, "1100")) - diff = float(ar_end_events - ar_end_tb) - - rows.append( - { - "month": m, - "ar_begin": float(ar_begin), - "ar_delta": float(ar_delta), - "ar_end_from_events": float(ar_end_events), - "ar_end_from_tb": float(ar_end_tb), - "diff": float(diff), - } - ) - ar_begin = ar_end_events - - return pd.DataFrame(rows) - - -def build_ar_rollforward(trial_balance_monthly: pd.DataFrame, ar_events: pd.DataFrame) -> pd.DataFrame: - """Alias for ar_rollforward_vs_tb.""" - return ar_rollforward_vs_tb(trial_balance_monthly, ar_events) +from pystatsv1.trackd.recon import * # noqa: F401,F403 diff --git a/src/pystatsv1/assets/workbook_track_d.zip b/src/pystatsv1/assets/workbook_track_d.zip index a0576d0..aa235b0 100644 Binary files a/src/pystatsv1/assets/workbook_track_d.zip and b/src/pystatsv1/assets/workbook_track_d.zip differ diff --git a/src/pystatsv1/trackd/recon.py b/src/pystatsv1/trackd/recon.py new file mode 100644 index 0000000..9e2a87e --- /dev/null +++ b/src/pystatsv1/trackd/recon.py @@ -0,0 +1,227 @@ +"""Track D reconciliation helpers. + +This module mirrors the public API of ``scripts/_business_recon.py``. + +Rationale +--------- +We want chapter/workbook code to be able to import a stable implementation from +the installed package (``pystatsv1.trackd``) while keeping the repo-local +``scripts/_business_recon.py`` as a thin, backwards-compatible shim. +""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +import numpy as np +import pandas as pd + + +def write_json(obj: Any, path: str | Path) -> Path: + """Write a JSON file (pretty-printed) and return the written path.""" + p = Path(path) + p.parent.mkdir(parents=True, exist_ok=True) + with p.open("w", encoding="utf-8") as f: + json.dump(obj, f, indent=2, sort_keys=True) + f.write("\n") + return p + + +def build_cash_txns_from_gl(gl: pd.DataFrame) -> pd.DataFrame: + """Group cash lines in GL into one row per txn_id with net cash impact.""" + cash_lines = gl.loc[ + gl["account_id"].astype(str) == "1000", + ["txn_id", "date", "description", "debit", "credit"], + ].copy() + if cash_lines.empty: + return pd.DataFrame(columns=["txn_id", "date", "description", "amount"]) + + cash_lines["cash_net"] = cash_lines["debit"].astype(float) - cash_lines["credit"].astype(float) + + cash_txn = ( + cash_lines.groupby("txn_id", observed=True) + .agg(date=("date", "min"), description=("description", "first"), amount=("cash_net", "sum")) + .reset_index() + ) + cash_txn = cash_txn.loc[cash_txn["amount"].abs() > 1e-9].copy() + cash_txn = cash_txn.sort_values(["date", "txn_id"], kind="mergesort").reset_index(drop=True) + return cash_txn + + +def build_cash_txn_from_gl(gl: pd.DataFrame) -> pd.DataFrame: + """Alias for build_cash_txns_from_gl (keeps chapter script imports stable).""" + return build_cash_txns_from_gl(gl) + + +@dataclass(frozen=True) +class BankReconOutputs: + cash_txns: pd.DataFrame + matches: pd.DataFrame + exceptions: pd.DataFrame + + +def bank_reconcile(*, bank_statement: pd.DataFrame, cash_txns: pd.DataFrame, amount_tol: float = 0.01) -> BankReconOutputs: + """Reconcile bank statement lines against book cash transactions.""" + bank = bank_statement.copy() + if bank.empty: + empty = pd.DataFrame() + return BankReconOutputs(cash_txns=cash_txns.copy(), matches=empty, exceptions=empty) + + bank["gl_txn_id"] = pd.to_numeric(bank["gl_txn_id"], errors="coerce").astype("Int64") + bank["amount"] = bank["amount"].astype(float) + + cash = cash_txns.copy() + cash["txn_id"] = cash["txn_id"].astype(int) + cash["amount"] = cash["amount"].astype(float) + + matches = bank.merge( + cash.rename(columns={"txn_id": "gl_txn_id", "amount": "gl_amount", "date": "gl_date"}), + on="gl_txn_id", + how="left", + validate="m:1", + ) + + # convenience flag for summaries + matches["is_matched"] = matches["gl_amount"].notna() + + exceptions: list[dict[str, Any]] = [] + + # 1) duplicate bank_txn_id + dup_mask = matches["bank_txn_id"].astype(str).duplicated(keep=False) + if dup_mask.any(): + for _, r in matches.loc[dup_mask].iterrows(): + exceptions.append( + { + "exception_type": "bank_duplicate_txn_id", + "month": str(r.get("month", "")), + "bank_txn_id": str(r.get("bank_txn_id")), + "posted_date": str(r.get("posted_date")), + "gl_txn_id": (int(r["gl_txn_id"]) if pd.notna(r["gl_txn_id"]) else np.nan), + "bank_amount": float(r.get("amount", 0.0)), + "gl_amount": float(r.get("gl_amount", np.nan)) if pd.notna(r.get("gl_amount", np.nan)) else np.nan, + "details": "Duplicate bank_txn_id appears multiple times in bank feed.", + } + ) + + # 2) unmatched bank item + unmatched_bank = matches["gl_txn_id"].isna() | matches["gl_amount"].isna() + if unmatched_bank.any(): + for _, r in matches.loc[unmatched_bank].iterrows(): + exceptions.append( + { + "exception_type": "bank_unmatched_item", + "month": str(r.get("month", "")), + "bank_txn_id": str(r.get("bank_txn_id")), + "posted_date": str(r.get("posted_date")), + "gl_txn_id": (int(r["gl_txn_id"]) if pd.notna(r["gl_txn_id"]) else np.nan), + "bank_amount": float(r.get("amount", 0.0)), + "gl_amount": np.nan, + "details": "Bank statement line has no matching GL cash transaction.", + } + ) + + # 3) amount mismatch + matched = matches.loc[matches["gl_amount"].notna()].copy() + mism = matched.loc[(matched["amount"] - matched["gl_amount"]).abs() > float(amount_tol)] + if not mism.empty: + for _, r in mism.iterrows(): + exceptions.append( + { + "exception_type": "amount_mismatch", + "month": str(r.get("month", "")), + "bank_txn_id": str(r.get("bank_txn_id")), + "posted_date": str(r.get("posted_date")), + "gl_txn_id": int(r["gl_txn_id"]), + "bank_amount": float(r.get("amount", np.nan)), + "gl_amount": float(r.get("gl_amount", np.nan)), + "details": f"Bank amount differs from book by > {amount_tol}.", + } + ) + + # 4) book-only transactions (cash txn not seen on bank) + bank_gl_ids = set(matches.loc[matches["gl_txn_id"].notna(), "gl_txn_id"].astype(int).tolist()) + book_only = cash.loc[~cash["txn_id"].isin(bank_gl_ids)].copy() + if not book_only.empty: + for _, r in book_only.iterrows(): + exceptions.append( + { + "exception_type": "book_unmatched_cash_txn", + "month": str(r["date"])[:7], + "bank_txn_id": np.nan, + "posted_date": np.nan, + "gl_txn_id": int(r["txn_id"]), + "bank_amount": np.nan, + "gl_amount": float(r["amount"]), + "details": "Cash transaction in GL does not appear in bank feed.", + } + ) + + exc_df = pd.DataFrame(exceptions) + if not exc_df.empty: + exc_df = exc_df.sort_values(["exception_type", "month", "bank_txn_id"], kind="mergesort").reset_index(drop=True) + + return BankReconOutputs(cash_txns=cash, matches=matches, exceptions=exc_df) + + +def reconcile_bank_statement(bank_statement: pd.DataFrame, gl_journal: pd.DataFrame, *, amount_tol: float = 0.01) -> BankReconOutputs: + """Wrapper used by chapter script: bank feed vs GL.""" + cash_txns = build_cash_txns_from_gl(gl_journal) + return bank_reconcile(bank_statement=bank_statement, cash_txns=cash_txns, amount_tol=amount_tol) + + +def _ending_balance_from_tb(tb_month: pd.DataFrame, account_id: str) -> float: + """Return balance in its normal direction (positive if normal-side).""" + hit = tb_month.loc[tb_month["account_id"].astype(str) == str(account_id)] + if hit.empty: + return 0.0 + normal = str(hit.iloc[0]["normal_side"]) + ending_side = str(hit.iloc[0]["ending_side"]) + bal = float(hit.iloc[0]["ending_balance"]) + return bal if ending_side == normal else -bal + + +def ar_rollforward_vs_tb(trial_balance_monthly: pd.DataFrame, ar_events: pd.DataFrame) -> pd.DataFrame: + """Compute AR rollforward (begin + activity = end) and compare to TB.""" + tb = trial_balance_monthly.copy() + tb["month"] = tb["month"].astype(str) + + months = sorted(tb["month"].unique().tolist()) + + ar_monthly = ( + ar_events.assign(month=lambda d: d["month"].astype(str)) + .groupby("month", observed=True)["ar_delta"] + .sum() + .reindex(months, fill_value=0.0) + ) + + rows: list[dict[str, Any]] = [] + ar_begin = 0.0 + for m in months: + ar_delta = float(ar_monthly.loc[m]) + ar_end_events = float(ar_begin + ar_delta) + + tb_m = tb.loc[tb["month"] == m] + ar_end_tb = float(_ending_balance_from_tb(tb_m, "1100")) + diff = float(ar_end_events - ar_end_tb) + + rows.append( + { + "month": m, + "ar_begin": float(ar_begin), + "ar_delta": float(ar_delta), + "ar_end_from_events": float(ar_end_events), + "ar_end_from_tb": float(ar_end_tb), + "diff": float(diff), + } + ) + ar_begin = ar_end_events + + return pd.DataFrame(rows) + + +def build_ar_rollforward(trial_balance_monthly: pd.DataFrame, ar_events: pd.DataFrame) -> pd.DataFrame: + """Alias for ar_rollforward_vs_tb.""" + return ar_rollforward_vs_tb(trial_balance_monthly, ar_events) diff --git a/tests/test_trackd_business_schema_shim_smoke.py b/tests/test_trackd_business_schema_shim_smoke.py index ece5458..4fd9f80 100644 --- a/tests/test_trackd_business_schema_shim_smoke.py +++ b/tests/test_trackd_business_schema_shim_smoke.py @@ -6,8 +6,10 @@ import scripts._business_schema as shim import scripts._business_etl as etl_shim +import scripts._business_recon as recon_shim from pystatsv1.trackd._errors import TrackDSchemaError from pystatsv1.trackd import etl as trackd_etl +from pystatsv1.trackd import recon as trackd_recon from pystatsv1.trackd import schema as trackd_schema @@ -25,6 +27,15 @@ def test_business_etl_shim_exports_trackd_etl() -> None: assert etl_shim.analyze_gl_preparation is trackd_etl.analyze_gl_preparation +def test_business_recon_shim_exports_trackd_recon() -> None: + # The shim should re-export the package implementation (same function objects). + assert recon_shim.write_json is trackd_recon.write_json + assert recon_shim.build_cash_txns_from_gl is trackd_recon.build_cash_txns_from_gl + assert recon_shim.bank_reconcile is trackd_recon.bank_reconcile + assert recon_shim.ar_rollforward_vs_tb is trackd_recon.ar_rollforward_vs_tb + assert recon_shim.BankReconOutputs is trackd_recon.BankReconOutputs + + def test_business_schema_shim_validate_schema_report_shape(tmp_path: Path) -> None: report = shim.validate_schema(tmp_path, dataset=trackd_schema.DATASET_NSO_V1) @@ -65,3 +76,16 @@ def test_track_d_template_business_etl_is_a_shim() -> None: assert "build_gl_tidy_dataset" in text assert "prepare_gl_monthly_summary" in text assert "analyze_gl_preparation" in text + + +def test_track_d_template_business_recon_is_a_shim() -> None: + root = Path(__file__).resolve().parents[1] + template = root / "workbooks" / "track_d_template" / "scripts" / "_business_recon.py" + assert template.exists() + + text = template.read_text(encoding="utf-8") + assert "pystatsv1.trackd.recon" in text + assert "write_json" in text + assert "build_cash_txns_from_gl" in text + assert "bank_reconcile" in text + assert "ar_rollforward_vs_tb" in text diff --git a/workbooks/track_d_template/scripts/_business_recon.py b/workbooks/track_d_template/scripts/_business_recon.py index b59775d..2a3d326 100644 --- a/workbooks/track_d_template/scripts/_business_recon.py +++ b/workbooks/track_d_template/scripts/_business_recon.py @@ -1,225 +1,33 @@ # SPDX-License-Identifier: MIT -"""Business reconciliation helpers (Track D, Chapter 6). -Chapter 6 treats reconciliations as *data validation*. +"""Backwards-compatible shim for Track D reconciliation helpers. -This module provides: -- bank reconciliation helpers (bank feed vs book cash activity) -- AR rollforward tie-outs (AR subledger vs trial balance control) - -The intent is to keep Chapter 6 code readable and testable. +The shipped Track D workbook template imports ``scripts._business_recon``. +To keep all existing chapter runners working without edits, this file remains +as the import surface, but the implementation now lives in +``pystatsv1.trackd.recon``. """ from __future__ import annotations -import json -from dataclasses import dataclass -from pathlib import Path -from typing import Any - -import numpy as np -import pandas as pd - - -def write_json(obj: Any, path: str | Path) -> Path: - """Write a JSON file (pretty-printed) and return the written path.""" - p = Path(path) - p.parent.mkdir(parents=True, exist_ok=True) - with p.open("w", encoding="utf-8") as f: - json.dump(obj, f, indent=2, sort_keys=True) - f.write("\n") - return p - - -def build_cash_txns_from_gl(gl: pd.DataFrame) -> pd.DataFrame: - """Group cash lines in GL into one row per txn_id with net cash impact.""" - cash_lines = gl.loc[gl["account_id"].astype(str) == "1000", ["txn_id", "date", "description", "debit", "credit"]].copy() - if cash_lines.empty: - return pd.DataFrame(columns=["txn_id", "date", "description", "amount"]) - - cash_lines["cash_net"] = cash_lines["debit"].astype(float) - cash_lines["credit"].astype(float) - - cash_txn = ( - cash_lines.groupby("txn_id", observed=True) - .agg(date=("date", "min"), description=("description", "first"), amount=("cash_net", "sum")) - .reset_index() - ) - cash_txn = cash_txn.loc[cash_txn["amount"].abs() > 1e-9].copy() - cash_txn = cash_txn.sort_values(["date", "txn_id"], kind="mergesort").reset_index(drop=True) - return cash_txn - - -def build_cash_txn_from_gl(gl: pd.DataFrame) -> pd.DataFrame: - """Alias for build_cash_txns_from_gl (keeps chapter script imports stable).""" - return build_cash_txns_from_gl(gl) - - -@dataclass(frozen=True) -class BankReconOutputs: - cash_txns: pd.DataFrame - matches: pd.DataFrame - exceptions: pd.DataFrame - - -def bank_reconcile(*, bank_statement: pd.DataFrame, cash_txns: pd.DataFrame, amount_tol: float = 0.01) -> BankReconOutputs: - """Reconcile bank statement lines against book cash transactions.""" - bank = bank_statement.copy() - if bank.empty: - empty = pd.DataFrame() - return BankReconOutputs(cash_txns=cash_txns.copy(), matches=empty, exceptions=empty) - - bank["gl_txn_id"] = pd.to_numeric(bank["gl_txn_id"], errors="coerce").astype("Int64") - bank["amount"] = bank["amount"].astype(float) - - cash = cash_txns.copy() - cash["txn_id"] = cash["txn_id"].astype(int) - cash["amount"] = cash["amount"].astype(float) - - matches = bank.merge( - cash.rename(columns={"txn_id": "gl_txn_id", "amount": "gl_amount", "date": "gl_date"}), - on="gl_txn_id", - how="left", - validate="m:1", - ) - - # convenience flag for summaries - matches["is_matched"] = matches["gl_amount"].notna() - - exceptions: list[dict[str, Any]] = [] - - # 1) duplicate bank_txn_id - dup_mask = matches["bank_txn_id"].astype(str).duplicated(keep=False) - if dup_mask.any(): - for _, r in matches.loc[dup_mask].iterrows(): - exceptions.append( - { - "exception_type": "bank_duplicate_txn_id", - "month": str(r.get("month", "")), - "bank_txn_id": str(r.get("bank_txn_id")), - "posted_date": str(r.get("posted_date")), - "gl_txn_id": (int(r["gl_txn_id"]) if pd.notna(r["gl_txn_id"]) else np.nan), - "bank_amount": float(r.get("amount", 0.0)), - "gl_amount": float(r.get("gl_amount", np.nan)) if pd.notna(r.get("gl_amount", np.nan)) else np.nan, - "details": "Duplicate bank_txn_id appears multiple times in bank feed.", - } - ) - - # 2) unmatched bank item - unmatched_bank = matches["gl_txn_id"].isna() | matches["gl_amount"].isna() - if unmatched_bank.any(): - for _, r in matches.loc[unmatched_bank].iterrows(): - exceptions.append( - { - "exception_type": "bank_unmatched_item", - "month": str(r.get("month", "")), - "bank_txn_id": str(r.get("bank_txn_id")), - "posted_date": str(r.get("posted_date")), - "gl_txn_id": (int(r["gl_txn_id"]) if pd.notna(r["gl_txn_id"]) else np.nan), - "bank_amount": float(r.get("amount", 0.0)), - "gl_amount": np.nan, - "details": "Bank statement line has no matching GL cash transaction.", - } - ) - - # 3) amount mismatch - matched = matches.loc[matches["gl_amount"].notna()].copy() - mism = matched.loc[(matched["amount"] - matched["gl_amount"]).abs() > float(amount_tol)] - if not mism.empty: - for _, r in mism.iterrows(): - exceptions.append( - { - "exception_type": "amount_mismatch", - "month": str(r.get("month", "")), - "bank_txn_id": str(r.get("bank_txn_id")), - "posted_date": str(r.get("posted_date")), - "gl_txn_id": int(r["gl_txn_id"]), - "bank_amount": float(r.get("amount", np.nan)), - "gl_amount": float(r.get("gl_amount", np.nan)), - "details": f"Bank amount differs from book by > {amount_tol}.", - } - ) - - # 4) book-only transactions (cash txn not seen on bank) - bank_gl_ids = set(matches.loc[matches["gl_txn_id"].notna(), "gl_txn_id"].astype(int).tolist()) - book_only = cash.loc[~cash["txn_id"].isin(bank_gl_ids)].copy() - if not book_only.empty: - for _, r in book_only.iterrows(): - exceptions.append( - { - "exception_type": "book_unmatched_cash_txn", - "month": str(r["date"])[:7], - "bank_txn_id": np.nan, - "posted_date": np.nan, - "gl_txn_id": int(r["txn_id"]), - "bank_amount": np.nan, - "gl_amount": float(r["amount"]), - "details": "Cash transaction in GL does not appear in bank feed.", - } - ) - - exc_df = pd.DataFrame(exceptions) - if not exc_df.empty: - exc_df = exc_df.sort_values(["exception_type", "month", "bank_txn_id"], kind="mergesort").reset_index(drop=True) - - return BankReconOutputs(cash_txns=cash, matches=matches, exceptions=exc_df) - - -def reconcile_bank_statement(bank_statement: pd.DataFrame, gl_journal: pd.DataFrame, *, amount_tol: float = 0.01) -> BankReconOutputs: - """Wrapper used by chapter script: bank feed vs GL.""" - cash_txns = build_cash_txns_from_gl(gl_journal) - return bank_reconcile(bank_statement=bank_statement, cash_txns=cash_txns, amount_tol=amount_tol) - - -def _ending_balance_from_tb(tb_month: pd.DataFrame, account_id: str) -> float: - """Return balance in its normal direction (positive if normal-side).""" - hit = tb_month.loc[tb_month["account_id"].astype(str) == str(account_id)] - if hit.empty: - return 0.0 - normal = str(hit.iloc[0]["normal_side"]) - ending_side = str(hit.iloc[0]["ending_side"]) - bal = float(hit.iloc[0]["ending_balance"]) - return bal if ending_side == normal else -bal - - -def ar_rollforward_vs_tb(trial_balance_monthly: pd.DataFrame, ar_events: pd.DataFrame) -> pd.DataFrame: - """Compute AR rollforward (begin + activity = end) and compare to TB.""" - tb = trial_balance_monthly.copy() - tb["month"] = tb["month"].astype(str) - - months = sorted(tb["month"].unique().tolist()) - - ar_monthly = ( - ar_events.assign(month=lambda d: d["month"].astype(str)) - .groupby("month", observed=True)["ar_delta"] - .sum() - .reindex(months, fill_value=0.0) - ) - - rows: list[dict[str, Any]] = [] - ar_begin = 0.0 - for m in months: - ar_delta = float(ar_monthly.loc[m]) - ar_end_events = float(ar_begin + ar_delta) - - tb_m = tb.loc[tb["month"] == m] - ar_end_tb = float(_ending_balance_from_tb(tb_m, "1100")) - diff = float(ar_end_events - ar_end_tb) - - rows.append( - { - "month": m, - "ar_begin": float(ar_begin), - "ar_delta": float(ar_delta), - "ar_end_from_events": float(ar_end_events), - "ar_end_from_tb": float(ar_end_tb), - "diff": float(diff), - } - ) - ar_begin = ar_end_events - - return pd.DataFrame(rows) - - -def build_ar_rollforward(trial_balance_monthly: pd.DataFrame, ar_events: pd.DataFrame) -> pd.DataFrame: - """Alias for ar_rollforward_vs_tb.""" - return ar_rollforward_vs_tb(trial_balance_monthly, ar_events) +from pystatsv1.trackd.recon import ( + BankReconOutputs, + ar_rollforward_vs_tb, + bank_reconcile, + build_ar_rollforward, + build_cash_txn_from_gl, + build_cash_txns_from_gl, + reconcile_bank_statement, + write_json, +) + +__all__ = [ + "write_json", + "build_cash_txns_from_gl", + "build_cash_txn_from_gl", + "BankReconOutputs", + "bank_reconcile", + "reconcile_bank_statement", + "ar_rollforward_vs_tb", + "build_ar_rollforward", +]