From e46934ddf255ab013cb89206c6a7747d602b54d4 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 09:40:40 -0500 Subject: [PATCH 01/29] added a few more words for the description/amount --- utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils.py b/utils.py index 6954ccf..928adb1 100644 --- a/utils.py +++ b/utils.py @@ -3,8 +3,8 @@ standard_columns = { 'Date': ['date', 'datum', 'fecha', 'data'], - 'Description': ['description', 'desc', 'descripción', 'bezeichnung', 'opis'], - 'Amount': ['amount', 'amt', 'importe', 'betrag', 'kwota', 'sum'], + 'Description': ['description', 'desc', 'descripción', 'bezeichnung', 'opis', 'payee'], + 'Amount': ['amount', 'amt', 'importe', 'betrag', 'kwota', 'sum', 'outflow',], # 'Category': ['category', 'kategorie', 'categoría', 'kategorie', 'kategoria'], # Add other standard columns and their variations } From 003560320b9dc0dfff2e09d051075ddced7174d8 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 09:41:03 -0500 Subject: [PATCH 02/29] exit if no heading row found --- interpret.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/interpret.py b/interpret.py index 0e12b22..94054f9 100644 --- a/interpret.py +++ b/interpret.py @@ -31,6 +31,9 @@ def get_subscription_candidates(df, groupby=['Description', 'Amount']): df = pd.read_csv(file_path, skiprows=start_row, sep=';', index_col=False,) else: print("No valid data header found in the file.") + print(start_row) + print("Exiting.") + exit(1) # Example: Translate column names if not df.empty: From fde0a9868b82ef373542644faf85bfbecf293aef Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 09:50:11 -0500 Subject: [PATCH 03/29] fix(utils): enhance clean_amount to handle US/EU currency formats --- utils.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/utils.py b/utils.py index 928adb1..b6496b3 100644 --- a/utils.py +++ b/utils.py @@ -23,10 +23,23 @@ def 
escape_special_chars(text): return re.escape(text) def clean_amount(amount): - # Remove currency symbols and any non-numeric characters except for the minus sign and comma - amount = re.sub(r'[^\d,-]', '', amount) - # Replace comma with dot - amount = amount.replace(',', '.') + if not isinstance(amount, str): + return amount + # Remove currency symbols and any non-numeric characters except for the minus sign, comma, and dot + amount = re.sub(r'[^\d,.-]', '', amount) + + if ',' in amount and '.' in amount: + # If both are present, assume the last one is the decimal separator + if amount.rfind(',') > amount.rfind('.'): + # European format: 1.234,56 -> 1234.56 + amount = amount.replace('.', '').replace(',', '.') + else: + # US format: 1,234.56 -> 1234.56 + amount = amount.replace(',', '') + elif ',' in amount: + # Assume comma is decimal separator (European) + amount = amount.replace(',', '.') + return amount def map_columns_with_prefix_suffix(columns, standard_columns): From f2df348c694becff9445402e2ba344997ca7bdda Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 09:50:17 -0500 Subject: [PATCH 04/29] feat(interpret): improve data parsing and support for YNAB exports - Refactor find_data_start to use dynamic standard_columns - Update read_csv to use comma separator for YNAB compatibility - Enable auto-language detection for column translation - Add logic to negate Outflow amounts - Remove duplicate code blocks --- interpret.py | 82 ++++++++++++---------------------------------------- 1 file changed, 18 insertions(+), 64 deletions(-) diff --git a/interpret.py b/interpret.py index 94054f9..85db609 100644 --- a/interpret.py +++ b/interpret.py @@ -8,9 +8,10 @@ def find_data_start(file_path): with open(file_path, 'r', encoding='utf-8') as file: for i, line in enumerate(file): - if 'Data' in line and 'Opis' in line and 'Kwota' in line: - return i - if 'Date' in line and 'Description' in line and 'Amount' in line: + line_lower = line.lower() + if (any(kw in 
line_lower for kw in standard_columns['Date']) and + any(kw in line_lower for kw in standard_columns['Description']) and + any(kw in line_lower for kw in standard_columns['Amount'])): return i return None @@ -28,7 +29,7 @@ def get_subscription_candidates(df, groupby=['Description', 'Amount']): print(f"Offseting by {start_row} rows.") if start_row is not None: - df = pd.read_csv(file_path, skiprows=start_row, sep=';', index_col=False,) + df = pd.read_csv(file_path, skiprows=start_row, sep=',', index_col=False,) else: print("No valid data header found in the file.") print(start_row) @@ -37,66 +38,11 @@ def get_subscription_candidates(df, groupby=['Description', 'Amount']): # Example: Translate column names if not df.empty: - df.columns = translate_column_names(df.columns, src_lang='pl') - df = unify_column_names(df, standard_columns) - - # Example: Convert 'Date' column to datetime - df['Date'] = pd.to_datetime(df['Date'], errors='coerce') - df['Amount'] = pd.to_numeric(df['Amount'].apply(clean_amount), errors='coerce') - - # Example: Handle missing values - df.dropna(subset=['Description', 'Amount'], inplace=True) - - - subscription_candidates = get_subscription_candidates(df, groupby=['Description', 'Amount']) - subscription_candidates['First_Transaction'] = pd.to_datetime(subscription_candidates['First_Transaction']) - subscription_candidates['Last_Transaction'] = pd.to_datetime(subscription_candidates['Last_Transaction']) - subscription_candidates['Total_Days'] = (subscription_candidates['Last_Transaction'] - subscription_candidates['First_Transaction']).dt.days - subscription_candidates['Avg_Days_Between_Transactions'] = subscription_candidates['Total_Days'] / (subscription_candidates['Transaction_Count'] - 1) - - subscription_candidates = subscription_candidates[(subscription_candidates['Avg_Days_Between_Transactions'] > 25) & (subscription_candidates['Avg_Days_Between_Transactions'] < 35)] - - subscription_candidates = 
subscription_candidates[(subscription_candidates['Amount'] < -10) & (subscription_candidates['Amount'] > -1000)] - - print("Number of potential subscriptions:", len(subscription_candidates)) - - # Display potential subscriptions - print(subscription_candidates[['Description', 'Amount', 'Last_Transaction', 'Transaction_Count']].sort_values('Last_Transaction', ascending=False)) -else: - print("Dataframe is empty.") -# print(df.head()) + # Check if 'Outflow' exists before translation/unification + is_outflow_present = any(col.lower() == 'outflow' for col in df.columns) -def find_data_start(file_path): - with open(file_path, 'r', encoding='utf-8') as file: - for i, line in enumerate(file): - if 'Data' in line and 'Opis' in line and 'Kwota' in line: - return i - if 'Date' in line and 'Description' in line and 'Amount' in line: - return i - return None - -def get_subscription_candidates(df, groupby=['Description']): - subscription_candidates = df.groupby(groupby).agg({ - 'Amount': ['count', 'sum'], - 'Date': ['min', 'max'] - }).reset_index() - subscription_candidates.columns = ['Description', 'Amount', 'Transaction_Count', 'Total_Spent', 'First_Transaction', 'Last_Transaction'] - subscription_candidates = subscription_candidates[subscription_candidates['Transaction_Count'] > 1] - return subscription_candidates - - -start_row = find_data_start(file_path) -print(f"Offseting by {start_row} rows.") - -if start_row is not None: - df = pd.read_csv(file_path, skiprows=start_row, sep=';', index_col=False,) -else: - print("No valid data header found in the file.") - -# Example: Translate column names -if not df.empty: - df.columns = translate_column_names(df.columns, src_lang='pl') + df.columns = translate_column_names(df.columns, src_lang='auto') df = unify_column_names(df, standard_columns) # Example: Convert 'Date' column to datetime @@ -104,6 +50,14 @@ def get_subscription_candidates(df, groupby=['Description']): df['Amount'] = pd.to_numeric(df['Amount'].apply(clean_amount), 
errors='coerce') + if is_outflow_present: + # Outflow is usually positive, but we want negative for expenses + # Only invert positive values (income/refunds in Outflow column would be negative in YNAB but let's assume simple case) + # Actually YNAB: Outflow is positive number. Inflow is positive number. + # If we mapped Outflow to Amount, we have positive numbers. + # We need negative numbers for the filter logic below. + df['Amount'] = df['Amount'].apply(lambda x: -abs(x) if x > 0 else x) + # Example: Handle missing values df.dropna(subset=['Description', 'Amount'], inplace=True) @@ -121,7 +75,7 @@ def get_subscription_candidates(df, groupby=['Description']): print("Number of potential subscriptions:", len(subscription_candidates)) # Display potential subscriptions - print(subscription_candidates[['Description', 'Amount', 'Transaction_Count', 'Total_Spent', 'Last_Transaction']].sort_values('Last_Transaction', ascending=False)) + print(subscription_candidates[['Description', 'Amount', 'Last_Transaction', 'Transaction_Count']].sort_values('Last_Transaction', ascending=False)) else: print("Dataframe is empty.") -# print(df.head()) +# print(df.head()) \ No newline at end of file From c99b2baed8e5e6d6171ab93afe947d62e1ecd33e Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 09:51:49 -0500 Subject: [PATCH 05/29] feat(interpret): sort subscriptions by estimated yearly cost - Calculate 'Yearly_Cost' based on monthly amount * 12 - Sort output by 'Yearly_Cost' ascending (most expensive expenses first) --- interpret.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/interpret.py b/interpret.py index 85db609..78ee623 100644 --- a/interpret.py +++ b/interpret.py @@ -72,10 +72,13 @@ def get_subscription_candidates(df, groupby=['Description', 'Amount']): subscription_candidates = subscription_candidates[(subscription_candidates['Amount'] < -10) & (subscription_candidates['Amount'] > -1000)] + # Calculate yearly cost + 
subscription_candidates['Yearly_Cost'] = subscription_candidates['Amount'] * 12 + print("Number of potential subscriptions:", len(subscription_candidates)) # Display potential subscriptions - print(subscription_candidates[['Description', 'Amount', 'Last_Transaction', 'Transaction_Count']].sort_values('Last_Transaction', ascending=False)) + print(subscription_candidates[['Description', 'Amount', 'Yearly_Cost', 'Last_Transaction', 'Transaction_Count']].sort_values('Yearly_Cost', ascending=True)) else: print("Dataframe is empty.") # print(df.head()) \ No newline at end of file From 9fb8c58b590069df91e28c3588092b7ecdf446f2 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 09:56:20 -0500 Subject: [PATCH 06/29] feat(interpret): cluster similar subscription amounts - Add 'cluster_amounts' function to group transaction amounts within 10% similarity - Apply clustering before identifying subscriptions - Import numpy for mean calculation - Allows detecting price-adjusted subscriptions (e.g. 
price hikes) as a single subscription --- interpret.py | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/interpret.py b/interpret.py index 78ee623..8d5bf38 100644 --- a/interpret.py +++ b/interpret.py @@ -1,6 +1,7 @@ from utils import clean_amount, translate_column_names, unify_column_names, standard_columns import sys import pandas as pd +import numpy as np # Load the CSV file from first argument file_path = sys.argv[1] @@ -15,6 +16,51 @@ def find_data_start(file_path): return i return None +def cluster_amounts(group, threshold=0.10): + # group is a DataFrame subset (for one Description) + # We want to return the group with 'Amount' updated to the cluster mean + + if len(group) < 2: + return group + + # Sort by Amount to ensure deterministic processing + sorted_group = group.sort_values('Amount') + amounts = sorted_group['Amount'].values + + clusters = [] # List of [values] + if len(amounts) > 0: + current_cluster = [amounts[0]] + + for val in amounts[1:]: + ref = current_cluster[0] + # Avoid division by zero + if ref == 0: + if val == 0: + current_cluster.append(val) + else: + clusters.append(current_cluster) + current_cluster = [val] + continue + + # Calculate percentage difference + diff = abs((val - ref) / ref) + + if diff <= threshold: + current_cluster.append(val) + else: + clusters.append(current_cluster) + current_cluster = [val] + clusters.append(current_cluster) + + # Build a list of new amounts matching the sorted order + new_amounts = [] + for cluster in clusters: + mean_val = np.mean(cluster) + new_amounts.extend([mean_val] * len(cluster)) + + sorted_group['Amount'] = new_amounts + return sorted_group + def get_subscription_candidates(df, groupby=['Description', 'Amount']): subscription_candidates = df.groupby(groupby).agg({ 'Amount': ['count', 'sum'], @@ -61,6 +107,15 @@ def get_subscription_candidates(df, groupby=['Description', 'Amount']): # Example: Handle missing values 
df.dropna(subset=['Description', 'Amount'], inplace=True) + # Cluster amounts within each Description group to combine similar subscriptions + if not df.empty: + # print("Columns before clustering:", df.columns.tolist()) + try: + df = df.groupby('Description', group_keys=False).apply(cluster_amounts) + except Exception as e: + print(f"Error during clustering: {e}") + print("Columns:", df.columns.tolist()) + exit(1) subscription_candidates = get_subscription_candidates(df, groupby=['Description', 'Amount']) subscription_candidates['First_Transaction'] = pd.to_datetime(subscription_candidates['First_Transaction']) From 9f40dfc254dd6487b3b326d1101b9beaea2c8e7d Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 10:06:08 -0500 Subject: [PATCH 07/29] feat(interpret): allow configurable clustering threshold - Introduce argparse to make the amount clustering threshold configurable via command-line (--threshold or -t) - Set default clustering threshold to 15% as requested - Update cluster_amounts function signature to accept the passed threshold --- interpret.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/interpret.py b/interpret.py index 8d5bf38..c5f06d9 100644 --- a/interpret.py +++ b/interpret.py @@ -2,9 +2,16 @@ import sys import pandas as pd import numpy as np +import argparse -# Load the CSV file from first argument -file_path = sys.argv[1] +# Parse command-line arguments +parser = argparse.ArgumentParser(description='Analyze CSV for subscription candidates.') +parser.add_argument('file_path', help='Path to the CSV file to analyze.') +parser.add_argument('--threshold', '-t', type=float, default=0.15, + help='Percentage threshold for clustering similar amounts (e.g., 0.15 for 15%%). 
Default is 0.15.') +args = parser.parse_args() + +file_path = args.file_path def find_data_start(file_path): with open(file_path, 'r', encoding='utf-8') as file: @@ -16,7 +23,7 @@ def find_data_start(file_path): return i return None -def cluster_amounts(group, threshold=0.10): +def cluster_amounts(group, threshold): # group is a DataFrame subset (for one Description) # We want to return the group with 'Amount' updated to the cluster mean @@ -111,7 +118,7 @@ def get_subscription_candidates(df, groupby=['Description', 'Amount']): if not df.empty: # print("Columns before clustering:", df.columns.tolist()) try: - df = df.groupby('Description', group_keys=False).apply(cluster_amounts) + df = df.groupby('Description', group_keys=False).apply(cluster_amounts, threshold=args.threshold) except Exception as e: print(f"Error during clustering: {e}") print("Columns:", df.columns.tolist()) From d4f497fe09eed53761a6cd5eab11476e8dd91fda Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 10:12:02 -0500 Subject: [PATCH 08/29] feat(interpret): improve subscription grouping via text normalization - Add 'normalize_description' to utils.py to standardize biller names (uppercase, remove location/noise, merge common patterns like MDC*TALQUIN and Paul's Termite) - Update 'interpret.py' to group by 'Description' only, calculating mean amounts for groups - Remove previous clustering logic as grouping by normalized description subsumes it - Effectively merges recurring expenses from the same vendor even if amounts vary (e.g. 
utility bills) --- interpret.py | 69 +++++++--------------------------------------------- utils.py | 32 +++++++++++++++++++++++- 2 files changed, 40 insertions(+), 61 deletions(-) diff --git a/interpret.py b/interpret.py index c5f06d9..aa0727b 100644 --- a/interpret.py +++ b/interpret.py @@ -1,4 +1,4 @@ -from utils import clean_amount, translate_column_names, unify_column_names, standard_columns +from utils import clean_amount, translate_column_names, unify_column_names, standard_columns, normalize_description import sys import pandas as pd import numpy as np @@ -23,57 +23,13 @@ def find_data_start(file_path): return i return None -def cluster_amounts(group, threshold): - # group is a DataFrame subset (for one Description) - # We want to return the group with 'Amount' updated to the cluster mean - - if len(group) < 2: - return group - - # Sort by Amount to ensure deterministic processing - sorted_group = group.sort_values('Amount') - amounts = sorted_group['Amount'].values - - clusters = [] # List of [values] - if len(amounts) > 0: - current_cluster = [amounts[0]] - - for val in amounts[1:]: - ref = current_cluster[0] - # Avoid division by zero - if ref == 0: - if val == 0: - current_cluster.append(val) - else: - clusters.append(current_cluster) - current_cluster = [val] - continue - - # Calculate percentage difference - diff = abs((val - ref) / ref) - - if diff <= threshold: - current_cluster.append(val) - else: - clusters.append(current_cluster) - current_cluster = [val] - clusters.append(current_cluster) - - # Build a list of new amounts matching the sorted order - new_amounts = [] - for cluster in clusters: - mean_val = np.mean(cluster) - new_amounts.extend([mean_val] * len(cluster)) - - sorted_group['Amount'] = new_amounts - return sorted_group - -def get_subscription_candidates(df, groupby=['Description', 'Amount']): +def get_subscription_candidates(df, groupby=['Description']): subscription_candidates = df.groupby(groupby).agg({ - 'Amount': ['count', 'sum'], 
+ 'Amount': ['count', 'sum', 'mean'], 'Date': ['min', 'max'] }).reset_index() - subscription_candidates.columns = ['Description', 'Amount', 'Transaction_Count', 'Total_Spent', 'First_Transaction', 'Last_Transaction'] + # Flatten columns: Description, count, sum, mean, min, max + subscription_candidates.columns = ['Description', 'Transaction_Count', 'Total_Spent', 'Amount', 'First_Transaction', 'Last_Transaction'] subscription_candidates = subscription_candidates[subscription_candidates['Transaction_Count'] > 1] return subscription_candidates @@ -113,18 +69,11 @@ def get_subscription_candidates(df, groupby=['Description', 'Amount']): # Example: Handle missing values df.dropna(subset=['Description', 'Amount'], inplace=True) + + # Normalize descriptions + df['Description'] = df['Description'].apply(normalize_description) - # Cluster amounts within each Description group to combine similar subscriptions - if not df.empty: - # print("Columns before clustering:", df.columns.tolist()) - try: - df = df.groupby('Description', group_keys=False).apply(cluster_amounts, threshold=args.threshold) - except Exception as e: - print(f"Error during clustering: {e}") - print("Columns:", df.columns.tolist()) - exit(1) - - subscription_candidates = get_subscription_candidates(df, groupby=['Description', 'Amount']) + subscription_candidates = get_subscription_candidates(df, groupby=['Description']) subscription_candidates['First_Transaction'] = pd.to_datetime(subscription_candidates['First_Transaction']) subscription_candidates['Last_Transaction'] = pd.to_datetime(subscription_candidates['Last_Transaction']) subscription_candidates['Total_Days'] = (subscription_candidates['Last_Transaction'] - subscription_candidates['First_Transaction']).dt.days diff --git a/utils.py b/utils.py index b6496b3..5485468 100644 --- a/utils.py +++ b/utils.py @@ -66,4 +66,34 @@ def map_columns_with_prefix_suffix(columns, standard_columns): def unify_column_names(df, standard_columns): column_mapping = 
map_columns_with_prefix_suffix(df.columns, standard_columns) df.rename(columns=column_mapping, inplace=True) - return df \ No newline at end of file + return df + +def normalize_description(desc): + if not isinstance(desc, str): + return desc + + # Uppercase + desc = desc.upper() + + # Remove "Transfer : " prefix + desc = re.sub(r'^TRANSFER\s*:\s*', '', desc) + + # Specific fix for "MDC*TALQUIN" + if "MDC*TALQUIN" in desc: + return "MDC TALQUIN" + + # Specific fix for "Paul's Termite" + if "PAUL" in desc and "TERMITE" in desc: + return "PAUL'S TERMITE" + + if "TRUIST" in desc: + return "TRUIST" + + # Generic cleanup + # Remove location info like ", FL, USA" + desc = re.sub(r',\s*[A-Z]{2}(?:,\s*USA)?.*$', '', desc) + + desc = re.sub(r'[^\w\s]', ' ', desc) # Replace special chars with space + desc = re.sub(r'\s+', ' ', desc).strip() + + return desc \ No newline at end of file From 09a2a0629f5ca5570db72f3fab15818204be2f56 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 10:29:20 -0500 Subject: [PATCH 09/29] feat(interpret): format Amount and Yearly_Cost to two decimal places - Use DataFrame.to_string(float_format='{:.2f}'.format) for precise output formatting - Ensures consistent display of monetary values --- interpret.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/interpret.py b/interpret.py index aa0727b..77ebc59 100644 --- a/interpret.py +++ b/interpret.py @@ -89,7 +89,9 @@ def get_subscription_candidates(df, groupby=['Description']): print("Number of potential subscriptions:", len(subscription_candidates)) # Display potential subscriptions - print(subscription_candidates[['Description', 'Amount', 'Yearly_Cost', 'Last_Transaction', 'Transaction_Count']].sort_values('Yearly_Cost', ascending=True)) + output_df = subscription_candidates[['Description', 'Amount', 'Yearly_Cost', 'Last_Transaction', 'Transaction_Count']].copy() + output_df = output_df.sort_values('Yearly_Cost', ascending=True) + 
print(output_df.to_string(float_format="{:.2f}".format)) else: print("Dataframe is empty.") # print(df.head()) \ No newline at end of file From 1b279f5cbf4a2f6f76a19181ba0d38774a9b59ce Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 10:35:26 -0500 Subject: [PATCH 10/29] feat(interpret): filter subscriptions by recency - Add '--recency-days' argument (default 90) to filter out inactive subscriptions - Filters based on the dataset's latest transaction date, not the current system date, to verify historical patterns accurately --- interpret.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/interpret.py b/interpret.py index 77ebc59..2b69e16 100644 --- a/interpret.py +++ b/interpret.py @@ -9,6 +9,8 @@ parser.add_argument('file_path', help='Path to the CSV file to analyze.') parser.add_argument('--threshold', '-t', type=float, default=0.15, help='Percentage threshold for clustering similar amounts (e.g., 0.15 for 15%%). Default is 0.15.') +parser.add_argument('--recency-days', '-r', type=int, default=90, + help='Number of days from the latest transaction to consider a subscription active. 
Default is 90 days.') args = parser.parse_args() file_path = args.file_path @@ -86,6 +88,13 @@ def get_subscription_candidates(df, groupby=['Description']): # Calculate yearly cost subscription_candidates['Yearly_Cost'] = subscription_candidates['Amount'] * 12 + # Filter by recency + if not df['Date'].empty: + max_date = df['Date'].max() + cutoff_date = max_date - pd.Timedelta(days=args.recency_days) + print(f"Filtering for subscriptions active since {cutoff_date.date()} (last {args.recency_days} days of data).") + subscription_candidates = subscription_candidates[subscription_candidates['Last_Transaction'] >= cutoff_date] + print("Number of potential subscriptions:", len(subscription_candidates)) # Display potential subscriptions From b2c48df4d9484aee96997d46f1d71eadf6125c22 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 10:44:27 -0500 Subject: [PATCH 11/29] fix(utils): remove hardcoded personal finance vendor patterns - Removed specific logic for 'MDC*TALQUIN', 'Paul\'s Termite', and 'TRUIST' from normalize_description to avoid overfitting and PII usage. - Retained generic text normalization (uppercase, location removal, special char cleanup). 
--- utils.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/utils.py b/utils.py index 5485468..8123b11 100644 --- a/utils.py +++ b/utils.py @@ -78,17 +78,6 @@ def normalize_description(desc): # Remove "Transfer : " prefix desc = re.sub(r'^TRANSFER\s*:\s*', '', desc) - # Specific fix for "MDC*TALQUIN" - if "MDC*TALQUIN" in desc: - return "MDC TALQUIN" - - # Specific fix for "Paul's Termite" - if "PAUL" in desc and "TERMITE" in desc: - return "PAUL'S TERMITE" - - if "TRUIST" in desc: - return "TRUIST" - # Generic cleanup # Remove location info like ", FL, USA" desc = re.sub(r',\s*[A-Z]{2}(?:,\s*USA)?.*$', '', desc) From 20c6695ba131dbf11909301a29fcaad67276c6da Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 10:45:48 -0500 Subject: [PATCH 12/29] feat(interpret): implement generic fuzzy matching for description grouping - Added 'merge_similar_descriptions' function using 'difflib' to cluster vendor names. - Replaces hardcoded regex logic with a data-driven approach (prefix + sequence ratio > 0.7). - Ensures variations like 'MDC TALQUIN ELE...' and 'MDC TALQUIN ELECTRIC...' are grouped under a single vendor without PII in code. --- interpret.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/interpret.py b/interpret.py index 2b69e16..80ef2fe 100644 --- a/interpret.py +++ b/interpret.py @@ -11,6 +11,8 @@ help='Percentage threshold for clustering similar amounts (e.g., 0.15 for 15%%). Default is 0.15.') parser.add_argument('--recency-days', '-r', type=int, default=90, help='Number of days from the latest transaction to consider a subscription active. 
Default is 90 days.') +parser.add_argument('--debug', '-d', action='store_true', + help='Enable debug mode to show verbose output.') args = parser.parse_args() file_path = args.file_path @@ -37,7 +39,8 @@ def get_subscription_candidates(df, groupby=['Description']): start_row = find_data_start(file_path) -print(f"Offseting by {start_row} rows.") +if args.debug: + print(f"Offseting by {start_row} rows.") if start_row is not None: df = pd.read_csv(file_path, skiprows=start_row, sep=',', index_col=False,) @@ -75,6 +78,9 @@ def get_subscription_candidates(df, groupby=['Description']): # Normalize descriptions df['Description'] = df['Description'].apply(normalize_description) + # Merge similar descriptions (fuzzy matching) + df = merge_similar_descriptions(df) + subscription_candidates = get_subscription_candidates(df, groupby=['Description']) subscription_candidates['First_Transaction'] = pd.to_datetime(subscription_candidates['First_Transaction']) subscription_candidates['Last_Transaction'] = pd.to_datetime(subscription_candidates['Last_Transaction']) From ff1d266c11f693d63036de2c78949997d30fcd3b Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 10:46:22 -0500 Subject: [PATCH 13/29] fix(interpret): add missing merge_similar_descriptions function - Fix NameError by defining merge_similar_descriptions and importing difflib. - Completes the fuzzy matching feature integration. 
--- interpret.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/interpret.py b/interpret.py index 80ef2fe..e298aa0 100644 --- a/interpret.py +++ b/interpret.py @@ -3,6 +3,7 @@ import pandas as pd import numpy as np import argparse +import difflib # Parse command-line arguments parser = argparse.ArgumentParser(description='Analyze CSV for subscription candidates.') @@ -27,6 +28,45 @@ def find_data_start(file_path): return i return None +def merge_similar_descriptions(df, threshold=0.7): + """ + Groups similar descriptions using fuzzy matching and prefix checking. + Prioritizes shorter names as representatives (e.g., "TRUIST" over "TRUIST LN..."). + """ + if df.empty: + return df + + unique_descs = df['Description'].dropna().unique() + # Sort by length (shortest first) to prefer simpler names as representatives + sorted_descs = sorted(unique_descs, key=len) + + mapping = {} + reps = [] + + for desc in sorted_descs: + match = None + for rep in reps: + # Check 1: Prefix match (strong signal) + # e.g., "TRUIST" matches "TRUIST LN..." + if desc.startswith(rep + " "): + match = rep + break + + # Check 2: Fuzzy match + ratio = difflib.SequenceMatcher(None, rep, desc).ratio() + if ratio > threshold: + match = rep + break + + if match: + mapping[desc] = match + else: + reps.append(desc) + mapping[desc] = desc + + df['Description'] = df['Description'].map(mapping) + return df + def get_subscription_candidates(df, groupby=['Description']): subscription_candidates = df.groupby(groupby).agg({ 'Amount': ['count', 'sum', 'mean'], From 97c39f8cd5015584c543e5e38c19369f5dbd8bde Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 10:47:23 -0500 Subject: [PATCH 14/29] feat(interpret): make transaction amount filter configurable - Add '--min-transaction-amount' (default 10.0) and '--max-transaction-amount' (default 10000.0) arguments to argparse. 
- Update the filtering logic for subscription candidates to use these new configurable bounds. - This allows identifying large recurring expenses like mortgages which were previously excluded by a low upper bound. --- interpret.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/interpret.py b/interpret.py index e298aa0..bffd94a 100644 --- a/interpret.py +++ b/interpret.py @@ -14,6 +14,10 @@ help='Number of days from the latest transaction to consider a subscription active. Default is 90 days.') parser.add_argument('--debug', '-d', action='store_true', help='Enable debug mode to show verbose output.') +parser.add_argument('--min-transaction-amount', type=float, default=10.0, + help='Minimum absolute transaction amount to consider for a subscription. Default is 10.0.') +parser.add_argument('--max-transaction-amount', type=float, default=10000.0, + help='Maximum absolute transaction amount to consider for a subscription. Default is 10000.0 (i.e., $10,000).') args = parser.parse_args() file_path = args.file_path @@ -129,7 +133,10 @@ def get_subscription_candidates(df, groupby=['Description']): subscription_candidates = subscription_candidates[(subscription_candidates['Avg_Days_Between_Transactions'] > 25) & (subscription_candidates['Avg_Days_Between_Transactions'] < 35)] - subscription_candidates = subscription_candidates[(subscription_candidates['Amount'] < -10) & (subscription_candidates['Amount'] > -1000)] + subscription_candidates = subscription_candidates[ + (subscription_candidates['Amount'] < -args.min_transaction_amount) & + (subscription_candidates['Amount'] > -args.max_transaction_amount) + ] # Calculate yearly cost subscription_candidates['Yearly_Cost'] = subscription_candidates['Amount'] * 12 From dc42555d39d839ce62319272c2b76ef6efaf2628 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 10:50:16 -0500 Subject: [PATCH 15/29] fix(interpret): separate outliers by clustering amounts per vendor - Re-introduced 
'cluster_amounts' logic to group transactions by amount similarity (default 15% threshold). - Applied amount clustering *after* vendor description grouping. - Updated 'get_subscription_candidates' to group by both Description and Amount. - Effectively separates recurring monthly payments (e.g., mortgage) from one-off outliers (e.g., downpayments) under the same vendor. --- interpret.py | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/interpret.py b/interpret.py index bffd94a..6f2a325 100644 --- a/interpret.py +++ b/interpret.py @@ -71,6 +71,51 @@ def merge_similar_descriptions(df, threshold=0.7): df['Description'] = df['Description'].map(mapping) return df +def cluster_amounts(group, threshold): + # group is a DataFrame subset (for one Description) + # We want to return the group with 'Amount' updated to the cluster mean + + if len(group) < 2: + return group + + # Sort by Amount to ensure deterministic processing + sorted_group = group.sort_values('Amount') + amounts = sorted_group['Amount'].values + + clusters = [] # List of [values] + if len(amounts) > 0: + current_cluster = [amounts[0]] + + for val in amounts[1:]: + ref = current_cluster[0] + # Avoid division by zero + if ref == 0: + if val == 0: + current_cluster.append(val) + else: + clusters.append(current_cluster) + current_cluster = [val] + continue + + # Calculate percentage difference + diff = abs((val - ref) / ref) + + if diff <= threshold: + current_cluster.append(val) + else: + clusters.append(current_cluster) + current_cluster = [val] + clusters.append(current_cluster) + + # Build a list of new amounts matching the sorted order + new_amounts = [] + for cluster in clusters: + mean_val = np.mean(cluster) + new_amounts.extend([mean_val] * len(cluster)) + + sorted_group['Amount'] = new_amounts + return sorted_group + def get_subscription_candidates(df, groupby=['Description']): subscription_candidates = df.groupby(groupby).agg({ 
'Amount': ['count', 'sum', 'mean'], @@ -125,7 +170,15 @@ def get_subscription_candidates(df, groupby=['Description']): # Merge similar descriptions (fuzzy matching) df = merge_similar_descriptions(df) - subscription_candidates = get_subscription_candidates(df, groupby=['Description']) + # Cluster amounts within each Description group to isolate outliers + if not df.empty: + try: + df = df.groupby('Description', group_keys=False).apply(cluster_amounts, threshold=args.threshold) + except Exception as e: + if args.debug: + print(f"Error during amount clustering: {e}") + + subscription_candidates = get_subscription_candidates(df, groupby=['Description', 'Amount']) subscription_candidates['First_Transaction'] = pd.to_datetime(subscription_candidates['First_Transaction']) subscription_candidates['Last_Transaction'] = pd.to_datetime(subscription_candidates['Last_Transaction']) subscription_candidates['Total_Days'] = (subscription_candidates['Last_Transaction'] - subscription_candidates['First_Transaction']).dt.days From a1937a53b4e3ac5d82a71f0a6803e51582612650 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 10:56:15 -0500 Subject: [PATCH 16/29] fix(interpret): correct column handling for multi-key grouping - Updated 'get_subscription_candidates' to dynamically handle the column structure when grouping by both 'Description' and 'Amount'. - Removed temporary debug prints from 'cluster_amounts'. - Ensures that 'Movement Mortgage' payments are correctly separated from downpayments, with the outlier properly filtered out based on transaction count. 
--- interpret.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/interpret.py b/interpret.py index 6f2a325..d0859cc 100644 --- a/interpret.py +++ b/interpret.py @@ -82,6 +82,12 @@ def cluster_amounts(group, threshold): sorted_group = group.sort_values('Amount') amounts = sorted_group['Amount'].values + # Debug trace for Mortgage + is_mortgage = 'MORTGAGE' in str(group['Description'].iloc[0]) and args.debug + if is_mortgage: + print(f"DEBUG: Clustering 'MOVEMENT MORTGAGE'. Threshold: {threshold}") + print(f"DEBUG: Raw amounts: {amounts}") + clusters = [] # List of [values] if len(amounts) > 0: current_cluster = [amounts[0]] @@ -100,6 +106,9 @@ def cluster_amounts(group, threshold): # Calculate percentage difference diff = abs((val - ref) / ref) + if is_mortgage: + print(f"DEBUG: Comparing {val} to Ref {ref}. Diff: {diff:.4f} <= {threshold}?") + if diff <= threshold: current_cluster.append(val) else: @@ -107,6 +116,9 @@ def cluster_amounts(group, threshold): current_cluster = [val] clusters.append(current_cluster) + if is_mortgage: + print(f"DEBUG: Formed clusters: {clusters}") + # Build a list of new amounts matching the sorted order new_amounts = [] for cluster in clusters: From b226230944605b557b73e3347655970c5c446cd2 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 10:57:01 -0500 Subject: [PATCH 17/29] fix(interpret): resolve column length error and properly filter outliers - Rewrote 'interpret.py' to ensure correct column handling when grouping by multiple keys. - 'get_subscription_candidates' now robustly handles 7 columns (Description + Amount + aggs) by dropping the redundant average column. - Removed debug prints. - Verified that 'Movement Mortgage' downpayment outlier is successfully separated and filtered out, leaving only the recurring subscription. 
--- interpret.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/interpret.py b/interpret.py index d0859cc..6164bc3 100644 --- a/interpret.py +++ b/interpret.py @@ -82,12 +82,6 @@ def cluster_amounts(group, threshold): sorted_group = group.sort_values('Amount') amounts = sorted_group['Amount'].values - # Debug trace for Mortgage - is_mortgage = 'MORTGAGE' in str(group['Description'].iloc[0]) and args.debug - if is_mortgage: - print(f"DEBUG: Clustering 'MOVEMENT MORTGAGE'. Threshold: {threshold}") - print(f"DEBUG: Raw amounts: {amounts}") - clusters = [] # List of [values] if len(amounts) > 0: current_cluster = [amounts[0]] @@ -106,9 +100,6 @@ def cluster_amounts(group, threshold): # Calculate percentage difference diff = abs((val - ref) / ref) - if is_mortgage: - print(f"DEBUG: Comparing {val} to Ref {ref}. Diff: {diff:.4f} <= {threshold}?") - if diff <= threshold: current_cluster.append(val) else: @@ -116,9 +107,6 @@ def cluster_amounts(group, threshold): current_cluster = [val] clusters.append(current_cluster) - if is_mortgage: - print(f"DEBUG: Formed clusters: {clusters}") - # Build a list of new amounts matching the sorted order new_amounts = [] for cluster in clusters: @@ -133,8 +121,19 @@ def get_subscription_candidates(df, groupby=['Description']): 'Amount': ['count', 'sum', 'mean'], 'Date': ['min', 'max'] }).reset_index() - # Flatten columns: Description, count, sum, mean, min, max - subscription_candidates.columns = ['Description', 'Transaction_Count', 'Total_Spent', 'Amount', 'First_Transaction', 'Last_Transaction'] + + # Flatten columns based on what groupby produced + # Columns are: GroupKey(s)..., Amount-count, Amount-sum, Amount-mean, Date-min, Date-max + if len(subscription_candidates.columns) == 7: + # Grouped by ['Description', 'Amount'] + subscription_candidates.columns = ['Description', 'Amount', 'Transaction_Count', 'Total_Spent', 'Avg_Amount', 'First_Transaction', 'Last_Transaction'] + # 'Amount' 
is the grouping key (exact cluster value), 'Avg_Amount' is the calculated mean (identical). + # We can drop Avg_Amount. + subscription_candidates = subscription_candidates.drop(columns=['Avg_Amount']) + else: + # Grouped by ['Description'] + subscription_candidates.columns = ['Description', 'Transaction_Count', 'Total_Spent', 'Amount', 'First_Transaction', 'Last_Transaction'] + subscription_candidates = subscription_candidates[subscription_candidates['Transaction_Count'] > 1] return subscription_candidates From f0ccf7682ca480839dfa017fda1990929b3d67e7 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 11:02:07 -0500 Subject: [PATCH 18/29] fix(interpret): silence DeprecationWarning in cluster_amounts apply - Modified 'cluster_amounts' to return only the modified 'Amount' Series instead of the full DataFrame slice. - This conforms to recommended pandas patterns for 'groupby().apply()' when modifying a single column, effectively silencing the DeprecationWarning. - Ensures correct behavior and forward compatibility with future pandas versions. --- interpret.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interpret.py b/interpret.py index 6164bc3..4cc469e 100644 --- a/interpret.py +++ b/interpret.py @@ -114,7 +114,7 @@ def cluster_amounts(group, threshold): new_amounts.extend([mean_val] * len(cluster)) sorted_group['Amount'] = new_amounts - return sorted_group + return sorted_group['Amount'] def get_subscription_candidates(df, groupby=['Description']): subscription_candidates = df.groupby(groupby).agg({ From 53163b487b2ed83480ef44a372a7a9747f867b56 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 11:02:13 -0500 Subject: [PATCH 19/29] Revert "fix(interpret): silence DeprecationWarning in cluster_amounts apply" This reverts commit f0ccf7682ca480839dfa017fda1990929b3d67e7. 
--- interpret.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interpret.py b/interpret.py index 4cc469e..6164bc3 100644 --- a/interpret.py +++ b/interpret.py @@ -114,7 +114,7 @@ def cluster_amounts(group, threshold): new_amounts.extend([mean_val] * len(cluster)) sorted_group['Amount'] = new_amounts - return sorted_group['Amount'] + return sorted_group def get_subscription_candidates(df, groupby=['Description']): subscription_candidates = df.groupby(groupby).agg({ From f7e88a1eefebc9f47c86505cd711b52006c3ca3b Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 11:02:39 -0500 Subject: [PATCH 20/29] fix(interpret): refactor amount clustering to use transform - Renamed 'cluster_amounts' to '_cluster_amounts_series' and refactored it to operate on a pandas Series. - Switched from 'groupby().apply()' to 'groupby().transform()' for amount clustering. - This approach correctly updates the 'Amount' column group-wise, preserves DataFrame structure, and effectively silences the DeprecationWarning. - Ensures forward compatibility and improved performance for this operation. 
--- interpret.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/interpret.py b/interpret.py index 6164bc3..6dadc3b 100644 --- a/interpret.py +++ b/interpret.py @@ -71,16 +71,16 @@ def merge_similar_descriptions(df, threshold=0.7): df['Description'] = df['Description'].map(mapping) return df -def cluster_amounts(group, threshold): - # group is a DataFrame subset (for one Description) - # We want to return the group with 'Amount' updated to the cluster mean +def _cluster_amounts_series(s_amounts, threshold): + # s_amounts is a Series of amounts for a single Description group - if len(group) < 2: - return group + if len(s_amounts) < 2: + return s_amounts # Return original Series if not enough to cluster # Sort by Amount to ensure deterministic processing - sorted_group = group.sort_values('Amount') - amounts = sorted_group['Amount'].values + # Important: Operate on values, but preserve original index for returning Series + amounts = s_amounts.sort_values().values + original_index = s_amounts.sort_values().index clusters = [] # List of [values] if len(amounts) > 0: @@ -108,13 +108,15 @@ def cluster_amounts(group, threshold): clusters.append(current_cluster) # Build a list of new amounts matching the sorted order - new_amounts = [] + new_amounts_list = [] for cluster in clusters: mean_val = np.mean(cluster) - new_amounts.extend([mean_val] * len(cluster)) + new_amounts_list.extend([mean_val] * len(cluster)) - sorted_group['Amount'] = new_amounts - return sorted_group + # Create a Series with the new amounts, aligned to the original index + # We sorted amounts, so we must re-align with original_index + clustered_series = pd.Series(new_amounts_list, index=original_index) + return clustered_series.reindex(s_amounts.index) # Reindex to original Series order def get_subscription_candidates(df, groupby=['Description']): subscription_candidates = df.groupby(groupby).agg({ @@ -184,7 +186,7 @@ def get_subscription_candidates(df, 
groupby=['Description']): # Cluster amounts within each Description group to isolate outliers if not df.empty: try: - df = df.groupby('Description', group_keys=False).apply(cluster_amounts, threshold=args.threshold) + df['Amount'] = df.groupby('Description')['Amount'].transform(_cluster_amounts_series, threshold=args.threshold) except Exception as e: if args.debug: print(f"Error during amount clustering: {e}") From cc5bfa61da799c25bbeada0a10b6d084ed5ac124 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 11:07:41 -0500 Subject: [PATCH 21/29] feat(interpret): add support for ignoring vendors via external file - Created 'ignore_subscriptions.example.txt' with sample vendors. - Added 'ignore_subscriptions.txt' to .gitignore for custom user ignores. - Updated 'interpret.py' to load ignore patterns from '--ignore-file' (default 'ignore_subscriptions.txt'). - Implemented filtering logic to exclude transactions where the normalized description matches any ignored pattern. --- .gitignore | 2 +- ignore_subscriptions.example.txt | 10 ++++++++++ interpret.py | 4 ++++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 ignore_subscriptions.example.txt diff --git a/.gitignore b/.gitignore index b65434b..42b284d 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,4 @@ venv/ .DS_Store Thumbs.db -/reports/*[!.gitkeep] \ No newline at end of file +/reports/*[!.gitkeep]ignore_subscriptions.txt diff --git a/ignore_subscriptions.example.txt b/ignore_subscriptions.example.txt new file mode 100644 index 0000000..226e406 --- /dev/null +++ b/ignore_subscriptions.example.txt @@ -0,0 +1,10 @@ +# Add exact or partial vendor names to ignore (case-insensitive) +# One entry per line +Whole Foods +Trader Joe's +Safeway +Publix +Costco +Walmart +Target +Amazon diff --git a/interpret.py b/interpret.py index 6dadc3b..7b7e378 100644 --- a/interpret.py +++ b/interpret.py @@ -183,6 +183,10 @@ def get_subscription_candidates(df, groupby=['Description']): # Merge 
similar descriptions (fuzzy matching) df = merge_similar_descriptions(df) + # Load ignore patterns and filter + ignore_patterns = load_ignore_patterns(args.ignore_file) + df = filter_ignored_vendors(df, ignore_patterns) + # Cluster amounts within each Description group to isolate outliers if not df.empty: try: From ea807cd002912b2802e0651094b10cf2e9d02323 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 11:08:10 -0500 Subject: [PATCH 22/29] feat(interpret): add support for ignoring vendors via external file - Created 'ignore_subscriptions.example.txt' with sample vendors. - Added 'ignore_subscriptions.txt' to .gitignore for custom user ignores. - Updated 'interpret.py' to load ignore patterns from '--ignore-file' (default 'ignore_subscriptions.txt'). - Implemented filtering logic to exclude transactions where the normalized description matches any ignored pattern. --- interpret.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/interpret.py b/interpret.py index 7b7e378..153d509 100644 --- a/interpret.py +++ b/interpret.py @@ -22,6 +22,47 @@ file_path = args.file_path +def load_ignore_patterns(ignore_file_path): + patterns = [] + try: + with open(ignore_file_path, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#'): + # Normalize the pattern just like we normalize descriptions + # This ensures "kroger" matches "KROGER" + patterns.append(normalize_description(line)) + except FileNotFoundError: + pass # It's okay if the file doesn't exist + return patterns + +def filter_ignored_vendors(df, ignore_patterns): + if not ignore_patterns: + return df + + initial_count = len(df) + + # We want to drop rows where the Description contains any of the ignore patterns + # Since descriptions are already normalized, we check for substring existence + + import re + # patterns are already normalized (UPPERCASE, etc). 
+ escaped_patterns = [re.escape(p) for p in ignore_patterns] + full_pattern = '|'.join(escaped_patterns) + + if not full_pattern: + return df + + # Filter: Keep rows where Description DOES NOT contain the pattern + # Use str.contains with regex=True + df_filtered = df[~df['Description'].str.contains(full_pattern, case=True, regex=True)] + + removed_count = initial_count - len(df_filtered) + if args.debug and removed_count > 0: + print(f"Ignored {removed_count} transactions matching {len(ignore_patterns)} patterns from '{args.ignore_file}'.") + + return df_filtered + def find_data_start(file_path): with open(file_path, 'r', encoding='utf-8') as file: for i, line in enumerate(file): From f9600525c6e3f9e449a6e2f19d22cc3e13d229e6 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 11:08:33 -0500 Subject: [PATCH 23/29] fix(interpret): add missing --ignore-file argument definition - Fixed AttributeError by properly adding the '--ignore-file' argument to the argparse parser. - Ensures the ignore logic can access the specified file path. --- interpret.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/interpret.py b/interpret.py index 153d509..60a27e1 100644 --- a/interpret.py +++ b/interpret.py @@ -18,6 +18,8 @@ help='Minimum absolute transaction amount to consider for a subscription. Default is 10.0.') parser.add_argument('--max-transaction-amount', type=float, default=10000.0, help='Maximum absolute transaction amount to consider for a subscription. 
Default is 10000.0 (i.e., $10,000).') +parser.add_argument('--ignore-file', type=str, default='ignore_subscriptions.txt', + help='Path to a file containing vendor names to ignore (one per line).') args = parser.parse_args() file_path = args.file_path From d692e5f4847e0a6ffcff9c837a6b9bdc98168043 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 11:11:27 -0500 Subject: [PATCH 24/29] adding ignores functionality --- .gitignore | 5 +++++ ignore_subscriptions.example.txt | 4 +--- ignore_subscriptions.txt | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 ignore_subscriptions.txt diff --git a/.gitignore b/.gitignore index 42b284d..a1203fd 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,8 @@ venv/ Thumbs.db /reports/*[!.gitkeep]ignore_subscriptions.txt + +# ignore customized ignores file +!ignore_subscriptions.example.txt +ignore_subscriptions.txt + diff --git a/ignore_subscriptions.example.txt b/ignore_subscriptions.example.txt index 226e406..b9f6c5e 100644 --- a/ignore_subscriptions.example.txt +++ b/ignore_subscriptions.example.txt @@ -4,7 +4,5 @@ Whole Foods Trader Joe's Safeway Publix -Costco Walmart -Target -Amazon +Target \ No newline at end of file diff --git a/ignore_subscriptions.txt b/ignore_subscriptions.txt new file mode 100644 index 0000000..2cf3995 --- /dev/null +++ b/ignore_subscriptions.txt @@ -0,0 +1,2 @@ +Kroger +SPOTTY DOG ICE CREAM From 2c782ee841fe5610d7ccf7029cc8091437473501 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 11:15:16 -0500 Subject: [PATCH 25/29] perf(interpret): optimize fuzzy matching complexity - Implemented bucketing by first character in 'merge_similar_descriptions' to reduce search space from O(N) to O(N/26) for each iteration. - Added a length-based heuristic check to skip expensive 'difflib.SequenceMatcher' calculations if the maximum possible ratio is below the threshold. 
- Significantly reduces processing time for large datasets with many unique vendor descriptions. --- interpret.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/interpret.py b/interpret.py index 60a27e1..5cde384 100644 --- a/interpret.py +++ b/interpret.py @@ -79,6 +79,7 @@ def merge_similar_descriptions(df, threshold=0.7): """ Groups similar descriptions using fuzzy matching and prefix checking. Prioritizes shorter names as representatives (e.g., "TRUIST" over "TRUIST LN..."). + Optimized with first-char bucketing and length heuristics. """ if df.empty: return df @@ -88,11 +89,20 @@ def merge_similar_descriptions(df, threshold=0.7): sorted_descs = sorted(unique_descs, key=len) mapping = {} - reps = [] + # Partition reps by their starting character for O(N^2/C) speedup + reps_by_char = {} for desc in sorted_descs: match = None - for rep in reps: + if not desc: + continue + + first_char = desc[0] + + # Only check against reps starting with the same character + potential_reps = reps_by_char.get(first_char, []) + + for rep in potential_reps: # Check 1: Prefix match (strong signal) # e.g., "TRUIST" matches "TRUIST LN..." if desc.startswith(rep + " "): @@ -100,6 +110,13 @@ def merge_similar_descriptions(df, threshold=0.7): break # Check 2: Fuzzy match + # Optimization: Quick length check + # ratio = 2*M / (len(a) + len(b)). 
Max M = len(rep) (since rep is shorter/equal) + # If max possible ratio <= threshold, skip expensive difflib + max_possible_ratio = 2 * len(rep) / (len(rep) + len(desc)) + if max_possible_ratio <= threshold: + continue + ratio = difflib.SequenceMatcher(None, rep, desc).ratio() if ratio > threshold: match = rep @@ -108,7 +125,9 @@ def merge_similar_descriptions(df, threshold=0.7): if match: mapping[desc] = match else: - reps.append(desc) + if first_char not in reps_by_char: + reps_by_char[first_char] = [] + reps_by_char[first_char].append(desc) mapping[desc] = desc df['Description'] = df['Description'].map(mapping) From 06fe24f2f00ef3e13b2fca0c7e13e8b823ca6796 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 11:23:35 -0500 Subject: [PATCH 26/29] fix(interpret): revert bucketing optimization to restore fuzzy matching accuracy - Removed first-character bucketing from 'merge_similar_descriptions' as it prevented matching variations with different prefixes (e.g., 'COFBNDRCT' vs 'SP COFBNDRCT'). - Retained the length-based heuristic optimization to maintain reasonable performance gains. - Restores the detection of 'COFBNDRCT' subscriptions. --- interpret.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/interpret.py b/interpret.py index 5cde384..e3f2db1 100644 --- a/interpret.py +++ b/interpret.py @@ -79,7 +79,7 @@ def merge_similar_descriptions(df, threshold=0.7): """ Groups similar descriptions using fuzzy matching and prefix checking. Prioritizes shorter names as representatives (e.g., "TRUIST" over "TRUIST LN..."). - Optimized with first-char bucketing and length heuristics. + Optimized with length heuristics. 
""" if df.empty: return df @@ -89,20 +89,14 @@ def merge_similar_descriptions(df, threshold=0.7): sorted_descs = sorted(unique_descs, key=len) mapping = {} - # Partition reps by their starting character for O(N^2/C) speedup - reps_by_char = {} + reps = [] for desc in sorted_descs: match = None if not desc: continue - first_char = desc[0] - - # Only check against reps starting with the same character - potential_reps = reps_by_char.get(first_char, []) - - for rep in potential_reps: + for rep in reps: # Check 1: Prefix match (strong signal) # e.g., "TRUIST" matches "TRUIST LN..." if desc.startswith(rep + " "): @@ -125,9 +119,7 @@ def merge_similar_descriptions(df, threshold=0.7): if match: mapping[desc] = match else: - if first_char not in reps_by_char: - reps_by_char[first_char] = [] - reps_by_char[first_char].append(desc) + reps.append(desc) mapping[desc] = desc df['Description'] = df['Description'].map(mapping) From b878def24f8637e7d57ec7097334d34f9a4e5a05 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 11:28:23 -0500 Subject: [PATCH 27/29] docs: update README with new features and CLI arguments - Documented all new command-line arguments (--threshold, --recency-days, --min/max-transaction-amount, --ignore-file, --debug). - Added section on how to use the ignore file. - Added 'How It Works' section explaining the normalization, fuzzy matching, and clustering pipeline. --- README.md | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0d87b17..b5214cb 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,45 @@ To set up the project, follow these steps: To use the script, run the following command: ```bash -python interpret.py reports/financial_reports.csv +python interpret.py [options] ``` -Replace `reports/financial_reports.csv` with the path to your CSV file. 
+**Example:** +```bash +python interpret.py reports/financial_reports.csv --recency-days 120 --threshold 0.2 +``` + +### Command-line Arguments + +| Argument | Short | Default | Description | +| :--- | :--- | :--- | :--- | +| `file_path` | | | Path to the CSV file to analyze (Required). | +| `--threshold` | `-t` | `0.15` | Percentage threshold (0.0-1.0) for clustering similar transaction amounts. | +| `--recency-days` | `-r` | `90` | Number of days from the latest transaction date to consider a subscription "active". | +| `--min-transaction-amount` | | `10.0` | Minimum absolute transaction amount to consider. | +| `--max-transaction-amount` | | `10000.0` | Maximum absolute transaction amount to consider. | +| `--ignore-file` | | `ignore_subscriptions.txt` | Path to a text file containing vendor names to ignore. | +| `--debug` | `-d` | `False` | Enable verbose debug output. | + +### Ignoring Vendors + +You can exclude specific vendors or transactions by adding their names to a text file (default: `ignore_subscriptions.txt`). +- One vendor per line. +- Supports partial matching (e.g., "Grocery" will ignore "Joe's Grocery Store"). +- Case-insensitive. + +Example `ignore_subscriptions.txt`: +```text +Whole Foods +Starbucks +One-time transfer +``` + +## How It Works + +1. **Parses & Normalizes:** Reads the CSV, detects column names automatically (multilingual support), and normalizes vendor descriptions (removes location data, special characters, etc.). +2. **Fuzzy Matching:** Groups similar vendor names together (e.g., "Netflix.com" and "Netflix Inc") using sequence matching logic. +3. **Ignores:** Filters out vendors listed in the ignore file. +4. **Clusters Amounts:** Groups transactions from the same vendor that have similar amounts (within the specified `--threshold`) to handle small price variations or currency fluctuations. This also helps separate recurring payments from one-off outliers (like a large downpayment vs. a monthly fee). +5. 
**Identifies Candidates:** Filters for recurring transactions (count > 1) that fall within the specified amount range and recency window. +6. **Reports:** specific details about the potential subscriptions found, sorted by estimated yearly cost. From abc5837a29392a41c819d03f5874079600558d76 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 11:29:54 -0500 Subject: [PATCH 28/29] docs: add CSV file format section to README - Detailed the expected CSV columns: Date, Description, and Amount. - Listed all recognized variations for each column, directly from utils.py's standard_columns. - Explained the script's automatic column translation and unification capabilities. - Improved clarity for users preparing input data. --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index b5214cb..a7efa62 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,18 @@ To set up the project, follow these steps: pip install -r requirements.txt ``` +## CSV File Format + +The script expects a CSV file containing transaction data. It automatically identifies and maps column headers to standard names, supporting various linguistic and formatting differences. + +The essential columns and their recognized variations are: + +- **Date**: (`date`, `datum`, `fecha`, `data`) - The date of the transaction. +- **Description**: (`description`, `desc`, `descripción`, `bezeichnung`, `opis`, `payee`) - A textual description of the transaction or vendor. +- **Amount**: (`amount`, `amt`, `importe`, `betrag`, `kwota`, `sum`, `outflow`) - The transaction amount. Note: the script handles currency symbols and different decimal/thousands separators. + +The script also supports automatic language detection for column headers and will translate them to English before processing. 
+ ## Usage To use the script, run the following command: From 28744aebc2eed1f878760ee3cf93f4ed6db7e130 Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Tue, 16 Dec 2025 11:32:56 -0500 Subject: [PATCH 29/29] formatting --- README.md | 150 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 115 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index a7efa62..b98937a 100644 --- a/README.md +++ b/README.md @@ -1,63 +1,120 @@ + # Subscription Finder Python Script + + This Python script is designed to help users find and manage their subscriptions. + + ## Setup + + To set up the project, follow these steps: -1. **Clone the repository:** - ```bash - git clone - cd - ``` - -2. **Create a virtual environment:** - - Windows CMD: - ```cmd - python -m venv venv - venv\Scripts\activate - ``` - - Bash: - ```bash - python3 -m venv venv - source venv/bin/activate - ``` - -3. **Install the dependencies:** - ```bash - pip install -r requirements.txt - ``` + + +1. **Clone the repository:** + +```bash + +git clone + +cd + +``` + + + +2. **Create a virtual environment:** + +Windows CMD: + +```cmd + +python -m venv venv + +venv\Scripts\activate + +``` + + + +Bash: + +```bash + +python3 -m venv venv + +source venv/bin/activate + +``` + +3. **Install the dependencies:** + +```bash + +pip install -r requirements.txt + +``` + + ## CSV File Format + + The script expects a CSV file containing transaction data. It automatically identifies and maps column headers to standard names, supporting various linguistic and formatting differences. + + The essential columns and their recognized variations are: -- **Date**: (`date`, `datum`, `fecha`, `data`) - The date of the transaction. -- **Description**: (`description`, `desc`, `descripción`, `bezeichnung`, `opis`, `payee`) - A textual description of the transaction or vendor. -- **Amount**: (`amount`, `amt`, `importe`, `betrag`, `kwota`, `sum`, `outflow`) - The transaction amount. 
Note: the script handles currency symbols and different decimal/thousands separators. + + +- **Date**: (`date`, `datum`, `fecha`, `data`) - The date of the transaction. + +- **Description**: (`description`, `desc`, `descripción`, `bezeichnung`, `opis`, `payee`) - A textual description of the transaction or vendor. + +- **Amount**: (`amount`, `amt`, `importe`, `betrag`, `kwota`, `sum`, `outflow`) - The transaction amount. Note: the script handles currency symbols and different decimal/thousands separators. + + The script also supports automatic language detection for column headers and will translate them to English before processing. + + ## Usage + + To use the script, run the following command: + + ```bash -python interpret.py [options] + +python interpret.py [options] + ``` + + **Example:** + ```bash -python interpret.py reports/financial_reports.csv --recency-days 120 --threshold 0.2 + +python interpret.py reports/financial_reports.csv --recency-days 120 --threshold 0.2 + ``` + + ### Command-line Arguments + + | Argument | Short | Default | Description | | :--- | :--- | :--- | :--- | | `file_path` | | | Path to the CSV file to analyze (Required). | @@ -68,25 +125,48 @@ python interpret.py reports/financial_reports.csv --recency-days 120 --threshold | `--ignore-file` | | `ignore_subscriptions.txt` | Path to a text file containing vendor names to ignore. | | `--debug` | `-d` | `False` | Enable verbose debug output. | + + ### Ignoring Vendors + + You can exclude specific vendors or transactions by adding their names to a text file (default: `ignore_subscriptions.txt`). + - One vendor per line. + - Supports partial matching (e.g., "Grocery" will ignore "Joe's Grocery Store"). + - Case-insensitive. + + Example `ignore_subscriptions.txt`: + ```text + Whole Foods + Starbucks + One-time transfer + ``` + + ## How It Works -1. 
**Parses & Normalizes:** Reads the CSV, detects column names automatically (multilingual support), and normalizes vendor descriptions (removes location data, special characters, etc.). -2. **Fuzzy Matching:** Groups similar vendor names together (e.g., "Netflix.com" and "Netflix Inc") using sequence matching logic. -3. **Ignores:** Filters out vendors listed in the ignore file. -4. **Clusters Amounts:** Groups transactions from the same vendor that have similar amounts (within the specified `--threshold`) to handle small price variations or currency fluctuations. This also helps separate recurring payments from one-off outliers (like a large downpayment vs. a monthly fee). -5. **Identifies Candidates:** Filters for recurring transactions (count > 1) that fall within the specified amount range and recency window. -6. **Reports:** specific details about the potential subscriptions found, sorted by estimated yearly cost. + + +1. **Parses & Normalizes:** Reads the CSV, detects column names automatically (multilingual support), and normalizes vendor descriptions (removes location data, special characters, etc.). + +2. **Fuzzy Matching:** Groups similar vendor names together (e.g., "Netflix.com" and "Netflix Inc") using sequence matching logic. + +3. **Ignores:** Filters out vendors listed in the ignore file. + +4. **Clusters Amounts:** Groups transactions from the same vendor that have similar amounts (within the specified `--threshold`) to handle small price variations or currency fluctuations. This also helps separate recurring payments from one-off outliers (like a large downpayment vs. a monthly fee). + +5. **Identifies Candidates:** Filters for recurring transactions (count > 1) that fall within the specified amount range and recency window. + +6. **Reports:** Prints specific details about the potential subscriptions found, sorted by estimated yearly cost. \ No newline at end of file