# python-automation-agent/spreadsheet_reader.py at main · a-sehic-dev/python-automation-agent · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""
Spreadsheet Reader Module
-------------------------
Reads Excel spreadsheet data using pandas.
Provides a clean interface for the automation agent to fetch rows.
"""

import pandas as pd
from pathlib import Path


def read_spreadsheet(file_path: str = "data.xlsx") -> pd.DataFrame:
    """
    Load an Excel workbook into a pandas DataFrame.

    Args:
        file_path: Location of the Excel file (default: data.xlsx)

    Returns:
        The DataFrame produced by pandas.read_excel using the openpyxl engine

    Raises:
        FileNotFoundError: If nothing exists at file_path
    """
    # Check up front so callers get a clear, path-specific message rather
    # than whatever lower-level error the Excel reader would surface.
    if Path(file_path).exists():
        return pd.read_excel(file_path, engine="openpyxl")

    raise FileNotFoundError(f"Spreadsheet not found: {file_path}")


def get_rows_as_dicts(file_path: str = "data.xlsx") -> list[dict]:
    """
    Read the spreadsheet and return each row as a dictionary.
    Useful for iterating over rows in the automation agent.

    Missing cells (NaN / NaT) are returned as None in every column,
    including numeric and datetime columns.

    Args:
        file_path: Path to the Excel file (default: data.xlsx)

    Returns:
        List of dictionaries, one per row, keyed by column header

    Raises:
        FileNotFoundError: If the spreadsheet file does not exist
            (propagated from read_spreadsheet)
    """
    df = read_spreadsheet(file_path)
    # Cast to object dtype before masking: on float/datetime columns,
    # where(..., None) coerces None straight back to NaN/NaT (pandas >= 1.3),
    # so without the cast numeric columns would still leak NaN to callers.
    df = df.astype(object).where(pd.notnull(df), None)
    return df.to_dict(orient="records")


def build_search_query(row: dict) -> str:
    """
    Build a search query string from a row's name and company fields.
    Used by the automation agent to perform web searches.

    Missing, empty, whitespace-only, and NaN values are skipped so the
    result never contains stray spaces or the literal text "nan".

    Args:
        row: Dictionary representing a spreadsheet row

    Returns:
        Search query string (e.g., "John Smith Acme Corp"), or "" when
        neither field holds usable text
    """
    parts = []
    for key in ("name", "company"):
        value = row.get(key) or ""
        # NaN is the only float that differs from itself; pandas uses it for
        # empty cells and it is truthy, so it must be filtered explicitly.
        if isinstance(value, float) and value != value:
            continue
        # Strip before testing emptiness so whitespace-only cells are dropped.
        text = str(value).strip()
        if text:
            parts.append(text)
    return " ".join(parts)