-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspreadsheet_reader.py
More file actions
64 lines (49 loc) · 1.75 KB
/
spreadsheet_reader.py
File metadata and controls
64 lines (49 loc) · 1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""
Spreadsheet Reader Module
-------------------------
Reads Excel spreadsheet data using pandas.
Provides a clean interface for the automation agent to fetch rows.
"""
import pandas as pd
from pathlib import Path
def read_spreadsheet(file_path: str = "data.xlsx") -> pd.DataFrame:
    """
    Load an Excel workbook into a pandas DataFrame.

    The path is checked for existence up front so that a missing file is
    reported with a clear message before pandas is asked to parse anything.

    Args:
        file_path: Location of the Excel file (default: data.xlsx)

    Returns:
        The DataFrame produced by ``pd.read_excel`` using the openpyxl engine

    Raises:
        FileNotFoundError: If nothing exists at ``file_path``
    """
    if Path(file_path).exists():
        return pd.read_excel(file_path, engine="openpyxl")
    raise FileNotFoundError(f"Spreadsheet not found: {file_path}")
def get_rows_as_dicts(file_path: str = "data.xlsx") -> list[dict]:
    """
    Read the spreadsheet and return each row as a dictionary.

    Useful for iterating over rows in the automation agent. Missing cells
    (NaN / NaT) are converted to None so callers can test them with a plain
    ``is None`` or truthiness check.

    Args:
        file_path: Path to the Excel file (default: data.xlsx)

    Returns:
        List of dictionaries, one per row, keyed by column label

    Raises:
        FileNotFoundError: Propagated from read_spreadsheet when the file
            does not exist
    """
    df = read_spreadsheet(file_path)
    # Cast to object before substituting: on float-typed columns recent
    # pandas versions coerce a None replacement straight back to NaN, so
    # calling where() on the original frame leaves missing cells as NaN.
    df = df.astype(object).where(pd.notnull(df), None)
    return df.to_dict(orient="records")
def build_search_query(row: dict) -> str:
    """
    Build a search query string from a row's name and company fields.

    Used by the automation agent to perform web searches. Each field is
    converted to text and stripped; fields that are missing, falsy, NaN, or
    blank after stripping are left out, so the result never contains
    leading, trailing, or doubled separator spaces.

    Args:
        row: Dictionary representing a spreadsheet row; the "name" and
            "company" keys are read if present

    Returns:
        Search query string (e.g., "John Smith Acme Corp"), or an empty
        string when neither field has usable content
    """
    parts = []
    for key in ("name", "company"):
        value = row.get(key)
        # NaN is truthy and would otherwise be rendered as the text "nan";
        # `value != value` is true only for NaN-like values.
        if not value or value != value:
            continue
        text = str(value).strip()
        # Filter after stripping so whitespace-only cells do not contribute
        # an empty part (which would leave stray spaces in the query).
        if text:
            parts.append(text)
    return " ".join(parts)