Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
8 changes: 8 additions & 0 deletions button_2/classes/data/button_dat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import pandas as pd

class ButtonDat:
    """Container for the game's CSV-backed tables.

    On construction, loads the edges, nodes, text and employee tables
    from ``button_2/data/`` into pandas DataFrames, exposed as
    ``edges_df``, ``nodes_df``, ``text_df`` and ``employee_df``.
    """

    def __init__(self):
        # One DataFrame attribute per CSV file under button_2/data/.
        for table in ('edges', 'nodes', 'text', 'employee'):
            setattr(self, f'{table}_df', pd.read_csv(f'button_2/data/{table}.csv'))
14 changes: 14 additions & 0 deletions button_2/classes/data/data_updater.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from classes.data.gs_scraper import GsScraper

class DataUpdater:
    """Fetch every game table from Google Sheets and persist it locally.

    One ``GsScraper`` per sheet tab is created at construction time
    (attributes ``edges``, ``nodes``, ``text``, ``employee``);
    ``update_all`` then writes each scraper's DataFrame out to its CSV
    file under ``button_2/data/``.
    """

    # Sheet tabs managed by this updater; each name doubles as the
    # attribute name and the CSV file stem.
    _SHEETS = ('edges', 'nodes', 'text', 'employee')

    def __init__(self):
        # Pull fresh data from every Google Sheets tab up front.
        for sheet in self._SHEETS:
            setattr(self, sheet, GsScraper(sheet))

    def update_all(self):
        """Write the previously fetched DataFrames to their CSV files."""
        for sheet in self._SHEETS:
            getattr(self, sheet).df.to_csv(f'button_2/data/{sheet}.csv', index=False)
121 changes: 121 additions & 0 deletions button_2/classes/data/gs_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
"""
GsScraper - Google Sheets Data Connector with Retry Logic
=========================================================

This module provides robust Google Sheets integration for the text-based
adventure game, with automatic retry logic and rate limiting to handle
API restrictions and network issues gracefully.

The GsScraper class serves as the primary data connector, converting Google
Sheets tabs into pandas DataFrames while handling common failure modes
like rate limiting, network timeouts, and temporary service unavailability.

Classes:
GsScraper: Google Sheets scraper with exponential backoff retry logic

Environment Requirements:
- BUTTON_SHEET_ID: Main Google Sheets document identifier
- BUTTON_SHEET_EDGES_GID: GID for edges data tab
- BUTTON_SHEET_NODES_GID: GID for nodes data tab
- BUTTON_SHEET_TEXT_GID: GID for text content tab
- BUTTON_SHEET_EMPLOYEE_GID: GID for employee data tab

Key Features:
- Automatic retry with exponential backoff
- Environment-specific .env file loading
- Comprehensive error handling and reporting
- Rate limiting protection for API compliance
"""

import pandas as pd
import os
from dotenv import load_dotenv
import time
from urllib.error import HTTPError
from pathlib import Path


# Load the .env file that lives in the button_2 directory (not the repo
# root). This must run at import time, before any GsScraper is
# constructed, because GsScraper reads BUTTON_SHEET_* via os.getenv.
button_2_dir = Path(__file__).parent.parent.parent # Go up from classes/data to button_2/
env_path = button_2_dir / '.env'
load_dotenv(env_path) # Load environment variables from button_2/.env

# Google Sheets CSV export URL format:
# https://docs.google.com/spreadsheets/d/[SHEET_ID]/export?format=csv&gid=[SHEET_GID]

class GsScraper:
    """
    Google Sheets data connector with robust error handling and retry logic.

    GsScraper provides reliable access to Google Sheets data by implementing
    exponential backoff retry logic and comprehensive error handling. It
    automatically handles rate limiting and transient server errors while
    providing clear error messages for configuration problems.

    The class supports multiple sheet tabs within a single Google Sheets
    document, using environment variables to map sheet names to their
    specific GID identifiers.

    Attributes:
        df (pd.DataFrame): Loaded data from the specified sheet
        sheet_name (str): Name of the sheet tab being accessed

    Supported Sheet Names:
        - 'edges': State transition definitions
        - 'nodes': Node content and configuration
        - 'text': Reusable text snippets
        - 'employee': Employee job title / department data

    Environment Variables Required:
        BUTTON_SHEET_ID: Main Google Sheets document ID
        BUTTON_SHEET_*_GID: Individual tab GID for each sheet type
    """

    # Maps each supported sheet tab to the env var holding its GID.
    _GID_ENV_VARS = {
        'edges': 'BUTTON_SHEET_EDGES_GID',
        'nodes': 'BUTTON_SHEET_NODES_GID',
        'text': 'BUTTON_SHEET_TEXT_GID',
        'employee': 'BUTTON_SHEET_EMPLOYEE_GID',
    }

    # HTTP status codes worth retrying: 429 is rate limiting proper and
    # 5xx are transient server errors. The CSV export endpoint has also
    # been observed answering 404 while throttling (the code this class
    # originally retried on), so it is kept in the set.
    _RETRYABLE_CODES = frozenset({404, 429, 500, 502, 503})

    def __init__(self, sheet_name):
        """
        Initialize Google Sheets connection for specified tab.

        Sets up the connection parameters and loads data from the specified
        Google Sheets tab using the appropriate GID from environment variables.

        Args:
            sheet_name (str): Name of sheet tab to load
                Must be one of: 'edges', 'nodes', 'text', 'employee'

        Raises:
            ValueError: If sheet_name is not recognized
            EnvironmentError: If required environment variables are missing
            HTTPError: If data cannot be loaded after all retries
        """
        # Validate the sheet name first so a typo fails fast with a
        # clear message, before the environment is consulted.
        try:
            gid_var = self._GID_ENV_VARS[sheet_name]
        except KeyError:
            raise ValueError(
                f"Unknown sheet name: {sheet_name}. Must be one of: "
                f"{', '.join(self._GID_ENV_VARS)}"
            ) from None

        self.sheet_name = sheet_name

        # Load sheet configuration from environment variables. Failing
        # here (rather than letting a URL containing "None" 404 later)
        # gives a much clearer configuration error.
        self._sheet_id = os.getenv("BUTTON_SHEET_ID")
        self._sheet_gid = os.getenv(gid_var)
        if not self._sheet_id or not self._sheet_gid:
            raise EnvironmentError(
                f"Missing Google Sheets configuration: ensure BUTTON_SHEET_ID "
                f"and {gid_var} are set (see button_2/.env)"
            )

        self.data_url = f"https://docs.google.com/spreadsheets/d/{self._sheet_id}/export?format=csv&gid={self._sheet_gid}"

        # Load data with retry logic for rate limiting
        self.df = self._load_data_with_retry()

    def _load_data_with_retry(self, max_retries=3, delay=1):
        """
        Load CSV data from ``self.data_url`` with retry logic.

        Retries on retryable HTTP status codes (see _RETRYABLE_CODES)
        with exponential backoff; any other error, or exhaustion of the
        retry budget, propagates to the caller.

        Args:
            max_retries (int): Total number of attempts (default 3)
            delay (int): Initial backoff delay in seconds; doubles per retry

        Returns:
            pd.DataFrame: Parsed CSV content.

        Raises:
            HTTPError: On a non-retryable status, or after the final attempt.
        """
        for attempt in range(max_retries):
            try:
                return pd.read_csv(self.data_url)
            except HTTPError as e:
                if e.code in self._RETRYABLE_CODES and attempt < max_retries - 1:
                    print(f"⚠️ Rate limited, retrying in {delay} seconds... (attempt {attempt + 1})")
                    time.sleep(delay)
                    delay *= 2  # Exponential backoff
                else:
                    # Re-raise without re-wrapping to preserve the traceback.
                    raise
14 changes: 14 additions & 0 deletions button_2/data/edges.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
source,target,outro_text,desired
start_game,welcome,You begin your career in data.,True
welcome,onboarding,You proceed to onboarding.,True
onboarding,initiate_project,Well done for completing onboarding. Your button-pressing mastery qualifies you to begin consulting with TechCo stakeholders! ,True
initiate_project,source_data,"With a question defined by the stakeholder, you are ready to source the data. Leadership promise there will be cake after the analysis presentation.",True
source_data,transform_data,You find clearly-marked tables with interpretable column names.,True
transform_data,analyse_data,You architect a clean data lineage that pulls the sources together into datasets ready for analysis.,True
analyse_data,report_analytics,You build dashboards that answer the question the stakeholder framed.,True
report_analytics,decision_maker,You present your findings with flair in the boardroom to the stakeholders.,True
report_analytics,source_data,"During your presentation one of the stakeholders points out there's no integration of a data source that was never mentioned until now. You will need to go back to scouring through the names of databases in the datawarehouse to find the needle-in-the-haystack column required; it likely doesn't have a very informative column name.",False
decision_maker,initiate_project,"When the analysis reaches leadership, they state it doesn't answer the question posed. An abortive blame game erupts over whether you misunderstood the question, your manager miscommunicated it, or whether leadership failed to make their actual needs known. The upshot is, you begin the analysis again.",False
analyse_data,transform_data,"Your analysis shows duplicates and missingness, empty bar plots, weird-looking scatter plots. Time to dive back into the data lineage spaghetti and figure out what went wrong.",False
decision_maker,end,Leadership declare the results satisfactory but there is a sense that this only sort-of answered the questions they had. There is no cake. You switch your professional profile to available for hire.,True
end,summary,,True
6 changes: 6 additions & 0 deletions button_2/data/employee.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
job_title,department
data scientist,data
data analyst,data analytics
data engineer,business intelligence
analytics engineer,data & analytics
business intelligence analyst,data science
12 changes: 12 additions & 0 deletions button_2/data/nodes.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
node,edge_selector,title_text,intro_text,event_text,pbn
start_game,start,PRESS A BUTTON NOW,Explore the lived experience of working in data.,,to enter the world of data
welcome,auto,Welcome!,Welcome employee #eiLaSi-LV-426-ekaCehT to your first day at TechCo! You are a data scientist. It is your job to press buttons. ,,to proceed to onboarding
onboarding,auto,Onboarding,Welcome employee #eiLaSi-LV-426-ekaCehT to onboarding at TechCo! You are a data scientist. It is your job to press buttons. ,,to complete onboarding
initiate_project,auto,A question that needs answering,"An executive at TechCo has a question that needs answering. You are told the data can be found in the data warehouse, no problem. Once you are done with the analysis, there will be cake.",,to consult stakeholders about their evidence-based needs
source_data,auto,Source the data,"The data warehouse is home to hundreds of databases. Each database is home to dozens of tables, each of which has hundreds of columns. There are billions of rows of data.",,to dive into the warehouse and live in matrix-scroll land of scrutinising seemingly endless columns and rows
transform_data,auto,Shape the data,"Now that you have figured out which data sources you need to extract from, you are ready to transform the data into analytic tables that will form the foundation of your shiny analyses. You are sure looking forward to that cake.",,to architect a data lineage
analyse_data,random,Analyse the data,"Finally, the fun part! Making pretty pictures out of data to answer questions in meaningful ways. Spurred on by the promise of cake, you lose yourself in the finesse of choosing colour palettes and developing visualisations that answer the multi-faceted question that was posed. You work late into the night.",,to build interactive data visualisations
report_analytics,random,Report your findings,"The big day has arrived! The boardroom is filled with middle-managers somewhat awfully assembled around croissants and tepid coffee. No cake yet, but getting ever closer.",,to report your findings
decision_maker,random,Decision maker assessment,You receive a calendar invite from your manager saying they have met with leadership and want to discuss how the analysis was received. There is no mention of cake.,,to meet with your manager about the outcome of the analysis and whether you will get cake
end,end,End game,"The project ends in a whimper, just a meeting with your manager. There was never going to be any cake. You resignedly flicked your professional profile to ""available for work"".",,to reflect on your time at TechCo
summary,end,Your TechCo experience,,,
5 changes: 5 additions & 0 deletions button_2/data/text.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
id_text,text_type,text_context,text
pbn,generic,input,Press a button now
edge_good,edge,edge_selector,This step went as expected.
edge_bad,edge,edge_selector,"Oh, no! Something isn't quite right."
employee_id,employee,,
Empty file added button_2/tests/data/__init__.py
Empty file.
17 changes: 17 additions & 0 deletions button_2/tests/data/test_button_dat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from button_2.classes.data.button_dat import ButtonDat

def test_button_dat_initialisation():
    """ButtonDat should expose four non-empty DataFrames after construction."""
    game_data = ButtonDat()
    tables = ('edges', 'nodes', 'text', 'employee')

    # Every expected DataFrame attribute must exist...
    for table in tables:
        assert hasattr(game_data, f'{table}_df'), f"ButtonDat should have {table}_df attribute"

    # ...and must actually contain data.
    for table in tables:
        frame = getattr(game_data, f'{table}_df')
        assert not frame.empty, f"{table.capitalize()} DataFrame should not be empty"
11 changes: 11 additions & 0 deletions button_2/tests/data/test_gs_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from button_2.classes.data.gs_scraper import GsScraper

def test_gs_scraper_edges_non_empty():
    """The 'edges' sheet tab should load into a populated DataFrame."""
    df = GsScraper('edges').df
    assert df is not None, "GsScraper.df should not be None after initialization, sheet name likely misspecified or env vars missing"
    assert not df.empty, "GsScraper.df should not be empty after initialization, sheet name likely misspecified or env vars missing"

def test_gs_scraper_nodes_non_empty():
    """The 'nodes' sheet tab should load into a populated DataFrame."""
    df = GsScraper('nodes').df
    assert df is not None, "GsScraper.df should not be None after initialization, sheet name likely misspecified or env vars missing"
    assert not df.empty, "GsScraper.df should not be empty after initialization, sheet name likely misspecified or env vars missing"
4 changes: 4 additions & 0 deletions button_2/update_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from classes.data.data_updater import DataUpdater

# Fetch every Google Sheets tab and overwrite the local CSV copies.
# NOTE(review): DataUpdater writes to relative paths like
# 'button_2/data/edges.csv' — presumably this script is run from the
# repository root; confirm before automating.
dat = DataUpdater()
dat.update_all()