-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathd1rct.py
More file actions
59 lines (47 loc) · 2.05 KB
/
d1rct.py
File metadata and controls
59 lines (47 loc) · 2.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import fastf1 as f1
import pandas as pd
from datetime import datetime
import pyarrow
import pyarrow._hdfs as pahdfs
import tempfile
from hdfs import InsecureClient
# Use the system temporary directory as the fastf1 cache location.
temp_cache_dir = tempfile.gettempdir()
# Enable fastf1's cache in that directory (this call turns caching on; it
# does not disable it).
f1.Cache.enable_cache(temp_cache_dir)
# Seasons to fetch data for.
# NOTE(original author): iterating over several seasons reportedly "doesn't
# work"; the cause is not documented here, so edit this list by hand to the
# seasons you need. TODO: confirm and fix the underlying failure.
seasons = [2022, 2023, 2024]
def fetch_season_data(season):
    """Fetch race lap data for every round of *season* and store it in HDFS.

    Walks the schedule returned by ``f1.get_event_schedule(season)``, skips
    testing events, loads the race ('R') session of each remaining event and
    passes its lap table to ``save_to_hdfs``.

    Args:
        season: Championship year passed to ``f1.get_event_schedule``; it is
            also forwarded to ``save_to_hdfs`` as the season of the data.
    """
    # Get all races in the season
    races = f1.get_event_schedule(season)
    for _, race in races.iterrows():
        event = race['EventName']
        # The session is looked up by the calendar year of the event date
        # rather than by ``season`` (kept from the original behaviour).
        year = race['EventDate'].year
        round_number = race['RoundNumber']
        print(f"Fetching data for {event} - Round {round_number}, {year}")

        # Skip testing sessions
        event_lower = event.lower()
        if 'testing' in event_lower or 'pre-season' in event_lower:
            print(f"Skipping testing event: {event}")
            continue

        # Fetch session data; 'R' stands for Race session
        session = f1.get_session(year, round_number, 'R')
        session.load()  # Load all available data

        laps = session.laps
        if laps is None or laps.empty:
            # The previous message interpolated ``session.status``. Per the
            # fastf1 Session API the attribute is ``session_status``; there is
            # no ``status``, so reporting the missing data raised
            # AttributeError instead of moving on to the next race.
            print(f"No lap data available for {event}.")
            continue

        # Convert to a plain pandas DataFrame and store to HDFS
        save_to_hdfs(pd.DataFrame(laps), season, event)
def save_to_hdfs(df, season, event):
    """Write *df* as a UTF-8 CSV file to HDFS.

    The file is written to ``/user/f1/data/<season>/<event>/laps.csv`` through
    an ``InsecureClient`` pointed at ``http://localhost:9870`` as user
    ``hadoop``; an existing file at that path is overwritten.

    Args:
        df: Object providing ``to_csv``; serialised without the index column.
        season: Season component of the target path.
        event: Event component of the target path, also used in the log line.
    """
    hdfs_path = f'/user/f1/data/{season}/{event}/laps.csv'

    # Serialise first, then open the client and upload the bytes in one call.
    payload = df.to_csv(index=False).encode('utf-8')
    hdfs_client = InsecureClient('http://localhost:9870', user='hadoop')
    hdfs_client.write(hdfs_path, payload, overwrite=True)

    print(f"Data for {event} saved to HDFS at {hdfs_path}")
# Loop through all specified seasons. Guarded so the downloads only start when
# the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    for season in seasons:
        fetch_season_data(season)