-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcleaning.py
More file actions
39 lines (31 loc) · 1.08 KB
/
cleaning.py
File metadata and controls
39 lines (31 loc) · 1.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#%%
import pandas as pd
# %%
# Load the dataset. NOTE: later cells write their output back to this same
# path, so on a re-run this file already holds the cleaned data.
original_csv_file = 'unicorn_startup_companies_cleaned.csv'
df = pd.read_csv(original_csv_file)
# %%
# Convert "Valuation ($B)" to float. The '$' is stripped only while the column
# is still text: if the file was already cleaned, read_csv parses the column
# as numeric and the .str accessor would raise AttributeError.
if not pd.api.types.is_numeric_dtype(df['Valuation ($B)']):
    # regex=False: '$' is the regex end-of-string anchor, so match it literally
    # instead of relying on the pandas version's default for `regex`.
    df['Valuation ($B)'] = df['Valuation ($B)'].str.replace('$', '', regex=False)
df['Valuation ($B)'] = df['Valuation ($B)'].astype(float)
# %%
# Label the first column (the row-numbering column) "ID"; any other label
# would work equally well.
df = df.rename({df.columns[0]: 'ID'}, axis='columns')
# Write the relabelled table to original_csv_file (the path it was loaded
# from), leaving out the pandas index.
df.to_csv(path_or_buf=original_csv_file, index=False)
# %%
# Load the country -> continent mapping from a CSV file
country_to_continent_df = pd.read_csv('Countries_Continents.csv')
# Canonicalise the country spelling on BOTH sides of the join before merging.
# Previously the rename ran only after the merge, so it could never help the
# affected rows find their mapping entry within the same run. Applying it to
# both frames works whichever spelling the mapping file happens to use.
country_aliases = {'United States': 'United States of America'}
df['Country'] = df['Country'].replace(country_aliases)
country_to_continent_df['Country'] = (
    country_to_continent_df['Country'].replace(country_aliases)
)
# Keep one mapping row per country so the left merge cannot multiply rows.
country_to_continent_df = country_to_continent_df.drop_duplicates(subset='Country')
# This script saves its result over its own input file, so on a re-run df may
# already contain the mapping's columns; drop them to avoid "_x"/"_y" copies.
stale_columns = [
    column
    for column in country_to_continent_df.columns
    if column != 'Country' and column in df.columns
]
df = df.drop(columns=stale_columns)
# Left merge: every company row is kept; unmapped countries get NaN
df = df.merge(country_to_continent_df, on='Country', how='left')
#%%
# Inspect rows that still contain missing values after the merge
df[df.isna().any(axis=1)]
#%%
# Show the final frame (rendered when this cell is run interactively)
df
# %%
# Persist the cleaned frame. This path is the same file the data was loaded
# from, so the input CSV is overwritten rather than a separate file created.
cleaned_csv_file = 'unicorn_startup_companies_cleaned.csv'
df.to_csv(path_or_buf=cleaned_csv_file, index=False)
print(f"Cleaned data saved to {cleaned_csv_file}")
# %%