Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions module-2/lab-ufo/heatmap.html

Large diffs are not rendered by default.

397 changes: 387 additions & 10 deletions module-2/lab-ufo/ufos.ipynb

Large diffs are not rendered by default.

871 changes: 871 additions & 0 deletions module-2/python-bi-project/BI Project.ipynb

Large diffs are not rendered by default.

Binary file not shown.
4,980 changes: 4,980 additions & 0 deletions module-2/python-bi-project/df_raw.csv

Large diffs are not rendered by default.

35 changes: 35 additions & 0 deletions module-2/python-bi-project/metric_df.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
,local_comp,dist_param
"New York, USA",127,0.7202062041288978
"San Francisco, USA",127,0.8094719621596773
"London, GBR",86,0.6087381051802471
"Chicago, USA",32,0.3605870092451125
"San Jose, USA",31,0.7936718628305861
"Tokyo, JPN",29,0.1
"Paris, FRA",29,0.5689527779835312
"Seattle, USA",25,0.1
"Beijing, CHN",21,0.1
"Austin, USA",20,0.1
"Los Angeles, USA",20,0.562872573904924
"San Mateo, USA",20,0.8183716195267812
"Singapore, SGP",18,0.1
"Shanghai, CHN",18,0.1
"Mountain View, USA",17,0.823823967045704
"Cambridge, USA",17,0.6086369769434281
"Palo Alto, USA",16,0.8237324609881684
"San Diego, USA",15,0.4185381440838583
"Santa Clara, USA",15,0.812496692923227
"Sunnyvale, USA",14,0.8235792639325478
"Bangalore, IND",14,0.1
"Madrid, ESP",14,0.1
"Berlin, DEU",14,0.2923575960956589
"Chennai, IND",13,0.1
"Boston, USA",12,0.608420594056505
"Toronto, CAN",12,0.45024313014705203
"Fremont, USA",12,0.8220903454219931
"Santa Monica, USA",12,0.569035745488191
"Mumbai, IND",11,0.1
"Irvine, USA",11,0.5226334520222252
"Amsterdam, NLD",11,0.5505807400621622
"Atlanta, USA",11,0.24224177569067704
"Scottsdale, USA",11,0.42199717629140016
"Reston, USA",10,0.533608118658283
90 changes: 90 additions & 0 deletions module-2/python-bi-project/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import pandas as pd
import math
from pymongo import MongoClient


import matplotlib.pyplot as plt
import seaborn as sns


def distance(origin, destination, radius=6371):
    """Return the great-circle distance between two points (haversine formula).

    Args:
        origin: ``(latitude, longitude)`` pair in decimal degrees.
        destination: ``(latitude, longitude)`` pair in decimal degrees.
        radius: Radius of the sphere; the result is expressed in the same
            unit. Defaults to 6371 (kilometres).

    Returns:
        The distance along the surface of the sphere. NaN coordinates
        propagate to a NaN result.
    """
    lat1, lon1 = origin
    lat2, lon2 = destination

    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    sin_dlat = math.sin(dlat / 2)
    sin_dlon = math.sin(dlon / 2)
    a = sin_dlat * sin_dlat + math.cos(math.radians(lat1)) \
        * math.cos(math.radians(lat2)) * sin_dlon * sin_dlon

    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    # Explicit comparisons (rather than min/max) leave NaN untouched so that
    # invalid coordinates still yield NaN instead of a bogus distance.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return radius * c



def top_cities(df, min_count=10):
    """Count offices per "City, COUNTRY" label and keep the frequent ones.

    Args:
        df: DataFrame with an ``offices`` column; each cell is an iterable of
            dicts carrying ``city`` and ``country_code`` keys.
        min_count: Minimum number of offices a label needs to be returned.

    Returns:
        dict mapping "City, COUNTRY" labels to their office count, ordered
        from most to least frequent.
    """
    labels = []
    for offices in df['offices']:
        for office in offices:
            city = office['city']
            country = office['country_code']
            # Treat None like '' as "missing"; otherwise str(None) would
            # produce spurious labels such as "None, USA".
            if city and country:
                labels.append("{}, {}".format(city, country))

    # dtype=object keeps the empty-input case well defined.
    counts = pd.Series(labels, dtype=object).value_counts()
    return {label: count for label, count in counts.to_dict().items()
            if count >= min_count}



def get_dist_param(df, max_km=1000, min_offices=100, default_score=0.1):
    """Score each major city by how closely offices cluster around it.

    For every office that has a latitude, a longitude and a city, and for
    every entry of the module-level ``major_cities`` mapping, the distance
    between the two is computed with ``distance``. Offices within ``max_km``
    contribute a proximity value ``1 - km / max_km`` (1 at the city centre,
    0 at the cut-off). A city's score is the mean of its proximity values.

    Args:
        df: DataFrame with an ``offices`` column; each cell is an iterable of
            dicts carrying ``latitude``, ``longitude`` and ``city`` keys.
        max_km: Cut-off distance, in the unit returned by ``distance``.
        min_offices: Minimum number of nearby offices required to compute a
            mean; cities with fewer get ``default_score``.
        default_score: Score assigned to cities with too few nearby offices.

    Returns:
        dict mapping each key of ``major_cities`` to its score, in the same
        order as ``major_cities``.
    """
    proximity = {city: [] for city in major_cities}

    for offices in df['offices']:
        for office in offices:
            # Offices without coordinates or a city name cannot be placed;
            # check once per office instead of once per (office, city) pair.
            if not (office['latitude'] and office['longitude'] and office['city']):
                continue
            location = (office['latitude'], office['longitude'])
            for city, center in major_cities.items():
                km = distance(center, location)
                if km <= max_km:
                    proximity[city].append(1 - km / max_km)

    scores = {}
    for city, values in proximity.items():
        # `values` must be non-empty to average, even if min_offices is 0.
        if values and len(values) >= min_offices:
            scores[city] = sum(values) / len(values)
        else:
            scores[city] = default_score
    return scores



def metric_score(df):
    """Compute the composite score for one record.

    The score grows with the number of local companies and with the distance
    parameter, and shrinks as the cost parameter grows:

        1e6 * log_100(n) * log_10(n) * c**6 * 100**(c**8)
        -------------------------------------------------
                  cost**0.2 * log_10(cost)**12

    Args:
        df: Mapping-like object exposing scalar ``local_comp`` (n),
            ``dist_param`` (c) and ``cost_param`` (cost) entries.
            NOTE(review): presumably a single row, e.g. when applied
            row-wise to a DataFrame -- confirm against the caller.

    Returns:
        The score as a float.
    """
    competitors = df['local_comp']
    proximity = df['dist_param']
    cost = df['cost_param']

    # Accumulate the numerator factor by factor, left to right.
    numerator = 1000000. * math.log(competitors, 100.)
    numerator = numerator * math.log10(competitors)
    numerator = numerator * proximity ** 6
    numerator = numerator * math.pow(100., proximity ** 8)

    denominator = cost ** 0.2 * math.log10(cost) ** 12

    return numerator / denominator



# Reference coordinates for each major city as (latitude, longitude) in
# decimal degrees; negative longitudes are west of Greenwich.
major_cities = {
    'New York': (40.7128, -74.0060),
    'San Francisco': (37.7749, -122.4194),
    'London': (51.5074, -0.1278),  # west of Greenwich, so negative
    'Chicago': (41.8781, -87.6298),
    'San Jose': (37.3382, -121.8863),
    'Tokyo': (35.6762, 139.6503),
    'Paris': (48.8566, 2.3522),
    'Seattle': (47.6062, -122.3321),
    'Beijing': (39.9042, 116.4074),
    'Austin': (30.2672, -97.7431),
    'Los Angeles': (34.0522, -118.2437),
    'San Mateo': (37.5630, -122.3255),
    'Singapore': (1.3667, 103.8198),
    'Shanghai': (31.2222195, 121.4580612),
    'Mountain View': (37.3861, -122.0839),
    'Cambridge': (42.3736, -71.1097),
    'Palo Alto': (37.4419, -122.1430),
    'San Diego': (32.7157, -117.1611),
    'Santa Clara': (37.3541, -121.9552),
    'Sunnyvale': (37.3688, -122.0363),
    'Bangalore': (12.9716, 77.5946),
    'Madrid': (40.4168, -3.7038),  # west of Greenwich, so negative
    'Berlin': (52.5200, 13.4050),
    'Chennai': (13.0827, 80.2707),
    'Boston': (42.3601, -71.0589),
    'Toronto': (43.6532, -79.3832),
    'Fremont': (37.5485, -121.9886),
    'Santa Monica': (34.0195, -118.4912),
    'Mumbai': (19.0760, 72.8777),
    'Irvine': (33.6846, -117.8265),
    'Amsterdam': (52.3667, 4.8945),
    'Atlanta': (33.7490, -84.3880),
    'Scottsdale': (33.4942, -111.9261),
    'Reston': (38.9586, -77.3570),
}



# Cost figure per major city (unit not stated in this file). Several entries
# are expressed as a multiple of the New York figure.
_NEW_YORK_COST = 187.2

cost_cities = {
    'New York': _NEW_YORK_COST,
    'San Francisco': 269.3,
    'London': _NEW_YORK_COST * 0.75,
    'Chicago': 106.9,
    'San Jose': 214.5,
    'Tokyo': _NEW_YORK_COST * 2 / 3,
    'Paris': _NEW_YORK_COST * 5 / 6,
    'Seattle': 172.3,
    'Beijing': _NEW_YORK_COST / 2,
    'Austin': 119.3,
    'Los Angeles': 173.3,
    'San Mateo': 270.6,
    'Singapore': _NEW_YORK_COST * 3 / 2,
    'Shanghai': _NEW_YORK_COST * 0.634,
    'Mountain View': 315.4,
    'Cambridge': 181.8,
    'Palo Alto': 471.0,
    'San Diego': 160.1,
    'Santa Clara': 250.0,
    'Sunnyvale': 300.1,
    'Bangalore': _NEW_YORK_COST / 10,
    'Madrid': _NEW_YORK_COST / 2,
    'Berlin': _NEW_YORK_COST / 2,
    'Chennai': _NEW_YORK_COST / 15,
    'Boston': 162.4,
    'Toronto': _NEW_YORK_COST * 0.89,
    'Fremont': 227.3,
    'Santa Monica': 304.5,
    'Mumbai': _NEW_YORK_COST / 4,
    'Irvine': 187.1,
    'Amsterdam': _NEW_YORK_COST * 0.54,
    'Atlanta': 107.5,
    'Scottsdale': 133.2,
    'Reston': 138.9,
}