-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
91 lines (66 loc) · 2.38 KB
/
utils.py
File metadata and controls
91 lines (66 loc) · 2.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env python3
import os
import json
import csv
import requests
import pandas as pd
def get_csv_as_json(path_to_csv):
    """Load a comma-separated file and return its rows as a list of dicts.

    The table is read with pandas, serialised to a JSON string in
    ``records`` orientation (one object per row, keyed by column name) and
    parsed back, so the result holds only plain JSON-compatible Python
    values; missing cells come back as ``None``.
    """
    frame = pd.read_csv(path_to_csv, sep=",")
    records_as_text = frame.to_json(orient='records')
    return json.loads(records_as_text)
# write json to csv file
def json_to_csv_file(output_file, data):
    """Write an iterable of dict records to ``output_file`` as CSV.

    The keys of the first record are written as the header row; every
    record (including the first) is then written as one row of its values,
    in that record's own key order. Records are therefore expected to share
    the same keys in the same order. An empty ``data`` produces an empty
    file.

    Args:
        output_file: Path of the CSV file to create or overwrite.
        data: Iterable of mappings, one per output row.
    """
    # ``newline=''`` is what the csv module requires so that it controls the
    # line endings itself (otherwise Windows gets blank lines between rows).
    # UTF-8 is written explicitly so the output does not depend on the
    # locale's default encoding.
    with open(output_file, 'w', newline='', encoding='utf-8') as data_file:
        csv_writer = csv.writer(data_file)
        for index, row in enumerate(data):
            if index == 0:
                # Header comes from the first record only.
                csv_writer.writerow(row.keys())
            csv_writer.writerow(row.values())
def join_col(d1, idx_d2, k, col):
    """Copy one column from an indexed dataset onto the rows of ``d1``.

    Each row of ``d1`` is looked up in ``idx_d2`` by its ``k`` value and
    updated in place:

    * match found      -> ``row[col]`` is set to the matched record's value;
    * no match         -> a message is printed and ``row[col]`` is set to None;
    * key is ``None``  -> ``row[col]`` is added as None if the row does not
      already have it, so every row ends up with the column.

    Args:
        d1: List of dict records to enrich (mutated in place).
        idx_d2: Dict mapping key values to the record holding ``col``.
        k: Name of the key field in the rows of ``d1``.
        col: Name of the column to copy across.

    Returns:
        The same ``d1`` list that was passed in.
    """
    for row in d1:
        key = row[k]
        if key is None:
            # Nothing to match on. Still make sure the column exists so the
            # records keep a uniform shape, without overwriting a value the
            # row may already carry.
            row.setdefault(col, None)
            continue
        match = idx_d2.get(key)
        if match is not None:
            row[col] = match[col]
        else:
            print(f"no match for '{k}'", key)
            row[col] = None
    return d1
def joiner(d1, d2, k, cols):
    """Add the columns ``cols`` from ``d2`` to the rows of ``d1``, matched on ``k``.

    The first record of each dataset is taken as representative of its
    columns. If either dataset is empty, or ``k`` is not a column of both,
    ``d1`` is returned unchanged. Requested columns that ``d2`` does not have
    are skipped.

    Args:
        d1: List of dict records to enrich (rows are updated in place).
        d2: List of dict records supplying the extra columns.
        k: Name of the key column shared by both datasets.
        cols: Iterable of column names to copy from ``d2``.

    Returns:
        ``d1`` with the requested columns added where available.
    """
    # Guard first: indexing [0] on an empty dataset would raise IndexError.
    if not d1 or not d2:
        return d1
    if k not in d1[0] or k not in d2[0]:
        return d1

    # NOTE: no check is made for column clashes; a column already present in
    # d1 is overwritten by the joined value.
    # Index d2 by key; if a key value repeats, the last record wins.
    d2_idx = {x[k]: x for x in d2}
    for col in cols:
        if col in d2[0]:
            d1 = join_col(d1, d2_idx, k, col)
    return d1
def fetch_json_from_endpoint(endpoint, timeout=30):
    """GET ``endpoint`` and return its response body parsed as JSON.

    Args:
        endpoint: URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on an unresponsive
            host; pass ``None`` to restore that behaviour.

    Returns:
        The decoded JSON payload.

    Raises:
        requests.exceptions.Timeout: The server did not respond in time.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status.
    """
    response = requests.get(endpoint, timeout=timeout)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()
def name_to_identifier(n):
    """Turn a display name into a lower-case, hyphenated identifier.

    Each space becomes a hyphen and commas are dropped; all other
    characters are only lower-cased.
    """
    # Single translation pass: space -> "-", comma -> removed.
    substitutions = str.maketrans({" ": "-", ",": None})
    return n.lower().translate(substitutions)
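# Manual smoke test for joiner: call e.g. testjoiner(["<column-name>"]),
# passing column names to copy across from ./data/hubsv2.csv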
def testjoiner(cols):
    """Run joiner on the local hub CSV files and print one joined record.

    Reads ``./data/hubs.csv`` and ``./data/hubsv2.csv``, joins the columns
    named in ``cols`` from the second onto the first using the
    ``informal-name`` column as key, and prints the record at index 3.
    """
    hubs = get_csv_as_json("./data/hubs.csv")

    # Alternative right-hand dataset (disabled):
    # organisation_csv = os.environ.get("organisation_csv", "https://raw.githubusercontent.com/digital-land/organisation-dataset/main/collection/organisation.csv")
    # hubs_v2 = get_csv_as_json(organisation_csv)
    hubs_v2 = get_csv_as_json("./data/hubsv2.csv")

    joined = joiner(hubs, hubs_v2, 'informal-name', cols)

    # print sample record
    print(joined[3])

    # save to csv (disabled)
    # json_to_csv_file('data/rishi_data.csv', joined)