-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpopweigh_grid_trajectories.py
More file actions
239 lines (185 loc) · 8.58 KB
/
popweigh_grid_trajectories.py
File metadata and controls
239 lines (185 loc) · 8.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 6 10:22:03 2022
@author: TuoVaisanen-e01
"""
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import argparse as ap
# set up argument parser
# NOTE: argparse is imported under the alias ``ap``, so the bare name
# ``argparse`` is not defined in this module. The parser is therefore created
# through the alias; from this line on ``ap`` refers to the parser object
# instead of the module.
ap = ap.ArgumentParser()

# folder that holds the per-metric diversity grid history files
ap.add_argument("-dg", "--divgrid", required=True,
                help="Path to folder containing grid diversity history files. For example: /path/to/folder/")

# population grid history of Somali speakers
ap.add_argument("-sg", "--somagrid", required=True,
                help="Path to geopackage with grid history of Somali population count")

# population grid history of Estonian speakers
ap.add_argument("-eg", "--estogrid", required=True,
                help="Path to geopackage with grid history of Estonian population count")

# population grid history of Finnish speakers
ap.add_argument("-fg", "--fingrid", required=True,
                help="Path to geopackage with grid history of Finnish population count")

# population grid history of the total population
ap.add_argument("-hg", "--hmagrid", required=True,
                help="Path to geopackage with grid history of total population count")

# folder the result files and figures are written to
ap.add_argument("-o", "--output", required=True,
                help="Path to output folder. For example: /path/to/folder/. This script assumes you have access to FOLK data within Fiona")

# parse arguments into a plain dict keyed by the long option names
args = vars(ap.parse_args())
def get_cell_trajectories(dataframe, last_year='2019'):
    """Split grid cells into stable, new and ceased groups.

    A row is *stable* when none of its columns holds a missing value. Rows
    whose ``euref_250`` id does not occur among the stable rows are
    *unstable*; they are split further by whether they hold a value in the
    ``last_year`` column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table with a ``euref_250`` id column and a ``last_year`` column.
    last_year : str, optional
        Name of the column that decides whether an unstable cell counts as
        new (value present) or ceased (value missing). Defaults to ``'2019'``.

    Returns
    -------
    tuple
        ``(stable, new, old)`` dataframes, in that order.
    """
    # cells without a single missing value in any column
    stable = dataframe.dropna()

    # everything whose id is not among the stable cells
    unstable = dataframe[~dataframe['euref_250'].isin(stable['euref_250'])]

    # unstable cells that still hold a value in the last year are "new" ...
    new = unstable.dropna(subset=[last_year])

    # ... and the remaining unstable cells are "old" (ceased)
    old = unstable[~unstable['euref_250'].isin(new['euref_250'])]

    return stable, new, old
def get_popweigh_values(valdf, popdf, outputtype, years=range(1987, 2020)):
    """Compute a population-weighted mean of ``valdf`` for every year.

    For each year the score is ``sum(value * population) / sum(population)``,
    where both frames are read from the column named after the year. The
    product is aligned on the row index of the two frames.

    Parameters
    ----------
    valdf : pandas.DataFrame
        Values to be weighted, one column per year (column names are the
        years as strings).
    popdf : pandas.DataFrame
        Population counts used as weights, same column naming as ``valdf``.
    outputtype : object
        Label written to the ``type`` column of every result row.
    years : iterable of int, optional
        Years to process. Defaults to 1987-2019.

    Returns
    -------
    pandas.DataFrame
        One row per year, indexed by year, with the columns ``score``,
        ``year`` and ``type``. The score is NaN when the population sum of a
        year is zero.
    """
    # materialize once so that one-shot iterables can also be used as index
    years = list(years)

    records = []
    for year in years:
        # the yearly columns are named with the year as string
        column = str(year)

        # denominator: total population of the year
        pop = popdf[column].sum()

        # numerator: values weighted by population
        weighted = (valdf[column] * popdf[column]).sum()

        # a weighted mean is undefined without any population
        score = weighted / pop if pop != 0 else float('nan')

        records.append({'score': score, 'year': year, 'type': outputtype})

    # building the frame in one go avoids growing it cell by cell
    return pd.DataFrame(records, index=years, columns=['score', 'year', 'type'])
# initialize an output list for the per-metric results
outputlist = []

# set y-axis labels, in the same order as the metric files looped over below
labs = ['Unique languages', 'Shannon entropy']


def _drop_all_zero_rows(frame, columns):
    """Return ``frame`` without the rows that are zero in every one of ``columns``."""
    all_zero = (frame[columns] == 0).all(axis=1)
    return frame[~all_zero]


# the population grids are the same for every diversity metric, so they are
# read and prepared once instead of on every pass of the metric loop
sdf = gpd.read_file(args['somagrid'])
edf = gpd.read_file(args['estogrid'])
hma = gpd.read_file(args['hmagrid'])
fin = gpd.read_file(args['fingrid'])

# set somali annual range to start from 1992 to ensure enough observations
sdf = sdf.drop(columns=['1987', '1988', '1989', '1990', '1991'])

# drop all rows that have only zero observations throughout to remove cells
# without any inhabitants of the language group. The rows are tested for
# being all-zero explicitly: drop_duplicates(keep=False) would also discard
# populated cells that merely share an identical history with another cell.
# NOTE(review): the positional slices are presumably the yearly count columns
# (28 for the Somali grid, 33 for the others) - confirm against the file layout
sdf = _drop_all_zero_rows(sdf, list(sdf.columns[1:29]))
edf = _drop_all_zero_rows(edf, list(edf.columns[1:34]))
fdf = _drop_all_zero_rows(fin, list(fin.columns[1:34]))

# replace zeros with nans for population count dataframes, so that years
# without inhabitants count as missing in get_cell_trajectories
edf = edf.replace(0, np.nan)
sdf = sdf.replace(0, np.nan)
fdf = fdf.replace(0, np.nan)

# get dataframe triplets (present every year, new, ceased)
# the triplet of the total population grid is not used further in this script
hma_ey, hma_new, hma_old = get_cell_trajectories(hma)
est_ey, est_new, est_old = get_cell_trajectories(edf)
som_ey, som_new, som_old = get_cell_trajectories(sdf)
fin_ey, fin_new, fin_old = get_cell_trajectories(fdf)

# year ranges: the Somali grid starts in 1992, the others in 1987
som_years = range(1992, 2020)
all_years = range(1987, 2020)

# (type label, celltype label, population dataframe, years) per trajectory
trajectory_groups = [
    ('Somali-inhabited', 'Present every year', som_ey, som_years),
    ('Somali-inhabited', 'New', som_new, som_years),
    ('Somali-inhabited', 'Ceased', som_old, som_years),
    ('Estonian-inhabited', 'Present every year', est_ey, all_years),
    ('Estonian-inhabited', 'New', est_new, all_years),
    ('Estonian-inhabited', 'Ceased', est_old, all_years),
    ('Finnish-inhabited', 'Present every year', fin_ey, all_years),
    ('Finnish-inhabited', 'New', fin_new, all_years),
    ('Finnish-inhabited', 'Ceased', fin_old, all_years),
]

# loop over diversity metric files
for x, file in enumerate(['unique_langs', 'shannon']):

    # read grid history file of the diversity metric in
    df = gpd.read_file(args['divgrid'] + '{}_grid_history.gpkg'.format(file))

    # one record per trajectory group and year
    records = []

    # loop over trajectory groups
    for poptype, celltype, data, yrange in trajectory_groups:

        # value grid cells present in the language group dataframe; the
        # cells of a group are the same for every year
        in_group = df['euref_250'].isin(data['euref_250'])

        # loop over years
        for year in yrange:

            # yearly columns are named with the year as string
            column = str(year)

            # population counts and diversity values of the current year
            population = data[column]
            values = df.loc[in_group, column]

            # population weighted sum of the diversity values
            # NOTE(review): the product aligns on the row index of the two
            # grids, not on euref_250 - this presumably relies on all grid
            # files sharing the same row order; confirm
            weighted_sum = (values * population).sum()

            # final population weighted score; the denominator sums every
            # cell of the group that has a population count in this year
            score = weighted_sum / population.sum()

            records.append({'year': year, 'score': score,
                            'type': poptype, 'celltype': celltype})

    # build the result dataframe, indexed by year
    result = pd.DataFrame(records, index=[rec['year'] for rec in records],
                          columns=['year', 'score', 'type', 'celltype'])
    result.to_pickle(args['output'] + 'new_old_neighbourhoods_popweigh_{}.pkl'.format(file))

    # drop rows with ceased cells; copy so that the metric column below is
    # added to an independent dataframe and not to a slice of the full result
    result = result[result['celltype'] != 'Ceased'].copy()

    # set name of metric
    result['metric'] = file

    # send to output dataframe list
    outputlist.append(result)

    # plot temporal development of grid cells
    sns.set(font_scale=1.3)
    fig, ax = plt.subplots(figsize=(7, 6))
    palette = {'Somali-inhabited': 'C1', 'Estonian-inhabited': 'C0', 'Finnish-inhabited': 'C3'}
    # NOTE(review): newer seaborn releases deprecate ``ci`` in favour of
    # ``errorbar``; kept as is to stay compatible with the seaborn in use
    g = sns.lineplot(x='year', y='score', hue='type', style='celltype', data=result,
                     ci=99, estimator=np.nanmean, n_boot=100, palette=palette, ax=ax)
    g.set(xlabel='', ylabel=labs[x])

    # remove the automatic legend, but keep its handles for the custom one
    legend = ax.get_legend()
    # newer matplotlib names the attribute legend_handles, older releases
    # only know legendHandles
    handles = getattr(legend, 'legend_handles', None)
    if handles is None:
        handles = legend.legendHandles
    legend.remove()

    # only the second figure gets a (relabelled) legend
    if x == 1:
        ax.legend(handles, ['Residential neighbourhood', 'Somali-inhabited','Estonian-inhabited', 'Finnish-inhabited',
                            '\nTemporal type','Present every year', 'New grid cells'])

    plt.savefig(args['output'] + 'grid_trajectory_popweigh_{}.pdf'.format(file), dpi=300,
                bbox_inches='tight')

    # release the figure, it is not needed after saving
    plt.close(fig)

# concatenate the outputs into a dataframe
output = pd.concat(outputlist, ignore_index=True)

# save dataframe
output.to_pickle(args['output'] + 'popweigh_divs_gridtypes.pkl')
output.to_csv(args['output'] + 'popweigh_divs_gridtypes.csv', sep=';',encoding='utf-8')