-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodvars_share.py
More file actions
150 lines (138 loc) · 4.61 KB
/
modvars_share.py
File metadata and controls
150 lines (138 loc) · 4.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import pandas as pd
import numpy as np
import glob
#part 1 combining GEE files
#meant to work from blank file based on folders named by variable where data is located
#m = pd.read_csv('/gpfs/data1/cmongp1/ginsburga/sp24_2/ms1216.csv')
#empty frame with the long-format column layout (not referenced again in the steps below)
master = pd.DataFrame(columns=['time','ADM1_NAME','value','var'])
#put path name with * to wherever extracted GEE data organized by year are saved
#files = glob.glob('/gpfs/data1/cmongp1/ginsburga/sp24_2/newf/2021/*') #my path
files = glob.glob('ke_data/2006/*') #example
#find out what order the files are in, adjust indices based on length of path string
print([f[-15:-7] for f in files])
#collect variable names typed by the user, one per prompt, until 'done' is entered;
#the 'done' sentinel itself is not stored as a name
var = []
while True:
    ele = input('enter names of var returned individually/in order. type done when finished.')
    if ele == 'done':
        break
    var.append(ele)
#hard-coded variable name for each file, in the same order as `files`
#NOTE: `vars` shadows the Python builtin; the name is kept because later steps read it
vars = [
    'cmax', 'esi',
    'tmax',
    'cmin', 'cmax',
    'tmin',
    'fldas',
    'ndvi',
    'tmax',
    'cmin',
    'tmin',
]
#vars = var #run if you used the input method to give file names
#Transpose each file so that they are in long format with 1 column containing all admin 1 values
#Read every GEE export, reshape it to long format and store it in `mods` keyed by variable name.
mods = {}
#variables whose data arrive split across several files and must be stacked
stacked_vars = ('cmin', 'cmax', 'tmin', 'tmax')
if len(files) != len(vars):
    #files without a name (or names without a file) are left out by zip() below
    print('warning: {} files but {} variable names; unmatched entries are skipped'.format(len(files), len(vars)))
#pair each file with its variable name by position so a failure can never shift later names
for f, name in zip(files, vars):
    df1 = pd.read_csv(f)
    df1.drop(columns=['.geo','system:index'], inplace=True) #remove meaningless columns
    #one row per (time, admin 1 region); melt() names the value column 'value'
    df2 = df1.melt(id_vars=['time'], var_name='ADM1_NAME')
    df2['var'] = name
    df2.sort_values(by=['time','ADM1_NAME'], inplace=True)
    #fresh 0..n-1 index so every stored frame has unique row labels
    df2.reset_index(drop=True, inplace=True)
    if name in stacked_vars and name in mods:
        #append same variable if necessary
        mods[name] = pd.concat([mods[name], df2], axis=0, ignore_index=True)
    else:
        mods[name] = df2
    #master = pd.concat([m, df2])
print(mods.keys())
#adding columns for date and year and filling
#iterate the loaded frames themselves so each variable is processed exactly once
#(names repeated in `vars` are not reprocessed and names with no frame cannot crash the loop)
for v, vdf in mods.items():
    vdf['date'] = pd.to_datetime(vdf['time'])
    #year from datetime column, computed for all rows at once;
    #stored as float so the column is written the same way as before (e.g. 2006.0)
    vdf['year'] = vdf['date'].dt.year.astype(float)
    #if v=='tmin' or v=='tmax': #could make these edits to concattenated version instead
    #    vdf['value']= vdf['value']-273.15
    if v == 'ndvi':
        #divide NDVI values by 10000
        vdf['value'] = vdf['value'] / 10000
#extracting variable values from dictionaries and compiling all into 1 dataframe
#The 'tmax' frame supplies the rows; every variable in `mods` becomes a column named after it.
vars_init = list(mods.keys())
if 'tmax' not in mods:
    raise KeyError("no 'tmax' data was loaded; it is needed as the base table")
dfc = mods['tmax']
join_keys = ['time', 'ADM1_NAME']
for v in vars_init:
    #vdf = pd.read_csv('vars/full_{}_ke.csv'.format(v)) #code if files are fully organized in dir
    print(v)
    vdf2 = mods[v]
    #one value per (time, region): keep the first occurrence when a pair is repeated
    lookup = (
        vdf2[join_keys + ['value']]
        .drop_duplicates(subset=join_keys, keep='first')
        .rename(columns={'value': v})
    )
    #left merge keeps every base row in its existing order; pairs missing from this
    #variable get NaN rather than a value carried over from another date/region
    dfc = dfc.merge(lookup, on=join_keys, how='left')
#Part 3: adding yield values and crop cal values to 365 day variables file (yield from UCSB/UAH SERVIR)
#Load each per-year file, flag its rows with crop_cal and keep it in `filescc`.
files = glob.glob('/gpfs/data1/cmongp1/ginsburga/sp24_2/vars/full_eo/*')
filescc = {}
for f in files:
    print(f)
    df = pd.read_csv(f)
    df['date'] = pd.to_datetime(df['time'])
    #f = glob.glob.read_csv('vars/full*.csv')
    #ndvi = pd.read_csv('vars/full_ndvi_ke.csv')
    #adm1 = pd.unique(df['ADM1_NAME']).tolist()
    #crop_cal is 0.0 for months September through February and 1.0 otherwise,
    #computed for the whole column at once
    month = df['date'].dt.month
    df['crop_cal'] = np.where((month >= 9) | (month < 3), 0.0, 1.0)
    df.rename(columns={'year':'Year'}, inplace=True)
    #the frame is stored under the 'Year' value found in its row labelled 0
    y = df.loc[0,'Year']
    filescc[y] = df
#Attach the yearly yield value of each admin 1 region to every matching row of dfc.
dfc['date'] = pd.to_datetime(dfc['time'])
yld = pd.read_csv('/gpfs/data1/cmongp1/ginsburga/sp24_2/fewsnet_yield_c2001.csv')
adm1 = pd.unique(dfc['ADM1_NAME']).tolist()
#years 2001 through 2016 plus 2019; a plain list, since numpy arrays have no append()
yrs = list(range(2001, 2017)) + [2019]
#add yield column from other file into the dataframe
if 'yield' not in dfc.columns:
    #create the column up front so it exists even when nothing matches
    dfc['yield'] = np.nan
for a in adm1:
    region_yield = yld.loc[yld['admin_1'] == a]
    region_rows = dfc['ADM1_NAME'] == a
    for y in yrs:
        #grab the yield value(s) reported for this region and year
        found = region_yield.loc[region_yield['Year'] == y, 'value']
        if found.empty:
            #no yield reported for this region/year: leave those rows untouched
            continue
        #write the first reported value onto every row of that region and year
        dfc.loc[region_rows & (dfc['year'] == y), 'yield'] = found.iloc[0]
print('done with yield')
#Flag each row of dfc with crop_cal, rename the year column and write the result to disk.
#crop_cal is 0.0 for months September through February and 1.0 otherwise;
#it is derived from dfc's own 'date' column so its length always matches dfc
month = dfc['date'].dt.month
dfc['crop_cal'] = np.where((month >= 9) | (month < 3), 0.0, 1.0)
dfc.rename(columns={'year':'Year'}, inplace=True)
print('done with crop_cal')
dfc.to_csv('/gpfs/data1/cmongp1/ginsburga/sp24_2/kenya_maize_yr.csv') #change path