-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscript.py
More file actions
154 lines (124 loc) · 5.54 KB
/
script.py
File metadata and controls
154 lines (124 loc) · 5.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import geopandas as gpd
import pandas as pd
import os
import shutil
from zipfile import ZipFile
from glob import glob
import subprocess
# Define Data Paths
data_path = os.getenv('DATA_PATH', '/data')
inputs_path = os.path.join(data_path,'inputs')
grids_path = os.path.join(inputs_path,'grids')
boundary_path = os.path.join(inputs_path,'boundary')
outputs_path = os.path.join(data_path, 'outputs')
outputs_path_ = data_path + '/' + 'outputs'
if not os.path.exists(outputs_path):
os.mkdir(outputs_path_)
buildings_path = os.path.join(outputs_path, 'buildings')
buildings_path_ = outputs_path + '/' + 'buildings'
if not os.path.exists(buildings_path):
os.mkdir(buildings_path_)
vector_path = os.path.join(inputs_path, 'vectors')
# Identify input polygons and shapes (boundary of city, and OS grid cell references)
boundary_1 = glob(boundary_path + "/*.*", recursive = True)
print('Boundary File:',boundary_1)
# Identify the name of the boundary file for the city name
file_path = os.path.splitext(boundary_1[0])
print('File_path:',file_path)
filename=file_path[0].split("/")
print('filename:',filename)
location = filename[-1]
print('Location:',location)
vector_output = os.path.join(outputs_path, location + '.gpkg')
print('Vector Output File Name:', vector_output)
buildings_premade = glob(vector_path + '/*.gpkg', recursive= True)
print('buildings_premade:', buildings_premade)
boundary = gpd.read_file(boundary_1[0])
if len(buildings_premade)==1:
buildings = gpd.read_file(buildings_premade[0])
clipped = gpd.clip(buildings,boundary)
# Print to a gpkg file
clipped.to_file(os.path.join(buildings_path, location + '.gpkg'),driver='GPKG',index=False)
if len(buildings_premade) == 0:
grid = glob(grids_path + "/*_5km.gpkg", recursive = True)
print('Grid File:',grid)
grid = gpd.read_file(grid[0])
# Ensure all of the polygons are defined by the same crs
boundary.set_crs(epsg=27700, inplace=True)
grid.set_crs(epsg=27700, inplace=True)
# Identify which of the 5km OS grid cells fall within the chosen city boundary
cells_needed = gpd.overlay(boundary,grid, how='intersection')
list = cells_needed['tile_name']
# Identify which of the 100km OS grid cells fall within the chosen city boundary
# This will determine which folders are needed to retrieve the DTM for the area
check=[]
check=pd.DataFrame(check)
check['cell_code']=['AAAAAA' for n in range(len(list))]
a_length = len(list[0])
cell='A'
# Look at each 5km cell that falls in the area and examine the first two digits
for i in range(0,len(list)):
cell=list[i]
check.cell_code[i] = cell[a_length - 6:a_length - 4]
# Remove any duplicates, reset the index - dataframe for the 100km cells
grid_100 = check.drop_duplicates()
grid_100.reset_index(inplace=True, drop=True)
# Create a dataframe for the 5km cells
grid_5=cells_needed['tile_name']
grid_5=pd.DataFrame(grid_5)
# Establish which zip files need to be unzipped
files_to_unzip=[]
files_to_unzip=pd.DataFrame(files_to_unzip)
files_to_unzip=['XX' for n in range(len(grid_100))]
for i in range(0,len(grid_100)):
name=grid_100.cell_code[i]
name_path = os.path.join(vector_path, name + '.zip')
files_to_unzip[i] = name_path
# Unzip the required files
for i in range (0,len(files_to_unzip)):
if os.path.exists(files_to_unzip[i]) :
with ZipFile(files_to_unzip[i],'r') as zip:
# extract the files into the inputs directory
zip.extractall(vector_path)
# Create a list of each grid cell that lies within the boundary (which gpkg are we looking for)
grid_5['file_name'] = grid_5['tile_name']+'.gpkg'
archive=[]
archive=pd.DataFrame(archive)
archive=['XX' for n in range(len(grid_5))]
# Check if the gpkgs for each cell exist
for i in range(0,len(grid_5)):
name = grid_5.file_name[i]
path = glob(vector_path + '/**/' + name, recursive=True)
archive[i] = path
# Remove the empty grid cells from the list
while([] in archive):
archive.remove([])
# Create a list of all of the gpkgs to be merged
to_merge=[]
to_merge=['XX' for n in range(len(archive))]
for i in range (0,len(archive)):
file_path = os.path.splitext(archive[i][0])
filename=file_path[0].split("/")
to_merge[i]=filename[4]+'.gpkg'
# Create a geodatabase and merge the data from each gpkg together
original = []
original=gpd.GeoDataFrame(original)
for cell in to_merge:
gdf = gpd.read_file('/data/inputs/vectors/%s' %cell)
original = pd.concat([gdf, original],ignore_index=True)
# Print to a gpkg file
original.to_file(os.path.join(vector_output),driver='GPKG',index=False)
print('Running vector clip')
vector = gpd.read_file(vector_output)
clipped = gpd.clip(vector,boundary)
clipped = clipped[clipped.geometry.type != 'GeometryCollection']
# Print to a gpkg file
clipped.to_file(os.path.join(outputs_path, location + '_clip.gpkg'),driver='GPKG',index=False)
# Remove unclipped file
os.remove(vector_output)
# Move the clipped file into a new folder and remove the _clip
src=os.path.join(outputs_path, location + '_clip.gpkg')
dst=os.path.join(buildings_path, location + '.gpkg')
shutil.copy(src,dst)
# Remove duplicate file
os.remove(os.path.join(outputs_path, location + '_clip.gpkg'))