Skip to content

Commit 42a335e

Browse files
Update script.py
1 parent be7fdac commit 42a335e

1 file changed

Lines changed: 113 additions & 102 deletions

File tree

script.py

Lines changed: 113 additions & 102 deletions
Original file line number | Diff line number | Diff line change
@@ -37,106 +37,117 @@
3737
vector_output = os.path.join(outputs_path, location + '.gpkg')
3838
print('Vector Output File Name:', vector_output)
3939

40+
buildings_premade = glob(vector_path + '/*.gpkg', recursive= True)
41+
print('buildings_premade:', buildings_premade)
42+
4043
boundary = gpd.read_file(boundary_1[0])
41-
grid = glob(grids_path + "/*_5km.gpkg", recursive = True)
42-
print('Grid File:',grid)
43-
grid = gpd.read_file(grid[0])
44-
45-
# Ensure all of the polygons are defined by the same crs
46-
boundary.set_crs(epsg=27700, inplace=True)
47-
grid.set_crs(epsg=27700, inplace=True)
48-
49-
# Identify which of the 5km OS grid cells fall within the chosen city boundary
50-
cells_needed = gpd.overlay(boundary,grid, how='intersection')
51-
list = cells_needed['tile_name']
52-
53-
# Identify which of the 100km OS grid cells fall within the chosen city boundary
54-
# This will determine which folders are needed to retrieve the DTM for the area
55-
56-
check=[]
57-
check=pd.DataFrame(check)
58-
check['cell_code']=['AAAAAA' for n in range(len(list))]
59-
a_length = len(list[0])
60-
cell='A'
61-
62-
# Look at each 5km cell that falls in the area and examine the first two digits
63-
for i in range(0,len(list)):
64-
cell=list[i]
65-
check.cell_code[i] = cell[a_length - 6:a_length - 4]
66-
67-
# Remove any duplicates, reset the index - dataframe for the 100km cells
68-
grid_100 = check.drop_duplicates()
69-
grid_100.reset_index(inplace=True, drop=True)
70-
71-
# Create a dataframe for the 5km cells
72-
grid_5=cells_needed['tile_name']
73-
grid_5=pd.DataFrame(grid_5)
74-
75-
# Establish which zip files need to be unzipped
76-
files_to_unzip=[]
77-
files_to_unzip=pd.DataFrame(files_to_unzip)
78-
files_to_unzip=['XX' for n in range(len(grid_100))]
79-
for i in range(0,len(grid_100)):
80-
name=grid_100.cell_code[i]
81-
name_path = os.path.join(vector_path, name + '.zip')
82-
files_to_unzip[i] = name_path
83-
84-
# Unzip the required files
85-
for i in range (0,len(files_to_unzip)):
86-
if os.path.exists(files_to_unzip[i]) :
87-
with ZipFile(files_to_unzip[i],'r') as zip:
88-
# extract the files into the inputs directory
89-
zip.extractall(vector_path)
90-
91-
# Create a list of each grid cell that lies within the boundary (which gpkg are we looking for)
92-
grid_5['file_name'] = grid_5['tile_name']+'.gpkg'
93-
archive=[]
94-
archive=pd.DataFrame(archive)
95-
archive=['XX' for n in range(len(grid_5))]
96-
97-
# Check if the gpkgs for each cell exist
98-
for i in range(0,len(grid_5)):
99-
name = grid_5.file_name[i]
100-
path = glob(vector_path + '/**/' + name, recursive=True)
101-
archive[i] = path
102-
103-
# Remove the empty grid cells from the list
104-
while([] in archive):
105-
archive.remove([])
106-
107-
# Create a list of all of the gpkgs to be merged
108-
to_merge=[]
109-
to_merge=['XX' for n in range(len(archive))]
110-
for i in range (0,len(archive)):
111-
file_path = os.path.splitext(archive[i][0])
112-
filename=file_path[0].split("/")
113-
to_merge[i]=filename[4]+'.gpkg'
114-
115-
# Create a geodatabase and merge the data from each gpkg together
116-
original = []
117-
original=gpd.GeoDataFrame(original)
118-
for cell in to_merge:
119-
gdf = gpd.read_file('/data/inputs/vectors/%s' %cell)
120-
original = pd.concat([gdf, original],ignore_index=True)
121-
122-
# Print to a gpkg file
123-
original.to_file(os.path.join(vector_output),driver='GPKG',index=False)
124-
125-
print('Running vector clip')
126-
127-
vector = gpd.read_file(vector_output)
128-
clipped = gpd.clip(vector,boundary)
129-
130-
# Print to a gpkg file
131-
clipped.to_file(os.path.join(outputs_path, location + '_clip.gpkg'),driver='GPKG',index=False)
132-
133-
# Remove unclipped file
134-
os.remove(vector_output)
135-
136-
# Move the clipped file into a new folder and remove the _clip
137-
src=os.path.join(outputs_path, location + '_clip.gpkg')
138-
dst=os.path.join(buildings_path, location + '.gpkg')
139-
shutil.copy(src,dst)
140-
141-
# Remove duplicate file
142-
os.remove(os.path.join(outputs_path, location + '_clip.gpkg'))
44+
# Produce <buildings_path>/<location>.gpkg: the building polygons clipped to
# the chosen boundary.
#
# Two sources are supported:
#   * exactly one ready-made GeoPackage at the top level of vector_path
#     (listed in buildings_premade) is clipped directly;
#   * no ready-made file: the 5km grid tiles that intersect the boundary are
#     located (unzipping the matching archives first), merged and clipped.
if len(buildings_premade) == 1:
    buildings = gpd.read_file(buildings_premade[0])
    clipped = gpd.clip(buildings, boundary)
    # Print to a gpkg file
    clipped.to_file(os.path.join(buildings_path, location + '.gpkg'), driver='GPKG', index=False)

elif len(buildings_premade) > 1:
    # Previously this case fell through both branches without any message and
    # without writing an output; make the ambiguity visible instead.
    print('More than one premade buildings file found, expected at most one; '
          'no buildings file written:', buildings_premade)

else:
    grid_files = glob(grids_path + "/*_5km.gpkg", recursive=True)
    print('Grid File:', grid_files)
    grid = gpd.read_file(grid_files[0])

    # Ensure all of the polygons are defined by the same crs
    boundary.set_crs(epsg=27700, inplace=True)
    grid.set_crs(epsg=27700, inplace=True)

    # Identify which of the 5km grid cells fall within the chosen boundary.
    # The overlay yields one row per (boundary feature, grid cell) pair, so a
    # tile can appear more than once; de-duplicate so each tile is read and
    # merged exactly once.
    cells_needed = gpd.overlay(boundary, grid, how='intersection')
    tile_names = cells_needed['tile_name'].drop_duplicates().reset_index(drop=True)
    if tile_names.empty:
        raise ValueError('The boundary does not intersect any 5km grid cell')

    # The two characters that end four places before the end of a tile name
    # give the 100km cell code, which is also the name of the zip archive that
    # holds the tile. As before, the slice position is taken from the length
    # of the first tile name. A vectorised string slice replaces the
    # element-by-element chained assignment used previously.
    code_end = len(tile_names.iloc[0]) - 4
    grid_100 = tile_names.str.slice(code_end - 2, code_end).drop_duplicates()

    # Unzip the required archives (those that exist) into the inputs directory
    for cell_code in grid_100:
        archive_path = os.path.join(vector_path, cell_code + '.zip')
        if os.path.exists(archive_path):
            with ZipFile(archive_path, 'r') as tile_archive:
                tile_archive.extractall(vector_path)

    # Locate the gpkg of every required tile anywhere below vector_path; tiles
    # with no file are skipped. The path returned by glob is used as-is rather
    # than being rebuilt from a fixed directory depth.
    tile_paths = []
    for tile_name in tile_names:
        matches = glob(vector_path + '/**/' + tile_name + '.gpkg', recursive=True)
        if matches:
            tile_paths.append(matches[0])
    if not tile_paths:
        raise FileNotFoundError('No building gpkg tiles found under ' + vector_path)

    # Merge the tiles with a single concat. The list is reversed to keep the
    # row order of the earlier implementation, which prepended each new tile.
    tiles = [gpd.read_file(tile_path) for tile_path in tile_paths]
    original = pd.concat(tiles[::-1], ignore_index=True)

    # Print to a gpkg file
    original.to_file(vector_output, driver='GPKG', index=False)

    print('Running vector clip')

    vector = gpd.read_file(vector_output)
    clipped = gpd.clip(vector, boundary)

    # Print to a gpkg file
    clip_path = os.path.join(outputs_path, location + '_clip.gpkg')
    clipped.to_file(clip_path, driver='GPKG', index=False)

    # Remove unclipped file
    os.remove(vector_output)

    # Copy the clipped file into the buildings folder without the _clip
    # suffix, then remove the now-duplicate copy from the outputs folder
    shutil.copy(clip_path, os.path.join(buildings_path, location + '.gpkg'))
    os.remove(clip_path)

0 commit comments

Comments
 (0)