# Merge OS building-footprint tiles and clip them to the chosen city boundary.
#
# If a single pre-made buildings GeoPackage already exists under
# ``vector_path`` it is clipped to the boundary directly; otherwise the 5 km
# OS grid tiles intersecting the boundary are identified, the matching 100 km
# zip archives are extracted, the tiles merged, and the result clipped.
# Expects the surrounding file to have defined: os, shutil, glob, ZipFile,
# pd, gpd, outputs_path, vector_path, grids_path, buildings_path, location,
# boundary_1.

vector_output = os.path.join(outputs_path, location + '.gpkg')
print('Vector Output File Name:', vector_output)

# Look for a ready-made buildings file so the expensive merge can be skipped.
buildings_premade = glob(os.path.join(vector_path, '*.gpkg'))
print('buildings_premade:', buildings_premade)

boundary = gpd.read_file(boundary_1[0])

if len(buildings_premade) == 1:
    # A single pre-made file exists - clip it straight to the boundary.
    buildings = gpd.read_file(buildings_premade[0])
    clipped = gpd.clip(buildings, boundary)
    clipped.to_file(os.path.join(buildings_path, location + '.gpkg'),
                    driver='GPKG', index=False)

elif len(buildings_premade) == 0:
    grid_files = glob(os.path.join(grids_path, '*_5km.gpkg'))
    print('Grid File:', grid_files)
    grid = gpd.read_file(grid_files[0])

    # Ensure all of the polygons are defined by the same CRS
    # (EPSG:27700, British National Grid).
    boundary.set_crs(epsg=27700, inplace=True)
    grid.set_crs(epsg=27700, inplace=True)

    # Identify which of the 5km OS grid cells fall within the chosen city
    # boundary.
    cells_needed = gpd.overlay(boundary, grid, how='intersection')
    tile_names = [str(name) for name in cells_needed['tile_name']]
    if not tile_names:
        raise ValueError('No 5km grid cells intersect the boundary for %s'
                         % location)

    # The two characters at positions [-6:-4] of each tile name give the
    # 100km OS cell code, which names the zip archive holding that tile.
    # A set removes duplicates (several 5km tiles share one 100km cell).
    codes_100km = sorted({name[len(name) - 6:len(name) - 4]
                          for name in tile_names})

    # Unzip the required archives into the inputs directory, skipping any
    # that are not present (best-effort, as in the original flow).
    for code in codes_100km:
        zip_path = os.path.join(vector_path, code + '.zip')
        if os.path.exists(zip_path):
            with ZipFile(zip_path, 'r') as zf:
                zf.extractall(vector_path)

    # Locate the gpkg for each 5km cell; cells with no extracted data are
    # simply skipped.
    tile_paths = []
    for name in tile_names:
        matches = glob(os.path.join(vector_path, '**', name + '.gpkg'),
                       recursive=True)
        if matches:
            tile_paths.append(matches[0])
    if not tile_paths:
        raise FileNotFoundError('No tile GeoPackages found under %s'
                                % vector_path)

    # Merge the data from every located tile into one GeoDataFrame and
    # write the unclipped mosaic out.
    merged = gpd.GeoDataFrame(
        pd.concat([gpd.read_file(p) for p in tile_paths], ignore_index=True))
    merged.to_file(vector_output, driver='GPKG', index=False)

    print('Running vector clip')

    vector = gpd.read_file(vector_output)
    clipped = gpd.clip(vector, boundary)

    # Write the clipped result, drop the unclipped intermediate, and move
    # the clipped file into the buildings folder without the '_clip' suffix.
    clip_path = os.path.join(outputs_path, location + '_clip.gpkg')
    clipped.to_file(clip_path, driver='GPKG', index=False)
    os.remove(vector_output)
    shutil.move(clip_path, os.path.join(buildings_path, location + '.gpkg'))