diff --git a/satellite_seagrass_classification_hpc.py b/satellite_seagrass_classification_hpc.py new file mode 100644 index 0000000..e192ad4 --- /dev/null +++ b/satellite_seagrass_classification_hpc.py @@ -0,0 +1,956 @@ +# -*- coding: utf-8 -*- +"""Satellite_Seagrass_Classification.ipynb + +Automatically generated by Colab. + +Original file is located at + https://colab.research.google.com/drive/1tC8D1ZsGV5IypEfMyQ3xrnD9JfeI1KMr + +# This code will enable you to classify a satellite scene usinging Deep Convolutional Neural Network machine learning + +## You will first need to draw polygon shapefiles for Regions of Interest (ROIs) in QGIS and export the shapefile + +# Install packages (only needs to be done once) +""" + +# !pip install pandas +# !pip install datetime +# !pip install numpy +# !pip install tensorflow==2.15 --force-reinstall +# #!pip install keras +# !pip install h5py +# !pip install matplotlib +# !pip install scikit-learn +# !pip install geopandas +# !pip install earthpy +# !pip install spectral +# !pip install rasterio +# !pip install rioxarray +# !pip install shapely +# !pip install plotly +# !pip install fiona +# !pip install seaborn + +"""## Download and install GDAL (only needs to be done once)""" + +# you will only need to run this line once to determine the version of Python you have to then go download GDAL +# use this link to download the GDAL wheel file for your Python version and Windows Operating Version +# you do not need to run this Box of code if you have a Mac or Linux +# see instructions for more details +import sys +print(sys.version) + +# You will need to run this line once, even if you have a Windows +# Install the GDAL Wheel File +# update this file path to reflect where you are storing the GDAL file + +#!pip install "C:/Users/fishr/Satellite Seagrass/Python_library/GDAL-3.4.3-cp311-cp311-win_amd64.whl" + +"""# Import Libraries""" + +import os +import sys + +"""# Define File Path Working Dirctory and Satellite Scene Name""" + +# Define all necessary input variables + +# define the EPSG code as a character string +# use the following link to change the EPSG code if you will work in a different region https://spatialreference.org/ref/epsg/ +# EPSG code is a standardized numerical identifier for coordinate systems and spatial reference systems +# used to locate geographic entities on the Earth's surface. +# The EPSG code uniquely identifies a particular coordinate reference system (CRS), +# which includes definitions for coordinate axes, units of measurement, and transformations from one CRS to another. + +epsg_code = "EPSG:32651" + +# set path directory where your data are kept +# path = "C:/Users/mmama/Documents/COASTAL_WATER/Indo-Pacific_Seagrass/Satellite_data/TEST" +path = "/mnt/lustre/hackathons/hack_teams/hack_team_02/Hackathon_2024/Philippines_Test_Data" + +# name of the Satellite file you will be processing +# sat_file = "Masinloc_PS_20230620_Nonnormalized" +sat_file = "Masinloc_20230620_014002_99_24cc_3B_AnalyticMS_SR_8b_clip" + +wdir = os.path.join(path, sat_file) + +# import seagrass library file which contains many Python code functions that this code depends on +# this seagrass_lib.py file should be stored in a sub-folder called "Python_library" which is in the folder path defined above + +library_dir = os.path.join(path, "Python_library") +sys.path.insert(0, library_dir) +from seagrass_lib import * + +# increase cache size to avoid memory constraints +from osgeo import gdal +gdal.SetCacheMax(2000000000) + +"""# Import your Shapefile from QGIS""" + +# Open the shapefile that is stored in an ROIs subfolder under the Input_data subfolder, which is in your working directory +input_shp = os.path.join(wdir, "Input_data", "ROIs", sat_file + ".shp") + +shapefile = gpd.read_file(input_shp) + +# Display the first few rows of the attribute table to see what the Classification ID name caterogies for your shapefile are +# these should reflect the different categories you drew in QGIS + +print(shapefile.head()) + +# Get the column names +column_names = shapefile.columns + +# Print the column names +print(column_names) + +# Rename the first column of the shapefile to "Classname" +shapefile = shapefile.rename(columns={shapefile.columns[0]: "Classname"}) +print(shapefile.head()) + +# View unique values in ID column +unique_values = shapefile["Classname"].unique() + +# prints all of the categories we are trying to classify +print(unique_values) + +# Define output file for newly renamed shapefile +output_shapefile_path = os.path.join(wdir, "Input_data", "ROIs", sat_file + ".shp") + +shapefile.to_file(output_shapefile_path) + +# Now you will split multipart polygon into singlepart polygon, but first +# define where you will save the new singlepart polygon output +output_shp = os.path.join(wdir, "Input_data", "ROIs", sat_file + "_singlepart.shp") + +# Split multipart polygon into singlepart polygon +multipart_to_singlepart(shp_fp = input_shp, out_fp = output_shp) + +"""# Load your Satellite Scene""" + +satellite_scene = gdal.Open(os.path.join(wdir, 'Input_data', 'Rrs_image', sat_file + ".TIF")) +print(os.path.join(wdir, 'Input_data', 'Rrs_image', sat_file + ".TIF")) + +# see how many bands satellite scene has +if satellite_scene is not None: + print ("band count: " + str(satellite_scene.RasterCount)) + +band_num = satellite_scene.RasterCount + +# Look at the pixel values of the satellite scene +satellite_scene = os.path.join(wdir, 'Input_data', 'Rrs_image', sat_file + ".TIF") + +# Open the image file +with rio.open(satellite_scene) as src: + # Read the raster data as a numpy array + raster_data = src.read(1, masked=True) # Use masked=True to handle NoData values + + # Get the number of rows and columns + rows, cols = raster_data.shape + + # Calculate the total number of pixels + total_pixels = rows * cols + + print("Total number of pixels:", total_pixels) + + # Get metadata including NoData value + nodata_value = src.nodata + metadata = src.meta + + # Check the data range + data_min = np.nanmin(raster_data) # Minimum value excluding NoData + data_max = np.nanmax(raster_data) # Maximum value excluding NoData + + # Print data range + print("Data Range (excluding NoData):", data_min, "-", data_max) + + + # Check for NoData values + if nodata_value is not None: + nodata_count = np.sum(raster_data.mask) # Count NoData pixels + print("NoData Value:", nodata_value) + print("Number of NoData Pixels:", nodata_count) + +total_pixels - nodata_count + +# Open and View original satellite image +satellite_scene = os.path.join(wdir, 'Input_data', 'Rrs_image', sat_file + ".TIF") + +# Open the image file +with rio.open(satellite_scene) as src: + # NIR (8), red (6), and green (4), blue (2) bands for Planet + image = src.read([6, 4, 2]) + +# Plot the false-color composite +plt.imshow(image.transpose(1, 2, 0)) +plt.title('False-Color Composite (Red-Green-Blue)') +plt.show() + +# View each band of the satellite scene +satellite_scene = os.path.join(wdir, 'Input_data', 'Rrs_image', sat_file + ".TIF") + +with rio.open(satellite_scene) as src: + # Read all bands + image = src.read(list(range(1, band_num+1))) + +# Plot each band separately +fig, axes = plt.subplots(1, band_num, figsize=(20, 15)) +for i in range(band_num): + im = axes[i].imshow(image[i], cmap='gist_earth') + axes[i].set_title(f'Band {i+1}') + +# Add color bar after the last subplot +cbar = fig.colorbar(im, ax=axes[-1], orientation='vertical', fraction=0.05, pad=0.04) +cbar.set_label('Reflectance') # Add label to the color bar + +plt.tight_layout() # Adjust layout to prevent overlapping +plt.show() + +"""# Now Extract Category ROIs from Satellite Scene using Singlepart Polygons""" + +# re-define the satellite scen you are working with +input_image = satellite_scene + +# define the shapefile to be the singlepart shapefile you created above +input_shp = os.path.join(wdir, "Input_data", "ROIs", sat_file + "_singlepart.shp") + +# pick a place to save the new output tiffs, it should be in the ROI subfolder +output_ROIs = os.path.join(wdir, "Input_data", "ROIs") + +# Display the first few rows of the attribute table to see what the ID name caterogies for your shapefile are +# these should reflect the different categories you drew in QGIS + +singlepart_shapefile = gpd.read_file(input_shp) +print(singlepart_shapefile.head()) + +# View unique values in ID column +unique_values = singlepart_shapefile["Classname"].unique() + +# prints all of the categories we are trying to classify +print(unique_values) + +# extract the image spectra for each category ROI drawn in QGIS and save to the location you defined above +shp_to_roi(image_fp = input_image, output_dir = output_ROIs, shp_fp = input_shp, field_name = 'Classname') + +"""# Train DCNN with Image ROIs""" + +#creat a folder to store the DCNN model, saved as a .h5 file +dcnn_fp = os.path.join(wdir, "Input_data", "DCNN_model", sat_file + ".h5") + +#input data for the training are the tiffs of each ROI we extracted above +input_data = os.path.join(wdir, 'Input_data', 'ROIs') + +#Run the training code +image_classes = roi_classes(shp_fp = input_shp, field_name = 'Classname') + +train_dcnn(cnnFileName = dcnn_fp, epochs = 100 , training_data_directory = input_data, class_names = image_classes, numChannels = band_num, dimension = 3) + +#View Accuracy and Loss Plots + +#The accuracy plot shows how well the model is performing in terms of correctly classifying the data. +# displays the training accuracy and validation accuracy over epochs (or iterations). +# The training accuracy measures the accuracy of the model on the training dataset, +# while the validation accuracy measures the accuracy on a separate validation dataset. +# A rising trend in both training and validation accuracy indicates that the model is learning and generalizing well. +# Fluctuations or a decrease in validation accuracy relative to training accuracy may suggest overfitting, +# where the model performs well on the training data but fails to generalize to unseen data. + + +# The loss plot shows the value of the loss function (e.g., cross-entropy loss) over epochs. +# quantifies how well the model is performing. +# A measure of the model's error or how far the predicted outputs are from the true labels. +# A decreasing trend in both training and validation loss indicates that the model is improving in its predictions +# and learning the underlying patterns in the data. +# If the training loss decreases while the validation loss increases or remains stagnant, it may indicate overfitting. + +from IPython.display import Image, display + +accuracy_path = os.path.join(wdir, "Input_data", "DCNN_model", "accuracy.png") +loss_path = os.path.join(wdir, "Input_data", "DCNN_model", "loss.png") + +# List of paths to your PNG files +image_paths = [accuracy_path, loss_path] + +# Display each image +for i in image_paths: + display(Image(i)) + +"""# Classify input image with trained dcnn""" + +# re-define satellite scene you are working with +input_image = satellite_scene + +# make a string name to save the new classified image under +classified_scene = 'classified_'+ sat_file + +# define where you will save the classfied image output +output_classification = os.path.join(wdir, "Classified_image", classified_scene + ".TIF") + +# define where you saved the trained DCNN .h5 model from above +dcnn_fp = os.path.join(wdir, "Input_data", "DCNN_model", sat_file + ".h5") + +# Run the classification +dcnn_classification(image_fp = input_image, dcnn_fp = dcnn_fp, output_fp = output_classification) + +"""# Open and View the Classified Satellite Image""" + +# make a string name to save the new classified image under +classified_scene = 'classified_'+ sat_file + +# open the classified satellite scene +Classification = os.path.join(wdir, "Classified_image", classified_scene + ".TIF") + +# Open the classified satellite scene raster dataset using rioxarray +raster = rioxarray.open_rasterio(Classification) + +# Plot the raster +raster.plot() +plt.title('Classified Satellite Scene') +plt.show() + +# Check to see how many categories of classifications there are + +# Open the GeoTIFF file +with rio.open(Classification) as src: + # Read the single band + band = src.read(1) + + # Get unique values and their counts + unique_values, counts = np.unique(band, return_counts=True) + + # Print unique values and their counts + for value, count in zip(unique_values, counts): + print(f"Value: {value}, Count: {count}") + +# Open the classified satellite GeoTIFF file and plot it in a different way +# viewing the classified satellite scene will be better in QGIS, but this is a quick way to see if the classification worked + +with rio.open(Classification) as src: + data = src.read(1) + + # Get unique values and their counts + unique_values, counts = np.unique(data, return_counts=True) + + # Print unique values and their counts + for value, count in zip(unique_values, counts): + print(f"Value: {value}, Count: {count}") + + cmap = ListedColormap(['black','blue', 'green','gray','yellow','red']) # Modify colors as needed + bounds = [0, 1, 2, 3, 4, 5,6] # Modify class boundaries as needed + + # Plot the data + plt.figure(figsize=(8, 8)) + plt.imshow(data, cmap=cmap, extent=[src.bounds.left, src.bounds.right, src.bounds.bottom, src.bounds.top]) + + # Add color legend + cb = plt.colorbar(ticks=bounds, boundaries=bounds) + cb.set_label('Classifications') + + plt.show() + +"""# Calculate the area of seagrass cover in the satellite scene""" + +# Open the GeoTIFF file of the Classified Satellite Scene +# you will need to go manually inspect the classified satellite scene, either here in Jupyter Notebook, +# or open the scene in QGIS +# to see which number corresponds to seagrass, open water, land, submerged sand, turbid water, and NA +# these numbers can unforutnately change each time you run the code + +# from visual inspection, +# 0 is NA +# 1 is Open Water +# 2 is Seagrass +# 3 is Land +# 4 is Submerged Sand +# 5 is Turbid Water + +# make a string name to save the new classified image under +classified_scene = 'classified_'+ sat_file + +# open the classified satellite scene +Classification = os.path.join(wdir, "Classified_image", classified_scene + ".TIF") + +with rio.open(Classification) as src: + # Read the single band + band = src.read(1) + + # Get unique values and their counts + unique_values, counts = np.unique(band, return_counts=True) + print(unique_values) + + # Initialize variables for each class count + Unknown = 0 + Land = 0 + Turbid_water = 0 + Sand = 0 + Seagrass = 0 + Open_water = 0 + + # Assign counts to respective variables + # the order number can change + for value, count in zip(unique_values, counts): + if value == 0: + Unknown = count + if value == 1: + Open_water = count + elif value == 2: + Seagrass = count + elif value == 3: + Land = count + elif value == 4: + Sand = count + elif value == 5: + Turbid_water = count + + +# Print class counts +print("Unknown count:", Unknown) +print("Land count:", Land) +print("Turbid Water count:", Turbid_water) +print("Submerged Sand count:", Sand) +print("Seagrass count:", Seagrass) +print("Open Water count:", Open_water) + +square_meters_seagrass = Seagrass*9 +km2_seagrass = (square_meters_seagrass / 1000000) + +print("Seagrass Area in square kilometers:", km2_seagrass, "km²") + +"""# Optional: Apply Land Mask to the Classified Satellite Scene""" + +# Read the land shapefile +Land_shapefile = gpd.read_file(os.path.join(path, "Philippines shoreline shapefile", 'Philippines_boundary.shp')) + +# Plot the shapefile to visualize it +Land_shapefile.plot() +plt.title('National Shapefile') +plt.xlabel('Longitude') +plt.ylabel('Latitude') +plt.show() + +# Open the raster dataset using rioxarray +raster = rioxarray.open_rasterio(Classification) + +# Plot the raster +raster.plot() +plt.title('Classified Raster') +plt.show() + +# Check the CRS of both the raster and the shapefile +print("Raster CRS:", raster.rio.crs) +print("Shapefile CRS:", Land_shapefile.crs) + +# Mask the Satellite Scene using the land shapefile and save it as a GeoTIFF +masked_raster = raster.rio.clip(Land_shapefile.geometry.apply(mapping), crs=Land_shapefile.crs, invert=True) + +# Plot the masked raster +masked_raster.plot() +plt.title('Masked Classified Satellite Scene') +plt.show() + +# Specify the output file path +output_folder = os.path.join(wdir, "Classified_image") +# make a string name to save the new classified image with land mask +masked_scene = 'masked_'+ classified_scene +output_file = os.path.join(output_folder, masked_scene + '.TIF') + +# Save the masked raster as GeoTIFF +masked_raster.rio.to_raster(output_file) + +#Open and view the classified satellite scene with the land mask applied + +Masked_scene = os.path.join(wdir, "Classified_image", masked_scene + '.TIF') + +# Open the GeoTIFF file +with rio.open(Masked_scene) as src: + data = src.read(1) + cmap = ListedColormap(['gray', 'red', 'yellow','brown','green','blue']) # Modify colors as needed + bounds = [0, 1, 2, 3, 4, 5, 6 ] # Modify class boundaries as needed + + # Plot the data + plt.figure(figsize=(8, 8)) + plt.imshow(data, cmap=cmap, extent=[src.bounds.left, src.bounds.right, src.bounds.bottom, src.bounds.top]) + + # Add color legend + cb = plt.colorbar(ticks=bounds, boundaries=bounds) + cb.set_label('Classifications') + + plt.show() + +"""# Statistical Assesment with USAID Shapefile""" + +# Read in USAID reference data +usaid = gpd.read_file(os.path.join(path, "USAID Seagrass Shapefile", 'Philippines_Seagrass.shp')) + +# Print the attribute table +usaid.head() + +# Check the unique values in the column you want to use for classification +unique_values = usaid['id'].unique() +print(unique_values) + +# Plot the shapefile to visualize it +# Create a new figure and axis with the desired figsize +fig, ax = plt.subplots(figsize=(10, 8)) + +# Plot the shapefile with adjusted linewidth +usaid.plot(ax=ax, color = 'red') # Adjust the linewidth as needed + +plt.title('National Seagrass/Seaweed USAID Shapefile') +plt.xlabel('Longitude') +plt.ylabel('Latitude') +plt.show() + +# Read in satellite classification data + +# make a string name to save the new classified image under +classified_scene = 'classified_'+ sat_file + +# open the classified satellite scene +Classification = os.path.join(wdir, "Classified_image", classified_scene + ".TIF") + +#check EPSG code for shapefile and satellite image +with rio.open(Classification) as src: + sat_classification = src.read(1) # Read the first band, assuming it's the classified image + sat_classification_meta = src.meta + sat_classification_crs = src.crs + +print("Satellite CRS:", sat_classification_crs) +print("Shapefile CRS:", usaid.crs) + +# Plot the satellite classification raster + +# from visual inspection, +# 0 is NA +# 1 is Open Water +# 2 is Seagrass +# 3 is Land +# 4 is Submerged Sand +# 5 is Turbid Water + +plt.figure(figsize=(10, 10)) +plt.imshow(sat_classification, cmap='viridis') +cbar = plt.colorbar(label='Classifications', shrink=0.8) # Adjust shrink parameter +plt.show() + +# Reproject the shapefile to the same coordinate system as the satellite image and +# crop it to the extent of the satellite image +# and plot the shapefile on top of the satellite image + +from shapely.geometry import shape, box +from shapely.ops import unary_union + + +# Open the raster image +with rio.open(Classification) as src: + sat_classification = src.read(1) # Read the first band, assuming it's the classified image + # Mask the raster where the values are 0 + sat_classification_extent = [src.bounds.left, src.bounds.right, src.bounds.bottom, src.bounds.top] # Get the extent of the raster + sat_classification_crs = src.crs + +# Create a bounding box geometry from the raster bounds +bbox = box(*src.bounds) + +# Open the shapefile +usaid_shapefile_path = os.path.join(path, "USAID Seagrass Shapefile", 'Philippines_Seagrass.shp') +usaid_shapefile = gpd.read_file(usaid_shapefile_path) + +# Reproject the shapefile to match the CRS of the raster +usaid_shapefile = usaid_shapefile.to_crs(sat_classification_crs) + +# Crop the shapefile to the extent of the bounding box +usaid_cropped = gpd.overlay(usaid_shapefile, gpd.GeoDataFrame(geometry=[bbox], crs=usaid_shapefile.crs), how='intersection') + +# Display the first few rows to verify the retention of attributes +print(usaid_cropped.head()) + +# Plot the raster image +plt.figure(figsize=(10, 10)) +plt.imshow(sat_classification, extent=sat_classification_extent, cmap='viridis') +cbar = plt.colorbar(label='Classifications', shrink=0.7) # Adjust shrink parameter of colorbar +cbar.ax.yaxis.label.set_size(18) # Set font size of color bar label + +# Plot the cropped shapefile on top of the raster +usaid_cropped.plot(ax=plt.gca(), facecolor='none', edgecolor='red', linewidth=2) + +# Create custom legend handles and labels +legend_handles = [plt.Line2D([0], [0], marker='_', color='r', linewidth=1, label='USAID Shapefile')] +legend_labels = ['USAID Shapefile'] + +# Add legend +plt.legend(handles=legend_handles, labels=legend_labels) + +plt.show() + +print(usaid_cropped.columns) + +# View the usaid cropped shapefile to make sure it cropped properly and retained its attributes and ID +# Check the unique values in the 'id' column +unique_values = usaid_cropped['id'].unique() +print(unique_values) + +usaid_cropped.head() + +# Reclassify satellite results to match the categories used in the USAID data +# You'll need to define your reclassification rules based on the categories used in the USAID data +# For simplicity, let's assume the categories are "Land", "No_data", "Submerged_sand", "Deep_water", and "Seagrass" + +# from visual inspection, +# 0 is Unknown +# 1 is Seagrass +# 2 is Submerged sand +# 3 is Land +# 4 is Turbid water +# 5 is Corals +# 6 is Deep water + + +sat_reclass = np.copy(sat_classification) # get the classification numbers from the satellite scene + +# Convert the array to a signed integer type +sat_reclass = sat_reclass.astype(np.int16) + +# Set "Unknown", "Land", and "Turbid_water" to NaN +sat_reclass[(sat_reclass == 0) | (sat_reclass == 3)| (sat_reclass == 4)] = -9999 + +# Set "Submerged_sand," "Corals," and "Deep_water" to 100 +sat_reclass[(sat_reclass == 2) | (sat_reclass == 5) | (sat_reclass == 6)] = 100 + +# Set Seagrass to 200 +sat_reclass[sat_reclass == 1] = 200 + +# Rasterize the shapefile +# Define raster dimensions and resolution based on the classification raster +width = sat_classification_meta['width'] +height = sat_classification_meta['height'] +transform = sat_classification_meta['transform'] +dtype = sat_classification.dtype + +# Define the rasterized ID values (2 to 200, and all others to 100) +values = [(200 if id_val == 2 else 100) for id_val in usaid_cropped['id']] + +# Rasterize the GeoDataFrame into a new raster +usaid_rasterized = rasterize( + [(geom, val) for geom, val in zip(usaid_cropped.geometry, values)], + out_shape=(height, width), + transform=transform, + fill=100, + dtype=dtype, +) + +# Plot the rasterized result +plt.figure(figsize=(10, 10)) +plt.imshow(usaid_rasterized, cmap='viridis', extent=sat_classification_extent) +plt.colorbar(label='Rasterized Values', shrink=0.8) +plt.title('Rasterized USAID Result') +plt.show() + +# Look for unique values in usaid raster +unique_values = np.unique(usaid_rasterized) +print("Unique raster values:", unique_values) + +# Flatten arrays for confusion matrix calculation +sat_flat = sat_reclass.flatten() +usaid_cropped_rast_flat = usaid_rasterized.flatten() + +# Check the unique values in the USAID flatted shapefile raster -> should only be 100 and 200 +np.unique(usaid_cropped_rast_flat) + +# Check the unique values in the Satellite raster -> should only be -9999, 100, and 200 +np.unique(sat_flat) + +# Calculate confusion matrix + +# Define the actual predictions and true labels +# "True data" or "Field data" should come first, so that is the usaid_cropped_rast_flat data +# "Predicted data" comes second, that is from the satellite classification sat_flat +# true_labels = usaid_cropped_rast_flat +# predicted_labels = sat_flat + +# Create a confusion matrix +# Labels are specified to ensure the order in the confusion matrix +# For example, 200 represent "Seagrass" and 100 represent "Non-seagrass" + +cm = confusion_matrix(usaid_cropped_rast_flat, sat_flat, labels=[200, 100]) + +print(cm) + +from sklearn.metrics import confusion_matrix +from matplotlib.ticker import FuncFormatter + + +# Create a confusion matrix (assuming you have defined usaid_cropped_rast_flat and sat_flat) +cm = confusion_matrix(usaid_cropped_rast_flat, sat_flat, labels=[200, 100]) + +# Plot the confusion matrix as a heatmap +plt.figure(figsize=(8, 6)) +sns.heatmap(cm, cmap='Blues', annot=False, fmt='d', + xticklabels=['Seagrass', 'Non-Seagrass'], yticklabels=['Seagrass', 'Non-Seagrass'], + annot_kws={"fontsize": 18, "color": "red"}) + +# Add annotations manually +for i in range(len(cm)): + for j in range(len(cm)): + plt.text(j + 0.5, i + 0.5, '{:,}'.format(cm[i, j]), ha='center', va='center', color='red', fontsize=18) + +# Adjust the font size of the tick labels +plt.xticks(fontsize=14) +plt.yticks(fontsize=14) + + +plt.title('Agreement Matrix', fontsize=20 ) +plt.xlabel('PlanetScope Classification', fontsize=18) +plt.ylabel('USAID Reference Data', fontsize=18) + +plt.show() + +True_positive = cm[0,0] +True_positive + +False_positive = cm[1,0] +False_positive + +False_negative = cm[0,1] +False_negative + +True_negative = cm[1,1] +True_negative + +sensitivity = (True_positive/(True_positive + False_negative)) +print("Sesitivity = ", sensitivity) + +specificity = (True_negative/(True_negative + False_positive)) +print("Specificity = ", specificity) + +balanced_agreement = ((sensitivity+specificity)/2) +print("Balanced Agreement = ", balanced_agreement) + +print("Sesitivity = ", sensitivity) +print("Specificity = ", specificity) +print("Balanced Agreement = ", balanced_agreement) + +# """# Analyze Field Data""" + +# # Analyze field data +# field_data = os.path.join(path, "Field_data",'Philippines_seagrass_field_data.csv') + +# # Read the CSV file containing latitude and longitude data +# data = pd.read_csv(field_data) # Update with your file path +# data.head() + +# # make a string name to save the new classified image under +# classified_scene = 'classified_'+ sat_file + +# # open the classified satellite scene +# Classification = os.path.join(wdir, "Classified_image", classified_scene + ".TIF") + +# # Plot the Field Data on top of the satellite raster + +# # Open the raster image +# with rio.open(Classification) as src: +# sat_classification = src.read(1) # Read the first band of the classified image +# sat_classification_extent = src.bounds # Get the extent of the raster +# sat_classification_crs = src.crs +# print(sat_classification_crs) + +# # Get the CRS of the raster +# raster_crs = sat_classification_crs + + +# # Read the CSV file containing field data coordinates +# field_data_path = os.path.join(path, "Field_data", 'Philippines_seagrass_field_data.csv') +# field_data = pd.read_csv(field_data_path) # Update with your file path + +# # Convert the Latitude and Longitude columns to float +# field_data['Latitude'] = field_data['Latitude'].astype(float) +# field_data['Longitude'] = field_data['Longitude'].astype(float) + +# # Create a GeoDataFrame from the CSV data with Point geometries +# geometry = [Point(lon, lat) for lon, lat in zip(field_data['Longitude'], field_data['Latitude'])] +# field_gdf = gpd.GeoDataFrame(field_data, geometry=geometry, crs='EPSG:4326') # Assuming WGS84 lat/lon coordinates + +# # Reproject the GeoDataFrame to the CRS of the raster +# field_gdf = field_gdf.to_crs(raster_crs) + +# # Clip the field data points to the spatial extent of the raster +# field_subset = field_gdf[field_gdf.geometry.within(box(*src.bounds))] + +# # Plot the raster image +# plt.figure(figsize=(10, 10)) +# plt.imshow(sat_classification, extent=[sat_classification_extent.left, +# sat_classification_extent.right, +# sat_classification_extent.bottom, +# sat_classification_extent.top], cmap='viridis') +# cbar = plt.colorbar(label='Classifications', shrink=0.8) # Adjust shrink parameter of colorbar +# cbar.ax.yaxis.label.set_size(18) # Set font size of color bar label + +# plt.title('Satellite Classification', fontsize=20) +# plt.xlabel('X Coordinate', fontsize=14) +# plt.ylabel('Y Coordinate', fontsize=14) + +# # Plot the clipped field data points on top of the raster image +# field_subset.plot(ax=plt.gca(), color='red', markersize=5, label='Field Data', legend=True) +# plt.legend() + +# plt.show() + +# # view the clipped field data +# field_subset.head() + +# # Read the CRS from the vector +# vector_crs = field_subset.crs +# vector_crs + +# # Check if the CRS of field sample points and satellite raster are the same +# if sat_classification_crs == vector_crs: +# print("CRS match.") +# else: +# print("CRS do not match.") + +# # Look at all the unique measurement types in the field data under the column "Measurement" +# # so you can decide how you want to subset the data for further anaylyses +# unique_measurements = field_subset['Measurement'].unique() +# unique_measurements + +# #Select the Measurement type that you are interested in +# #here we are interested in field measrements of percent cover +# percent_cover = field_subset[field_subset['Measurement'] == 'Percent cover'] +# percent_cover + +# # Extract the satellite classification data +# # Create an empty list to store the extracted pixel values +# extracted_pixel_values = [] + +# # Iterate over the clipped field data points +# for index, row in percent_cover.iterrows(): +# # Get the coordinates of the field data point +# lon, lat = row['geometry'].x, row['geometry'].y + +# # Convert the coordinates to pixel coordinates +# col, row = src.index(lon, lat) + +# # Extract the pixel value from the raster +# Classification_pixel_value = sat_classification[col, row] + +# # Append the extracted pixel value to the list +# extracted_pixel_values.append(Classification_pixel_value) + +# # Add the extracted pixel values to the clipped field data DataFrame +# percent_cover.loc[:, 'Classification_Pixel_Values'] = extracted_pixel_values + +# # Display the DataFrame +# percent_cover.head() + +# # Compare the Field Data percent cover to the classification +# # you will need to remember the classification numbers and what categories they correspond to + +# # from visual inspection, +# # 0 is NA +# # 1 is Open Water +# # 2 is Seagrass +# # 3 is Land +# # 4 is Submerged Sand +# # 5 is Turbid Water + +# # Print the total number of rows +# total_rows = percent_cover.shape[0] +# print("Total number of samples:", total_rows) + +# #Print the total number of Seagrass pixel samples +# count_of_seagrass = (percent_cover['Classification_Pixel_Values'] == 2).sum() +# print("Total number of Seagrass:", count_of_seagrass) + +# #Print the total number of Land pixel samples +# count_of_land = (percent_cover['Classification_Pixel_Values'] == 3).sum() +# print("Total number of Land:", count_of_land) + +# #Print the total number of Open Water pixel samples +# count_of_open_water = (percent_cover['Classification_Pixel_Values'] == 1).sum() +# print("Total number of Open Water:", count_of_open_water) + +# #Print the total number of Turbid Water pixel samples +# count_of_turbid = (percent_cover['Classification_Pixel_Values'] == 5).sum() +# print("Total number of Turbid:", count_of_turbid) + +# #Print the total number of Sand pixel samples +# count_of_sand = (percent_cover['Classification_Pixel_Values'] == 4).sum() +# print("Total number of Sand:", count_of_sand) + +# total_compare = total_rows-count_of_turbid +# Accurate_seagrass = (count_of_seagrass/total_compare)*100 +# print("Classification vs Field Data Overlap =", f"{Accurate_seagrass:.2f}%") + +# #Can compare satellite classification to field data of above ground biomass if interested +# above_ground_biomass = field_subset[field_subset['Measurement'] == 'Aboveground biomass'] +# above_ground_biomass + +# # Remove any "NA" rows, such as Turbid water, human-made objects, aquaculture, if those are categories in your classification + +# # from visual inspection, +# # 0 is NA +# # 1 is Open Water +# # 2 is Seagrass +# # 3 is Land +# # 4 is Submerged Sand +# # 5 is Turbid Water + +# # remove Turbid water which is classified as 3 in this case +# seagrass_compare = percent_cover[percent_cover["Classification_Pixel_Values"]!=5] + +# #get number of rows and columns in new dataframe +# print(seagrass_compare.shape) + +# #visually inspect new dataframe +# seagrass_compare + +# # from visual inspection, +# # 0 is NA +# # 1 is Open Water +# # 2 is Seagrass +# # 3 is Land +# # 4 is Submerged Sand +# # 5 is Turbid Water + +# #Convert Seagrass pixels to "200" +# seagrass_compare.loc[seagrass_compare['Classification_Pixel_Values'] == 2, 'Classification_Pixel_Values'] = 200 + +# #Convert all non-seagrass pixels to "100" +# seagrass_compare.loc[seagrass_compare['Classification_Pixel_Values'] == 1, 'Classification_Pixel_Values'] = 100 +# seagrass_compare.loc[seagrass_compare['Classification_Pixel_Values'] == 3, 'Classification_Pixel_Values'] = 100 +# seagrass_compare.loc[seagrass_compare['Classification_Pixel_Values'] == 4, 'Classification_Pixel_Values'] = 100 + +# # check unique values, all values should be either 200 for seagrass or 100 for non-seagrass +# print(seagrass_compare['Classification_Pixel_Values'].unique()) + +# #visually inspect +# seagrass_compare + +# #make a boxplot to see how the satellite classification compared to the Field Data %Cover + +# # Convert 'Value' column to numeric type +# seagrass_compare['Value'] = pd.to_numeric(seagrass_compare['Value'], errors='coerce') + +# # Create boxplots +# plt.figure(figsize=(10, 10)) +# seagrass_compare.boxplot(column='Value', by='Classification_Pixel_Values', widths=0.5, grid=False) + +# plt.title("Satellite Seagrass Classification vs Field Data", fontsize = 18) +# plt.xlabel("Satellite Classification", fontsize = 16) +# plt.ylabel("Field Data Seagrass % Cover", fontsize = 16) +# plt.xticks([1, 2], labels=["No seagrass", "Seagrass"], rotation=0, fontsize = 14) +# plt.tight_layout() +# plt.show() + +# from scipy.stats import mannwhitneyu + +# # Compare two samples - Sample 1 = non-seagrass, Sample 2 = seagrass - see if their distributions are the same +# non_seagrass = seagrass_compare[seagrass_compare['Classification_Pixel_Values'] == 100]['Value'] +# seagrass = seagrass_compare[seagrass_compare['Classification_Pixel_Values'] == 200]['Value'] + +# # Perform Mann-Whitney U test +# # we hypothesize non-seagrass %cover distribution will be less than seagrass +# statistic, p_value = mannwhitneyu(non_seagrass, seagrass, alternative='less') + +# # Print the results +# print("Mann-Whitney U statistic:", statistic) +# print("p-value:", p_value) +