BritishGeologicalSurvey · m-haines · Apr 12, 2023
diff --git a/README.md b/README.md
@@ -58,6 +58,64 @@ The final output from the images themselves looks like what is below- with varyi
 ![Example of fully processed image](Images/S00128821.Cropped_Top_2_Countoured.png)
 
 
+## CoreScore Application Flowchart
+```mermaid
+flowchart TD
+id_1[("Large image store ")]
+id_2[("Unlabelled images")]
+id_3[("Labelled images")]
+id_4_1["Create masks for training <br>
+make_masks.py, using masks.py"]
+id_4_2["test.py <br>
+Run trained model on test data"]
+id_4[" local_train.py <br>
+Train ML model (unet Resnet54 CNN)"]
+id_5["Image Process request"]
+id_6["Trained ML model"]
+id_7["ML output API"]
+id_8["Client side"]
+id_9["Image labelling tool"]
+id_10["ML manual checks <br>
+accuracy, IoU, check against <br> previous test results <br> using
+get_metrics.py, get_test_accuracy.py"]
+id_11["Download results"]
+id_12["Results feedback"]
+
+subgraph data_store["Large Image Store"]
+id_1 --> id_2
+id_1 --> id_3
+end
+
+subgraph ml_model ["Production ML Model"]
+id_6
+end
+
+subgraph im_lab ["Image Labelling"]
+id_2 --> id_9
+id_9 --> id_3
+end
+
+subgraph ml_train [Train / Update Machine Learning model]
+id_3 -->|All labelled images| id_4_1
+id_4_1 --> id_4
+id_4 --> id_4_2
+id_4_2 --> id_10
+id_10 -->|Checks pass <br> Updated model| id_6
+id_10 -->|Checks fail <br> New model retrain| id_4
+end
+
+subgraph cs_app [CoreScore Application]
+id_8 -->id_5
+id_5 -->|large image API| id_1
+id_1 -->|downsampled image| id_6
+id_6 --> id_7
+id_7 -->|Output to client side| id_8
+id_8 --> id_11
+id_8 --> id_12
+end
+
+```
+
 ## Contributors
 
  * Zayad Al Zaher

diff --git a/corescore/mlflowregistry.py b/corescore/mlflowregistry.py
@@ -47,26 +47,26 @@ def _find_model(self, name=None, version=None):
             Return model's path """
         filter_str = f"name='{name}'"
         if version:
-             models = self.search_model_versions(filter_string=filter_str)
-             if not models:
-                 raise MlflowRegistryError(f'Model named {name} does not exist')
-             model = list(filter(lambda model: model.version == version,  models))
-             try:
-                 model_path = os.path.join(model[0].source, 'model')
-             except IndexError:
-                raise MlflowRegistryError((f'Model named {name},'
-                                           f'version {version} does not exist')) from None
+            models = self.search_model_versions(filter_string=filter_str)
+            if not models:
+                raise MlflowRegistryError(f'Model named {name} does not exist')
+            model = list(filter(lambda model: model.version == version,  models))
+            try:
+                model_path = os.path.join(model[0].source, 'model')
+            except IndexError:
+                raise MlflowRegistryError((f'Model named {name}, '
+                                           f'version {version} does not exist')) from None
         else:
             try:
-               models = self.list_models()
-               latest = list(filter(lambda model: model.name == name, models))[0]  
+                models = self.list_models()
+                latest = list(filter(lambda model: model.name == name, models))[0]
             except IndexError:
-               raise MlflowRegistryError(f'Model named {name} does not exist') from None
+                raise MlflowRegistryError(f'Model named {name} does not exist') from None
             model_path = os.path.join(latest.latest_versions[0].source, 'model')
         return model_path
-	
+
     def load_model(self, name=None, version=None):
-         """ Load registered model based on supplied arguments """
-         model_path = self._find_model(name=name, version=version)
-         return mlflow.pyfunc.load_model(model_path)
-         
+        """ Load registered model based on supplied arguments """
+        model_path = self._find_model(name=name, version=version)
+
+        return mlflow.pyfunc.load_model(model_path)
diff --git a/corescore/models.py b/corescore/models.py
@@ -3,6 +3,8 @@
 from functools import partial
 
 import numpy as np
+import os
+os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"  # HACK(review): presumably works around the duplicate OpenMP runtime abort - confirm still needed
 import mlflow
 import mlflow.fastai
 from fastai.vision import models
@@ -12,7 +14,7 @@
 from fastai.vision.image import open_mask
 from fastai.vision.data import SegmentationItemList
 from fastai.callbacks import *
-from fastai.vision.all import *
+# from fastai.vision.all import *
 
 from corescore.masks import LABELS
 
@@ -89,7 +91,7 @@ def fit(self, learner, lr=5.20E-05):
                               slice(lr),
                               pct_start=self.pct_start)
 
-        learner.save('/home/ahall/CoreScore/tmp')
+        learner.save(os.path.join(os.getcwd(), 'tmp'))  # save under the working directory, not a machine-specific path
 
     def get_y_fn(self, x):
         """Return a file path to a mask given an image path"""

diff --git a/make_masks_fast.py b/make_masks_fast.py
@@ -14,15 +14,10 @@
 def process_images(image_dir, label):
     """Create masks for labelled images.
     For now, merge rock fragment labels from both core boxes"""
-
-
-
     coreProcessor = CoreImageProcessor("Images",
                                        labels=label,
                                        merge_fragment_labels=True)
-
-
-
+
     image = coreProcessor.core_types
     mask_file = coreProcessor.processImage(image)
 
@@ -40,7 +35,6 @@ def process_images(image_dir, label):
     if not labels_dir:
         labels_dir = 'train_labels'
 
-
     for f in os.listdir(labels_dir):
 
         process_images(images, os.path.join(labels_dir,f))
diff --git a/requirements.txt b/requirements.txt
@@ -1,7 +1,11 @@
 fastapi
+fastai~=1.0
 numpy
 mlflow
-opencv
+opencv-python
+pandas
 pillow
+pydantic
 scipy
 scikit-image
+torchvision
diff --git a/scripts/get_metrics.py b/scripts/get_metrics.py
@@ -11,14 +11,14 @@
 
 RESULTS_DIR = '../Images/results/test_predictions'
 
-df = pd.DataFrame(columns = ['filename'] + parameter_extraction.CORE_PARAMETERS)
+df = pd.DataFrame(columns=['filename'] + parameter_extraction.CORE_PARAMETERS)
 
 for f in os.listdir(RESULTS_DIR):
 
-    im = parameter_extraction.Image(os.path.join(RESULTS_DIR,f), 64, 100000000)
+    im = parameter_extraction.Image(os.path.join(RESULTS_DIR, f), 64, 100000000)
     params = im.parameters()
-    row = pd.DataFrame(columns = ['filename'] + parameter_extraction.CORE_PARAMETERS, data=[params])
+    row = pd.DataFrame(columns=['filename'] + parameter_extraction.CORE_PARAMETERS, data=[params])
 
     df = df.append(row)
 
-df.to_csv('../Images/results/test_parameters.csv')
+df.to_csv('../Images/results/test_parameters.csv')
diff --git a/scripts/get_test_accuracy.py b/scripts/get_test_accuracy.py
@@ -5,6 +5,7 @@
 @author: Alex
 """
 
+# TODO: make this script PEP 8 compliant
 
 import cv2
 
@@ -60,15 +61,15 @@
 
     pred = image.imread(os.path.join(PREDICTIONS_DIR, f_name + '.bmp'))
     original = image.imread(os.path.join(TEST_DIR, f_name + '.jpg'))
-    #upscale prediction to match original
+    # upscale prediction to match original
     pred = cv2.resize(pred, dsize = (np.shape(original)[1], np.shape(original)[0]), interpolation = cv2.INTER_NEAREST)
 
     if(INFER_ONLY == False):
         mask = image.imread(os.path.join(MASK_DIR, f_name + '.png'))
 
-        #resize prediction to the mask - there can sometimes be a minor disparity between the mask and original
+        # resize prediction to the mask - there can sometimes be a minor disparity between the mask and original
         pred = cv2.resize(pred, dsize = (np.shape(mask)[1], np.shape(mask)[0]), interpolation = cv2.INTER_NEAREST)
-        #convert mask to integers
+        # convert mask to integers
         mask=mask*255
         mask=mask.astype(int)
 

diff --git a/scripts/local_train.py b/scripts/local_train.py
@@ -1,20 +1,16 @@
-
 import argparse
-from time import sleep
 import os
-
 from corescore.models import CoreModel
-#from corescore.mlflowregistry import MlflowRegistry
-#import mlflow
-
-
 import warnings
 warnings.filterwarnings("ignore", category=UserWarning, module="torch.nn.functional")
 
+test = os.getcwd()
 
 def train(epochs=2, lr=0.00001, resize=8, batch_size=1, path=os.getcwd()):
-#    mlflow.fastai.autolog()
-#    mlflow.set_tag('model', 'corescore')
+    #    mlflow.fastai.autolog()
+    #    mlflow.set_tag('model', 'corescore')
+
+    # test = os.getcwd()
     coremodel = CoreModel(path, epochs=epochs, batch_size=batch_size)
     unet_learn = coremodel.learner(resize=resize)
     coremodel.fit(lr=lr, learner=unet_learn)
@@ -46,9 +42,8 @@ def train(epochs=2, lr=0.00001, resize=8, batch_size=1, path=os.getcwd()):
 
     # Register the model
     # Picks up MLFLOW_TRACKING_URI from environment.
-#    MlflowRegistry().register_model("tags.model = 'corescore'",
-#                                    name="corescore")
+    #    MlflowRegistry().register_model("tags.model = 'corescore'",
+    #                                    name="corescore")
 
     # Long sleep to ensure model version is created
-   # sleep(300)
-
+    # sleep(300)
diff --git a/scripts/make_masks.py b/scripts/make_masks.py
@@ -7,8 +7,8 @@ def process_images(image_dir, labels):
     """Create masks for labelled images.
     For now, merge rock fragment labels from both core boxes"""
     coreProcessor = CoreImageProcessor("Images",
-                                        labels=labels,
-                                        merge_fragment_labels=True)
+                                       labels=labels,
+                                       merge_fragment_labels=True)
     for image in coreProcessor.core_types:
         mask_file = coreProcessor.processImage(image)
 

diff --git a/scripts/test.py b/scripts/test.py
@@ -5,15 +5,15 @@
 @author: Alex
 """
 
-from corescore import load_model
+from corescore.api import load_model
 from fastai.vision import *
-import os
+
 
 TEST_DIR = './Images/test'
 MODEL_NAME = 'tmp'
 PRED_DIR = './Images/test_predictions'
 
-unet = load_model.load_corescore_model(MODEL_NAME)
+unet = load_model.load_corescore_model(MODEL_NAME)  # TODO: broken - corescore.api.load_model has changed and this script has not been updated to match
 
 
 if __name__ == "_main__":
@@ -23,4 +23,4 @@
         img = open_image(os.path.join(TEST_DIR, im))
         pred = unet.predict(img)[0]
         pred.show()
-        plt.savefig(os.path.join(PRED_DIR, im)
+        plt.savefig(os.path.join(PRED_DIR, im))
diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
     long_description = fh.read()
 
 setuptools.setup(
-    name="corescore", # Replace with your own username
+    name="corescore",
     version="0.0.2",
     author="Zayad AlZaher, Jo Walsh",
     author_email="jowalsh@bgs.ac.uk",
@@ -20,4 +20,3 @@
     ],
     python_requires='>=3.6',
 )
-