From 028d35919481dc1f51e2934d827393e5bc10cb7b Mon Sep 17 00:00:00 2001
From: mathai <mathai@bgs.ac.uk>
Date: Wed, 12 Apr 2023 15:25:08 +0100
Subject: [PATCH] Created flowchart for rough process flow for CoreScore
 application, and updates to the ML model for the application.

Made some files PEP8 compliant, added more Python libraries to requirements.txt

Runs on Windows 10 up to test.py, which still needs refactoring to work with the new version of load_model.
---
 README.md                    | 58 ++++++++++++++++++++++++++++++++++++
 corescore/mlflowregistry.py  | 34 ++++++++++-----------
 corescore/models.py          |  6 ++--
 make_masks_fast.py           |  8 +----
 requirements.txt             |  6 +++-
 scripts/get_metrics.py       |  8 ++---
 scripts/get_test_accuracy.py |  7 +++--
 scripts/local_train.py       | 21 +++++--------
 scripts/make_masks.py        |  4 +--
 scripts/test.py              |  8 ++---
 setup.py                     |  3 +-
 11 files changed, 108 insertions(+), 55 deletions(-)
diff --git a/README.md b/README.md
index 2efd254..89f9dce 100644
--- a/README.md
+++ b/README.md
@@ -58,6 +58,64 @@ The final output from the images themselves looks like what is below- with varyi
 ![Example of fully processed image](Images/S00128821.Cropped_Top_2_Countoured.png)
 
 
+## CoreScore Application Flowchart
+```mermaid
+flowchart TD
+id_1[("Large image store")]
+id_2[("Unlabelled images")]
+id_3[("Labelled images")]
+id_4_1["Create masks for training <br>
+make_masks.py, using masks.py"]
+id_4_2["test.py <br>
+Run trained model on test data"]
+id_4["local_train.py <br>
+Train ML model (unet Resnet54 CNN)"]
+id_5["Image Process request"]
+id_6["Trained ML model"]
+id_7["ML output API"]
+id_8["Client side"]
+id_9["Image labelling tool"]
+id_10["ML manual checks <br>
+accuracy, IoU, check against <br> previous test results <br> using
+get_metrics.py, get_test_accuracy.py"]
+id_11["Download results"]
+id_12["Results feedback"]
+
+subgraph data_store["Large Image Store"]
+id_1 --> id_2
+id_1 --> id_3
+end
+
+subgraph ml_model ["Production ML Model"]
+id_6
+end
+
+subgraph im_lab ["Image Labelling"]
+id_2 --> id_9
+id_9 --> id_3
+end
+
+subgraph ml_train [Train / Update Machine Learning model]
+id_3 -->|All labelled images| id_4_1
+id_4_1 --> id_4
+id_4 --> id_4_2
+id_4_2 --> id_10
+id_10 -->|Checks pass <br> Updated model| id_6
+id_10 -->|Checks fail <br> New model retrain| id_4
+end
+
+subgraph cs_app [CoreScore Application]
+id_8 --> id_5
+id_5 -->|large image API| id_1
+id_1 -->|downsampled image| id_6
+id_6 --> id_7
+id_7 -->|Output to client side| id_8
+id_8 --> id_11
+id_8 --> id_12
+end
+
+```
+
 ## Contributors
 
  * Zayad Al Zaher
diff --git a/corescore/mlflowregistry.py b/corescore/mlflowregistry.py
index e62be86..d9df8d3 100644
--- a/corescore/mlflowregistry.py
+++ b/corescore/mlflowregistry.py
@@ -47,26 +47,26 @@ def _find_model(self, name=None, version=None):
             Return model's path """
         filter_str = f"name='{name}'"
         if version:
-             models = self.search_model_versions(filter_string=filter_str)
-             if not models:
-                 raise MlflowRegistryError(f'Model named {name} does not exist')
-             model = list(filter(lambda model: model.version == version,  models))
-             try:
-                 model_path = os.path.join(model[0].source, 'model')
-             except IndexError:
-                raise MlflowRegistryError((f'Model named {name},'
-                                           f'version {version} does not exist')) from None
+            models = self.search_model_versions(filter_string=filter_str)
+            if not models:
+                raise MlflowRegistryError(f'Model named {name} does not exist')
+            model = list(filter(lambda model: model.version == version,  models))
+            try:
+                model_path = os.path.join(model[0].source, 'model')
+            except IndexError:
+               raise MlflowRegistryError((f'Model named {name},'
+                                          f'version {version} does not exist')) from None
         else:
             try:
-               models = self.list_models()
-               latest = list(filter(lambda model: model.name == name, models))[0]  
+                models = self.list_models()
+                latest = list(filter(lambda model: model.name == name, models))[0]
             except IndexError:
-               raise MlflowRegistryError(f'Model named {name} does not exist') from None
+                raise MlflowRegistryError(f'Model named {name} does not exist') from None
             model_path = os.path.join(latest.latest_versions[0].source, 'model')
         return model_path
-	
+
     def load_model(self, name=None, version=None):
-         """ Load registered model based on supplied arguments """
-         model_path = self._find_model(name=name, version=version)
-         return mlflow.pyfunc.load_model(model_path)
-         
+        """ Load registered model based on supplied arguments """
+        model_path = self._find_model(name=name, version=version)
+
+        return mlflow.pyfunc.load_model(model_path)
diff --git a/corescore/models.py b/corescore/models.py
index 857b7b3..56708dc 100644
--- a/corescore/models.py
+++ b/corescore/models.py
@@ -3,6 +3,8 @@
 from functools import partial
 
 import numpy as np
+import os
+os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
 import mlflow
 import mlflow.fastai
 from fastai.vision import models
@@ -12,7 +14,7 @@
 from fastai.vision.image import open_mask
 from fastai.vision.data import SegmentationItemList
 from fastai.callbacks import *
-from fastai.vision.all import *
+# from fastai.vision.all import *
 
 from corescore.masks import LABELS
 
@@ -89,7 +91,7 @@ def fit(self, learner, lr=5.20E-05):
                               slice(lr),
                               pct_start=self.pct_start)
        
-        learner.save('/home/ahall/CoreScore/tmp')
+        learner.save('C:/Users/mathai/Documents/Gitlab_Projects/CoreScore/tmp')
 
     def get_y_fn(self, x):
         """Return a file path to a mask given an image path"""
diff --git a/make_masks_fast.py b/make_masks_fast.py
index 10a3138..4eeddb6 100644
--- a/make_masks_fast.py
+++ b/make_masks_fast.py
@@ -14,15 +14,10 @@
 def process_images(image_dir, label):
     """Create masks for labelled images.
     For now, merge rock fragment labels from both core boxes"""
-    
-    
-        
     coreProcessor = CoreImageProcessor("Images",
                                        labels=label,
                                        merge_fragment_labels=True)
-        
-        
-        
+
     image = coreProcessor.core_types
     mask_file = coreProcessor.processImage(image)
 
@@ -40,7 +35,6 @@ def process_images(image_dir, label):
     if not labels_dir:
         labels_dir = 'train_labels'
     
-    
     for f in os.listdir(labels_dir):
         
         process_images(images, os.path.join(labels_dir,f))
diff --git a/requirements.txt b/requirements.txt
index 99e0d16..c9127fb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,11 @@
 fastapi
+fastai
 numpy
 mlflow
-opencv
+opencv-python
+pandas
 pillow
+pydantic
 scipy
 scikit-image
+torchvision
diff --git a/scripts/get_metrics.py b/scripts/get_metrics.py
index 49cb098..cb4e5f3 100644
--- a/scripts/get_metrics.py
+++ b/scripts/get_metrics.py
@@ -11,14 +11,14 @@
 
 RESULTS_DIR = '../Images/results/test_predictions'
 
-df = pd.DataFrame(columns = ['filename'] + parameter_extraction.CORE_PARAMETERS)
+df = pd.DataFrame(columns=['filename'] + parameter_extraction.CORE_PARAMETERS)
 
 for f in os.listdir(RESULTS_DIR):
 
-    im = parameter_extraction.Image(os.path.join(RESULTS_DIR,f), 64, 100000000)
+    im = parameter_extraction.Image(os.path.join(RESULTS_DIR, f), 64, 100000000)
     params = im.parameters()
-    row = pd.DataFrame(columns = ['filename'] + parameter_extraction.CORE_PARAMETERS, data=[params])
+    row = pd.DataFrame(columns=['filename'] + parameter_extraction.CORE_PARAMETERS, data=[params])
     
     df = df.append(row)
     
-df.to_csv('../Images/results/test_parameters.csv')
\ No newline at end of file
+df.to_csv('../Images/results/test_parameters.csv')
diff --git a/scripts/get_test_accuracy.py b/scripts/get_test_accuracy.py
index 000ff69..ef547ca 100644
--- a/scripts/get_test_accuracy.py
+++ b/scripts/get_test_accuracy.py
@@ -5,6 +5,7 @@
 @author: Alex
 """
 
+# TODO: make this script PEP8 compliant
 
 import cv2
 
@@ -60,15 +61,15 @@
     
     pred = image.imread(os.path.join(PREDICTIONS_DIR, f_name + '.bmp'))
     original = image.imread(os.path.join(TEST_DIR, f_name + '.jpg'))
-    #upscale prediction to match original
+    # upscale prediction to match original
     pred = cv2.resize(pred, dsize = (np.shape(original)[1], np.shape(original)[0]), interpolation = cv2.INTER_NEAREST)
     
     if(INFER_ONLY == False):
         mask = image.imread(os.path.join(MASK_DIR, f_name + '.png'))
         
-        #resize prediction to the mask - there can sometimes be a minor disparity between the mask and original
+        # resize prediction to the mask - there can sometimes be a minor disparity between the mask and original
         pred = cv2.resize(pred, dsize = (np.shape(mask)[1], np.shape(mask)[0]), interpolation = cv2.INTER_NEAREST)
-        #convert mask to integers
+        # convert mask to integers
         mask=mask*255
         mask=mask.astype(int)
         
diff --git a/scripts/local_train.py b/scripts/local_train.py
index 4cb341b..fc14051 100644
--- a/scripts/local_train.py
+++ b/scripts/local_train.py
@@ -1,20 +1,16 @@
-
 import argparse
-from time import sleep
 import os
-
 from corescore.models import CoreModel
-#from corescore.mlflowregistry import MlflowRegistry
-#import mlflow
-
-
 import warnings
 warnings.filterwarnings("ignore", category=UserWarning, module="torch.nn.functional")
 
+test = os.getcwd()
 
 def train(epochs=2, lr=0.00001, resize=8, batch_size=1, path=os.getcwd()):
-#    mlflow.fastai.autolog()
-#    mlflow.set_tag('model', 'corescore')
+    #    mlflow.fastai.autolog()
+    #    mlflow.set_tag('model', 'corescore')
+
+    # test = os.getcwd()
     coremodel = CoreModel(path, epochs=epochs, batch_size=batch_size)
     unet_learn = coremodel.learner(resize=resize)
     coremodel.fit(lr=lr, learner=unet_learn)
@@ -46,9 +42,8 @@ def train(epochs=2, lr=0.00001, resize=8, batch_size=1, path=os.getcwd()):
 
     # Register the model
     # Picks up MLFLOW_TRACKING_URI from environment.
-#    MlflowRegistry().register_model("tags.model = 'corescore'",
-#                                    name="corescore")
+    #    MlflowRegistry().register_model("tags.model = 'corescore'",
+    #                                    name="corescore")
     
     # Long sleep to ensure model version is created
-   # sleep(300)
-
+    # sleep(300)
diff --git a/scripts/make_masks.py b/scripts/make_masks.py
index f988bd0..5078231 100644
--- a/scripts/make_masks.py
+++ b/scripts/make_masks.py
@@ -7,8 +7,8 @@ def process_images(image_dir, labels):
     """Create masks for labelled images.
     For now, merge rock fragment labels from both core boxes"""
     coreProcessor = CoreImageProcessor("Images",
-                                        labels=labels,
-                                        merge_fragment_labels=True)
+                                       labels=labels,
+                                       merge_fragment_labels=True)
     for image in coreProcessor.core_types:
         mask_file = coreProcessor.processImage(image)
 
diff --git a/scripts/test.py b/scripts/test.py
index 99f302b..7d3d38d 100644
--- a/scripts/test.py
+++ b/scripts/test.py
@@ -5,15 +5,15 @@
 @author: Alex
 """
 
-from corescore import load_model
+from corescore.api import load_model
 from fastai.vision import *
-import os
+
 
 TEST_DIR = './Images/test'
 MODEL_NAME = 'tmp'
 PRED_DIR = './Images/test_predictions'
 
-unet = load_model.load_corescore_model(MODEL_NAME)
+unet = load_model.load_corescore_model(MODEL_NAME)  # TODO, does not work, load_model changed and has not been updated
 
 
 if __name__ == "_main__":
@@ -23,4 +23,4 @@
         img = open_image(os.path.join(TEST_DIR, im))
         pred = unet.predict(img)[0]
         pred.show()
-        plt.savefig(os.path.join(PRED_DIR, im)
+        plt.savefig(os.path.join(PRED_DIR, im))
diff --git a/setup.py b/setup.py
index 8723ff7..8a75860 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
     long_description = fh.read()
 
 setuptools.setup(
-    name="corescore", # Replace with your own username
+    name="corescore",
     version="0.0.2",
     author="Zayad AlZaher, Jo Walsh",
     author_email="jowalsh@bgs.ac.uk",
@@ -20,4 +20,3 @@
     ],
     python_requires='>=3.6',
 )
-