diff --git a/README.md b/README.md
index 2efd254..89f9dce 100644
--- a/README.md
+++ b/README.md
@@ -58,6 +58,64 @@ The final output from the images themselves looks like what is below- with varyi

+## CoreScore Application Flowchart
+```mermaid
+flowchart TD
+id_1[("Large image store ")]
+id_2[("Unlabelled images")]
+id_3[("Labelled images")]
+id_4_1["Create masks for training
+make_masks.py, using masks.py"]
+id_4_2["test.py
+Run trained model on test data"]
+id_4[" local_train.py
+Train ML model (unet Resnet54 CNN)"]
+id_5["Image Process request"]
+id_6["Trained ML model"]
+id_7["ML output API"]
+id_8["Client side"]
+id_9["Image labelling tool"]
+id_10["ML manual checks
+accuracy, IoU, check against
previous test results
using
+get_metrics.py, get_test_accuracy.py"]
+id_11["Download results"]
+id_12["Results feedback"]
+
+subgraph data_store["Large Image Store"]
+id_1 --> id_2
+id_1 --> id_3
+end
+
+subgraph ml_model ["Production ML Model"]
+id_6
+end
+
+subgraph im_lab ["Image Labelling"]
+id_2 --> id_9
+id_9 --> id_3
+end
+
+subgraph ml_train [Train / Update Machine Learning model]
+id_3 -->|All labelled images| id_4_1
+id_4_1 --> id_4
+id_4 --> id_4_2
+id_4_2 --> id_10
+id_10 -->|Checks pass
Updated model| id_6
+id_10 -->|Checks fail
New model retrain| id_4
+end
+
+subgraph cs_app [CoreScore Application]
+id_8 -->id_5
+id_5 -->|large image API| id_1
+id_1 -->|downsampled image| id_6
+id_6 --> id_7
+id_7 -->|Output to client side| id_8
+id_8 --> id_11
+id_8 --> id_12
+end
+
+```
+
## Contributors
* Zayad Al Zaher
diff --git a/corescore/mlflowregistry.py b/corescore/mlflowregistry.py
index e62be86..d9df8d3 100644
--- a/corescore/mlflowregistry.py
+++ b/corescore/mlflowregistry.py
@@ -47,26 +47,26 @@ def _find_model(self, name=None, version=None):
Return model's path """
filter_str = f"name='{name}'"
if version:
- models = self.search_model_versions(filter_string=filter_str)
- if not models:
- raise MlflowRegistryError(f'Model named {name} does not exist')
- model = list(filter(lambda model: model.version == version, models))
- try:
- model_path = os.path.join(model[0].source, 'model')
- except IndexError:
- raise MlflowRegistryError((f'Model named {name},'
- f'version {version} does not exist')) from None
+ models = self.search_model_versions(filter_string=filter_str)
+ if not models:
+ raise MlflowRegistryError(f'Model named {name} does not exist')
+ model = list(filter(lambda model: model.version == version, models))
+ try:
+ model_path = os.path.join(model[0].source, 'model')
+ except IndexError:
+ raise MlflowRegistryError((f'Model named {name},'
+ f'version {version} does not exist')) from None
else:
try:
- models = self.list_models()
- latest = list(filter(lambda model: model.name == name, models))[0]
+ models = self.list_models()
+ latest = list(filter(lambda model: model.name == name, models))[0]
except IndexError:
- raise MlflowRegistryError(f'Model named {name} does not exist') from None
+ raise MlflowRegistryError(f'Model named {name} does not exist') from None
model_path = os.path.join(latest.latest_versions[0].source, 'model')
return model_path
-
+
def load_model(self, name=None, version=None):
- """ Load registered model based on supplied arguments """
- model_path = self._find_model(name=name, version=version)
- return mlflow.pyfunc.load_model(model_path)
-
+ """ Load registered model based on supplied arguments """
+ model_path = self._find_model(name=name, version=version)
+
+ return mlflow.pyfunc.load_model(model_path)
diff --git a/corescore/models.py b/corescore/models.py
index 857b7b3..56708dc 100644
--- a/corescore/models.py
+++ b/corescore/models.py
@@ -3,6 +3,8 @@
from functools import partial
import numpy as np
+import os
+os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import mlflow
import mlflow.fastai
from fastai.vision import models
@@ -12,7 +14,7 @@
from fastai.vision.image import open_mask
from fastai.vision.data import SegmentationItemList
from fastai.callbacks import *
-from fastai.vision.all import *
+# from fastai.vision.all import *
from corescore.masks import LABELS
@@ -89,7 +91,7 @@ def fit(self, learner, lr=5.20E-05):
slice(lr),
pct_start=self.pct_start)
- learner.save('/home/ahall/CoreScore/tmp')
+ learner.save('C:/Users/mathai/Documents/Gitlab_Projects/CoreScore/tmp')
def get_y_fn(self, x):
"""Return a file path to a mask given an image path"""
diff --git a/make_masks_fast.py b/make_masks_fast.py
index 10a3138..4eeddb6 100644
--- a/make_masks_fast.py
+++ b/make_masks_fast.py
@@ -14,15 +14,10 @@
def process_images(image_dir, label):
"""Create masks for labelled images.
For now, merge rock fragment labels from both core boxes"""
-
-
-
coreProcessor = CoreImageProcessor("Images",
labels=label,
merge_fragment_labels=True)
-
-
-
+
image = coreProcessor.core_types
mask_file = coreProcessor.processImage(image)
@@ -40,7 +35,6 @@ def process_images(image_dir, label):
if not labels_dir:
labels_dir = 'train_labels'
-
for f in os.listdir(labels_dir):
process_images(images, os.path.join(labels_dir,f))
diff --git a/requirements.txt b/requirements.txt
index 99e0d16..c9127fb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,11 @@
fastapi
+fastai
numpy
mlflow
-opencv
+opencv-python
+pandas
pillow
+pydantic
scipy
scikit-image
+torchvision
diff --git a/scripts/get_metrics.py b/scripts/get_metrics.py
index 49cb098..cb4e5f3 100644
--- a/scripts/get_metrics.py
+++ b/scripts/get_metrics.py
@@ -11,14 +11,14 @@
RESULTS_DIR = '../Images/results/test_predictions'
-df = pd.DataFrame(columns = ['filename'] + parameter_extraction.CORE_PARAMETERS)
+df = pd.DataFrame(columns=['filename'] + parameter_extraction.CORE_PARAMETERS)
for f in os.listdir(RESULTS_DIR):
- im = parameter_extraction.Image(os.path.join(RESULTS_DIR,f), 64, 100000000)
+ im = parameter_extraction.Image(os.path.join(RESULTS_DIR, f), 64, 100000000)
params = im.parameters()
- row = pd.DataFrame(columns = ['filename'] + parameter_extraction.CORE_PARAMETERS, data=[params])
+ row = pd.DataFrame(columns=['filename'] + parameter_extraction.CORE_PARAMETERS, data=[params])
df = df.append(row)
-df.to_csv('../Images/results/test_parameters.csv')
\ No newline at end of file
+df.to_csv('../Images/results/test_parameters.csv')
diff --git a/scripts/get_test_accuracy.py b/scripts/get_test_accuracy.py
index 000ff69..ef547ca 100644
--- a/scripts/get_test_accuracy.py
+++ b/scripts/get_test_accuracy.py
@@ -5,6 +5,7 @@
@author: Alex
"""
+# TODO: make this script PEP 8 compliant
import cv2
@@ -60,15 +61,15 @@
pred = image.imread(os.path.join(PREDICTIONS_DIR, f_name + '.bmp'))
original = image.imread(os.path.join(TEST_DIR, f_name + '.jpg'))
- #upscale prediction to match original
+ # upscale prediction to match original
pred = cv2.resize(pred, dsize = (np.shape(original)[1], np.shape(original)[0]), interpolation = cv2.INTER_NEAREST)
if(INFER_ONLY == False):
mask = image.imread(os.path.join(MASK_DIR, f_name + '.png'))
- #resize prediction to the mask - there can sometimes be a minor disparity between the mask and original
+ # resize prediction to the mask - there can sometimes be a minor disparity between the mask and original
pred = cv2.resize(pred, dsize = (np.shape(mask)[1], np.shape(mask)[0]), interpolation = cv2.INTER_NEAREST)
- #convert mask to integers
+ # convert mask to integers
mask=mask*255
mask=mask.astype(int)
diff --git a/scripts/local_train.py b/scripts/local_train.py
index 4cb341b..fc14051 100644
--- a/scripts/local_train.py
+++ b/scripts/local_train.py
@@ -1,20 +1,16 @@
-
import argparse
-from time import sleep
import os
-
from corescore.models import CoreModel
-#from corescore.mlflowregistry import MlflowRegistry
-#import mlflow
-
-
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="torch.nn.functional")
+test = os.getcwd()
def train(epochs=2, lr=0.00001, resize=8, batch_size=1, path=os.getcwd()):
-# mlflow.fastai.autolog()
-# mlflow.set_tag('model', 'corescore')
+ # mlflow.fastai.autolog()
+ # mlflow.set_tag('model', 'corescore')
+
+ # test = os.getcwd()
coremodel = CoreModel(path, epochs=epochs, batch_size=batch_size)
unet_learn = coremodel.learner(resize=resize)
coremodel.fit(lr=lr, learner=unet_learn)
@@ -46,9 +42,8 @@ def train(epochs=2, lr=0.00001, resize=8, batch_size=1, path=os.getcwd()):
# Register the model
# Picks up MLFLOW_TRACKING_URI from environment.
-# MlflowRegistry().register_model("tags.model = 'corescore'",
-# name="corescore")
+ # MlflowRegistry().register_model("tags.model = 'corescore'",
+ # name="corescore")
# Long sleep to ensure model version is created
- # sleep(300)
-
+ # sleep(300)
diff --git a/scripts/make_masks.py b/scripts/make_masks.py
index f988bd0..5078231 100644
--- a/scripts/make_masks.py
+++ b/scripts/make_masks.py
@@ -7,8 +7,8 @@ def process_images(image_dir, labels):
"""Create masks for labelled images.
For now, merge rock fragment labels from both core boxes"""
coreProcessor = CoreImageProcessor("Images",
- labels=labels,
- merge_fragment_labels=True)
+ labels=labels,
+ merge_fragment_labels=True)
for image in coreProcessor.core_types:
mask_file = coreProcessor.processImage(image)
diff --git a/scripts/test.py b/scripts/test.py
index 99f302b..7d3d38d 100644
--- a/scripts/test.py
+++ b/scripts/test.py
@@ -5,15 +5,15 @@
@author: Alex
"""
-from corescore import load_model
+from corescore.api import load_model
from fastai.vision import *
-import os
+
TEST_DIR = './Images/test'
MODEL_NAME = 'tmp'
PRED_DIR = './Images/test_predictions'
-unet = load_model.load_corescore_model(MODEL_NAME)
+unet = load_model.load_corescore_model(MODEL_NAME) # TODO: this call does not work; load_model has changed and this script has not been updated
if __name__ == "_main__":
@@ -23,4 +23,4 @@
img = open_image(os.path.join(TEST_DIR, im))
pred = unet.predict(img)[0]
pred.show()
- plt.savefig(os.path.join(PRED_DIR, im)
+ plt.savefig(os.path.join(PRED_DIR, im))
diff --git a/setup.py b/setup.py
index 8723ff7..8a75860 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
long_description = fh.read()
setuptools.setup(
- name="corescore", # Replace with your own username
+ name="corescore",
version="0.0.2",
author="Zayad AlZaher, Jo Walsh",
author_email="jowalsh@bgs.ac.uk",
@@ -20,4 +20,3 @@
],
python_requires='>=3.6',
)
-