diff --git a/README.md b/README.md index 2efd254..89f9dce 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,64 @@ The final output from the images themselves looks like what is below- with varyi ![Example of fully processed image](Images/S00128821.Cropped_Top_2_Countoured.png) +## CoreScore Application Flowchart +```mermaid +flowchart TD +id_1[("Large image store ")] +id_2[("Unlabelled images")] +id_3[("Labelled images")] +id_4_1["Create masks for training
+make_masks.py, using masks.py"] +id_4_2["test.py
+Run trained model on test data"] +id_4[" local_train.py
+Train ML model (unet Resnet54 CNN)"] +id_5["Image Process request"] +id_6["Trained ML model"] +id_7["ML output API"] +id_8["Client side"] +id_9["Image labelling tool"] +id_10["ML manual checks
+accuracy, IoU, check against
previous test results
using +get_metrics.py, get_test_accuracy.py"] +id_11["Download results"] +id_12["Results feedback"] + +subgraph data_store["Large Image Store"] +id_1 --> id_2 +id_1 --> id_3 +end + +subgraph ml_model ["Production ML Model"] +id_6 +end + +subgraph im_lab ["Image Labelling"] +id_2 --> id_9 +id_9 --> id_3 +end + +subgraph ml_train [Train / Update Machine Learning model] +id_3 -->|All labelled images| id_4_1 +id_4_1 --> id_4 +id_4 --> id_4_2 +id_4_2 --> id_10 +id_10 -->|Checks pass
Updated model| id_6 +id_10 -->|Checks fail
New model retrain| id_4 +end + +subgraph cs_app [CoreScore Application] +id_8 -->id_5 +id_5 -->|large image API| id_1 +id_1 -->|downsampled image| id_6 +id_6 --> id_7 +id_7 -->|Output to client side| id_8 +id_8 --> id_11 +id_8 --> id_12 +end + +``` + ## Contributors * Zayad Al Zaher diff --git a/corescore/mlflowregistry.py b/corescore/mlflowregistry.py index e62be86..d9df8d3 100644 --- a/corescore/mlflowregistry.py +++ b/corescore/mlflowregistry.py @@ -47,26 +47,26 @@ def _find_model(self, name=None, version=None): Return model's path """ filter_str = f"name='{name}'" if version: - models = self.search_model_versions(filter_string=filter_str) - if not models: - raise MlflowRegistryError(f'Model named {name} does not exist') - model = list(filter(lambda model: model.version == version, models)) - try: - model_path = os.path.join(model[0].source, 'model') - except IndexError: - raise MlflowRegistryError((f'Model named {name},' - f'version {version} does not exist')) from None + models = self.search_model_versions(filter_string=filter_str) + if not models: + raise MlflowRegistryError(f'Model named {name} does not exist') + model = list(filter(lambda model: model.version == version, models)) + try: + model_path = os.path.join(model[0].source, 'model') + except IndexError: + raise MlflowRegistryError((f'Model named {name},' + f'version {version} does not exist')) from None else: try: - models = self.list_models() - latest = list(filter(lambda model: model.name == name, models))[0] + models = self.list_models() + latest = list(filter(lambda model: model.name == name, models))[0] except IndexError: - raise MlflowRegistryError(f'Model named {name} does not exist') from None + raise MlflowRegistryError(f'Model named {name} does not exist') from None model_path = os.path.join(latest.latest_versions[0].source, 'model') return model_path - + def load_model(self, name=None, version=None): - """ Load registered model based on supplied arguments """ - model_path = 
self._find_model(name=name, version=version) - return mlflow.pyfunc.load_model(model_path) - + """ Load registered model based on supplied arguments """ + model_path = self._find_model(name=name, version=version) + + return mlflow.pyfunc.load_model(model_path) diff --git a/corescore/models.py b/corescore/models.py index 857b7b3..56708dc 100644 --- a/corescore/models.py +++ b/corescore/models.py @@ -3,6 +3,8 @@ from functools import partial import numpy as np +import os +os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" import mlflow import mlflow.fastai from fastai.vision import models @@ -12,7 +14,7 @@ from fastai.vision.image import open_mask from fastai.vision.data import SegmentationItemList from fastai.callbacks import * -from fastai.vision.all import * +# from fastai.vision.all import * from corescore.masks import LABELS @@ -89,7 +91,7 @@ def fit(self, learner, lr=5.20E-05): slice(lr), pct_start=self.pct_start) - learner.save('/home/ahall/CoreScore/tmp') + learner.save('C:/Users/mathai/Documents/Gitlab_Projects/CoreScore/tmp') def get_y_fn(self, x): """Return a file path to a mask given an image path""" diff --git a/make_masks_fast.py b/make_masks_fast.py index 10a3138..4eeddb6 100644 --- a/make_masks_fast.py +++ b/make_masks_fast.py @@ -14,15 +14,10 @@ def process_images(image_dir, label): """Create masks for labelled images. 
For now, merge rock fragment labels from both core boxes""" - - - coreProcessor = CoreImageProcessor("Images", labels=label, merge_fragment_labels=True) - - - + image = coreProcessor.core_types mask_file = coreProcessor.processImage(image) @@ -40,7 +35,6 @@ def process_images(image_dir, label): if not labels_dir: labels_dir = 'train_labels' - for f in os.listdir(labels_dir): process_images(images, os.path.join(labels_dir,f)) diff --git a/requirements.txt b/requirements.txt index 99e0d16..c9127fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,11 @@ fastapi +fastai numpy mlflow -opencv +opencv-python +pandas pillow +pydantic scipy scikit-image +torchvision diff --git a/scripts/get_metrics.py b/scripts/get_metrics.py index 49cb098..cb4e5f3 100644 --- a/scripts/get_metrics.py +++ b/scripts/get_metrics.py @@ -11,14 +11,14 @@ RESULTS_DIR = '../Images/results/test_predictions' -df = pd.DataFrame(columns = ['filename'] + parameter_extraction.CORE_PARAMETERS) +df = pd.DataFrame(columns=['filename'] + parameter_extraction.CORE_PARAMETERS) for f in os.listdir(RESULTS_DIR): - im = parameter_extraction.Image(os.path.join(RESULTS_DIR,f), 64, 100000000) + im = parameter_extraction.Image(os.path.join(RESULTS_DIR, f), 64, 100000000) params = im.parameters() - row = pd.DataFrame(columns = ['filename'] + parameter_extraction.CORE_PARAMETERS, data=[params]) + row = pd.DataFrame(columns=['filename'] + parameter_extraction.CORE_PARAMETERS, data=[params]) df = df.append(row) -df.to_csv('../Images/results/test_parameters.csv') \ No newline at end of file +df.to_csv('../Images/results/test_parameters.csv') diff --git a/scripts/get_test_accuracy.py b/scripts/get_test_accuracy.py index 000ff69..ef547ca 100644 --- a/scripts/get_test_accuracy.py +++ b/scripts/get_test_accuracy.py @@ -5,6 +5,7 @@ @author: Alex """ +# TODO, needs to be made PEP8 compliant import cv2 @@ -60,15 +61,15 @@ pred = image.imread(os.path.join(PREDICTIONS_DIR, f_name + '.bmp')) original = 
image.imread(os.path.join(TEST_DIR, f_name + '.jpg')) - #upscale prediction to match original + # upscale prediction to match original pred = cv2.resize(pred, dsize = (np.shape(original)[1], np.shape(original)[0]), interpolation = cv2.INTER_NEAREST) if(INFER_ONLY == False): mask = image.imread(os.path.join(MASK_DIR, f_name + '.png')) - #resize prediction to the mask - there can sometimes be a minor disparity between the mask and original + # resize prediction to the mask - there can sometimes be a minor disparity between the mask and original pred = cv2.resize(pred, dsize = (np.shape(mask)[1], np.shape(mask)[0]), interpolation = cv2.INTER_NEAREST) - #convert mask to integers + # convert mask to integers mask=mask*255 mask=mask.astype(int) diff --git a/scripts/local_train.py b/scripts/local_train.py index 4cb341b..fc14051 100644 --- a/scripts/local_train.py +++ b/scripts/local_train.py @@ -1,20 +1,16 @@ - import argparse -from time import sleep import os - from corescore.models import CoreModel -#from corescore.mlflowregistry import MlflowRegistry -#import mlflow - - import warnings warnings.filterwarnings("ignore", category=UserWarning, module="torch.nn.functional") +test = os.getcwd() def train(epochs=2, lr=0.00001, resize=8, batch_size=1, path=os.getcwd()): -# mlflow.fastai.autolog() -# mlflow.set_tag('model', 'corescore') + # mlflow.fastai.autolog() + # mlflow.set_tag('model', 'corescore') + + # test = os.getcwd() coremodel = CoreModel(path, epochs=epochs, batch_size=batch_size) unet_learn = coremodel.learner(resize=resize) coremodel.fit(lr=lr, learner=unet_learn) @@ -46,9 +42,8 @@ def train(epochs=2, lr=0.00001, resize=8, batch_size=1, path=os.getcwd()): # Register the model # Picks up MLFLOW_TRACKING_URI from environment. 
-# MlflowRegistry().register_model("tags.model = 'corescore'", -# name="corescore") + # MlflowRegistry().register_model("tags.model = 'corescore'", + # name="corescore") # Long sleep to ensure model version is created - # sleep(300) - + # sleep(300) diff --git a/scripts/make_masks.py b/scripts/make_masks.py index f988bd0..5078231 100644 --- a/scripts/make_masks.py +++ b/scripts/make_masks.py @@ -7,8 +7,8 @@ def process_images(image_dir, labels): """Create masks for labelled images. For now, merge rock fragment labels from both core boxes""" coreProcessor = CoreImageProcessor("Images", - labels=labels, - merge_fragment_labels=True) + labels=labels, + merge_fragment_labels=True) for image in coreProcessor.core_types: mask_file = coreProcessor.processImage(image) diff --git a/scripts/test.py b/scripts/test.py index 99f302b..7d3d38d 100644 --- a/scripts/test.py +++ b/scripts/test.py @@ -5,15 +5,15 @@ @author: Alex """ -from corescore import load_model +from corescore.api import load_model from fastai.vision import * -import os + TEST_DIR = './Images/test' MODEL_NAME = 'tmp' PRED_DIR = './Images/test_predictions' -unet = load_model.load_corescore_model(MODEL_NAME) +unet = load_model.load_corescore_model(MODEL_NAME) # TODO, does not work, load_model changed and has not been updated if __name__ == "_main__": @@ -23,4 +23,4 @@ img = open_image(os.path.join(TEST_DIR, im)) pred = unet.predict(img)[0] pred.show() - plt.savefig(os.path.join(PRED_DIR, im) + plt.savefig(os.path.join(PRED_DIR, im)) diff --git a/setup.py b/setup.py index 8723ff7..8a75860 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ long_description = fh.read() setuptools.setup( - name="corescore", # Replace with your own username + name="corescore", version="0.0.2", author="Zayad AlZaher, Jo Walsh", author_email="jowalsh@bgs.ac.uk", @@ -20,4 +20,3 @@ ], python_requires='>=3.6', ) -