From 028d35919481dc1f51e2934d827393e5bc10cb7b Mon Sep 17 00:00:00 2001 From: mathai Date: Wed, 12 Apr 2023 15:25:08 +0100 Subject: [PATCH] Created flowchart for rough process flow for CoreScore application, and updates to the ML model for the application. Made some files PEP8 compliant, added more Python libraries to requirements.txt Runs on Windows 10 up to test.py, this needs refactoring to work with new version of load_model. --- README.md | 58 ++++++++++++++++++++++++++++++++++++ corescore/mlflowregistry.py | 34 ++++++++++----------- corescore/models.py | 6 ++-- make_masks_fast.py | 8 +---- requirements.txt | 6 +++- scripts/get_metrics.py | 8 ++--- scripts/get_test_accuracy.py | 7 +++-- scripts/local_train.py | 21 +++++-------- scripts/make_masks.py | 4 +-- scripts/test.py | 8 ++--- setup.py | 3 +- 11 files changed, 108 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 2efd254..89f9dce 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,64 @@ The final output from the images themselves looks like what is below- with varyi ![Example of fully processed image](Images/S00128821.Cropped_Top_2_Countoured.png) +## CoreScore Application Flowchart +```mermaid +flowchart TD +id_1[("Large image store ")] +id_2[("Unlabelled images")] +id_3[("Labelled images")] +id_4_1["Create masks for training
+make_masks.py, using masks.py"] +id_4_2["test.py
+Run trained model on test data"] +id_4["local_train.py
+Train ML model (unet Resnet54 CNN)"] +id_5["Image Process request"] +id_6["Trained ML model"] +id_7["ML output API"] +id_8["Client side"] +id_9["Image labelling tool"] +id_10["ML manual checks
+accuracy, IoU, check against
previous test results
using +get_metrics.py, get_test_accuracy.py"] +id_11["Download results"] +id_12["Results feedback"] + +subgraph data_store["Large Image Store"] +id_1 --> id_2 +id_1 --> id_3 +end + +subgraph ml_model ["Production ML Model"] +id_6 +end + +subgraph im_lab ["Image Labelling"] +id_2 --> id_9 +id_9 --> id_3 +end + +subgraph ml_train [Train / Update Machine Learning model] +id_3 -->|All labelled images| id_4_1 +id_4_1 --> id_4 +id_4 --> id_4_2 +id_4_2 --> id_10 +id_10 -->|Checks pass
Updated model| id_6 +id_10 -->|Checks fail
New model retrain| id_4 +end + +subgraph cs_app [CoreScore Application] +id_8 -->id_5 +id_5 -->|large image API| id_1 +id_1 -->|downsampled image| id_6 +id_6 --> id_7 +id_7 -->|Output to client side| id_8 +id_8 --> id_11 +id_8 --> id_12 +end + +``` + ## Contributors * Zayad Al Zaher diff --git a/corescore/mlflowregistry.py b/corescore/mlflowregistry.py index e62be86..d9df8d3 100644 --- a/corescore/mlflowregistry.py +++ b/corescore/mlflowregistry.py @@ -47,26 +47,26 @@ def _find_model(self, name=None, version=None): Return model's path """ filter_str = f"name='{name}'" if version: - models = self.search_model_versions(filter_string=filter_str) - if not models: - raise MlflowRegistryError(f'Model named {name} does not exist') - model = list(filter(lambda model: model.version == version, models)) - try: - model_path = os.path.join(model[0].source, 'model') - except IndexError: - raise MlflowRegistryError((f'Model named {name},' - f'version {version} does not exist')) from None + models = self.search_model_versions(filter_string=filter_str) + if not models: + raise MlflowRegistryError(f'Model named {name} does not exist') + model = list(filter(lambda model: model.version == version, models)) + try: + model_path = os.path.join(model[0].source, 'model') + except IndexError: + raise MlflowRegistryError((f'Model named {name},' + f'version {version} does not exist')) from None else: try: - models = self.list_models() - latest = list(filter(lambda model: model.name == name, models))[0] + models = self.list_models() + latest = list(filter(lambda model: model.name == name, models))[0] except IndexError: - raise MlflowRegistryError(f'Model named {name} does not exist') from None + raise MlflowRegistryError(f'Model named {name} does not exist') from None model_path = os.path.join(latest.latest_versions[0].source, 'model') return model_path - + def load_model(self, name=None, version=None): - """ Load registered model based on supplied arguments """ - model_path = 
self._find_model(name=name, version=version) - return mlflow.pyfunc.load_model(model_path) - + """ Load registered model based on supplied arguments """ + model_path = self._find_model(name=name, version=version) + + return mlflow.pyfunc.load_model(model_path) diff --git a/corescore/models.py b/corescore/models.py index 857b7b3..56708dc 100644 --- a/corescore/models.py +++ b/corescore/models.py @@ -3,6 +3,8 @@ from functools import partial import numpy as np +import os +os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" import mlflow import mlflow.fastai from fastai.vision import models @@ -12,7 +14,7 @@ from fastai.vision.image import open_mask from fastai.vision.data import SegmentationItemList from fastai.callbacks import * -from fastai.vision.all import * +# from fastai.vision.all import * from corescore.masks import LABELS @@ -89,7 +91,7 @@ def fit(self, learner, lr=5.20E-05): slice(lr), pct_start=self.pct_start) - learner.save('/home/ahall/CoreScore/tmp') + learner.save('C:/Users/mathai/Documents/Gitlab_Projects/CoreScore/tmp') def get_y_fn(self, x): """Return a file path to a mask given an image path""" diff --git a/make_masks_fast.py b/make_masks_fast.py index 10a3138..4eeddb6 100644 --- a/make_masks_fast.py +++ b/make_masks_fast.py @@ -14,15 +14,10 @@ def process_images(image_dir, label): """Create masks for labelled images. 
For now, merge rock fragment labels from both core boxes""" - - - coreProcessor = CoreImageProcessor("Images", labels=label, merge_fragment_labels=True) - - - + image = coreProcessor.core_types mask_file = coreProcessor.processImage(image) @@ -40,7 +35,6 @@ def process_images(image_dir, label): if not labels_dir: labels_dir = 'train_labels' - for f in os.listdir(labels_dir): process_images(images, os.path.join(labels_dir,f)) diff --git a/requirements.txt b/requirements.txt index 99e0d16..c9127fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,11 @@ fastapi +fastai numpy mlflow -opencv +opencv-python +pandas pillow +pydantic scipy scikit-image +torchvision diff --git a/scripts/get_metrics.py b/scripts/get_metrics.py index 49cb098..cb4e5f3 100644 --- a/scripts/get_metrics.py +++ b/scripts/get_metrics.py @@ -11,14 +11,14 @@ RESULTS_DIR = '../Images/results/test_predictions' -df = pd.DataFrame(columns = ['filename'] + parameter_extraction.CORE_PARAMETERS) +df = pd.DataFrame(columns=['filename'] + parameter_extraction.CORE_PARAMETERS) for f in os.listdir(RESULTS_DIR): - im = parameter_extraction.Image(os.path.join(RESULTS_DIR,f), 64, 100000000) + im = parameter_extraction.Image(os.path.join(RESULTS_DIR, f), 64, 100000000) params = im.parameters() - row = pd.DataFrame(columns = ['filename'] + parameter_extraction.CORE_PARAMETERS, data=[params]) + row = pd.DataFrame(columns=['filename'] + parameter_extraction.CORE_PARAMETERS, data=[params]) df = df.append(row) -df.to_csv('../Images/results/test_parameters.csv') \ No newline at end of file +df.to_csv('../Images/results/test_parameters.csv') diff --git a/scripts/get_test_accuracy.py b/scripts/get_test_accuracy.py index 000ff69..ef547ca 100644 --- a/scripts/get_test_accuracy.py +++ b/scripts/get_test_accuracy.py @@ -5,6 +5,7 @@ @author: Alex """ +# TODO, needs to be made PEP8 compliant import cv2 @@ -60,15 +61,15 @@ pred = image.imread(os.path.join(PREDICTIONS_DIR, f_name + '.bmp')) original = 
image.imread(os.path.join(TEST_DIR, f_name + '.jpg')) - #upscale prediction to match original + # upscale prediction to match original pred = cv2.resize(pred, dsize = (np.shape(original)[1], np.shape(original)[0]), interpolation = cv2.INTER_NEAREST) if(INFER_ONLY == False): mask = image.imread(os.path.join(MASK_DIR, f_name + '.png')) - #resize prediction to the mask - there can sometimes be a minor disparity between the mask and original + # resize prediction to the mask - there can sometimes be a minor disparity between the mask and original pred = cv2.resize(pred, dsize = (np.shape(mask)[1], np.shape(mask)[0]), interpolation = cv2.INTER_NEAREST) - #convert mask to integers + # convert mask to integers mask=mask*255 mask=mask.astype(int) diff --git a/scripts/local_train.py b/scripts/local_train.py index 4cb341b..fc14051 100644 --- a/scripts/local_train.py +++ b/scripts/local_train.py @@ -1,20 +1,16 @@ - import argparse -from time import sleep import os - from corescore.models import CoreModel -#from corescore.mlflowregistry import MlflowRegistry -#import mlflow - - import warnings warnings.filterwarnings("ignore", category=UserWarning, module="torch.nn.functional") +test = os.getcwd() def train(epochs=2, lr=0.00001, resize=8, batch_size=1, path=os.getcwd()): -# mlflow.fastai.autolog() -# mlflow.set_tag('model', 'corescore') + # mlflow.fastai.autolog() + # mlflow.set_tag('model', 'corescore') + + # test = os.getcwd() coremodel = CoreModel(path, epochs=epochs, batch_size=batch_size) unet_learn = coremodel.learner(resize=resize) coremodel.fit(lr=lr, learner=unet_learn) @@ -46,9 +42,8 @@ def train(epochs=2, lr=0.00001, resize=8, batch_size=1, path=os.getcwd()): # Register the model # Picks up MLFLOW_TRACKING_URI from environment. 
-# MlflowRegistry().register_model("tags.model = 'corescore'", -# name="corescore") + # MlflowRegistry().register_model("tags.model = 'corescore'", + # name="corescore") # Long sleep to ensure model version is created - # sleep(300) - + # sleep(300) diff --git a/scripts/make_masks.py b/scripts/make_masks.py index f988bd0..5078231 100644 --- a/scripts/make_masks.py +++ b/scripts/make_masks.py @@ -7,8 +7,8 @@ def process_images(image_dir, labels): """Create masks for labelled images. For now, merge rock fragment labels from both core boxes""" coreProcessor = CoreImageProcessor("Images", - labels=labels, - merge_fragment_labels=True) + labels=labels, + merge_fragment_labels=True) for image in coreProcessor.core_types: mask_file = coreProcessor.processImage(image) diff --git a/scripts/test.py b/scripts/test.py index 99f302b..7d3d38d 100644 --- a/scripts/test.py +++ b/scripts/test.py @@ -5,15 +5,15 @@ @author: Alex """ -from corescore import load_model +from corescore.api import load_model from fastai.vision import * -import os + TEST_DIR = './Images/test' MODEL_NAME = 'tmp' PRED_DIR = './Images/test_predictions' -unet = load_model.load_corescore_model(MODEL_NAME) +unet = load_model.load_corescore_model(MODEL_NAME) # TODO, does not work, load_model changed and has not been updated if __name__ == "_main__": @@ -23,4 +23,4 @@ img = open_image(os.path.join(TEST_DIR, im)) pred = unet.predict(img)[0] pred.show() - plt.savefig(os.path.join(PRED_DIR, im) + plt.savefig(os.path.join(PRED_DIR, im)) diff --git a/setup.py b/setup.py index 8723ff7..8a75860 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ long_description = fh.read() setuptools.setup( - name="corescore", # Replace with your own username + name="corescore", version="0.0.2", author="Zayad AlZaher, Jo Walsh", author_email="jowalsh@bgs.ac.uk", @@ -20,4 +20,3 @@ ], python_requires='>=3.6', ) -