282 commits
ba9b730
Use local copy of image
DimaBir Dec 22, 2023
0ebbe3e
Use local copy of image
DimaBir Dec 22, 2023
c95d5d3
Use local copy of image
DimaBir Dec 22, 2023
18a8902
Use local copy of image
DimaBir Dec 22, 2023
fa08c2d
Transfer numpy array to the list before jsonoify
DimaBir Dec 22, 2023
0430c81
Transfer numpy array to the list before jsonoify
DimaBir Dec 22, 2023
b258b04
Transfer numpy array to the list before jsonoify
DimaBir Dec 22, 2023
78fe3a8
Revert "Transfer numpy array to the list before jsonoify"
DimaBir Dec 22, 2023
928c18d
Revert "Transfer numpy array to the list before jsonoify"
DimaBir Dec 22, 2023
86b98d5
Revert "Transfer numpy array to the list before jsonoify"
DimaBir Dec 22, 2023
42c1c24
Transfer numpy array to the list before jsonoify
DimaBir Dec 22, 2023
a89479d
Transfer numpy array to the list before jsonoify
DimaBir Dec 22, 2023
cbc7066
Big FIX: probability bug, fixed returning raw probability
DimaBir Dec 22, 2023
2b97cb4
Refactored HTML added Image mini picture and prediction results as a …
DimaBir Dec 22, 2023
7720d9f
Refactored HTML added Image mini picture and prediction results as a …
DimaBir Dec 22, 2023
1bbc5c5
Refactored HTML added Image mini picture and prediction results as a …
DimaBir Dec 22, 2023
b352a9e
Refactored HTML added Image mini picture and prediction results as a …
DimaBir Dec 22, 2023
561fc75
Refactored HTML added Image mini picture and prediction results as a …
DimaBir Dec 22, 2023
a2de416
Refactored HTML added Image mini picture and prediction results as a …
DimaBir Dec 22, 2023
0542548
Refactored HTML added Image mini picture and prediction results as a …
DimaBir Dec 22, 2023
c2d5722
Refactored HTML added Image mini picture and prediction results as a …
DimaBir Dec 23, 2023
edd00f9
Refactored HTML added Image mini picture and prediction results as a …
DimaBir Dec 23, 2023
abfe73d
Refactored HTML added Image mini picture and prediction results as a …
DimaBir Dec 23, 2023
542ffe4
Refactored HTML added Image mini picture and prediction results as a …
DimaBir Dec 23, 2023
760be19
Refactored HTML added Image mini picture and prediction results as a …
DimaBir Dec 23, 2023
3579f44
Refactored HTML added Image mini picture and prediction results as a …
DimaBir Dec 23, 2023
2e6bb1c
Refactored HTML added Image mini picture and prediction results as a …
DimaBir Dec 23, 2023
d06c608
Trying to apply image resize
DimaBir Dec 23, 2023
5edbcf3
Trying to apply image resize
DimaBir Dec 23, 2023
fd37f17
Trying to apply image resize
DimaBir Dec 23, 2023
fb1e864
Less transparent bars
DimaBir Dec 23, 2023
78e96db
Redesign
DimaBir Dec 23, 2023
2f4d04a
Redesign and adding inference time
DimaBir Dec 23, 2023
440f394
Redesign and adding inference time
DimaBir Dec 23, 2023
570a28d
Merge remote-tracking branch 'origin/web_demo' into web_demo
DimaBir Dec 23, 2023
e02418b
Revert "Redesign and adding inference time"
DimaBir Dec 23, 2023
4552743
Redesign and adding inference time
DimaBir Dec 23, 2023
acdd5ee
Redesign and adding inference time
DimaBir Dec 23, 2023
669b4c3
Fix displayPredictions
DimaBir Dec 23, 2023
9b1980f
Added styling
DimaBir Dec 23, 2023
8411783
Added styling
DimaBir Dec 23, 2023
ee9509f
Added styling
DimaBir Dec 23, 2023
fad4bd8
Added styling
DimaBir Dec 23, 2023
d2c8f1d
Horizontal page layout
DimaBir Dec 23, 2023
5fd7990
Revert "Horizontal page layout"
DimaBir Dec 23, 2023
e6e86b5
Added flash image
DimaBir Dec 23, 2023
5003ac7
Fixed get_top_predictions in all inference classes for accurate softm…
DimaBir Dec 23, 2023
793b14f
Fixed get_top_predictions in all inference classes for accurate softm…
DimaBir Dec 23, 2023
a1adf87
Fixed get_top_predictions for pytorch_inference.py
DimaBir Dec 23, 2023
f573cca
Fixed get_top_predictions for pytorch_inference.py
DimaBir Dec 23, 2023
7bb20a6
Fixed get_top_predictions for pytorch_inference.py
DimaBir Dec 23, 2023
78c3f32
Fixed get_top_predictions for pytorch_inference.py
DimaBir Dec 23, 2023
9b17947
Fixed pytorch_inference.py
DimaBir Dec 23, 2023
c49de6b
Added official preprocessor for input images
DimaBir Dec 23, 2023
f44c254
Added official preprocessor for input images
DimaBir Dec 23, 2023
b3fa695
Added official preprocessor for input images
DimaBir Dec 23, 2023
43a230d
Added prediction label to front end
DimaBir Dec 23, 2023
e80a7e2
Added prediction label to front end
DimaBir Dec 23, 2023
2e6f257
Added prediction label to front end
DimaBir Dec 23, 2023
c2f81ab
Color update
DimaBir Dec 23, 2023
b376d54
Color update
DimaBir Dec 23, 2023
328d25e
Color update
DimaBir Dec 23, 2023
b2ff85b
Color update
DimaBir Dec 23, 2023
b6f902a
Color update
DimaBir Dec 23, 2023
a640a8b
Try vertical layout
DimaBir Dec 23, 2023
b0a13e9
Try vertical layout
DimaBir Dec 23, 2023
5f5a523
Merge remote-tracking branch 'origin/web_demo' into web_demo
DimaBir Dec 23, 2023
1921e7d
Revert vertical layout
DimaBir Dec 23, 2023
ab75045
ONNXInference predict and get_top_predict fix
DimaBir Dec 23, 2023
c12637f
Made models in user form dynamically uploaded depending on mode choosen
DimaBir Dec 23, 2023
365e959
Made models in user form dynamically uploaded depending on mode choosen
DimaBir Dec 23, 2023
f2eb67c
Made models in user form dynamically uploaded depending on mode choosen
DimaBir Dec 23, 2023
2e37ccd
Fixed ONNX Inference predict
DimaBir Dec 23, 2023
95d9a86
Added color palette
DimaBir Dec 23, 2023
60bceb6
Added color palette
DimaBir Dec 23, 2023
f745c34
Added color palette
DimaBir Dec 23, 2023
6f7b5d1
Try vertical layout for mobile
DimaBir Dec 23, 2023
e0731a7
Try vertical layout for mobile
DimaBir Dec 23, 2023
5612de4
-- Try vertical layout for mobile
DimaBir Dec 23, 2023
b465cc2
Revert "-- Try vertical layout for mobile"
DimaBir Dec 23, 2023
b1b552d
Revert "Try vertical layout for mobile"
DimaBir Dec 23, 2023
82ac712
Revert "Try vertical layout for mobile"
DimaBir Dec 23, 2023
8810594
-- Try vertical layout for mobile
DimaBir Dec 23, 2023
7e9e8b7
-- Try vertical layout for mobile
DimaBir Dec 23, 2023
44ba4c9
-- Try vertical layout for mobile
DimaBir Dec 23, 2023
be2586a
-- Try vertical layout for mobile
DimaBir Dec 23, 2023
9ff361c
-- Try vertical layout for mobile
DimaBir Dec 23, 2023
745e35d
Fixed horizontalBar not registered
DimaBir Dec 23, 2023
abcb26b
Fixed horizontalBar not registered
DimaBir Dec 23, 2023
5f355c7
Enhanced UI with Animated Benchmark Info and Responsive Graphs
DimaBir Dec 24, 2023
7ef7aa4
Enhanced UI with Animated Benchmark Info and Responsive Graphs
DimaBir Dec 24, 2023
8513043
Enhanced UI with Animated Benchmark Info and Responsive Graphs
DimaBir Dec 24, 2023
c8414d8
Changed BG color
DimaBir Dec 24, 2023
81ae360
Fix bug of mode switch
DimaBir Dec 24, 2023
0c3387c
Fix bug of mode switch
DimaBir Dec 24, 2023
55b6c61
Fix bug of mode switch
DimaBir Dec 24, 2023
bec57b9
Fix bug of mode switch
DimaBir Dec 24, 2023
e00ef39
Fix bug of mode switch
DimaBir Dec 24, 2023
ee20059
Fix bug of mode switch
DimaBir Dec 24, 2023
43d7a37
Fix bug of mode switch
DimaBir Dec 24, 2023
5a73ca8
Fix bug of mode switch
DimaBir Dec 24, 2023
aef7fc5
Fix bug of mode switch
DimaBir Dec 24, 2023
fedc6f7
Background animation
DimaBir Dec 24, 2023
f7444a0
Integrated EfficientNet
DimaBir Dec 24, 2023
297e94e
Integrating model selector to FrontEnd and BackEnd
DimaBir Dec 24, 2023
1b327b5
Integrating model selector to FrontEnd and BackEnd
DimaBir Dec 24, 2023
578dc51
Added error messages support
DimaBir Dec 24, 2023
0643447
Fixing ModuleNotFoundError: No module named 'flask_limiter.exceptions'
DimaBir Dec 24, 2023
1e58f47
Fixing ModuleNotFoundError: No module named 'flask_limiter.exceptions'
DimaBir Dec 24, 2023
41715b7
Fixed error message fade
DimaBir Dec 24, 2023
f2f4ee8
Fixed error message fade. added close button
DimaBir Dec 24, 2023
f69fe37
Added info card
DimaBir Dec 24, 2023
651e72f
Added info card
DimaBir Dec 24, 2023
ec5baa4
Added info card
DimaBir Dec 24, 2023
5931584
Added info card
DimaBir Dec 24, 2023
c813fd2
Added info card
DimaBir Dec 24, 2023
965035a
Added info card
DimaBir Dec 24, 2023
116010d
Added info card
DimaBir Dec 24, 2023
d4cacfe
Revert "Added info card"
DimaBir Dec 24, 2023
29ad86b
Added info card
DimaBir Dec 24, 2023
00d6709
Added info card
DimaBir Dec 24, 2023
6b9707f
Added info card
DimaBir Dec 24, 2023
255c50b
Added info card
DimaBir Dec 24, 2023
8b42e3c
Added info card
DimaBir Dec 24, 2023
fd75bdb
Added info card
DimaBir Dec 24, 2023
2ad1e9e
Added info card
DimaBir Dec 24, 2023
9b5c423
Added info card
DimaBir Dec 24, 2023
73939eb
Added info card
DimaBir Dec 24, 2023
b3c0de6
Added info card
DimaBir Dec 24, 2023
4518b0a
Added inner card
DimaBir Dec 25, 2023
b9eef64
Added inner card
DimaBir Dec 25, 2023
bc72c4e
Adding footer and fixing benchmarks
DimaBir Dec 25, 2023
b9dc879
Fixing footer, sticking to the bottom
DimaBir Dec 25, 2023
8d0bf69
Fixing footer, sticking to the bottom
DimaBir Dec 25, 2023
2ac88a0
Revert "Fixing footer, sticking to the bottom"
DimaBir Dec 25, 2023
ccf0667
Revert "Fixing footer, sticking to the bottom"
DimaBir Dec 25, 2023
4cf0360
Revert "Adding footer and fixing benchmarks"
DimaBir Dec 25, 2023
8ab44d0
Fixing footer, sticking to the bottom
DimaBir Dec 25, 2023
7bd09a9
Fixing footer, sticking to the bottom
DimaBir Dec 25, 2023
dab6007
Revert "Fixing footer, sticking to the bottom"
DimaBir Dec 25, 2023
07c41f9
Revert "Fixing footer, sticking to the bottom"
DimaBir Dec 25, 2023
ea62944
Changed to subdomain infer.birenbaum.co with default port 8080
DimaBir Dec 29, 2023
95e15d0
Changed to subdomain infer.birenbaum.co with default port 8080
DimaBir Dec 29, 2023
eb078e0
Revert "Changed to subdomain infer.birenbaum.co with default port 8080"
DimaBir Dec 29, 2023
37fe749
Revert "Changed to subdomain infer.birenbaum.co with default port 8080"
DimaBir Dec 29, 2023
1983035
Changed to subdomain infer.birenbaum.co with default port 8080
DimaBir Dec 29, 2023
97c03d0
Changed to subdomain infer.birenbaum.co with default port 8080
DimaBir Dec 29, 2023
120293b
Revert "Changed to subdomain infer.birenbaum.co with default port 8080"
DimaBir Dec 29, 2023
4c5e8ab
Revert "Changed to subdomain infer.birenbaum.co with default port 8080"
DimaBir Dec 29, 2023
5b17cc2
Mapping default HTTPS host port (432) to the containers flask appp 5000
DimaBir Dec 29, 2023
b1f0af6
Mapping default HTTPS host port (432) to the containers flask appp 5000
DimaBir Dec 29, 2023
c09d906
Mapping default HTTPS host port (432) to the containers flask appp 5000
DimaBir Dec 29, 2023
e354c79
Mapping default HTTPS host port (432) to the containers flask appp 5000
DimaBir Dec 29, 2023
a81f1a0
Mapping default HTTPS host port (432) to the containers flask appp 5000
DimaBir Dec 29, 2023
abac3b5
Mapping default HTTPS host port (432) to the containers flask appp 5000
DimaBir Dec 29, 2023
502ef67
Added EfficientNet-B7
DimaBir Dec 29, 2023
e5d5ad3
Added threads support and UI update for new model
DimaBir Dec 29, 2023
e414012
Added threads support and UI update for new model
DimaBir Dec 29, 2023
f89f13d
Enhanced run_all_benchmarks with parallel execution and error handling
DimaBir Dec 30, 2023
7503b1c
Revert run_all_benchmarks optimizartion
DimaBir Dec 30, 2023
73ae56a
Revert "Revert run_all_benchmarks optimizartion"
DimaBir Dec 30, 2023
cfb29a4
Revert run_all_benchmarks optimizartion
DimaBir Dec 30, 2023
fa10e28
Debugging
DimaBir Dec 30, 2023
66a0888
Debugging
DimaBir Dec 30, 2023
5e34007
Debugging
DimaBir Dec 30, 2023
3eb3120
Debugging multithreading
DimaBir Dec 30, 2023
9eef59e
Debugging multithreading
DimaBir Dec 30, 2023
69ef5b5
Revert multithreading
DimaBir Dec 30, 2023
98e8f1f
Added OV FP 16 model fo the core logic
DimaBir Dec 30, 2023
1a615a7
Integrated OV FP 16 model to the FrontEnd
DimaBir Dec 30, 2023
583bce4
Integrated OV FP 16 model to the FrontEnd
DimaBir Dec 30, 2023
1aa2a06
(Debug) Integrated OV FP 16 model to the FrontEnd
DimaBir Dec 30, 2023
2240a62
Added FP16 input data conversion for predict. Cherry pick as well
DimaBir Dec 30, 2023
a26ddb9
Adding logs for debug
DimaBir Dec 30, 2023
cc4bc6d
(DEBUG) Adding logs for debug
DimaBir Dec 30, 2023
3b20ed8
(DEBUG) Adding logs for debug
DimaBir Dec 30, 2023
28b0efc
(DEBUG) Adding logs for debug
DimaBir Dec 30, 2023
5a92ef4
(DEBUG) Adding logs for debug
DimaBir Dec 30, 2023
0b26806
(DEBUG) Adding logs for debug
DimaBir Dec 30, 2023
140d2fd
(DEBUG) Adding logs for debug
DimaBir Dec 30, 2023
1486eed
(DEBUG) Adding precision
DimaBir Dec 30, 2023
2160fc4
(DEBUG) Adding precision
DimaBir Dec 30, 2023
3b65c60
(DEBUG) Adding precision
DimaBir Dec 30, 2023
6e4a117
(DEBUG) Adding precision
DimaBir Dec 30, 2023
bff9316
(DEBUG) Adding precision
DimaBir Dec 30, 2023
ed3f0a0
(DEBUG) Adding precision
DimaBir Dec 30, 2023
89c0f62
(DEBUG) Adding precision
DimaBir Dec 30, 2023
f89329f
(DEBUG) Adding precision
DimaBir Dec 30, 2023
5b5d936
(DEBUG) Adding precision
DimaBir Dec 30, 2023
cb94e49
(DEBUG) Adding precision
DimaBir Dec 30, 2023
413dc19
(DEBUG) Adding precision
DimaBir Dec 30, 2023
f630788
(DEBUG) Adding precision
DimaBir Dec 30, 2023
b72fedf
(DEBUG) Adding precision
DimaBir Dec 30, 2023
5862bab
Revert "(DEBUG) Adding precision"
DimaBir Dec 30, 2023
b71f3bc
Revert "(DEBUG) Adding precision"
DimaBir Dec 30, 2023
c310757
Revert "(DEBUG) Adding precision"
DimaBir Dec 30, 2023
0e7faa9
Revert "(DEBUG) Adding precision"
DimaBir Dec 30, 2023
86b7f62
Revert "(DEBUG) Adding precision"
DimaBir Dec 30, 2023
828904e
Revert "(DEBUG) Adding precision"
DimaBir Dec 30, 2023
eee8e97
Revert "(DEBUG) Adding precision"
DimaBir Dec 30, 2023
d94fbe2
Revert "(DEBUG) Adding precision"
DimaBir Dec 30, 2023
58df1f2
Revert "(DEBUG) Adding precision"
DimaBir Dec 30, 2023
c3f0fee
Revert "(DEBUG) Adding precision"
DimaBir Dec 30, 2023
633cfed
Revert "(DEBUG) Adding precision"
DimaBir Dec 30, 2023
21e0944
Revert "(DEBUG) Adding precision"
DimaBir Dec 30, 2023
47dd9b9
Revert "(DEBUG) Adding precision"
DimaBir Dec 30, 2023
87ddb49
Revert "(DEBUG) Adding logs for debug"
DimaBir Dec 30, 2023
64243c2
Revert "(DEBUG) Adding logs for debug"
DimaBir Dec 30, 2023
763b012
Revert "(DEBUG) Adding logs for debug"
DimaBir Dec 30, 2023
a7dd416
Revert "(DEBUG) Adding logs for debug"
DimaBir Dec 30, 2023
094b4e1
Revert "(DEBUG) Adding logs for debug"
DimaBir Dec 30, 2023
da8edae
Revert "(DEBUG) Adding logs for debug"
DimaBir Dec 30, 2023
07665d6
Revert "Adding logs for debug"
DimaBir Dec 30, 2023
7dbea51
Revert "Added FP16 input data conversion for predict. Cherry pick as …
DimaBir Dec 30, 2023
f94b215
Revert "(Debug) Integrated OV FP 16 model to the FrontEnd"
DimaBir Dec 30, 2023
7d05196
Revert "Integrated OV FP 16 model to the FrontEnd"
DimaBir Dec 30, 2023
6dd286c
Revert "Integrated OV FP 16 model to the FrontEnd"
DimaBir Dec 30, 2023
1c42e9e
Revert "Added OV FP 16 model fo the core logic"
DimaBir Dec 30, 2023
42a43b6
Revert "Added OV FP 16 model fo the core logic"
DimaBir Dec 30, 2023
bc3e556
Merge remote-tracking branch 'origin/web_demo' into web_demo
DimaBir Dec 30, 2023
ffd1b7a
Added MobileNet V2 to the core logic
DimaBir Dec 30, 2023
7b5d897
Updated front end for MobileNet V2 to the core logic
DimaBir Dec 30, 2023
56ddb92
For each CNN model added prefix to differentiate between saved models
DimaBir Dec 31, 2023
2aeb71d
Revert "For each CNN model added prefix to differentiate between save…
DimaBir Dec 31, 2023
cbabb53
For each CNN model added prefix to differentiate between saved models
DimaBir Dec 31, 2023
b0afd6f
Update README.md
DimaBir Jan 5, 2024
5c38762
added dcrag and drop
DimaBir Jan 6, 2024
73e9e72
added drag and drop
DimaBir Jan 6, 2024
ef00f85
added drag and drop
DimaBir Jan 6, 2024
830f9a5
Supporting drag and drop file
DimaBir Jan 6, 2024
557462f
Supporting drag and drop file
DimaBir Jan 6, 2024
75d8acf
Supporting drag and drop file
DimaBir Jan 6, 2024
bc54821
Supporting drag and drop file
DimaBir Jan 6, 2024
60c349c
Supporting drag and drop file
DimaBir Jan 6, 2024
c92ab38
Supporting drag and drop file
DimaBir Jan 6, 2024
ef828e6
Supporting drag and drop file
DimaBir Jan 6, 2024
22ef265
Revert "Supporting drag and drop file"
DimaBir Jan 6, 2024
df48d6e
Revert "Supporting drag and drop file"
DimaBir Jan 6, 2024
69b861f
Revert "Supporting drag and drop file"
DimaBir Jan 6, 2024
43e8a2c
Revert "Supporting drag and drop file"
DimaBir Jan 6, 2024
1ed298f
Revert "Supporting drag and drop file"
DimaBir Jan 6, 2024
f8d932f
Revert "Supporting drag and drop file"
DimaBir Jan 6, 2024
60f1673
Revert "Supporting drag and drop file"
DimaBir Jan 6, 2024
8809a53
Revert "added drag and drop"
DimaBir Jan 6, 2024
b0de56c
Revert "added drag and drop"
DimaBir Jan 6, 2024
e9bd9fb
Revert "added dcrag and drop"
DimaBir Jan 6, 2024
e254ca2
Add drag and drop
DimaBir Jan 6, 2024
0c7f8d3
Revert "Add drag and drop"
DimaBir Jan 6, 2024
c3b2964
Fixing Dockerfile
DimaBir Aug 21, 2024
f3dbae0
Removing usage of SSL cert
DimaBir Aug 21, 2024
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,5 +1,5 @@
# Argument for base image. Default is a neutral Python image.
-ARG BASE_IMAGE=python:3.8-slim
+ARG BASE_IMAGE=python:3.10-slim

# Use the base image specified by the BASE_IMAGE argument
FROM $BASE_IMAGE
22 changes: 22 additions & 0 deletions Dockerfile.flask
@@ -0,0 +1,22 @@
# Use an official Python runtime as a parent image
FROM python:3.10-slim

# Set the working directory in the container
WORKDIR /usr/src/app

# Copy the Flask app requirements file into the container at /usr/src/app
COPY requirements.flask.txt ./

# Install any needed packages specified in requirements.flask.txt
RUN pip install -r requirements.flask.txt

COPY ./src /usr/src/app/src
COPY ./common /usr/src/app/common
COPY ./inference /usr/src/app/inference

# Make port 5000 available to the world outside this container
EXPOSE 5000

# Define the command to run the Flask app
CMD ["python", "./src/web_demo/app.py"]
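The Flask image can also be built and run on its own, e.g. `docker build -f Dockerfile.flask -t flask-demo .` followed by `docker run -p 5000:5000 flask-demo` (the image name is illustrative); the compose file below wires both services together.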
24 changes: 18 additions & 6 deletions README.md
@@ -3,14 +3,16 @@

## Table of Contents
1. [Overview](#overview)
-2. [Requirements](#requirements)
+2. [Live Demo](#live-demo) ![New](https://img.shields.io/badge/-New-842E5B)
+3. [Modularity and Extended Model Support](#modularity-and-extended-model-support) ![New](https://img.shields.io/badge/-New-842E5B)
+4. [Requirements](#requirements)
   - [Steps to Run](#steps-to-run)
   - [Example Command](#example-command)
-3. [Results](#results)
-   - [CPU Results](#cpu-results) ![Static Badge](https://img.shields.io/badge/update-orange)
-   - [GPU (CUDA) Results](#gpu-cuda-results) ![Static Badge](https://img.shields.io/badge/update-orange)
-   - [CPU Results M1 Pro](#cpu-results-m1-pro) ![New](https://img.shields.io/badge/-New-842E5B)
-6. [Benchmark Implementation Details](#benchmark-implementation-details) ![New](https://img.shields.io/badge/-New-842E5B)
+5. [Results](#results)
+   - [CPU Results](#cpu-results)
+   - [GPU (CUDA) Results](#gpu-cuda-results)
+   - [CPU Results M1 Pro](#cpu-results-m1-pro)
+6. [Benchmark Implementation Details](#benchmark-implementation-details)
- [PyTorch CPU & CUDA](#pytorch-cpu--cuda)
- [TensorRT FP32 & FP16](#tensorrt-fp32--fp16)
- [ONNX](#onnx)
@@ -30,6 +32,16 @@ The project is Dockerized for easy deployment:

Refer to the [Steps to Run](#steps-to-run) section for Docker instructions.

## Live Demo
A live demo lets users interact with the supported models through a web interface. The demo is built with Flask and performs real-time inference on uploaded images.

- [Live Demo](https://birenbaum.co:5000/demo)

## Modularity and Extended Model Support
The project now supports a range of models beyond the original ResNet-50. This modularity allows users to easily integrate and test different models for their specific use cases.

- Supported models include EfficientNet, MobileNet, and more.
- For details on adding new models, refer to [Model Loader](src/model.py).
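As a rough usage sketch (based on the `ModelLoader` signature introduced in `src/model.py` in this PR; the snippet itself is not repo code):

from src.model import ModelLoader

# Pick one of the supported architectures; torchvision downloads the weights
loader = ModelLoader(model_type="mobilenet_v2", device="cpu")
model = loader.model
model.eval()  # switch to inference mode
print(loader.categories[0][281])  # ImageNet label lookup, e.g. "tabby"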

## Requirements
- This repo cloned
24 changes: 24 additions & 0 deletions common/utils.py
@@ -1,3 +1,5 @@
import torch
import logging
import argparse
import pandas as pd
import matplotlib.pyplot as plt
@@ -111,3 +113,25 @@ def parse_arguments():
    )

    return parser.parse_args()


def cuda_is_available() -> bool:
    """
    Check the availability of CUDA and TensorRT on the system.

    Determines if CUDA is available and if the 'torch_tensorrt' package is
    installed. Logs a warning if 'torch_tensorrt' is not installed.

    :return: True if CUDA is available and 'torch_tensorrt' is installed, False otherwise.
    """
    cuda_available = False
    if torch.cuda.is_available():
        try:
            import torch_tensorrt

            cuda_available = True
        except ImportError:
            logging.warning(
                "torch-tensorrt is not installed. Running on CPU mode only."
            )
    return cuda_available
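A minimal sketch of how this helper might gate device selection (the call site is assumed, not shown in this diff):

from common.utils import cuda_is_available

# Fall back to CPU-only backends when CUDA or torch_tensorrt is missing
device = "cuda" if cuda_is_available() else "cpu"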
20 changes: 20 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,20 @@
version: '3'

services:
  resnet_tensorrt:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8000:8000"

  flask_app:
    build:
      context: .
      dockerfile: Dockerfile.flask
    ports:
      - "5000:5000"
    volumes:
      - ./keys:/usr/src/app/keys
    depends_on:
      - resnet_tensorrt
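With both services defined, `docker compose up --build` builds and starts the benchmark container and the Flask demo together. Note that `depends_on` only orders container startup; it does not wait for `resnet_tensorrt` to become ready.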
17 changes: 17 additions & 0 deletions requirements.flask.txt
@@ -0,0 +1,17 @@
torch
Flask
gunicorn
requests
Pillow
pandas
torchvision
pandas
numpy
packaging
onnx
onnxruntime
openvino==2023.1.0.dev20230811
seaborn
matplotlib
Flask-Limiter==1.5.0
Werkzeug==2.0.2
14 changes: 14 additions & 0 deletions src/image_processor.py
@@ -1,4 +1,5 @@
from torchvision import transforms
from torchvision.models import resnet50, ResNet50_Weights
from PIL import Image
import torch

@@ -38,3 +39,16 @@ def process_image(self) -> torch.Tensor:
        img_batch = torch.unsqueeze(img_transformed, 0).to(self.device)

        return img_batch

    def process_image_official(self) -> torch.Tensor:
        img = Image.open(self.img_path)

        # Initialize the Weight Transforms
        weights = ResNet50_Weights.DEFAULT
        preprocess = weights.transforms()

        # Apply it to the input image
        img_transformed = preprocess(img)
        img_batch = torch.unsqueeze(img_transformed, 0).to(self.device)

        return img_batch
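A hypothetical call site (the constructor arguments `img_path` and `device` are inferred from the attributes used above, not confirmed by this diff):

from src.image_processor import ImageProcessor

processor = ImageProcessor(img_path="cat.jpg", device="cpu")
batch = processor.process_image_official()  # tensor of shape [1, 3, 224, 224]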
28 changes: 4 additions & 24 deletions src/inference_base.py
@@ -78,8 +78,8 @@ def benchmark(self, input_data, num_runs=100, warmup_runs=50):
        for img in input_batch:
            self.predict(img.unsqueeze(0), is_benchmark=True)
        avg_time = (
            (time.time() - start_time) / (num_runs * self.batch_size)
        ) * 1000  # Convert to ms

        logging.info(f"Average inference time for {num_runs} runs: {avg_time:.4f} ms")
        if self.debug_mode:
@@ -102,26 +102,6 @@ def benchmark(self, input_data, num_runs=100, warmup_runs=50):

return avg_time, throughput

-    def get_top_predictions(self, prob: np.ndarray, is_benchmark=False):
-        """
-        Get the top predictions based on the probabilities.
-
-        :param prob: Array of probabilities.
-        :param is_benchmark: If True, the method is called during a benchmark run.
-        :return: Array of probabilities.
-        """
-        if is_benchmark:
-            return None
-
-        # Get the top indices and probabilities
-        top_indices = prob.argsort()[-self.topk :][::-1]
-        top_probs = prob[top_indices]
-
-        # Log and print the top predictions
-        for i in range(self.topk):
-            probability = top_probs[i]
-            class_label = self.categories[0][int(top_indices[i])]
-            logging.info(f"#{i + 1}: {int(probability * 100)}% {class_label}")
-            if self.debug_mode:
-                print(f"#{i + 1}: {int(probability * 100)}% {class_label}")
-        return prob
+    def get_top_predictions(self, logits: np.ndarray, is_benchmark=False):
+        raise NotImplementedError
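With this change the base class only declares the hook: each backend (PyTorch, ONNX, OpenVINO) now applies softmax and formats its own top-k list, as the per-backend diffs below show.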
20 changes: 16 additions & 4 deletions src/model.py
@@ -3,17 +3,29 @@


class ModelLoader:
-    def __init__(self, device: str = "cuda") -> None:
+    def __init__(self, model_type: str = "resnet50", device: str = "cuda") -> None:
        """
        Initialize the ModelLoader object.

+        :param model_type: Type of the model to load ("resnet50", "efficientnet", etc.).
        :param device: The device to load the model on ("cpu" or "cuda").
        """
-        self.model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2).to(
-            device
-        )
+        self.device = device
+        self.model = self.load_model(model_type)
+        self.model_type = model_type
        self.categories: pd.DataFrame = pd.read_csv(
            "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt",
            header=None,
        )

    def load_model(self, model_type: str):
        if model_type == "resnet50":
            return models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2).to(self.device)
        elif model_type == "efficientnet_b0":
            return models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1).to(self.device)
        elif model_type == "efficientnet_b7":
            return models.efficientnet_b7(weights=models.EfficientNet_B7_Weights.IMAGENET1K_V1).to(self.device)
        elif model_type == "mobilenet_v2":
            return models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1).to(self.device)
        else:
            raise ValueError(f"Unsupported model type: {model_type}")
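Supporting another torchvision architecture would be one more branch in `load_model`; an illustrative sketch (DenseNet-121 is not part of this PR):

        elif model_type == "densenet121":
            return models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1).to(self.device)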
34 changes: 32 additions & 2 deletions src/onnx_inference.py
@@ -1,4 +1,6 @@
import os
import torch
import torch.nn.functional as F
import logging
import onnxruntime as ort
import numpy as np
@@ -44,12 +46,13 @@ def predict(self, input_data, is_benchmark=False):
        ort_inputs = {input_name: input_data.cpu().numpy()}
        ort_outs = self.model.run(None, ort_inputs)

-        # Extract probabilities from the output and normalize them
+        # Extract probabilities from the output
        if len(ort_outs) > 0:
            prob = ort_outs[0]
            if prob.ndim > 1:
                prob = prob[0]
-            prob = np.exp(prob) / np.sum(np.exp(prob))
+            prob = F.softmax(torch.from_numpy(prob), dim=0).numpy()

        return self.get_top_predictions(prob, is_benchmark)

    def benchmark(self, input_data, num_runs=100, warmup_runs=50):
@@ -62,3 +65,30 @@
        :return: Average inference time in milliseconds.
        """
        return super().benchmark(input_data, num_runs, warmup_runs)

    def get_top_predictions(self, prob: np.ndarray, is_benchmark=False):
        """
        Get the top predictions based on the probabilities.

        :param prob: Array of probabilities.
        :param is_benchmark: If True, the method is called during a benchmark run.
        :return: List of dictionaries with label and confidence.
        """
        if is_benchmark:
            return None

        # Get the top indices and probabilities
        top_indices = prob.argsort()[-self.topk:][::-1]
        top_probs = prob[top_indices]

        # Prepare the list of predictions
        predictions = []
        for i in range(self.topk):
            probability = top_probs[i]
            class_label = self.categories[0][int(top_indices[i])]
            predictions.append({"label": class_label, "confidence": float(probability)})

            # Log the top predictions
            logging.info(f"#{i + 1}: {probability * 100:.2f}% {class_label}")

        return predictions
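The replaced manual normalization and `F.softmax` compute the same quantity; the practical gain is numerical stability, since PyTorch's softmax subtracts the maximum logit before exponentiating. A standalone equivalence check (not repo code):

import numpy as np
import torch
import torch.nn.functional as F

logits = np.array([2.0, 1.0, 0.1], dtype=np.float32)
manual = np.exp(logits) / np.sum(np.exp(logits))             # old approach
stable = F.softmax(torch.from_numpy(logits), dim=0).numpy()  # new approach
assert np.allclose(manual, stable)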
32 changes: 30 additions & 2 deletions src/ov_inference.py
@@ -1,4 +1,7 @@
import os
import logging
import torch
import torch.nn.functional as F
import numpy as np
import openvino as ov
from src.inference_base import InferenceBase
@@ -52,10 +55,12 @@ def predict(self, input_data, is_benchmark=False):
        input_name = next(iter(self.compiled_model.inputs))
        outputs = self.compiled_model(inputs={input_name: input_data.cpu().numpy()})

-        # Extract probabilities from the output and normalize them
+        # Extract probabilities from the output
        prob_key = next(iter(outputs))
        prob = outputs[prob_key]
-        prob = np.exp(prob[0]) / np.sum(np.exp(prob[0]))
+
+        # Apply softmax to the probabilities
+        prob = F.softmax(torch.from_numpy(prob[0]), dim=0).numpy()

        return self.get_top_predictions(prob, is_benchmark)

@@ -69,3 +74,26 @@ def benchmark(self, input_data, num_runs=100, warmup_runs=50):
        :return: Average inference time in milliseconds.
        """
        return super().benchmark(input_data, num_runs, warmup_runs)

    def get_top_predictions(self, prob: np.ndarray, is_benchmark=False):
        """
        Get the top predictions based on the probabilities.
        """
        if is_benchmark:
            return None

        # Get the top indices and probabilities
        top_indices = prob.argsort()[-self.topk :][::-1]
        top_probs = prob[top_indices]

        # Prepare the list of predictions
        predictions = []
        for i in range(self.topk):
            probability = top_probs[i]
            class_label = self.categories[0][int(top_indices[i])]
            predictions.append({"label": class_label, "confidence": float(probability)})

            # Log the top predictions
            logging.info(f"#{i + 1}: {probability * 100:.2f}% {class_label}")

        return predictions
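For context: OpenVINO's compiled model returns a mapping from output nodes to arrays, which is why `predict` above grabs `next(iter(outputs))` — these classifiers expose a single output head.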
37 changes: 32 additions & 5 deletions src/pytorch_inference.py
@@ -1,4 +1,7 @@
import torch
import torch.nn.functional as F
import logging
import numpy as np
from src.inference_base import InferenceBase


@@ -37,11 +40,8 @@ def predict(self, input_data, is_benchmark=False):
        with torch.no_grad():
            outputs = self.model(input_data.to(self.device))

-        # Compute the softmax probabilities
-        prob = torch.nn.functional.softmax(outputs[0], dim=0)
-        prob = prob.cpu().numpy()
-
-        return self.get_top_predictions(prob, is_benchmark)
+        # Pass the raw output tensor to get_top_predictions
+        return self.get_top_predictions(outputs, is_benchmark)

    def benchmark(self, input_data, num_runs=100, warmup_runs=50):
        """
@@ -53,3 +53,30 @@
        :return: Average inference time in milliseconds.
        """
        return super().benchmark(input_data, num_runs, warmup_runs)

    def get_top_predictions(self, output_tensor, is_benchmark=False):
        """
        Get the top predictions based on the model's output.
        """
        if is_benchmark:
            return None

        # Apply softmax to convert logits to probabilities
        probabilities = F.softmax(output_tensor, dim=1)

        # Get the top probabilities and their indices
        top_probs, top_indices = torch.topk(probabilities, self.topk)
        top_probs = top_probs[0].tolist()
        top_indices = top_indices[0].tolist()

        # Prepare the list of predictions
        predictions = []
        for i in range(self.topk):
            probability = top_probs[i]
            class_label = self.categories[0][top_indices[i]]
            predictions.append({"label": class_label, "confidence": float(probability)})

            # Log the top predictions
            logging.info(f"#{i + 1}: {probability * 100:.2f}% {class_label}")

        return predictions
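Unlike the NumPy-based backends, this implementation stays in torch throughout. `torch.topk` returns values and indices with a leading batch dimension, hence the `[0]` before `tolist()`. A standalone sketch (not repo code):

import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 0.5, 1.0, -1.0]])  # batch of one
probs = F.softmax(logits, dim=1)
top_probs, top_indices = torch.topk(probs, k=2)
print(top_probs[0].tolist())    # ≈ [0.61, 0.22]
print(top_indices[0].tolist())  # [0, 2]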