diff --git a/Dockerfile b/Dockerfile
index 9050db0..33f9251 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 # Argument for base image. Default is a neutral Python image.
-ARG BASE_IMAGE=python:3.8-slim
+ARG BASE_IMAGE=python:3.10-slim
 
 # Use the base image specified by the BASE_IMAGE argument
 FROM $BASE_IMAGE
diff --git a/Dockerfile.flask b/Dockerfile.flask
new file mode 100644
index 0000000..13ce295
--- /dev/null
+++ b/Dockerfile.flask
@@ -0,0 +1,21 @@
+# Use an official Python runtime as a parent image
+FROM python:3.10-slim
+
+# Set the working directory in the container
+WORKDIR /usr/src/app
+
+# Copy the Flask app requirements file into the container at /usr/src/app
+COPY requirements.flask.txt ./
+
+# Install any needed packages specified in requirements.flask.txt
+RUN pip install -r requirements.flask.txt
+
+COPY ./src /usr/src/app/src
+COPY ./common /usr/src/app/common
+COPY ./inference /usr/src/app/inference
+
+# Make port 5000 available to the world outside this container
+EXPOSE 5000
+
+# Define the command to run the Flask app
+CMD ["python", "./src/web_demo/app.py"]
diff --git a/README.md b/README.md
index ce8e272..a56390e 100644
--- a/README.md
+++ b/README.md
@@ -3,14 +3,16 @@
 ## Table of Contents
 
 1. [Overview](#overview)
-2. [Requirements](#requirements)
+2. [Live Demo](#live-demo) ![New](https://img.shields.io/badge/-New-842E5B)
+3. [Modularity and Extended Model Support](#modularity-and-extended-model-support) ![New](https://img.shields.io/badge/-New-842E5B)
+4. [Requirements](#requirements)
    - [Steps to Run](#steps-to-run)
    - [Example Command](#example-command)
-3. [Results](#results)
-   - [CPU Results](#cpu-results) ![Static Badge](https://img.shields.io/badge/update-orange)
-   - [GPU (CUDA) Results](#gpu-cuda-results) ![Static Badge](https://img.shields.io/badge/update-orange)
-   - [CPU Results M1 Pro](#cpu-results-m1-pro) ![New](https://img.shields.io/badge/-New-842E5B)
-6. [Benchmark Implementation Details](#benchmark-implementation-details) ![New](https://img.shields.io/badge/-New-842E5B)
+5. [Results](#results)
+   - [CPU Results](#cpu-results)
+   - [GPU (CUDA) Results](#gpu-cuda-results)
+   - [CPU Results M1 Pro](#cpu-results-m1-pro)
+6. [Benchmark Implementation Details](#benchmark-implementation-details)
    - [PyTorch CPU & CUDA](#pytorch-cpu--cuda)
    - [TensorRT FP32 & FP16](#tensorrt-fp32--fp16)
    - [ONNX](#onnx)
@@ -30,6 +32,16 @@ The project is Dockerized for easy deployment:
 
 Refer to the [Steps to Run](#steps-to-run) section for Docker instructions.
 
+## Live Demo
+The project now includes a live demo that lets users interact with the model through a web interface. The demo is built with Flask and performs real-time inference on uploaded images.
+
+- [Live Demo](https://birenbaum.co:5000/demo)
+
+## Modularity and Extended Model Support
+The project now supports a range of models beyond the original ResNet-50, so different backbones can be swapped in and tested for specific use cases.
+
+- Supported models include EfficientNet, MobileNet, and more.
+- For details on adding new models, refer to [Model Loader](src/model.py); a usage sketch follows below.
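A minimal usage sketch of the extended `ModelLoader` (not part of the diff itself; the image path is a placeholder, and the snippet assumes it is run from the repository root with the requirements installed):

```python
from src.image_processor import ImageProcessor
from src.model import ModelLoader
from src.pytorch_inference import PyTorchInference

# Pick any supported backbone; see ModelLoader.load_model for the full list.
model_loader = ModelLoader(model_type="efficientnet_b0", device="cpu")

# Preprocess a sample image into a batched tensor (placeholder path).
img_batch = ImageProcessor(img_path="images/sample.jpg", device="cpu").process_image()

# Run a single PyTorch CPU prediction; predict() returns a list of
# {"label": ..., "confidence": ...} dicts for the top-k classes.
inference = PyTorchInference(model_loader, device="cpu")
predictions = inference.predict(img_batch)
print(predictions)
```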
## Requirements - This repo cloned diff --git a/common/utils.py b/common/utils.py index a8e0a33..d103eb3 100644 --- a/common/utils.py +++ b/common/utils.py @@ -1,3 +1,5 @@ +import torch +import logging import argparse import pandas as pd import matplotlib.pyplot as plt @@ -111,3 +113,25 @@ def parse_arguments(): ) return parser.parse_args() + + +def cuda_is_available() -> bool: + """ + Check the availability of CUDA and TensorRT on the system. + + Determines if CUDA is available and if the 'torch_tensorrt' package is + installed. Logs a warning if 'torch_tensorrt' is not installed. + + :return: True if CUDA is available and 'torch_tensorrt' is installed, False otherwise. + """ + cuda_available = False + if torch.cuda.is_available(): + try: + import torch_tensorrt + + cuda_available = True + except ImportError: + logging.warning( + "torch-tensorrt is not installed. Running on CPU mode only." + ) + return cuda_available diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..d611282 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,20 @@ +version: '3' + +services: + resnet_tensorrt: + build: + context: . + dockerfile: Dockerfile + ports: + - "8000:8000" + + flask_app: + build: + context: . + dockerfile: Dockerfile.flask + ports: + - "5000:5000" + volumes: + - ./keys:/usr/src/app/keys + depends_on: + - resnet_tensorrt diff --git a/requirements.flask.txt b/requirements.flask.txt new file mode 100644 index 0000000..5c6aaa7 --- /dev/null +++ b/requirements.flask.txt @@ -0,0 +1,17 @@ +torch +Flask +gunicorn +requests +Pillow +pandas +torchvision +pandas +numpy +packaging +onnx +onnxruntime +openvino==2023.1.0.dev20230811 +seaborn +matplotlib +Flask-Limiter==1.5.0 +Werkzeug==2.0.2 \ No newline at end of file diff --git a/src/image_processor.py b/src/image_processor.py index 2cdaecc..466cede 100644 --- a/src/image_processor.py +++ b/src/image_processor.py @@ -1,4 +1,5 @@ from torchvision import transforms +from torchvision.models import resnet50, ResNet50_Weights from PIL import Image import torch @@ -38,3 +39,16 @@ def process_image(self) -> torch.Tensor: img_batch = torch.unsqueeze(img_transformed, 0).to(self.device) return img_batch + + def process_image_official(self) -> torch.Tensor: + img = Image.open(self.img_path) + + # Initialize the Weight Transforms + weights = ResNet50_Weights.DEFAULT + preprocess = weights.transforms() + + # Apply it to the input image + img_transformed = preprocess(img) + img_batch = torch.unsqueeze(img_transformed, 0).to(self.device) + + return img_batch diff --git a/src/inference_base.py b/src/inference_base.py index 45f5c02..a694923 100644 --- a/src/inference_base.py +++ b/src/inference_base.py @@ -78,8 +78,8 @@ def benchmark(self, input_data, num_runs=100, warmup_runs=50): for img in input_batch: self.predict(img.unsqueeze(0), is_benchmark=True) avg_time = ( - (time.time() - start_time) / (num_runs * self.batch_size) - ) * 1000 # Convert to ms + (time.time() - start_time) / (num_runs * self.batch_size) + ) * 1000 # Convert to ms logging.info(f"Average inference time for {num_runs} runs: {avg_time:.4f} ms") if self.debug_mode: @@ -102,26 +102,6 @@ def benchmark(self, input_data, num_runs=100, warmup_runs=50): return avg_time, throughput - def get_top_predictions(self, prob: np.ndarray, is_benchmark=False): - """ - Get the top predictions based on the probabilities. + def get_top_predictions(self, logits: np.ndarray, is_benchmark=False): + raise NotImplementedError - :param prob: Array of probabilities. 
- :param is_benchmark: If True, the method is called during a benchmark run. - :return: Array of probabilities. - """ - if is_benchmark: - return None - - # Get the top indices and probabilities - top_indices = prob.argsort()[-self.topk :][::-1] - top_probs = prob[top_indices] - - # Log and print the top predictions - for i in range(self.topk): - probability = top_probs[i] - class_label = self.categories[0][int(top_indices[i])] - logging.info(f"#{i + 1}: {int(probability * 100)}% {class_label}") - if self.debug_mode: - print(f"#{i + 1}: {int(probability * 100)}% {class_label}") - return prob diff --git a/src/model.py b/src/model.py index 4eb1415..4605106 100644 --- a/src/model.py +++ b/src/model.py @@ -3,17 +3,29 @@ class ModelLoader: - def __init__(self, device: str = "cuda") -> None: + def __init__(self, model_type: str = "resnet50", device: str = "cuda") -> None: """ Initialize the ModelLoader object. + :param model_type: Type of the model to load ("resnet50", "efficientnet", etc.). :param device: The device to load the model on ("cpu" or "cuda"). """ - self.model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2).to( - device - ) self.device = device + self.model = self.load_model(model_type) + self.model_type = model_type self.categories: pd.DataFrame = pd.read_csv( "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt", header=None, ) + + def load_model(self, model_type: str): + if model_type == "resnet50": + return models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2).to(self.device) + elif model_type == "efficientnet_b0": + return models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1).to(self.device) + elif model_type == "efficientnet_b7": + return models.efficientnet_b7(weights=models.EfficientNet_B7_Weights.IMAGENET1K_V1).to(self.device) + elif model_type == "mobilenet_v2": + return models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1).to(self.device) + else: + raise ValueError(f"Unsupported model type: {model_type}") diff --git a/src/onnx_inference.py b/src/onnx_inference.py index 1329fcf..b834750 100644 --- a/src/onnx_inference.py +++ b/src/onnx_inference.py @@ -1,4 +1,6 @@ import os +import torch +import torch.nn.functional as F import logging import onnxruntime as ort import numpy as np @@ -44,12 +46,13 @@ def predict(self, input_data, is_benchmark=False): ort_inputs = {input_name: input_data.cpu().numpy()} ort_outs = self.model.run(None, ort_inputs) - # Extract probabilities from the output and normalize them + # Extract probabilities from the output if len(ort_outs) > 0: prob = ort_outs[0] if prob.ndim > 1: prob = prob[0] - prob = np.exp(prob) / np.sum(np.exp(prob)) + prob = F.softmax(torch.from_numpy(prob), dim=0).numpy() + return self.get_top_predictions(prob, is_benchmark) def benchmark(self, input_data, num_runs=100, warmup_runs=50): @@ -62,3 +65,30 @@ def benchmark(self, input_data, num_runs=100, warmup_runs=50): :return: Average inference time in milliseconds. """ return super().benchmark(input_data, num_runs, warmup_runs) + + def get_top_predictions(self, prob: np.ndarray, is_benchmark=False): + """ + Get the top predictions based on the probabilities. + + :param prob: Array of probabilities. + :param is_benchmark: If True, the method is called during a benchmark run. + :return: List of dictionaries with label and confidence. 
+ """ + if is_benchmark: + return None + + # Get the top indices and probabilities + top_indices = prob.argsort()[-self.topk:][::-1] + top_probs = prob[top_indices] + + # Prepare the list of predictions + predictions = [] + for i in range(self.topk): + probability = top_probs[i] + class_label = self.categories[0][int(top_indices[i])] + predictions.append({"label": class_label, "confidence": float(probability)}) + + # Log the top predictions + logging.info(f"#{i + 1}: {probability * 100:.2f}% {class_label}") + + return predictions diff --git a/src/ov_inference.py b/src/ov_inference.py index 5d94bb6..aa3b396 100644 --- a/src/ov_inference.py +++ b/src/ov_inference.py @@ -1,4 +1,7 @@ import os +import logging +import torch +import torch.nn.functional as F import numpy as np import openvino as ov from src.inference_base import InferenceBase @@ -52,10 +55,12 @@ def predict(self, input_data, is_benchmark=False): input_name = next(iter(self.compiled_model.inputs)) outputs = self.compiled_model(inputs={input_name: input_data.cpu().numpy()}) - # Extract probabilities from the output and normalize them + # Extract probabilities from the output prob_key = next(iter(outputs)) prob = outputs[prob_key] - prob = np.exp(prob[0]) / np.sum(np.exp(prob[0])) + + # Apply softmax to the probabilities + prob = F.softmax(torch.from_numpy(prob[0]), dim=0).numpy() return self.get_top_predictions(prob, is_benchmark) @@ -69,3 +74,26 @@ def benchmark(self, input_data, num_runs=100, warmup_runs=50): :return: Average inference time in milliseconds. """ return super().benchmark(input_data, num_runs, warmup_runs) + + def get_top_predictions(self, prob: np.ndarray, is_benchmark=False): + """ + Get the top predictions based on the probabilities. + """ + if is_benchmark: + return None + + # Get the top indices and probabilities + top_indices = prob.argsort()[-self.topk :][::-1] + top_probs = prob[top_indices] + + # Prepare the list of predictions + predictions = [] + for i in range(self.topk): + probability = top_probs[i] + class_label = self.categories[0][int(top_indices[i])] + predictions.append({"label": class_label, "confidence": float(probability)}) + + # Log the top predictions + logging.info(f"#{i + 1}: {probability * 100:.2f}% {class_label}") + + return predictions diff --git a/src/pytorch_inference.py b/src/pytorch_inference.py index 9984594..6dfe835 100644 --- a/src/pytorch_inference.py +++ b/src/pytorch_inference.py @@ -1,4 +1,7 @@ import torch +import torch.nn.functional as F +import logging +import numpy as np from src.inference_base import InferenceBase @@ -37,11 +40,8 @@ def predict(self, input_data, is_benchmark=False): with torch.no_grad(): outputs = self.model(input_data.to(self.device)) - # Compute the softmax probabilities - prob = torch.nn.functional.softmax(outputs[0], dim=0) - prob = prob.cpu().numpy() - - return self.get_top_predictions(prob, is_benchmark) + # Pass the raw output tensor to get_top_predictions + return self.get_top_predictions(outputs, is_benchmark) def benchmark(self, input_data, num_runs=100, warmup_runs=50): """ @@ -53,3 +53,30 @@ def benchmark(self, input_data, num_runs=100, warmup_runs=50): :return: Average inference time in milliseconds. """ return super().benchmark(input_data, num_runs, warmup_runs) + + def get_top_predictions(self, output_tensor, is_benchmark=False): + """ + Get the top predictions based on the model's output. 
+ """ + if is_benchmark: + return None + + # Apply softmax to convert logits to probabilities + probabilities = F.softmax(output_tensor, dim=1) + + # Get the top probabilities and their indices + top_probs, top_indices = torch.topk(probabilities, self.topk) + top_probs = top_probs[0].tolist() + top_indices = top_indices[0].tolist() + + # Prepare the list of predictions + predictions = [] + for i in range(self.topk): + probability = top_probs[i] + class_label = self.categories[0][top_indices[i]] + predictions.append({"label": class_label, "confidence": float(probability)}) + + # Log the top predictions + logging.info(f"#{i + 1}: {probability * 100:.2f}% {class_label}") + + return predictions diff --git a/src/tensorrt_inference.py b/src/tensorrt_inference.py index 46bb060..d31669d 100644 --- a/src/tensorrt_inference.py +++ b/src/tensorrt_inference.py @@ -1,6 +1,9 @@ import torch -# import torch_tensorrt +import torch.nn.functional as F import logging +import numpy as np + +# import torch_tensorrt from src.inference_base import InferenceBase # Check for CUDA and TensorRT availability @@ -41,9 +44,9 @@ def load_model(self): # Compile the TorchScript model with TensorRT if CUDA_AVAILABLE: - self.model = torch_tensorrt.compile( + self.model = trt.compile( scripted_model, - inputs=[torch_tensorrt.Input((1, 3, 224, 224), dtype=self.precision)], + inputs=[trt.Input((1, 3, 224, 224), dtype=self.precision)], enabled_precisions={self.precision}, ) @@ -76,3 +79,37 @@ def benchmark(self, input_data, num_runs=100, warmup_runs=50): :return: Average inference time in milliseconds. """ return super().benchmark(input_data, num_runs, warmup_runs) + + def get_top_predictions(self, logits: np.ndarray, is_benchmark=False): + """ + Get the top predictions based on the logits. + + :param logits: Array of logits. + :param is_benchmark: If True, the method is called during a benchmark run. + :return: List of dictionaries with label and confidence. 
+ """ + if is_benchmark: + return None + + # Convert logits to tensor and apply softmax + logits_tensor = torch.from_numpy(logits) + probs_tensor = F.softmax(logits_tensor, dim=0) + + # Extract top probabilities and indices + top_probs, top_indices = torch.topk(probs_tensor, self.topk) + + # Convert to numpy arrays for processing + top_probs = top_probs.detach().numpy() + top_indices = top_indices.detach().numpy() + + # Prepare the list of predictions + predictions = [] + for i in range(self.topk): + probability = top_probs[i] + class_label = self.categories[0][int(top_indices[i])] + predictions.append({"label": class_label, "confidence": float(probability)}) + + # Log the top predictions + logging.info(f"#{i + 1}: {probability * 100:.2f}% {class_label}") + + return predictions diff --git a/src/web_demo/app.py b/src/web_demo/app.py new file mode 100644 index 0000000..13fdaca --- /dev/null +++ b/src/web_demo/app.py @@ -0,0 +1,223 @@ +import time +import logging +from concurrent.futures import ThreadPoolExecutor, as_completed + +from flask import Flask, render_template, request, jsonify, flash, redirect, url_for +from PIL import Image +from io import BytesIO +from config import SSL_CERT_PATH, SSL_KEY_PATH +from werkzeug.exceptions import RequestEntityTooLarge +from flask_limiter.util import get_remote_address +from flask_limiter import Limiter, RateLimitExceeded +import os +import uuid + +import sys + +sys.path.append("/usr/src/app") +from common.utils import cuda_is_available + +# Importing model and inference classes +from src.image_processor import ImageProcessor +from src.model import ModelLoader +from src.onnx_inference import ONNXInference +from src.ov_inference import OVInference +from src.pytorch_inference import PyTorchInference +from src.tensorrt_inference import TensorRTInference + +app = Flask(__name__) + + +UPLOAD_FOLDER = "static/user_files" +ALLOWED_EXTENSIONS = {"png", "jpg", "jpeg", "gif"} +MAX_CONTENT_LENGTH = 500 * 1024 * 1024 # 500MB +MAX_FILES_IN_UPLOAD_FOLDER = 10 + +app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER +app.config["MAX_CONTENT_LENGTH"] = MAX_CONTENT_LENGTH + + +# Configure rate limiting +limiter = Limiter(key_func=get_remote_address, app=app, default_limits=["5 per minute"]) + + +@app.errorhandler(RateLimitExceeded) +def handle_rate_limit_error(e): + response = jsonify({"error": "Rate limit exceeded"}) + response.status_code = 429 # Too Many Requests + return response + + +# Function to check if the file extension is allowed +def allowed_file(filename): + return "." 
in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS + + +# Function to process the uploaded image +def process_image(file_path): + # Open the image file + with Image.open(file_path) as image: + img_processor = ImageProcessor(device="cpu") + return img_processor.process_image(image) + + +# Function to manage file limit in the upload folder +def manage_file_limit(upload_folder): + files_in_directory = os.listdir(upload_folder) + number_of_files = len(files_in_directory) + + if number_of_files >= MAX_FILES_IN_UPLOAD_FOLDER: + oldest_file = min( + files_in_directory, + key=lambda x: os.path.getctime(os.path.join(upload_folder, x)), + ) + os.remove(os.path.join(upload_folder, oldest_file)) + + +def get_inference_class(model_type, model_loader): + model_path_prefix = "./models/" # Base path for models + + if model_type == "pytorch": + # For PyTorch, no specific model file is needed, but you can modify as needed + return PyTorchInference(model_loader, device="cpu") + + elif model_type == "onnx": + model_path = model_path_prefix + model_loader.model_type + "_onnx_model.onnx" # Prefix for ONNX models + return ONNXInference(model_loader, model_path) + + elif model_type == "ov": + model_path = model_path_prefix + model_loader.model_type + "_ov_model.ov" # Prefix for OpenVINO models + return OVInference(model_loader, model_path) + + elif model_type == "tensorrt": + # For TensorRT, no specific model file is needed, but you can modify as needed + return TensorRTInference(model_loader, device="cpu") + + elif model_type == "all": + return None # Placeholder for handling 'all' models + + else: + raise ValueError(f"Unsupported model type: {model_type}") + + +def run_all_benchmarks(img_batch): + model_loader = ModelLoader(device="cpu") + benchmark_results = {} + + # PyTorch CPU Benchmark + pytorch_cpu_inference = PyTorchInference(model_loader, device="cpu") + benchmark_results["PyTorch (CPU)"] = pytorch_cpu_inference.benchmark(img_batch) + + # PyTorch GPU Benchmark + if cuda_is_available(): + pytorch_gpu_inference = PyTorchInference(model_loader, device="cuda") + benchmark_results["PyTorch (GPU)"] = pytorch_gpu_inference.benchmark(img_batch) + + # ONNX CPU Benchmark + onnx_inference = ONNXInference(model_loader, "path_to_onnx_model") + benchmark_results["ONNX (CPU)"] = onnx_inference.benchmark(img_batch) + + # OpenVINO CPU Benchmark + ov_inference = OVInference(model_loader, "path_to_ov_model") + benchmark_results["OpenVINO (CPU)"] = ov_inference.benchmark(img_batch) + + # TensorRT CPU Benchmark + if cuda_is_available(): + tensorrt_inference = TensorRTInference(model_loader, device="cuda") + benchmark_results["TensorRT (GPU)"] = tensorrt_inference.benchmark(img_batch) + + return benchmark_results + + +@app.errorhandler(RequestEntityTooLarge) +def handle_file_too_large(e): + return "File is too large", 413 + + +@app.route("/demo") +def index(): + return render_template("demo.html") + + +@app.route("/process", methods=["POST"]) +def process_request(): + image_file = request.files.get("image") + model_type = request.form.get("inferenceMode") + mode = request.form.get("mode") + cnn_model = request.form.get("cnnModel") # Retrieve the selected CNN model + + # Add logging statements + logging.info( + "Received request with model_type: %s, mode: %s, image_file: %s", + model_type, + mode, + image_file.filename, + ) + if not os.path.exists(UPLOAD_FOLDER): + os.makedirs(UPLOAD_FOLDER) + + if image_file is None or image_file.filename == "": + logging.error("No file part or no selected file") + 
return jsonify({"error": "No file part or no selected file"}), 400 + + if not allowed_file(image_file.filename): + logging.error("Invalid file type: %s", image_file.filename) + return jsonify({"error": "Invalid file format. Allowed formats are png, jpg, jpeg, gif."}), 400 + + # Generate a unique filename using UUID + ext = image_file.filename.rsplit(".", 1)[1].lower() # Get the file extension + unique_filename = f"{uuid.uuid4().hex}.{ext}" + file_path = os.path.join(app.config["UPLOAD_FOLDER"], unique_filename) + + # Save the uploaded file with the unique name + image_file.seek(0) + image_file.save(file_path) + + logging.info("Saved file: %s", file_path) + + # Process the uploaded image using ImageProcessor + device = "cuda" if cuda_is_available() else "cpu" + img_processor = ImageProcessor(img_path=file_path, device=device) + img_batch = img_processor.process_image() + # img_batch = img_processor.process_image_official() + + if img_batch is None: + return jsonify({"error": "Invalid file type"}), 400 + + logging.info("Loading pre-trained model, for %s", cnn_model) + model_loader = ModelLoader(model_type=cnn_model, device=device) + + if mode == "benchmark": + + # Benchmark mode logic + logging.info("Running all benchmarks") + results = run_all_benchmarks(img_batch) + return jsonify({"benchmark": results}) + + elif mode == "predict": + + # Predict mode logic + logging.info("Running prediction for model type: %s", model_type) + inference_class = get_inference_class(model_type, model_loader) + if inference_class is None: + logging.error("Invalid model type selected: %s", model_type) + return jsonify({"error": "Invalid model type selected"}), 400 + + start_time = time.time() + predictions = inference_class.predict(img_batch) + end_time = time.time() + inference_time = (end_time - start_time) * 1000 + + return jsonify({"predictions": predictions, "inference_time": inference_time}) + else: + # Handle unexpected mode + logging.error("Invalid mode selected: %s", mode) + return jsonify({"error": "Invalid mode selected"}), 400 + + +if __name__ == "__main__": + # Configure logging + logging.basicConfig(level=logging.INFO) + app.run( + host="0.0.0.0", port=5000, debug=True + ) diff --git a/src/web_demo/config.py b/src/web_demo/config.py new file mode 100644 index 0000000..a29f39f --- /dev/null +++ b/src/web_demo/config.py @@ -0,0 +1,2 @@ +SSL_CERT_PATH = "/usr/src/app/keys/certificate.crt" +SSL_KEY_PATH = "/usr/src/app/keys/private.key" diff --git a/src/web_demo/static/css/styles.css b/src/web_demo/static/css/styles.css new file mode 100644 index 0000000..8112325 --- /dev/null +++ b/src/web_demo/static/css/styles.css @@ -0,0 +1,190 @@ +.inference-time-container { + background-color: #007bff; /* Bootstrap primary color */ + color: white; + text-align: center; + padding: 10px; + border-radius: 20px; + margin-top: 15px; + animation: popIn 0.5s ease; +} + +.top-prediction-container { + background-color: #28A745CC; + color: white; + text-align: center; + padding: 10px; + border-radius: 20px; + margin-top: 7px; + animation: popIn 0.5s; + animation-delay: 0.25s; /* Delay the animation */ +} + +@keyframes popIn { + 0% { + transform: scale(0); + opacity: 0; + } + 100% { + transform: scale(1); + opacity: 1; + } +} + +.benchmark-info { + background: linear-gradient( + 60deg, + rgba(0, 123, 255, 0.8), + rgba(23, 162, 184, 0.8), + rgba(40, 167, 69, 0.8) + ); + background-size: 300% 300%; + color: white; + padding: 20px; + border-radius: 10px; + margin-top: 20px; + text-align: center; + animation: gradientAnimation 
15s ease infinite; +} + +@keyframes gradientAnimation { + 0% { background-position: 0% 50%; } + 50% { background-position: 100% 50%; } + 100% { background-position: 0% 50%; } +} + +/* Typing animation */ +@keyframes typing { + from { width: 0; } + to { width: 100%; } +} + +#animatedText { + display: inline-block; + overflow: hidden; + white-space: nowrap; + animation: typing 2s steps(40, end), blink-caret 0.75s step-end infinite; + border-right: 2px solid; +} + +.flash-message { + position: relative; + padding-bottom: 6px; /* Adjust space for the countdown bar */ +} + +.countdown-bar { + position: absolute; + bottom: 0; + left: 0; + width: 100%; + height: 5px; /* Adjust height of the countdown bar */ + background-color: red; + transition: width 1s linear, background-color 1s linear; +} + +.close-button { + cursor: pointer; + border: none; + background: none; + font-size: 20px; + color: black; + position: absolute; + right: 10px; + top: 5px; +} + +/* Additional styles for the title and list */ +.info-card-title { + margin-bottom: 15px; + font-weight: bold; +} + +.info-card-list { + list-style: decimal inside; /* Decimal list style inside the content flow */ + margin: 0; /* Reset default margins */ + padding: 0; /* Reset default padding */ +} + +.info-card-list li { + font-size: 16px; + margin-bottom: 10px; /* Space between list items */ + text-indent: -20px; /* Indent to align numbers properly */ + padding-left: 20px; /* Padding to the left of the text */ +} + +.info-card-header { + display: flex; + align-items: center; + gap: 10px; /* Adjust as needed for spacing */ + color: white; +} + +.info-card-help { + width: 30px; /* Adjust size as desired */ + height: 30px; /* Adjust size as desired */ + border-radius: 50%; + background-color: white; + color: #007bff; + display: flex; + justify-content: center; + align-items: center; + font-size: 16px; /* Adjust font size as desired */ + margin-right: 5px; + margin-bottom: 5px; +} + +.info-card-close { + cursor: pointer; + border: none; + background: rgba(255, 255, 255, 0.5); + border-radius: 50%; + font-size: 12px; + color: #007bff; + line-height: 0; + width: 20px; /* Smaller size */ + height: 20px; /* Smaller size */ + display: flex; + align-items: center; + justify-content: center; + padding: 0; + margin-left: auto; + margin-right: 5px; + outline: none; + position: absolute; + right: 10px; + top: 10px; +} + +.info-card-close:hover { + background: rgba(255, 255, 255, 0.7); +} + +.info-card { + position: relative; + border-radius: 5px; + padding: 20px; /* Padding for the outer card */ + margin-bottom: 20px; + background: linear-gradient(135deg, #007bff, #00b0ff, #007bff, #82d4f2, #007bff); + background-size: 200% 200%; + animation: gradientAnimation 15s ease infinite; + color: white; + box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); +} + +.info-card-inner { + background-color: white; /* White background for inner card */ + border-radius: 5px; /* Match the border-radius of the outer card */ + padding: 20px; + color: #007bff; /* Blue color for the text inside the inner card */ +} + +@keyframes gradientAnimation { + 0% { background-position: 0% 50%; } + 50% { background-position: 100% 50%; } + 100% { background-position: 0% 50%; } +} + +.info-card-line { + border-top: 3px solid white; /* Made the line wider */ + margin-top: 0; + margin-bottom: 15px; /* Provide spacing between the line and the list */ +} diff --git a/src/web_demo/static/js/scripts.js b/src/web_demo/static/js/scripts.js new file mode 100644 index 0000000..b65971f --- /dev/null +++ 
b/src/web_demo/static/js/scripts.js @@ -0,0 +1,383 @@ +let probChart = null; // Global variable to hold the chart instance + +document.getElementById('image-form').addEventListener('submit', function(e) { + e.preventDefault(); + let formData = new FormData(this); + let submitButton = document.querySelector("#image-form button[type='submit']"); + let mode = document.getElementById('mode').value; + let cnnModel = document.getElementById('cnnModel').value; + + // Hide the info card + document.getElementById('info-card').style.display = 'none'; + + // Hide benchmark graphs when switching to prediction mode + if (mode === 'predict') { + document.getElementById('timeGraphContainer').style.display = 'none'; + document.getElementById('throughputGraphContainer').style.display = 'none'; + } else { + // Hide prediction elements when switching to benchmark mode + document.getElementById('processedImageContainer').style.display = 'none'; + document.getElementById('probGraphContainer').style.display = 'none'; + + // Start updating benchmark info + updateBenchmarkInfo(); + } + + // Disable the submit button and show the spinner + submitButton.disabled = true; + document.getElementById('spinner').style.display = 'block'; + + fetch('/process', { + method: 'POST', + body: formData + }) + .then(response => { + if (response.status === 400) { // Check for rate limit exceeded + throw new Error("File format invalid. Please use: JPG, Jpeg, PNG, Gif. Max 500MB"); + } + if (response.status === 429) { // Check for rate limit exceeded + throw new Error("Rate limit exceeded. Please try again later."); + } + return response.json(); + }) + .then(data => { + // Enable the submit button and hide the spinner + submitButton.disabled = false; + document.getElementById('spinner').style.display = 'none'; + document.getElementById('benchmarkInfo').style.display = 'none'; + + if (data.predictions) { + displayPredictions(data.predictions, data.inference_time); + } else if (data.benchmark) { + displayBenchmark(data.benchmark); + } + }) + .catch(error => { + console.error('Error:', error); + + // Display error message to the user + displayFlashMessage("danger", error.message); + // Enable the submit button in case of an error + submitButton.disabled = false; + document.getElementById('spinner').style.display = 'none'; + document.getElementById('benchmarkInfo').style.display = 'none'; + }); +}); + +function displayFlashMessage(category, message) { + let container = document.querySelector('.container'); + + // Create the flash message div + let flashMessageDiv = document.createElement('div'); + flashMessageDiv.className = `alert alert-${category} flash-message`; + flashMessageDiv.role = 'alert'; + + // Create the message text + let messageText = document.createElement('span'); + messageText.textContent = message; + flashMessageDiv.appendChild(messageText); + + // Create the countdown bar + let countdownBar = document.createElement('div'); + countdownBar.className = 'countdown-bar'; + flashMessageDiv.appendChild(countdownBar); + + // Create the close button + let closeButton = document.createElement('button'); + closeButton.className = 'close-button'; + closeButton.innerHTML = '×'; + closeButton.onclick = function() { + clearInterval(countdownInterval); + flashMessageDiv.remove(); + }; + flashMessageDiv.appendChild(closeButton); + + // Insert the flash message into the container + container.insertBefore(flashMessageDiv, container.firstChild); + + // Start the countdown + let timeLeft = 5; // Duration in seconds + let countdownInterval 
= setInterval(() => { + countdownBar.style.width = `${(timeLeft / 5) * 100}%`; + countdownBar.style.backgroundColor = `rgba(255, 0, 0, ${timeLeft / 5})`; + if (timeLeft <= 0) { + clearInterval(countdownInterval); + fadeOutElement(flashMessageDiv, countdownBar); + } + timeLeft--; + }, 1000); +} + +function fadeOutElement(element, excludeElement) { + var fadeEffect = setInterval(function () { + if (!element.style.opacity) { + element.style.opacity = 1; + } + if (element.style.opacity > 0) { + element.style.opacity -= 0.1; + if (excludeElement) { + excludeElement.style.opacity = 1; // Keep the countdown bar fully visible + } + } else { + clearInterval(fadeEffect); + element.remove(); + } + }, 100); +} + + +document.getElementById('mode').addEventListener('change', updateModelOptions); +updateModelOptions(); + +function displayPredictions(predictions, inferenceTime) { + const processedImageContainer = document.getElementById('processedImageContainer'); + const probGraphContainer = document.getElementById('probGraphContainer'); + + processedImageContainer.style.display = 'block'; + probGraphContainer.style.display = 'block'; + + // Display the mini image + let imageInput = document.getElementById('image'); + if (imageInput.files && imageInput.files[0]) { + let reader = new FileReader(); + reader.onload = function(e) { + let processedImage = document.getElementById('processedImage'); + if (processedImage) { + processedImage.src = e.target.result; + processedImage.style.maxWidth = '450px'; // Adjust width as needed + processedImage.style.height = 'auto'; // Maintain aspect ratio + } + }; + reader.readAsDataURL(imageInput.files[0]); + } + + // Render prediction probabilities graph + renderProbGraph(predictions); + + // Update the inference time container + let inferenceTimeDiv = document.getElementById('inferenceTime'); + if (inferenceTimeDiv) { + // Remove the element + inferenceTimeDiv.remove(); + + // Create a new div element for inference time + let newInferenceTimeDiv = document.createElement('div'); + newInferenceTimeDiv.id = 'inferenceTime'; + newInferenceTimeDiv.className = 'inference-time-container'; + newInferenceTimeDiv.innerHTML = `Inference Time: ${inferenceTime.toFixed(2)} ms`; + + // Re-add the element to the DOM + let probGraphContainer = document.getElementById('probGraphContainer'); + probGraphContainer.appendChild(newInferenceTimeDiv); + } + + // Update the inference time container + let cnnModel = document.getElementById('cnnModel').value; + let topPrediction = document.getElementById('topPrediction'); + if (topPrediction) { + // Remove the element + topPrediction.remove(); + + // Create a new div element for inference time + let newTopPrediction = document.createElement('div'); + newTopPrediction.id = 'topPrediction'; + newTopPrediction.className = 'top-prediction-container'; + newTopPrediction.innerHTML = `${cnnModel.toUpperCase()} thinks it is: ${predictions[0].label}`; + + // Re-add the element to the DOM + let probGraphContainer = document.getElementById('probGraphContainer'); + probGraphContainer.appendChild(newTopPrediction); + } +} + +function renderProbGraph(predictions) { + const ctx = document.getElementById('probGraph').getContext('2d'); + + // Destroy the existing chart if it exists + if (probChart) { + probChart.destroy(); + } + + const labels = predictions.map(prediction => prediction.label); + const probs = predictions.map(prediction => (prediction.confidence * 100).toFixed(2)); // Convert to percentage + + // Define a blue color palette + const 
bluePurplePalette = [ + 'rgba(0, 123, 255, 0.8)', // Bootstrap primary blue + 'rgba(23, 162, 184, 0.8)', // A lighter shade of blue + 'rgba(40, 167, 69, 0.8)', // A greenish-blue shade + 'rgba(0, 105, 217, 0.8)', // A darker shade of blue + 'rgba(3, 169, 244, 0.8)' // A sky blue shade + ]; + + // Assign colors from the palette to each bar + const backgroundColors = probs.map((_, index) => bluePurplePalette[index % bluePurplePalette.length]); + + probChart = new Chart(ctx, { + type: 'bar', + data: { + labels: labels, + datasets: [{ + label: 'Confidence (%)', + data: probs, + backgroundColor: backgroundColors, + borderColor: backgroundColors.map(color => color.replace('0.8', '1')), // Darker border color + borderWidth: 1 + }] + }, + options: { + indexAxis: 'y', // Set to 'y' for horizontal bars + scales: { + x: { + beginAtZero: true + } + } + } + }); +} + +function randomRGB() { + return Math.floor(Math.random() * 255); +} + +function displayBenchmark(benchmarkResults) { + console.log(benchmarkResults) + // Hide prediction elements + document.getElementById('processedImageContainer').style.display = 'none'; + document.getElementById('probGraphContainer').style.display = 'none'; + + // Display benchmark graphs + document.getElementById('timeGraphContainer').style.display = 'block'; + document.getElementById('throughputGraphContainer').style.display = 'block'; + + // Prepare data for line graphs + const labels = Object.keys(benchmarkResults); + const times = labels.map(label => benchmarkResults[label][0]); + const throughputs = labels.map(label => benchmarkResults[label][1]); + + // Display line graphs + displayLineGraph(labels, times, throughputs); +} + +function displayLineGraph(labels, times, throughputs) { + const timeGraphContainer = document.getElementById('timeGraphContainer'); + const throughputGraphContainer = document.getElementById('throughputGraphContainer'); + + // Colors for the benchmark graphs + const timeGraphColor = 'rgba(0, 123, 255, 0.8)'; // Bootstrap primary blue for the time graph + const throughputGraphColor = 'rgba(23, 162, 184, 0.8)'; // A lighter shade of blue for the throughput graph + + if (timeGraphContainer && throughputGraphContainer) { + timeGraphContainer.style.display = 'block'; + throughputGraphContainer.style.display = 'block'; + + // Inference Time Graph + const timeCtx = document.getElementById('timeGraph').getContext('2d'); + new Chart(timeCtx, { + type: 'bar', + data: { + labels: labels, + datasets: [{ + label: 'Average Inference Time (ms)', + data: times, + backgroundColor: timeGraphColor, + borderColor: timeGraphColor.replace('0.8', '1'), + borderWidth: 1 + }] + }, + options: { + indexAxis: 'y', // Set to 'y' for horizontal bars + scales: { + x: { + beginAtZero: true + } + } + } + }); + + // Throughput Graph + const throughputCtx = document.getElementById('throughputGraph').getContext('2d'); + new Chart(throughputCtx, { + type: 'bar', + data: { + labels: labels, + datasets: [{ + label: 'Average Throughput (samples/sec)', + data: throughputs, + backgroundColor: throughputGraphColor, + borderColor: throughputGraphColor.replace('0.8', '1'), + borderWidth: 1 + }] + }, + options: { + indexAxis: 'y', // Set to 'y' for horizontal bars + scales: { + x: { + beginAtZero: true + } + } + } + }); + } else { + console.error('Error: Graph containers not found'); + } +} + +function updateModelOptions() { + const modeSelect = document.getElementById('mode'); + const modelSelect = document.getElementById('inferenceMode'); + + // Clear existing options + 
modelSelect.innerHTML = ''; + + if (modeSelect.value === 'predict') { + // Options for 'Predict' mode + const options = ['ov', 'pytorch', 'onnx']; + options.forEach(opt => { + let option = document.createElement('option'); + option.value = opt; + option.text = opt.toUpperCase(); // Capitalize first letter + modelSelect.appendChild(option); + }); + } else if (modeSelect.value === 'benchmark') { + // Only 'ALL' option for 'Benchmark' mode + let option = document.createElement('option'); + option.value = 'all'; + option.text = 'ALL'; + modelSelect.appendChild(option); + } +} + +function updateBenchmarkInfo() { + const sentences = [ + "Analyzing model performance...", + "Running benchmarks on different models...", + "Calculating average inference time...", + "Evaluating throughput metrics..." + ]; + let currentSentence = 0; + let currentChar = 0; + + const animatedText = document.getElementById('animatedText'); + const benchmarkInfo = document.getElementById('benchmarkInfo'); + benchmarkInfo.style.display = 'block'; + + function typeSentence() { + if (currentChar < sentences[currentSentence].length) { + animatedText.textContent += sentences[currentSentence].charAt(currentChar); + currentChar++; + setTimeout(typeSentence, 100); // Delay between each character + } else { + // Wait before starting the next sentence + setTimeout(() => { + currentSentence = (currentSentence + 1) % sentences.length; + animatedText.textContent = ''; // Clear the text + currentChar = 0; // Reset character position + typeSentence(); // Start typing the next sentence + }, 3000); // Delay between sentences + } + } + + typeSentence(); +} + diff --git a/src/web_demo/templates/base.html b/src/web_demo/templates/base.html new file mode 100644 index 0000000..0813424 --- /dev/null +++ b/src/web_demo/templates/base.html @@ -0,0 +1,20 @@ + + + + {% block title %}{% endblock %} - ResNetTensorRT Demo + + + + + {% block head %}{% endblock %} + + +
+  {% block content %}{% endblock %}
+  {% block scripts %}{% endblock %}
[remaining HTML markup of base.html (doctype, head, stylesheet links, body wrapper) was lost in extraction]
\ No newline at end of file
diff --git a/src/web_demo/templates/demo.html b/src/web_demo/templates/demo.html
new file mode 100644
index 0000000..3758dd5
--- /dev/null
+++ b/src/web_demo/templates/demo.html
@@ -0,0 +1,136 @@
+{% extends "base.html" %}
+
+{% block head %} ... {% endblock %}
+
+{% block content %}
+  {% with messages = get_flashed_messages(with_categories=true) %}
+    {% if messages %}
+      {% for category, message in messages %} ... {% endfor %}
+    {% endif %}
+  {% endwith %}
+
+  CNN Model Inference Live Demo
+
+  Instructions:
+  1. Choose and upload an image in PNG, JPG, JPEG, or GIF format.
+  2. Choose between ResNet50 or EfficientNet for image processing.
+  3. Run the model to get predictions on your uploaded image.
+  4. Choose between OV, PyTorch CPU, or ONNX for model inference.
+  5. Compare performance across different models and inference settings.
+
[the upload form, model/mode selectors, spinner, and result/graph containers of demo.html were lost in extraction]
+{% endblock %}
+
+{% block scripts %} ... {% endblock %}
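For reference, a hedged sketch of exercising the new `/process` endpoint once the stack is running via `docker-compose`: the form-field names are taken from `src/web_demo/app.py` above, while the host and image path are placeholders; note the endpoint is rate-limited to 5 requests per minute.

```python
import requests

# Send one prediction request to the Flask demo service.
# "image", "mode", "inferenceMode", and "cnnModel" are the form fields read
# by the /process handler in src/web_demo/app.py; values mirror the demo UI.
with open("sample.jpg", "rb") as f:  # placeholder image path
    resp = requests.post(
        "http://localhost:5000/process",  # assumes the docker-compose port mapping
        files={"image": ("sample.jpg", f, "image/jpeg")},
        data={"mode": "predict", "inferenceMode": "pytorch", "cnnModel": "resnet50"},
        timeout=60,
    )

resp.raise_for_status()
result = resp.json()
print(result["predictions"])     # top-k [{"label": ..., "confidence": ...}, ...]
print(result["inference_time"])  # milliseconds
```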