From f7b4ffa69b7d4771a3b772170f2439e1f0dfeaae Mon Sep 17 00:00:00 2001 From: Tyler Termini <69917602+waycroft@users.noreply.github.com> Date: Sat, 4 Feb 2023 15:37:44 -0800 Subject: [PATCH 01/13] update gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index b6e4761..9b5e917 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,6 @@ dmypy.json # Pyre type checker .pyre/ + +# Misc / user +.history From e134253a83da7e86f005b86ee605f186532db80c Mon Sep 17 00:00:00 2001 From: Tyler Termini <69917602+waycroft@users.noreply.github.com> Date: Sat, 4 Feb 2023 15:37:59 -0800 Subject: [PATCH 02/13] update model to mpnet --- download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/download.py b/download.py index 9f2956d..e848970 100644 --- a/download.py +++ b/download.py @@ -7,7 +7,7 @@ def download_model(): # do a dry run of loading the huggingface model, which will download weights - pipeline('fill-mask', model='bert-base-uncased') + pipeline('fill-mask', model='sentence-transformers/paraphrase-mpnet-base-v2') if __name__ == "__main__": download_model() \ No newline at end of file From 6d9f8e6f024596e8a7ee6e6647b9c4d47a2aa12f Mon Sep 17 00:00:00 2001 From: Tyler Termini <69917602+waycroft@users.noreply.github.com> Date: Sat, 4 Feb 2023 16:07:31 -0800 Subject: [PATCH 03/13] replace bert with mpnet --- app.py | 17 ++++++++--------- requirements.txt | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/app.py b/app.py index 7f6b061..12387d4 100644 --- a/app.py +++ b/app.py @@ -1,24 +1,23 @@ -from transformers import pipeline -import torch +from sentence_transformers import SentenceTransformer + # Init is ran on server startup # Load your model to GPU as a global variable here using the variable name "model" def init(): global model - - device = 0 if torch.cuda.is_available() else -1 - model = pipeline('fill-mask', model='bert-base-uncased', device=device) + model = 
SentenceTransformer("sentence-transformers/paraphrase-mpnet-base-v2") + # Inference is ran for every server call # Reference your preloaded global model variable here. -def inference(model_inputs:dict) -> dict: +def inference(model_inputs: dict) -> dict: global model # Parse out your arguments - prompt = model_inputs.get('prompt', None) + prompt = model_inputs.get("prompt", None) if prompt == None: - return {'message': "No prompt provided"} - + return {"message": "No prompt provided"} + # Run the model result = model(prompt) diff --git a/requirements.txt b/requirements.txt index f9cbeac..40b5e65 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ sanic==22.6.2 -transformers +sentence_transfomers accelerate From 6390b745999c4037a051122e19e94a6d0f7f4285 Mon Sep 17 00:00:00 2001 From: Tyler Termini <69917602+waycroft@users.noreply.github.com> Date: Sat, 4 Feb 2023 16:16:53 -0800 Subject: [PATCH 04/13] add specific sentence_transformers version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 40b5e65..6ee458e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ sanic==22.6.2 -sentence_transfomers +sentence_transfomers==2.2.2 accelerate From f8edacc4315390546c94aa09295d5b568c8b068a Mon Sep 17 00:00:00 2001 From: Tyler Termini <69917602+waycroft@users.noreply.github.com> Date: Sat, 4 Feb 2023 16:22:17 -0800 Subject: [PATCH 05/13] hyphen, not underscore --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6ee458e..fa57448 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ sanic==22.6.2 -sentence_transfomers==2.2.2 +sentence-transfomers==2.2.2 accelerate From 4dd89efe5f7f8f786e7f1c17e5580a2bc6927da6 Mon Sep 17 00:00:00 2001 From: Tyler Termini <69917602+waycroft@users.noreply.github.com> Date: Sat, 4 Feb 2023 16:27:03 -0800 Subject: [PATCH 06/13] fix typo in sentence-transformers package name --- 
requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index fa57448..a80190b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ sanic==22.6.2 -sentence-transfomers==2.2.2 +sentence-transformers==2.2.2 accelerate From f5d3a2ecaa6b2d964cfa0a13c3b5da25c393ac4d Mon Sep 17 00:00:00 2001 From: Tyler Termini <69917602+waycroft@users.noreply.github.com> Date: Sat, 4 Feb 2023 16:38:29 -0800 Subject: [PATCH 07/13] add .encode() --- app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.py b/app.py index 12387d4..c333ae3 100644 --- a/app.py +++ b/app.py @@ -19,7 +19,7 @@ def inference(model_inputs: dict) -> dict: return {"message": "No prompt provided"} # Run the model - result = model(prompt) + result = model.encode(prompt) # Return the results as a dictionary return result From 2efd5aa439bd4dcf41fedd6d50f171f2d4be3ab4 Mon Sep 17 00:00:00 2001 From: Tyler Termini <69917602+waycroft@users.noreply.github.com> Date: Sat, 4 Feb 2023 17:31:36 -0800 Subject: [PATCH 08/13] return JSON output --- app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.py b/app.py index c333ae3..2fef3ea 100644 --- a/app.py +++ b/app.py @@ -22,4 +22,4 @@ def inference(model_inputs: dict) -> dict: result = model.encode(prompt) # Return the results as a dictionary - return result + return {"data": result} From 1fd0f229d025a8843541f1cd3390fb511c6ca989 Mon Sep 17 00:00:00 2001 From: Tyler Termini <69917602+waycroft@users.noreply.github.com> Date: Sat, 4 Feb 2023 18:00:21 -0800 Subject: [PATCH 09/13] push a commit for banana re-deploy --- app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.py b/app.py index 2fef3ea..4d4418a 100644 --- a/app.py +++ b/app.py @@ -22,4 +22,4 @@ def inference(model_inputs: dict) -> dict: result = model.encode(prompt) # Return the results as a dictionary - return {"data": result} + return { "data": result } From 
9c5d6b17117f603ca505fbbc9462f7b6d374472f Mon Sep 17 00:00:00 2001 From: Tyler Termini <69917602+waycroft@users.noreply.github.com> Date: Sat, 4 Feb 2023 18:40:35 -0800 Subject: [PATCH 10/13] a python array isn't a list! --- app.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/app.py b/app.py index 4d4418a..d23600e 100644 --- a/app.py +++ b/app.py @@ -1,4 +1,5 @@ from sentence_transformers import SentenceTransformer +from sklearn.preprocessing import normalize # Init is ran on server startup @@ -19,7 +20,11 @@ def inference(model_inputs: dict) -> dict: return {"message": "No prompt provided"} # Run the model - result = model.encode(prompt) + sentence_embeddings = model.encode(prompt) + normalized_embeddings = normalize(sentence_embeddings) + + # Convert the output array to a list + output = normalized_embeddings.tolist() # Return the results as a dictionary - return { "data": result } + return { "data": output } From 31566659bc48f69dc8e66688de8a5fae6dce952c Mon Sep 17 00:00:00 2001 From: Tyler Termini <69917602+waycroft@users.noreply.github.com> Date: Wed, 8 Feb 2023 20:02:54 -0800 Subject: [PATCH 11/13] remove unnecessary argument to pipeline in download.py --- download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/download.py b/download.py index e848970..611be8d 100644 --- a/download.py +++ b/download.py @@ -7,7 +7,7 @@ def download_model(): # do a dry run of loading the huggingface model, which will download weights - pipeline('fill-mask', model='sentence-transformers/paraphrase-mpnet-base-v2') + pipeline(model='sentence-transformers/paraphrase-mpnet-base-v2') if __name__ == "__main__": download_model() \ No newline at end of file From 4a35620b12e9efd69fc557ff1f1b66e71fb6ee2d Mon Sep 17 00:00:00 2001 From: Tyler Termini <69917602+waycroft@users.noreply.github.com> Date: Wed, 8 Feb 2023 20:12:25 -0800 Subject: [PATCH 12/13] doh --- download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/download.py b/download.py index 611be8d..d693872 100644 --- a/download.py +++ b/download.py @@ -3,11 +3,11 @@ # In this example: A Huggingface BERT model -from transformers import pipeline +from sentence_transformers import SentenceTransformer def download_model(): # do a dry run of loading the huggingface model, which will download weights - pipeline(model='sentence-transformers/paraphrase-mpnet-base-v2') + SentenceTransformer('sentence-transformers/paraphrase-mpnet-base-v2') if __name__ == "__main__": download_model() \ No newline at end of file From a9d1e3c8432e0caff70e798061c506e23b4cd932 Mon Sep 17 00:00:00 2001 From: ramsis Date: Tue, 4 Jul 2023 20:13:32 +0100 Subject: [PATCH 13/13] migrate to potassium --- Dockerfile | 10 ++++------ README.md | 34 ++++++++++++++-------------------- app.py | 41 ++++++++++++++++++++++++----------------- banana_config.json | 24 ++++++++++++++++++++++++ requirements.txt | 2 +- server.py | 42 ------------------------------------------ test.py | 10 ---------- 7 files changed, 67 insertions(+), 96 deletions(-) create mode 100644 banana_config.json delete mode 100644 server.py delete mode 100644 test.py diff --git a/Dockerfile b/Dockerfile index 888a8da..950601a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,5 @@ +# This is a potassium-standard dockerfile, compatible with Banana + # Must use a Cuda version 11+ FROM pytorch/pytorch:1.11.0-cuda11.3-cudnn8-runtime @@ -11,18 +13,14 @@ RUN pip3 install --upgrade pip ADD requirements.txt requirements.txt RUN pip3 install -r requirements.txt -# We add the banana boilerplate here -ADD server.py . # Add your model weight files # (in this case we have a python script) ADD download.py . RUN python3 download.py - -# Add your custom app code, init() and inference() -ADD app.py . +ADD . . 
EXPOSE 8000 -CMD python3 -u server.py +CMD python3 -u app.py \ No newline at end of file diff --git a/README.md b/README.md index 89068bd..28815aa 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,14 @@ - -# 🍌 Banana Serverless - -This repo gives a framework to serve ML models in production using simple HTTP servers. - -# Quickstart -**[Follow the quickstart guide in Banana's documentation to use this repo](https://docs.banana.dev/banana-docs/quickstart).** - -*(choose "GitHub Repository" deployment method)* - -
- -# Helpful Links -Understand the 🍌 [Serverless framework](https://docs.banana.dev/banana-docs/core-concepts/inference-server/serverless-framework) and functionality of each file within it. - -Generalize this framework to [deploy anything on Banana](https://docs.banana.dev/banana-docs/resources/how-to-serve-anything-on-banana). - -
- -## Use Banana for scale. +# My Potassium App +This is a Potassium HTTP server, created with `banana init` CLI + +### Testing +Start a local dev server with `banana dev` + +### Deployment +1. Create empty repo on [Github](https://github.com) +2. Push this repo to github +``` +git remote add origin https://github.com/{username}/{repo-name}.git +``` +3. [Log into Banana](https://app.banana.dev/onboard) +4. Select this repo to build and deploy! \ No newline at end of file diff --git a/app.py b/app.py index d23600e..e21af78 100644 --- a/app.py +++ b/app.py @@ -1,24 +1,26 @@ +from potassium import Potassium, Request, Response + from sentence_transformers import SentenceTransformer from sklearn.preprocessing import normalize +app = Potassium("my_app") -# Init is ran on server startup -# Load your model to GPU as a global variable here using the variable name "model" +# @app.init runs at startup, and loads models into the app's context +@app.init def init(): - global model model = SentenceTransformer("sentence-transformers/paraphrase-mpnet-base-v2") - - -# Inference is ran for every server call -# Reference your preloaded global model variable here. 
-def inference(model_inputs: dict) -> dict: - global model - - # Parse out your arguments - prompt = model_inputs.get("prompt", None) - if prompt == None: - return {"message": "No prompt provided"} - + + context = { + "model": model + } + + return context + +# @app.handler runs for every call +@app.handler() +def handler(context: dict, request: Request) -> Response: + prompt = request.json.get("prompt") + model = context.get("model") # Run the model sentence_embeddings = model.encode(prompt) normalized_embeddings = normalize(sentence_embeddings) @@ -26,5 +28,10 @@ def inference(model_inputs: dict) -> dict: # Convert the output array to a list output = normalized_embeddings.tolist() - # Return the results as a dictionary - return { "data": output } + return Response( + json = {"data": output}, + status=200 + ) + +if __name__ == "__main__": + app.serve() \ No newline at end of file diff --git a/banana_config.json b/banana_config.json new file mode 100644 index 0000000..cd9edbe --- /dev/null +++ b/banana_config.json @@ -0,0 +1,24 @@ +{ + "name": "", + "category": "", + "example_input": { + "prompt": "Hello I am a [MASK] model." + }, + "example_output": { + "outputs":[ + { + "score":0.13177461922168732, + "token":4827, + "token_str":"fashion", + "sequence":"hello i am a fashion model." + }, + { + "score":0.1120428815484047, + "token":2535, + "token_str":"role", + "sequence":"hello i am a role model." + } + ] + }, + "version": "1" +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index a80190b..10f94c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -sanic==22.6.2 +potassium sentence-transformers==2.2.2 accelerate diff --git a/server.py b/server.py deleted file mode 100644 index d66cf1a..0000000 --- a/server.py +++ /dev/null @@ -1,42 +0,0 @@ -# Do not edit if deploying to Banana Serverless -# This file is boilerplate for the http server, and follows a strict interface. 
- -# Instead, edit the init() and inference() functions in app.py - -from sanic import Sanic, response -import subprocess -import app as user_src - -# We do the model load-to-GPU step on server startup -# so the model object is available globally for reuse -user_src.init() - -# Create the http server app -server = Sanic("my_app") - -# Healthchecks verify that the environment is correct on Banana Serverless -@server.route('/healthcheck', methods=["GET"]) -def healthcheck(request): - # dependency free way to check if GPU is visible - gpu = False - out = subprocess.run("nvidia-smi", shell=True) - if out.returncode == 0: # success state on shell command - gpu = True - - return response.json({"state": "healthy", "gpu": gpu}) - -# Inference POST handler at '/' is called for every http call from Banana -@server.route('/', methods=["POST"]) -def inference(request): - try: - model_inputs = response.json.loads(request.json) - except: - model_inputs = request.json - - output = user_src.inference(model_inputs) - - return response.json(output) - - -if __name__ == '__main__': - server.run(host='0.0.0.0', port=8000, workers=1) diff --git a/test.py b/test.py deleted file mode 100644 index 3c88413..0000000 --- a/test.py +++ /dev/null @@ -1,10 +0,0 @@ -# This file is used to verify your http server acts as expected -# Run it with `python3 test.py`` - -import requests - -model_inputs = {'prompt': 'Hello I am a [MASK] model.'} - -res = requests.post('http://localhost:8000/', json = model_inputs) - -print(res.json()) \ No newline at end of file