From b9e84f8ded992f871d72f32eca5f048ab7cc25ee Mon Sep 17 00:00:00 2001 From: ErikKaum Date: Fri, 16 Dec 2022 09:58:16 +0200 Subject: [PATCH 1/4] model id specified in dockerfile --- Dockerfile | 3 +++ app.py | 4 +++- download.py | 5 ++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 888a8da..8c3ff55 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,6 +11,9 @@ RUN pip3 install --upgrade pip ADD requirements.txt requirements.txt RUN pip3 install -r requirements.txt +# Set model id as an ENV variable +ENV MODEL_ID bert-base-uncased + # We add the banana boilerplate here ADD server.py . diff --git a/app.py b/app.py index 7f6b061..a75cead 100644 --- a/app.py +++ b/app.py @@ -1,13 +1,15 @@ from transformers import pipeline import torch +import os # Init is ran on server startup # Load your model to GPU as a global variable here using the variable name "model" def init(): global model + model_id = os.getenv("MODEL_ID") device = 0 if torch.cuda.is_available() else -1 - model = pipeline('fill-mask', model='bert-base-uncased', device=device) + model = pipeline('fill-mask', model=model_id, device=device) # Inference is ran for every server call # Reference your preloaded global model variable here. 
diff --git a/download.py b/download.py index 9f2956d..b93604d 100644 --- a/download.py +++ b/download.py @@ -4,10 +4,13 @@ # In this example: A Huggingface BERT model from transformers import pipeline +import os def download_model(): + model_id = os.getenv("MODEL_ID") + # do a dry run of loading the huggingface model, which will download weights - pipeline('fill-mask', model='bert-base-uncased') + pipeline('fill-mask', model=model_id) if __name__ == "__main__": download_model() \ No newline at end of file From 07a40c150a71cec15e2e1b1e9694448da0fa3e1c Mon Sep 17 00:00:00 2001 From: ErikKaum Date: Sun, 18 Dec 2022 20:34:20 +0200 Subject: [PATCH 2/4] comments for clarity & changed variable name --- Dockerfile | 5 +++-- app.py | 6 ++++-- download.py | 6 ++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8c3ff55..0454cf0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,8 +11,9 @@ RUN pip3 install --upgrade pip ADD requirements.txt requirements.txt RUN pip3 install -r requirements.txt -# Set model id as an ENV variable -ENV MODEL_ID bert-base-uncased +# In this example, we can define the hugging face model as an ENV variable +# and from here pass it to download.py & app.py +ENV HF_MODEL_NAME bert-base-uncased # We add the banana boilerplate here ADD server.py . diff --git a/app.py b/app.py index a75cead..2137df3 100644 --- a/app.py +++ b/app.py @@ -6,10 +6,12 @@ # Load your model to GPU as a global variable here using the variable name "model" def init(): global model - model_id = os.getenv("MODEL_ID") + + # In this example, we get the model name as an ENV variable defined in the Dockerfile + hf_model_name = os.getenv("HF_MODEL_NAME") device = 0 if torch.cuda.is_available() else -1 - model = pipeline('fill-mask', model=model_id, device=device) + model = pipeline('fill-mask', model=hf_model_name, device=device) # Inference is ran for every server call # Reference your preloaded global model variable here. 
diff --git a/download.py b/download.py index b93604d..b9fc706 100644 --- a/download.py +++ b/download.py @@ -7,10 +7,12 @@ import os def download_model(): - model_id = os.getenv("MODEL_ID") + + # In this example, we get the model name as an ENV variable defined in the Dockerfile + hf_model_name = os.getenv("HF_MODEL_NAME") # do a dry run of loading the huggingface model, which will download weights - pipeline('fill-mask', model=model_id) + pipeline('fill-mask', model=hf_model_name) if __name__ == "__main__": download_model() \ No newline at end of file From d0ffecd710315a0185f9df4643cb8917d5de6c13 Mon Sep 17 00:00:00 2001 From: ErikKaum Date: Sun, 18 Dec 2022 21:21:59 +0200 Subject: [PATCH 3/4] how to test locally added to readme --- README.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/README.md b/README.md index 89068bd..a17a210 100644 --- a/README.md +++ b/README.md @@ -17,4 +17,35 @@ Generalize this framework to [deploy anything on Banana](https://docs.banana.dev
+# Local testing + +## With docker + +To test the Serverless Framework with docker locally, you need to build the docker image and then run it as a container. +In the root of this directory, run: +``` +docker build . -t serverless-template +``` +After which you can run the container. Here we also forward the port to access the localhost url outside of the +container and enable gpu acceleration. + +``` +docker run serverless-template -p 8000:8000 --gpus +``` + +## Without docker + +Testing your code without docker is straightforward. Remember to pass in the Hugging Face model name as +an ENV variable. In this case: +``` +export HF_MODEL_NAME=bert-base-uncased +``` + +And then simply run the server.py +``` +python3 server.py +``` + +
+ ## Use Banana for scale. From fdf7d7d1541651bfa37d059b8543a8cbc3eed226 Mon Sep 17 00:00:00 2001 From: ErikKaum Date: Sun, 18 Dec 2022 21:40:55 +0200 Subject: [PATCH 4/4] added requirements & docker run command was in wrong order --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a17a210..4ff47d6 100644 --- a/README.md +++ b/README.md @@ -28,9 +28,8 @@ docker build . -t serverless-template ``` After which you can run the container. Here we also forward the port to access the localhost url outside of the container and enable gpu acceleration. - ``` -docker run serverless-template -p 8000:8000 --gpus +docker run -p 8000:8000 --gpus=all serverless-template ``` ## Without docker @@ -40,7 +39,10 @@ an ENV variable. In this case: ``` export HF_MODEL_NAME=bert-base-uncased ``` - +Make sure you have the required dependencies: +``` +pip3 install -r requirements.txt +``` And then simply run the server.py ``` python3 server.py